Source code for scitex_stats.tests.nonparametric._test_mannwhitneyu

#!/usr/bin/env python3
# Timestamp: "2025-10-01 17:45:00 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_mannwhitneyu.py

r"""
Mann-Whitney U test (Wilcoxon rank-sum test).

Functionalities:
  - Perform Mann-Whitney U test (Wilcoxon rank-sum test)
  - Non-parametric test for comparing two independent samples
  - Compute rank-biserial correlation effect size
  - Generate visualizations with rank distributions
  - Support flexible output formats (dict or DataFrame)

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Two independent samples (arrays or Series)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import argparse
import os
from typing import Literal, Optional, Union

import matplotlib.axes
import matplotlib.pyplot as _mpl_plt  # noqa: E402
import numpy as np
import pandas as pd
from scipy import stats

from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, fmt_sym

# Path of this module file and the directory that contains it.
__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

# Module-level logger obtained from the package's logging helper.
logger = getLogger(__name__)



[docs]
def test_mannwhitneyu(  # noqa: C901
    x: Union[np.ndarray, pd.Series, str],
    y: Union[np.ndarray, pd.Series, str],
    var_x: str = "x",
    var_y: str = "y",
    alternative: Literal["two-sided", "less", "greater"] = "two-sided",
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    data: Union[pd.DataFrame, str, None] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform Mann-Whitney U test (Wilcoxon rank-sum test).

    Parameters
    ----------
    x, y : arrays or Series
        Two independent samples to compare. NaN values are dropped before
        the test is run. When ``data`` is given, strings are treated as
        column names.
    var_x, var_y : str
        Labels for samples
    alternative : {'two-sided', 'less', 'greater'}, default 'two-sided'
        Alternative hypothesis
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If None and plot=True, creates new figure.
        If provided, automatically enables plotting.
    data : DataFrame, str, or None, optional
        DataFrame or CSV path. When provided, string values for x/y
        are resolved as column names (seaborn-style).
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format. Any other value is forwarded to
        ``convert_results``.
    decimals : int, default 3
        Number of decimal places for rounding
    verbose : bool, default False
        Whether to log test results

    Returns
    -------
    results : dict or DataFrame
        Test results including:
        - test_method: 'Mann-Whitney U test'
        - statistic: U-statistic value (rounded to ``decimals``)
        - stat_symbol: 'U'
        - n_x, n_y: Sample sizes after NaN removal
        - var_x, var_y: Variable labels
        - pvalue: p-value (rounded to ``decimals``)
        - stars: Significance stars
        - alpha: Significance level used
        - significant: Whether null hypothesis is rejected
          (unrounded p-value < alpha)
        - effect_size: Rank-biserial correlation (rounded to ``decimals``)
        - effect_size_metric: 'rank-biserial correlation'
        - effect_size_interpretation: Interpretation
        - H0: Null hypothesis description
        - recommendation: Plain-language summary of the outcome

    Raises
    ------
    ValueError
        If either sample has no observations left after NaN removal, or
        if a sample cannot be converted to floating point values.

    Notes
    -----
    The Mann-Whitney U test (also known as Wilcoxon rank-sum test) is a
    non-parametric test for comparing two independent samples.

    **Null Hypothesis (H0)**: The two samples come from distributions with
    equal medians (more precisely: P(X > Y) = 0.5)

    **Test Statistic U**:

    .. math::
        U = n_1 n_2 + \frac{n_1(n_1+1)}{2} - R_1

    Where:
    - n_1, n_2: Sample sizes
    - R_1: Sum of ranks for sample 1

    **Effect Size (Rank-biserial correlation)**:

    .. math::
        r = 1 - \frac{2U}{n_1 n_2}

    Or equivalently:

    .. math::
        r = \frac{2(\bar{R}_1 - \bar{R}_2)}{n_1 + n_2}

    Interpretation:
    - |r| < 0.1:  negligible
    - |r| < 0.3:  small
    - |r| < 0.5:  medium
    - |r| ≥ 0.5:  large

    **Note on the sign of r**: the implementation plugs the statistic
    returned by ``scipy.stats.mannwhitneyu`` into ``1 - 2U / (n_1 n_2)``.
    Which of the two U values SciPy reports therefore determines the sign
    of ``effect_size``; only ``|r|`` is used for the interpretation.
    Confirm the convention against the installed SciPy version before
    relying on the sign.

    **Advantages**:
    - No normality assumption required
    - Robust to outliers
    - Works with ordinal data
    - More powerful than t-test for non-normal data

    **When to use**:
    - Comparing two independent groups
    - Data violate normality
    - Presence of outliers
    - Ordinal data (e.g., Likert scales)
    - Small sample sizes

    **Comparison with other tests**:
    - vs t-test: More robust, less powerful when assumptions met
    - vs Brunner-Munzel: MWU assumes identical shape, BM does not
    - vs KS test: MWU tests location, KS tests entire distribution

    **Note on relationship to Brunner-Munzel**:
    Mann-Whitney U assumes samples have the same distribution shape
    (differing only in location). For more robust analysis without this
    assumption, use test_brunner_munzel() instead.

    References
    ----------
    .. [1] Mann, H. B., & Whitney, D. R. (1947). "On a test of whether one
           of two random variables is stochastically larger than the other".
           Annals of Mathematical Statistics, 18(1), 50-60.
    .. [2] Kerby, D. S. (2014). "The simple difference formula: An approach
           to teaching nonparametric correlation". Comprehensive Psychology, 3, 11.

    Examples
    --------
    >>> # Basic usage
    >>> x = np.array([1, 2, 3, 4, 5])
    >>> y = np.array([3, 4, 5, 6, 7])
    >>> result = test_mannwhitneyu(x, y)
    >>> result['significant']
    False

    >>> # With auto-created figure
    >>> result = test_mannwhitneyu(x, y, plot=True)

    >>> # Plot on existing axes
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> result = test_mannwhitneyu(x, y, ax=ax)

    >>> # With verbose output
    >>> result = test_mannwhitneyu(x, y, verbose=True)
    """
    # Resolve column names from DataFrame (seaborn-style data= parameter)
    if data is not None:
        from scitex_stats._utils._csv_support import resolve_columns

        resolved = resolve_columns(data, x=x, y=y)
        x, y = resolved["x"], resolved["y"]

    from scitex_stats._utils._formatters import p2stars
    from scitex_stats._utils._normalizers import convert_results, force_dataframe

    # Coerce to float first: np.isnan raises TypeError on object-dtype input
    # (e.g. columns holding None), whereas float arrays can always be masked.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    n_x = len(x)
    n_y = len(y)

    # Fail early with a clear message; otherwise the effect-size formula
    # below divides by n_x * n_y == 0.
    if n_x == 0 or n_y == 0:
        raise ValueError(
            "Mann-Whitney U test requires at least one non-NaN observation "
            f"in each sample (got n_x={n_x}, n_y={n_y})"
        )

    # Perform Mann-Whitney U test
    u_result = stats.mannwhitneyu(x, y, alternative=alternative)
    u_stat = float(u_result.statistic)
    pvalue = float(u_result.pvalue)

    # Determine rejection (uses the unrounded p-value)
    rejected = pvalue < alpha

    # Compute rank-biserial correlation effect size
    # Formula: r = 1 - (2U) / (n1 * n2)
    r = 1 - (2 * u_stat) / (n_x * n_y)

    # Interpret effect size by magnitude only
    r_abs = abs(r)
    if r_abs < 0.1:
        effect_interp = "negligible"
    elif r_abs < 0.3:
        effect_interp = "small"
    elif r_abs < 0.5:
        effect_interp = "medium"
    else:
        effect_interp = "large"

    # Compile results
    result = {
        "test_method": "Mann-Whitney U test",
        "statistic": round(u_stat, decimals),
        "stat_symbol": "U",
        "n_x": n_x,
        "n_y": n_y,
        "var_x": var_x,
        "var_y": var_y,
        "pvalue": round(pvalue, decimals),
        "stars": p2stars(pvalue),
        "alpha": alpha,
        "significant": rejected,
        "effect_size": round(r, decimals),
        "effect_size_metric": "rank-biserial correlation",
        "effect_size_interpretation": effect_interp,
        "H0": f"Distributions of {var_x} and {var_y} have equal medians",
    }

    # Add recommendation
    if rejected:
        result["recommendation"] = (
            f"{var_x} and {var_y} have significantly different medians."
        )
    else:
        result["recommendation"] = "No significant difference in medians detected."

    # Log results if verbose
    if verbose:
        logger.info(
            f"Mann-Whitney U: U = {u_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
        )
        logger.info(f"Rank-biserial r = {r:.3f} ({effect_interp})")

    # Auto-enable plotting if ax is provided
    if ax is not None:
        plot = True

    # Generate plot if requested
    if plot:
        if ax is None:
            _fig, ax = _mpl_plt.subplots()
        _plot_mannwhitneyu(x, y, var_x, var_y, result, ax)

    # Convert to requested format
    if return_as == "dataframe":
        return force_dataframe(result)
    if return_as != "dict":
        # Any other format name is delegated to the shared converter.
        return convert_results(result, return_as=return_as)

    return result



def _plot_mannwhitneyu(x, y, var_x, var_y, result, ax):
    """Draw both samples on ``ax`` and annotate them with the test outcome.

    The samples are passed to the ``violin_swarm`` helper at positions 0 and 1,
    a significance bracket labelled with ``result["stars"]`` is added between
    them, and a text box listing U, p and the sample sizes is attached.
    """
    from scitex_stats._plot_helpers import (
        significance_bracket,
        stats_text_box,
        violin_swarm,
    )

    groups = (x, y)
    positions = [0, 1]
    labels = [var_x, var_y]

    # Each helper receives its own list, as in a direct literal call.
    violin_swarm(ax, list(groups), positions, labels)
    significance_bracket(ax, 0, 1, result["stars"], list(groups))

    # Build the annotation lines in the order they are displayed.
    u_line = fmt_stat("U", result["statistic"])
    p_line = fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"])
    n_line = "{} = {}, {} = {}".format(
        fmt_sym("n_1"),
        result["n_x"],
        fmt_sym("n_2"),
        result["n_y"],
    )
    stats_text_box(ax, [u_line, p_line, n_line])

    ax.set_title("Mann-Whitney U Test")


def main(args):  # noqa: C901
    """Demonstrate Mann-Whitney U test functionality.

    Runs a fixed sequence of logged examples on seeded random data: basic
    usage, skewed data, outliers, ordinal data, one-sided alternatives,
    plotting, comparisons with the t-test and Brunner-Munzel test, and
    exporting collected results.

    Side effects: writes ``./mannwhitneyu_example6.jpg`` (when plotting
    helpers are importable), ``./mannwhitneyu_tests.xlsx`` (when pandas can
    import an Excel writer) and ``./mannwhitneyu_tests.csv`` into the
    current working directory.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments as produced by ``parse_args()``.
        Currently not read by the demo.

    Returns
    -------
    int
        Always 0.
    """
    logger.info("Demonstrating Mann-Whitney U test")

    # Set random seed
    np.random.seed(42)

    # Example 1: Basic usage
    logger.info("\n=== Example 1: Basic usage ===")

    x1 = np.random.normal(5, 1, 30)
    y1 = np.random.normal(6, 1, 30)

    result1 = test_mannwhitneyu(x1, y1, var_x="Group A", var_y="Group B", verbose=True)

    # Example 2: Non-normal data
    logger.info("\n=== Example 2: Non-normal (skewed) data ===")

    x2 = np.random.exponential(2, 40)
    y2 = np.random.exponential(3, 40)

    result2 = test_mannwhitneyu(
        x2, y2, var_x="Exp(λ=0.5)", var_y="Exp(λ=0.33)", verbose=True
    )

    # Example 3: With outliers
    logger.info("\n=== Example 3: Data with outliers ===")

    x3 = np.concatenate([np.random.normal(0, 1, 35), [10, 12]])
    y3 = np.random.normal(0.5, 1, 40)

    result3 = test_mannwhitneyu(
        x3, y3, var_x="With Outliers", var_y="Normal", verbose=True
    )
    logger.info("Mann-Whitney U is robust to outliers")

    # Example 4: Ordinal data (Likert scale)
    logger.info("\n=== Example 4: Ordinal data (Likert scale) ===")

    likert1 = np.random.choice(
        [1, 2, 3, 4, 5], size=50, p=[0.05, 0.15, 0.40, 0.30, 0.10]
    )
    likert2 = np.random.choice(
        [1, 2, 3, 4, 5], size=50, p=[0.05, 0.10, 0.25, 0.35, 0.25]
    )

    result4 = test_mannwhitneyu(
        likert1,
        likert2,
        var_x="Condition A",
        var_y="Condition B",
        verbose=True,
    )
    logger.info(f"Medians: {np.median(likert1):.1f} vs {np.median(likert2):.1f}")

    # Example 5: One-sided tests
    logger.info("\n=== Example 5: One-sided tests ===")

    x5 = np.random.normal(5, 1, 40)
    y5 = np.random.normal(6, 1, 40)

    logger.info("Two-sided:")
    test_mannwhitneyu(x5, y5, alternative="two-sided", verbose=True)

    logger.info("\nOne-sided (less):")
    test_mannwhitneyu(x5, y5, alternative="less", verbose=True)

    # Example 6: With visualization
    logger.info("\n=== Example 6: Complete analysis with visualization ===")

    x6 = np.random.gamma(2, 2, 50)
    y6 = np.random.gamma(3, 2, 50)

    try:
        result6 = test_mannwhitneyu(
            x6, y6, var_x="Gamma(k=2)", var_y="Gamma(k=3)", plot=True, verbose=True
        )
        _mpl_plt.gcf().savefig("./mannwhitneyu_example6.jpg")
        _mpl_plt.close("all")
    except ModuleNotFoundError as exc:
        # Plotting helpers depend on optional figrecipe (see [project.optional-dependencies]).
        logger.info(f"Skipping plot: {exc}")
        result6 = test_mannwhitneyu(
            x6, y6, var_x="Gamma(k=2)", var_y="Gamma(k=3)", verbose=True
        )

    # Example 7: Comparison with t-test
    logger.info("\n=== Example 7: Mann-Whitney U vs t-test ===")

    # Absolute import: a relative import fails when this file is executed
    # directly as a script (no parent package is known in that case).
    from scitex_stats.tests.parametric._test_ttest import test_ttest_ind

    # Normal data - both tests should agree
    x_norm = np.random.normal(5, 1, 50)
    y_norm = np.random.normal(5.5, 1, 50)

    logger.info("Mann-Whitney U:")
    test_mannwhitneyu(x_norm, y_norm, verbose=True)
    logger.info("\nt-test:")
    test_ttest_ind(x_norm, y_norm, verbose=True)

    # Non-normal data - MWU more appropriate
    x_exp = np.random.exponential(2, 50)
    y_exp = np.random.exponential(2.5, 50)

    logger.info("\nFor exponential data:")
    logger.info("Mann-Whitney U:")
    test_mannwhitneyu(x_exp, y_exp, verbose=True)
    logger.info("\nt-test:")
    test_ttest_ind(x_exp, y_exp, verbose=True)
    logger.info("Mann-Whitney U is more reliable for non-normal data")

    # Example 8: Comparison with Brunner-Munzel
    logger.info("\n=== Example 8: Mann-Whitney U vs Brunner-Munzel ===")

    from scitex_stats.tests.nonparametric._test_brunner_munzel import (
        test_brunner_munzel,
    )

    # Same shape distributions
    x8 = np.random.normal(5, 1, 50)
    y8 = np.random.normal(6, 1, 50)

    mwu = test_mannwhitneyu(x8, y8)
    bm = test_brunner_munzel(x8, y8)

    logger.info("Same distribution shape:")
    logger.info(
        f"  Mann-Whitney U: p = {mwu['pvalue']:.4f}, r = {mwu['effect_size']:.3f}"
    )
    logger.info(
        f"  Brunner-Munzel: p = {bm['pvalue']:.4f}, P(X>Y) = {bm['effect_size']:.3f}"
    )

    # Different shapes
    x9 = np.random.normal(5, 1, 50)
    y9 = np.random.normal(6, 3, 50)  # Different variance

    mwu2 = test_mannwhitneyu(x9, y9)
    bm2 = test_brunner_munzel(x9, y9)

    logger.info("\nDifferent distribution shapes:")
    logger.info(f"  Mann-Whitney U: p = {mwu2['pvalue']:.4f}")
    logger.info(f"  Brunner-Munzel: p = {bm2['pvalue']:.4f}")
    logger.info("  Note: Brunner-Munzel is more appropriate for different shapes")

    # Example 9: Export results
    logger.info("\n=== Example 9: Export results ===")

    from scitex_stats._utils._normalizers import force_dataframe

    test_results = [result1, result2, result3, result4, result6]

    df = force_dataframe(test_results)
    logger.info(f"\nDataFrame shape: {df.shape}")

    try:
        df.to_excel("./mannwhitneyu_tests.xlsx", index=False)
    except ImportError as exc:
        # pandas needs an optional Excel writer engine; skip the Excel file
        # rather than abort the demo, mirroring the optional-plot handling.
        logger.info(f"Skipping Excel export: {exc}")
    else:
        logger.info("Results exported to Excel")
    df.to_csv("./mannwhitneyu_tests.csv", index=False)
    logger.info("Results exported to CSV")

    return 0


def parse_args():
    """Build the demo's command-line parser and parse ``sys.argv``.

    Returns the resulting namespace, whose only option is the boolean
    ``verbose`` flag (set by ``--verbose``, otherwise False).
    """
    cli = argparse.ArgumentParser(description="Demonstrate Mann-Whitney U test")
    cli.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose output",
    )
    namespace = cli.parse_args()
    return namespace


def run_main():
    """Script entry point that works without the scitex umbrella session helpers.

    Selects matplotlib's "Agg" backend, parses the command line and returns
    whatever ``main`` returns.
    """
    import matplotlib as mpl

    # Switch backend before the demo creates any figure.
    mpl.use("Agg")

    return main(parse_args())


if __name__ == "__main__":
    # Propagate run_main()'s return value as the process exit status
    # instead of discarding it.
    raise SystemExit(run_main())

# EOF