Source code for scitex_stats.tests.nonparametric._test_mannwhitneyu

#!/usr/bin/env python3
# Timestamp: "2025-10-01 17:45:00 (ywatanabe)"
# File: scitex_stats/tests/nonparametric/_test_mannwhitneyu.py

r"""
Mann-Whitney U test (Wilcoxon rank-sum test).

Functionalities:
  - Perform Mann-Whitney U test (Wilcoxon rank-sum test)
  - Non-parametric test for comparing two independent samples
  - Compute rank-biserial correlation effect size
  - Generate visualizations with rank distributions
  - Support flexible output formats (dict or DataFrame)

Dependencies:
  - packages: numpy, pandas, scipy, matplotlib

IO:
  - input: Two independent samples (arrays or Series)
  - output: Test results (dict or DataFrame) and optional figure
"""

from __future__ import annotations

import argparse
import os
from typing import Literal, Optional, Union

import matplotlib.axes
import matplotlib.pyplot as _mpl_plt  # noqa: E402
import numpy as np
import pandas as pd
from scipy import stats

from scitex_stats._logging import getLogger
from scitex_stats._utils._formatters import fmt_stat, fmt_sym

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)

logger = getLogger(__name__)


def test_mannwhitneyu(  # noqa: C901
    x: Union[np.ndarray, pd.Series, str],
    y: Union[np.ndarray, pd.Series, str],
    var_x: str = "x",
    var_y: str = "y",
    alternative: Literal["two-sided", "less", "greater"] = "two-sided",
    alpha: float = 0.05,
    plot: bool = False,
    ax: Optional[matplotlib.axes.Axes] = None,
    data: Union[pd.DataFrame, str, None] = None,
    return_as: Literal["dict", "dataframe"] = "dict",
    decimals: int = 3,
    verbose: bool = False,
) -> Union[dict, pd.DataFrame]:
    r"""
    Perform Mann-Whitney U test (Wilcoxon rank-sum test).

    Parameters
    ----------
    x, y : arrays or Series
        Two independent samples to compare. NaN values are dropped before
        testing. When ``data`` is given, strings are resolved as column names.
    var_x, var_y : str
        Labels for samples
    alternative : {'two-sided', 'less', 'greater'}, default 'two-sided'
        Alternative hypothesis
    alpha : float, default 0.05
        Significance level
    plot : bool, default False
        Whether to generate visualization
    ax : matplotlib.axes.Axes, optional
        Axes object to plot on. If None and plot=True, creates new figure.
        If provided, automatically enables plotting.
    data : DataFrame, str, or None, optional
        DataFrame or CSV path. When provided, string values for x/y
        are resolved as column names (seaborn-style).
    return_as : {'dict', 'dataframe'}, default 'dict'
        Output format. Any other value is forwarded to ``convert_results``.
    decimals : int, default 3
        Number of decimal places for rounding
    verbose : bool, default False
        Whether to log test results

    Returns
    -------
    results : dict or DataFrame
        Test results including:
        - test_method: 'Mann-Whitney U test'
        - statistic: U-statistic value (rounded)
        - stat_symbol: 'U'
        - n_x, n_y: Sample sizes after NaN removal
        - var_x, var_y: Variable labels
        - pvalue: p-value (rounded)
        - stars: Significance stars
        - alpha: Significance level used
        - significant: Whether null hypothesis is rejected (pvalue < alpha)
        - effect_size: Rank-biserial correlation (rounded)
        - effect_size_metric: 'rank-biserial correlation'
        - effect_size_interpretation: Interpretation
        - H0: Null hypothesis description
        - recommendation: One-sentence summary of the outcome

    Raises
    ------
    ValueError
        If either sample has no observations left after NaN removal.

    Notes
    -----
    The Mann-Whitney U test (also known as Wilcoxon rank-sum test) is a
    non-parametric test for comparing two independent samples.

    **Null Hypothesis (H0)**: The two samples come from distributions with
    equal medians (more precisely: P(X > Y) = 0.5)

    **Test Statistic U** (the statistic SciPy reports for sample ``x``):

    .. math::
        U = R_1 - \frac{n_1(n_1+1)}{2}

    Where:
    - n_1, n_2: Sample sizes
    - R_1: Sum of ranks for sample 1

    **Effect Size (Rank-biserial correlation)**:

    .. math::
        r = 1 - \frac{2U}{n_1 n_2}

    Or equivalently:

    .. math::
        r = \frac{2(\bar{R}_2 - \bar{R}_1)}{n_1 + n_2}

    With this sign convention r is positive when ``x`` tends to rank below
    ``y`` and negative when ``x`` tends to rank above ``y``.

    Interpretation:
    - |r| < 0.1: negligible
    - |r| < 0.3: small
    - |r| < 0.5: medium
    - |r| ≥ 0.5: large

    **Advantages**:
    - No normality assumption required
    - Robust to outliers
    - Works with ordinal data
    - More powerful than t-test for non-normal data

    **When to use**:
    - Comparing two independent groups
    - Data violate normality
    - Presence of outliers
    - Ordinal data (e.g., Likert scales)
    - Small sample sizes

    **Comparison with other tests**:
    - vs t-test: More robust, less powerful when assumptions met
    - vs Brunner-Munzel: MWU assumes identical shape, BM does not
    - vs KS test: MWU tests location, KS tests entire distribution

    **Note on relationship to Brunner-Munzel**:
    Mann-Whitney U assumes samples have the same distribution shape (differing
    only in location). For more robust analysis without this assumption, use
    test_brunner_munzel() instead.

    References
    ----------
    .. [1] Mann, H. B., & Whitney, D. R. (1947). "On a test of whether one of
           two random variables is stochastically larger than the other".
           Annals of Mathematical Statistics, 18(1), 50-60.
    .. [2] Kerby, D. S. (2014). "The simple difference formula: An approach to
           teaching nonparametric correlation". Comprehensive Psychology, 3, 11.

    Examples
    --------
    >>> # Basic usage
    >>> x = np.array([1, 2, 3, 4, 5])
    >>> y = np.array([3, 4, 5, 6, 7])
    >>> result = test_mannwhitneyu(x, y)
    >>> result['statistic']
    4.5
    >>> result['significant']
    False

    >>> # With auto-created figure
    >>> result = test_mannwhitneyu(x, y, plot=True)

    >>> # Plot on existing axes
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> result = test_mannwhitneyu(x, y, ax=ax)

    >>> # With verbose output
    >>> result = test_mannwhitneyu(x, y, verbose=True)
    """
    # Resolve column names from DataFrame (seaborn-style data= parameter)
    if data is not None:
        from scitex_stats._utils._csv_support import resolve_columns

        resolved = resolve_columns(data, x=x, y=y)
        x, y = resolved["x"], resolved["y"]

    from scitex_stats._utils._formatters import p2stars
    from scitex_stats._utils._normalizers import convert_results, force_dataframe

    # Coerce to float so np.isnan also works for integer/object input
    # (e.g. a column holding None), then drop missing values.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    n_x = len(x)
    n_y = len(y)

    # Fail early with a clear message: an empty sample would otherwise end in
    # a division by zero in the effect-size formula below.
    if n_x == 0 or n_y == 0:
        raise ValueError(
            "Mann-Whitney U test requires at least one non-NaN observation "
            f"in each sample (got n_x={n_x}, n_y={n_y})."
        )

    # Perform Mann-Whitney U test
    u_result = stats.mannwhitneyu(x, y, alternative=alternative)
    u_stat = float(u_result.statistic)
    pvalue = float(u_result.pvalue)

    # Determine rejection
    rejected = pvalue < alpha

    # Compute rank-biserial correlation effect size
    # Formula: r = 1 - (2U) / (n1 * n2)
    r = 1 - (2 * u_stat) / (n_x * n_y)

    # Interpret effect size
    r_abs = abs(r)
    if r_abs < 0.1:
        effect_interp = "negligible"
    elif r_abs < 0.3:
        effect_interp = "small"
    elif r_abs < 0.5:
        effect_interp = "medium"
    else:
        effect_interp = "large"

    # Compile results
    result = {
        "test_method": "Mann-Whitney U test",
        "statistic": round(u_stat, decimals),
        "stat_symbol": "U",
        "n_x": n_x,
        "n_y": n_y,
        "var_x": var_x,
        "var_y": var_y,
        "pvalue": round(pvalue, decimals),
        "stars": p2stars(pvalue),
        "alpha": alpha,
        "significant": rejected,
        "effect_size": round(r, decimals),
        "effect_size_metric": "rank-biserial correlation",
        "effect_size_interpretation": effect_interp,
        "H0": f"Distributions of {var_x} and {var_y} have equal medians",
    }

    # Add recommendation
    if rejected:
        result["recommendation"] = (
            f"{var_x} and {var_y} have significantly different medians."
        )
    else:
        result["recommendation"] = "No significant difference in medians detected."

    # Log results if verbose
    if verbose:
        logger.info(
            f"Mann-Whitney U: U = {u_stat:.3f}, p = {pvalue:.4f} {p2stars(pvalue)}"
        )
        logger.info(f"Rank-biserial r = {r:.3f} ({effect_interp})")

    # Passing an axes implies plotting; otherwise create a figure on demand.
    if plot or ax is not None:
        if ax is None:
            _fig, ax = _mpl_plt.subplots()
        _plot_mannwhitneyu(x, y, var_x, var_y, result, ax)

    # Convert to requested format
    if return_as == "dict":
        return result
    if return_as == "dataframe":
        return force_dataframe(result)
    # Any other format is delegated to the shared converter.
    return convert_results(result, return_as=return_as)


def _plot_mannwhitneyu(x, y, var_x, var_y, result, ax):
    """Create violin+swarm visualization on given axes.

    Draws both samples, a significance bracket labelled with
    ``result["stars"]``, and a text box with U, p and the sample sizes taken
    from ``result``. The axes title is set to "Mann-Whitney U Test".
    """
    # Imported lazily: the plot helpers may rely on optional dependencies.
    from scitex_stats._plot_helpers import (
        significance_bracket,
        stats_text_box,
        violin_swarm,
    )

    violin_swarm(ax, [x, y], [0, 1], [var_x, var_y])
    significance_bracket(ax, 0, 1, result["stars"], [x, y])
    stats_text_box(
        ax,
        [
            fmt_stat("U", result["statistic"]),
            fmt_stat("p", result["pvalue"], fmt=".4f", stars=result["stars"]),
            f"{fmt_sym('n_1')} = {result['n_x']}, {fmt_sym('n_2')} = {result['n_y']}",
        ],
    )
    ax.set_title("Mann-Whitney U Test")


def main(args):  # noqa: C901
    """Demonstrate Mann-Whitney U test functionality.

    Runs a fixed sequence of seeded examples, logs their results, saves one
    example figure and exports a summary table to Excel (when an Excel writer
    is available) and CSV in the current directory. ``args`` is accepted for
    the command-line entry point but is not consulted. Returns 0.
    """
    logger.info("Demonstrating Mann-Whitney U test")

    # Set random seed
    np.random.seed(42)

    # Example 1: Basic usage
    logger.info("\n=== Example 1: Basic usage ===")

    x1 = np.random.normal(5, 1, 30)
    y1 = np.random.normal(6, 1, 30)

    result1 = test_mannwhitneyu(x1, y1, var_x="Group A", var_y="Group B", verbose=True)

    # Example 2: Non-normal data
    logger.info("\n=== Example 2: Non-normal (skewed) data ===")

    x2 = np.random.exponential(2, 40)
    y2 = np.random.exponential(3, 40)

    result2 = test_mannwhitneyu(
        x2, y2, var_x="Exp(λ=0.5)", var_y="Exp(λ=0.33)", verbose=True
    )

    # Example 3: With outliers
    logger.info("\n=== Example 3: Data with outliers ===")

    x3 = np.concatenate([np.random.normal(0, 1, 35), [10, 12]])
    y3 = np.random.normal(0.5, 1, 40)

    result3 = test_mannwhitneyu(
        x3, y3, var_x="With Outliers", var_y="Normal", verbose=True
    )

    logger.info("Mann-Whitney U is robust to outliers")

    # Example 4: Ordinal data (Likert scale)
    logger.info("\n=== Example 4: Ordinal data (Likert scale) ===")

    likert1 = np.random.choice(
        [1, 2, 3, 4, 5], size=50, p=[0.05, 0.15, 0.40, 0.30, 0.10]
    )
    likert2 = np.random.choice(
        [1, 2, 3, 4, 5], size=50, p=[0.05, 0.10, 0.25, 0.35, 0.25]
    )

    result4 = test_mannwhitneyu(
        likert1,
        likert2,
        var_x="Condition A",
        var_y="Condition B",
        verbose=True,
    )

    logger.info(f"Medians: {np.median(likert1):.1f} vs {np.median(likert2):.1f}")

    # Example 5: One-sided tests
    logger.info("\n=== Example 5: One-sided tests ===")

    x5 = np.random.normal(5, 1, 40)
    y5 = np.random.normal(6, 1, 40)

    logger.info("Two-sided:")
    test_mannwhitneyu(x5, y5, alternative="two-sided", verbose=True)

    logger.info("\nOne-sided (less):")
    test_mannwhitneyu(x5, y5, alternative="less", verbose=True)

    # Example 6: With visualization
    logger.info("\n=== Example 6: Complete analysis with visualization ===")

    x6 = np.random.gamma(2, 2, 50)
    y6 = np.random.gamma(3, 2, 50)

    try:
        result6 = test_mannwhitneyu(
            x6, y6, var_x="Gamma(k=2)", var_y="Gamma(k=3)", plot=True, verbose=True
        )
        _mpl_plt.gcf().savefig("./mannwhitneyu_example6.jpg")
    except ModuleNotFoundError as exc:
        # Plotting helpers depend on optional figrecipe (see [project.optional-dependencies]).
        logger.info(f"Skipping plot: {exc}")
        result6 = test_mannwhitneyu(
            x6, y6, var_x="Gamma(k=2)", var_y="Gamma(k=3)", verbose=True
        )
    finally:
        # Release figures on both paths; a figure may already exist when the
        # plotting helpers fail to import.
        _mpl_plt.close("all")

    # Example 7: Comparison with t-test
    logger.info("\n=== Example 7: Mann-Whitney U vs t-test ===")

    from ..parametric._test_ttest import test_ttest_ind

    # Normal data - both tests should agree
    x_norm = np.random.normal(5, 1, 50)
    y_norm = np.random.normal(5.5, 1, 50)

    logger.info("Mann-Whitney U:")
    test_mannwhitneyu(x_norm, y_norm, verbose=True)
    logger.info("\nt-test:")
    test_ttest_ind(x_norm, y_norm, verbose=True)

    # Non-normal data - MWU more appropriate
    x_exp = np.random.exponential(2, 50)
    y_exp = np.random.exponential(2.5, 50)

    logger.info("\nFor exponential data:")
    logger.info("Mann-Whitney U:")
    test_mannwhitneyu(x_exp, y_exp, verbose=True)
    logger.info("\nt-test:")
    test_ttest_ind(x_exp, y_exp, verbose=True)

    logger.info("Mann-Whitney U is more reliable for non-normal data")

    # Example 8: Comparison with Brunner-Munzel
    logger.info("\n=== Example 8: Mann-Whitney U vs Brunner-Munzel ===")

    from ._test_brunner_munzel import test_brunner_munzel

    # Same shape distributions
    x8 = np.random.normal(5, 1, 50)
    y8 = np.random.normal(6, 1, 50)

    mwu = test_mannwhitneyu(x8, y8)
    bm = test_brunner_munzel(x8, y8)

    logger.info("Same distribution shape:")
    logger.info(
        f"  Mann-Whitney U: p = {mwu['pvalue']:.4f}, r = {mwu['effect_size']:.3f}"
    )
    logger.info(
        f"  Brunner-Munzel: p = {bm['pvalue']:.4f}, P(X>Y) = {bm['effect_size']:.3f}"
    )

    # Different shapes
    x9 = np.random.normal(5, 1, 50)
    y9 = np.random.normal(6, 3, 50)  # Different variance

    mwu2 = test_mannwhitneyu(x9, y9)
    bm2 = test_brunner_munzel(x9, y9)

    logger.info("\nDifferent distribution shapes:")
    logger.info(f"  Mann-Whitney U: p = {mwu2['pvalue']:.4f}")
    logger.info(f"  Brunner-Munzel: p = {bm2['pvalue']:.4f}")
    logger.info("  Note: Brunner-Munzel is more appropriate for different shapes")

    # Example 9: Export results
    logger.info("\n=== Example 9: Export results ===")

    from scitex_stats._utils._normalizers import force_dataframe

    test_results = [result1, result2, result3, result4, result6]

    df = force_dataframe(test_results)
    logger.info(f"\nDataFrame shape: {df.shape}")

    # Excel export needs an optional writer engine; skip it rather than abort
    # so the CSV export below still runs.
    try:
        df.to_excel("./mannwhitneyu_tests.xlsx", index=False)
    except ImportError as exc:
        logger.info(f"Skipping Excel export: {exc}")
    else:
        logger.info("Results exported to Excel")

    df.to_csv("./mannwhitneyu_tests.csv", index=False)
    logger.info("Results exported to CSV")

    return 0


def parse_args():
    """Parse command line arguments.

    Returns the parsed namespace, which carries a single boolean ``verbose``
    flag.
    """
    parser = argparse.ArgumentParser(description="Demonstrate Mann-Whitney U test")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
    return parser.parse_args()


def run_main():
    """Run main without the scitex umbrella session helpers.

    Selects the non-interactive "Agg" matplotlib backend, parses the command
    line and returns the value returned by ``main``.
    """
    import matplotlib

    matplotlib.use("Agg")
    args = parse_args()
    return main(args)


if __name__ == "__main__":
    run_main()

# EOF