# Source code for scitex_stats.auto._formatting

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-12-10 (ywatanabe)"
# File: scitex_stats/auto/_formatting.py

"""
Statistical Formatting - Publication-ready output generation.

This module provides functions for:
- Computing summary statistics per group
- Formatting complete test result lines
- Converting results for Inspector panel display
- Handling multiple comparison corrections

All formatting respects journal style presets (APA, Nature, Cell, Elsevier).
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, List, Optional, TypedDict, Union

import numpy as np

from ._styles import StatStyle, get_stat_style
from ._summary import (
    SummaryStatsDict,
    compute_summary_from_groups,
    compute_summary_stats,
)
from ._symbols import get_stat_symbol

# =============================================================================
# Type Definitions
# =============================================================================


class SummaryStatsDict(TypedDict, total=False):
    """
    Summary statistics for a single group.

    Declared with ``total=False``, so every key is optional; consumers in
    this module read values with ``.get`` (``format_test_line`` uses only
    ``group`` and ``n``).

    NOTE(review): the name ``SummaryStatsDict`` is also imported from
    ``._summary`` at the top of this module, and this class definition
    rebinds it. Confirm which definition is meant to be canonical.

    Attributes
    ----------
    group : str
        Group name/label.
    n : int
        Sample size.
    mean : float or None
        Mean value.
    sd : float or None
        Standard deviation.
    sem : float or None
        Standard error of mean.
    median : float or None
        Median value.
    iqr : float or None
        Interquartile range.
    q1 : float or None
        First quartile (25th percentile).
    q3 : float or None
        Third quartile (75th percentile).
    minimum : float or None
        Minimum value.
    maximum : float or None
        Maximum value.
    """

    # Identification and sample size
    group: str
    n: int
    # Central tendency / dispersion (mean-based)
    mean: Optional[float]
    sd: Optional[float]
    sem: Optional[float]
    # Order statistics
    median: Optional[float]
    iqr: Optional[float]
    q1: Optional[float]
    q3: Optional[float]
    minimum: Optional[float]
    maximum: Optional[float]


class TestResultDict(TypedDict, total=False):
    """
    Result structure for a single statistical test.

    Declared with ``total=False``, so every key is optional. Within this
    module, ``format_test_line`` reads ``test_name``, ``stat``, ``df``,
    ``p_adj`` and ``p_raw``; ``apply_multiple_correction`` reads ``p_raw``
    and writes ``p_adj`` and ``correction_method``.

    Attributes
    ----------
    test_name : str
        Internal test name ("ttest_ind", "brunner_munzel", etc.).
    p_raw : float or None
        Raw p-value.
    p_adj : float or None
        Adjusted p-value after multiple correction.
    stat : float or None
        Test statistic value.
    df : float or None
        Degrees of freedom.
    method : str or None
        Human-readable method label.
    correction_method : str or None
        Multiple correction method used ("bonferroni", "fdr_bh", etc.).
    details : dict
        Additional test-specific information.
    """

    test_name: str
    # p-values: raw, and adjusted for multiple comparisons
    p_raw: Optional[float]
    p_adj: Optional[float]
    # Test statistic and its degrees of freedom
    stat: Optional[float]
    df: Optional[float]
    method: Optional[str]
    correction_method: Optional[str]
    details: Dict[str, Any]


class EffectResultDict(TypedDict, total=False):
    """
    Result structure for a single effect size measure.

    Declared with ``total=False``, so every key is optional. Within this
    module, ``format_test_line`` reads ``name`` and ``value``, and
    ``format_for_inspector`` falls back to ``name`` when ``label`` is absent.

    Attributes
    ----------
    name : str
        Internal name ("cohens_d_ind", "eta_squared", etc.).
    label : str
        Human-readable label.
    value : float
        Effect size value.
    ci_lower : float or None
        Lower bound of confidence interval.
    ci_upper : float or None
        Upper bound of confidence interval.
    note : str or None
        Interpretation note (e.g., "small", "medium", "large").
    """

    name: str
    label: str
    value: float
    # Confidence interval bounds for ``value``
    ci_lower: Optional[float]
    ci_upper: Optional[float]
    note: Optional[str]


# =============================================================================
# =============================================================================
# Test Line Formatting
# =============================================================================


def format_test_line(
    test: TestResultDict,
    effects: Optional[List[EffectResultDict]] = None,
    summary: Optional[List[SummaryStatsDict]] = None,
    style: Optional[Union[str, StatStyle]] = None,
    include_n: bool = True,
    max_effects: int = 2,
) -> str:
    """
    Format a complete statistical result line.

    Produces publication-ready formatted text with proper italics,
    symbols, and formatting according to the specified journal style.
    The line is assembled, in order, from the test statistic, the p-value,
    up to ``max_effects`` effect sizes, and the per-group sample sizes;
    the pieces are joined with ``", "``.

    Parameters
    ----------
    test : TestResultDict
        Test result dictionary. The statistic is emitted only when
        ``stat`` is not None. For the p-value, ``p_adj`` is preferred and
        ``p_raw`` is used only when ``p_adj`` is missing or None (an
        adjusted p-value of exactly ``0.0`` is a valid value and is kept).
    effects : list of EffectResultDict, optional
        Effect size results to include. Entries whose ``value`` is None
        are skipped.
    summary : list of SummaryStatsDict, optional
        Summary statistics for sample size display.
    style : str or StatStyle, optional
        Style to use. Can be style ID or StatStyle instance.
        Defaults to APA LaTeX.
    include_n : bool
        Whether to include sample sizes in output.
    max_effects : int
        Maximum number of effect sizes to include.

    Returns
    -------
    str
        Formatted result line.

    Examples
    --------
    >>> test = {"test_name": "ttest_ind", "stat": 2.31, "df": 28.0, "p_raw": 0.028}
    >>> effects = [{"name": "cohens_d_ind", "value": 0.72, "label": "Cohen's d"}]
    >>> summary = [{"group": "A", "n": 15}, {"group": "B", "n": 15}]
    >>> line = format_test_line(test, effects, summary, style="apa_latex")
    >>> "\\mathit{t}" in line
    True
    """
    # Resolve the style: None -> default "apa_latex", str -> lookup by ID,
    # anything else is used as a style object directly.
    if style is None:
        style = get_stat_style("apa_latex")
    elif isinstance(style, str):
        style = get_stat_style(style)

    parts: List[str] = []

    # Format test statistic
    test_name = test.get("test_name", "")
    stat = test.get("stat")
    df = test.get("df")

    if stat is not None:
        symbol = get_stat_symbol(test_name)
        parts.append(style.format_stat(symbol, stat, df))

    # Format p-value. Compare against None explicitly: a truthiness test
    # (`p_adj or p_raw`) would discard a legitimate adjusted p-value of 0.0.
    p = test.get("p_adj")
    if p is None:
        p = test.get("p_raw")
    if p is not None:
        parts.append(style.format_p(p))

    # Format effect sizes (at most `max_effects`, skipping missing values)
    if effects:
        for eff in effects[:max_effects]:
            eff_value = eff.get("value")
            if eff_value is not None:
                parts.append(style.format_effect(eff.get("name", ""), eff_value))

    # Format sample sizes
    if include_n and summary:
        for s in summary:
            group_name = str(s.get("group", ""))
            n_value = int(s.get("n", 0))
            parts.append(style.format_n(group_name, n_value))

    return ", ".join(parts)


def format_test_line_compact(
    test: TestResultDict,
    style: Optional[Union[str, StatStyle]] = None,
) -> str:
    """
    Format only the test statistic and p-value of a result.

    Thin wrapper around ``format_test_line`` that passes no effect sizes
    and no summary statistics, and disables sample-size output.

    Parameters
    ----------
    test : TestResultDict
        Test result dictionary.
    style : str or StatStyle, optional
        Style to use; forwarded unchanged to ``format_test_line``.

    Returns
    -------
    str
        Compact formatted result.
    """
    # `effects` and `summary` are left at their None defaults.
    return format_test_line(test, style=style, include_n=False)


# =============================================================================
# Inspector Panel Formatting
# =============================================================================


def format_for_inspector(
    test_results: List[TestResultDict],
    effect_results: Optional[List[EffectResultDict]] = None,
) -> Dict[str, List[Dict]]:
    """
    Convert test and effect-size results into Inspector panel rows.

    Each input dictionary is copied into a flat row dictionary with a
    fixed set of keys, so that a UI panel can render one table for tests
    and one for effect sizes. Missing input keys become None, except
    ``details`` (empty dict) and the labels described below.

    Parameters
    ----------
    test_results : list of TestResultDict
        Test results to display. Each row's ``label`` is produced by
        ``_pretty_label`` from the test name.
    effect_results : list of EffectResultDict, optional
        Effect size results to display. A row's ``label`` falls back to
        the effect ``name`` (or ``""``) when no label is given.

    Returns
    -------
    dict
        Dictionary with 'tests' and 'effects' lists.

    Examples
    --------
    >>> tests = [{"test_name": "ttest_ind", "p_raw": 0.03, "stat": 2.2}]
    >>> effects = [{"name": "cohens_d_ind", "value": 0.8, "label": "Cohen's d"}]
    >>> result = format_for_inspector(tests, effects)
    >>> len(result["tests"])
    1
    """
    from ._selector import _pretty_label

    test_rows: List[Dict] = []
    for res in test_results:
        row = {
            "name": res.get("test_name"),
            "label": _pretty_label(res.get("test_name", "")),
            "p_raw": res.get("p_raw"),
            "p_adj": res.get("p_adj"),
            "stat": res.get("stat"),
            "df": res.get("df"),
            "method": res.get("method"),
            "correction": res.get("correction_method"),
            "details": res.get("details", {}),
        }
        test_rows.append(row)

    effect_rows: List[Dict] = []
    if effect_results:
        for eff in effect_results:
            fallback_label = eff.get("name", "")
            effect_rows.append(
                {
                    "name": eff.get("name"),
                    "label": eff.get("label", fallback_label),
                    "value": eff.get("value"),
                    "ci_lower": eff.get("ci_lower"),
                    "ci_upper": eff.get("ci_upper"),
                    "note": eff.get("note"),
                }
            )

    return {"tests": test_rows, "effects": effect_rows}


# =============================================================================
# P-value to Stars
# =============================================================================



def p_to_stars(
    p_value: Optional[float],
    style: Optional[Union[str, StatStyle]] = None,
) -> str:
    """
    Convert p-value to significance stars.

    Resolves the style and delegates to its ``p_to_stars`` method, so the
    thresholds are those of the specified style.

    Parameters
    ----------
    p_value : float or None
        P-value to convert.
    style : str or StatStyle, optional
        Style to use for thresholds. A string is looked up as a style ID;
        None selects the "apa_latex" style.

    Returns
    -------
    str
        Stars string ("***", "**", "*", or "ns").

    Examples
    --------
    >>> p_to_stars(0.001)
    '***'
    >>> p_to_stars(0.03)
    '*'
    >>> p_to_stars(0.10)
    'ns'
    """
    if style is None:
        style = get_stat_style("apa_latex")
    elif isinstance(style, str):
        style = get_stat_style(style)

    return style.p_to_stars(p_value)



# =============================================================================
# Multiple Comparison Correction
# =============================================================================


# Name of a multiple-comparison correction method, or None for no correction.
CorrectionMethod = Optional[str]


def apply_multiple_correction(
    results: List[TestResultDict],
    method: CorrectionMethod = "fdr_bh",
) -> List[TestResultDict]:
    """
    Adjust the p-values of a family of test results for multiple comparisons.

    The input dictionaries are mutated in place (``p_adj`` and
    ``correction_method`` are set) and the same list is returned.

    Parameters
    ----------
    results : list of TestResultDict
        Test results with p_raw values. For the actual correction methods,
        only entries whose ``p_raw`` is not None form the family; other
        entries are left untouched.
    method : str or None
        Correction method:
        - "none" (or None): No correction (p_adj = p_raw for every entry)
        - "bonferroni": Bonferroni correction
        - "holm": Holm-Bonferroni step-down
        - "fdr_bh": Benjamini-Hochberg FDR
        Any other string leaves the p-values unadjusted but is still
        recorded as ``correction_method``.

    Returns
    -------
    list of TestResultDict
        The same ``results`` list, with p_adj filled in.

    Examples
    --------
    >>> results = [
    ...     {"test_name": "t1", "p_raw": 0.01},
    ...     {"test_name": "t2", "p_raw": 0.03},
    ...     {"test_name": "t3", "p_raw": 0.04},
    ... ]
    >>> corrected = apply_multiple_correction(results, "bonferroni")
    >>> corrected[0]["p_adj"]
    0.03
    """
    # No-correction path: every entry is labelled, including missing p-values.
    if method is None or method == "none":
        for entry in results:
            entry["p_adj"] = entry.get("p_raw")
            entry["correction_method"] = "none"
        return results

    # Positions (in `results`) of the entries that carry a raw p-value.
    positions = [
        pos for pos, entry in enumerate(results) if entry.get("p_raw") is not None
    ]
    if not positions:
        return results

    raw = [results[pos]["p_raw"] for pos in positions]
    count = len(raw)

    if method == "bonferroni":
        corrected = [min(value * count, 1.0) for value in raw]

    elif method == "holm":
        # Step-down: walk from the smallest p-value upward with a shrinking
        # multiplier, never letting an adjusted value fall below the previous.
        ascending = sorted(range(count), key=raw.__getitem__)
        corrected = [0.0] * count
        running_max = 0.0
        for step, target in enumerate(ascending):
            scaled = min((count - step) * raw[target], 1.0)
            running_max = max(scaled, running_max)
            corrected[target] = running_max

    elif method == "fdr_bh":
        # Step-up: walk from the largest p-value downward, capping each
        # adjusted value by the one computed for the next-larger p-value.
        ascending = sorted(range(count), key=raw.__getitem__)
        corrected = [0.0] * count
        ceiling = 1.0
        for offset, target in enumerate(reversed(ascending)):
            rank = count - offset
            candidate = raw[target] * count / rank
            ceiling = min(candidate, ceiling, 1.0)
            corrected[target] = ceiling

    else:
        # Unknown method - no correction
        corrected = raw

    # Write the adjusted values back to their original entries.
    for pos, value in zip(positions, corrected):
        results[pos]["p_adj"] = float(value)
        results[pos]["correction_method"] = method

    return results


# =============================================================================
# Public API
# =============================================================================

# Names exported by `from ... import *`. Besides the definitions in this
# module, the list re-exports helpers imported from sibling modules.
__all__ = [
    # Type definitions
    "SummaryStatsDict",
    "TestResultDict",
    "EffectResultDict",
    "CorrectionMethod",
    # Summary statistics (imported from ._summary)
    "compute_summary_stats",
    "compute_summary_from_groups",
    # Symbol mapping (imported from ._symbols)
    "get_stat_symbol",
    # Formatting
    "format_test_line",
    "format_test_line_compact",
    "format_for_inspector",
    "p_to_stars",
    # Correction
    "apply_multiple_correction",
]

# EOF