Source code for mioXpektron.normalization.main

"""High-level orchestration helpers for normalising ToF-SIMS spectra."""

from __future__ import annotations

import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

import numpy as np

logger = logging.getLogger(__name__)

# polars is treated as an optional dependency: when the import fails, ``pl``
# is bound to ``None`` and ``_POLARS_AVAILABLE`` records that it is missing,
# so importing this module never raises because polars is absent.
# NOTE(review): neither ``pl`` nor ``_POLARS_AVAILABLE`` is referenced in the
# part of this module reviewed here — confirm they are still needed.
try:
    import polars as pl
    _POLARS_AVAILABLE = True
except ImportError:
    pl = None  # type: ignore[assignment]
    _POLARS_AVAILABLE = False

from .normalization import normalize, normalization_method_names, tic_normalization
from .normalization_eval import NormalizationEvaluator
from ..plotting import PlotPeak

# Directory that receives saved figures and evaluation results.  The path is
# relative, so it resolves against the process's current working directory.
OUTPUT_DIR = Path("output_files")
# NOTE: this runs at import time — merely importing the module creates the
# directory (and any missing parents) if it does not already exist.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


class NormalizationMethods:
    """Evaluate and apply normalization strategies for ToF-SIMS data.

    An instance wraps a single spectrum (m/z axis plus raw intensities) and
    offers helpers to normalize it and to plot the outcome.  Batch evaluation
    across many files is exposed through the static :meth:`evaluate`.

    Parameters
    ----------
    mz_values : array-like
        The m/z axis shared by all spectra.
    raw_intensities : array-like
        Raw intensity values aligned with ``mz_values``.
    """

    def __init__(self, mz_values, raw_intensities):
        # Both inputs are coerced to float arrays so that boolean masking and
        # the normalization routines receive a uniform type.
        self.mz = np.asarray(mz_values, dtype=float)
        self.intensity = np.asarray(raw_intensities, dtype=float)

    # -- single spectrum helpers -------------------------------------------

    def apply(self, method: str = "tic", **kwargs) -> np.ndarray:
        """Apply a named normalization to the stored spectrum.

        Parameters
        ----------
        method : str
            Normalization method name (see :func:`normalization_method_names`).
        **kwargs
            Method-specific keyword arguments, forwarded to :func:`normalize`.

        Returns
        -------
        np.ndarray
            Normalized intensity array.
        """
        return normalize(self.intensity, method=method, **kwargs)

    def compare_visual(
        self,
        methods: Optional[List[str]] = None,
        method_kwargs_map: Optional[Dict[str, Dict[str, Any]]] = None,
        mz_min: float = 0,
        mz_max: float = 500,
        sample_name: str = "test",
        group: Optional[str] = None,
        figsize: tuple = (12, 8),
        save_plot: bool = True,
    ):
        """Plot the raw spectrum alongside several normalized versions.

        One panel is drawn for the raw spectrum, followed by one panel per
        requested method, stacked vertically on a shared x axis.  A method
        that raises is not fatal: its panel is left empty, the error text is
        put in the panel title, and a warning is logged.

        Parameters
        ----------
        methods : list of str, optional
            Normalization methods to plot.  Defaults to
            ``["tic", "median", "rms", "poisson", "sqrt", "vsn"]``.  An empty
            list yields a figure containing only the raw panel.
        method_kwargs_map : dict, optional
            ``{method: {kwarg: value}}`` for method-specific parameters.
        mz_min, mz_max : float
            Inclusive m/z bounds for the preview window.
        sample_name : str
            Label used in the figure title and in the output file names.
        group : str or None
            Group identifier.  Accepted for signature compatibility; this
            method does not currently use it.
        figsize : tuple
            ``(width, height)``; the height is scaled to ``height / 3`` per
            panel.
        save_plot : bool
            When true, write ``norm_compare_<sample_name>.png`` and ``.pdf``
            into ``OUTPUT_DIR``.

        Returns
        -------
        numpy.ndarray of matplotlib.axes.Axes
            One-dimensional array of axes, raw panel first.
        """
        import matplotlib.pyplot as plt

        if methods is None:
            methods = ["tic", "median", "rms", "poisson", "sqrt", "vsn"]
        method_kwargs_map = method_kwargs_map or {}

        mask = (self.mz >= mz_min) & (self.mz <= mz_max)
        mz_win = self.mz[mask]

        n = len(methods) + 1
        # squeeze=False guarantees an array even when only the raw panel is
        # requested (n == 1); with the default squeeze, subplots() returns a
        # bare Axes in that case and indexing it fails.
        fig, axes_grid = plt.subplots(
            n,
            1,
            figsize=(figsize[0], figsize[1] / 3 * n),
            sharex=True,
            squeeze=False,
        )
        axes = axes_grid[:, 0]

        axes[0].plot(mz_win, self.intensity[mask], lw=0.5, color="grey")
        axes[0].set_title("Raw")
        axes[0].set_ylabel("Intensity")

        for i, m in enumerate(methods):
            kwargs = method_kwargs_map.get(m, {})
            ax = axes[i + 1]
            try:
                normed = normalize(self.intensity, method=m, **kwargs)
                ax.plot(mz_win, normed[mask], lw=0.5)
                ax.set_title(m)
                ax.set_ylabel("Norm. Int.")
            except Exception as e:
                # Deliberately best-effort: one failing method must not abort
                # the whole comparison, but the failure should be visible in
                # the log as well as on the figure.
                logger.warning("Normalization method %r failed: %s", m, e)
                ax.set_title(f"{m} (failed: {e})")

        axes[-1].set_xlabel("m/z")
        fig.suptitle(f"Normalization comparison — {sample_name}", y=1.01)
        fig.tight_layout()

        if save_plot:
            for ext in (".png", ".pdf"):
                fig.savefig(
                    OUTPUT_DIR / f"norm_compare_{sample_name}{ext}",
                    bbox_inches="tight",
                    dpi=300,
                )
        return axes

    def normalize_and_check(
        self,
        method: str = "tic",
        method_kwargs: Optional[Dict[str, Any]] = None,
        *,
        sample_name: str = "test",
        group: Optional[str] = None,
        mz_min: float = 0,
        mz_max: float = 500,
        show_peaks: bool = False,
        peak_height: float = 1000,
        peak_prominence: float = 50,
        min_peak_width: int = 1,
        max_peak_width: Optional[int] = None,
        figsize: tuple = (10, 6),
        save_plot: bool = True,
    ):
        """Apply one normalization and visualise the result with peak overlay.

        The stored spectrum is normalized once, then handed to
        :class:`PlotPeak` together with the raw intensities; the normalized
        array is passed as ``corrected_intensities``.

        Parameters
        ----------
        method : str
            Normalization method.
        method_kwargs : dict, optional
            Extra kwargs forwarded to :func:`normalize`.
        sample_name, group : str
            Plot labels, forwarded to :class:`PlotPeak`.
        mz_min, mz_max : float
            m/z window for the plot.
        show_peaks : bool
            Annotate detected peaks.
        peak_height, peak_prominence, min_peak_width, max_peak_width
            Peak detection tuning passed to ``PlotPeak.plot``.
        figsize : tuple
            Figure size, forwarded to ``PlotPeak.plot``.
        save_plot : bool
            Forwarded to ``PlotPeak.plot``.

        Returns
        -------
        object
            Whatever ``PlotPeak.plot`` returns (documented upstream as
            ``matplotlib.axes.Axes``).
        """
        method_kwargs = method_kwargs or {}
        normalized = normalize(self.intensity, method=method, **method_kwargs)

        plotter = PlotPeak(
            mz_values=self.mz,
            raw_intensities=self.intensity,
            sample_name=sample_name,
            group=group,
            corrected_intensities=normalized,
        )
        return plotter.plot(
            mz_min=mz_min,
            mz_max=mz_max,
            show_peaks=show_peaks,
            peak_height=peak_height,
            peak_prominence=peak_prominence,
            min_peak_width=min_peak_width,
            max_peak_width=max_peak_width,
            figsize=figsize,
            save_plot=save_plot,
        )

    # -- batch evaluation --------------------------------------------------

    @staticmethod
    def evaluate(
        files: List[Union[str, Path]],
        methods: Optional[List[str]] = None,
        method_kwargs_map: Optional[Dict[str, Dict[str, Any]]] = None,
        mz_min: Optional[float] = None,
        mz_max: Optional[float] = None,
        n_jobs: int = -1,
        compute_supervised: bool = True,
        save_results: bool = True,
    ):
        """Evaluate normalization methods across multiple spectra files.

        Thin wrapper around :class:`NormalizationEvaluator` that runs
        evaluation, prints a summary, and optionally saves results.

        Parameters
        ----------
        files : list of str or Path
            Spectrum file paths or glob patterns.
        methods : list of str, optional
            Method names to evaluate.
        method_kwargs_map : dict, optional
            Per-method keyword arguments.
        mz_min, mz_max : float, optional
            m/z range for data import.
        n_jobs : int
            Parallel workers (``-1`` = all CPUs).
        compute_supervised : bool
            Run supervised classification (requires scikit-learn).
        save_results : bool
            When true, write the results table to
            ``OUTPUT_DIR / "normalization_eval.xlsx"`` and log the path.

        Returns
        -------
        NormalizationEvaluator
            The evaluator instance (call ``.plot()`` for figures).
        """
        evaluator = NormalizationEvaluator(
            files=files,
            methods=methods,
            method_kwargs_map=method_kwargs_map,
            mz_min=mz_min,
            mz_max=mz_max,
            n_jobs=n_jobs,
            compute_supervised=compute_supervised,
        )
        results = evaluator.evaluate()
        evaluator.print_summary()

        if save_results:
            out_path = OUTPUT_DIR / "normalization_eval.xlsx"
            # NOTE(review): relies on ``results`` exposing a pandas-style
            # ``to_excel(path, index=False)`` — confirm against
            # NormalizationEvaluator.evaluate().
            results.to_excel(out_path, index=False)
            logger.info("Results saved to: %s", out_path)

        return evaluator

    @staticmethod
    def available_methods() -> List[str]:
        """Return the method names reported by :func:`normalization_method_names`."""
        return normalization_method_names()