"""High-level orchestration helpers for normalising ToF-SIMS spectra."""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
import numpy as np
logger = logging.getLogger(__name__)
# polars is an optional dependency: when it is missing, ``pl`` is bound to
# ``None`` and ``_POLARS_AVAILABLE`` records that fact instead of letting the
# ImportError abort the import of this module.
try:
    import polars as pl

    _POLARS_AVAILABLE = True
except ImportError:
    pl = None  # type: ignore[assignment]
    _POLARS_AVAILABLE = False
from .normalization import normalize, normalization_method_names, tic_normalization
from .normalization_eval import NormalizationEvaluator
from ..plotting import PlotPeak
# Directory where this module writes comparison figures and evaluation
# results. The path is relative, so it resolves against the current working
# directory. NOTE: the directory is created as a side effect of importing
# this module.
OUTPUT_DIR = Path("output_files")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
class NormalizationMethods:
    """Evaluate and apply normalization strategies for ToF-SIMS data.

    Parameters
    ----------
    mz_values : array-like
        The m/z axis shared by all spectra.
    raw_intensities : array-like
        Raw intensity values aligned with ``mz_values``.

    Attributes
    ----------
    mz : np.ndarray
        ``mz_values`` converted to a float array.
    intensity : np.ndarray
        ``raw_intensities`` converted to a float array.
    """

    def __init__(self, mz_values, raw_intensities):
        self.mz = np.asarray(mz_values, dtype=float)
        self.intensity = np.asarray(raw_intensities, dtype=float)

    # -- single spectrum helpers -------------------------------------------

    def apply(self, method: str = "tic", **kwargs) -> np.ndarray:
        """Apply a named normalization to the stored spectrum.

        Parameters
        ----------
        method : str
            Normalization method name (see :func:`normalization_method_names`).
        **kwargs
            Method-specific keyword arguments, forwarded to :func:`normalize`.

        Returns
        -------
        np.ndarray
            Normalized intensity array.
        """
        return normalize(self.intensity, method=method, **kwargs)

    def compare_visual(
        self,
        methods: Optional[List[str]] = None,
        method_kwargs_map: Optional[Dict[str, Dict[str, Any]]] = None,
        mz_min: float = 0,
        mz_max: float = 500,
        sample_name: str = "test",
        group: Optional[str] = None,
        figsize: tuple = (12, 8),
        save_plot: bool = True,
    ):
        """Plot the raw spectrum alongside several normalized versions.

        One subplot row is drawn for the raw spectrum, followed by one row per
        requested method. A method that raises is not fatal: the failure is
        logged and reported in that row's title, and the remaining methods are
        still plotted.

        Parameters
        ----------
        methods : list of str, optional
            Normalization methods to plot. Defaults to
            ``["tic", "median", "rms", "poisson", "sqrt", "vsn"]``. An empty
            list yields a figure containing only the raw spectrum.
        method_kwargs_map : dict, optional
            ``{method: {kwarg: value}}`` for method-specific parameters.
        mz_min, mz_max : float
            Inclusive m/z bounds for the preview window.
        sample_name : str
            Label used in the figure title and in the output file names.
        group : str or None
            Group identifier. Currently not used by this method.
        figsize : tuple
            Reference figure size; the height is scaled with the number of
            subplot rows (``figsize[1] / 3`` per row).
        save_plot : bool
            If true, write ``norm_compare_<sample_name>.png`` and ``.pdf``
            into ``OUTPUT_DIR``.

        Returns
        -------
        numpy.ndarray of matplotlib.axes.Axes
            One axes per row: the raw spectrum first, then one per method.
        """
        # Imported lazily so matplotlib is only required when plotting.
        import matplotlib.pyplot as plt

        if methods is None:
            methods = ["tic", "median", "rms", "poisson", "sqrt", "vsn"]
        method_kwargs_map = method_kwargs_map or {}

        mask = (self.mz >= mz_min) & (self.mz <= mz_max)
        mz_win = self.mz[mask]

        n = len(methods) + 1
        # squeeze=False always returns a 2-D axes array. With the default
        # squeezing, a single row (empty ``methods``) comes back as a bare
        # Axes object and indexing it would raise TypeError.
        fig, axes_grid = plt.subplots(
            n,
            1,
            figsize=(figsize[0], figsize[1] / 3 * n),
            sharex=True,
            squeeze=False,
        )
        axes = axes_grid[:, 0]

        axes[0].plot(mz_win, self.intensity[mask], lw=0.5, color="grey")
        axes[0].set_title("Raw")
        axes[0].set_ylabel("Intensity")

        for ax, m in zip(axes[1:], methods):
            kwargs = method_kwargs_map.get(m, {})
            try:
                normed = normalize(self.intensity, method=m, **kwargs)
                ax.plot(mz_win, normed[mask], lw=0.5)
                ax.set_title(m)
                ax.set_ylabel("Norm. Int.")
            except Exception as e:
                # Deliberate best-effort: one failing method must not abort
                # the whole comparison, but the failure should be traceable.
                logger.warning("Normalization %r failed: %s", m, e)
                ax.set_title(f"{m} (failed: {e})")

        axes[-1].set_xlabel("m/z")
        fig.suptitle(f"Normalization comparison — {sample_name}", y=1.01)
        fig.tight_layout()

        if save_plot:
            # Recreate the directory in case it was removed after import.
            OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
            for ext in (".png", ".pdf"):
                fig.savefig(
                    OUTPUT_DIR / f"norm_compare_{sample_name}{ext}",
                    bbox_inches="tight",
                    dpi=300,
                )
        return axes

    def normalize_and_check(
        self,
        method: str = "tic",
        method_kwargs: Optional[Dict[str, Any]] = None,
        *,
        sample_name: str = "test",
        group: Optional[str] = None,
        mz_min: float = 0,
        mz_max: float = 500,
        show_peaks: bool = False,
        peak_height: float = 1000,
        peak_prominence: float = 50,
        min_peak_width: int = 1,
        max_peak_width: Optional[int] = None,
        figsize: tuple = (10, 6),
        save_plot: bool = True,
    ):
        """Apply one normalization and visualise the result with peak overlay.

        The stored spectrum is normalized with :func:`normalize`, then handed
        to :class:`PlotPeak` together with the raw intensities; all plotting
        options are forwarded unchanged to ``PlotPeak.plot``.

        Parameters
        ----------
        method : str
            Normalization method.
        method_kwargs : dict, optional
            Extra kwargs forwarded to :func:`normalize`.
        sample_name, group : str
            Plot labels passed to :class:`PlotPeak`.
        mz_min, mz_max : float
            m/z window for the plot.
        show_peaks : bool
            Annotate detected peaks.
        peak_height, peak_prominence, min_peak_width, max_peak_width
            Peak detection tuning passed to :class:`PlotPeak`.
        figsize : tuple
            Figure size passed to ``PlotPeak.plot``.
        save_plot : bool
            Passed to ``PlotPeak.plot``; where the figure is written is
            decided there.

        Returns
        -------
        matplotlib.axes.Axes
            The return value of ``PlotPeak.plot`` (expected to be the axes it
            drew on; confirm against :class:`PlotPeak`).
        """
        method_kwargs = method_kwargs or {}
        normalized = normalize(self.intensity, method=method, **method_kwargs)
        plotter = PlotPeak(
            mz_values=self.mz,
            raw_intensities=self.intensity,
            sample_name=sample_name,
            group=group,
            corrected_intensities=normalized,
        )
        return plotter.plot(
            mz_min=mz_min,
            mz_max=mz_max,
            show_peaks=show_peaks,
            peak_height=peak_height,
            peak_prominence=peak_prominence,
            min_peak_width=min_peak_width,
            max_peak_width=max_peak_width,
            figsize=figsize,
            save_plot=save_plot,
        )

    # -- batch evaluation --------------------------------------------------

    @staticmethod
    def evaluate(
        files: List[Union[str, Path]],
        methods: Optional[List[str]] = None,
        method_kwargs_map: Optional[Dict[str, Dict[str, Any]]] = None,
        mz_min: Optional[float] = None,
        mz_max: Optional[float] = None,
        n_jobs: int = -1,
        compute_supervised: bool = True,
        save_results: bool = True,
    ):
        """Evaluate normalization methods across multiple spectra files.

        Thin wrapper around :class:`NormalizationEvaluator` that runs the
        evaluation, prints a summary, and optionally saves the results table.

        Parameters
        ----------
        files : list of str or Path
            Spectrum file paths or glob patterns.
        methods : list of str, optional
            Method names to evaluate.
        method_kwargs_map : dict, optional
            Per-method keyword arguments.
        mz_min, mz_max : float, optional
            m/z range for data import.
        n_jobs : int
            Parallel workers (``-1`` = all CPUs).
        compute_supervised : bool
            Run supervised classification (requires scikit-learn).
        save_results : bool
            If true, write the results table to
            ``OUTPUT_DIR / "normalization_eval.xlsx"``.

        Returns
        -------
        NormalizationEvaluator
            The evaluator instance (call ``.plot()`` for figures).
        """
        evaluator = NormalizationEvaluator(
            files=files,
            methods=methods,
            method_kwargs_map=method_kwargs_map,
            mz_min=mz_min,
            mz_max=mz_max,
            n_jobs=n_jobs,
            compute_supervised=compute_supervised,
        )
        results = evaluator.evaluate()
        evaluator.print_summary()

        if save_results:
            # Recreate the directory in case it was removed after import.
            OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
            out_path = OUTPUT_DIR / "normalization_eval.xlsx"
            # NOTE(review): ``to_excel(..., index=False)`` is the pandas
            # DataFrame API, so this assumes ``evaluator.evaluate()`` returns
            # a pandas DataFrame (a polars frame has no ``to_excel``) - confirm.
            results.to_excel(out_path, index=False)
            logger.info("Results saved to: %s", out_path)
        return evaluator

    @staticmethod
    def available_methods() -> List[str]:
        """Return the available normalization method names.

        The list is returned exactly as produced by
        :func:`normalization_method_names`; its ordering (expected to be
        sorted) is determined by that helper.
        """
        return normalization_method_names()