Source code for mioXpektron.recalibrate.calibrate_spectrum

from pathlib import Path
from typing import Optional, Sequence, Tuple, Literal

import numpy as np
import pandas as pd
from scipy.signal import find_peaks
from scipy.optimize import least_squares


[docs] def local_centroid(mz: np.ndarray, intensity: np.ndarray, target: float, window_ppm: float = 50.0) -> Optional[float]: """ Returns the centroid (intensity-weighted mean) of the peak nearest *target* within ±window_ppm. Returns None if no peak is found. """ delta = target * window_ppm * 1e-6 mask = (mz >= target - delta) & (mz <= target + delta) if not mask.any(): return None idx = mask.nonzero()[0] weights = intensity[idx] return np.average(mz[idx], weights=weights)
def _tof_model(params: np.ndarray, m_true: np.ndarray) -> np.ndarray: """ Empirical TOF correction model: t = k*sqrt(m) + c*m + t0 We invert it analytically (solve for m) when calibrating the spectrum. """ k, c, t0 = params t = k * np.sqrt(m_true) + c * m_true + t0 return t def _tof_residuals(params: np.ndarray, m_meas: np.ndarray, m_true: np.ndarray, weights: np.ndarray) -> np.ndarray: """ Residuals for nonlinear least squares in time-of-flight space. """ k, c, t0 = params # Convert measured m/z to times assuming t ∝ √m (raw axis ≈ m/z already) t_meas = np.sqrt(m_meas) t_calc = _tof_model(params, m_true) return weights * (t_meas - t_calc) def _poly_residuals(coeffs: np.ndarray, m_meas: np.ndarray, m_true: np.ndarray, weights: np.ndarray) -> np.ndarray: """ Residuals for direct polynomial fit: m_true = p0 + p1*m_meas + p2*m_meas² """ m_calc = np.polyval(coeffs, m_meas) return weights * (m_calc - m_true)
[docs] def calibrate_spectrum(mz: np.ndarray, intensity: np.ndarray, standards: Sequence[float], mode: Literal["tof", "poly"] = "tof", window_ppm: float = 50.0): """ Calibrate a ToF-SIMS spectrum. Parameters ---------- mz, intensity : 1-D numpy arrays of equal length standards : iterable of reference masses (monoisotopic, in u) mode : "tof" (default) – empirical TOF correction model "poly" – 2nd-order polynomial window_ppm : search window for peak matching (±ppm) Returns ------- mz_cal : calibrated mass axis fit : scipy OptimizeResult report : pandas DataFrame with residual statistics """ # 1. Match experimental peaks to standards exp = [] refs = [] wts = [] for mass in standards: centroid = local_centroid(mz, intensity, mass, window_ppm) if centroid is not None: exp.append(centroid) refs.append(mass) # weight by peak height to favour well-defined peaks wts.append(float(intensity[np.argmin(np.abs(mz - centroid))])) if len(exp) < 3: raise RuntimeError("Fewer than three standards were found – " "cannot perform reliable calibration.") exp = np.array(exp) refs = np.array(refs) wts = np.sqrt(np.array(wts)) # use sqrt for dynamic-range mitigation # 2. Fit chosen calibration model if mode == "tof": # Initial guess: k ≈ 1, c ≈ 0, t0 ≈ 0 x0 = np.array([1.0, 0.0, 0.0]) fit = least_squares(_tof_residuals, x0, args=(exp, refs, wts), method="trf") k, c, t0 = fit.x # Build calibrated axis by solving t = k√m + cm + t0 for m t_meas = np.sqrt(mz) # Quadratic: c m + k √m + (t0 - t) = 0 # Solve for √m using quadratic formula in √m (see derivation) a = c b = k d = t0 - t_meas discriminant = b**2 - 4 * a * d if np.isclose(a, 0.0): # Linear case: k * sqrt(m) = t - t0 sqrt_m = (t_meas - t0) / k sqrt_m = np.where(sqrt_m < 0, np.nan, sqrt_m) else: # Guard against negative discriminant discriminant = np.maximum(discriminant, 0.0) sqrt_m = (-b + np.sqrt(discriminant)) / (2 * a) sqrt_m = np.where(sqrt_m < 0, np.nan, sqrt_m) mz_cal = sqrt_m**2 elif mode == "poly": # 2nd-order polynomial fit (np.polyfit returns highest -> lowest order) coeffs = np.polyfit(exp, refs, deg=2, w=wts) fit = least_squares(_poly_residuals, coeffs, args=(exp, refs, wts), method="trf") coeffs_fit = fit.x mz_cal = np.polyval(coeffs_fit, mz) else: raise ValueError("mode must be 'tof' or 'poly'") # 3. Quality metrics residuals = mz_cal[np.searchsorted(mz, exp)] - refs ppm_err = residuals / refs * 1e6 report = pd.DataFrame({ "Reference (u)": refs, "Measured (u)": exp, "Calibrated (u)": mz_cal[np.searchsorted(mz, exp)], "Error (ppm)": ppm_err, }) return mz_cal, fit, report