Source code for mioXpektron.detection.peak_analysis

# Requirements:
#   pip install polars scipy numpy
# File:
#   'blood serum_CT-1a_1.txt' (tab-delimited), with header: Channel  m/z  Intensity
#   comment lines start with '#'

import logging

import polars as pl
import numpy as np
from scipy.signal import find_peaks

logger = logging.getLogger(__name__)

def _interp(x1, y1, x2, y2, y_target):
    """Linear interpolation to get x at y_target between (x1,y1) and (x2,y2)."""
    if y2 == y1:
        return x1  # degenerate, return left point
    return x1 + (y_target - y1) * (x2 - x1) / (y2 - y1)

def _fwhm_from_peak(mz: np.ndarray, y: np.ndarray, p: int) -> float | None:
    """
    Compute FWHM for peak at index p using linear interpolation of half-maximum crossings.
    Returns None if the half-maximum is not bracketed on either side.
    """
    peak_y = y[p]
    if peak_y <= 0:
        return None
    half = peak_y / 2.0

    # Left crossing
    i = p
    while i > 0 and y[i] >= half:
        i -= 1
    if i == p:  # immediate drop (flat/edge)
        return None
    if i == 0 and y[i] > half:
        return None
    xL = _interp(mz[i], y[i], mz[i+1], y[i+1], half)

    # Right crossing
    j = p
    n = len(y)
    while j < n - 1 and y[j] >= half:
        j += 1
    if j == p:
        return None
    if j == n - 1 and y[j] > half:
        return None
    xR = _interp(mz[j-1], y[j-1], mz[j], y[j], half)

    width = xR - xL
    return width if width > 0 else None

[docs] def load_and_measure_peaks_polars( path: str, mz_min: float, mz_max: float, min_height: float | None = None, prominence: float | None = None, distance_pts: int | None = None, save_csv: str | None = None, ) -> pl.DataFrame: """ Read ToF-SIMS TXT with columns [Channel, m/z, Intensity], filter m/z range, detect peaks, and compute [m/z_center, FWHM, height]. Parameters ---------- path : str Path to TXT (tab-delimited). Lines starting with '#' are ignored. mz_min, mz_max : float m/z selection window (inclusive). min_height : float or None Minimum peak height for detection (scipy find_peaks 'height'). prominence : float or None Peak prominence threshold (recommended for robustness). distance_pts : int or None Minimum horizontal distance (in data points) between neighboring peaks. save_csv : str or None If provided, save the results to this CSV path. Returns ------- Polars DataFrame with columns: 'm/z', 'FWHM', 'height' """ # Read with Polars (handle comments and tab separator) df = pl.read_csv( path, separator="\t", has_header=True, comment_char="#", infer_schema_length=10000, ) # normalize column names df = df.rename({c: c.strip().lower() for c in df.columns}) # sanity check expected columns expected = {"channel", "m/z", "intensity"} missing = expected.difference(set(df.columns)) if missing: raise ValueError(f"Missing expected columns: {missing}. Found: {df.columns}") # filter range and sort by m/z (important if input is not strictly sorted) sub = ( df.filter((pl.col("m/z") >= mz_min) & (pl.col("m/z") <= mz_max)) .sort("m/z") ) if sub.height == 0: return pl.DataFrame({"m/z": [], "FWHM": [], "height": []}) mz = sub["m/z"].to_numpy() y = sub["intensity"].to_numpy() # Peak detection (SciPy) kwargs = {} if min_height is not None: kwargs["height"] = min_height if prominence is not None: kwargs["prominence"] = prominence if distance_pts is not None: kwargs["distance"] = distance_pts peak_idx, props = find_peaks(y, **kwargs) centers = [] heights = [] widths = [] for p in peak_idx: centers.append(float(mz[p])) # prefer 'peak height' from y-array; if 'peak_heights' supplied, it equals y[p] heights.append(float(y[p])) w = _fwhm_from_peak(mz, y, p) widths.append(float(w) if w is not None else np.nan) out = pl.DataFrame({"m/z": centers, "FWHM": widths, "height": heights}) if save_csv: out.write_csv(save_csv) return out
# ------------------ Example usage ------------------ if __name__ == "__main__": results = load_and_measure_peaks_polars( path="blood serum_CT-1a_1.txt", mz_min=0.0275, # example window mz_max=0.0286, min_height=1, # set based on your noise level prominence=0.5, # often more robust than height alone distance_pts=5, # avoid double-counting very close peaks save_csv=None ) logger.info(results)