# Source code for sphero_vem.measure.pipeline

"""Pipeline orchestration, I/O, and configuration for label morphology analysis."""

from pathlib import Path
from dataclasses import dataclass, field
from tqdm import tqdm
import numpy as np
import pandas as pd
import zarr
from sphero_vem.utils import (
    check_isotropic,
    slice_from_bbox,
    flatten_for_save,
    reconstruct_tuples,
)
from sphero_vem.utils.config import BaseConfig
from sphero_vem.measure.voxel import props_voxel, assign_cell
from sphero_vem.measure.sdf import props_sdf
from sphero_vem.measure.mesh import props_mesh
from sphero_vem.measure.fractal import props_fractal


@dataclass
class LabelAnalysisConfig(BaseConfig):
    """
    Configuration for 3D label morphology analysis pipeline.

    Defines paths, SDF parameters, and mesh extraction settings for
    computing shape descriptors from segmented volumetric data stored
    in zarr format.

    Parameters
    ----------
    root_path : Path
        Root path to the zarr store containing segmentation data.
    seg_target : str
        Name of the segmentation target (e.g., 'cells', 'nuclei').
        Used to construct paths: ``labels/{seg_target}/tables/``.
    scale_dir : str
        Scale directory name within the masks folder
        (e.g., '50-50-50', 's1').
    bbox_margin : int, optional
        Margin in voxels to expand bounding boxes for label cropping.
        Default is 15.
    sigma : float, optional
        Gaussian smoothing sigma in voxels for SDF computation. Controls
        surface smoothness for curvature estimation. Default is 3.
    eps_voxels : float, optional
        Epsilon in voxels for Heaviside volume/area integration.
        Default is 1.5.
    mesh_downsample_factor : int, optional
        Factor by which to downsample SDF before marching cubes. Reduces
        vertex count and computation time. Default is 2.
    h : float, optional
        Step size in voxels for finite difference curvature estimation.
        Default is 1.5.
    voxel_only : bool, optional
        If True, compute only voxel-based properties (skip SDF and mesh).
        Default is False.
    sigma_frac : float, optional
        Gaussian smoothing sigma in voxels for SDF computation used during
        fractal dimension calculation. This should be in the 0.5-1.0 range.
        Default is 0.7.
    n_steps_frac : int, optional
        Number of epsilon values sampled in log-space during fractal
        dimension calculation. Default is 30.
    sep : str
        Separator used for unpacking tuple columns when saving the region
        properties dataframe to parquet using `save_regionprops`. This
        should be used by `read_regionprops` to reconstruct the tuple
        columns, e.g. bbox, centroid... Default is ``"__"``.

    Raises
    ------
    ValueError
        If the zarr array at ``array_path`` has no ``spacing`` attribute.
    """

    root_path: Path
    seg_target: str
    scale_dir: str
    bbox_margin: int = 15
    sigma: float = 3.0
    eps_voxels: float = 1.5
    mesh_downsample_factor: int = 2
    h: float = 1.5
    voxel_only: bool = False
    sigma_frac: float = 0.7
    n_steps_frac: int = 30
    sep: str = "__"
    # Derived fields, populated in __post_init__ from the paths above.
    array_path: Path = field(init=False)
    save_root: Path = field(init=False)
    spacing: tuple[float, float, float] = field(init=False)
    cell_array_path: Path = field(init=False)

    def __post_init__(self):
        self.array_path = (
            self.root_path / f"labels/{self.seg_target}/masks/{self.scale_dir}"
        )
        self.save_root = self.root_path / f"labels/{self.seg_target}/tables"
        self.cell_array_path = self.root_path / f"labels/cells/masks/{self.scale_dir}"
        src_zarr = zarr.open_array(self.array_path)
        raw_spacing = src_zarr.attrs.get("spacing")
        # Fail loudly here rather than with a TypeError from iterating None.
        if raw_spacing is None:
            raise ValueError(
                f"Zarr array at {self.array_path} has no 'spacing' attribute."
            )
        # Spacing is assumed to be in nm and converted to µm.
        self.spacing = tuple(i / 1000 for i in raw_spacing)
def label_properties(
    labels: np.ndarray,
    spacing: tuple[float, float, float],
    bbox_margin: int = 15,
    sigma: float = 3.0,
    eps_voxels: float = 1.5,
    mesh_downsample_factor: int = 2,
    h: float = 1.5,
    mesh_save_root: Path | None = None,
    voxel_only: bool = False,
    sigma_frac: float = 0.7,
    n_steps_frac: int = 30,
) -> pd.DataFrame:
    """
    Compute morphological properties for all labels in a 3D volume.

    Extracts voxel-based, SDF-based, and mesh-based shape descriptors
    for each labeled region. Requires isotropic voxel spacing.

    Parameters
    ----------
    labels : np.ndarray
        3D integer array of labeled regions. Background should be 0.
    spacing : tuple[float, float, float]
        Isotropic voxel spacing in physical units (z, y, x).
    bbox_margin : int, optional
        Margin in voxels to expand bounding boxes when cropping labels.
        Default is 15.
    sigma : float, optional
        Gaussian smoothing sigma in voxels for SDF computation.
        Default is 3.
    eps_voxels : float, optional
        Epsilon in voxels for Heaviside volume/area integration.
        Default is 1.5.
    mesh_downsample_factor : int, optional
        Downsampling factor for SDF before mesh extraction. Default is 2.
    h : float, optional
        Step size in voxels for finite difference curvature estimation.
        Default is 1.5.
    mesh_save_root : Path | None, optional
        If provided, per-label meshes and curvature data are saved as
        .npz files under ``{mesh_save_root}/meshes/``. Default is None.
    voxel_only : bool, optional
        If True, skip SDF and mesh computations; return only voxel-based
        properties. Default is False.
    sigma_frac : float, optional
        Gaussian smoothing sigma in voxels for the SDF used during fractal
        dimension calculation. Default is 0.7.
    n_steps_frac : int, optional
        Number of epsilon values sampled in log-space during fractal
        dimension calculation. Default is 30.

    Returns
    -------
    pd.DataFrame
        DataFrame with one row per label. Columns include voxel-based
        properties (label, bbox, centroid, inertia eigenvalues) and, when
        ``voxel_only`` is False, SDF-based properties (volume, surface
        area, sphericity), mesh-based curvature statistics, and fractal
        dimension estimates.

    Raises
    ------
    ValueError
        If spacing is not isotropic.

    See Also
    --------
    ~sphero_vem.measure.voxel.props_voxel : Voxel-based property extraction.
    ~sphero_vem.measure.sdf.props_sdf : SDF-based volume and surface area computation.
    ~sphero_vem.measure.mesh.props_mesh : Mesh-based curvature computation.
    ~sphero_vem.measure.fractal.props_fractal : Fractal dimension estimation.
    """
    check_isotropic(spacing, raise_error=True)
    results = props_voxel(
        labels, spacing=spacing, bbox_margin=bbox_margin, calc_volume=voxel_only
    )
    if not voxel_only:
        for entry in tqdm(results, "Analyzing labels"):
            # Crop to the expanded bounding box so per-label work stays local.
            sel_slice = slice_from_bbox(entry["bbox_exp"])
            labels_crop = labels[sel_slice]
            props, sdf = props_sdf(
                label_idx=entry["label"],
                labels=labels_crop,
                spacing=spacing,
                sigma=sigma,
                eps_voxels=eps_voxels,
            )
            entry |= props
            mesh_save_path = (
                mesh_save_root / f"meshes/mesh-{entry['label']}.npz"
                if mesh_save_root is not None
                else None
            )
            if mesh_save_path:
                mesh_save_path.parent.mkdir(exist_ok=True, parents=True)
            # props_mesh runs regardless; mesh_save_path=None just skips saving.
            props = props_mesh(
                sdf=sdf,
                spacing=spacing,
                mesh_downsample_factor=mesh_downsample_factor,
                h=h,
                mesh_save_path=mesh_save_path,
            )
            entry |= props
            props = props_fractal(
                label_idx=entry["label"],
                labels=labels_crop,
                spacing=spacing,
                sigma_frac=sigma_frac,
                n_steps=n_steps_frac,
            )
            entry |= props
    return pd.DataFrame(results)
def analyze_labels(config: LabelAnalysisConfig) -> None:
    """
    Run label morphology analysis pipeline from configuration.

    Loads labels from zarr, computes shape descriptors via
    `label_properties`, and saves results to parquet along with the
    configuration.

    Parameters
    ----------
    config : LabelAnalysisConfig
        Configuration object specifying paths and analysis parameters.

    Raises
    ------
    ValueError
        If spacing is not isotropic.

    Notes
    -----
    Outputs are saved to ``{config.save_root}/``:

    - ``regionprops.parquet``: DataFrame with all computed properties
    - ``analysis-config.json``: Serialized configuration for reproducibility
    - ``meshes/mesh-{label}.npz``: Per-label mesh data (if not voxel_only)
    """
    # Materialize the full label volume from the zarr store.
    label_volume = zarr.open_array(config.array_path)[:]
    props = label_properties(
        labels=label_volume,
        spacing=config.spacing,
        bbox_margin=config.bbox_margin,
        sigma=config.sigma,
        eps_voxels=config.eps_voxels,
        mesh_downsample_factor=config.mesh_downsample_factor,
        h=config.h,
        mesh_save_root=config.save_root,
        voxel_only=config.voxel_only,
        sigma_frac=config.sigma_frac,
        n_steps_frac=config.n_steps_frac,
    )
    # Non-cell targets (e.g. nuclei) get mapped onto their parent cell labels.
    if config.seg_target != "cells":
        cell_volume = zarr.open_array(config.cell_array_path)[:]
        props = assign_cell(props=props, cells=cell_volume)
    save_regionprops(
        props, dst_path=config.save_root / "regionprops.parquet", sep=config.sep
    )
    config.to_json(config.save_root / "analysis-config.json")
def save_regionprops(
    props: pd.DataFrame,
    dst_path: Path,
    sep: str = "__",
) -> None:
    """
    Save region properties to parquet with tuple columns flattened.

    Tuple and list columns are unpacked into indexed scalar columns
    (e.g., ``centroid`` → ``centroid__0``, ``centroid__1``, ...) for
    parquet compatibility. The index is not saved; all information
    should be encoded in the columns.

    Parameters
    ----------
    props : pd.DataFrame
        DataFrame of region properties, potentially containing tuple or
        list valued columns.
    dst_path : Path
        Destination path for the parquet file.
    sep : str, optional
        Separator for flattened column names. Must match the `sep` passed
        to `read_regionprops` for round-tripping. Default is ``"__"``.

    See Also
    --------
    read_regionprops : Inverse operation.
    flatten_for_save : Underlying flattening logic.
    """
    flattened = flatten_for_save(props, sep=sep)
    flattened.to_parquet(dst_path, index=False)
def read_regionprops(
    src_path: Path,
    sep: str = "__",
) -> pd.DataFrame:
    """
    Read region properties from parquet and reconstruct tuple columns.

    Indexed scalar columns (e.g., ``centroid__0``, ``centroid__1``, ...)
    are packed back into tuple columns (``centroid``).

    Parameters
    ----------
    src_path : Path
        Path to the parquet file saved by `save_regionprops`.
    sep : str, optional
        Separator used when the file was saved. Default is ``"__"``.

    Returns
    -------
    pd.DataFrame
        DataFrame with tuple columns reconstructed.

    See Also
    --------
    save_regionprops : Inverse operation.
    reconstruct_tuples : Underlying reconstruction logic.
    """
    raw = pd.read_parquet(src_path)
    return reconstruct_tuples(raw, sep=sep)