"""Pipeline orchestration, I/O, and configuration for label morphology analysis."""
from pathlib import Path
from dataclasses import dataclass, field
from tqdm import tqdm
import numpy as np
import pandas as pd
import zarr
from sphero_vem.utils import (
check_isotropic,
slice_from_bbox,
flatten_for_save,
reconstruct_tuples,
)
from sphero_vem.utils.config import BaseConfig
from sphero_vem.measure.voxel import props_voxel, assign_cell
from sphero_vem.measure.sdf import props_sdf
from sphero_vem.measure.mesh import props_mesh
from sphero_vem.measure.fractal import props_fractal
[docs]
@dataclass
class LabelAnalysisConfig(BaseConfig):
    """
    Configuration for 3D label morphology analysis pipeline.

    Defines paths, SDF parameters, and mesh extraction settings for computing
    shape descriptors from segmented volumetric data stored in zarr format.

    Parameters
    ----------
    root_path : Path
        Root path to the zarr store containing segmentation data. A plain
        string is accepted and converted to a `Path`.
    seg_target : str
        Name of the segmentation target (e.g., 'cells', 'nuclei').
        Used to construct paths: `labels/{seg_target}/tables/`.
    scale_dir : str
        Scale directory name within the masks folder (e.g., '50-50-50', 's1').
    bbox_margin : int, optional
        Margin in voxels to expand bounding boxes for label cropping.
        Default is 15.
    sigma : float, optional
        Gaussian smoothing sigma in voxels for SDF computation.
        Controls surface smoothness for curvature estimation. Default is 3.
    eps_voxels : float, optional
        Epsilon in voxels for Heaviside volume/area integration.
        Default is 1.5.
    mesh_downsample_factor : int, optional
        Factor by which to downsample SDF before marching cubes.
        Reduces vertex count and computation time. Default is 2.
    h : float, optional
        Step size in voxels for finite difference curvature estimation.
        Default is 1.5.
    voxel_only : bool, optional
        If True, compute only voxel-based properties (skip SDF and mesh).
        Default is False.
    sigma_frac : float, optional
        Gaussian smoothing sigma in voxels for SDF computation used during
        fractal dimension calculation. This should be in the 0.5-1.0 range.
        Default is 0.7.
    n_steps_frac : int, optional
        Number of epsilon values sampled in log-space during fractal
        dimension calculation. Default is 30.
    sep : str
        Separator used for unpacking tuple columns when saving the region
        properties dataframe to parquet using `save_regionprops`. This should
        be used by `read_regionprops` to reconstruct the tuple columns,
        e.g. bbox, centroid... Default is `"__"`.

    Attributes
    ----------
    array_path : Path
        `{root_path}/labels/{seg_target}/masks/{scale_dir}`; the label array
        that is analyzed.
    save_root : Path
        `{root_path}/labels/{seg_target}/tables`; destination for outputs.
    cell_array_path : Path
        `{root_path}/labels/cells/masks/{scale_dir}`; the cell label array at
        the same scale.
    spacing : tuple[float, float, float]
        The `spacing` attribute of the array at `array_path`, divided by
        1000 (the stored values are assumed to be in nm, giving µm).

    Raises
    ------
    ValueError
        If the array at `array_path` has no `spacing` attribute.
    """

    root_path: Path
    seg_target: str
    scale_dir: str
    bbox_margin: int = 15
    sigma: float = 3.0
    eps_voxels: float = 1.5
    mesh_downsample_factor: int = 2
    h: float = 1.5
    voxel_only: bool = False
    sigma_frac: float = 0.7
    n_steps_frac: int = 30
    sep: str = "__"
    # Derived in __post_init__; not accepted by the constructor.
    array_path: Path = field(init=False)
    save_root: Path = field(init=False)
    spacing: tuple[float, float, float] = field(init=False)
    cell_array_path: Path = field(init=False)

    def __post_init__(self):
        # A string root (e.g. coming from a JSON/CLI config) would otherwise
        # fail on the `/` operator below.
        if isinstance(self.root_path, str):
            self.root_path = Path(self.root_path)

        self.array_path = (
            self.root_path / f"labels/{self.seg_target}/masks/{self.scale_dir}"
        )
        self.save_root = self.root_path / f"labels/{self.seg_target}/tables"
        self.cell_array_path = self.root_path / f"labels/cells/masks/{self.scale_dir}"

        # Spacing is assumed to be in nm and converted to µm.
        src_zarr = zarr.open_array(self.array_path)
        spacing_nm = src_zarr.attrs.get("spacing")
        if spacing_nm is None:
            # Fail with an actionable message instead of an opaque
            # "'NoneType' object is not iterable".
            raise ValueError(
                f"Array at {self.array_path} has no 'spacing' attribute; "
                "cannot determine voxel spacing."
            )
        self.spacing = tuple(i / 1000 for i in spacing_nm)
[docs]
def label_properties(
    labels: np.ndarray,
    spacing: tuple[float, ...],
    bbox_margin: int = 15,
    sigma: float = 3,
    eps_voxels: float = 1.5,
    mesh_downsample_factor: int = 2,
    h: float = 1.5,
    mesh_save_root: Path | None = None,
    voxel_only: bool = False,
    sigma_frac: float = 0.7,
    n_steps_frac: int = 30,
) -> pd.DataFrame:
    """
    Compute morphological properties for all labels in a 3D volume.

    Extracts voxel-based, SDF-based, mesh-based, and fractal shape
    descriptors for each labeled region. Requires isotropic voxel spacing.

    Parameters
    ----------
    labels : np.ndarray
        3D integer array of labeled regions. Background should be 0.
    spacing : tuple[float, ...]
        Isotropic voxel spacing in physical units (z, y, x).
    bbox_margin : int, optional
        Margin in voxels to expand bounding boxes when cropping labels.
        Default is 15.
    sigma : float, optional
        Gaussian smoothing sigma in voxels for SDF computation.
        Default is 3.
    eps_voxels : float, optional
        Epsilon in voxels for Heaviside volume/area integration.
        Default is 1.5.
    mesh_downsample_factor : int, optional
        Downsampling factor for SDF before mesh extraction. Default is 2.
    h : float, optional
        Step size in voxels for finite difference curvature estimation.
        Default is 1.5.
    mesh_save_root : Path | None, optional
        If provided, per-label meshes and curvature data are saved as .npz
        files under `{mesh_save_root}/meshes/`. Default is None.
    voxel_only : bool, optional
        If True, skip SDF, mesh, and fractal computations; return only
        voxel-based properties. Default is False.
    sigma_frac : float, optional
        Gaussian smoothing sigma in voxels for the SDF used during fractal
        dimension calculation. Default is 0.7.
    n_steps_frac : int, optional
        Number of epsilon values sampled during fractal dimension
        calculation. Default is 30.

    Returns
    -------
    pd.DataFrame
        DataFrame with one row per label. Columns include voxel-based
        properties (label, bbox, centroid, inertia eigenvalues) and, when
        ``voxel_only`` is False, SDF-based properties (volume, surface area,
        sphericity), mesh-based curvature statistics, and the outputs of
        `props_fractal`.

    Raises
    ------
    ValueError
        If spacing is not isotropic.

    See Also
    --------
    ~sphero_vem.measure.voxel.props_voxel
        Voxel-based property extraction.
    ~sphero_vem.measure.sdf.props_sdf
        SDF-based volume and surface area computation.
    ~sphero_vem.measure.mesh.props_mesh
        Mesh-based curvature computation.
    ~sphero_vem.measure.fractal.props_fractal
        Fractal property computation.
    """
    check_isotropic(spacing, raise_error=True)

    # The voxel stage is asked for a volume only in voxel-only mode;
    # otherwise the SDF stage below is expected to supply it.
    results = props_voxel(
        labels, spacing=spacing, bbox_margin=bbox_margin, calc_volume=voxel_only
    )
    if voxel_only:
        return pd.DataFrame(results)

    # Create the mesh output directory once instead of on every iteration.
    mesh_dir = None
    if mesh_save_root is not None:
        mesh_dir = mesh_save_root / "meshes"
        mesh_dir.mkdir(exist_ok=True, parents=True)

    for entry in tqdm(results, "Analyzing labels"):
        # Work on the expanded bounding-box crop of this label only.
        labels_crop = labels[slice_from_bbox(entry["bbox_exp"])]

        sdf_props, sdf = props_sdf(
            label_idx=entry["label"],
            labels=labels_crop,
            spacing=spacing,
            sigma=sigma,
            eps_voxels=eps_voxels,
        )
        entry |= sdf_props

        mesh_save_path = (
            mesh_dir / f"mesh-{entry['label']}.npz" if mesh_dir is not None else None
        )
        # The mesh stage reuses the SDF computed above for this label.
        entry |= props_mesh(
            sdf=sdf,
            spacing=spacing,
            mesh_downsample_factor=mesh_downsample_factor,
            h=h,
            mesh_save_path=mesh_save_path,
        )

        # The fractal stage starts again from the label crop with its own
        # smoothing sigma.
        entry |= props_fractal(
            label_idx=entry["label"],
            labels=labels_crop,
            spacing=spacing,
            sigma_frac=sigma_frac,
            n_steps=n_steps_frac,
        )

    return pd.DataFrame(results)
[docs]
def analyze_labels(config: LabelAnalysisConfig) -> None:
    """
    Run label morphology analysis pipeline from configuration.

    Loads labels from zarr, computes shape descriptors via `label_properties`,
    and saves results to parquet along with the configuration. When the
    segmentation target is not ``"cells"``, the cell label array is loaded as
    well and passed with the properties to `assign_cell` before saving.

    Parameters
    ----------
    config : LabelAnalysisConfig
        Configuration object specifying paths and analysis parameters.

    Raises
    ------
    ValueError
        If spacing is not isotropic.

    Notes
    -----
    Outputs are saved to `{config.save_root}/`, which is created if it does
    not exist:

    - `regionprops.parquet`: DataFrame with all computed properties
    - `analysis-config.json`: Serialized configuration for reproducibility
    - `meshes/mesh-{label}.npz`: Per-label mesh data (if not voxel_only)
    """
    label_array = zarr.open_array(config.array_path)
    props = label_properties(
        # `[:]` reads the full array into memory.
        labels=label_array[:],
        spacing=config.spacing,
        bbox_margin=config.bbox_margin,
        sigma=config.sigma,
        eps_voxels=config.eps_voxels,
        mesh_downsample_factor=config.mesh_downsample_factor,
        h=config.h,
        mesh_save_root=config.save_root,
        voxel_only=config.voxel_only,
        sigma_frac=config.sigma_frac,
        n_steps_frac=config.n_steps_frac,
    )

    if config.seg_target != "cells":
        cell_array = zarr.open_array(config.cell_array_path)
        props = assign_cell(props=props, cells=cell_array[:])

    # The output directory is otherwise only created as a side effect of
    # saving meshes, which never happens in voxel-only mode or when the
    # volume contains no labels.
    config.save_root.mkdir(parents=True, exist_ok=True)

    save_regionprops(
        props, dst_path=config.save_root / "regionprops.parquet", sep=config.sep
    )
    config.to_json(config.save_root / "analysis-config.json")
[docs]
def save_regionprops(
    props: pd.DataFrame,
    dst_path: Path,
    sep: str = "__",
) -> None:
    """
    Write a region-properties table to parquet after flattening it.

    The table is first passed through `flatten_for_save`, which unpacks
    tuple and list columns into indexed scalar columns (e.g., ``centroid``
    → ``centroid__0``, ``centroid__1``, ...) so they can be stored in
    parquet. The DataFrame index is dropped on write, so anything worth
    keeping must live in a column.

    Parameters
    ----------
    props : pd.DataFrame
        Region properties; may contain tuple or list valued columns.
    dst_path : Path
        Where the parquet file is written.
    sep : str, optional
        Separator placed between the column name and the element index.
        Use the same value with `read_regionprops` to round-trip.
        Default is ``"__"``.

    See Also
    --------
    read_regionprops : Inverse operation.
    flatten_for_save : Underlying flattening logic.
    """
    flatten_for_save(props, sep=sep).to_parquet(dst_path, index=False)
[docs]
def read_regionprops(
    src_path: Path,
    sep: str = "__",
) -> pd.DataFrame:
    """
    Load a region-properties table from parquet and rebuild tuple columns.

    The file is read with pandas and then handed to `reconstruct_tuples`,
    which packs indexed scalar columns (e.g., ``centroid__0``,
    ``centroid__1``, ...) back into a single tuple column (``centroid``).

    Parameters
    ----------
    src_path : Path
        Parquet file previously written by `save_regionprops`.
    sep : str, optional
        Separator that was used when the file was saved.
        Default is ``"__"``.

    Returns
    -------
    pd.DataFrame
        The table with tuple columns reconstructed.

    See Also
    --------
    save_regionprops : Inverse operation.
    reconstruct_tuples : Underlying reconstruction logic.
    """
    flat_table = pd.read_parquet(src_path)
    return reconstruct_tuples(flat_table, sep=sep)