Source code for sphero_vem.measure.distance

"""Nucleus distance map computation."""

from pathlib import Path
from tqdm import tqdm
import numpy as np
import pandas as pd
import zarr
from sphero_vem.io import write_zarr
from sphero_vem.utils import slice_from_bbox
from sphero_vem.utils.accelerator import ndi, gpu_dispatch, xp, ArrayLike
from sphero_vem.measure.pipeline import LabelAnalysisConfig, read_regionprops


@gpu_dispatch(return_to_host=True)
def _distance_map_cell(
    nuclei_region: ArrayLike,
    cells_region: ArrayLike,
    cell_label: int,
    nuclei_in_cell: ArrayLike,
    spacing: tuple[float, float, float],
) -> tuple[ArrayLike, ArrayLike]:
    """
    Compute Euclidean distance from nuclei within a single cell's bounding box.

    Computes the EDT from all nuclei belonging to the given cell. Returns the distance
    map and cell mask for the bounding box region, which are used by the caller to
    assign values into the full-volume output.

    Parameters
    ----------
    nuclei_region : ArrayLike
        Cropped nuclei label array (bounding box of the cell).
    cells_region : ArrayLike
        Cropped cell label array (same bounding box).
    cell_label : int
        Label index of the cell being processed.
    nuclei_in_cell : ArrayLike
        Array of nuclei label indices that belong to this cell.
    spacing : tuple[float, float, float]
        Voxel spacing in physical units (z, y, x).

    Returns
    -------
    nuclei_edt_region : ArrayLike
        Euclidean distance transform from the nuclei surfaces within the bounding box,
        in the same units as `spacing`.
    cells_mask_region : ArrayLike
        Boolean mask of voxels belonging to `cell_label` within the bounding box.
    """

    cells_mask_region = cells_region == cell_label
    nuclei_region_inv = ~xp.isin(nuclei_region, nuclei_in_cell)

    nuclei_edt_region = ndi.distance_transform_edt(nuclei_region_inv, sampling=spacing)
    return nuclei_edt_region, cells_mask_region


def _calc_nuclei_distance(
    nuclei: np.ndarray,
    cells: np.ndarray,
    props_nuclei: pd.DataFrame,
    props_cells: pd.DataFrame,
    spacing: tuple[float, float, float],
    verbose: bool = True,
) -> np.ndarray:
    """
    Compute per-voxel Euclidean distance to the nearest nucleus for each cell.

    Iterates over cells that contain at least one nucleus, computing the distance
    transform from the nuclear surfaces within each cell's bounding box. Voxels outside
    any cell or in cells without a detected nucleus are set to NaN.

    Parameters
    ----------
    nuclei : np.ndarray
        Full-volume 3D nuclei label array.
    cells : np.ndarray
        Full-volume 3D cell label array (same shape as `nuclei`).
    props_nuclei : pd.DataFrame
        Nuclei properties table, indexed by nucleus label. Must contain a
        ``parent_cell`` column mapping each nucleus to its enclosing cell label.
    props_cells : pd.DataFrame
        Cell properties table, indexed by cell label. Must contain a ``bbox`` column
        with bounding box coordinates.
    spacing : tuple[float, float, float]
        Voxel spacing in physical units (z, y, x).
    verbose : bool, optional
        If True, show a progress bar. Default is True.

    Returns
    -------
    np.ndarray
        Float32 array (same shape as input) with Euclidean distance to the nearest
        nucleus in each cell. NaN for background voxels and cells without a detected
        nucleus.
    """

    nuclei_edt = np.full(nuclei.shape, np.nan, dtype=np.float32)
    cells_with_nuclei = props_nuclei["parent_cell"].unique()

    for label in tqdm(cells_with_nuclei, "Analyzing cells", disable=not verbose):
        bbox_slice = slice_from_bbox(props_cells.at[label, "bbox"])
        nuclei_in_cell = props_nuclei.loc[
            props_nuclei["parent_cell"] == label
        ].index.to_numpy()

        nuclei_edt_region, cells_mask_region = _distance_map_cell(
            nuclei_region=nuclei[bbox_slice],
            cells_region=cells[bbox_slice],
            cell_label=label,
            nuclei_in_cell=nuclei_in_cell,
            spacing=spacing,
        )
        nuclei_edt[cells == label] = nuclei_edt_region[cells_mask_region]

    return nuclei_edt


[docs] def nuclei_distance(root_path: Path, verbose: bool = True) -> None: """ Compute and save the nucleus distance map for all cells in a dataset. For each cell containing at least one nucleus, computes the Euclidean distance from every voxel to the nearest nuclear surface. The result is saved as a zarr array under ``labels/nuclei/distance/``. Requires that label analysis (via `analyze_labels`) has been run for both ``cells`` and ``nuclei`` segmentation targets at the same spacing, and that nuclei properties contain a ``parent_cell`` column. Parameters ---------- root_path : Path Root path to the zarr store. verbose : bool, optional If True, show a progress bar. Default is True. Raises ------ ValueError If cells and nuclei were analyzed at different spacings. Notes ----- Output is written to ``labels/nuclei/distance/{scale_dir}`` in the zarr store, with units in micrometers. Voxels outside cells or in cells without a detected nucleus are set to NaN. """ root = zarr.open(root_path, mode="a") data = { "cells": {}, "nuclei": {}, } for seg_target in data.keys(): tables_path = root_path / f"labels/{seg_target}/tables/" config = LabelAnalysisConfig.from_json(tables_path / "analysis-config.json") data[seg_target]["spacing"] = config.spacing data[seg_target]["array"] = zarr.open_array(config.array_path) props = read_regionprops(tables_path / "regionprops.parquet") data[seg_target]["props"] = props.set_index("label") if data["cells"]["spacing"] != data["nuclei"]["spacing"]: raise ValueError( "Cells and nuclei properties were not calculated at the same spacing" ) nuclei_edt = _calc_nuclei_distance( nuclei=data["nuclei"]["array"][:], cells=data["cells"]["array"][:], props_nuclei=data["nuclei"]["props"], props_cells=data["cells"]["props"], spacing=data["nuclei"]["spacing"], verbose=verbose, ) write_zarr( root, nuclei_edt, dst_path=f"labels/nuclei/distance/{config.scale_dir}", src_zarr=data["nuclei"]["array"], processing={ "step": "euclidean distance transform", "units": "micrometers", }, inputs=[data["nuclei"]["array"].path, data["cells"]["array"].path], )