# Source code for ai_cdss.processing.clinical

import importlib.resources
from pathlib import Path
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
from ai_cdss import config
from ai_cdss.constants import *
from ai_cdss.utils import MultiKeyDict

# ------------------------------
# Clinical Scores


class ClinicalSubscales:
    """Turn raw clinical subscale scores into deficits relative to each scale's maximum.

    The per-subscale maximum values are read from a YAML file through
    ``MultiKeyDict.from_yaml`` and kept in ``self.scales_dict``.
    """

    def __init__(self, scale_yaml_path: Optional[str] = None):
        """Locate and load the subscale maxima.

        Args:
            scale_yaml_path: Path to a scales YAML file. When omitted (or
                empty), the file named by ``SCALES_YAML`` inside the
                ``config`` package is used instead.

        Raises:
            FileNotFoundError: If the resolved file does not exist.
        """
        # An explicit path wins; otherwise fall back to the packaged resource.
        self.scales_path = (
            Path(scale_yaml_path)
            if scale_yaml_path
            else importlib.resources.files(config) / Path(SCALES_YAML)
        )

        if not self.scales_path.exists():
            raise FileNotFoundError(f"Scale YAML file not found at {self.scales_path}")

        # Maximum attainable value for every known subscale.
        self.scales_dict = MultiKeyDict.from_yaml(self.scales_path)

    def compute_deficit_matrix(self, patient_df: pd.DataFrame) -> pd.DataFrame:
        """Return ``1 - score / max`` for every subscale column of ``patient_df``.

        Args:
            patient_df: Clinical scores with one column per subscale. Every
                column label must be resolvable through ``self.scales_dict``.

        Returns:
            A DataFrame of deficits with the same rows as ``patient_df``;
            columns are renamed through ``self.scales_dict._keys``.

        Raises:
            ValueError: If any column has no maximum value in ``self.scales_dict``.
        """
        subscales = patient_df.columns

        # Look up the ceiling of each subscale; unknown ones come back as None.
        ceilings = [self.scales_dict.get(name, None) for name in subscales]

        if None in ceilings:
            missing_subscales = [
                name for name, ceiling in zip(subscales, ceilings) if ceiling is None
            ]
            raise ValueError(f"Missing max values for subscales: {missing_subscales}")

        # Dividing by a Series indexed by column label scales each column by
        # its own ceiling.
        ceiling_by_subscale = pd.Series(ceilings, index=subscales)
        deficits = 1 - (patient_df / ceiling_by_subscale)

        # NOTE(review): `_keys` presumably maps alias names to canonical
        # subscale names -- confirm against MultiKeyDict.
        return deficits.rename(self.scales_dict._keys, axis="columns")
# ------------------------------
# Protocol Attributes
class ProtocolToClinicalMapper:
    """Collapse protocol-level feature columns into clinical-scale columns.

    The mapping from each clinical scale to its protocol feature columns is
    read from a YAML file through ``MultiKeyDict.from_yaml`` and kept in
    ``self.mapping``.
    """

    def __init__(self, mapping_yaml_path: Optional[str] = None):
        """Locate and load the protocol-to-clinical mapping.

        Args:
            mapping_yaml_path: Path to a mapping YAML file. When omitted (or
                empty), the file named by ``MAPPING_YAML`` inside the
                ``config`` package is used instead.

        Raises:
            FileNotFoundError: If the resolved mapping file does not exist.
        """
        if mapping_yaml_path:
            self.mapping_path = Path(mapping_yaml_path)
        else:
            self.mapping_path = importlib.resources.files(config) / Path(MAPPING_YAML)

        if not self.mapping_path.exists():
            # The message names the mapping file; it previously said "Scale",
            # which pointed readers at the wrong configuration file.
            raise FileNotFoundError(
                f"Mapping YAML file not found at {self.mapping_path}"
            )

        self.mapping = MultiKeyDict.from_yaml(self.mapping_path)

    def map_protocol_features(
        self, protocol_df: pd.DataFrame, agg_func=np.mean
    ) -> pd.DataFrame:
        """Map protocol-level features into clinical scales using the loaded mapping.

        Args:
            protocol_df: One row per protocol. Must contain the ``PROTOCOL_ID``
                column and every feature column referenced by the mapping.
            agg_func: Callable applied row-wise to the feature columns of each
                clinical scale to reduce them to a single value. Defaults to
                ``np.mean``.

        Returns:
            A DataFrame indexed by ``protocol_df[PROTOCOL_ID]`` with one column
            per clinical scale in the mapping.
        """
        # Start from the input's index so each aggregated column aligns
        # row-for-row with protocol_df.
        df_clinical = pd.DataFrame(index=protocol_df.index)

        # Collapse the protocol features of each clinical scale with agg_func.
        # NOTE(review): assumes each mapping value is a list of column names
        # present in protocol_df -- confirm against the mapping YAML.
        for clinical_scale, features in self.mapping.items():
            df_clinical[clinical_scale] = protocol_df[features].apply(agg_func, axis=1)

        # Re-key the rows by protocol identifier.
        df_clinical.index = protocol_df[PROTOCOL_ID]
        return df_clinical