Monitor¶

monitor ¶

Classes¶

SHAPMonitor ¶

SHAPMonitor(
    explainer: ExplainerLike | None = None,
    data_dir: PathLike | None = None,
    sample_rate: float = 0.1,
    model_version: str = "unknown",
    feature_names: list[str] | None = None,
    backend: Backend | None = None,
    random_seed: int | None = 42,
)

Monitor SHAP explanations over time.

Parameters:

Name	Type	Description	Default
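`explainer`	`ExplainerLike`	A SHAP explainer object. The monitor invokes it directly as `explainer(X)`, so it must be callable on the input features. Required by `log_batch` and `compute`; may be omitted when only `log_shap` is used.	`None`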
`data_dir`	`PathLike`	Directory to store explanation logs.	`None`
`sample_rate`	`float`	Fraction of predictions to log explanations for (default is 0.1).	`0.1`
`model_version`	`str`	Version identifier for the model (default is "unknown").	`'unknown'`
`feature_names`	`list[str]`	Names of the features in the input data.	`None`
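`backend`	`Backend`	Backend for storing explanations (default is None).	`None`
`random_seed`	`int \| None`	Seed for the random number generator used to sample rows in `log_batch` (default is 42).	`42`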

Raises:

Type	Description
`ValueError`	If neither data_dir nor backend is provided or if both are provided.

Source code in shapmonitor/monitor.py

def __init__(
    self,
    explainer: ExplainerLike | None = None,
    data_dir: PathLike | None = None,
    sample_rate: float = 0.1,
    model_version: str = "unknown",
    feature_names: list[str] | None = None,
    backend: Backend | None = None,
    random_seed: int | None = 42,
) -> None:
    """Create a monitor that writes SHAP explanations to a storage backend.

    Exactly one of ``data_dir`` and ``backend`` must be supplied.

    Parameters
    ----------
    explainer : ExplainerLike, optional
        SHAP explainer stored for later use by the monitor.
    data_dir : PathLike, optional
        Directory handed to the "parquet" backend created through
        ``BackendFactory``.
    sample_rate : float, optional
        Fraction of predictions to log explanations for.
    model_version : str, optional
        Version identifier recorded with the monitor.
    feature_names : list[str], optional
        Names of the features in the input data.
    backend : Backend, optional
        Ready-made backend to use instead of ``data_dir``.
    random_seed : int, optional
        Seed for the monitor's NumPy random generator.

    Raises
    ------
    ValueError
        If neither ``data_dir`` nor ``backend`` is provided, or if both are.
    """
    self._explainer = explainer

    # Compare against None rather than relying on truthiness: a backend
    # object that happens to be falsy, or an empty-string directory, still
    # counts as "provided" and must not slip past the checks below.
    if data_dir is None and backend is None:
        raise ValueError("Either data_dir or backend must be provided.")

    if data_dir is not None and backend is not None:
        raise ValueError("Provide only one of data_dir or backend, not both.")

    if data_dir is not None:
        self._backend = BackendFactory.get_backend("parquet", file_dir=data_dir)
    else:
        self._backend = backend

    self._sample_rate = sample_rate
    self._model_version = model_version
    self._feature_names = feature_names
    self._rng = np.random.default_rng(random_seed)

Attributes¶

explainer `property` ¶

explainer: ExplainerLike

Get the SHAP explainer object.

backend `property` ¶

backend: Backend

Get the backend for storing explanations.

sample_rate `property` ¶

sample_rate: float

Get the sample rate for logging explanations.

model_version `property` ¶

model_version: str

Get the model version identifier.

feature_names `property` ¶

feature_names: list[str] | None

Get the feature names.

Functions¶

log_shap ¶

log_shap(
    shap_values: ExplanationLike | ArrayLike,
    base_values: ArrayLike | float | None = None,
    batch_id: str | None = None,
) -> None

Log SHAP explanations for a single batch.

Parameters:

Name	Type	Description	Default
`shap_values`	`ExplanationLike \| ArrayLike`	A SHAP Explanation object, or a 2-D numpy array of SHAP values (n_samples x n_features).	required
`base_values`	`ArrayLike \| float \| None`	Base (expected) values. Ignored when "shap_values" is an Explanation object. When "shap_values" is a raw array and this is omitted, the base_value column will be filled with NaN.	`None`
`batch_id`	`str`	Unique identifier for the batch. If not provided, a new UUID will be generated.	`None`

Source code in shapmonitor/monitor.py

def log_shap(
    self,
    shap_values: ExplanationLike | ArrayLike,
    base_values: ArrayLike | float | None = None,
    batch_id: str | None = None,
) -> None:
    """Log precomputed SHAP explanations for a single batch.

    Parameters
    ----------
    shap_values : ExplanationLike | ArrayLike
        A SHAP Explanation object, or a 2-D numpy array of SHAP values
        (n_samples x n_features).
    base_values : ArrayLike | float | None, optional
        Base (expected) values. Ignored when "shap_values" is an
        Explanation object. A single value is repeated for every sample.
        When no base values are available, the base_value column will be
        filled with NaN.
    batch_id : str, optional
        Unique identifier for the batch. If not provided, a new UUID
        will be generated.

    Raises
    ------
    ValueError
        If the SHAP values have fewer than two dimensions.
    """
    import shap

    if isinstance(shap_values, shap.Explanation):
        # An Explanation carries its own base values; the argument is ignored.
        values_arr = np.asarray(shap_values.values, dtype=np.float32)
        raw_base = shap_values.base_values
    else:
        values_arr = np.asarray(shap_values, dtype=np.float32)
        raw_base = base_values

    if values_arr.ndim < 2:
        # Fail early with a readable message instead of an IndexError from
        # the shape / column indexing further down.
        raise ValueError(
            "shap_values must be a 2-D array (n_samples x n_features), "
            f"got {values_arr.ndim}-D input."
        )

    n_rows = values_arr.shape[0]
    if raw_base is None:
        base_arr = np.full(n_rows, np.nan, dtype=np.float32)
    else:
        base_arr = np.atleast_1d(np.asarray(raw_base, dtype=np.float32))
        if base_arr.shape == (1,):
            # A lone base value applies to every sample in the batch.
            base_arr = np.full(n_rows, base_arr.item(), dtype=np.float32)

    if not self._feature_names:
        # Generated names are kept on the monitor and reused by later calls.
        self._feature_names = [f"feat_{i}" for i in range(values_arr.shape[1])]

    if not batch_id:
        batch_id = self._generate_batch_id()

    explanation_batch = ExplanationBatch(
        timestamp=datetime.now(),
        batch_id=batch_id,
        model_version=self._model_version,
        n_samples=n_rows,
        base_values=base_arr,
        shap_values={feat: values_arr[:, idx] for idx, feat in enumerate(self._feature_names)},
        # Only explanations are supplied here, so there are no inputs or
        # predictions to record.
        feature_values=None,
        predictions=None,
    )

    path = self._backend.write(explanation_batch)
    _logger.info("Logged SHAP explanations for batch_id: %s in path: %s", batch_id, path)

log_batch ¶

log_batch(
    X: ArrayLike,
    y: ArrayLike | None = None,
    batch_id: str | None = None,
) -> None

Log SHAP explanations for a batch of predictions.

Parameters:

Name	Type	Description	Default
`X`	`ArrayLike`	Input features (2D array: n_samples x n_features).	required
`y`	`ArrayLike`	Model predictions for the batch. If not provided, predictions will not be stored in the explanation record.	`None`
`batch_id`	`str`	Unique identifier for the batch. If not provided, a new UUID will be generated.	`None`

Source code in shapmonitor/monitor.py

def log_batch(
    self, X: ArrayLike, y: ArrayLike | None = None, batch_id: str | None = None
) -> None:
    """Log SHAP explanations for a random sample of a batch of predictions.

    Rows are drawn without replacement according to the monitor's sample
    rate (at least one row, never more than the batch holds), SHAP values
    are computed for the sampled rows, and the result is written to the
    backend.

    Parameters
    ----------
    X : ArrayLike
        Input features (2D array: n_samples x n_features). Pandas
        DataFrames keep their column names; any other input is converted
        with ``np.asarray``.
    y : ArrayLike, optional
        Model predictions for the batch. If not provided, predictions
        will not be stored in the explanation record.
    batch_id : str, optional
        Unique identifier for the batch. If not provided, a new UUID
        will be generated.

    Raises
    ------
    ValueError
        If no explainer was set when the monitor was initialized.
    """
    if self._explainer is None:
        raise ValueError(
            "Explainer is not set. Please set the explainer when initializing the monitor."
        )

    is_frame = isinstance(X, pd.DataFrame)
    if not is_frame:
        # Convert before anything touches ``shape`` so that plain lists and
        # other sequences are accepted as well.
        X = np.asarray(X)

    n_rows = len(X)
    if n_rows == 0:
        # Nothing to sample from; skip instead of failing inside the sampler.
        _logger.warning("Received an empty batch; no SHAP explanations were logged.")
        return

    if not self._feature_names:
        if is_frame:
            self._feature_names = X.columns.tolist()
        else:
            self._feature_names = [f"feat_{i}" for i in range(X.shape[1])]

    # Sample the data: at least one row, capped at the batch size so that a
    # sample rate above 1 cannot request more rows than exist.
    n_samples = min(n_rows, max(1, int(n_rows * self._sample_rate)))
    sample_indices = self._rng.choice(n_rows, size=n_samples, replace=False)

    if is_frame:
        X = X.iloc[sample_indices].reset_index(drop=True)
    else:
        X = X[sample_indices]

    # Sample y to match X if provided
    if y is not None:
        y = np.asarray(y)
        y = y[sample_indices]

    if not batch_id:
        batch_id = self._generate_batch_id()

    # Compute SHAP values for the sampled rows
    explanations = self.compute(X)

    shap_values_dict = {
        feat: explanations.values[:, idx] for idx, feat in enumerate(self._feature_names)
    }
    if is_frame:
        # Columns are addressed by position, so custom feature names may
        # differ from the DataFrame's own column labels.
        feat_values_dict = {
            feat: X.iloc[:, idx].to_numpy() for idx, feat in enumerate(self._feature_names)
        }
    else:
        feat_values_dict = {feat: X[:, idx] for idx, feat in enumerate(self._feature_names)}

    explanation_batch = ExplanationBatch(
        timestamp=datetime.now(),
        batch_id=batch_id,
        model_version=self._model_version,
        n_samples=len(X),
        base_values=explanations.base_values,
        shap_values=shap_values_dict,
        feature_values=feat_values_dict,
        predictions=y,
    )

    path = self._backend.write(explanation_batch)
    _logger.info("Logged SHAP explanations for batch_id: %s in path: %s", batch_id, path)

compute ¶

compute(X: ArrayLike) -> ExplanationLike

Compute SHAP values for the given input features.

Parameters:

Name	Type	Description	Default
`X`	`ArrayLike`	Input features for which to compute SHAP values.	required

Returns:

Type	Description
`ExplanationLike`	The SHAP explanation object containing the computed SHAP values.

Source code in shapmonitor/monitor.py

def compute(self, X: ArrayLike) -> ExplanationLike:
    """
    Run the configured explainer on the given input features.

    Parameters
    ----------
    X : ArrayLike
        Input features to explain; passed to the explainer unchanged.

    Returns
    -------
    ExplanationLike
        Whatever the explainer returns when called with ``X``.

    Raises
    ------
    ValueError
        If the monitor was created without an explainer.
    """
    explainer = self._explainer
    if explainer is not None:
        return explainer(X)

    raise ValueError(
        "Explainer is not set. Please set the explainer when initializing the monitor."
    )

Monitor¶

monitor ¶

Classes¶

SHAPMonitor ¶

Attributes¶

explainer property ¶

backend property ¶

sample_rate property ¶

model_version property ¶

feature_names property ¶

Functions¶

log_shap ¶

log_batch ¶

compute ¶

explainer `property` ¶

backend `property` ¶

sample_rate `property` ¶

model_version `property` ¶

feature_names `property` ¶