Skip to content

Monitor

monitor

Classes

SHAPMonitor

SHAPMonitor(
    explainer: ExplainerLike | None = None,
    data_dir: PathLike | None = None,
    sample_rate: float = 0.1,
    model_version: str = "unknown",
    feature_names: list[str] | None = None,
    backend: Backend | None = None,
    random_seed: int | None = 42,
)

Monitor SHAP explanations over time.

Parameters:

Name Type Description Default
explainer ExplainerLike

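A SHAP explainer object. It is called directly as explainer(X) and must return an explanation exposing values and base_values. Required for log_batch and compute; it can be omitted when only log_shap is used.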

None
data_dir PathLike

Directory to store explanation logs.

None
sample_rate float

Fraction of each batch passed to log_batch for which explanations are computed and logged; at least one row per batch is always logged (default is 0.1).

0.1
model_version str

Version identifier for the model (default is "unknown").

'unknown'
feature_names list[str]

Names of the features in the input data.

None
backend Backend

Backend for storing explanations (default is None).

None

Raises:

Type Description
ValueError

If neither data_dir nor backend is provided or if both are provided.

Source code in shapmonitor/monitor.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def __init__(
    self,
    explainer: ExplainerLike | None = None,
    data_dir: PathLike | None = None,
    sample_rate: float = 0.1,
    model_version: str = "unknown",
    feature_names: list[str] | None = None,
    backend: Backend | None = None,
    random_seed: int | None = 42,
) -> None:
    """Set up the monitor with exactly one storage target.

    Parameters
    ----------
    explainer : ExplainerLike, optional
        Explainer used by ``log_batch`` and ``compute``.
    data_dir : PathLike, optional
        Directory handed to the ``"parquet"`` backend created through
        ``BackendFactory``. Mutually exclusive with ``backend``.
    sample_rate : float, optional
        Fraction of each batch to explain and log.
    model_version : str, optional
        Version identifier stored with every logged batch.
    feature_names : list[str], optional
        Names of the features in the input data.
    backend : Backend, optional
        Ready-made storage backend. Mutually exclusive with ``data_dir``.
    random_seed : int, optional
        Seed passed to ``np.random.default_rng`` for the sampling RNG.

    Raises
    ------
    ValueError
        If neither ``data_dir`` nor ``backend`` is provided, or if both are.
    """
    self._explainer = explainer

    # Compare against None everywhere. Mixing an ``is None`` check with
    # truthiness checks let a falsy-but-provided value (e.g. ``data_dir=""``
    # or a backend whose ``__bool__``/``__len__`` is falsy) slip through
    # validation and could leave ``self._backend`` silently set to None.
    has_data_dir = data_dir is not None
    has_backend = backend is not None

    if not has_data_dir and not has_backend:
        raise ValueError("Either data_dir or backend must be provided.")

    if has_data_dir and has_backend:
        raise ValueError("Provide only one of data_dir or backend, not both.")

    if has_data_dir:
        self._backend = BackendFactory.get_backend("parquet", file_dir=data_dir)
    else:
        self._backend = backend

    self._sample_rate = sample_rate
    self._model_version = model_version
    self._feature_names = feature_names
    self._rng = np.random.default_rng(random_seed)
Attributes
explainer property
explainer: ExplainerLike

Get the SHAP explainer object.

backend property
backend: Backend

Get the backend for storing explanations.

sample_rate property
sample_rate: float

Get the sample rate for logging explanations.

model_version property
model_version: str

Get the model version identifier.

feature_names property
feature_names: list[str] | None

Get the feature names.

Functions
log_shap
log_shap(
    shap_values: ExplanationLike | ArrayLike,
    base_values: ArrayLike | float | None = None,
    batch_id: str | None = None,
) -> None

Log SHAP explanations for a single batch.

Parameters:

Name Type Description Default
shap_values ExplanationLike | ArrayLike

A SHAP Explanation object, or a 2-D numpy array of SHAP values (n_samples x n_features).

required
base_values ArrayLike | float | None

Base (expected) values. Ignored when "shap_values" is an Explanation object. When "shap_values" is a raw array and this is omitted, the base_value column will be filled with NaN.

None
batch_id str

Unique identifier for the batch. If not provided, a new UUID will be generated.

None
Source code in shapmonitor/monitor.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
def log_shap(
    self,
    shap_values: ExplanationLike | ArrayLike,
    base_values: ArrayLike | float | None = None,
    batch_id: str | None = None,
) -> None:
    """Log precomputed SHAP explanations for a single batch.

    Parameters
    ----------
    shap_values : ExplanationLike | ArrayLike
        A SHAP Explanation object, or a 2-D numpy array of SHAP values
        (n_samples x n_features).
    base_values : ArrayLike | float | None, optional
        Base (expected) values. Ignored when "shap_values" is an
        Explanation object. A scalar (or length-1 array) is repeated for
        every sample. When no base values are available, the base_value
        column will be filled with NaN.
    batch_id : str, optional
        Unique identifier for the batch. If not provided, a new one is
        obtained from ``self._generate_batch_id()``.

    Raises
    ------
    ValueError
        If the SHAP values have fewer than two dimensions.

    Notes
    -----
    When no feature names were configured, ``feat_<i>`` names are generated
    from the first batch and stored on the monitor for subsequent calls.
    No feature values or predictions are stored by this method.
    """
    import shap

    if isinstance(shap_values, shap.Explanation):
        values_arr = np.asarray(shap_values.values, dtype=np.float32)
        # The explicit ``base_values`` argument is ignored for Explanations.
        raw_base = shap_values.base_values
    else:
        values_arr = np.asarray(shap_values, dtype=np.float32)
        raw_base = base_values

    if values_arr.ndim < 2:
        # Fail early with a clear message instead of an IndexError from
        # ``values_arr.shape[1]`` / ``values_arr[:, idx]`` further down.
        raise ValueError(
            "shap_values must be at least 2-D (n_samples x n_features); "
            f"got an array with {values_arr.ndim} dimension(s)."
        )

    n_samples = values_arr.shape[0]

    # Normalise base values identically for both input kinds so that a
    # missing or scalar base value always becomes one entry per sample.
    if raw_base is None:
        base_arr = np.full(n_samples, np.nan, dtype=np.float32)
    else:
        base_arr = np.atleast_1d(np.asarray(raw_base, dtype=np.float32))
        if base_arr.ndim == 1 and base_arr.shape[0] == 1:
            base_arr = np.full(n_samples, base_arr.item(), dtype=np.float32)

    if not self._feature_names:
        self._feature_names = [f"feat_{i}" for i in range(values_arr.shape[1])]

    if not batch_id:
        batch_id = self._generate_batch_id()

    explanation_batch = ExplanationBatch(
        timestamp=datetime.now(),
        batch_id=batch_id,
        model_version=self._model_version,
        n_samples=n_samples,
        base_values=base_arr,
        shap_values={feat: values_arr[:, idx] for idx, feat in enumerate(self._feature_names)},
        feature_values=None,
        predictions=None,
    )

    path = self._backend.write(explanation_batch)
    _logger.info("Logged SHAP explanations for batch_id: %s in path: %s", batch_id, path)
log_batch
log_batch(
    X: ArrayLike,
    y: ArrayLike | None = None,
    batch_id: str | None = None,
) -> None

Log SHAP explanations for a batch of predictions.

Parameters:

Name Type Description Default
X ArrayLike

Input features (2D array: n_samples x n_features).

required
y ArrayLike

Model predictions for the batch. If not provided, predictions will not be stored in the explanation record.

None
batch_id str

Unique identifier for the batch. If not provided, a new UUID will be generated.

None
Source code in shapmonitor/monitor.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
def log_batch(
    self, X: ArrayLike, y: ArrayLike | None = None, batch_id: str | None = None
) -> None:
    """Log SHAP explanations for a batch of predictions.

    Parameters
    ----------
    X : ArrayLike
        Input features (2D array: n_samples x n_features).
    y : ArrayLike, optional
        Model predictions for the batch. If not provided, predictions
        will not be stored in the explanation record.
    batch_id : str, optional
        Unique identifier for the batch. If not provided, a new UUID
        will be generated.
    """
    if self._explainer is None:
        raise ValueError(
            "Explainer is not set. Please set the explainer when initializing the monitor."
        )

    if not self._feature_names:
        if isinstance(X, pd.DataFrame):
            self._feature_names = X.columns.tolist()
        else:
            self._feature_names = [f"feat_{i}" for i in range(X.shape[1])]

    # Sample the data
    n_samples = max(1, int(len(X) * self._sample_rate))
    sample_indices = self._rng.choice(len(X), size=n_samples, replace=False)

    if isinstance(X, pd.DataFrame):
        X = X.iloc[sample_indices].reset_index(drop=True)
    else:
        X = np.asarray(X)
        X = X[sample_indices]

    # Sample y to match X if provided
    if y is not None:
        y = np.asarray(y)
        y = y[sample_indices]

    if not batch_id:
        batch_id = self._generate_batch_id()

    # Compute SHAP values for the batch
    explanations = self.compute(X)

    shap_values_dict = {
        feat: explanations.values[:, idx] for idx, feat in enumerate(self._feature_names)
    }
    if isinstance(X, pd.DataFrame):
        feat_values_dict = {
            feat: X.iloc[:, idx].to_numpy() for idx, feat in enumerate(self._feature_names)
        }
    else:
        feat_values_dict = {feat: X[:, idx] for idx, feat in enumerate(self._feature_names)}

    explanation_batch = ExplanationBatch(
        timestamp=datetime.now(),
        batch_id=batch_id,
        model_version=self._model_version,
        n_samples=len(X),
        base_values=explanations.base_values,
        shap_values=shap_values_dict,
        feature_values=feat_values_dict,
        predictions=y,
    )

    path = self._backend.write(explanation_batch)
    _logger.info("Logged SHAP explanations for batch_id: %s in path: %s", batch_id, path)
compute
compute(X: ArrayLike) -> ExplanationLike

Compute SHAP values for the given input features.

Parameters:

Name Type Description Default
X ArrayLike

Input features for which to compute SHAP values.

required

Returns:

Type Description
Shap explanation object

The SHAP explanation object containing SHAP values.

Source code in shapmonitor/monitor.py
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def compute(self, X: ArrayLike) -> ExplanationLike:
    """
    Run the configured explainer on the given input features.

    Parameters
    ----------
    X : ArrayLike
        Input features to explain; passed to the explainer unchanged.

    Returns
    -------
    Shap explanation object
        Whatever the explainer returns when called with ``X``.

    Raises
    ------
    ValueError
        If the monitor was created without an explainer.
    """
    explainer = self._explainer
    if explainer is not None:
        return explainer(X)

    raise ValueError(
        "Explainer is not set. Please set the explainer when initializing the monitor."
    )