In [ ]:
# ML prediction pipeline
Model development using different ML methods¶
Model training and validation functions¶
In [23]:
# --- imports ---
import os
import time
# Avoid numba JIT (and its LLVM OpenMP dependency) inside shap
os.environ["SHAP_DISABLE_JIT"] = "1" # shap will use pure-python fallbacks
os.environ["JOBLIB_PROGRESSBAR"] = "0" # turn off joblib's progress bars
# Standard library
import statistics
# Core scientific stack
import numpy as np
import pandas as pd
# Utilities
from tqdm import tqdm # To display progress bars during loops and long computations.
# Scikit-learn with Intel acceleration
from sklearnex import patch_sklearn
patch_sklearn()
# Scikit-learn modules
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold, GridSearchCV, cross_val_predict
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
# Classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
# Imbalanced-learn
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import BorderlineSMOTE, SMOTE
# Model interpretation
import shap
# Gradient boosting
from xgboost import XGBClassifier
def _gpu_available():
"""Return True if a CUDA-capable GPU appears available via numba.
Notes
-----
- This is a lightweight check. XGBoost may still error if CUDA runtime is
unavailable or mismatched even when this returns True.
"""
try:
import numba.cuda as _cuda
return _cuda.is_available()
except Exception:
return False
def arraylist2arrayColumn(arrays):
"""Stack a list of 1‑D arrays (possibly different lengths) as columns.
Shorter arrays are **left‑padded with zeros** so that all columns align.
Parameters
----------
arrays : list[np.ndarray]
List of 1‑D arrays with potentially different lengths.
Returns
-------
np.ndarray
2‑D array of shape (max_length, n_arrays) where each input array forms
a column and is left‑padded with zeros to `max_length`.
"""
max_length = max(len(arr) for arr in arrays)
padded_arrays = [np.pad(arr, (max_length - len(arr), 0), constant_values=0) for arr in arrays]
return np.column_stack(padded_arrays)
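# Hedged sanity check (hypothetical arrays): the shorter input is left-padded
# with zeros so both columns share the same length.
_demo_cols = arraylist2arrayColumn([np.array([1.0, 2.0]), np.array([3.0, 4.0, 5.0])])
assert _demo_cols.shape == (3, 2) and (_demo_cols[:, 0] == np.array([0.0, 1.0, 2.0])).all()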
def allocate_feature_list(data, feature_list):
"""Count how often features were selected across folds/iterations.
Parameters
----------
data : pd.DataFrame
Original feature dataframe; used to preserve full feature list/order.
feature_list : list[Iterable[str]]
List where each item is an iterable of selected feature names from one
fold/iteration.
Returns
-------
pd.DataFrame
DataFrame with columns `Feature` and `Count` (selection frequency).
"""
all_features = list(data.columns)
flattened = [f for sub in feature_list for f in sub]
counts = pd.Series(flattened).value_counts()
out = pd.DataFrame({'Feature': all_features})
out['Count'] = out['Feature'].map(counts).fillna(0).astype(int)
return out
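# Hedged example (hypothetical folds): feature "a" selected twice, "b" once.
_demo_freq = allocate_feature_list(pd.DataFrame(columns=["a", "b"]), [["a"], ["a", "b"]])
assert _demo_freq.set_index("Feature")["Count"].to_dict() == {"a": 2, "b": 1}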
def should_apply_pca(x, threshold=1.5):
"""Heuristic to decide if PCA can be applied based on sample/feature ratio. if false, pca is not allowed.
Parameters
----------
x : pd.DataFrame | np.ndarray
Feature matrix.
threshold : float, default 1.5
Apply PCA when n_samples / n_features < `threshold`.
Returns
-------
bool
True if PCA is recommended under the heuristic.
"""
n_samples, n_features = x.shape
return n_samples / n_features < threshold
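# Hedged examples: 10 samples / 20 features (ratio 0.5) triggers PCA;
# 100 samples / 20 features (ratio 5.0) does not.
assert should_apply_pca(np.zeros((10, 20))) is True
assert should_apply_pca(np.zeros((100, 20))) is False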
def _to_2d_array(sv):
"""Normalize SHAP output to shape (n_samples, n_features).
Handles lists (per‑class), 1D vectors, and 3D arrays.
Parameters
----------
sv : array-like | list
Raw SHAP values as returned by various explainers/models.
Returns
-------
np.ndarray
SHAP values as a 2‑D array.
"""
# If SHAP returns a list per class -> take positive class if available
if isinstance(sv, (list, tuple)):
sv = sv[1] if len(sv) > 1 else sv[0]
sv = np.asarray(sv)
# If 3D: (n_samples, n_features, n_outputs). Pick class 1 if available; else 0.
if sv.ndim == 3:
sv = sv[..., 1] if sv.shape[2] > 1 else sv[..., 0]
# If 1D: (n_features,) -> (1, n_features)
if sv.ndim == 1:
sv = sv.reshape(1, -1)
# Now it should be (n_samples, n_features)
return sv
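# Hedged shape checks (hypothetical arrays): per-class lists and 3-D arrays
# both collapse to 2-D, and 1-D vectors become a single row.
assert _to_2d_array([np.zeros((4, 3)), np.ones((4, 3))]).shape == (4, 3)
assert _to_2d_array(np.zeros((4, 3, 2))).shape == (4, 3)
assert _to_2d_array(np.zeros(3)).shape == (1, 3)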
# ---------- model specs ----------
def _get_model_specs(use_gpu=False, n_jobs=5, ratio=1.0):
"""Define available model, grid, selector, and type for the pipeline search.
Parameters
----------
use_gpu : bool, default False
If True, enable GPU‑accelerated XGBoost when available.
n_jobs : int, default 5
Parallelism for estimators that support it.
ratio : float, default 1.0
Passed to XGBoost as `scale_pos_weight` to compensate class imbalance
(usually `neg/pos`).
Returns
-------
dict[str, dict]
Mapping from model name to its specification with keys:
- "clf": estimator instance
- "grid": parameter grid for GridSearchCV
- "selector": feature selector (SelectFromModel)
- "type": one of {"linear", "kernel", "tree"}
"""
rf_selector = SelectFromModel(RandomForestClassifier(n_estimators=200, random_state=42), threshold='median')
specs = {
        # Lasso and ridge are just L1/L2-penalized logistic regressions, so they are left commented out here.
# "lasso": {
# "clf": LogisticRegression(penalty='l1', solver='saga', max_iter=5000),
# "grid": {
# "feature_selection__threshold": ['mean', 'median'],
# "classification__C": [0.01, 0.1, 1.0, 10.0],
# },
# "selector": SelectFromModel(LogisticRegression(penalty='l1', solver='saga', max_iter=5000), threshold='median'),
# "type": "linear"
# },
# "ridge": {
# "clf": LogisticRegression(penalty='l2', solver='lbfgs', max_iter=5000),
# "grid": {
# "feature_selection__threshold": ['mean', 'median'],
# "classification__C": [0.01, 0.1, 1.0, 10.0],
# },
# "selector": SelectFromModel(LogisticRegression(penalty='l2', solver='lbfgs', max_iter=5000), threshold='median'),
# "type": "linear"
# },
"logistic": {
"clf": LogisticRegression(class_weight='balanced', penalty='l2', solver='lbfgs', max_iter=5000),
"grid": {
"feature_selection__threshold": ['median'],
"classification__C": [0.01, 0.1, 1.0, 10.0],
},
"selector": SelectFromModel(LogisticRegression(penalty='l2', solver='lbfgs', max_iter=5000), threshold='median'),
"type": "linear"
},
"svm": {
"clf": SVC(kernel='rbf', class_weight= 'balanced' , probability=True),
"grid": {
"feature_selection__threshold": ['median'],
"classification__C": [0.1, 1],
"classification__gamma": ['auto']
},
"selector": rf_selector,
"type": "kernel"
},
"random_forest": {
"clf": RandomForestClassifier(class_weight='balanced', n_jobs=n_jobs, random_state=42),
"grid": {
"feature_selection__threshold": ['median'],
"classification__n_estimators": [100, 200, 400],
"classification__min_samples_leaf": [1, 2],
"classification__max_features": ['sqrt', 0.5]
},
"selector": SelectFromModel(RandomForestClassifier(n_estimators=200, random_state=42), threshold='median'),
"type": "tree"
},
"decision_tree": {
"clf": DecisionTreeClassifier(class_weight='balanced', random_state=42),
"grid": {
"feature_selection__threshold": ['median'],
"classification__max_depth": [5, 10],
"classification__min_samples_split": [2, 5]
},
"selector": rf_selector,
"type": "tree"
},
"knn": {
"clf": KNeighborsClassifier(),
"grid": {
"feature_selection__threshold": ['median'],
"classification__n_neighbors": [3, 5, 7, 11],
"classification__weights": ['uniform', 'distance'],
"classification__p": [1, 2],
},
"selector": rf_selector,
"type": "kernel"
},
}
xgb_kwargs = dict(
n_estimators=300,
learning_rate=0.05,
subsample=0.9,
colsample_bytree=0.9,
max_depth=4,
reg_lambda=1.0,
reg_alpha=0.0,
eval_metric='logloss',
scale_pos_weight=ratio,
n_jobs=n_jobs,
random_state=42,
# use_label_encoder=False
)
    if use_gpu and _gpu_available():
        xgb_kwargs.update({
            "tree_method": "gpu_hist",      # XGBoost < 2.0 GPU settings
            "predictor": "gpu_predictor"
        })
        # For XGBoost >= 2.0, the equivalent would be:
        # xgb_kwargs.update({
        #     "tree_method": "hist",
        #     "device": "cuda"
        # })
else:
# fast CPU histogram as fallback
xgb_kwargs.update({
"tree_method": "hist",
# "predictor": "auto"
})
specs["xgboost"] = {
"clf": XGBClassifier(**xgb_kwargs),
"grid": {
"feature_selection__threshold": ['median'],
"classification__n_estimators": [200, 400],
"classification__max_depth": [4, 6],
"classification__learning_rate": [0.03, 0.1],
"classification__subsample": [0.8],
"classification__colsample_bytree": [0.8],
},
"selector": SelectFromModel(RandomForestClassifier(n_estimators=200, random_state=42), threshold='median'),
"type": "tree"
}
return specs
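# Hedged sanity check: the spec mapping exposes exactly these model names.
assert set(_get_model_specs().keys()) == {
    "logistic", "svm", "random_forest", "decision_tree", "knn", "xgboost"
}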
# ---------- SHAP utils ----------
def _compute_shap_summary(best_pipeline, x_train, x_test, feature_names, model_type,
bg_max=100, test_max=300):
"""
Compute mean|SHAP| per (selected) feature for the final estimator in best_pipeline.
Returns a DataFrame with columns: Feature, mean_abs_shap.
For SVM/KNN (kernel), uses KernelExplainer with small background/test samples.
"""
"""Compute mean |SHAP| per (selected) feature for the pipeline's classifier.
Parameters
----------
best_pipeline : imblearn.Pipeline
Fitted pipeline with steps [scaler, smote, feature_selection, classification].
x_train, x_test : pd.DataFrame | np.ndarray
Train/test data from the outer fold (before scaling/selection).
feature_names : list[str]
Original feature names for alignment.
model_type : {"tree", "linear", "kernel"}
Determines which SHAP explainer to use.
bg_max : int, default 100
Background sample size for KernelExplainer (used for kernel models).
test_max : int, default 300
Test subsample size for KernelExplainer (used for kernel models).
Returns
-------
pd.DataFrame
Columns: `Feature`, `mean_abs_shap` for **all** original features
(unselected features are filled with 0.0). If SHAP fails, values are NaN.
Notes
-----
- For kernel models (SVM/KNN), full SHAP on all samples is expensive. We
approximate by computing SHAP on a small subset and taking the mean |SHAP|.
- For tree/linear models, TreeExplainer/LinearExplainer is used directly.
"""
# Extract steps
scaler = best_pipeline.named_steps.get('scaler', None)
selector = best_pipeline.named_steps['feature_selection']
est = best_pipeline.named_steps['classification']
# Transform to the estimator's input space (selected features after scaling)
# Xtr = x_train.values if isinstance(x_train, pd.DataFrame) else x_train
# Xte = x_test.values if isinstance(x_test, pd.DataFrame) else x_test
Xtr = x_train if isinstance(x_train, pd.DataFrame) else pd.DataFrame(x_train, columns=feature_names)
Xte = x_test if isinstance(x_test, pd.DataFrame) else pd.DataFrame(x_test, columns=feature_names)
if scaler is not None:
Xtr = scaler.transform(Xtr)
Xte = scaler.transform(Xte)
Xtr_sel = selector.transform(Xtr)
Xte_sel = selector.transform(Xte)
# Map selected column names
support = selector.get_support()
# tqdm.write(f"[{model_type}] selected features: {np.sum(support)} / {len(support)}")
if support is None or np.sum(support) == 0:
sel_names = np.array(feature_names)
else:
sel_names = np.array(feature_names)[support]
# If no features selected (edge case), return NaNs
if Xtr_sel.shape[1] == 0:
return pd.DataFrame({"Feature": feature_names, "mean_abs_shap": np.nan})
# Choose explainer
try:
if model_type == "tree":
explainer = shap.TreeExplainer(est)
shap_vals = explainer.shap_values(Xte_sel)
# binary: list-of-arrays per class; take class 1 if available
if isinstance(shap_vals, list):
# prefer positive class if present
sv = shap_vals[1] if len(shap_vals) > 1 else shap_vals[0]
else:
sv = shap_vals
elif model_type == "linear":
explainer = shap.LinearExplainer(est, Xtr_sel)
sv = explainer.shap_values(Xte_sel)
else: # "kernel" (e.g., SVM, KNN)
# sample small background and test to keep it fast
rng = np.random.default_rng(0)
bg_idx = rng.choice(Xtr_sel.shape[0], size=min(bg_max, Xtr_sel.shape[0]), replace=False)
te_idx = rng.choice(Xte_sel.shape[0], size=min(test_max, Xte_sel.shape[0]), replace=False)
Xbg = Xtr_sel[bg_idx]
Xte_small = Xte_sel[te_idx]
# explain probability of class 1
f = (lambda A: est.predict_proba(A)[:, 1])
explainer = shap.KernelExplainer(f, Xbg) # Kernel SHAP approximates this by replacing missing features with their distribution from the background dataset.
sv_small = explainer.shap_values(Xte_small, nsamples="auto", silent=True)
# expand to full test by mapping means (approx)
sv = np.zeros((Xte_sel.shape[0], Xte_sel.shape[1]))
sv[:] = np.mean(np.abs(sv_small), axis=0) # use mean abs across the small set as proxy
# NOTE: Kernel SHAP exact per-sample is expensive; this proxy summarizes feature impact.
sv = _to_2d_array(sv)
        mean_abs = np.mean(np.abs(sv), axis=0)
df = pd.DataFrame({"Feature": sel_names, "mean_abs_shap": mean_abs})
# ensure all features appear; fill 0 for unselected
out = pd.DataFrame({"Feature": feature_names}).merge(df, on="Feature", how="left").fillna(0.0)
return out
except Exception:
# fallback if SHAP fails
return pd.DataFrame({"Feature": feature_names, "mean_abs_shap": np.nan})
def find_threshold(y_true, p_val, mode="fbeta", beta=2.0, target_sens=None, costs=None):
"""
Pick an operating threshold from validation probs.
modes:
- "fbeta": maximize F_beta (beta>1 favors sensitivity)
- "youden": maximize Youden's J = TPR - FPR
- "target_sens": smallest threshold with sensitivity >= target_sens
- "cost": minimize expected cost given costs={"fp":c_fp,"fn":c_fn}
beta : float, default 2.0
F-beta parameter (used only when mode="fbeta").
target_sens : float | None
Target sensitivity (recall) in [0,1] when mode="target_sens".
costs : dict | None
Required when mode="cost"; keys {"fp", "fn"} specify relative costs for
false positives/negatives.
Returns
-------
float
Chosen threshold in [0,1].
"""
# use all unique prob cutpoints (plus 0 and 1 for safety)
thr_grid = np.unique(np.r_[0.0, p_val, 1.0])
best_thr, best_val = 0.5, -np.inf
if mode == "cost":
# for costs we minimize, so track min instead
best_val, best_thr = np.inf, 0.5
for t in thr_grid:
y_hat = (p_val >= t).astype(int)
tp = np.sum((y_true==1) & (y_hat==1))
fn = np.sum((y_true==1) & (y_hat==0))
tn = np.sum((y_true==0) & (y_hat==0))
fp = np.sum((y_true==0) & (y_hat==1))
sens = tp / (tp + fn) if (tp+fn)>0 else 0.0
spec = tn / (tn + fp) if (tn+fp)>0 else 0.0
if mode == "fbeta":
prec = tp / (tp + fp) if (tp+fp)>0 else 0.0
rec = sens
if prec+rec==0:
val = 0.0
else:
beta2 = beta*beta
val = (1+beta2) * (prec*rec) / (beta2*prec + rec) if (beta2*prec + rec)>0 else 0.0
if val > best_val:
best_val, best_thr = val, t
elif mode == "youden":
val = sens - (1 - spec) # TPR - FPR
if val > best_val:
best_val, best_thr = val, t
elif mode == "target_sens":
            # keep only thresholds achieving the desired sensitivity
if target_sens is None:
raise ValueError("Provide target_sens for mode='target_sens'")
if sens >= target_sens:
# prefer higher specificity among those meeting the target; break ties by higher threshold
val = spec
if val > best_val or (np.isclose(val, best_val) and t > best_thr):
best_val, best_thr = val, t
elif mode == "cost":
if costs is None or not {"fp","fn"} <= set(costs):
raise ValueError("Provide costs={'fp':...,'fn':...} for mode='cost'")
exp_cost = costs["fp"]*fp + costs["fn"]*fn
if exp_cost < best_val:
best_val, best_thr = exp_cost, t
else:
raise ValueError("Unknown mode")
return float(best_thr)
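# Hedged sanity check on a tiny synthetic example (hypothetical values, not
# study data): Youden's J peaks at the cutpoint that best separates the classes.
_demo_y = np.array([0, 0, 1, 1])
_demo_p = np.array([0.10, 0.40, 0.35, 0.80])
assert find_threshold(_demo_y, _demo_p, mode="youden") == 0.35
assert find_threshold(_demo_y, _demo_p, mode="target_sens", target_sens=1.0) == 0.35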
def tune_threshold_on_inner(x_train, y_train, best_pipeline, inner_cv,
mode="fbeta", beta=2.0, target_sens=None, costs=None, n_jobs=1):
"""
Using the best pipeline (hyperparams fixed), get out-of-fold validation probabilities
on x_train via the inner CV, then pick threshold by the chosen rule.
Parameters
----------
x_train : pd.DataFrame | np.ndarray
Features for the outer fold's training split.
y_train : pd.Series | np.ndarray
Binary labels for the outer fold's training split.
best_pipeline : imblearn.Pipeline
Pipeline with hyperparameters fixed (e.g., the GridSearchCV best estimator).
inner_cv : sklearn.model_selection.StratifiedKFold
The same inner CV scheme used during hyperparameter selection.
mode, beta, target_sens, costs : see `find_threshold`.
n_jobs : int, default 1
Parallelism for `cross_val_predict`.
Returns
-------
float
Selected decision threshold.
"""
# get OOF probabilities for the positive class
p_val = cross_val_predict(best_pipeline, x_train, y_train,
cv=inner_cv, method="predict_proba", n_jobs=n_jobs)[:, 1]
    thr = find_threshold(np.asarray(y_train), p_val,
mode=mode, beta=beta, target_sens=target_sens, costs=costs)
return thr
# ---------- main multi-model nested CV with tqdm, scaler, SHAP ----------
def my_nestedcv_multi(
x, y, n_iter=1, n_jobs=5, permutation=False, threshold_mode="youden", pca_predictors=False,
model_names=("svm", "logistic", "random_forest", "decision_tree", "xgboost", "knn"),
shap_kernel_bg_max=30, shap_kernel_test_max=40, use_gpu=True, ratio=1.0
):
"""Run nested CV for multiple models with SMOTE, feature selection, and SHAP.
Workflow (per model)
--------------------
For each outer fold and `n_iter` repetitions:
1) Inner 5× Stratified CV grid search (scoring='recall') over pipeline params
2) Threshold tuning on inner CV OOF probabilities (e.g., Youden)
3) Refit best pipeline on full outer-train with tuned threshold
4) Evaluate on outer-test (ACC, SEN, SPEC, PPV, NPV, AUC + ROC curve)
5) Record selected features and SHAP mean |SHAP| per feature
Parameters
----------
x : pd.DataFrame
Feature matrix. If `pca_predictors=True` and the heuristic triggers,
PCA is applied once to reduce dimensionality.
y : array-like
Binary labels (0/1).
n_iter : int, default 1
How many times to repeat the outer CV (with different shuffles).
n_jobs : int, default 5
Parallelism for GridSearchCV and models that support it.
permutation : bool, default False
If True, permutes labels for a quick sanity check (should degrade performance).
threshold_mode : str, default "youden"
Passed to `tune_threshold_on_inner` ("youden", "fbeta", "target_sens", "cost").
pca_predictors : bool, default False
Whether to optionally apply PCA before CV when samples/features are low.
model_names : tuple[str], default ("svm", "logistic", "random_forest", "decision_tree", "xgboost", "knn")
Subset of models defined in `_get_model_specs` to run.
shap_kernel_bg_max, shap_kernel_test_max : int
Subsample sizes for Kernel SHAP when using kernel models (SVM/KNN).
use_gpu : bool, default True
Try GPU for XGBoost if available.
ratio : float, default 1.0
Class imbalance ratio for XGBoost's `scale_pos_weight`.
Returns
-------
dict
Mapping: model_name -> {
"performance": {
"score": {"accuracy": list, "sensitivity": list, "specificity": list,
"AUC": list, "PPV": list, "NPV": list},
"fpr": list[np.ndarray],
"tpr": list[np.ndarray],
},
"selected_features": pd.DataFrame, # selection counts across folds/iters
"shap_summary": pd.DataFrame # mean of mean|SHAP| across folds/iters
}
Notes
-----
- The pipeline order is critical: **StandardScaler -> SMOTE -> FeatureSelection -> Classifier**.
This ensures SMOTE operates in a standardized space and selection is applied to
the resampled data.
- GridSearchCV uses `scoring='recall'` to prioritize sensitivity on imbalanced data.
"""
    # PCA (optional; applied once pre-CV). NOTE: fitting PCA on the full data before
    # CV can leak information; moving it inside the ImbPipeline would avoid this.
if pca_predictors:
if should_apply_pca(x):
n_components = min(10, x.shape[1])
pca = PCA(n_components=n_components)
x = pd.DataFrame(pca.fit_transform(x), index=x.index, columns=[f"PC{i+1}" for i in range(n_components)])
else:
x = x.copy()
else:
x = x.copy()
# y to Series 0/1
y_series = pd.Series(y).astype(int).reset_index(drop=True)
if permutation:
y_series = y_series.sample(frac=1.0, random_state=None).reset_index(drop=True)
model_specs = _get_model_specs(use_gpu=use_gpu, n_jobs=n_jobs, ratio=ratio)
results_by_model = {}
    outer_cv = StratifiedKFold(n_splits=5, shuffle=True)  # no fixed seed: each repetition reshuffles
    inner_cv = StratifiedKFold(n_splits=5, shuffle=True)
feature_names = list(x.columns)
# Only run models that exist in the spec mapping
pbar_models = [m for m in model_names if m in model_specs]
for name in pbar_models:
spec = model_specs[name]
classifier = spec["clf"]
param_grid = spec["grid"]
feature_selector = spec["selector"]
model_type = spec["type"]
# Build pipeline
# NOTE: StandardScaler is placed **before** SMOTE so that k-NN based resamplers
# and distance metrics are computed in a standardized space.
pipeline = ImbPipeline([
('scaler', StandardScaler(with_mean=True, with_std=True)),
('smote', BorderlineSMOTE(kind='borderline-1', random_state=42)),
# ('smote', SMOTE(random_state=42)),
('feature_selection', feature_selector),
('classification', classifier),
])
overall_outercv_performance = {
"score": {"accuracy": [], "sensitivity": [], "specificity": [], "AUC": [], "PPV": [], "NPV": []},
"fpr": [],
"tpr": [],
}
selected_features_list = []
shap_summaries = [] # collect per-fold mean|SHAP|; aggregate later
pbar_iters = tqdm(range(n_iter), desc=f"{name} • iterations", leave=True)
for _ in pbar_iters:
iteration_outercv = {
"score": {"accuracy": [], "sensitivity": [], "specificity": [], "AUC": [], "PPV": [], "NPV": []},
"fpr": [],
"tpr": [],
}
# pbar_folds = tqdm(list(outer_cv.split(x, y_series)), desc=f"{name} • outer folds", leave=False)
for train_idx, test_idx in outer_cv.split(x, y_series):
x_train, x_test = x.iloc[train_idx], x.iloc[test_idx]
y_train, y_test = y_series.iloc[train_idx], y_series.iloc[test_idx]
            # Inner CV grid search (optimizes recall; another scorer such as roc_auc could be used)
grid_search = GridSearchCV(
estimator=pipeline,
param_grid=param_grid,
cv=inner_cv,
scoring='recall',
n_jobs=n_jobs,
verbose=0 # Silence joblib progress display
)
grid_search.fit(x_train, y_train)
best_pipeline = grid_search.best_estimator_
            # 1) tune the decision threshold on inner-CV out-of-fold probabilities
best_thr = tune_threshold_on_inner(
x_train, y_train, best_pipeline, inner_cv,
mode=threshold_mode, beta=2.0, # e.g., favor sensitivity
# OR: mode="youden"
# OR: mode="target_sens", target_sens=0.80
# OR: mode="cost", costs={"fp":1, "fn":5} # cost of FN is higher
n_jobs=n_jobs)
            # 2) refit on the full outer-train split (GridSearchCV with refit=True has
            #    already done this; the explicit fit keeps the step visible)
best_pipeline.fit(x_train, y_train)
# Selected features (names)
selector = best_pipeline.named_steps['feature_selection']
support = selector.get_support()
if support is None or np.sum(support) == 0:
selected_cols = x.columns
else:
selected_cols = x.columns[support]
selected_features_list.append(selected_cols)
# Evaluate on outer test with tuned threshold
probs = best_pipeline.predict_proba(x_test)[:, 1]
yhat = (probs >= best_thr).astype(int)
acc = accuracy_score(y_test, yhat)
tn, fp, fn, tp = confusion_matrix(y_test, yhat).ravel()
sens = tp / (tp + fn) if (tp + fn) > 0 else 0.0
spec = tn / (tn + fp) if (tn + fp) > 0 else 0.0
cur_fpr, cur_tpr, _ = roc_curve(y_test, probs)
ppv = tp / (tp + fp) if (tp + fp) > 0 else 0.0
npv = tn / (tn + fn) if (tn + fn) > 0 else 0.0
auc_val = auc(cur_fpr, cur_tpr)
iteration_outercv["score"]["accuracy"].append(acc)
iteration_outercv["score"]["sensitivity"].append(sens)
iteration_outercv["score"]["specificity"].append(spec)
iteration_outercv["score"]["AUC"].append(auc_val)
iteration_outercv["score"]["PPV"].append(ppv)
iteration_outercv["score"]["NPV"].append(npv)
iteration_outercv["fpr"].append(cur_fpr)
iteration_outercv["tpr"].append(cur_tpr)
# SHAP summary for this fold (mean|SHAP| per feature)
shap_df = _compute_shap_summary(
best_pipeline, x_train, x_test, feature_names, model_type,
bg_max=shap_kernel_bg_max, test_max=shap_kernel_test_max
)
shap_summaries.append(shap_df)
# Aggregate metrics across outer folds for this iteration
overall_outercv_performance["score"]["accuracy"].append(statistics.mean(iteration_outercv["score"]["accuracy"]))
overall_outercv_performance["score"]["sensitivity"].append(statistics.mean(iteration_outercv["score"]["sensitivity"]))
overall_outercv_performance["score"]["specificity"].append(statistics.mean(iteration_outercv["score"]["specificity"]))
overall_outercv_performance["score"]["AUC"].append(statistics.mean(iteration_outercv["score"]["AUC"]))
overall_outercv_performance["score"]["PPV"].append(statistics.mean(iteration_outercv["score"]["PPV"]))
overall_outercv_performance["score"]["NPV"].append(statistics.mean(iteration_outercv["score"]["NPV"]))
overall_outercv_performance["fpr"].append(np.mean(arraylist2arrayColumn(iteration_outercv["fpr"]), axis=1))
overall_outercv_performance["tpr"].append(np.mean(arraylist2arrayColumn(iteration_outercv["tpr"]), axis=1))
# Feature selection frequencies across all folds/iterations
selected_features_df = allocate_feature_list(x, selected_features_list)
# aggregate SHAP summaries across all folds/iterations (mean of mean|SHAP|)
if len(shap_summaries) > 0:
shap_concat = pd.concat(shap_summaries, axis=0)
shap_summary = (shap_concat.groupby("Feature", as_index=False)["mean_abs_shap"]
.mean()
.sort_values("mean_abs_shap", ascending=False)
.reset_index(drop=True))
else:
shap_summary = pd.DataFrame({"Feature": feature_names, "mean_abs_shap": np.nan})
results_by_model[name] = {
"performance": overall_outercv_performance,
"selected_features": selected_features_df,
"shap_summary": shap_summary
}
return results_by_model
Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
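Optional smoke test (a minimal sketch on synthetic data): the cell below runs the nested CV end to end with a single model just to confirm the wiring; the dataset and settings here are hypothetical and not part of the study.
In [ ]:
# smoke test on synthetic data (hypothetical; safe to skip)
rng = np.random.default_rng(0)
x_demo = pd.DataFrame(rng.normal(size=(60, 8)), columns=[f"f{i}" for i in range(8)])
y_demo = pd.Series(rng.integers(0, 2, size=60))
_ = my_nestedcv_multi(x_demo, y_demo, n_iter=1, n_jobs=1,
                      model_names=("logistic",), use_gpu=False)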
Load data¶
In [24]:
# create predictors and outcome
# Load the data sheet
data = pd.read_excel(r"MLpipeline data.xlsx")
predictors = data.drop(columns=["outcome"])
# Create the outcome variable
y = (data["outcome"]).astype(int)
y_ = np.asarray(y)
n_pos = np.sum(y_ == 1)
n_neg = np.sum(y_ == 0)
ratio = float(n_neg) / float(max(n_pos, 1))  # class-imbalance ratio (neg/pos); pass as ratio= below to weight XGBoost
n_iter = 5 # number of iterations of the outer CV loop
n_jobs = 6 # number of cores for parallel computation
Run the ML pipeline¶
In [25]:
start_time = time.time() # record running time
results = my_nestedcv_multi(
predictors, y,
    n_iter=n_iter,  # repeat the outer CV loop n_iter times
n_jobs=n_jobs,
permutation=False,
threshold_mode="youden",
pca_predictors=False,
# model_names=("lasso","ridge","svm","logistic","random_forest","decision_tree","xgboost","knn"),
model_names=("svm","logistic","random_forest","decision_tree","xgboost","knn"),
use_gpu=False,
ratio=1.0
)
results_permu = my_nestedcv_multi(
predictors, y,
    n_iter=n_iter,  # repeat the outer CV loop n_iter times
n_jobs=n_jobs,
permutation=True,
threshold_mode="youden",
pca_predictors=False,
# model_names=("lasso","ridge","svm","logistic","random_forest","decision_tree","xgboost","knn"),
model_names=("svm","logistic","random_forest","decision_tree","xgboost","knn"),
use_gpu=False,
ratio=1.0
) # permutation run: labels shuffled as a chance-level baseline
end_time = time.time()
runtime = end_time - start_time
print(f"runtime: {runtime}")
svm • iterations: 100% 5/5 [03:00<00:00, 36.01s/it]
logistic • iterations: 100% 5/5 [00:09<00:00, 1.87s/it]
random_forest • iterations: 100% 5/5 [01:16<00:00, 15.32s/it]
decision_tree • iterations: 100% 5/5 [00:39<00:00, 7.97s/it]
xgboost • iterations: 100% 5/5 [01:43<00:00, 20.69s/it]
knn • iterations: 100% 5/5 [04:02<00:00, 48.59s/it]
svm • iterations: 100% 5/5 [03:00<00:00, 36.02s/it]
logistic • iterations: 100% 5/5 [00:09<00:00, 1.85s/it]
random_forest • iterations: 100% 5/5 [01:16<00:00, 15.34s/it]
decision_tree • iterations: 100% 5/5 [00:39<00:00, 7.96s/it]
xgboost • iterations: 100% 5/5 [01:44<00:00, 20.85s/it]
knn • iterations: 100% 5/5 [04:04<00:00, 48.93s/it]
runtime: 1307.1047189235687
In [12]:
# save results
import pickle
# save
with open("ML_results.pkl", "wb") as f:
pickle.dump(results, f)
with open("ML_results_permu.pkl", "wb") as f:
pickle.dump(results_permu, f)
# # load back
# with open("results.pkl", "rb") as f:
# loaded = pickle.load(f)
# print(loaded)
Display the model performance¶
In [26]:
import numpy as np
import pandas as pd
import scipy.stats as st
# ---- permutation-based significance test ----
def mypermutation_ttest(x, y):
"""Compute a simple permutation-style p-value vs. chance for metric arrays.
Parameters
----------
x : array-like
Permutation distribution for a metric (e.g., scores from label shuffles).
Interpreted as the **null** distribution.
y : array-like
Observed metric values under the real labels.
Returns
-------
float
        Proportion of permutation samples greater than the **median** of `y`.
    Notes
    -----
    This is not a formal t-test; a standard permutation test comparing means
    or AUCs with a two-sided tail could be substituted if needed.
    """
permutation_large_sum = sum(x > np.median(y))
permutation_size = len(x)
p_value = permutation_large_sum / permutation_size
return p_value
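# Hedged example (hypothetical values): 1 of 4 permutation scores exceeds the
# observed median (0.70), so p = 0.25.
assert mypermutation_ttest(np.array([0.5, 0.6, 0.7, 0.8]), np.array([0.65, 0.75])) == 0.25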
# ---- helpers for stars / summary formatting ----
def _stars_from_p(p):
"""Map a p-value to significance stars.
"""
if p is None or np.isnan(p):
return ""
if p > 0.05:
return "ns"
elif p <= 0.0001:
return "***"
elif p <= 0.01:
return "**"
else:
return "*"
def _mean_ci_95(arr):
"""Return (mean, [low, high]) using a normal-approximation 95% CI on the mean.
Parameters
----------
arr : array-like
Input values (NaNs ignored).
Returns
-------
tuple
        `(mean, [ci_low, ci_high])` where the CI is computed as a **normal**
        95% CI on the mean using the standard error of the mean (SEM). CI
        endpoints are rounded to 3 decimals; the mean is returned unrounded.
"""
arr = np.asarray(arr, dtype=float)
arr = arr[~np.isnan(arr)]
if arr.size == 0:
return (np.nan, [np.nan, np.nan])
if arr.size == 1:
m = float(arr[0])
return (m, [m, m])
m = float(np.mean(arr))
    # 95% CI for the mean: normal approximation via SEM
ci_lo, ci_hi = st.norm.interval(confidence=0.95, loc=np.mean(arr), scale=st.sem(arr))
return (m, [float(np.round(ci_lo, 3)), float(np.round(ci_hi, 3))])
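# Hedged example (hypothetical values): mean 0.8 with a normal-approximation
# 95% CI of roughly [0.687, 0.913] (mean +/- 1.96 * SEM).
_m, (_lo, _hi) = _mean_ci_95([0.7, 0.8, 0.9])
assert np.isclose(_m, 0.8) and np.isclose(_lo, 0.687) and np.isclose(_hi, 0.913)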
# Map internal metric keys to display names/order
_METRIC_ORDER = [
("accuracy", "ACC"),
("sensitivity", "SENS"),
("specificity", "SPEC"),
("AUC", "AUC"),
("PPV", "PPV"),
("NPV", "NPV"),
]
def build_model_performance_table(
results_by_model,
permutation_results_by_model=None,
models=None,
fmt="string" # "string" -> "mean★, 95%CI([lo, hi])"; or "numbers" -> separate numeric tables
):
"""
Build a single table with rows=metrics and columns=models.
    - results_by_model: dict like results['svm'] = {"performance": {"score": {...}}}
- permutation_results_by_model: same structure; if given, stars are computed vs permutation per metric/model
- models: optional list to control column order; defaults to keys in results_by_model
- fmt: "string" for compact strings in cells, or "numbers" to also return numeric matrices
Returns:
table_df (if fmt="string")
OR
{"display": DataFrame of strings,
"mean": DataFrame of means,
"cilow": DataFrame of CI lows,
"cihigh": DataFrame of CI highs,
"p": DataFrame of p-values,
"stars": DataFrame of stars}
"""
if models is None:
        models = list(results_by_model.keys())
# Prepare containers
    metric_labels = [lab for _, lab in _METRIC_ORDER]
    display = pd.DataFrame(index=metric_labels, columns=models, dtype=object)
    mean_df = pd.DataFrame(index=metric_labels, columns=models, dtype=float)
    lo_df = pd.DataFrame(index=metric_labels, columns=models, dtype=float)
    hi_df = pd.DataFrame(index=metric_labels, columns=models, dtype=float)
    p_df = pd.DataFrame(index=metric_labels, columns=models, dtype=float)
    star_df = pd.DataFrame(index=metric_labels, columns=models, dtype=object)
for model in models:
if model not in results_by_model:
continue
scores = results_by_model[model]["performance"]["score"]
perm_scores = None
if permutation_results_by_model is not None and model in permutation_results_by_model:
perm_scores = permutation_results_by_model[model]["performance"]["score"]
for key, label in _METRIC_ORDER:
data = scores.get(key, [])
mean, ci = _mean_ci_95(data)
# p-value / stars vs permutation (if provided)
pval, stars = (np.nan, "")
if perm_scores is not None and key in perm_scores:
try:
pval = mypermutation_ttest(np.asarray(perm_scores[key]), np.asarray(data))
except Exception:
pval = np.nan
stars = _stars_from_p(pval)
# fill numeric frames
mean_df.loc[label, model] = round(mean, 3) if pd.notna(mean) else np.nan
lo_df.loc[label, model] = ci[0]
hi_df.loc[label, model] = ci[1]
p_df.loc[label, model] = pval
star_df.loc[label, model] = stars
# build display string
if pd.isna(mean):
disp = "NA"
else:
disp = f"{round(mean,3)}{stars}, 95%CI([{ci[0]}, {ci[1]}])"
display.loc[label, model] = disp
if fmt == "numbers":
return {
"display": display,
"mean": mean_df,
"cilow": lo_df,
"cihigh": hi_df,
"p": p_df,
"stars": star_df,
}
else:
return display
In [27]:
models_in_order = ("svm","logistic","random_forest","decision_tree","xgboost","knn")
# Build the performance table: observed results vs. permutation baseline (for stars)
table_predictors = build_model_performance_table(results, results_permu, models_in_order, fmt="string")
print("\npredictors → outcomes")
table_predictors
predictors → outcomes
Out[27]:
| | svm | logistic | random_forest | decision_tree | xgboost | knn |
|---|---|---|---|---|---|---|
| ACC | 0.66ns, 95%CI([0.637, 0.684]) | 0.553ns, 95%CI([0.517, 0.589]) | 0.768***, 95%CI([0.726, 0.81]) | 0.754***, 95%CI([0.73, 0.779]) | 0.729***, 95%CI([0.716, 0.742]) | 0.654***, 95%CI([0.614, 0.695]) |
| SENS | 0.536ns, 95%CI([0.418, 0.654]) | 0.592ns, 95%CI([0.472, 0.712]) | 0.744***, 95%CI([0.656, 0.832]) | 0.544ns, 95%CI([0.513, 0.575]) | 0.712***, 95%CI([0.615, 0.809]) | 0.656ns, 95%CI([0.568, 0.744]) |
| SPEC | 0.709ns, 95%CI([0.655, 0.764]) | 0.538ns, 95%CI([0.467, 0.609]) | 0.777***, 95%CI([0.731, 0.823]) | 0.838***, 95%CI([0.797, 0.878]) | 0.737***, 95%CI([0.694, 0.78]) | 0.654***, 95%CI([0.593, 0.716]) |
| AUC | 0.685ns, 95%CI([0.652, 0.719]) | 0.628ns, 95%CI([0.602, 0.655]) | 0.856***, 95%CI([0.82, 0.892]) | 0.697***, 95%CI([0.678, 0.716]) | 0.811***, 95%CI([0.784, 0.837]) | 0.73***, 95%CI([0.683, 0.778]) |
| PPV | 0.488***, 95%CI([0.431, 0.544]) | 0.349ns, 95%CI([0.313, 0.385]) | 0.6***, 95%CI([0.539, 0.662]) | 0.568***, 95%CI([0.472, 0.664]) | 0.557ns, 95%CI([0.516, 0.598]) | 0.515***, 95%CI([0.439, 0.59]) |
| NPV | 0.819ns, 95%CI([0.784, 0.855]) | 0.783ns, 95%CI([0.738, 0.828]) | 0.895***, 95%CI([0.855, 0.935]) | 0.834***, 95%CI([0.819, 0.849]) | 0.885***, 95%CI([0.855, 0.914]) | 0.85***, 95%CI([0.814, 0.886]) |
Plot the SHAP¶
In [48]:
results["xgboost"]["shap_summary"]
Out[48]:
| | Feature | mean_abs_shap |
|---|---|---|
| 0 | Feature_49 | 0.664357 |
| 1 | Feature_59 | 0.657823 |
| 2 | Feature_47 | 0.524258 |
| 3 | Feature_26 | 0.404132 |
| 4 | Feature_46 | 0.300160 |
| 5 | Feature_41 | 0.288114 |
| 6 | Feature_33 | 0.228888 |
| 7 | Feature_57 | 0.200986 |
| 8 | Feature_35 | 0.194673 |
| 9 | Feature_3 | 0.182435 |
| 10 | Feature_31 | 0.157442 |
| 11 | Feature_27 | 0.140048 |
| 12 | Feature_50 | 0.132195 |
| 13 | Feature_20 | 0.099229 |
| 14 | Feature_16 | 0.097026 |
| 15 | Feature_37 | 0.082143 |
| 16 | Feature_51 | 0.081249 |
| 17 | Feature_21 | 0.077274 |
| 18 | Feature_29 | 0.075925 |
| 19 | Feature_18 | 0.075559 |
| 20 | Feature_39 | 0.071748 |
| 21 | Feature_44 | 0.069582 |
| 22 | Feature_42 | 0.068766 |
| 23 | Feature_17 | 0.061193 |
| 24 | Feature_22 | 0.059763 |
| 25 | Feature_36 | 0.059216 |
| 26 | Feature_2 | 0.055967 |
| 27 | Feature_54 | 0.054189 |
| 28 | Feature_48 | 0.052283 |
| 29 | Feature_45 | 0.049294 |
| 30 | Feature_32 | 0.047828 |
| 31 | Feature_13 | 0.043113 |
| 32 | Feature_4 | 0.037824 |
| 33 | Feature_55 | 0.036640 |
| 34 | Feature_25 | 0.033612 |
| 35 | Feature_1 | 0.031376 |
| 36 | Feature_24 | 0.026758 |
| 37 | Feature_15 | 0.026149 |
| 38 | Feature_40 | 0.021900 |
| 39 | Feature_8 | 0.020529 |
| 40 | Feature_30 | 0.019574 |
| 41 | Feature_6 | 0.016959 |
| 42 | Feature_43 | 0.014200 |
| 43 | Feature_11 | 0.013500 |
| 44 | Feature_5 | 0.013499 |
| 45 | Feature_58 | 0.010371 |
| 46 | Feature_10 | 0.010156 |
| 47 | Feature_34 | 0.009229 |
| 48 | Feature_23 | 0.007678 |
| 49 | Feature_14 | 0.007328 |
| 50 | Feature_28 | 0.006232 |
| 51 | Feature_53 | 0.005224 |
| 52 | Feature_38 | 0.004656 |
| 53 | Feature_19 | 0.003142 |
| 54 | Feature_52 | 0.002915 |
| 55 | Feature_56 | 0.001687 |
| 56 | Feature_9 | 0.000141 |
| 57 | Feature_12 | 0.000000 |
| 58 | Feature_7 | 0.000000 |
In [33]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
def plot_shap_summary_bars(results_by_model, models=None, topk=10, normalize='sum', save=True, save_filename=None):
"""
Plot top-k features per model with a consistent 0–1 scale.
normalize:
- 'sum' -> value = mean_abs_shap / sum(mean_abs_shap) (shares add to 1)
- 'max' -> value = mean_abs_shap / max(mean_abs_shap) (largest bar = 1)
    - None -> raw mean_abs_shap (no normalization)
"""
if models is None:
models = list(results_by_model.keys())
n = len(models)
fig, axes = plt.subplots(nrows=n, ncols=1, figsize=(8, 3*n), constrained_layout=True)
if n == 1:
axes = [axes]
for ax, name in zip(axes, models):
if name not in results_by_model:
ax.axis('off')
continue
df_all = results_by_model[name]["shap_summary"].copy()
# compute normalization using ALL features, not just top-k
if normalize == 'sum':
denom = df_all["mean_abs_shap"].sum()
# avoid division by zero
df_all["norm_value"] = df_all["mean_abs_shap"] / denom if denom > 0 else 0.0
xlabel = "share of total |SHAP|"
xlim = (0, 1)
formatter = mtick.PercentFormatter(1.0) # show as %
elif normalize == 'max':
denom = df_all["mean_abs_shap"].max()
df_all["norm_value"] = df_all["mean_abs_shap"] / denom if denom > 0 else 0.0
xlabel = "normalized |SHAP| (max=1)"
xlim = (0, 1)
formatter = None
else:
df_all["norm_value"] = df_all["mean_abs_shap"]
xlabel = "mean |SHAP|"
xlim = None
formatter = None
# pick top-k for plotting
ss = df_all.sort_values("norm_value", ascending=False).head(topk)
ax.barh(ss["Feature"][::-1], ss["norm_value"][::-1])
ax.set_title(f"{name} — top {topk} features")
ax.set_xlabel(xlabel)
ax.set_ylabel("feature")
if xlim is not None:
ax.set_xlim(*xlim)
if formatter is not None:
ax.xaxis.set_major_formatter(formatter)
if save and save_filename is None:
save_filename = f"SHAP Plot.png"
plt.savefig(save_filename, dpi=300, bbox_inches='tight')
print(f"Figure saved to {save_filename}")
plt.show()
# Example:
plot_shap_summary_bars(
results,
models=("svm","logistic","random_forest","decision_tree","xgboost","knn"),
topk=20,
    normalize=None  # or 'sum' or 'max'
)
Figure saved to SHAP Plot.png
Plot the ROC curve¶
In [53]:
import numpy as np
import matplotlib.pyplot as plt
def _interp_on_grid(fpr, tpr, grid):
"""Interpolate a single ROC curve (FPR, TPR) onto a shared FPR grid.
Parameters
----------
fpr : array-like
False Positive Rate values from an ROC curve, assumed sorted ascending.
tpr : array-like
True Positive Rate values corresponding to each FPR.
grid : array-like
Common grid of FPR points (e.g., from 0 to 1) where TPR should be interpolated.
Returns
-------
np.ndarray
Interpolated TPR values evaluated at the given grid.
Notes
-----
Ensures the curve starts at (0,0) and ends at (1,1) before performing
piecewise-linear interpolation using ``np.interp``.
"""
fpr = np.asarray(fpr, float)
tpr = np.asarray(tpr, float)
if fpr[0] > 0.0:
fpr = np.r_[0.0, fpr]; tpr = np.r_[0.0, tpr]
if fpr[-1] < 1.0:
fpr = np.r_[fpr, 1.0]; tpr = np.r_[tpr, 1.0]
return np.interp(grid, fpr, tpr)
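# Hedged check (hypothetical values): a single-point curve is anchored at
# (0, 0) and (1, 1), so interpolation recovers the identity on this grid.
_demo_grid = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
assert np.allclose(_interp_on_grid([0.5], [0.5], _demo_grid), _demo_grid)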
def _adaptive_fpr_grid(fpr_list):
"""Generate an adaptive FPR grid based on all ROC curves.
Parameters
----------
fpr_list : list of array-like
List of FPR arrays (one per ROC curve / iteration).
Returns
-------
np.ndarray
Sorted, unique array containing the union of all FPR points across iterations,
including 0.0 and 1.0.
Notes
-----
This avoids choosing an arbitrary ``grid_size`` and ensures the mean/SEM
calculations preserve all original ROC anchor points.
"""
all_fpr = np.concatenate([
np.asarray(fpr, float).ravel() for fpr in fpr_list if len(fpr) > 0
])
all_fpr = np.r_[all_fpr, 0.0, 1.0]
grid = np.unique(np.clip(all_fpr, 0.0, 1.0))
return grid
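# Hedged check (hypothetical values): the grid is the sorted union of all
# FPR points plus the endpoints {0, 1}.
assert np.allclose(_adaptive_fpr_grid([[0.2, 0.6], [0.4]]), [0.0, 0.2, 0.4, 0.6, 1.0])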
def plot_mean_roc_with_sem_adaptive(results_by_model, models=None, ax=None,
label_fmt="{model} (AUC≈{auc:.3f})",
save=True, save_filename="ROC_mean_sem.png"):
"""Plot mean ROC curves with SEM bands using an adaptive FPR grid.
Parameters
----------
results_by_model : dict
Output dictionary from ``my_nestedcv_multi`` containing ROC data for each model.
models : list[str] or None, optional
Which model keys to plot. Defaults to all keys in ``results_by_model``.
ax : matplotlib.axes.Axes or None, optional
Axis object to draw on. Creates a new figure if None.
label_fmt : str, optional
Format string for legend labels (receives model and AUC).
save : bool, optional
If True, saves the plot as a PNG file.
save_filename : str, optional
Filename for saving the figure if ``save=True``.
Returns
-------
matplotlib.axes.Axes
The axis containing the plotted ROC curves.
Notes
-----
- Computes the mean and SEM (standard error of the mean) across iterations.
- Uses a data-driven adaptive grid formed from the union of all FPRs.
- Avoids the need for a fixed grid size (e.g., 201 points).
- Draws a shaded SEM band around the mean ROC curve for uncertainty visualization.
"""
if models is None:
models = list(results_by_model.keys())
if ax is None:
fig, ax = plt.subplots()
for model in models:
perf = results_by_model[model]["performance"]
fpr_list = perf["fpr"] # list of arrays (one per iteration)
tpr_list = perf["tpr"]
# Build adaptive grid from all iterations’ FPRs
fpr_grid = _adaptive_fpr_grid(fpr_list)
# Interpolate each iteration onto the adaptive grid
tpr_interp = [
_interp_on_grid(fpr_i, tpr_i, fpr_grid)
for fpr_i, tpr_i in zip(fpr_list, tpr_list)
]
tpr_interp = np.column_stack(tpr_interp) # (len(fpr_grid), n_iter)
# Mean and SEM across iterations
mean_tpr = np.mean(tpr_interp, axis=1)
if tpr_interp.shape[1] > 1:
std_tpr = np.std(tpr_interp, axis=1, ddof=1)
sem_tpr = std_tpr / np.sqrt(tpr_interp.shape[1])
else:
sem_tpr = np.zeros_like(mean_tpr)
        # AUC: report the mean of the per-iteration AUCs from the CV results
        # (np.trapz(mean_tpr, fpr_grid) would instead give the AUC of the mean curve)
        auc_values = results_by_model[model]["performance"]["score"]["AUC"]
        auc_mean = np.mean(auc_values)
# Plot mean + SEM band
ax.plot(fpr_grid, mean_tpr, linewidth=2, label=label_fmt.format(model=model, auc=auc_mean))
ax.fill_between(fpr_grid,
np.clip(mean_tpr - sem_tpr, 0, 1),
np.clip(mean_tpr + sem_tpr, 0, 1),
alpha=0.2, linewidth=0)
ax.plot([0,1],[0,1],'--',linewidth=1,label='Chance')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC (mean across iterations) with SEM bands — adaptive grid")
ax.legend(loc="lower right")
ax.set_xlim(0, 1); ax.set_ylim(0, 1)
if save:
plt.savefig(save_filename, dpi=300, bbox_inches='tight')
print(f"Figure saved to {save_filename}")
plt.show()
return ax
In [55]:
# Plot all models, or select a subset / customize
plot_mean_roc_with_sem_adaptive(results, models=["svm","random_forest","decision_tree","xgboost"])
Figure saved to ROC_mean_sem.png
Out[55]:
<Axes: title={'center': 'ROC (mean across iterations) with SEM bands — adaptive grid'}, xlabel='False Positive Rate', ylabel='True Positive Rate'>