import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, f1_score
from xgboost import XGBClassifier, XGBRegressor
from bioneuralnet.utils import get_logger
logger = get_logger(__name__)
[docs]
def evaluate_model(X: np.ndarray,y: np.ndarray,model_type: str = "rf_classif",n_estimators: int = 150,runs: int = 100,seed: int = 119,):
"""
Evaluate a single model (RF or XGB, classif or reg) over multiple runs, returning three tuples.
For classification:
- (accuracy_mean, accuracy_std)
- (f1_weighted_mean, f1_weighted_std)
- (f1_macro_mean, f1_macro_std)
For regression:
- (r2_mean, r2_std)
- (None, None)
- (None, None)
"""
accs, f1ws, f1ms, rsqs = [], [], [], []
is_classif = "classif" in model_type
for run in range(runs):
stratify = y if is_classif else None
X_tr, X_te, y_tr, y_te = train_test_split(
X, y, test_size=0.3, random_state=seed + run, stratify=stratify
)
if model_type == "rf_classif":
mdl = RandomForestClassifier(n_estimators=n_estimators, random_state=seed + run)
elif model_type == "xgb_classif":
mdl = XGBClassifier( n_estimators=n_estimators, eval_metric="logloss", random_state=seed + run)
elif model_type == "rf_reg":
mdl = RandomForestRegressor(n_estimators=n_estimators, random_state=seed + run)
elif model_type == "xgb_reg":
mdl = XGBRegressor(n_estimators=n_estimators, random_state=seed + run)
else:
raise ValueError("model_type must be one of: rf_classif, xgb_classif, rf_reg, xgb_reg")
mdl.fit(X_tr, y_tr)
y_pred = mdl.predict(X_te)
if is_classif:
accs.append(accuracy_score(y_te, y_pred))
f1ws.append(f1_score(y_te, y_pred, average="weighted"))
f1ms.append(f1_score(y_te, y_pred, average="macro"))
else:
rsqs.append(r2_score(y_te, y_pred))
if is_classif:
return (
(np.mean(accs), np.std(accs)),
(np.mean(f1ws), np.std(f1ws)),
(np.mean(f1ms), np.std(f1ms)))
else:
return (
(np.mean(rsqs), np.std(rsqs)),
(None, None),
(None, None))
[docs]
def evaluate_single_run(X: np.ndarray,y: np.ndarray,model_type: str = "rf_classif",n_estimators: int = 100,test_size: float = 0.3,seed: int = 119):
"""
Do one train/test split, train the specified model.
Return: (accuracy, f1_weighted, f1_macro)
"""
stratify = y if "classif" in model_type else None
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_size, random_state=seed, stratify=stratify)
if model_type == "rf_classif":
mdl = RandomForestClassifier(n_estimators=n_estimators, random_state=seed)
elif model_type == "xgb_classif":
mdl = XGBClassifier(n_estimators=n_estimators,eval_metric="logloss",random_state=seed)
else:
raise ValueError("model_type must be 'rf_classif' or 'xgb_classif'")
mdl.fit(X_tr, y_tr)
y_pred = mdl.predict(X_te)
acc = accuracy_score(y_te, y_pred)
f1w = f1_score(y_te, y_pred, average="weighted")
f1m = f1_score(y_te, y_pred, average="macro")
return acc, f1w, f1m
[docs]
def evaluate_rf(X: np.ndarray,y: np.ndarray,mode: str = "classification",n_estimators: int = 150,runs: int = 100,seed: int = 119,return_all: bool = False):
"""
Shortcut function: evaluate a RandomForest (classification or regression).
"""
mt = "rf_classif" if mode == "classification" else "rf_reg"
return evaluate_model(X, y, model_type=mt, n_estimators=n_estimators,runs=runs, seed=seed, return_all=return_all)
[docs]
def evaluate_xgb(X: np.ndarray,y: np.ndarray,mode: str = "classification",n_estimators: int = 150,runs: int = 100,seed: int = 119,return_all: bool = False):
"""
Shortcut function: evaluate an XGBoost (classification or regression).
"""
mt = "xgb_classif" if mode == "classification" else "xgb_reg"
return evaluate_model(X, y, model_type=mt, n_estimators=n_estimators, runs=runs, seed=seed, return_all=return_all)
[docs]
def evaluate_f1w(X: np.ndarray,y: np.ndarray,model_type: str = "rf_classif",n_estimators: int = 100,runs: int = 5,seed: int = 119):
"""
Evaluate weighted F1-score over multiple runs.
"""
scores = []
for run in range(runs):
stratify = y if "classif" in model_type else None
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed+run, stratify=stratify)
if model_type == "rf_classif":
mdl = RandomForestClassifier(n_estimators=n_estimators, random_state=seed+run)
elif model_type == "xgb_classif":
mdl = XGBClassifier(n_estimators=n_estimators,eval_metric="logloss",random_state=seed+run)
else:
raise ValueError("Unsupported model_type for F1 scoring")
mdl.fit(X_train, y_train)
y_pred = mdl.predict(X_test)
scores.append(f1_score(y_test, y_pred, average="weighted"))
return np.mean(scores), np.std(scores)
[docs]
def evaluate_f1m(X: np.ndarray,y: np.ndarray,model_type: str = "rf_classif",n_estimators: int = 100,runs: int = 5,seed: int = 119):
"""Evaluate macro F1-score over multiple runs."""
scores = []
for run in range(runs):
stratify = y if "classif" in model_type else None
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed+run, stratify=stratify)
if model_type == "rf_classif":
mdl = RandomForestClassifier(n_estimators=n_estimators, random_state=seed+run)
elif model_type == "xgb_classif":
mdl = XGBClassifier(n_estimators=n_estimators,eval_metric="logloss",random_state=seed+run)
else:
raise ValueError("Unsupported model_type for F1 scoring")
mdl.fit(X_train, y_train)
y_pred = mdl.predict(X_test)
scores.append(f1_score(y_test, y_pred, average="macro"))
return np.mean(scores), np.std(scores)
[docs]
def plot_multiple_metrics(metrics: dict[str, dict[str, dict[str, tuple[float, float]]]],title_map: dict[str, str] = None,ylabel_map: dict[str, str] = None,filename: Path = None):
"""
Consolidate multiple metric grouped performances into one figure.
Adds numeric labels on top of each bar.
"""
logger.info(f"Plotting multiple metrics: {list(metrics.keys())}")
n = len(metrics)
fig, axes = plt.subplots(1, n, figsize=(5*n, 5), sharey=True)
if n == 1:
axes = [axes]
for ax, (metric, sc) in zip(axes, metrics.items()):
groups = list(sc.keys())
sublabels = list(next(iter(sc.values())).keys())
means = []
errs = []
for g in groups:
rm = []
re = []
for s in sublabels:
m, e = sc[g][s]
rm.append(m)
re.append(e)
means.append(rm)
errs.append(re)
means = np.array(means)
errs = np.array(errs)
ind = np.arange(len(groups))
total = 0.7
width = total / len(sublabels)
# plot bars and annotate
for i in range(len(sublabels)):
x = ind + i * width
y = means[:, i]
yerr = errs[:, i]
bars = ax.bar(x, y, width, yerr=yerr, capsize=3, label=sublabels[i])
# add value labels
for bar in bars:
height = bar.get_height()
x = bar.get_x() + bar.get_width() / 2
y = height + 0.01
label = f"{height:.2f}"
ax.text(x,y,label,ha='center', va='bottom', fontsize=8)
ax.set_xticks(ind + total/2 - width/2)
ax.set_xticklabels(groups, fontsize=11)
if title_map:
title = title_map.get(metric, metric)
else:
title = metric
ax.set_title(title, fontsize=14, pad=12)
if ylabel_map:
ylabel = ylabel_map.get(metric, metric)
else:
ylabel = metric
ax.set_ylabel(ylabel, fontsize=12)
if "Accuracy" in ylabel or "F1" in ylabel:
ax.set_ylim(0, 1)
ax.legend(title="Method", fontsize=9)
ax.grid(True, axis="y", linestyle="--", alpha=0.3)
plt.tight_layout(pad=2.0)
if filename:
fig.savefig(str(filename), dpi=300, bbox_inches="tight")
logger.info(f"Saved combined figure to {filename}")
plt.close(fig)