Disease Prediction

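Disease Prediction using Multi-Omics Networks (DPMON): the example below builds a global multi-omics network with SmCCNet and passes it, together with the omics, phenotype, and clinical data, to DPMON to predict a binned phenotype.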

Example of a Disease Prediction Workflow
import numpy as np
import pandas as pd
from bioneuralnet.datasets import DatasetLoader
from bioneuralnet.external_tools import SmCCNet
from bioneuralnet.network_embedding import GNNEmbedding
from bioneuralnet.downstream_task import SubjectRepresentation
from bioneuralnet.downstream_task import DPMON
from bioneuralnet.metrics import evaluate_rf, plot_performance

# Load the synthetic example dataset: load_data() returns four tables, unpacked
# here as two omics tables, the phenotype table, and the clinical table.
loader = DatasetLoader("example1")
omics1, omics2, phenotype, clinical = loader.load_data()

# Report the shape of every table; the shape expected for the example dataset
# is noted beside each entry.
print("Dataset Shapes:")
for table_name, table in (
    ("Omics1", omics1),  # Expected: (358, 500)
    ("Omics2", omics2),  # Expected: (358, 100)
    ("Phenotype", phenotype),  # Expected: (358, 1)
    ("Clinical", clinical),  # Expected: (358, 6)
):
    print(f"{table_name}: {table.shape}")

# Discretize the continuous phenotype into four equal-width classes labelled 0-3.
# Five evenly spaced edges spanning [min, max] delimit the four intervals, and
# include_lowest=True makes the first interval left-inclusive so the minimum
# value itself falls into class 0.
raw_phenotype = phenotype["phenotype"]
bins = np.linspace(raw_phenotype.min(), raw_phenotype.max(), 5)
phenotype["phenotype"] = pd.cut(
    raw_phenotype,
    bins=bins,
    labels=[0, 1, 2, 3],
    include_lowest=True,
)

# Show the first rows and the per-class counts (sort=False: counts are not
# reordered by frequency).
print("Binned Phenotype Data:")
print(phenotype.head())
print(phenotype["phenotype"].value_counts(sort=False))

# Combine both omics tables side by side (column-wise) into a single table.
merged_omics = pd.concat((omics1, omics2), axis="columns")

# Build the global network with SmCCNet from the two omics tables and the
# binned phenotype.
# NOTE(review): data_types presumably names the entries of omics_dfs in the
# same order -- confirm against the SmCCNet documentation.
smccnet = SmCCNet(
    omics_dfs=[omics1, omics2],
    data_types=["genes", "proteins"],
    phenotype_df=phenotype,
    subSampNum=500,
    kfold=3,
)

# run() returns a pair: the global network and the SmCCNet clusters.
smccnet_result = smccnet.run()
global_network, smccnet_clusters = smccnet_result

# Produce node embeddings with GNNEmbedding. The model is given the global
# network as its adjacency matrix along with the merged omics, phenotype, and
# clinical tables; tune=True is passed through to the constructor.
embeddings = GNNEmbedding(
    omics_data=merged_omics,
    clinical_data=clinical,
    phenotype_data=phenotype,
    adjacency_matrix=global_network,
    tune=True,
)

# Fit first, then request the embeddings (as_df=True asks embed() for its
# DataFrame output form).
embeddings.fit()
embeddings_output = embeddings.embed(as_df=True)

# Build the subject representation: SubjectRepresentation receives the merged
# omics table, the node embeddings computed above, and the binned phenotype.
enhanced_omics = SubjectRepresentation(
    embeddings=embeddings_output,
    omics_data=merged_omics,
    phenotype_data=phenotype,
    tune=True,
)

# run() returns the enhanced omics table; print its shape as a quick check.
enhanced_omics_df = enhanced_omics.run()
print(f"Enhanced Omics Shape: {enhanced_omics_df.shape}")

# Run disease prediction with DPMON. It is given the global network as the
# adjacency matrix, the two omics tables as a list (not the merged table),
# and the phenotype and clinical tables.
dpmon = DPMON(
    omics_list=[omics1, omics2],
    clinical_data=clinical,
    phenotype_data=phenotype,
    adjacency_matrix=global_network,
    tune=True,
)
dpmon_predictions = dpmon.run()

# The result is indexable: element 0 is printed here as the predictions, and
# element 1 is consumed later by the performance plot.
print(f"DPMON Predictions:\n{dpmon_predictions[0]}")

# Baseline: evaluate a random-forest classifier on the raw merged omics so the
# DPMON result has something to be compared against.
# NOTE(review): y_global is taken from the whole phenotype table, not from a
# single column, so it is presumably two-dimensional -- confirm that
# evaluate_rf accepts that form.
X_raw, y_global = merged_omics.values, phenotype.values
raw_rf_acc = evaluate_rf(X_raw, y_global, mode="classification")

# Plot element 1 of the DPMON result next to the raw-omics baseline.
print("Global Results:")
plot_performance(
    dpmon_predictions[1],
    raw_rf_acc,
    "Raw Omics vs. DPMON Omics",
)