Graph Clustering

Correlated clustering on complex networks:

Example correlated Louvain clustering workflow
import networkx as nx
import pandas as pd
from bioneuralnet.datasets import DatasetLoader
from bioneuralnet.external_tools import SmCCNet
from bioneuralnet.clustering import CorrelatedLouvain
from bioneuralnet.utils import compare_clusters

# Fetch the bundled synthetic dataset "example1"; load_data() unpacks into
# two omics tables, a phenotype table and a clinical table.
loader = DatasetLoader("example1")
omics1, omics2, phenotype, clinical = loader.load_data()

# Report the shape of each table. Shapes expected for this dataset:
#   Omics1 (358, 500), Omics2 (358, 100), Phenotype (358, 1), Clinical (358, 6)
print("Dataset Shapes:")
for label, table in (
    ("Omics1", omics1),
    ("Omics2", omics2),
    ("Phenotype", phenotype),
    ("Clinical", clinical),
):
    print(f"{label}: {table.shape}")

# Build the global network with SmCCNet from the phenotype and both omics tables
smccnet_settings = {
    "phenotype_df": phenotype,
    "omics_dfs": [omics1, omics2],
    "data_types": ["genes", "proteins"],
    "kfold": 3,
    "subSampNum": 500,
}
smccnet = SmCCNet(**smccnet_settings)
global_network, smccnet_clusters = smccnet.run()

# Turn the SmCCNet adjacency matrix into a NetworkX graph
G_network = nx.from_pandas_adjacency(global_network)

# Join the two omics tables side by side (column-wise) for the clustering step
merged_omics = pd.concat([omics1, omics2], axis=1)

# Run Correlated Louvain clustering on the graph, using the merged omics
# data and the phenotype
louvain_instance = CorrelatedLouvain(
    G=G_network, B=merged_omics, Y=phenotype,
    k3=0.2, k4=0.8,
    weight="weight", tune=True,
)
louvain_clusters = louvain_instance.run(as_dfs=True)

# Report how many clusters were found, then compare them with SmCCNet's clusters
cluster_count = len(louvain_clusters)
print(f"Number of Louvain Clusters: {cluster_count}")
compare_clusters(louvain_clusters, smccnet_clusters, phenotype)
Example correlated hybrid Louvain clustering workflow
import networkx as nx
import pandas as pd
from bioneuralnet.datasets import DatasetLoader
from bioneuralnet.external_tools import SmCCNet
from bioneuralnet.clustering import HybridLouvain
from bioneuralnet.utils import compare_clusters

# Fetch the bundled synthetic dataset "example1"; load_data() unpacks into
# two omics tables, a phenotype table and a clinical table.
loader = DatasetLoader("example1")
omics1, omics2, phenotype, clinical = loader.load_data()

# Report the shape of each table. Shapes expected for this dataset:
#   Omics1 (358, 500), Omics2 (358, 100), Phenotype (358, 1), Clinical (358, 6)
print("Dataset Shapes:")
for label, table in (
    ("Omics1", omics1),
    ("Omics2", omics2),
    ("Phenotype", phenotype),
    ("Clinical", clinical),
):
    print(f"{label}: {table.shape}")

# Build the global network with SmCCNet from the phenotype and both omics tables
smccnet_settings = {
    "phenotype_df": phenotype,
    "omics_dfs": [omics1, omics2],
    "data_types": ["genes", "proteins"],
    "kfold": 3,
    "subSampNum": 500,
}
smccnet = SmCCNet(**smccnet_settings)
global_network, smccnet_clusters = smccnet.run()

# Turn the SmCCNet adjacency matrix into a NetworkX graph
G_network = nx.from_pandas_adjacency(global_network)

# Join the two omics tables side by side (column-wise) for the clustering step
merged_omics = pd.concat([omics1, omics2], axis=1)

# Run Hybrid Louvain clustering on the graph, using the merged omics data
# and the phenotype; max_iter is the number of refinement iterations
hybrid_louvain_instance = HybridLouvain(
    G=G_network, B=merged_omics, Y=phenotype,
    k3=0.2, k4=0.8,
    max_iter=10,
    weight="weight", tune=True,
)
hybrid_louvain_results = hybrid_louvain_instance.run()

# Pull the final partition ("curr") and the per-iteration clusters ("clus")
# out of the result
iterative_clusters = hybrid_louvain_results["clus"]
final_clusters = hybrid_louvain_results["curr"]

# The number of final clusters is the number of distinct values in the partition
distinct_cluster_ids = set(final_clusters.values())
print(f"Final Hybrid Louvain Clusters: {len(distinct_cluster_ids)}")
print(f"Number of Iterative Refinements: {len(iterative_clusters)}")

# Compare the final partition with SmCCNet's clusters
# NOTE(review): final_clusters is used as a mapping above (.values());
# confirm compare_clusters accepts that form.
compare_clusters(final_clusters, smccnet_clusters, phenotype)