
# !pip install numpy pandas matplotlib seaborn scikit-learn scipy
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# ---------------------------------------------
# Dataset
# ---------------------------------------------
# Each tuple is one customer, in the same order as `column_names`.
column_names = ('Age', 'Income(₹k)', 'SpendingScore', 'Membership')
customer_rows = [
    (25, 50, 60, 3),
    (34, 70, 50, 2),
    (22, 45, 80, 4),
    (28, 55, 65, 3),
    (40, 85, 30, 1),
    (35, 75, 35, 2),
    (30, 60, 60, 3),
    (26, 48, 75, 4),
    (45, 95, 20, 1),
    (23, 42, 82, 4),
]

# Transpose the row records into the column -> list-of-values mapping
# that the DataFrame constructor consumes.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*customer_rows))
}

df = pd.DataFrame(data)

# ---------------------------------------------
# K-Means Clustering (3 Clusters)
# ---------------------------------------------
# Fit on the feature columns only. Dropping 'Cluster' (ignored when absent)
# keeps this step safe to re-run: otherwise a second execution would feed the
# previously assigned labels back into the model as an extra feature.
features = df.drop(columns=['Cluster'], errors='ignore')

# n_init is pinned explicitly because the library default changed between
# scikit-learn releases; a fixed value (plus random_state) keeps the cluster
# assignments the same regardless of the installed version.
# NOTE(review): features are clustered on their raw scales, so wide-range
# columns (Income, SpendingScore) weigh more in the distance than Membership
# (1-4). Consider standardizing first -- left as-is to keep results comparable.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
df['Cluster'] = kmeans.fit_predict(features)

# ---------------------------------------------
# Display Cluster Results and Centroids
# ---------------------------------------------
# Report the labelled customers first, then the fitted cluster centres
# (one row per cluster), framed by a header and footer banner.
report_header = "===== K-Means Clustering Results ====="
report_footer = "======================================\n"
centroids = kmeans.cluster_centers_

print(report_header)
print(df)
print("\nCentroids:\n", centroids)
print(report_footer)

# ---------------------------------------------
# Visualization (Income vs Spending Score)
# ---------------------------------------------
# One point per customer: income on the x-axis, spending score on the y-axis,
# coloured by the cluster label assigned above.
income = df['Income(₹k)']
spending = df['SpendingScore']
cluster_labels = df['Cluster']

plt.scatter(income, spending, c=cluster_labels, cmap='viridis', s=100)

plt.title('K-Means Clustering of Customers')
plt.xlabel('Income (₹k)')
plt.ylabel('Spending Score')
plt.show()
