# !pip install numpy pandas matplotlib seaborn scikit-learn scipy
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import numpy as np

# Hard-coded sample of 10 flowers (per the original note, taken from an image).
# Each list is one column; position i in every list describes the same flower.
data = {
    'ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'SepalLength': [5.1, 4.9, 4.7, 5.0, 7.0, 6.4, 6.9, 6.3, 5.8, 7.1],
    'SepalWidth': [3.5, 3.0, 3.2, 3.6, 3.2, 3.2, 3.1, 3.3, 2.7, 3.0],
    'PetalLength': [1.4, 1.4, 1.3, 1.4, 4.7, 4.5, 4.9, 6.0, 5.1, 5.9],
    'PetalWidth': [0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 2.5, 1.9, 2.1],
}
df = pd.DataFrame(data)

# Only the four measurement columns are used as clustering features;
# 'ID' is an identifier and is left out of the feature matrix.
feature_columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']
X = df[feature_columns]

# random_state pins the seed so repeated runs produce the same clusters;
# n_init is passed explicitly rather than relying on the library default.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(X)
# labels_ holds the cluster index assigned to each fitted row, in row order.
df['Cluster'] = kmeans.labels_

separator = "-" * 30

# Report which cluster each flower landed in.
print("Flower ID and Assigned Cluster:")
print(df[['ID', 'Cluster']])
print(separator)

# One centroid row per cluster, columns in the same order as feature_columns.
centroids = kmeans.cluster_centers_
print("\nCluster Centroids (Mean of Features for Each Cluster):\n", centroids)
print(separator)

# Visualise the clusters on the two petal features, coloured by cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(
    df['PetalLength'],
    df['PetalWidth'],
    c=df['Cluster'],
    cmap='viridis',
    s=100,
)
ax.set_title('K-Means Clustering of Flowers (Petal Features)')
ax.set_xlabel('PetalLength')
ax.set_ylabel('PetalWidth')
# Colorbar is disabled; to enable: plt.colorbar(scatter, label='Cluster Label')
ax.grid(True)
plt.show()
