###K -MEANS


import random

def euclidean(p1, p2):
    # Calculate Euclidean distance between two points (lists)
    return sum((x - y) * 2 for x, y in zip(p1, p2)) * 0.5

def kmeans(data, k, max_iterations=100):
    # Randomly choose k data points as initial centroids
    centroids = random.sample(data, k)
    clusters = []
    for _ in range(max_iterations):
        # Assign points to nearest centroid
        clusters = [[] for _ in range(k)]
        for point in data:
            distances = [euclidean(point, centroid) for centroid in centroids]
            nearest = distances.index(min(distances))
            clusters[nearest].append(point)
        
        # Recompute centroids
        new_centroids = []
        for cluster in clusters:
            if cluster:  # Avoid division by zero
                # Compute mean of points in cluster
                centroid = [sum(dim) / len(cluster) for dim in zip(*cluster)]
                new_centroids.append(centroid)
            else:  # If a cluster is empty, pick a random point as centroid
                new_centroids.append(random.choice(data))
        
        # If centroids didn't change, break early
        if new_centroids == centroids:
            break
        centroids = new_centroids
    
    return clusters, centroids

# Example usage:
data = [
    [2,10],
    [2,5],
    [8,4],
    [5,8],
    [7,5],
    [6,4],
    [1,2],
    [4,9],
]
clusters, centroids = kmeans(data, k=3)
print("Clusters:", clusters)
print("Centroids:", centroids)



### NAIVE BIAS

# Data
data = [
    ['Sunny', 'Hot', 'High', 'Weak', 'No'],
    ['Sunny', 'Hot', 'High', 'Strong', 'No'],
    ['Overcast', 'Hot', 'High', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Strong', 'No'],
    ['Overcast', 'Cool', 'Normal', 'Strong', 'Yes'],
    ['Sunny', 'Mild', 'High', 'Weak', 'No'],
    ['Sunny', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'Normal', 'Weak', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'Strong', 'Yes'],
    ['Overcast', 'Mild', 'High', 'Strong', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Strong', 'No']
]

# What we want to predict
test = ['Sunny', 'Cool', 'High', 'Strong']

# Step 1: Count total Yes and No
total = len(data)
yes_count = 0
no_count = 0

for row in data:
    if row[-1] == 'Yes':
        yes_count += 1
    else:
        no_count += 1

# Step 2: Calculate probability for Yes
p_yes = yes_count / total  # Start with base probability

for i in range(4):  # Check each feature
    match = 0
    for row in data:
        if row[-1] == 'Yes' and row[i] == test[i]:
            match += 1
    p_yes = p_yes * (match / yes_count)

# Step 3: Calculate probability for No
p_no = no_count / total  # Start with base probability

for i in range(4):  # Check each feature
    match = 0
    for row in data:
        if row[-1] == 'No' and row[i] == test[i]:
            match += 1
    p_no = p_no * (match / no_count)

# Step 4: Compare and predict
if p_yes > p_no:
    print("Prediction: Yes")
else:
    print("Prediction: No")

print("P(Yes) =", p_yes)
print("P(No) =", p_no)


### PAGE RANK

def page_rank(graph, damping=0.85, max_iterations=100, tol=1.0e-6):
    nodes = list(graph.keys())
    n = len(nodes)
    rank = {node: 1 / n for node in nodes}
    out_degree = {node: len(neighbors) for node, neighbors in graph.items()}
    
    for _ in range(max_iterations):
        new_rank = {}
        for node in nodes:
            rank_sum = 0
            for other_node in nodes:
                if node in graph[other_node]:
                    rank_sum += rank[other_node] / out_degree[other_node]
            new_rank[node] = (1 - damping) / n + damping * rank_sum
        
        # Check convergence
        diff = sum(abs(new_rank[node] - rank[node]) for node in nodes)
        if diff < tol:
            break
        rank = new_rank
    
    return rank

# Example usage:
graph = {
    'A': ['B', 'C'],
    'B': ['A', 'D'],
    'C': ['D'],
    'D': ['C']
}
print(page_rank(graph))