
# !pip install numpy pandas matplotlib seaborn scikit-learn scipy
# Question 12: logistic regression with 5-fold cross-validation
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression

# Sample bank-customer records, stored column by column (ten customers).
data = {
    'CustomerID': list(range(301, 311)),
    'Age': [25, 40, 30, 50, 35, 28, 45, 32, 38, 27],
    'Balance(₹K)': [50, 200, 150, 300, 120, 80, 250, 100, 180, 60],
    'Loan': [
        'No', 'Yes', 'No', 'Yes', 'No',
        'No', 'Yes', 'No', 'Yes', 'No',
    ],
    'CreditCard': [
        'Yes', 'Yes', 'No', 'Yes', 'No',
        'Yes', 'Yes', 'No', 'Yes', 'No',
    ],
}
df = pd.DataFrame(data)

# Turn the Yes/No columns into integer codes (No -> 0, Yes -> 1).
# The encoder is re-fitted for every column, so after the loop it holds the
# classes of the last column processed ('CreditCard').
label_enc = LabelEncoder()
for binary_column in ('Loan', 'CreditCard'):
    df[binary_column] = label_enc.fit_transform(df[binary_column])

# Predict credit-card ownership from age, balance and loan status.
feature_columns = ['Age', 'Balance(₹K)', 'Loan']
X = df[feature_columns]
y = df['CreditCard']

# Score a default logistic-regression model with 5-fold cross-validation.
log_reg = LogisticRegression()
log_reg_scores = cross_val_score(log_reg, X, y, scoring='accuracy', cv=5)

# Report the per-fold accuracies followed by their mean.
print("===== Logistic Regression (5-Fold Cross Validation) =====")
print("Fold Accuracies:", log_reg_scores)
print(f"Average Accuracy: {log_reg_scores.mean():.2f}")



# Question 12: decision tree with manual 5-fold cross-validation
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
import numpy as np
from matplotlib import pyplot as plt
from sklearn import tree

# Evaluate a depth-limited decision tree on the Iris dataset with a manual
# 5-fold cross-validation loop, plotting the tree fitted in each fold.
data = load_iris()
X, y = data.data, data.target

max_depth = 4
dtree = DecisionTreeClassifier(max_depth=max_depth, random_state=42)

# The Iris samples are ordered by class, so unshuffled folds would hold out
# contiguous, class-skewed slices (the last fold would contain one class only)
# and give an unrepresentative accuracy estimate. Shuffle with a fixed seed so
# the folds are both representative and reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
kf_scores = []

for fold_number, (train_index, test_index) in enumerate(kf.split(X), start=1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # fit() retrains from scratch, so one estimator can be reused across folds.
    dtree.fit(X_train, y_train)

    accuracy = dtree.score(X_test, y_test)
    kf_scores.append(accuracy)

    # Draw the tree learned on this fold, labelled with the dataset's own
    # feature and class names so the split rules are readable.
    fig = plt.figure()
    tree.plot_tree(
        dtree,
        feature_names=list(data.feature_names),
        class_names=list(data.target_names),
    )
    plt.title(f"Decision tree - fold {fold_number} (accuracy {accuracy:.2f})")
    plt.show()
    # Close the figure explicitly so figures do not pile up across folds.
    plt.close(fig)


print("K-Fold Accuracy Scores:", kf_scores)
print("Mean K-Fold Accuracy:", np.mean(kf_scores))



