import os
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Each directory holds the raw source files for one class. The paths are
# relative, so they are resolved against the current working directory.
js_path = "javascript"
py_path = "python"

# corpus[i] is the full text of one file and labels[i] is its class:
# 1 for files found under js_path, 0 for files found under py_path.
corpus, labels = [], []

for path, label in [(js_path, 1), (py_path, 0)]:
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # corpus order stable from one run to the next.
    for filename in sorted(os.listdir(path)):
        full_path = os.path.join(path, filename)
        # listdir() also yields subdirectories, which open() cannot read.
        if not os.path.isfile(full_path):
            continue
        # Decode explicitly as UTF-8 instead of the locale default so the
        # same files produce the same text on every platform; bytes that do
        # not decode are dropped (errors="ignore") rather than aborting.
        with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
            text = f.read()
        corpus.append(text)
        labels.append(label)

# Hold out 33% of the documents for evaluation. The fixed seed makes the
# shuffle, and therefore the reported metrics, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    corpus, labels, test_size=0.33, random_state=42
)

# HashingVectorizer is stateless (it learns no vocabulary), so fit_transform()
# and transform() apply the same token-to-column hashing to both partitions.
# From here on X_train / X_test are feature matrices, not lists of strings.
vec = HashingVectorizer()
X_train = vec.fit_transform(X_train)
X_test = vec.transform(X_test)

# Seed the forest so that repeated runs on the same training data build the
# same trees and report the same scores.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, pred))
# Pin the label order so the matrix is always 2x2 (rows = true class,
# columns = predicted class, in the order 0 then 1), even when one class is
# absent from both the test labels and the predictions.
print("Confusion Matrix:\n", confusion_matrix(y_test, pred, labels=[0, 1]))