#EX-8-TF-IDF and BoW-Distribution

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Toy sentiment corpus: four positive sentences followed by two negative ones.
data = dict(
    text=[
        "I love programming",
        "Python is great",
        "I enjoy machine learning",
        "TensorFlow is a powerful tool",
        "I hate bugs",
        "Debugging is tedious",
    ],
    label=['positive'] * 4 + ['negative'] * 2,
)

# Write the corpus to a CSV file, then load it back so the following steps
# operate on the data as read from disk.
csv_filename = 'texts.csv'
df_to_save = pd.DataFrame.from_dict(data)
df_to_save.to_csv(csv_filename, index=False, encoding='utf-8')
print(f"\n'{csv_filename}' created successfully in Colab environment.")
print("--------------------------------------------------\n")

df = pd.read_csv(csv_filename)
df.head()

# Replace the string labels with the integer codes produced by LabelEncoder.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df['label'])
df['label'] = encoded_labels

df.head()

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)
X_train, X_test

# Bag of Words: learn the vocabulary from the training text only, then reuse
# the fitted vectorizer on the test text. Results are converted to dense arrays.
vectorizer_bow = CountVectorizer()
bow_train_matrix = vectorizer_bow.fit_transform(X_train)
bow_test_matrix = vectorizer_bow.transform(X_test)
X_train_bow = bow_train_matrix.toarray()
X_test_bow = bow_test_matrix.toarray()

# TF-IDF: same procedure, with the vectorizer again fitted on the training
# text only.
vectorizer_tfidf = TfidfVectorizer()
tfidf_train_matrix = vectorizer_tfidf.fit_transform(X_train)
tfidf_test_matrix = vectorizer_tfidf.transform(X_test)
X_train_tfidf = tfidf_train_matrix.toarray()
X_test_tfidf = tfidf_test_matrix.toarray()

def build_model(input_dim: int) -> Sequential:
    """Build and compile a small feed-forward binary classifier.

    The network is Dense(16, relu) -> Dense(8, relu) -> Dense(1, sigmoid),
    compiled with the Adam optimizer, binary cross-entropy loss and an
    accuracy metric.

    Args:
        input_dim: Number of features in each input vector.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input object; passing
        # `input_dim` to the first Dense layer is a legacy form that newer
        # Keras releases warn about.
        tf.keras.Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='sigmoid'),  # Binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Bare expression: shows the Bag-of-Words training matrix and its shape when
# run interactively; its value is discarded otherwise.
X_train_bow, X_train_bow.shape

# --- Bag of Words: train a fresh network, then score it on the held-out rows.
bow_feature_count = X_train_bow.shape[1]
model_bow = build_model(bow_feature_count)
model_bow.fit(X_train_bow, y_train, epochs=10, batch_size=2, verbose=1)

bow_scores = model_bow.evaluate(X_test_bow, y_test)
loss, accuracy = bow_scores
print(f'BoW Model Accuracy: {accuracy:.2f}')

# --- TF-IDF: same architecture and training settings on the TF-IDF features.
tfidf_feature_count = X_train_tfidf.shape[1]
model_tfidf = build_model(tfidf_feature_count)
model_tfidf.fit(X_train_tfidf, y_train, epochs=10, batch_size=2, verbose=1)

tfidf_scores = model_tfidf.evaluate(X_test_tfidf, y_test)
loss, accuracy = tfidf_scores
print(f'TF-IDF Model Accuracy: {accuracy:.2f}')

# Classify one unseen sentence with each trained model.
#
# Both networks end in a single sigmoid unit, so predict() yields one
# probability per input row. np.argmax over that lone value is always 0, which
# made every prediction come out as the fallback label. The probability is
# thresholded at 0.5 instead. The corpus only has 'negative' and 'positive'
# labels, which LabelEncoder encodes as 0 and 1 (sorted order), so the
# fallback label is "Negative" rather than "Neutral".

# TF-IDF model
text = "Python is programming"
text = vectorizer_tfidf.transform([text]).toarray()
results = model_tfidf.predict(text)

a = int(results[0][0] >= 0.5)  # 1 -> positive, 0 -> negative
"Positive" if a == 1 else "Negative"

# Bag-of-Words model
text = "Python is programming"
text = vectorizer_bow.transform([text]).toarray()
results = model_bow.predict(text)

a = int(results[0][0] >= 0.5)  # 1 -> positive, 0 -> negative
"Positive" if a == 1 else "Negative"


## Feed-forward neural network
# ----------------------------
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Train a feed-forward classifier on TF-IDF features of the text column.
from tensorflow.keras.layers import Input

# Load dataset
df = pd.read_csv('Q1ecommerceDataset.csv')  # Replace with actual filename if different
df.dropna(inplace=True)  # drop every row that has a missing value

# Assume 'description' column has text, and 'category' is the target
X_text = df['description']
y = df['category']

# Encode target: category names -> integer codes -> one-hot rows
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_cat = to_categorical(y_encoded)

# Split the raw text BEFORE vectorizing so that the TF-IDF vocabulary and IDF
# weights are learned from the training rows only. Fitting the vectorizer on
# the full corpus would leak test-set statistics into the training features.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X_text, y_cat, test_size=0.2, random_state=42
)

# TF-IDF representation (at most 1000 features), fitted on the training text
vectorizer = TfidfVectorizer(max_features=1000)
X_train = vectorizer.fit_transform(X_train_text).toarray()
X_test = vectorizer.transform(X_test_text).toarray()

# Model: two hidden ReLU layers and a softmax output with one unit per class
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))  # explicit input layer instead of legacy input_dim
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(y_cat.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE: the test split is also passed as validation data, so the accuracy
# reported below is measured on rows already used for per-epoch monitoring.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Evaluate
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2f}")
