# Practical 1: Web Scraping in Social Media
import requests
from bs4 import BeautifulSoup

url = "https://example.com"

# requests applies no timeout by default, so a server that accepts the
# connection but never answers would block this script indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

try:
    # Download the page; raise_for_status() converts 4xx/5xx responses into
    # an HTTPError, which is handled below as a RequestException.
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()

    # Parse the HTML and list the text of every <p> element.
    soup = BeautifulSoup(response.text, "html.parser")
    paragraphs = soup.find_all('p')
    print(f"Found {len(paragraphs)} paragraphs:")
    for para in paragraphs:
        print(f"- {para.get_text().strip()}")

    # The <title> element is optional, so only print it when present.
    title = soup.find('title')
    if title:
        print(f"\nPage Title:{title.get_text().strip()}")

except requests.exceptions.RequestException as e:
    # Covers connection errors, timeouts and HTTP error statuses.
    print(f"An error occurred during the request: {e}")

except Exception as e:
    # Last-resort boundary for the script: report anything else (e.g. a
    # parsing failure) instead of ending with a traceback.
    print(f"An unexpected error occurred: {e}")



# Practical 2: Twitter data collection and analysis

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tweeterpy import TweeterPy
from tweeterpy.util import User, Tweet
from pandas import json_normalize
import json

# Create the TweeterPy client and search for the keyword "IPL".
twitter = TweeterPy()
data = twitter.search("IPL")

# One flat dict per successfully processed tweet (tweet fields + user fields).
combined_data = []

# Flattened user records keyed by screen name, so that a user who authored
# several of the returned tweets is only looked up once.
user_rows_by_screen_name = {}

for tweet_data in data['data'][1:]:  # Start from index 1 to skip header if present
    try:
        # Build the tweet's dict representation once and reuse it.
        tweet_fields = Tweet(tweet_data).dict()
        screen_name = tweet_fields['screen_name']

        if screen_name not in user_rows_by_screen_name:
            user_data = twitter.get_user_data(screen_name)
            # json_normalize flattens nested keys into dotted column names;
            # the single resulting record is the flat user row.
            user_rows_by_screen_name[screen_name] = (
                json_normalize(user_data).to_dict(orient='records')[0]
            )

        # User fields are merged last, so on a key clash they override the
        # tweet's value (same precedence as before).
        combined_data.append({
            **json_normalize(tweet_fields).to_dict(orient='records')[0],
            **user_rows_by_screen_name[screen_name],
        })

    except Exception as e:
        # Best-effort: report the failing tweet and continue with the rest.
        # Guard the id lookup so a non-dict entry cannot raise from inside
        # the handler and abort the whole loop.
        tweet_id = tweet_data.get('id') if isinstance(tweet_data, dict) else None
        print(f"Error processing tweet {tweet_id}: {e}")

# One row per processed tweet; print a summary of columns and dtypes.
df = pd.DataFrame(combined_data)
df.info()

# Pie chart: how the collected tweets are distributed over the values of
# the 'source' column.
source_counts = df['source'].value_counts()

plt.figure(figsize=(10, 6))
source_counts.plot.pie(autopct='%1.1f%%', startangle=140)
plt.axis('equal')  # equal aspect ratio keeps the pie circular
plt.title('Distribution of Tweet Sources')
plt.tight_layout()
plt.show()

# Bar chart: the ten screen names that occur most often in the collected
# tweets (value_counts already sorts by descending frequency).
tweets_per_user = df['screen_name'].value_counts().head(10)

plt.figure(figsize=(10, 6))
tweets_per_user.plot.bar(color='skyblue')
plt.xlabel('User Name')
plt.ylabel('Number of Tweets')
plt.title('Top 10 Users by Number of Tweets')
plt.xticks(rotation=45, ha='right')  # slant labels so long names stay readable
plt.tight_layout()
plt.show()

# Parse 'legacy.created_at' into datetimes; values that cannot be parsed
# become NaT instead of raising.
# NOTE(review): presumably this is the account-creation timestamp coming from
# the flattened user data -- confirm against the TweeterPy response schema.
df['legacy.created_at'] = pd.to_datetime(df['legacy.created_at'], errors='coerce')
df['user_creation_year'] = df['legacy.created_at'].dt.year

# Discard rows without a usable creation year.
df.dropna(subset=['user_creation_year'], inplace=True)

# df holds one row per tweet, so a user with several tweets appears several
# times. Count every screen name once so the chart really shows users.
unique_users = df.drop_duplicates(subset='screen_name')
user_creation_counts = unique_users['user_creation_year'].value_counts().sort_index()

# .dt.year is float whenever the column contained NaT; cast the years so the
# tick labels read "2015" rather than "2015.0".
user_creation_counts.index = user_creation_counts.index.astype(int)

plt.figure(figsize=(10, 6))
user_creation_counts.plot(kind='bar', color='skyblue')
plt.title('User Creation Year-wise')
plt.xlabel('Year')
plt.ylabel('Number of Users Created')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()