
# !pip install numpy pandas matplotlib seaborn scikit-learn scipy
import pandas as pd
import numpy as np

# Sample patient vitals; np.nan marks a reading that was not recorded.
data = {
    'PatientID': [101, 102, 103, 104, 105],
    'Age': [45, 50, np.nan, 60, 55],
    'BloodPressure': [120, np.nan, 130, 125, np.nan],
    'Cholesterol': [200, 220, 210, np.nan, 230],
    'HeartRate': [80, 85, np.nan, 90, 88],
}

# Materialise the records as a DataFrame (one row per patient).
df = pd.DataFrame(data)

# Show the raw table before any cleaning.
print("Original Data:", df, sep="\n")

# Per-column tally of absent and present readings.
print("\nMissing values per column:", df.isna().sum(), sep="\n")
print("\nNon-missing values per column:", df.notna().sum(), sep="\n")

# Impute every gap with a column-specific strategy:
#   Age           -> column mean
#   BloodPressure -> column median
#   Cholesterol   -> next valid observation (backward fill)
#   HeartRate     -> previous valid observation (forward fill)
# The scalar statistics are computed from the un-imputed columns, and
# fillna with a dict only touches the columns named in it.
df = df.fillna(
    {
        'Age': df['Age'].mean(),
        'BloodPressure': df['BloodPressure'].median(),
    }
).assign(
    Cholesterol=lambda frame: frame['Cholesterol'].bfill(),
    HeartRate=lambda frame: frame['HeartRate'].ffill(),
)

print("\nData after imputation:", df, sep="\n")





