# 7 ML
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Telecom customer churn: load the raw CSV, clean it, derive and standardise
# a few numeric features, split into train/test sets, and save the cleaned
# table. Columns whose names contain spaces ('Total Charges', ...) are read
# from the input file; the space-free names ('TotalCharges', ...) are derived
# columns created by this script.

# Task 1: load the raw dataset.
df = pd.read_csv("telecom_customer_churn.csv")

# Task 2: quick inspection of the data.
print(df.head())
df.info()  # info() prints its own report and returns None, so no print()
print(df.describe())

# Task 3: forward-fill missing values (fillna(method='ffill') is deprecated).
df = df.ffill()

# Task 4: remove exact duplicate rows.
df = df.drop_duplicates()

# Task 5: normalise the gender labels to lower case.
df['Gender'] = df['Gender'].str.lower()

# Task 6: numeric copy of the total charges; unparseable values become NaN.
df['TotalCharges'] = pd.to_numeric(df['Total Charges'], errors='coerce')

# Task 7: drop outliers more than 3 standard deviations from the mean.
# The z-score is computed entirely from the numeric column so the mean/std
# match the values being tested. Rows whose charge could not be parsed have a
# NaN z-score, fail the comparison, and are dropped as well.
total_charges = df['TotalCharges']
z_scores = (total_charges - total_charges.mean()) / total_charges.std()
# .copy() so the column assignments below write to an independent frame
# instead of a slice of the unfiltered one (avoids SettingWithCopyWarning).
df = df[z_scores.abs() < 3].copy()

# Task 8: derived tenure column (months multiplied by 30).
df['TenureinMonths'] = df['Tenure in Months'] * 30

# Task 9: standardise the numeric features into the derived columns.
# NOTE: this overwrites the 'TenureinMonths' value from task 8; standardising
# is scale-invariant, so the result is the same as scaling the task 8 column.
scaler = StandardScaler()
source_columns = ['Monthly Charge', 'TotalCharges', 'Tenure in Months']
target_columns = ['MonthlyCharge', 'TotalCharges', 'TenureinMonths']
scaled_values = scaler.fit_transform(df[source_columns])
for position, target in enumerate(target_columns):
    df[target] = scaled_values[:, position]

# Task 10: separate features from the target and make an 80/20 split.
X = df.drop('Churn Category', axis=1)
y = df['Churn Category']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Task 11: save the cleaned dataset (the original name lacked the extension).
df.to_csv("Cleaned_telecom_customer_churn.csv", index=False)
# Comments
# Post a Comment