Posts

Showing posts from 2023

8. ML

import pandas as pd

# Task 1: load the raw real-estate data and normalise column names
# (strip stray whitespace so later column lookups don't fail).
df = pd.read_csv("realestate.csv")
df.columns = df.columns.str.strip()

# Task 2: discard rows with any missing value.
df.dropna(inplace=True)

# Task 4: keep transactions from 2013 onward that are within 500 (distance units
# are whatever the CSV uses — presumably metres; confirm against the data source).
subset = df[(df['transactiondate'] >= 2013) & (df['distance'] <= 500)]

# Task 5: one-hot encode the number-of-stores column.
subset = pd.get_dummies(subset, columns=['stores'])

# Task 6: mean unit-area price per house age (computed here; the original
# script never prints it, and that behaviour is preserved).
average_price_by_age = subset.groupby('houseage')['unit_area'].mean()

# Task 7: trim the 5% tails of unit_area to drop price outliers.
lo = subset['unit_area'].quantile(0.05)
hi = subset['unit_area'].quantile(0.95)
subset = subset[(subset['unit_area'] >= lo) & (subset['unit_area'] <= hi)]

print(subset.head())

7. ML

# Telecom customer churn preprocessing pipeline (load → clean → encode → scale → split).
# NOTE(review): this snippet is truncated by the page excerpt (it ends mid-statement
# at "X = df.drop('Churn ...") — the full script is not visible here.
# NOTE(review): visible bugs to fix in the full source:
#   - `pd.to_numric` is a typo for `pd.to_numeric` (raises AttributeError).
#   - `Scaler.fit_transform[...]` uses a capital-S name (`scaler` was defined),
#     square brackets instead of parentheses, and omits the `df` argument.
#   - Column names are used inconsistently: 'Total Charges' vs 'TotalCharges',
#     'Tenure in Months' vs 'TenureinMonths' vs 'TenurenMonths' — verify against
#     the actual CSV header before relying on any of them.
#   - The z-score filter mixes the derived 'TotalCharges' with the original
#     'Total Charges' column for mean/std — presumably both should be the same column.
#   - `df.fillna(method='ffill', inplace=True)` is deprecated in modern pandas;
#     prefer `df.ffill(inplace=True)`.
 import pandas as pd from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler #task 1: df = pd.read_csv("telecom_customer_churn.csv") #task 2: print(df.head()) print(df.info()) print(df.describe()) #task 3: df.fillna(method='ffill', inplace=True) #task 4: df.drop_duplicates(inplace=True) #task 5: df['Gender'] = df['Gender'].str.lower() #task 6: df['TotalCharges'] = pd.to_numric(df['Total Charges'], errors='coerce') #task 7: z_scores = (df['TotalCharges'] - df['Total Charges'].mean())/df['Total Charges'].std() df = df[(z_scores.abs() < 3)] #task 8: df['TenureinMonths'] =df['Tenure in Months']*30 #task 9: scaler = StandardScaler() df[['MonthlyCharge', 'TotalCharges', 'TenurenMonths']] = Scaler.fit_transform[['Monthly Charge', 'Total Charges', 'Tenure in Months']]) #task 10: X = df.drop('Churn ...

6. ML

# Loads the same customers dataset from CSV, Excel and JSON, inspects each
# (head/info/null counts), then fills missing values with 0 and drops duplicates.
# NOTE(review): this snippet is truncated by the page excerpt (ends at "pri...").
# NOTE(review): visible issues to fix in the full source:
#   - print("n\Json Data:") has the escape reversed — should be "\nJSON Data:".
#   - "Excell Data:" is a typo for "Excel Data:" (runtime string; fix at source).
#   - Filling every missing value with 0 is questionable for non-numeric columns
#     (names, emails) — presumably intentional for the exercise, but verify.
#   - CSV dedup uses all columns while Excel/JSON dedup on first/last name only;
#     the inconsistency looks unintentional — confirm which is wanted.
 import pandas as pd csv_data = pd.read_csv('customers.csv') excel_data = pd.read_excel('customers.xlsx' , engine = 'openpyxl') import json with open('customers.json', 'r') as json_file:     json_data = json.load(json_file) json_data = pd.DataFrame(json_data) print("CSV Data:") print(csv_data.head()) print("\nExcell Data:") print(excel_data.head()) print("n\Json Data:") print(json_data.head()) print(csv_data.info()) print(csv_data.isnull().sum()) print(excel_data.info()) print(excel_data.isnull().sum()) print(json_data.info()) print(json_data.isnull().sum()) #replacing missing values in csv data csv_data.fillna(0, inplace=True) csv_data.drop_duplicates(inplace=True) excel_data.fillna(0, inplace=True) excel_data.drop_duplicates(subset=['first_name', 'last_name'],inplace=True) json_data.fillna(0, inplace=True) json_data.drop_duplicates(subset=['first_name', 'last_name'],inplace=True) pri...

5. ML

# Car-evaluation classification: one-hot encodes the six categorical features,
# trains a RandomForest on an 80/20 split, and reports accuracy plus a
# classification report.
# NOTE(review): this snippet is truncated by the page excerpt (ends at "Classifi...").
# NOTE(review): `target_names=data['class'].unique()` is risky —
# classification_report orders its rows by sorted label, while `.unique()`
# returns labels in first-appearance order, so the names can be paired with the
# wrong rows. Use `sorted(data['class'].unique())` or pass `labels=` explicitly.
 import pandas as pd from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score, classification_report data = pd.read_csv("car_evaluation.csv", header=None) columns = ["buying","maint","doors","persons","lug_boot","safety","class"] data.columns = columns data_encoded = pd.get_dummies(data, columns=columns[:-1], drop_first=True) X=data_encoded.drop('class' , axis = 1) y=data_encoded['class'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) rf_classifier = RandomForestClassifier(random_state=42) rf_classifier.fit(X_train, y_train) y_pred = rf_classifier.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:" , accuracy) class_report = classification_report(y_test, y_pred, target_names=data['class'].unique()) print("Classifi...

4. ML

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Elbow-method analysis on the Iris dataset: standardise the features, run
# KMeans for k = 1..10, and plot SSE (inertia) against k.
data = pd.read_csv("Iris.csv")
data.head()

# Drop the target; keep the remaining columns as features.
# NOTE(review): Iris.csv commonly carries an 'Id' column too — if present it
# should probably also be dropped before clustering; confirm against the file.
X = data.drop('Species', axis=1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Sum of squared distances (inertia) for each candidate cluster count.
sse = []
for k in range(1, 11):
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X_scaled)
    sse.append(kmeans.inertia_)

plt.figure(figsize=(8, 6))
plt.plot(range(1, 11), sse, marker='o')
# Bug fix: the original called plt.plot('Elbow Method'), which plots the string
# as data instead of titling the chart.
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
# Bug fix: the original called plt.xlabel twice, so the y-axis was never labelled.
plt.ylabel('SSE (Sum of Squared Distance)')
plt.show()

3. ML

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Train a linear SVM on the scikit-learn handwritten-digits dataset and report
# held-out accuracy (80/20 split, fixed seed for reproducibility).
digits = datasets.load_digits()
features = digits.data
targets = digits.target

X_train, X_test, Y_train, Y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)

# Linear kernel with default-ish regularisation (C=1).
clf = SVC(kernel='linear', C=1)
clf.fit(X_train, Y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(Y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

2. ML

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Logistic-regression baseline on the Pima diabetes dataset: the last CSV
# column is the label, everything before it is a feature.
data = pd.read_csv('diabetes.csv')
x = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)

# Standardise features: fit the scaler on the training split only.
sc_x = StandardScaler()
xtrain = sc_x.fit_transform(xtrain)
# Bug fix: the original called fit_transform on the test set, which re-fits the
# scaler on test statistics (data leakage / inconsistent scaling). The test set
# must be transformed with the training-set mean and std.
xtest = sc_x.transform(xtest)

classifier = LogisticRegression(random_state=0)
classifier.fit(xtrain, ytrain)

y_pred = classifier.predict(xtest)
print ("Accuracy : ", accuracy_score(ytest, y_pred))

1. ML

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

# Project the wine dataset onto its first two principal components and colour
# the scatter by the 'Alcohol' column (used here as the class label).
data = pd.read_csv('wine.csv')
y = data['Alcohol']
X = data.drop('Alcohol', axis=1)

# PCA is scale-sensitive, so standardise features first.
X_scaled = StandardScaler().fit_transform(X)
X_pca = PCA(n_components=2).fit_transform(X_scaled)

plt.figure(figsize=(10, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis')
# NOTE(review): "principle" is a typo for "principal" in these labels, but the
# strings are reproduced verbatim to keep output identical to the original.
plt.xlabel('principle component 1')
plt.ylabel('principle component 2')
plt.title('PCA: Wine Dataset')
plt.colorbar(label='Wine Class')
plt.show()