import pandas as pd
# Load the dataset into a DataFrame (pandas is imported as `pd`).
df = pd.read_csv('the_file_in_.csv')
df.head()  # first 5 rows by default; pass n to choose how many rows to show
df.shape  # (n_rows, n_columns); `shape` is an attribute, not a method
# Check for null values, column by column
df.isnull().sum()  # number of nulls in each column
df.isnull().any()  # True for each column that has at least one null
# Count the number of values in each column that are not null
# (`df.isnull().count()` would count every row, because the boolean mask
# returned by `isnull()` never contains nulls itself).
df.count()
# Check the distribution of the values in a column
df['some_column'].value_counts()
# Divide the dataset into independent features and the dependent target
x = df.drop("Class", axis=1)  # assumes the target column is called "Class"
X = x  # conventional upper-case alias, so either `x` or `X` works downstream
y = df["Class"]
# If the data is imbalanced, we can address it in the following ways
### Under-sampling
# The code below imports LogisticRegression from the sklearn library, splits
# the data into training and testing sets, fits the model on the training
# data, predicts the test-set results, and calculates the accuracy score.
# The accuracy_score function from the sklearn.metrics module is used to
# calculate the accuracy score; it is included in the code below.
# NOTE(review): the snippet directly below trains a plain baseline model and
# does not itself perform any under-sampling.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
)

# `x` (features) and `y` (target) are defined earlier in this file.
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Fit a logistic-regression classifier with default settings on the
# training portion only.
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

# Predict on the held-out rows and report the fraction predicted correctly.
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)