# Import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
# Read the data; index_col=0 uses the first CSV column as the row index
df = pd.read_csv("file.csv",index_col=0)
# Data shape: a (number_of_rows, number_of_columns) tuple
df.shape
# Access a column of the dataset by name using bracket notation
df['column1']
# or using attribute notation (only works when the column name is a valid
# Python identifier that does not clash with an existing DataFrame attribute)
df.column1
# Data info: column names, non-null counts and dtypes
df.info()
# Data description: summary statistics of the columns
df.describe()
# Check the unique values of a column
df['column1'].unique()
# Count the duplicated rows in the dataset (0 means there are no duplicates)
df.duplicated().sum()
# Show the first 10 rows of the dataset
df.head(10)
# Count the null values in a column
df['column1'].isnull().sum()
# Check whether the whole dataset has any null values (True/False)
df.isnull().values.any()
# List all columns with their number of null values
df.isnull().sum()
# Sum all the null values in the whole dataset
df.isnull().sum().sum()
# Print all column names, one per line
for column_name in df.columns:
    # The loop body must be indented; an unindented print raises IndentationError
    print(column_name)
For EDA (Exploratory Data Analysis)
- Download and install these libraries through pip3: dataprep, sweetviz
pip3 install dataprep
# or from within a Jupyter notebook cell
!pip3 install dataprep
# ========== Using dataprep ==========
from dataprep.eda import plot
# Use dataprep's plot function on the whole DataFrame to get insights on each variable
report=plot(df)
# Display the generated report in the browser
report.show_browser()
#--------------------------------------------------
!pip3 install sweetvi
report = sv.analyze(df)
report.show_html('report.html')