...
Code Block |
---|
import pandas as pd
import numpy as np

# Create a DataFrame
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}
df = pd.DataFrame(d, columns=['Name', 'Age', 'Score'])

# Maximum of every column: Name 'raghu', Age 26, Score 89.
# Strings compare by code point, so lowercase names sort after capitalised ones.
column_maxima = df.max()
print(column_maxima)

# Maximum of the 'Age' column: 26
max_age = df['Age'].max()
print(max_age)

# Maximum of the 'Name' column: 'raghu'
max_name = df['Name'].max()
print(max_name)
...
Code Block |
---|
import pandas as pd
import numpy as np

# Create a DataFrame
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}
df = pd.DataFrame(d, columns=['Name', 'Age', 'Score'])

# Minimum of every column: Name 'Alex', Age 22, Score 31.
column_minima = df.min()
print(column_minima)

# Minimum of the 'Age' column: 22
min_age = df['Age'].min()
print(min_age)

# Minimum of the 'Name' column: 'Alex' (string comparison, 'Alex' < 'Alisa')
min_name = df['Name'].min()
print(min_name)
...
Code Block |
---|
import pandas as pd
import numpy as np

# Create a DataFrame
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}
df = pd.DataFrame(d, columns=['Name', 'Age', 'Score'])

# Row holding the maximum 'Score': idxmax() returns the index label of the
# maximum, and .loc fetches that row (here: Alex, 24, 89).
max_score_row = df.loc[df['Score'].idxmax()]
print(max_score_row)

# Row holding the minimum 'Score' (here: raghu, 23, 31).
min_score_row = df.loc[df['Score'].idxmin()]
print(min_score_row)
...
Get the unique values (rows) of a Pandas DataFrame
Code Block |
---|
# Create the DataFrame
import pandas as pd
import numpy as np

d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
}
df = pd.DataFrame(d, columns=['Name', 'Age'])

# Unique rows, keeping the first occurrence of each duplicate.
# print() is called as a function so the snippet runs on Python 3.
unique_rows = df.drop_duplicates()
print(unique_rows)

# Unique rows, keeping the last occurrence of each duplicate instead.
unique_rows_keep_last = df.drop_duplicates(keep='last')
print(unique_rows_keep_last)
...
Code Block |
---|
import pandas as pd
import numpy as np

# Create a DataFrame
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}
df = pd.DataFrame(d, columns=['Name', 'Age', 'Score'])

# Method 1: list of column names from the underlying array of labels
column_names = list(df.columns.values)
print(column_names)

# Method 2: iterating a DataFrame yields its column labels
column_names_alt = list(df)
print(column_names_alt)
...
Code Block |
---|
import pandas as pd
import numpy as np

# Create a DataFrame
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}
df = pd.DataFrame(d, columns=['Name', 'Age', 'Score'])

# drop_duplicates() returns a new DataFrame; df itself is left untouched.

# Drop duplicate rows, keeping the first occurrence
deduplicated = df.drop_duplicates()
print(deduplicated)

# Drop duplicate rows, keeping the last occurrence
deduplicated_keep_last = df.drop_duplicates(keep='last')
print(deduplicated_keep_last)

# Drop rows whose 'Name' repeats, keeping the last occurrence of each name
deduplicated_by_name = df.drop_duplicates(['Name'], keep='last')
print(deduplicated_by_name)
...
Drop or delete rows in a Pandas DataFrame with conditions
Code Block |
---|
import pandas as pd

# Create a DataFrame
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}
df = pd.DataFrame(d, columns=['Name', 'Age', 'Score'])

# Each operation below returns a new DataFrame; df itself is not modified.

# Drop observations (rows) by index label
without_rows_1_2 = df.drop([1, 2])
print(without_rows_1_2)

# Drop rows by condition: keep only rows whose Name is not 'Alisa'
without_alisa = df[df.Name != 'Alisa']
print(without_alisa)

# Drop a row by position: df.index[2] is the label of the third row
without_third_row = df.drop(df.index[2])
print(without_third_row)

# Drop the bottom 3 rows
without_last_three = df.iloc[:-3]
print(without_last_three)
...
Code Block |
---|
import pandas as pd
import numpy as np

# Create a DataFrame in wide format: one row per country, one column per metric
d = {
    'countries': ['A', 'B', 'C'],
    'population_in_million': [100, 200, 120],
    'gdp_percapita': [2000, 7000, 15000],
}
df = pd.DataFrame(d, columns=['countries', 'population_in_million', 'gdp_percapita'])

# Reshape from wide to long with melt: every non-id column becomes a
# (metrics, values) pair, giving one row per country/metric combination.
df2 = pd.melt(df, id_vars=['countries'], var_name='metrics', value_name='values')
...
Code Block |
---|
import pandas as pd
import numpy as np

# Create a DataFrame in long format: one row per country/metric combination
d = {
    'countries': ['A', 'B', 'C', 'A', 'B', 'C'],
    'metrics': ['population_in_million', 'population_in_million', 'population_in_million',
                'gdp_percapita', 'gdp_percapita', 'gdp_percapita'],
    'values': [100, 200, 120, 2000, 7000, 15000],
}
df = pd.DataFrame(d, columns=['countries', 'metrics', 'values'])

# Reshape from long to wide with pivot: one row per country, one column per
# distinct value of 'metrics', cells filled from 'values'.
df2 = df.pivot(index='countries', columns='metrics', values='values')
...
Code Block |
---|
import pandas as pd
import numpy as np

# Two-level column header: (semester, subject)
header = pd.MultiIndex.from_product([['Semester1', 'Semester2'], ['Maths', 'Science']])
d = [[12, 45, 67, 56], [78, 89, 45, 67], [45, 67, 89, 90], [67, 44, 56, 55]]
df = pd.DataFrame(d,
                  index=['Alisa', 'Bobby', 'Cathrine', 'Jack'],
                  columns=header)

# Stack the innermost column level (subject) into the row index:
# rows become (student, subject), columns are the semesters.
stacked_df = df.stack()

# Unstack the innermost row level (subject) back into the columns,
# giving (semester, subject) columns again.
unstacked_df = stacked_df.unstack()

# Stack column level 0 (semester) instead:
# rows become (student, semester), columns are the subjects.
stacked_df_lvl = df.stack(level=0)

# Unstack the innermost row level (semester) back into the columns;
# the resulting columns are ordered (subject, semester).
unstacked_df1 = stacked_df_lvl.unstack()
...