Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
import pandas as pd
import numpy as np
 
# Sample data: one equally sized list per column
d = dict(
    Name=['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
          'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    Age=[26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    Score=[85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
)

df = pd.DataFrame(data=d, columns=['Name', 'Age', 'Score'])

# Column-wise maximum of every column: Name 'raghu', Age 26, Score 89.
# The result mixes strings and integers, so it is shown with dtype object.
df.max(axis=0)

# Maximum of the 'Age' column only: 26
df.Age.max()

# Maximum of the 'Name' column: 'raghu'
# (strings are compared character by character, lowercase sorts after uppercase)
df.Name.max()


Get the minimum value of a column in a Pandas DataFrame

Code Block
import pandas as pd
import numpy as np
 
# Sample data keyed by column name
d = {
    'Name': [
        'Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
        'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine',
    ],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}

# The dict keys are already in the desired column order
df = pd.DataFrame(d, columns=list(d))

# Column-wise minimum of every column: Name 'Alex', Age 22, Score 31 (dtype object)
df.min(axis=0)

# Minimum of the 'Age' column only: 22
df.Age.min()

# Minimum of the 'Name' column: 'Alex'
df.Name.min()


Select the rows with the maximum and minimum value in a Pandas DataFrame

Code Block
import pandas as pd
import numpy as np
 
# Sample data: one equally sized list per column
d = dict(
    Name=['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
          'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    Age=[26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    Score=[85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
)

df = pd.DataFrame(data=d, columns=['Name', 'Age', 'Score'])

# idxmax() gives the index label of the highest 'Score';
# .loc then returns that whole row (Alex, 24, 89)
df.loc[df.Score.idxmax()]

# Same idea for the lowest 'Score' (raghu, 23, 31)
df.loc[df.Score.idxmin()]


Get the unique values (rows) of a Pandas DataFrame

Code Block
Create Dataframe:
import pandas as pd
import numpy as np
 
# Create a DataFrame with some fully duplicated (Name, Age) rows
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
}

df = pd.DataFrame(d, columns=['Name', 'Age'])

# Get the unique rows, keeping the first occurrence of each duplicate.
# print() is called as a function so the snippet runs on Python 3.
print(df.drop_duplicates())

# Get the unique rows, keeping the last occurrence of each duplicate instead
print(df.drop_duplicates(keep='last'))


Get the list of column headers or column names in a Pandas DataFrame

Code Block
import pandas as pd
import numpy as np
 
# Sample data keyed by column name
d = {
    'Name': [
        'Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
        'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine',
    ],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}

df = pd.DataFrame(data=d, columns=['Name', 'Age', 'Score'])

# Method 1: turn the array behind df.columns into a list -> Name, Age, Score
list(df.columns.values)

# Method 2: iterating a DataFrame yields its column names, so list() collects them
list(df)


Delete or drop duplicate rows of a Pandas DataFrame

Code Block
import pandas as pd
import numpy as np
 
# Sample data: one equally sized list per column
d = dict(
    Name=['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
          'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    Age=[26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    Score=[85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
)

df = pd.DataFrame(data=d, columns=['Name', 'Age', 'Score'])

# Remove rows that repeat an earlier row in every column (first occurrence kept)
df.drop_duplicates(keep='first')

# Same, but keep the last occurrence of each repeated row
df.drop_duplicates(keep='last')

# Treat rows as duplicates when only 'Name' matches; keep the last row per name
df.drop_duplicates(subset=['Name'], keep='last')


Drop or delete rows in a Pandas DataFrame with conditions

Code Block
import pandas as pd  # required for pd.DataFrame below
import numpy as np

# Create a DataFrame
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}

df = pd.DataFrame(d, columns=['Name', 'Age', 'Score'])

# Each expression below returns a new DataFrame; df itself is not modified.

# Drop observations (rows) by index label: removes the rows labelled 1 and 2
df.drop([1, 2])

# Drop rows by condition: keep only the rows whose Name is not 'Alisa'
df[df.Name != 'Alisa']

# Drop a row by position: df.index[2] looks up the label of the third row
df.drop(df.index[2])

# Drop the bottom 3 rows by slicing off the tail
df[:-3]


Generator

Random number generation

...