Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
import pandas as pd
from IPython.display import display
from IPython.display import Image

# First table: subjects 1-5.
raw_data = {
    'subject_id': ['1', '2', '3', '4', '5'],
    'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
    'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches'],
}
df_a = pd.DataFrame(data=raw_data, columns=['subject_id', 'first_name', 'last_name'])

# Second table: subjects 4-8 (ids '4' and '5' also appear in df_a).
raw_data = {
    'subject_id': ['4', '5', '6', '7', '8'],
    'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
    'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan'],
}
df_b = pd.DataFrame(data=raw_data, columns=['subject_id', 'first_name', 'last_name'])

# Third table: one test_id per subject_id.
raw_data = {
    'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
    'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16],
}
df_n = pd.DataFrame(data=raw_data, columns=['subject_id', 'test_id'])

# Stack df_b underneath df_a (row-wise concatenation).
# The original index labels are kept, so 0-4 appear twice.
df_new = pd.concat([df_a, df_b], axis=0)

# Place df_a and df_b side by side (column-wise concatenation).
pd.concat([df_a, df_b], axis=1)

# Merge on the shared 'subject_id' column (inner join by default).
pd.merge(df_new, df_n, on='subject_id')

# Same merge, naming the key column separately for the left and right frames.
pd.merge(df_new, df_n, left_on='subject_id', right_on='subject_id')

# Outer join: keep every subject_id found in either frame.
pd.merge(df_a, df_b, how='outer', on='subject_id')

# Inner join: keep only subject_ids found in both frames.
pd.merge(df_a, df_b, how='inner', on='subject_id')

# Right join: keep every row of df_b.
pd.merge(df_a, df_b, how='right', on='subject_id')

# Left join: keep every row of df_a.
pd.merge(df_a, df_b, how='left', on='subject_id')

# Left join with custom suffixes for the overlapping column names.
pd.merge(df_a, df_b, how='left', on='subject_id', suffixes=('_left', '_right'))

# Merge by matching index labels instead of a key column.
pd.merge(df_a, df_b, left_index=True, right_index=True)


Get the maximum value of a column in a Pandas DataFrame

Code Block
import pandas as pd
import numpy as np
 
# Sample data: twelve rows with a name, an age and a score.
d = {
    'Name': [
        'Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
        'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine',
    ],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}

df = pd.DataFrame(data=d, columns=['Name', 'Age', 'Score'])

# Maximum of every column at once: Name 'raghu', Age 26, Score 89
# (the resulting Series has dtype object because the columns are mixed).
df.max()

# Maximum of the 'Age' column only: 26
df['Age'].max()

# Maximum of the 'Name' column only: 'raghu'
# (strings compare character by character, and lowercase sorts after uppercase).
df['Name'].max()


Get the minimum value of a column in a Pandas DataFrame

Code Block
import pandas as pd
import numpy as np
 
# Sample data: twelve rows with a name, an age and a score.
d = {
    'Name': [
        'Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
        'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine',
    ],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}

df = pd.DataFrame(data=d, columns=['Name', 'Age', 'Score'])

# Minimum of every column at once: Name 'Alex', Age 22, Score 31
# (the resulting Series has dtype object because the columns are mixed).
df.min()

# Minimum of the 'Age' column only: 22
df['Age'].min()

# Minimum of the 'Name' column only: 'Alex'
# (strings compare character by character, and uppercase sorts before lowercase).
df['Name'].min()


Select the rows with the maximum and minimum values in a Pandas DataFrame

Code Block
import pandas as pd
import numpy as np
 
# Sample data: twelve rows with a name, an age and a score.
d = {
    'Name': [
        'Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
        'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine',
    ],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}

df = pd.DataFrame(data=d, columns=['Name', 'Age', 'Score'])

# idxmax() gives the index label of the highest Score;
# .loc then returns that whole row (Alex, 24, 89).
df.loc[df['Score'].idxmax(), :]

# idxmin() gives the index label of the lowest Score;
# .loc then returns that whole row (raghu, 23, 31).
df.loc[df['Score'].idxmin(), :]


Get the unique values (rows) of a Pandas DataFrame

Code Block
Create Dataframe:
import pandas as pd
import numpy as np
 
# Create a DataFrame in which some (Name, Age) rows are repeated.
d = {
    'Name': ['Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
             'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine'],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24]
}

df = pd.DataFrame(d, columns=['Name', 'Age'])

# Get the unique rows, keeping the first occurrence of each duplicate.
# print() is called as a function so the snippet runs on Python 3
# (the Python 2 print statement is a SyntaxError there).
print(df.drop_duplicates())

# Get the unique rows, keeping the last occurrence of each duplicate instead.
print(df.drop_duplicates(keep='last'))


Get the list of column headers (column names) of a Pandas DataFrame

Code Block
import pandas as pd
import numpy as np
 
# Sample data: twelve rows with a name, an age and a score.
d = {
    'Name': [
        'Alisa', 'Bobby', 'jodha', 'jack', 'raghu', 'Cathrine',
        'Alisa', 'Bobby', 'kumar', 'Alisa', 'Alex', 'Cathrine',
    ],
    'Age': [26, 24, 23, 22, 23, 24, 26, 24, 22, 23, 24, 24],
    'Score': [85, 63, 55, 74, 31, 77, 85, 63, 42, 62, 89, 77],
}

df = pd.DataFrame(data=d, columns=['Name', 'Age', 'Score'])

# Method 1: convert the array of column labels into a list.
list(df.columns.values)

# Method 2: iterating a DataFrame yields its column labels,
# so list() on the frame itself gives the same result.
list(df)


Generator

Random number generation

...