...
Code Block |
---|
a = ["Jake", "John", "Eric"] b = ["John", "Jill"] print(set(a).difference(set(b))) |
Pandas DataFrame / CSV / Join / Merge
...
Create a Pandas DataFrame based on array
Code Block |
---|
dict = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
"capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
"area": [8.516, 17.10, 3.286, 9.597, 1.221],
"population": [200.4, 143.5, 1252, 1357, 52.98] }
import pandas as pd
brics = pd.DataFrame(dict)
print(brics) |
Adding index to a Pandas DataFrame
Code Block |
---|
# Set the index for brics
brics.index = ["BR", "RU", "IN", "CH", "SA"]
# Print out brics with new index values
print(brics) |
Reading CSV by Pandas DataFrame
Code Block |
---|
# Import pandas as pd
import pandas as pd
# Import the cars.csv data: cars
cars = pd.read_csv('cars.csv')
# Print out cars
print(cars) |
Reading a CSV file by Pandas DataFrame with 1st column as index
Code Block |
---|
# Import pandas and cars.csv
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out country column as Pandas Series
print(cars['cars_per_cap'])
# Print out country column as Pandas DataFrame
print(cars[['cars_per_cap']])
# Print out DataFrame with country and drives_right columns
print(cars[['cars_per_cap', 'country']]) |
Save a Pandas DataFrame in CSV format
Code Block |
---|
dict = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
"capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
"area": [8.516, 17.10, 3.286, 9.597, 1.221],
"population": [200.4, 143.5, 1252, 1357, 52.98] }
import pandas as pd
brics = pd.DataFrame(dict)
brics.to_csv('example.csv') |
Save a Pandas DataFrame in CSV format with header and no index
Code Block |
---|
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
'Price': [22000,25000,27000,35000]
}
df = DataFrame(Cars, columns= ['Brand', 'Price'])
export_csv = df.to_csv (r'C:\Users\Ron\Desktop\export_dataframe.csv', index = None, header=True) #Don't forget to add '.csv' at the end of the path
print (df) |
Print partial rows (observations) from a Pandas DataFrame
Code Block |
---|
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out first 4 observations
print(cars[0:4])
# Print out fifth, sixth, and seventh observation
print(cars[4:6]) |
Data access by loc and iloc in Pandas DataFrame - Select columns by index or name
loc is label-based, and iloc is integer index based
Code Block |
---|
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out observation for Japan
print(cars.iloc[2])
# Print out observations for Australia and Egypt
print(cars.loc[['AUS', 'EG']]) |
Sort a Pandas DataFrame in an ascending order
Info |
---|
df.sort_values(by=['Brand'], inplace=True) |
Generator
Random number generation
Code Block |
---|
import random
def lottery():
# returns 6 numbers between 1 and 40
for i in range(6):
yield random.randint(1, 40)
# returns a 7th number between 1 and 15
yield random.randint(1,15)
for random_number in lottery():
print("And the next number is... %d!" %(random_number)) |
Swap variables' value
Code Block |
---|
a = 1
b = 2
a, b = b, a
print(a,b) |
Fibonacci series generator
The first two numbers of the series is always equal to 1, and each consecutive number returned is the sum of the last two numbers - the below code uses only two variables to get the result.
Code Block |
---|
def fib():
a, b = 1, 1
while 1:
yield a
a, b = b, a + b
# testing code
import types
if type(fib()) == types.GeneratorType:
print("Good, The fib function is a generator.")
counter = 0
for n in fib():
print(n)
counter += 1
if counter == 10:
break |
Function Arguments(Parameters)
Multiple Function Argument recognition - the list of "therest" parameters
Code Block |
---|
def foo(first, second, third, *therest):
print("First: %s" %(first))
print("Second: %s" %(second))
print("Third: %s" %(third))
print("And all the rest... %s" %(list(therest)))
foo(1,2,3,4,5) |
Multiple Function Argument by keyword
Code Block |
---|
def bar(first, second, third, **options):
if options.get("action") == "sum":
print("The sum is: %d" %(first + second + third))
if options.get("number") == "first":
return first
result = bar(1, 2, 3, action = "sum", number = "first")
print("Result: %d" %(result)) |
Regular Expression
RegEx(Regular Expressions) to search "[on]" or "[off]" on the string
Code Block |
---|
import re
pattern = re.compile(r"\[(on|off)\]") # Slight optimization
print(re.search(pattern, "Mono: Playback 65 [75%] [-16.50dB] [on]")) |
RegEx(Regular Expression) to check email address
Code Block |
---|
import re
def test_email(your_pattern):
pattern = re.compile(your_pattern)
emails = ["john@example.com", "python-list@python.org", "wha.t.`1an?ug{}ly@email.com"]
for email in emails:
if not re.match(pattern, email):
print("You failed to match %s" % (email))
elif not your_pattern:
print("Forgot to enter a pattern!")
else:
print("Pass")
pattern = r"[a-z0-9]+@[a-z0-9]+\.[a-z0-9]+"
test_email(pattern) |
Code Block |
# sort - ascending order
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
'Price': [22000,25000,27000,35000],
'Year': [2015,2013,2018,2018]
}
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])
# sort Brand - ascending order
df.sort_values(by=['Brand'], inplace=True)
print (df) |
Sort a Pandas DataFrame in a descending order
Info |
---|
df.sort_values(by=['Brand'], inplace=True, ascending=False) |
Code Block |
---|
# sort - descending order
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
'Price': [22000,25000,27000,35000],
'Year': [2015,2013,2018,2018]
}
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])
# sort Brand - descending order
df.sort_values(by=['Brand'], inplace=True, ascending=False)
print (df) |
Sort a Pandas DataFrame by multiple columns
Info |
---|
df.sort_values(by=['First Column','Second Column',...], inplace=True) |
print("Pass")
pattern = r"[a-z0-9]+@[a-z0-9]+\.[a-z0-9]+"
test_email(pattern) |
Exception Handling
try/except block
Code Block |
---|
def do_stuff_with_number(n):
print(n)
def catch_this():
the_list = (1, 2, 3, 4, 5)
for i in range(20):
try:
do_stuff_with_number(the_list[i])
except IndexError: # Raised when accessing a non-existing index of a list
do_stuff_with_number('out of bound - %d' % i)
catch_this() |
Code Block |
# sort by multiple columns
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
'Price': [22000,25000,27000,35000],
'Year': [2015,2013,2018,2018]
}
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])
# sort by multiple columns: Year and Price
df.sort_values(by=['Year','Price'], inplace=True)
print (df) |
Join and merge Pandas DataFrames
do_stuff_with_number(the_list[i])
except IndexError: # Raised when accessing a non-existing index of a list
do_stuff_with_number('out of bound - %d' % i)
catch_this()
|
Pandas DataFrame / CSV / Join / Merge
Create a Pandas DataFrame based on array
Code Block |
---|
dict = {"country": ["Brazil", "Russia", "India", "China", "South Africa" |
Code Block |
import pandas as pd
from IPython.display import display
from IPython.display import Image
raw_data = {
'subject_id': ['1', '2', '3', '4', '5'],
'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}
df_a = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])
raw_data = {
'subject_id': ['4', '5', '6', '7', '8'],
'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}
df_b = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])
raw_data = {
'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}
df_n = pd.DataFrame(raw_data, columns = ['subject_id','test_id'])
# Join the two dataframes along rows
df_new = pd.concat([df_a, df_b])
# Join the two dataframes along columns
pd.concat([df_a, df_b], axis=1)
# Merge two dataframes along the subject_id value
pd.merge(df_new, df_n, on='subject_id')
# Merge two dataframes with both the left and right dataframes using the subject_id key
pd.merge(df_new, df_n, left_on='subject_id', right_on='subject_id')
# Merge with outer join
pd.merge(df_a, df_b, on='subject_id', how='outer')
# Merge with inner join
pd.merge(df_a, df_b, on='subject_id', how='inner')
# Merge with right join
pd.merge(df_a, df_b, on='subject_id', how='right')
# Merge with left join
pd.merge(df_a, df_b, on='subject_id', how='left')
# Merge while adding a suffix to duplicate column names
pd.merge(df_a, df_b, on='subject_id', how='left', suffixes=('_left', '_right'))
# Merge based on indexes
pd.merge(df_a, df_b, right_index=True, left_index=True) |
Get the maximum value of column in Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
# Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]
}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# get the maximum values of all the column in dataframe - it will be raghu, 26, 89, object
df.max()
# get the maximum value of the column 'Age' - it will be 26
df['Age'].max()
# get the maximum value of the column 'Name' - it will be raghu
df['Name'].max() |
Get the minimum value of column in Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
# Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]
}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# get the minimum values of all the column in dataframe - it will display Alex, 22, 31, object
df.min()
# get the minimum value of the column 'Age' - it will be 22
df['Age'].min()
# get the minimum value of the column 'Name' - it will be Alex
df['Name'].min() |
Select row with maximum and minimum value in Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# get the row of max value
df.loc[df['Score'].idxmax()]
# get the row of minimum value
df.loc[df['Score'].idxmin()] |
Get the unique values (rows) of a Pandas Dataframe
Code Block |
---|
Create Dataframe:
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24]
}
df = pd.DataFrame(d,columns=['Name','Age'])
# get the unique values (rows)
print df.drop_duplicates()
# get the unique values (rows) by retaining last row
print df.drop_duplicates(keep='last') |
Get the list of column headers or column name in a Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# method 1: get list of column name
list(df.columns.values)
# method 2: get list of column name
list(df) |
Delete or Drop the duplicate row of a Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# drop duplicate rows
df.drop_duplicates()
# drop duplicate rows by retaining last occurrence
df.drop_duplicates(keep='last')
# drop duplicate by a column name
df.drop_duplicates(['Name'], keep='last') |
Adding index to a Pandas DataFrame
Code Block |
---|
# Set the index for brics
brics.index = ["BR", "RU", "IN", "CH", "SA"]
# Print out brics with new index values
print(brics) |
Reading CSV by Pandas DataFrame
Code Block |
---|
# Import pandas as pd
import pandas as pd
# Import the cars.csv data: cars
cars = pd.read_csv('cars.csv')
# Print out cars
print(cars) |
Reading a CSV file by Pandas DataFrame with 1st column as index
Code Block |
---|
# Import pandas and cars.csv
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out country column as Pandas Series
print(cars['cars_per_cap'])
# Print out country column as Pandas DataFrame
print(cars[['cars_per_cap']])
# Print out DataFrame with country and drives_right columns
print(cars[['cars_per_cap', 'country']]) |
Save a Pandas DataFrame in CSV format
Code Block |
---|
dict = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
"capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
"area": [8.516, 17.10, 3.286, 9.597, 1.221],
"population": [200.4, 143.5, 1252, 1357, 52.98] }
import pandas as pd
brics = pd.DataFrame(dict)
brics.to_csv('example.csv') |
Save a Pandas DataFrame in CSV format with header and no index
Code Block |
---|
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
'Price': [22000,25000,27000,35000]
}
df = DataFrame(Cars, columns= ['Brand', 'Price'])
export_csv = df.to_csv (r'C:\Users\Ron\Desktop\export_dataframe.csv', index = None, header=True) #Don't forget to add '.csv' at the end of the path
print (df) |
Print partial rows (observations) from a Pandas DataFrame
Code Block |
---|
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out first 4 observations
print(cars[0:4])
# Print out fifth, sixth, and seventh observation
print(cars[4:6]) |
Data access by loc and iloc in Pandas DataFrame - Select columns by index or name
loc is label-based, and iloc is integer index based
Code Block |
---|
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out observation for Japan
print(cars.iloc[2])
# Print out observations for Australia and Egypt
print(cars.loc[['AUS', 'EG']]) |
Sort a Pandas DataFrame in an ascending order
Info |
---|
df.sort_values(by=['Brand'], inplace=True) |
Code Block |
---|
# sort - ascending order
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
'Price': [22000,25000,27000,35000],
'Year': [2015,2013,2018,2018]
}
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])
# sort Brand - ascending order
df.sort_values(by=['Brand'], inplace=True)
print (df) |
Sort a Pandas DataFrame in a descending order
Info |
---|
df.sort_values(by=['Brand'], inplace=True, ascending=False) |
Code Block |
---|
# sort - descending order
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
'Price': [22000,25000,27000,35000],
'Year': [2015,2013,2018,2018]
}
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])
# sort Brand - descending order
df.sort_values(by=['Brand'], inplace=True, ascending=False)
print (df) |
Sort a Pandas DataFrame by multiple columns
Info |
---|
df.sort_values(by=['First Column','Second Column',...], inplace=True) |
Code Block |
---|
# sort by multiple columns
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
'Price': [22000,25000,27000,35000],
'Year': [2015,2013,2018,2018]
}
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])
# sort by multiple columns: Year and Price
df.sort_values(by=['Year','Price'], inplace=True)
print (df) |
Join and merge Pandas DataFrames
Code Block |
---|
import pandas as pd
from IPython.display import display
from IPython.display import Image
raw_data = {
'subject_id': ['1', '2', '3', '4', '5'],
'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}
df_a = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])
raw_data = {
'subject_id': ['4', '5', '6', '7', '8'],
'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}
df_b = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])
raw_data = {
'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}
df_n = pd.DataFrame(raw_data, columns = ['subject_id','test_id'])
# Join the two dataframes along rows
df_new = pd.concat([df_a, df_b])
# Join the two dataframes along columns
pd.concat([df_a, df_b], axis=1)
# Merge two dataframes along the subject_id value
pd.merge(df_new, df_n, on='subject_id')
# Merge two dataframes with both the left and right dataframes using the subject_id key
pd.merge(df_new, df_n, left_on='subject_id', right_on='subject_id')
# Merge with outer join
pd.merge(df_a, df_b, on='subject_id', how='outer')
# Merge with inner join
pd.merge(df_a, df_b, on='subject_id', how='inner')
# Merge with right join
pd.merge(df_a, df_b, on='subject_id', how='right')
# Merge with left join
pd.merge(df_a, df_b, on='subject_id', how='left')
# Merge while adding a suffix to duplicate column names
pd.merge(df_a, df_b, on='subject_id', how='left', suffixes=('_left', '_right'))
# Merge based on indexes
pd.merge(df_a, df_b, right_index=True, left_index=True) |
Get the maximum value of column in Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
# Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]
}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# get the maximum values of all the column in dataframe - it will be raghu, 26, 89, object
df.max()
# get the maximum value of the column 'Age' - it will be 26
df['Age'].max()
# get the maximum value of the column 'Name' - it will be raghu
df['Name'].max() |
Get the minimum value of column in Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
# Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]
}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# get the minimum values of all the column in dataframe - it will display Alex, 22, 31, object
df.min()
# get the minimum value of the column 'Age' - it will be 22
df['Age'].min()
# get the minimum value of the column 'Name' - it will be Alex
df['Name'].min() |
Select row with maximum and minimum value in Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# get the row of max value
df.loc[df['Score'].idxmax()]
# get the row of minimum value
df.loc[df['Score'].idxmin()] |
Reshape long to wide in Pandas DataFrame with pivot function
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'countries':['A','B','C','A','B','C'],
'metrics':['population_in_million','population_in_million','population_in_million',
'gdp_percapita','gdp_percapita','gdp_percapita'],
'values':[100,200,120,2000,7000,15000]
}
df = pd.DataFrame(d,columns=['countries','metrics','values'])
# reshape from long to wide in pandas python
df2=df.pivot(index='countries', columns='metrics', values='values') |
Get the unique values (rows) of a Pandas Dataframe
Code Block |
---|
Create Dataframe:
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24]
}
df = pd.DataFrame(d,columns=['Name','Age'])
# get the unique values (rows)
print df.drop_duplicates()
# get the unique values (rows) by retaining last row
print df.drop_duplicates(keep='last') |
Reshape using Stack() and unstack() function in Pandas DataFrame
Get the list of column headers or column name in a Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# method 1: get list of column name
list(df.columns.values)
# method 2: get list of column name
list(df) |
Code Block |
import pandas as pd
import numpy as np
header = pd.MultiIndex.from_product([['Semester1','Semester2'],['Maths','Science']])
d=([[12,45,67,56],[78,89,45,67],[45,67,89,90],[67,44,56,55]])
df = pd.DataFrame(d,
index=['Alisa','Bobby','Cathrine','Jack'],
columns=header)
# stack the dataframe
stacked_df=df.stack()
# unstack the dataframe
unstacked_df = stacked_df.unstack()
# stack the dataframe of column at level 0
stacked_df_lvl=df.stack(level=0)
# unstack the dataframe
unstacked_df1 = stacked_df_lvl.unstack() |
Generator
Random number generation
list of column name
list(df) |
Delete or Drop the duplicate row of a Pandas DataFrame
Code Block |
---|
import random
def lottery():
# returns 6 numbers between 1 and 40
for i in range(6):
yield random.randint(1, 40)
# returns a 7th number between 1 and 15
yield random.randint(1,15)
for random_number in lottery():
print("And the next number is... %d!" %(random_number)) |
Swap variables' value
Code Block |
---|
a = 1
b = 2
a, b = b, a
print(a,b) |
Fibonacci series generator
The first two numbers of the series is always equal to 1, and each consecutive number returned is the sum of the last two numbers - the below code uses only two variables to get the result.
Code Block |
---|
def fib():
a, b = 1, 1
while 1:
yield a
a, b = b, a + b
# testing code
import types
if type(fib()) == types.GeneratorType:
print("Good, The fib function is a generator.")
counter = 0
for n in fib():
print(n)
counter += 1
if counter == 10:
break |
Function Arguments(Parameters)
Multiple Function Argument recognition - the list of "therest" parameters
Code Block |
---|
def foo(first, second, third, *therest):
print("First: %s" %(first))
print("Second: %s" %(second))
print("Third: %s" %(third))
print("And all the rest... %s" %(list(therest)))
foo(1,2,3,4,5) |
Multiple Function Argument by keyword
Code Block |
---|
def bar(first, second, third, **options):
if options.get("action") == "sum":
print("The sum is: %d" %(first + second + third))
if options.get("number") == "first":
return first
result = bar(1, 2, 3, action = "sum", number = "first")
print("Result: %d" %(result)) |
Regular Expression
RegEx(Regular Expressions) to search "[on]" or "[off]" on the string
Code Block |
---|
import re
pattern = re.compile(r"\[(on|off)\]") # Slight optimization
print(re.search(pattern, "Mono: Playback 65 [75%] [-16.50dB] [on]")) |
RegEx(Regular Expression) to check email address
Code Block |
---|
import re
def test_email(your_pattern):
pattern = re.compile(your_pattern)
emails = ["john@example.com", "python-list@python.org", "wha.t.`1an?ug{}ly@email.com"]
for email in emails:
if not re.match(pattern, email):
print("You failed to match %s" % (email))
elif not your_pattern:
print("Forgot to enter a pattern!")
else:
print("Pass")
pattern = r"[a-z0-9]+@[a-z0-9]+\.[a-z0-9]+"
test_email(pattern) |
Exception Handling
try/except block
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# drop duplicate rows
df.drop_duplicates()
# drop duplicate rows by retaining last occurrence
df.drop_duplicates(keep='last')
# drop duplicate by a column name
df.drop_duplicates(['Name'], keep='last') |
Drop or delete the row in Pandas DataFrame with conditions
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'Name':['Alisa','Bobby','jodha','jack','raghu','Cathrine',
'Alisa','Bobby','kumar','Alisa','Alex','Cathrine'],
'Age':[26,24,23,22,23,24,26,24,22,23,24,24],
'Score':[85,63,55,74,31,77,85,63,42,62,89,77]}
df = pd.DataFrame(d,columns=['Name','Age','Score'])
# Drop an observation or row
df.drop([1,2])
# Drop a row by condition
df[df.Name != 'Alisa']
# Drop a row by index
df.drop(df.index[2])
# Drop bottom 3 rows
df[:-3] |
Reshape wide to long in Pandas DataFrame with melt() function
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'countries':['A','B','C'],
'population_in_million':[100,200,120],
'gdp_percapita':[2000,7000,15000]
}
df = pd.DataFrame(d,columns=['countries','population_in_million','gdp_percapita'])
# shape from wide to long with melt function in pandas
df2=pd.melt(df,id_vars=['countries'],var_name='metrics', value_name='values') |
Reshape long to wide in Pandas DataFrame with pivot function
Code Block |
---|
import pandas as pd
import numpy as np
#Create a DataFrame
d = {
'countries':['A','B','C','A','B','C'],
'metrics':['population_in_million','population_in_million','population_in_million',
'gdp_percapita','gdp_percapita','gdp_percapita'],
'values':[100,200,120,2000,7000,15000]
}
df = pd.DataFrame(d,columns=['countries','metrics','values'])
# reshape from long to wide in pandas python
df2=df.pivot(index='countries', columns='metrics', values='values')
|
Reshape using Stack() and unstack() function in Pandas DataFrame
Code Block |
---|
import pandas as pd
import numpy as np
header = pd.MultiIndex.from_product([['Semester1','Semester2'],['Maths','Science']])
d=([[12,45,67,56],[78,89,45,67],[45,67,89,90],[67,44,56,55]])
df = pd.DataFrame(d,
index=['Alisa','Bobby','Cathrine','Jack'],
columns=header)
# stack the dataframe
stacked_df=df.stack()
# unstack the dataframe
unstacked_df = stacked_df.unstack()
# stack the dataframe of column at level 0
stacked_df_lvl=df.stack(level=0)
# unstack the dataframe
unstacked_df1 = stacked_df_lvl.unstack() |
Code Block |
def do_stuff_with_number(n):
print(n)
def catch_this():
the_list = (1, 2, 3, 4, 5)
for i in range(20):
try:
do_stuff_with_number(the_list[i])
except IndexError: # Raised when accessing a non-existing index of a list
do_stuff_with_number('out of bound - %d' % i)
catch_this()
|