Versions Compared


  • This line was added.
  • This line was removed.
  • Formatting was changed.

Table of Contents


Get last name from full name by split()


Code Block
# Sample record used by get_last_name() below.
actor = {"name": "John Cleese", "rank": "awesome"}


def get_last_name():
    """Return the last whitespace-separated word of ``actor["name"]``.

    Reads the module-level ``actor`` dict.

    Returns:
        The final word of the name, e.g. ``"Cleese"`` for ``"John Cleese"``.

    Raises:
        IndexError: if the name is empty or contains only whitespace.
    """
    # Index from the end: with [1] a name such as "John Marwood Cleese"
    # would yield the middle name instead of the last one.
    return actor["name"].split()[-1]

print("All exceptions caught! Good job!")
print("The actor's last name is %s" % get_last_name())


Split string as list

Code Block
import pandas as pd

# Column-oriented input: each key becomes a DataFrame column and each list
# supplies that column's values, one entry per row.
# The mapping is deliberately not called `dict`, so the builtin type stays
# usable and the name passed to DataFrame() is the one defined here.
brics_data = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
       "capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
       "area": [8.516, 17.10, 3.286, 9.597, 1.221],
       "population": [200.4, 143.5, 1252, 1357, 52.98] }

# Build the DataFrame; rows get the default integer index 0..4.
brics = pd.DataFrame(brics_data)

Adding index to a Pandas DataFrame

Code Block
# Set the index for brics
brics.index = ["BR", "RU", "IN", "CH", "SA"]

# Print out brics with new index values

Reading CSV by Pandas DataFrame

Code Block
# Import pandas as pd
import pandas as pd

# Import the cars.csv data: cars
cars = pd.read_csv('cars.csv')

# Print out cars

Reading a CSV file by Pandas DataFrame with 1st column as index

Code Block
# Import pandas and cars.csv
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out country column as Pandas Series

# Print out country column as Pandas DataFrame

# Print out DataFrame with country and drives_right columns
print(cars[['cars_per_cap', 'country']])

Save a Pandas DataFrame in CSV format

Code Block
dict = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
       "capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
       "area": [8.516, 17.10, 3.286, 9.597, 1.221],
       "population": [200.4, 143.5, 1252, 1357, 52.98] }

import pandas as pd
brics = pd.DataFrame(dict)


Save a Pandas DataFrame in CSV format with header and no index

Code Block
from pandas import DataFrame

Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
        'Price': [22000,25000,27000,35000]

df = DataFrame(Cars, columns= ['Brand', 'Price'])

export_csv = df.to_csv (r'C:\Users\Ron\Desktop\export_dataframe.csv', index = None, header=True) #Don't forget to add '.csv' at the end of the path

print (df)

Print partial rows (observations) from a Pandas DataFrame

Code Block
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out first 4 observations

# Print out fifth, sixth, and seventh observation

Data access by loc and iloc in a Pandas DataFrame

loc is label-based, and iloc is integer index based

Code Block
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out observation for Japan

# Print out observations for Australia and Egypt
print(cars.loc[['AUS', 'EG']])

Sort a Pandas DataFrame in an ascending order

df.sort_values(by=['Brand'], inplace=True)
# str.split() with no arguments splits on runs of whitespace and returns
# the words as a list.
sentence = "the quick brown fox jumps over the lazy dog"
words = sentence.split()

Filter positive numbers only - 1

Code Block
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]
# Collect the strictly positive values, preserving their original order.
newlist = []
for number in numbers:
    if number > 0:
        newlist.append(number)

Filter positive numbers only - 2

Code Block
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]
newlist = [int(x) for x in numbers if x > 0]

Create word list from a sentence with no duplicate entries

set() removes all the duplicate entries in the array

Code Block
strings = "my name is Chun Kang and Chun is my name"
r = set(strings.split())

Find overlapped entries from two arrays

Code Block
a = set([ "Seoul", "Pusan", "Incheon", "Mokpo" ])
b = set([ "Seoul", "Incheon", "Suwon", "Daejeon", "Gwangjoo", "Taeku"])


The result will be like below


{'Seoul', 'Incheon'}

{'Seoul', 'Incheon'}

Find different elements from two arrays based on "symmetric_difference" method

Code Block
a = set(["Jake", "John", "Eric"])
b = set(["John", "Jill"])


The result will be like below


{'Jake', 'Eric', 'Jill'}

{'Eric', 'Jake', 'Jill'}

Find different elements from two arrays based on "difference" method

Code Block
a = set(["Jake", "John", "Eric"])
b = set(["John", "Jill"])


The result will be like below


{'Jake', 'Eric'}


Find different elements from two arrays based on "union" method

Code Block
a = set(["Jake", "John", "Eric"])
b = set(["John", "Jill"])


The result will be like below


{'John', 'Eric', 'Jake', 'Jill'}

Print out a set containing all the participants from event A which did not attend event B

Code Block
a = ["Jake", "John", "Eric"]
b = ["John", "Jill"]


Pandas DataFrame / CSV

Create a Pandas DataFrame based on array

Code Block
dict = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
       "capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
       "area": [8.516, 17.10, 3.286, 9.597, 1.221
Code Block
# sort - ascending order
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
        'Price'"population": [22000,25000,27000,35000],
        'Year': [2015,2013,2018,2018]
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])

# sort Brand - ascending order
df.sort_values(by=['Brand'], inplace=True)

print (df)

Sort a Pandas DataFrame in a descending order

df.sort_values(by=['Brand'], inplace=True, ascending=False)
Code Block
# sort - descending order
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
        'Price': [22000,25000,27000,35000],
        'Year': [2015,2013,2018,2018]
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])

# sort Brand - descending order
df.sort_values(by=['Brand'], inplace=True, ascending=False)

print (df)

Sort a Pandas DataFrame by multiple columns

df.sort_values(by=['First Column','Second Column',...], inplace=True)
200.4, 143.5, 1252, 1357, 52.98] }

import pandas as pd
brics = pd.DataFrame(dict)

Adding index to a Pandas DataFrame

Code Block
# Set the index for brics
brics.index = ["BR", "RU", "IN", "CH", "SA"]

# Print out brics with new index values

Reading CSV by Pandas DataFrame

Code Block
# Import pandas as pd
import pandas as pd

# Import the cars.csv data: cars
cars = pd.read_csv('cars.csv')

# Print out cars

Reading a CSV file by Pandas DataFrame with 1st column as index

Code Block
# Import pandas and cars.csv
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out country column as Pandas Series

# Print out country column as Pandas DataFrame

# Print out DataFrame with country and drives_right columns
print(cars[['cars_per_cap', 'country']])

Save a Pandas DataFrame in CSV format

Code Block
dict = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
       "capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"
Code Block
# sort by multiple columns
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
        'Price'"area": [22000,25000,27000,350008.516, 17.10, 3.286, 9.597, 1.221],
        'Year'"population": [2015,2013,2018,2018]
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])

# sort by multiple columns: Year and Price
df.sort_values(by=['Year','Price'], inplace=True)

print (df)

Join and merge Pandas DataFrames

200.4, 143.5, 1252, 1357, 52.98] }

import pandas as pd
brics = pd.DataFrame(dict)


Save a Pandas DataFrame in CSV format with header and no index

Code Block
from pandas import DataFrame

Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
        'Price': [22000,25000,27000,35000]

df = DataFrame(Cars, columns= ['Brand', 'Price'])

export_csv = df.to_csv (r'C:\Users\Ron\Desktop\export_dataframe.csv', index = None, header=True) #Don't forget to add '.csv' at the end of the path

print (df)

Print partial rows (observations) from a Pandas DataFrame

Code Block
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out first 4 observations

# Print out fifth, sixth, and seventh observation

Data access by loc and iloc in a Pandas DataFrame

loc is label-based, and iloc is integer index based

Code Block
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out observation for Japan

# Print out observations for Australia and Egypt
print(cars.loc[['AUS', 'EG']])

Sort a Pandas DataFrame in an ascending order

df.sort_values(by=['Brand'], inplace=True)

Code Block
# sort - ascending order
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
Code Block
import pandas as pd
from IPython.display import display
from IPython.display import Image

raw_data = {
        'subject_id': ['1', '2', '3', '4', '5'],
        'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'], 
        'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}
df_a = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])

raw_data = {
        'subject_id': ['4', '5', '6', '7', '8'],
        'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'], 
        'last_namePrice': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}
df_b = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])

raw_data = {
        'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'22000,25000,27000,35000],
        'Year': [2015,2013,2018,2018]
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])

# sort Brand - ascending order
df.sort_values(by=['Brand'], inplace=True)

print (df)

Sort a Pandas DataFrame in a descending order

df.sort_values(by=['Brand'], inplace=True, ascending=False)

Code Block
# sort - descending order
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
        'test_idPrice': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}
df_n = pd.DataFrame(raw_data, columns = ['subject_id','test_id22000,25000,27000,35000],
        'Year': [2015,2013,2018,2018]
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])

# Joinsort theBrand two- dataframesdescending along rowsorder
df_new = pd.concat([df_a, df_b])

# Join the two dataframes along columns
pd.concat([df_a, df_b], axis=1)

# Merge two dataframes along the subject_id value
pd.merge(df_new, df_n, on='subject_id')

# Merge two dataframes with both the left and right dataframes using the subject_id key
pd.merge(df_new, df_n, left_on='subject_id', right_on='subject_id')

# Merge with outer join
pd.merge(df_a, df_b, on='subject_id', how='outer')

# Merge with inner join
pd.merge(df_a, df_b, on='subject_id', how='inner')

# Merge with right join
pd.merge(df_a, df_b, on='subject_id', how='right')

# Merge with left join
pd.merge(df_a, df_b, on='subject_id', how='left')

# Merge while adding a suffix to duplicate column names
pd.merge(df_a, df_b, on='subject_id', how='left', suffixes=('_left', '_right'))

# Merge based on indexes
pd.merge(df_a, df_b, right_index=True, left_index=True)

Random number generation

Code Block
import random

def lottery():
    """Yield seven random draws: six main numbers, then one extra number.

    The first six values each come from ``random.randint(1, 40)`` and the
    seventh from ``random.randint(1, 15)``; both bounds are inclusive.
    Every draw is independent, so the main numbers may repeat.
    """
    remaining_main_draws = 6
    while remaining_main_draws:
        yield random.randint(1, 40)
        remaining_main_draws -= 1

    # The closing draw uses the smaller 1..15 range.
    yield random.randint(1, 15)

for random_number in lottery():
       print("And the next number is... %d!" %(random_number))

Swap variables' value

Code Block
a = 1
b = 2
a, b = b, a

Fibonacci series generator

The first two numbers of the series are always equal to 1, and each subsequent number is the sum of the previous two. The code below needs only two variables to produce the sequence.

Code Block
def fib():
    """Yield the Fibonacci numbers 1, 1, 2, 3, 5, ... without end.

    The generator never finishes on its own; the caller decides when to
    stop consuming it.
    """
    a, b = 1, 1
    while True:
        yield a
        # Simultaneous assignment advances both terms in one step, so no
        # temporary variable is needed.
        a, b = b, a + b

# testing code
import types
if isinstance(fib(), types.GeneratorType):
    print("Good, The fib function is a generator.")

    counter = 0
    for n in fib():
        counter += 1
        if counter == 10:
            # fib() is infinite, so leave the loop after the tenth term.
            break

Split string as list

Code Block
sentence = "the quick brown fox jumps over the lazy dog"
words = sentence.split()

Filter positive numbers only - 1

Code Block
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]
# Collect the strictly positive values, preserving their original order.
newlist = []
for number in numbers:
    if number > 0:
        newlist.append(number)

Filter positive numbers only - 2

Code Block
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]
newlist = [int(x) for x in numbers if x > 0]

Multiple Function Argument recognition - the list of "therest" parameters

Code Block
def foo(first, second, third, *therest):
    """Print the three required arguments, then the extras as a list.

    Args:
        first, second, third: required positional arguments, printed one
            per line.
        *therest: any further positional arguments; they are converted to
            a list and printed on the final line.
    """
    labelled = (
        ("First: %s", first),
        ("Second: %s", second),
        ("Third: %s", third),
    )
    for template, value in labelled:
        print(template % (value))

    extras = list(therest)
    print("And all the rest... %s" % (extras))


Multiple Function Argument by keyword

Code Block
def bar(first, second, third, **options):
    """Optionally print the sum of the arguments and optionally return ``first``.

    Args:
        first, second, third: the values to work with.
        **options: recognised keys are
            ``action`` - when equal to ``"sum"``, the sum of the three
            values is printed;
            ``number`` - when equal to ``"first"``, ``first`` is returned.

    Returns:
        ``first`` when ``number="first"`` was passed, otherwise ``None``.
    """
    requested_action = options.get("action")
    if requested_action == "sum":
        total = first + second + third
        print("The sum is: %d" % (total))

    wants_first = options.get("number") == "first"
    return first if wants_first else None

result = bar(1, 2, 3, action = "sum", number = "first")
print("Result: %d" %(result))

RegEx(Regular Expressions) to search "[on]" or "[off]" on the string

Code Block
import re

# Compile once so the same pattern object can be reused for further searches.
# The square brackets are escaped because they are literal characters here;
# the capture group holds the word between them ("on" or "off").
pattern = re.compile(r"\[(on|off)\]") # Slight optimization
# re.search scans the whole string, so the earlier "[75%]" and "[-16.50dB]"
# groups are skipped and the trailing "[on]" is found.
print(re.search(pattern, "Mono: Playback 65 [75%] [-16.50dB] [on]"))

RegEx(Regular Expression) to check email address

.sort_values(by=['Brand'], inplace=True, ascending=False)

print (df)

Sort a Pandas DataFrame by multiple columns

df.sort_values(by=['First Column','Second Column',...], inplace=True)

Code Block
# sort by multiple columns
from pandas import DataFrame
Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
        'Price': [22000,25000,27000,35000],
        'Year': [2015,2013,2018,2018]
df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])

# sort by multiple columns: Year and Price
df.sort_values(by=['Year','Price'], inplace=True)

print (df)

Join and merge Pandas DataFrames

Code Block
import pandas as pd
from IPython.display import display
from IPython.display import Image

raw_data = {
        'subject_id': ['1', '2', '3', '4', '5'],
        'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'], 
        'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}
df_a = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])

raw_data = {
        'subject_id': ['4', '5', '6', '7', '8'],
        'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'], 
        'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}
df_b = pd.DataFrame(raw_data, columns = ['subject_id', 'first_name', 'last_name'])

raw_data = {
        'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
        'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}
df_n = pd.DataFrame(raw_data, columns = ['subject_id','test_id'])

# Join the two dataframes along rows
df_new = pd.concat([df_a, df_b])

# Join the two dataframes along columns
pd.concat([df_a, df_b], axis=1)

# Merge two dataframes along the subject_id value
pd.merge(df_new, df_n, on='subject_id')

# Merge two dataframes with both the left and right dataframes using the subject_id key
pd.merge(df_new, df_n, left_on='subject_id', right_on='subject_id')

# Merge with outer join
pd.merge(df_a, df_b, on='subject_id', how='outer')

# Merge with inner join
pd.merge(df_a, df_b, on='subject_id', how='inner')

# Merge with right join
pd.merge(df_a, df_b, on='subject_id', how='right')

# Merge with left join
pd.merge(df_a, df_b, on='subject_id', how='left')

# Merge while adding a suffix to duplicate column names
pd.merge(df_a, df_b, on='subject_id', how='left', suffixes=('_left', '_right'))

# Merge based on indexes
pd.merge(df_a, df_b, right_index=True, left_index=True)


Random number generation

Code Block
import random

def lottery():
    """Yield seven random draws: six main numbers, then one extra number.

    The first six values each come from ``random.randint(1, 40)`` and the
    seventh from ``random.randint(1, 15)``; both bounds are inclusive.
    Every draw is independent, so the main numbers may repeat.
    """
    remaining_main_draws = 6
    while remaining_main_draws:
        yield random.randint(1, 40)
        remaining_main_draws -= 1

    # The closing draw uses the smaller 1..15 range.
    yield random.randint(1, 15)

for random_number in lottery():
       print("And the next number is... %d!" %(random_number))

Swap variables' value

Code Block
a = 1
b = 2
a, b = b, a

Fibonacci series generator

The first two numbers of the series are always equal to 1, and each subsequent number is the sum of the previous two. The code below needs only two variables to produce the sequence.

Code Block
def fib():
    """Yield the Fibonacci numbers 1, 1, 2, 3, 5, ... without end.

    Only the current term and the one after it are kept; the generator
    never finishes on its own, so the caller must stop consuming it.
    """
    current = 1
    upcoming = 1
    while True:
        yield current
        # Slide the two-term window forward by one position.
        current, upcoming = upcoming, current + upcoming

# testing code
import types
if type(fib()) == types.GeneratorType:
    print("Good, The fib function is a generator.")

    counter = 0
    for n in fib():
        counter += 1
Code Block
import re

def test_email(your_pattern):
    pattern = re.compile(your_pattern)
    emails = ["", "", "wha.t.`1an?ug{}"]
    for email in emails:
        if not re.match(pattern, email)counter == 10:
            print("You failed to matchbreak

Function Arguments(Parameters)

Multiple Function Argument recognition - the list of "therest" parameters

Code Block
def foo(first, second, third, *therest):
    print("First: %s" % (emailfirst))
    print("Second: %s" %(second))
  elif not your_pattern:
       print("Third: %s" %(third))
      print("ForgotAnd toall enterthe rest... %s" %(list(therest)))


Multiple Function Argument by keyword

Code Block
def bar(first, second, third, **options):
    if options.get("action") == "sum":
   a pattern!")
      print("The sum is: %d" %(first + second  print+ third))

    if options.get("Passnumber")

pattern == r"[a-z0-9]+@[a-z0-9]+\.[a-z0-9]+"

Exception Handling - try/except block

Code Block
def do_stuff_with_number(n):

def catch_this():
    the_list = (1"first":
        return first

result = bar(1, 2, 3, 4, 5)

    for i in range(20):
        except IndexError: # Raised when accessing a non-existing index of a list
            do_stuff_with_number('out of bound - %d' % i)


Create word list from a sentence with no duplicate entries

set() removes all the duplicate entries in the array

Code Block
strings = "my name is Chun Kang and Chun is my name"
r = set(strings.split())

Find overlapped entries from two arrays

Code Block
a = set([ "Seoul", "Pusan", "Incheon", "Mokpo" ])
b = set([ "Seoul", "Incheon", "Suwon", "Daejeon", "Gwangjoo", "Taeku"])


The result will be like below


{'Seoul', 'Incheon'}

{'Seoul', 'Incheon'}

Find different elements from two arrays based on "symmetric_difference" method

Code Block
a = set(["Jake", "John", "Eric"])
b = set(["John", "Jill"])


The result will be like below


{'Jake', 'Eric', 'Jill'}

{'Eric', 'Jake', 'Jill'}

action = "sum", number = "first")
print("Result: %d" %(result))

Regular Expression

RegEx(Regular Expressions) to search "[on]" or "[off]" on the string

Code Block
import re

# Compile once so the same pattern object can be reused for further searches.
# The square brackets are escaped because they are literal characters here;
# the capture group holds the word between them ("on" or "off").
pattern = re.compile(r"\[(on|off)\]") # Slight optimization
# re.search scans the whole string, so the earlier "[75%]" and "[-16.50dB]"
# groups are skipped and the trailing "[on]" is found.
print(re.search(pattern, "Mono: Playback 65 [75%] [-16.50dB] [on]"))

RegEx(Regular Expression) to check email address

Code Block
import re

def test_email(your_pattern):
    """Check ``your_pattern`` against three hard-coded strings and report.

    For every string that the pattern does not match at its start, a
    "You failed to match ..." line is printed. If the string matches but
    ``your_pattern`` itself is empty/falsy, a reminder to enter a pattern
    is printed instead. Nothing is returned.

    Args:
        your_pattern: regular expression source passed to ``re.compile``.
    """
    pattern = re.compile(your_pattern)
    # NOTE(review): the first two entries are empty strings and the third
    # looks cut short -- presumably sample addresses were stripped when
    # this page was extracted; confirm against the original page.
    emails = ["", "", "wha.t.`1an?ug{}"]
    for email in emails:
        # re.match only anchors at the beginning of the string.
        if not re.match(pattern, email):
            print("You failed to match %s" % (email))
        # An empty pattern matches every string, so reaching this branch
        # with a falsy pattern means no real pattern was supplied.
        elif not your_pattern:
            print("Forgot to enter a pattern!")

pattern = r"[a-z0-9]+@[a-z0-9]+\.[a-z0-9]+"

Exception Handling

try/except block

Code Block
def do_stuff_with_number(n):
    """Print ``n``; stands in for whatever work would be done with a value."""
    print(n)

def catch_this():
    """Walk indexes 0..19 of a five-element tuple, reporting the misses.

    Indexes that exist have their element passed to
    ``do_stuff_with_number``. Indexes past the end raise ``IndexError``,
    which is caught and reported as ``'out of bound - <index>'`` through
    the same function, so the loop always completes all 20 iterations.
    """
    the_list = (1, 2, 3, 4, 5)

    for i in range(20):
        # Only the indexing is guarded, so an error raised inside
        # do_stuff_with_number itself is not mistaken for a missing index.
        try:
            value = the_list[i]
        except IndexError: # Raised when accessing a non-existing index of a sequence
            do_stuff_with_number('out of bound - %d' % i)
        else:
            do_stuff_with_number(value)


Find different elements from two arrays based on "difference" method

Code Block
a = set(["Jake", "John", "Eric"])
b = set(["John", "Jill"])


The result will be like below


{'Jake', 'Eric'}


Find different elements from two arrays based on "union" method

Code Block
a = set(["Jake", "John", "Eric"])
b = set(["John", "Jill"])


The result will be like below


{'John', 'Eric', 'Jake', 'Jill'}

Print out a set containing all the participants from event A which did not attend event B

Code Block
a = ["Jake", "John", "Eric"]
b = ["John", "Jill"]
