Get last name from full name by split()
The function can be implemented easily with the string method split().
# Sample record; the "name" field holds the actor's full name.
actor = {"name": "John Cleese", "rank": "awesome"}


def get_last_name():
    """Return the last whitespace-separated word of ``actor["name"]``.

    The final word is selected with ``[-1]`` so that names with a middle
    name ("John Marwood Cleese") or a single word ("Cher") still work;
    a fixed index of 1 would return the middle name or fail.

    Raises:
        IndexError: if the name is empty or contains only whitespace.
    """
    return actor["name"].split()[-1]


# Call once up front: if the lookup raises, the messages below never print.
get_last_name()
print("All exceptions caught! Good job!")
print("The actor's last name is %s" % get_last_name())
Create a Pandas DataFrame based on array
import pandas as pd

# Column-oriented data: every key becomes a column of the DataFrame.
# The mapping is called brics_data (not dict) so that the built-in dict
# type stays usable for the rest of the script.
brics_data = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98],
}

# Build the DataFrame from the mapping and display it.
brics = pd.DataFrame(brics_data)
print(brics)
Adding index to DataFrame
# Row labels for brics, given in the same order as its existing rows.
row_labels = ["BR", "RU", "IN", "CH", "SA"]
brics.index = row_labels

# Display brics again, now showing the new index values.
print(brics)
Reading CSV by Pandas DataFrame
import pandas as pd

# Read cars.csv (looked up relative to the working directory) into a DataFrame.
cars = pd.read_csv("cars.csv")

# Display the loaded table.
print(cars)
Reading a CSV file by Pandas DataFrame with 1st column as index
import pandas as pd

# Load cars.csv, using its first column as the row index.
cars = pd.read_csv('cars.csv', index_col=0)

# Single brackets: the cars_per_cap column as a pandas Series.
print(cars['cars_per_cap'])

# Double brackets: the cars_per_cap column as a one-column pandas DataFrame.
print(cars[['cars_per_cap']])

# A list of names selects several columns: cars_per_cap and country.
print(cars[['cars_per_cap', 'country']])
Save a Pandas DataFrame in CSV format
import pandas as pd

# Column-oriented data: every key becomes a column of the DataFrame.
# The mapping is called brics_data (not dict) so that the built-in dict
# type stays usable for the rest of the script.
brics_data = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98],
}

brics = pd.DataFrame(brics_data)

# Write the frame to example.csv in the working directory. No options are
# passed, so pandas' defaults apply (header row and index column are written).
brics.to_csv('example.csv')
Save a Pandas DataFrame in CSV format with a header and no index
from pandas import DataFrame

# Column-oriented sample data: one list per column.
Cars = {
    'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
    'Price': [22000, 25000, 27000, 35000],
}

df = DataFrame(Cars, columns=['Brand', 'Price'])

# header=True writes the column names as the first line of the file.
# index=False leaves the row index out; it is the documented boolean form of
# the previous index=None, which only worked because None is falsy.
# Don't forget to add '.csv' at the end of the path.
# to_csv returns None when it is given a path, so export_csv is None here.
export_csv = df.to_csv(r'C:\Users\Ron\Desktop\export_dataframe.csv', index=False, header=True)

print(df)
Print partial rows (observations) from a Pandas DataFrame
import pandas as pd

# Load cars.csv, using its first column as the row index.
cars = pd.read_csv('cars.csv', index_col=0)

# First 4 observations: positions 0, 1, 2 and 3 (the slice end is exclusive).
print(cars[0:4])

# Fifth, sixth and seventh observations: positions 4, 5 and 6. The slice must
# end at 7, because 4:6 would stop after the sixth row.
print(cars[4:7])
Data access by loc and iloc in a Pandas DataFrame
loc is label-based, and iloc is integer index based
import pandas as pd

# Load cars.csv, using its first column as the row index.
cars = pd.read_csv('cars.csv', index_col=0)

# iloc is position-based: take the row at integer position 2.
# NOTE(review): assumes Japan is the third row of cars.csv - confirm.
japan_row = cars.iloc[2]
print(japan_row)

# loc is label-based: take the rows whose index labels are 'AUS' and 'EG'
# (presumably Australia and Egypt - verify against the file).
wanted_labels = ['AUS', 'EG']
print(cars.loc[wanted_labels])
Sort a Pandas DataFrame in an ascending order
df.sort_values(by=['Brand'], inplace=True)
from pandas import DataFrame

# Column-oriented sample data: one list per column.
Cars = {
    'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
    'Price': [22000, 25000, 27000, 35000],
    'Year': [2015, 2013, 2018, 2018],
}

column_order = ['Brand', 'Price', 'Year']
df = DataFrame(Cars, columns=column_order)

# Reorder the rows of df in place, A to Z by Brand.
df.sort_values(by='Brand', ascending=True, inplace=True)

print(df)
Sort a Pandas DataFrame in a descending order
df.sort_values(by=['Brand'], inplace=True, ascending=False)
from pandas import DataFrame

# Column-oriented sample data: one list per column.
Cars = {
    'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
    'Price': [22000, 25000, 27000, 35000],
    'Year': [2015, 2013, 2018, 2018],
}

column_order = ['Brand', 'Price', 'Year']
df = DataFrame(Cars, columns=column_order)

# Reorder the rows of df in place, Z to A by Brand.
df.sort_values(by='Brand', ascending=False, inplace=True)

print(df)
Sort a Pandas DataFrame by multiple columns
df.sort_values(by=['First Column','Second Column',...], inplace=True)
from pandas import DataFrame

# Column-oriented sample data: one list per column.
Cars = {
    'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
    'Price': [22000, 25000, 27000, 35000],
    'Year': [2015, 2013, 2018, 2018],
}

column_order = ['Brand', 'Price', 'Year']
df = DataFrame(Cars, columns=column_order)

# Sort in place by Year first; rows sharing a Year are then ordered by Price.
sort_keys = ['Year', 'Price']
df.sort_values(by=sort_keys, inplace=True)

print(df)
Random number generation
import random


def lottery():
    """Yield seven random lottery numbers, one at a time.

    The first six values are each drawn from 1 to 40 inclusive; the
    seventh value is drawn from 1 to 15 inclusive.
    """
    # Six main numbers, produced lazily as the caller asks for them.
    yield from (random.randint(1, 40) for _ in range(6))
    # One final number from the smaller range.
    yield random.randint(1, 15)


for random_number in lottery():
    print("And the next number is... %d!" % random_number)
Swap variables' value
# Start with two different values.
a, b = 1, 2

# Tuple unpacking exchanges the two values without a temporary variable.
b, a = a, b

print(a, b)
Fibonacci series generator
The first two numbers of the series are always equal to 1, and each consecutive number returned is the sum of the previous two numbers. The code below uses only two variables to produce the result.
import itertools
import types


def fib():
    """Yield the Fibonacci series 1, 1, 2, 3, 5, ... without ever stopping.

    Only two variables are kept: the value to yield next and the one after
    it. The caller decides how many values to consume.
    """
    a, b = 1, 1
    while True:
        yield a
        a, b = b, a + b


# testing code
# isinstance is the idiomatic type check (instead of comparing type() with ==).
if isinstance(fib(), types.GeneratorType):
    print("Good, The fib function is a generator.")

# fib() is endless, so islice takes just the first 10 values from it.
for n in itertools.islice(fib(), 10):
    print(n)
Split string as list
# Text to break into individual words.
sentence = "the quick brown fox jumps over the lazy dog"

# Called without a separator, split() cuts the text at runs of whitespace
# and returns the pieces as a list.
words = sentence.split()

print(words)
Filter positive numbers only - 1
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]

# Walk through the values in order and collect those greater than zero.
newlist = []
for number in numbers:
    if not number > 0:
        # Zero and negative values are left out.
        continue
    newlist.append(number)

print(newlist)
Filter positive numbers only, converted to integers - 2
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]

# One-expression version of the filter: keep the values greater than zero.
# Note that int() drops the fractional part of every kept value.
newlist = [int(value) for value in numbers if value > 0]

print(newlist)
Multiple Function Argument recognition - the list of "therest" parameters
def foo(first, second, third, *therest):
    """Print three required arguments, then every extra positional argument.

    Args:
        first: first value to print.
        second: second value to print.
        third: third value to print.
        *therest: any further positional arguments; printed together as a list.
    """
    # Each value is wrapped in a one-element tuple. Without that, an argument
    # that is itself a tuple would be unpacked by the % operator and raise
    # TypeError (or print only its single element).
    print("First: %s" % (first,))
    print("Second: %s" % (second,))
    print("Third: %s" % (third,))
    print("And all the rest... %s" % (list(therest),))


foo(1, 2, 3, 4, 5)
Multiple Function Argument by keyword
def bar(first, second, third, **options):
    """Optionally print the sum of the arguments and optionally return the first.

    Args:
        first, second, third: the values to work with.
        **options: keyword switches. ``action="sum"`` prints the sum of the
            three values; ``number="first"`` makes the function return
            ``first``.

    Returns:
        ``first`` when ``number="first"`` was passed, otherwise ``None``.
    """
    requested_action = options.get("action")
    if requested_action == "sum":
        total = first + second + third
        print("The sum is: %d" % total)

    wants_first = options.get("number") == "first"
    return first if wants_first else None


result = bar(1, 2, 3, action="sum", number="first")
print("Result: %d" % result)
RegEx (Regular Expressions) to search for "[on]" or "[off]" in a string
import re

# Compile the expression once so the pattern object can be reused (a slight
# optimization). It matches "on" or "off" wrapped in literal square brackets.
pattern = re.compile(r"\[(on|off)\]")

mixer_line = "Mono: Playback 65 [75%] [-16.50dB] [on]"

# search() scans the whole line and returns the first match, or None.
print(pattern.search(mixer_line))
RegEx(Regular Expression) to check email address
import re def test_email(your_pattern): pattern = re.compile(your_pattern) emails = ["john@example.com", "python-list@python.org", "wha.t.`1an?ug{}ly@email.com"] for email in emails: if not re.match(pattern, email): print("You failed to match %s" % (email)) elif not your_pattern: print("Forgot to enter a pattern!") else: print("Pass") pattern = r"[a-z0-9]+@[a-z0-9]+\.[a-z0-9]+" test_email(pattern)
Exception Handling - try/except block
def do_stuff_with_number(n):
    """Print ``n``."""
    print(n)


def catch_this():
    """Print the five stored values, then a notice for each index from 5 to 19.

    The loop deliberately runs past the end of the sequence to show the
    IndexError being caught.
    """
    the_list = (1, 2, 3, 4, 5)
    for position in range(20):
        try:
            item = the_list[position]
        except IndexError:
            # Raised when the position lies beyond the end of the sequence.
            item = 'out of bound - %d' % position
        do_stuff_with_number(item)


catch_this()
Create word list from a sentence with no duplicate entries
set() removes all the duplicate entries in the array
strings = "my name is Chun Kang and Chun is my name"

# Break the sentence into words first ...
tokens = strings.split()

# ... then build a set, which keeps only one copy of each distinct word.
r = set(tokens)

print(r)
Find overlapped entries from two arrays
a = {"Seoul", "Pusan", "Incheon", "Mokpo"}
b = {"Seoul", "Incheon", "Suwon", "Daejeon", "Gwangjoo", "Taeku"}

# The & operator keeps only the entries present in both sets; the same
# entries come back whichever set is written first.
print(a & b)
print(b & a)
The result will be like below
Result |
---|
{'Seoul', 'Incheon'} {'Seoul', 'Incheon'} |
Find different elements from two arrays based on "symmetric_difference" method
a = {"Jake", "John", "Eric"}
b = {"John", "Jill"}

# The ^ operator keeps the entries found in exactly one of the two sets; the
# same entries come back whichever set is written first.
print(a ^ b)
print(b ^ a)
The result will be like below
Result |
---|
{'Jake', 'Eric', 'Jill'} {'Eric', 'Jake', 'Jill'} |
Find different elements from two arrays based on "difference" method
a = {"Jake", "John", "Eric"}
b = {"John", "Jill"}

# The - operator keeps the entries of the left set that are missing from the
# right set, so here the order of the operands changes the outcome.
print(a - b)
print(b - a)
The result will be like below
Result |
---|
{'Jake', 'Eric'} {'Jill'} |
Combine all elements from two arrays with the "union" method
a = {"Jake", "John", "Eric"}
b = {"John", "Jill"}

# The | operator gathers every entry that appears in either set; an entry
# present in both sets is listed once.
print(a | b)
The result will be like below
Result |
---|
{'John', 'Eric', 'Jake', 'Jill'} |
Print out a set containing all the participants from event A which did not attend event B
# Participant lists for event A and event B.
a = ["Jake", "John", "Eric"]
b = ["John", "Jill"]

# Turn both lists into sets, then remove everyone who also attended event B.
only_in_a = set(a) - set(b)

print(only_in_a)