Pandas DataFrames
import pandas as pd

# Column-oriented data for the BRICS countries: each key becomes a DataFrame
# column and each list holds that column's values, one entry per country.
# Named brics_data rather than `dict` so the builtin dict type stays usable.
brics_data = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Delhi", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98],
}

# Build the DataFrame; rows get the default integer index 0..4.
brics = pd.DataFrame(brics_data)
print(brics)
Adding index to DataFrame
# Replace the row labels of brics with short country codes, one per row
# in the existing row order.
row_labels = ["BR", "RU", "IN", "CH", "SA"]
brics.index = row_labels

# Show brics again, now labelled by the new index values
print(brics)
Reading CSV by Pandas DataFrame
# pandas provides read_csv and the DataFrame type
import pandas as pd

# Load the contents of cars.csv into a DataFrame named cars
cars = pd.read_csv('cars.csv')

# Display the loaded DataFrame
print(cars)
Reading CSV file by Pandas DataFrame with 1st column as index
# Import pandas and load cars.csv, using its first column as the row index
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Print out the cars_per_cap column as a Pandas Series (single brackets)
print(cars['cars_per_cap'])

# Print out the cars_per_cap column as a Pandas DataFrame (double brackets)
print(cars[['cars_per_cap']])

# Print out a DataFrame with the cars_per_cap and country columns
print(cars[['cars_per_cap', 'country']])
Print partial rows (observations) from a DataFrame
# Import cars data, using the first CSV column as the row index
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Print out first 4 observations (row positions 0 through 3)
print(cars[0:4])

# Print out fifth, sixth, and seventh observation.
# The slice stop is exclusive, so 4:7 covers row positions 4, 5 and 6.
print(cars[4:7])
Data access by loc and iloc in Pandas DataFrame
loc is label-based, and iloc is integer-position-based
# Load the cars data, taking row labels from the first CSV column
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Observation for Japan, selected by integer position with iloc
# (assumes Japan is the row at position 2 in cars.csv -- confirm against the data)
japan_row = cars.iloc[2]
print(japan_row)

# Observations for Australia and Egypt, selected by row label with loc
wanted_labels = ['AUS', 'EG']
print(cars.loc[wanted_labels])
Random number generation
import random


def lottery():
    """Yield seven random draws: six main numbers, then one extra number."""
    # Six draws, each an integer from 1 to 40 (both ends included)
    yield from (random.randint(1, 40) for _ in range(6))

    # Seventh draw, an integer from 1 to 15 (both ends included)
    yield random.randint(1, 15)


# Consume the generator and announce each draw as it is produced
for random_number in lottery():
    print("And the next number is... %d!" % (random_number))
Swap variables' value
# Start with two distinct values
a, b = 1, 2

# Tuple unpacking exchanges the values without a temporary variable
a, b = b, a

print(a, b)
Fibonacci series generator
The first two numbers of the series are always equal to 1, and each subsequent number is the sum of the previous two. The code below uses only two variables to produce the result.
import types


def fib():
    """Yield the Fibonacci series 1, 1, 2, 3, 5, ... without ever stopping.

    Only two variables are kept: the current number and the next one.
    Callers must stop iterating themselves, since the generator is infinite.
    """
    a, b = 1, 1
    while True:
        yield a
        # Slide the pair forward: the next number is the sum of the last two
        a, b = b, a + b


# testing code
if isinstance(fib(), types.GeneratorType):
    print("Good, The fib function is a generator.")

# Print the first 10 numbers; counter starts at 1 so the loop breaks
# right after the tenth value has been printed.
for counter, n in enumerate(fib(), start=1):
    print(n)
    if counter == 10:
        break
Split string as list
# Text to break apart
sentence = "the quick brown fox jumps over the lazy dog"

# split() with no arguments separates the string on whitespace,
# producing a list with one entry per word
words = sentence.split()

print(words)
Filter positive numbers only - 1
# Input values, a mix of positive and negative floats
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]

# Collect the positive values with an explicit loop
newlist = []
for number in numbers:
    # Skip anything that is not strictly greater than zero
    if not number > 0:
        continue
    newlist.append(number)

print(newlist)
Filter positive numbers only - 2
# Input values, a mix of positive and negative floats
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]

# Keep only the positive values, converting each to int
# (int() drops the fractional part)
newlist = [
    int(value)
    for value in numbers
    if value > 0
]

print(newlist)
Multiple Function Argument recognition - the list of "therest" parameters
def foo(first, second, third, *therest):
    """Print the three named arguments, then any extra positional arguments.

    Args:
        first: First required argument.
        second: Second required argument.
        third: Third required argument.
        *therest: Any further positional arguments; printed as a list.
    """
    # Each value is wrapped in a one-element tuple so that % formatting
    # prints it as a single item even when the value is itself a tuple.
    print("First: %s" % (first,))
    print("Second: %s" % (second,))
    print("Third: %s" % (third,))
    print("And all the rest... %s" % (list(therest),))


foo(1, 2, 3, 4, 5)
Multiple Function Argument by keyword
def bar(first, second, third, **options):
    """Optionally print the sum of the arguments and optionally return the first.

    Recognized keyword options:
        action: when equal to "sum", the sum of the three arguments is printed.
        number: when equal to "first", the first argument is returned.

    Returns:
        first when number == "first", otherwise None.
    """
    wants_sum = options.get("action") == "sum"
    if wants_sum:
        total = first + second + third
        print("The sum is: %d" % (total))

    # Guard clause: nothing to return unless the caller asked for "first"
    if options.get("number") != "first":
        return None
    return first


result = bar(1, 2, 3, action="sum", number="first")
print("Result: %d" % (result))