Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
import pandas as pd

# Column-oriented data: each key is a column name and each list holds that
# column's values, one entry per BRICS country.
# Named brics_data rather than dict so the built-in dict type is not shadowed.
brics_data = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Delhi", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98],
}

# Build the DataFrame; no index is given, so rows get the default 0..4 labels
brics = pd.DataFrame(brics_data)
print(brics)

...


Adding index to DataFrame

Code Block
# Set the index for brics: replace the default integer row labels with
# two-letter country codes, one label per existing row, in row order.
# Assumes brics is the five-row DataFrame built in the previous snippet.
brics.index = ["BR", "RU", "IN", "CH", "SA"]

# Print out brics with new index values
print(brics)


Reading CSV by Pandas DataFrame

Code Block
# Load pandas under its usual alias
import pandas as pd

# Parse the cars.csv data into a DataFrame: cars
csv_path = 'cars.csv'
cars = pd.read_csv(csv_path)

# Show the loaded table
print(cars)


Reading a CSV file by Pandas DataFrame with 1st column as index

Code Block
# Import pandas and cars.csv
import pandas as pd
bricscars = pd.DataFrame(dict)

brics.toread_csv('examplecars.csv')

Adding index to DataFrame

Code Block
# Set the index for brics
brics.index = ["BR", "RU", "IN", "CH", "SA"], index_col = 0)

# Print out country column as Pandas Series
print(cars['cars_per_cap'])

# Print out brics with new index values
print(brics)

Reading CSV by Pandas DataFrame

Code Block
# Load pandas under its usual alias
import pandas as pd

# Parse the cars.csv data into a DataFrame: cars
csv_path = 'cars.csv'
cars = pd.read_csv(csv_path)

# Show the loaded table
print(cars)

Reading CSV file by Pandas DataFrame with 1st column as index

Code Block
# Import pandas and cars.csv
# index_col = 0 makes the first CSV column the row labels of the DataFrame
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out cars_per_cap column as Pandas Series (single brackets)
print(cars['cars_per_cap'])

# Print out cars_per_cap column as Pandas DataFrame (double brackets)
print(cars[['cars_per_cap']])

# Print out DataFrame with cars_per_cap and country columns
print(cars[['cars_per_cap', 'country']])

Print partial rows (observations) from a DataFrame

Code Block
# Import cars data, using the first CSV column as the row labels
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out first 4 observations (positions 0-3; the slice end is exclusive)
print(cars[0:4])

# Print out fifth, sixth, and seventh observation (positions 4, 5 and 6).
# The slice end is exclusive, so the stop must be 7 to include the seventh row.
print(cars[4:7])

Data access by loc and iloc in Pandas DataFrame

loc is label-based, and iloc is integer index based

country column as Pandas DataFrame
print(cars[['cars_per_cap']])

# Print out DataFrame with country and drives_right columns
print(cars[['cars_per_cap', 'country']])


Save a Pandas DataFrame in CSV format

Code Block
import pandas as pd

# Column-oriented data: each key is a column name and each list holds that
# column's values, one entry per BRICS country.
# Named brics_data rather than dict so the built-in dict type is not shadowed.
brics_data = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Delhi", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98],
}

brics = pd.DataFrame(brics_data)

# Write the DataFrame to example.csv in the current working directory
brics.to_csv('example.csv')


Save a Pandas DataFrame in CSV format with header and no index

Code Block
from pandas import DataFrame

# Sample data: one list of values per column
Cars = {
    'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
    'Price': [22000, 25000, 27000, 35000],
}

df = DataFrame(Cars, columns=['Brand', 'Price'])

# Export with the header row but without the index column.
# Don't forget to add '.csv' at the end of the path.
export_csv = df.to_csv(
    r'C:\Users\Ron\Desktop\export_dataframe.csv',
    index=None,
    header=True,
)

print(df)


Print partial rows (observations) from a Pandas DataFrame

Code Block
# Import cars data, using the first CSV column as the row labels
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out first 4 observations (positions 0-3; the slice end is exclusive)
print(cars[0:4])

# Print out fifth, sixth, and seventh observation (positions 4, 5 and 6).
# The slice end is exclusive, so the stop must be 7 to include the seventh row.
print(cars[4:7])


Data access by loc and iloc in Pandas DataFrame

loc is label-based, and iloc is integer index based

Code Block
# Load the cars data, using the first CSV column as the row labels
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Print out observation for Japan: iloc selects by integer position,
# here position 2 (assumes the third row of cars.csv is Japan)
japan = cars.iloc[2]
print(japan)

# Print out observations for Australia and Egypt: loc selects by row label
wanted_labels = ['AUS', 'EG']
print(cars.loc[wanted_labels])


Sort a Pandas DataFrame in an ascending order

Info
df.sort_values(by=['Brand'], inplace=True)


Code Block
# Sort a DataFrame by Brand in ascending order
from pandas import DataFrame

column_order = ['Brand', 'Price', 'Year']
Cars = {
    'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
    'Price': [22000, 25000, 27000, 35000],
    'Year': [2015, 2013, 2018, 2018],
}

df = DataFrame(Cars, columns=column_order)

# Reorder the rows of df itself; no direction is given, so the
# default (ascending) applies
df.sort_values(by='Brand', inplace=True)

print(df)


Sort a Pandas DataFrame in a descending order


Info
df.sort_values(by=['Brand'], inplace=True, ascending=False)


Code Block
# Sort a DataFrame by Brand in descending order
from pandas import DataFrame

column_order = ['Brand', 'Price', 'Year']
Cars = {
    'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
    'Price': [22000, 25000, 27000, 35000],
    'Year': [2015, 2013, 2018, 2018],
}

df = DataFrame(Cars, columns=column_order)

# Reorder the rows of df itself, largest Brand value first
df.sort_values(by='Brand', ascending=False, inplace=True)

print(df)

Sort a Pandas DataFrame by multiple columns


Info
df.sort_values(by=['First Column','Second Column',...], inplace=True)


Code Block
# sort by multiple columns
from pandas import DataFrame

Cars = {'Brand': ['Honda Civic','Toyota Corolla','Ford Focus','Audi A4'],
        'Price': [22000,25000,27000,35000],
        'Year': [2015,2013,2018,2018]
        }

df = DataFrame(Cars, columns= ['Brand', 'Price','Year'])

# sort by multiple columns: Year first, then Price to order rows that
# share the same Year; inplace=True reorders df itself
df.sort_values(by=['Year','Price'], inplace=True)

# The closing parenthesis was missing here, which made the snippet a SyntaxError
print (df)
Code Block
# Load the cars data, using the first CSV column as the row labels
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Print out observation for Japan: iloc selects by integer position,
# here position 2 (assumes the third row of cars.csv is Japan)
japan = cars.iloc[2]
print(japan)

# Print out observations for Australia and Egypt: loc selects by row label
wanted_labels = ['AUS', 'EG']
print(cars.loc[wanted_labels])


Random number generation

...