Some useful functions for working effectively with pandas.
import pandas as pd
import numpy as np
from nbdev.showdoc import *
Create a DataFrame from any CSV file for the demo.
# Load the demo game-log CSV. low_memory=False makes pandas read the file in
# one pass instead of in chunks, which avoids mixed-type inference per column.
df = pd.read_csv(
    '/home/condor/datasets/mlb-games/dataquest-mlb-game-logs/game_logs.csv',
    low_memory=False,
)

# 'deep' asks pandas to introspect object columns for their real footprint
# rather than reporting only the size of the pointers.
df.info(memory_usage='deep')

# df_meminfo and mem_usage are helpers defined elsewhere in this file;
# presumably they report memory usage for a frame / column -- TODO confirm.
df_meminfo(df)
_day_of_week_usage = mem_usage(df['day_of_week'])
print(_day_of_week_usage)
# Integer columns: downcast each one to the smallest unsigned integer dtype
# that can hold its values, then compare memory usage and dtypes.
df_int = df.select_dtypes(include=['int'])
converted_int = df_int.apply(
    lambda column: pd.to_numeric(column, downcast='unsigned')
)

# mem_usage is defined elsewhere in this file -- presumably it returns a
# printable memory figure for the object passed in; TODO confirm.
for _int_frame in (df_int, converted_int):
    print(mem_usage(_int_frame))

# Side-by-side table of each column's dtype before and after the downcast.
compare_ints = pd.concat(
    [df_int.dtypes, converted_int.dtypes],
    axis=1,
    keys=['before', 'after'],
)

# Count how many columns ended up with each dtype (displayed by the notebook).
compare_ints.apply(lambda dtype_column: dtype_column.value_counts())
# Float columns: downcast each one to the smallest float dtype pandas allows,
# then compare memory usage and dtypes.
df_fl = df.select_dtypes(include=['float'])
converted_fl = df_fl.apply(
    lambda column: pd.to_numeric(column, downcast='float')
)

# mem_usage is defined elsewhere in this file -- presumably it returns a
# printable memory figure for the object passed in; TODO confirm.
for _float_frame in (df_fl, converted_fl):
    print(mem_usage(_float_frame))

# Side-by-side table of each column's dtype before and after the downcast.
compare_fls = pd.concat(
    [df_fl.dtypes, converted_fl.dtypes],
    axis=1,
    keys=['before', 'after'],
)

# Count how many columns ended up with each dtype (displayed by the notebook).
compare_fls.apply(lambda dtype_column: dtype_column.value_counts())