Efficient Data Type Minification in Python using Numpy and Pandas

Python

A function that takes a dataframe and converts the data type of each numerical column to the optimum type based on the size of the values. This reduces the size in memory of the dataframe.

 1|  import numpy as np
 2|  import pandas as pd
 3|  from pandas.api.types import is_string_dtype, is_integer_dtype, is_numeric_dtype
 4|  
 5|  def minification(df):
 6|      for col in df.columns:
 7|          if is_numeric_dtype(df[col]):
 8|              if is_integer_dtype(df[col]):
 9|                      max_value = df[col].max()
10|                      min_value = df[col].min()
11|                      if min_value >= 0:
12|                          if max_value < 255:
13|                              df[col] = df[col].astype(np.uint8)
14|                          elif max_value < 65535:
15|                              df[col] = df[col].astype(np.uint16)
16|                          elif max_value < 4294967295:
17|                              df[col] = df[col].astype(np.uint32)
18|                          else:
19|                              df[col] = df[col].astype(np.uint64)
20|                      else:
21|                          if min_value > np.iinfo(np.int8).min and max_value < np.iinfo(np.int8).max:
22|                              df[col] = df[col].astype(np.int8)
23|                          elif min_value > np.iinfo(np.int16).min and max_value < np.iinfo(np.int16).max:
24|                              df[col] = df[col].astype(np.int16)
25|                          elif min_value > np.iinfo(np.int32).min and max_value < np.iinfo(np.int32).max:
26|                              df[col] = df[col].astype(np.int32)
27|                          elif min_value > np.iinfo(np.int64).min and max_value < np.iinfo(np.int64).max:
28|                              df[col] = df[col].astype(np.int64) 
29|              else:
30|                  df[col] = df[col].astype(np.float32)
31|      return df
32|  
33|  minification(df)
Did you find this snippet useful?

Sign up for free to add this to your code library