Monday, 19 October 2020

How to store Python data using hickle? hickle = pickle + HDF5?

import os
import hickle as hkl
import numpy as np

# Demo: round-trip a numpy array through hickle, with and without
# compression, and confirm the loaded data matches what was written.

# create a numpy array of data (32768 float32 ones)
array_obj = np.ones(32768, dtype='float32')

# dump to file
hkl.dump(array_obj, 'test.hkl', mode='w')

# dump the same data again, this time with gzip compression
hkl.dump(array_obj, 'test_gzip.hkl', mode='w', compression='gzip')

# compare file sizes on disk
print('uncompressed: {} bytes'.format(os.path.getsize('test.hkl')))
print('compressed: {} bytes'.format(os.path.getsize('test_gzip.hkl')))

# load the compressed data back
array_hkl = hkl.load('test_gzip.hkl')

# check the loaded array holds the same data as the original
assert array_hkl.dtype == array_obj.dtype
# np.all((a, b)) would only test that every element of both arrays is
# truthy; np.array_equal actually compares shape and element values.
assert np.array_equal(array_hkl, array_obj)


# ===
# hdf5 compression options
# datasets are stored as B-trees, data are split into chunks
# fletcher32=True computes a checksum
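# shuffle=True rearranges the bytes within each chunk to improve compression ratios
# scaleoffset=0 enables the scale-offset filter (may be lossy for floating-point data; check the h5py docs)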
# hkl.dump(array_obj, 'test_lzf.hkl', mode='w', compression='lzf', scaleoffset=0, \
#     chunks=True, shuffle=True, fletcher32=True)

No comments:

Post a Comment