Source code for cr.nimble._src.compression.binary_arrs

# Copyright 2022 CR-Suite Development Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Run Length Encoding of Binary Maps


References:

https://stackoverflow.com/questions/1066758/find-length-of-sequences-of-identical-values-in-a-numpy-array-run-length-encodi
"""

from bitarray import bitarray
from bitarray.util import int2ba, ba2int
import numpy as np

def count_binary_runs(input_arr):
    """Return the lengths of the consecutive runs of 0s and 1s in a binary map.

    Parameters
    ----------
    input_arr : array_like
        Sequence of 0/1 values (anything ``np.asarray`` accepts).

    Returns
    -------
    numpy.ndarray
        Length of every maximal run of identical values, in order of
        appearance. The array is empty when ``input_arr`` is empty.
    """
    # make sure that it is a numpy array
    input_arr = np.asarray(input_arr)
    # an empty map has no runs (and no first/last bit to inspect)
    if input_arr.size == 0:
        return np.zeros(0, dtype=np.intp)
    first_bit = input_arr[0]
    last_bit = input_arr[-1]
    # pad both ends with the complement of the adjacent bit so that the
    # first and the last run are each delimited by a value change
    extended = np.hstack(([1 - first_bit], input_arr, [1 - last_bit]))
    # non-zero differences mark the positions where the value changes
    change_positions, = np.where(np.diff(extended))
    # the distance between consecutive changes is the run length
    return np.diff(change_positions)
# Two-bit codes written in front of every run by encode_binary_arr.
B00 = bitarray('00')  # run of length 1
B01 = bitarray('01')  # run of length 2
B10 = bitarray('10')  # run of length 3
B11 = bitarray('11')  # run of length 4 or more; an explicit length follows
# Width (in bits) of the field that stores the bit length of a long run.
NUM_BITS_RUN_LEN = 4
def encode_binary_arr(input_arr):
    """Encode a binary array into a bit array via run length encoding.

    Stream layout: the first bit is the value of the first sample. Every
    run then contributes a two-bit code: ``00``, ``01`` and ``10`` stand
    for runs of length 1, 2 and 3. The code ``11`` marks a longer run and
    is followed by a ``NUM_BITS_RUN_LEN``-bit field holding the bit length
    of the run, and then by the run length itself in that many bits.

    Parameters
    ----------
    input_arr : array_like
        Non-empty sequence of 0/1 values.

    Returns
    -------
    bitarray
        The encoded bit stream.

    Raises
    ------
    IndexError
        If ``input_arr`` is an empty list or array.
    OverflowError
        If a run is so long that its bit length does not fit in
        ``NUM_BITS_RUN_LEN`` bits.
    """
    # the first bit; cast to a plain int so that NumPy scalar types
    # (e.g. the elements of a boolean mask) are accepted by bitarray
    first_bit = int(input_arr[0])
    # the runs
    runs = count_binary_runs(input_arr)
    # codes for the short runs that need no explicit length
    short_codes = {1: B00, 2: B01, 3: B10}
    # largest bit length that the length field can describe
    max_bit_length = (1 << NUM_BITS_RUN_LEN) - 1
    # build the bit array
    encoded = bitarray()
    encoded.append(first_bit)
    for run in runs:
        run = int(run)
        short_code = short_codes.get(run)
        if short_code is not None:
            encoded.extend(short_code)
            continue
        # run is 4 or more
        bit_length = run.bit_length()
        if bit_length > max_bit_length:
            raise OverflowError(
                f"run of length {run} needs {bit_length} bits; at most "
                f"{max_bit_length} bits are supported")
        encoded.extend(B11)
        # now record number of bits for the run
        encoded.extend(int2ba(bit_length, NUM_BITS_RUN_LEN))
        # now record the run
        encoded.extend(int2ba(run))
    return encoded
def decode_binary_arr(input_bit_arr: bitarray):
    """Decode a run length encoded bit array back into a binary array.

    Parameters
    ----------
    input_bit_arr : bitarray
        Bit stream whose first bit is the value of the first run, followed
        by one run description per run: a two-bit code, plus an explicit
        length when the code is 3.

    Returns
    -------
    numpy.ndarray
        The decoded sequence of 0/1 values.
    """
    bits = input_bit_arr
    # the leading bit carries the value of the first run
    value = bits[0]
    total_bits = len(bits)
    decoded = []
    pos = 1
    while pos < total_bits:
        # every run starts with a two-bit code
        code = ba2int(bits[pos:pos + 2])
        pos += 2
        if code == 3:
            # long run: a fixed-width field gives the width of the length
            width = ba2int(bits[pos:pos + NUM_BITS_RUN_LEN])
            pos += NUM_BITS_RUN_LEN
            run = ba2int(bits[pos:pos + width])
            pos += width
        else:
            # codes 0, 1, 2 stand for runs of length 1, 2, 3
            run = code + 1
        decoded.extend([value] * run)
        # consecutive runs alternate between 0 and 1
        value = 1 - value
    return np.array(decoded)
def binary_compression_ratio(input_arr, output_arr, bits_per_sample=1):
    """Compute the compression ratio achieved on a binary array.

    Parameters
    ----------
    input_arr : sized
        The uncompressed samples; only its length is used.
    output_arr : object with an ``nbytes`` attribute
        The compressed representation; its size is taken as
        ``nbytes * 8`` bits.
    bits_per_sample : int, optional
        Number of bits attributed to each input sample (default 1).

    Returns
    -------
    float
        Uncompressed size in bits divided by compressed size in bits.
    """
    uncompressed_bits = len(input_arr) * bits_per_sample
    compressed_bits = output_arr.nbytes * 8
    return uncompressed_bits / compressed_bits
def binary_space_saving_ratio(input_arr, output_arr, bits_per_sample=1):
    """Compute the space saving ratio achieved on a binary array.

    Parameters
    ----------
    input_arr : sized
        The uncompressed samples; only its length is used.
    output_arr : object with an ``nbytes`` attribute
        The compressed representation; its size is taken as
        ``nbytes * 8`` bits.
    bits_per_sample : int, optional
        Number of bits attributed to each input sample (default 1).

    Returns
    -------
    float
        One minus the ratio of compressed size to uncompressed size,
        both measured in bits.
    """
    compressed_bits = output_arr.nbytes * 8
    uncompressed_bits = len(input_arr) * bits_per_sample
    return 1 - compressed_bits / uncompressed_bits