I'm trying to create a shannon_entropy function that works on both python 2 and 3. The following code works in python 3, however, the statement to evaluate norm_counts returns a narray equaling 0 on python 2 and returns correctly in python 3.
I've broken down and simplified the code below:
import unittest
import numpy as np
def shannon_ent(labels, base=256):
value, counts = np.unique(labels, return_counts=True)
sum_counts = counts.sum()
norm_counts = counts / sum_counts
print(norm_counts)
base = e if base is None else base
logged_counts = np.log(norm_counts)
logged_base = np.log(base)
logged = logged_counts/logged_base
final = -(norm_counts * logged)
return final.sum()
class function_tests(unittest.TestCase):
def test_shannon_ent(self):
chunk = [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126]
ent = shannon_ent(chunk)
print('*** is: {}'.format(ent))
self.assertEqual(ent, 0.8212319510413685)
if __name__ == '__main__':
unittest.main()
The following output is given:
Python 2
# python unittest_binGraph.py
(array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1]), 95)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
E
======================================================================
ERROR: test_shannon_ent (__main__.function_tests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "unittest_binGraph.py", line 39, in test_shannon_ent
ent = shannon_ent(chunk)
File "unittest_binGraph.py", line 22, in shannon_ent
logged_counts = np.log(norm_counts)
FloatingPointError: divide by zero encountered in log
----------------------------------------------------------------------
Ran 1 test in 0.007s
FAILED (errors=1)
Python 3
# python unittest_binGraph.py
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 95
[0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
0.01052632 0.01052632 0.01052632 0.01052632 0.01052632]
*** is: 0.8212319510413685
.
----------------------------------------------------------------------
Ran 1 test in 0.007s
OK
Unless someone has a better way to calculate entropy?! I am currently using scripy and statistics modules in the code as well.