class Solution:
    """Entropy and information-gain helpers for one numeric feature and its class labels."""

    def calculateMaxInfoGain(self, petal_length: List[float], species: List[str]) -> float:
        """Return the largest information gain over all two-way splits.

        The samples are sorted by ``petal_length`` and every split position
        ``1 .. n-1`` of the sorted sequence is tried: the first ``k`` samples
        form one group and the remaining ``n - k`` the other. Splits are
        positional, so samples sharing the same feature value may end up in
        different groups.

        Args:
            petal_length: Feature value of each sample.
            species: Class label of each sample, aligned by index with
                ``petal_length``. If the lengths differ, only the leading
                pairs that exist in both lists are used.

        Returns:
            The maximum of ``H(parent) - sum(weight_i * H(child_i))`` over all
            splits, or ``0.0`` when fewer than two samples are available
            (no split is possible, so nothing can be gained).
        """
        ordered = sorted(zip(petal_length, species), key=lambda pair: pair[0])
        labels = [label for _, label in ordered]
        n = len(labels)
        if n < 2:
            # No way to form two non-empty groups; previously this path
            # could leak float('-inf') to the caller.
            return 0.0

        # Running class counts on each side of the split, updated in O(1)
        # per step instead of rebuilding both label lists for every split.
        left = collections.Counter()
        right = collections.Counter(labels)
        parent_entropy = self._entropyFromCounts(right, n)

        best_gain = float('-inf')
        for left_size, label in enumerate(labels[:-1], start=1):
            # Move one sample from the right group to the left group.
            left[label] += 1
            right[label] -= 1
            right_size = n - left_size

            left_entropy = self._entropyFromCounts(left, left_size)
            right_entropy = self._entropyFromCounts(right, right_size)
            gain = (
                parent_entropy
                - left_entropy * (left_size / n)
                - right_entropy * (right_size / n)
            )
            best_gain = max(best_gain, gain)
        return best_gain

    def calculateEntropy(self, input: List[str]) -> float:
        """Return the Shannon entropy (base 2) of the label distribution in ``input``.

        An empty list has entropy ``0.0``.
        """
        counts = collections.Counter(input)
        return self._entropyFromCounts(counts, sum(counts.values()))

    def _entropyFromCounts(self, counts, total: int) -> float:
        """Return the base-2 entropy of a class-count mapping summing to ``total``."""
        if total <= 0:
            return 0.0
        entropy = 0.0
        for occurrences in counts.values():
            # Zero counts remain in the running counters after decrements;
            # they contribute nothing and log2(0) is undefined.
            if occurrences > 0:
                p = occurrences / total
                entropy -= p * math.log2(p)
        return entropy