class Solution:
    """Entropy and information-gain helpers for a single numeric feature."""

    def calculateMaxInfoGain(self, petal_length: List[float], species: List[str]) -> float:
        """Return the largest information gain over all two-way splits.

        The samples are sorted by ``(petal_length, species)``. Every cut
        position ``i`` with ``1 <= i < n`` divides the sorted labels into a
        left part of ``i`` samples and a right part of ``n - i`` samples.
        The gain of a cut is the entropy of all labels minus the
        size-weighted entropy of the two parts; the maximum gain over all
        cuts is returned.

        Args:
            petal_length: Feature value of each sample.
            species: Class label of each sample, paired with
                ``petal_length`` by position. If the two inputs differ in
                length, only the pairs ``zip`` can form are used.

        Returns:
            The maximum information gain in bits, or ``0.0`` when fewer than
            two samples are available (no split is possible).
        """
        if not petal_length or not species:
            return 0.0

        # zip() stops at the shorter input, so measure the pairs actually
        # formed instead of trusting len(petal_length).
        pairs = sorted(zip(petal_length, species))
        n = len(pairs)
        if n < 2:
            # A single sample cannot be split; without this guard the
            # running minimum would stay at +inf and the result be -inf.
            return 0.0

        labels = [label for _, label in pairs]

        # NOTE(review): cuts are purely positional, so samples sharing the
        # same petal_length can land on opposite sides of a cut. Confirm
        # this is intended if a real threshold split is required.
        best_split_entropy = math.inf
        for i in range(1, n):
            left_entropy = self.calculateEntropy(labels[:i])
            right_entropy = self.calculateEntropy(labels[i:])
            weighted = left_entropy * i / n + right_entropy * (n - i) / n
            best_split_entropy = min(best_split_entropy, weighted)

        return self.calculateEntropy(labels) - best_split_entropy

    def calculateEntropy(self, species: List[str]) -> float:
        """Return the Shannon entropy, in bits, of the labels in ``species``.

        Args:
            species: Sequence of class labels (any sized iterable of
                hashable values works).

        Returns:
            ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
            distinct label; ``0.0`` when there is at most one distinct
            label, which includes the empty sequence.
        """
        counts = collections.Counter(species)
        if len(counts) <= 1:
            return 0.0

        total = len(species)
        # Divide each count once rather than summing 1/total repeatedly,
        # which would accumulate floating-point rounding error.
        return -sum(
            (count / total) * math.log2(count / total)
            for count in counts.values()
        )