# Max information gain
class Solution:
    """Find the best information gain obtainable from one split of labelled samples."""

    def calculateMaxInfoGain(self, petal_length: List[float], species: List[str]) -> float:
        """Return the largest information gain over all two-way splits.

        The samples are paired up, ordered by ``petal_length`` and cut at every
        position of that order, so each candidate split has a non-empty left
        and right group. The gain of a split is the entropy of ``species``
        minus the size-weighted entropies of the two groups.

        Note that cut positions between samples with equal ``petal_length``
        are evaluated as well.

        Args:
            petal_length: Measurement used to order the samples.
            species: Class label of each sample, parallel to ``petal_length``.
                ``zip`` pairs the two lists, so extra items in the longer one
                are ignored when forming the groups.

        Returns:
            The maximum gain in bits, or ``0.0`` when ``species`` is empty or
            fewer than two paired samples exist (no split is possible).
        """
        if len(species) == 0:
            return 0.0

        # sorted() is stable and the key is the measurement only, so samples
        # with equal petal_length keep their input order.
        ordered = sorted(zip(petal_length, species), key=lambda pair: pair[0])
        labels = [label for _, label in ordered]
        total = len(labels)

        parent_entropy = self.calculateEntropy(species)

        # Running class counts on each side of the cut: moving the cut one
        # position to the right shifts exactly one label from right to left,
        # so no group has to be re-counted from scratch.
        left_counts = collections.Counter()
        right_counts = collections.Counter(labels)
        best_gain = None
        for left_size, label in enumerate(labels[:-1], start=1):
            left_counts[label] += 1
            right_counts[label] -= 1
            right_size = total - left_size

            left_entropy = self._entropyFromCounts(left_counts.values(), left_size)
            right_entropy = self._entropyFromCounts(right_counts.values(), right_size)
            gain = (
                parent_entropy
                - left_entropy * (left_size * 1.0 / total)
                - right_entropy * (right_size * 1.0 / total)
            )
            if best_gain is None or gain > best_gain:
                best_gain = gain

        # With fewer than two samples the loop never runs; report "no gain"
        # instead of leaking a -inf sentinel to the caller.
        return 0.0 if best_gain is None else best_gain

    def calculateEntropy(self, input: List[str]) -> float:
        """Return the Shannon entropy, in bits, of the labels in ``input``.

        Args:
            input: Class labels; any hashable values accepted by ``Counter``.
                (The name shadows the builtin but is kept for callers that
                pass it by keyword.)

        Returns:
            ``-sum(p * log2(p))`` over the distinct labels, ``0.0`` for an
            empty list.
        """
        counts = collections.Counter(input)
        return self._entropyFromCounts(counts.values(), sum(counts.values()))

    @staticmethod
    def _entropyFromCounts(counts, total) -> float:
        """Return the entropy in bits of a distribution given as class counts.

        ``counts`` is an iterable of per-class counts summing to ``total``.
        Zero counts are skipped because ``log2(0)`` is undefined and such a
        class contributes nothing to the entropy.
        """
        entropy = 0.0
        for count in counts:
            if count:
                p = count * 1.0 / total
                entropy += -p * math.log2(p)
        return entropy
# Comments (0)