Longest Substring or Subsequence with Frequency Requirement Type Problem

Let us look at Longest Substring or Subsequence problem with frequency requirement

  1. Find longest SUBSTRING such that each character in the substring appears at least K times, in the given string s.
  2. Find longest SUBSEQUENCE such that the subsequence appears at least K times in the sense that
    SUBSEQUENCE * k is subsequence of the given string s.

Substring VS subsequence: the difference lies in that SUBSTRING consists of a contiguous piece of s, while the SUBSEQUENCE is made up of characters in s that preserves there original order relation in s.

Let us look at and analyze the two problems below:

Question 1. Leetcode 395 --- Longest Substring with At Least K Repeating Characters
https://leetcode.com/problems/longest-substring-with-at-least-k-repeating-characters/

class Solution:
    def longestSubstring(self, s, k):

        # For counting unique chars in the candidate substring,
        # just need to count those with frequency at least k
        aux = collections.Counter(s)
        unique_count = sum([1 for _, v in aux.items() if v >= k])
        
        
        maxLen = 0
        # loop depending on how many unique chars in the substring
        # idea: use sliding window method to find the longest substring with cnt number of unique chars 
        #       that satisfy the condition that each letter's freq is at least k.
        for cnt in range(1, unique_count + 1):
            # count_map does two things:
            # 1. number of keys is the number of unique chars in the window
            # 2. count the freq of each key: once achieved k, num_formed += 1
            count_map = dict()
            l, r, num_formed = 0, 0, 0

            while r < len(s):
                count_map[s[r]] = 1 + count_map.get(s[r], 0)
                if count_map[s[r]] == k:
                    num_formed += 1
                if num_formed == cnt == len(count_map):
                    maxLen = max(maxLen, r - l + 1)

                # minimize window for unique chars limit
                while l < r and len(count_map) > cnt:
                    if count_map[s[l]] == k:
                        num_formed -= 1

                    if count_map[s[l]] == 1:
                        del count_map[s[l]]
                    else:
                        count_map[s[l]] -= 1
                    l += 1
                r += 1
        return maxLen

Question 2 Leetcode 2014 Longest Subsequence Repeated k Times

# Since each char has to appear at least k times,
# we only need to keep track of these chars (found in one pass).

# Let's keep a list of valid candidates old_cand that appear at least k times
# in string s. Initially, chunk length m=1, it's the same as chars.
# To get the next level (chunks of length m+1), try to append each character
# in the chars we found earlier, and if it is valid (meaning k times of it is a subsequence),
# add to new_cand. Do this until new_cand is empty, or maximum length n//k is reached.

# 1. In order to validate a subsequence t (repeated k times) in s,
# we just need O(n) string match. See find() below.
# 2. Finally, return the lexicographically largest candidate.
# Always try appending the largest valid char first,
# so we won't need to sort at the end.

from collections import Counter, defaultdict


class Solution1:
    def longestSubsequenceRepeatedK(self, s, k):
        n = len(s)
        max_chunk_sz = n // k
        d = Counter(s)
        chars = sorted([c for c in d if d[c] >= k], reverse=True)
        if not chars:
            return ""
        old_cand = chars
        for m in range(2, max_chunk_sz + 1):
            new_cand = []
            for t in self.get_next_level(old_cand, chars):
                if self.find(s, t * k):
                    new_cand.append(t)
            if len(new_cand) == 0:
                break
            old_cand = new_cand
        return old_cand[0]

    def get_next_level(self, cand, chars):
        for s in cand:
            for ch in chars:
                yield s + ch

    def find(self, s, t):
        # find subsequence t in s
        j = 0
        for i in range(len(s)):
            if s[i] == t[j]:
                j += 1
            if j == len(t):
                return True
        return False


print("Solution 1 results:")
obj = Solution1().longestSubsequenceRepeatedK
s = "bb"
k = 2
print(obj(s, k))

s = "ab"
k = 2
print(obj(s, k))

s = "bbabbabbbbabaababab"
k = 3
print(obj(s, k))
print()


class Solution2:
    def longestSubsequenceRepeatedK(self, s, k):
        n = len(s)
        count = Counter(s)
        allowedCount = defaultdict(int)
        for c, cnt in count.items():
            if cnt >= k:
                allowedCount[c] = cnt // k

        sortedLetters = sorted(allowedCount.keys(), reverse=True)

        cand = []
        for c in sortedLetters:
            cnt = defaultdict(int)
            cnt[c] += 1
            cand.append((c, cnt))

        while True:
            newCand = []
            for cur, cnt in cand:
                for c in sortedLetters:
                    allowedCnt = allowedCount[c]
                    if cnt[c] < allowedCnt:
                        if self.isSubSeq(s, (cur + c) * k):
                            newCnt = cnt.copy()
                            newCnt[c] += 1
                            newCand.append((cur + c, newCnt))
            if not newCand:
                break
            cand = newCand
        return cand[0][0] if cand else ""

    def isSubSeq(self, s, t):
        m = len(s)
        n = len(t)
        i = j = 0
        while i < m and j < n:
            if s[i] == t[j]:
                j += 1
            i += 1
        return j == n


print("Solution 2 results:")

obj = Solution2().longestSubsequenceRepeatedK
s = "bb"
k = 2
print(obj(s, k))

s = "ab"
k = 2
print(obj(s, k))

s = "bbabbabbbbabaababab"
k = 3
print(obj(s, k))
Comments (0)