Let us look at Longest Substring or Subsequence problem with frequency requirement
Substring VS subsequence: the difference lies in that SUBSTRING consists of a contiguous piece of s, while the SUBSEQUENCE is made up of characters in s that preserves there original order relation in s.
Let us look at and analyze the two problems below:
Question 1. Leetcode 395 --- Longest Substring with At Least K Repeating Characters
https://leetcode.com/problems/longest-substring-with-at-least-k-repeating-characters/
class Solution:
def longestSubstring(self, s, k):
# For counting unique chars in the candidate substring,
# just need to count those with frequency at least k
aux = collections.Counter(s)
unique_count = sum([1 for _, v in aux.items() if v >= k])
maxLen = 0
# loop depending on how many unique chars in the substring
# idea: use sliding window method to find the longest substring with cnt number of unique chars
# that satisfy the condition that each letter's freq is at least k.
for cnt in range(1, unique_count + 1):
# count_map does two things:
# 1. number of keys is the number of unique chars in the window
# 2. count the freq of each key: once achieved k, num_formed += 1
count_map = dict()
l, r, num_formed = 0, 0, 0
while r < len(s):
count_map[s[r]] = 1 + count_map.get(s[r], 0)
if count_map[s[r]] == k:
num_formed += 1
if num_formed == cnt == len(count_map):
maxLen = max(maxLen, r - l + 1)
# minimize window for unique chars limit
while l < r and len(count_map) > cnt:
if count_map[s[l]] == k:
num_formed -= 1
if count_map[s[l]] == 1:
del count_map[s[l]]
else:
count_map[s[l]] -= 1
l += 1
r += 1
return maxLen
Question 2 Leetcode 2014 Longest Subsequence Repeated k Times
# Since each char has to appear at least k times,
# we only need to keep track of these chars (found in one pass).
# Let's keep a list of valid candidates old_cand that appear at least k times
# in string s. Initially, chunk length m=1, it's the same as chars.
# To get the next level (chunks of length m+1), try to append each character
# in the chars we found earlier, and if it is valid (meaning k times of it is a subsequence),
# add to new_cand. Do this until new_cand is empty, or maximum length n//k is reached.
# 1. In order to validate a subsequence t (repeated k times) in s,
# we just need O(n) string match. See find() below.
# 2. Finally, return the lexicographically largest candidate.
# Always try appending the largest valid char first,
# so we won't need to sort at the end.
from collections import Counter, defaultdict
class Solution1:
def longestSubsequenceRepeatedK(self, s, k):
n = len(s)
max_chunk_sz = n // k
d = Counter(s)
chars = sorted([c for c in d if d[c] >= k], reverse=True)
if not chars:
return ""
old_cand = chars
for m in range(2, max_chunk_sz + 1):
new_cand = []
for t in self.get_next_level(old_cand, chars):
if self.find(s, t * k):
new_cand.append(t)
if len(new_cand) == 0:
break
old_cand = new_cand
return old_cand[0]
def get_next_level(self, cand, chars):
for s in cand:
for ch in chars:
yield s + ch
def find(self, s, t):
# find subsequence t in s
j = 0
for i in range(len(s)):
if s[i] == t[j]:
j += 1
if j == len(t):
return True
return False
print("Solution 1 results:")
obj = Solution1().longestSubsequenceRepeatedK
s = "bb"
k = 2
print(obj(s, k))
s = "ab"
k = 2
print(obj(s, k))
s = "bbabbabbbbabaababab"
k = 3
print(obj(s, k))
print()
class Solution2:
def longestSubsequenceRepeatedK(self, s, k):
n = len(s)
count = Counter(s)
allowedCount = defaultdict(int)
for c, cnt in count.items():
if cnt >= k:
allowedCount[c] = cnt // k
sortedLetters = sorted(allowedCount.keys(), reverse=True)
cand = []
for c in sortedLetters:
cnt = defaultdict(int)
cnt[c] += 1
cand.append((c, cnt))
while True:
newCand = []
for cur, cnt in cand:
for c in sortedLetters:
allowedCnt = allowedCount[c]
if cnt[c] < allowedCnt:
if self.isSubSeq(s, (cur + c) * k):
newCnt = cnt.copy()
newCnt[c] += 1
newCand.append((cur + c, newCnt))
if not newCand:
break
cand = newCand
return cand[0][0] if cand else ""
def isSubSeq(self, s, t):
m = len(s)
n = len(t)
i = j = 0
while i < m and j < n:
if s[i] == t[j]:
j += 1
i += 1
return j == n
print("Solution 2 results:")
obj = Solution2().longestSubsequenceRepeatedK
s = "bb"
k = 2
print(obj(s, k))
s = "ab"
k = 2
print(obj(s, k))
s = "bbabbabbbbabaababab"
k = 3
print(obj(s, k))