718\. Maximum Length of Repeated Subarray

# 718\. Maximum Length of Repeated Subarray similar: https://hackmd.io/@y56/BJXStesvY?type=view ## DP M * N If matched here, here = previous + 1 ## binary search + rolling bash ### binary search https://hackmd.io/@y56/SJ3z5fBq5 "has common sublist of length L" ===> "has common sublist of length x" for all x <= L, Let True/T be has_commom_with(x) At least one True. Set "has common sublist of length 0" always to be True. This means can't find any common Do binary search to find right most True TF....FF TTF...FF TTT...TT ### Rolling hashing list[i : j] into sum of a_n * base^n for n in the length list[i + 1 : j + 1], remove the highest order digit, multiply by base, and then add the new comer Try to reused the overlapping part ## sliding window Time: O((L1+L2) * L1) try all different alignment, totally (L1 + L2) possibilities; In each alignment, find longest matched sublist ``` 22222222222222222222 1111111111 1111111111 1111111111 ... ``` ``` 1111111111 22222222222222222222 22222222222222222222 22222222222222222222 ..... ``` ```python= """ DP """ class Solution: def findLength(self, nums1: List[int], nums2: List[int]) -> int: if not nums1 or not nums1: return 0 # DP: build a grid, when letter matched, mark as T # Try to find len of the longest consecutive diagonal T's """ 123456789 0 1T 2 T 0 1T 0 3 T 4 T 5 T """ # DP ver 2: init all as 0, if matched here, here = upper_left_val + 1 # if upper-left is outside, treat that as a 0 # try to find max num in grid len1 = len(nums1) len2 = len(nums2) dp = [[0] * len2 for _ in range(len1)] # dp[i][j] is for nums1[i] vs nums2[j] seen_max = 0 for i in range(len1): for j in range(len2): upperleft = 0 if 0 <= i - 1 < len1 and 0 <= j - 1 < len2: upperleft = dp[i - 1][j - 1] if nums1[i] == nums2[j]: dp[i][j] = upperleft + 1 seen_max = max(seen_max, dp[i][j]) return seen_max ################################################################################### """ binary search + rolling hash """ # L1 = len(nums1) # L2 = len(nums2) # WOLG, L1 <= L2 # given length L # do binary search to find largest L # so, time: O(log L1) # for each search # (L1 - L + 1) lists in nums1; call (L1 - L + 1) as m1 # (L2 - L + 1) lists in nums2; call (L2 - L + 1) as m2 # m1 <= m2 # Note: hashing a list needs time linear to len of that list # m1 times putting into set_1; casting list to tuple: O(L); # assuming put to a set in more expensive than query # m2 times to query if in set_1; casting list to tuple: O(L); # time: O(m1*L + m2*L) ie O((m1+m2)*L) ie O((L1+L2)*L1) # total time: O((L1+L2) * L1 * log L1) # can do rolling hash => O((L1+L2) * log L1) class RollingHashset: def __init__(self, nums1, length): self.base = 113 self.prime = 10 ** 9 + 7 self.nums1 = nums1 self.length = length self.dict = defaultdict(list) for hash_value, start_index in self.__rolling_hashed(nums1): # Use open Hashing (Separate chaining). self.dict[hash_value].append(start_index) def __rolling_hashed(self, nums): length = self.length result = [] hash_value = 0 highest_power = self.base ** (length - 1) % self.prime for i, num in enumerate(nums): # the previous is already full-length, starting to remove oldest if i >= length: hash_value -= (nums[i - length] * highest_power) % self.prime # raise power, and add new number hash_value = (hash_value * self.base + num) % self.prime # collect it if reaching enough length if i >= self.length - 1: # also record starting index of sublist result.append((hash_value, i - length + 1)) return result def has_any_hit_by(self, nums2): nums1 = self.nums1 for hash_value, start_index_2 in self.__rolling_hashed(nums2): if hash_value in self.dict: # check if truly matched, or just collision for start_index_1 in self.dict[hash_value]: # can use pointers here to avoid list-slicing initiate new lists sub_nums1 = nums1[start_index_1 : start_index_1 + self.length] sub_nums2 = nums2[start_index_2 : start_index_2 + self.length] if sub_nums1 == sub_nums2: return True return False class Solution: def findLength(self, nums1: List[int], nums2: List[int]) -> int: if len(nums1) > len(nums2): nums1, nums2 = nums2, nums1 L1 = len(nums1) L2 = len(nums2) def has_common(length): if length == 0: return True # my handmade hashset to receive query if X from nums2 is in nums1 rolling_hashset = RollingHashset(nums1, length) return rolling_hashset.has_any_hit_by(nums2) # find rightmost True, guaranteed at least one True at leftmost left, right = 0, L1 while left < right: mid = (left + right) // 2 + 1 if has_common(mid): left = mid else: right = mid - 1 return left ''' Sliding window ''' class Solution: def findLength(self, nums1: List[int], nums2: List[int]) -> int: def helper(nums_i, nums_j): seen_max = 0 for i in range(len(nums_i)): # Align nums_i[i] and nums_j[0] j = 0 count = 0 while i < len(nums_i) and j < len(nums_j): if nums_i[i] == nums_j[j]: count += 1 else: count = 0 seen_max = max(seen_max, count) i += 1 j += 1 return seen_max return max(helper(nums1, nums2), helper(nums2, nums1)) ```