8/20/23

jsgoller1 · jsgoller1 · commit f68c64db1883 · 2023-08-21T12:30:37.000-04:00
- Anki bump
- replace_and_remove.py
- spreadsheet_encoding.py
- substring_match.py
diff --git a/anki/Competitive Programming.apkg b/anki/Competitive Programming.apkg
diff --git a/elements-of-programming-interviews/problem_mapping.js b/elements-of-programming-interviews/problem_mapping.js
@@ -518,7 +518,7 @@ problem_mapping = {
                 "total": 10000
             },
             "Python: spreadsheet_encoding.py": {
-                "passed": 0,
+                "passed": 10000,
                 "total": 10000
             }
         },
@@ -532,7 +532,7 @@ problem_mapping = {
                 "total": 501
             },
             "Python: replace_and_remove.py": {
-                "passed": 0,
+                "passed": 501,
                 "total": 501
             }
         },
@@ -644,7 +644,7 @@ problem_mapping = {
                 "total": 835
             },
             "Python: substring_match.py": {
-                "passed": 0,
+                "passed": 835,
                 "total": 835
             }
         }
diff --git a/elements-of-programming-interviews/python/replace_and_remove.py b/elements-of-programming-interviews/python/replace_and_remove.py
@@ -5,9 +5,79 @@
 from test_framework.test_utils import enable_executor_hook
 
 
+"""
+input: size (int), list of strings
+output: int, size of new list? (assume so for now)
+Can assume sufficient size (lists aren't immutable anyway)
+
+Edge cases:
+- empty string
+- no b's or a's
+- all b's, all a's
+
+Remove every b, replace every a with 2 ds
+- Trivial: O(n) storage, write to new string
+- Better if we can use O(c) storage
+- can't expand a's in a forward pass: writing "dd" overwrites characters we've not yet examined
+- can safely assume enough space for final result
+
+- if we first delete every b, we have just a string with other chars and a's,
+  which should give a pretty wide margin (may not, though)
+  - worst edge case: array is 12 a's with 24 spaces; but I don't think
+    it could catch up unless there wasn't enough room
+
+plan (3 O(n) passes):
+- delete b's and downshift; 
+- definitely will have space starting at right end; start from last character for a replacement
+- then do a pass to downshift 
+"""
+
+import string
+
+
+def delete_bs(size: int, s: List[str]) -> int:
+    """Compact the first `size` entries of `s` in place, dropping every 'b'.
+
+    Surviving entries keep their relative order and are packed at the front
+    of `s`. Returns how many entries were kept; anything at or beyond that
+    index is left as it was.
+    """
+    write = 0
+    for read in range(size):
+        ch = s[read]
+        if ch != 'b':
+            s[write] = ch
+            write += 1
+    return write
+
+
+def reversed_replace(size: int, s: List[str]) -> int:
+    """Expand each 'a' among the first `size` entries of `s` into two 'd's.
+
+    The expanded text is written right-to-left so that it ends at the last
+    slot of `s`; every entry other than 'a' is copied unchanged. The caller
+    must make sure `len(s)` is at least `size` plus the number of 'a's,
+    otherwise the write cursor overtakes the read cursor and unread entries
+    are clobbered.
+
+    Returns the index in `s` at which the expanded text starts.
+    """
+    read, write = size - 1, len(s) - 1
+    while read >= 0:
+        if s[read] == 'a':
+            # Only 'a' is expanded. The earlier test also matched anything
+            # "in string.whitespace", which turned whitespace characters
+            # (and empty-string entries, since "" is in every string) into
+            # "dd" as well.
+            s[write] = 'd'
+            write -= 1
+            s[write] = 'd'
+            write -= 1
+        else:
+            s[write] = s[read]
+            write -= 1
+        read -= 1
+    return write + 1
+
+
+def leftshift(start: int, s: List[str]):
+    """Copy `s[start:]` down to the front of `s`, one element at a time.
+
+    Returns the number of elements moved. Entries past that count are not
+    cleared.
+    """
+    moved = 0
+    for src in range(start, len(s)):
+        s[moved] = s[src]
+        moved += 1
+    return moved
+
+
 def replace_and_remove(size: int, s: List[str]) -> int:
-    # TODO - you fill in here.
-    return 0
+    """Drop every 'b' and turn every 'a' into "dd" in `s`, in place.
+
+    Only the first `size` entries of `s` are treated as input. The work is
+    done in three linear passes: compact away the 'b's, expand the 'a's
+    right-to-left against the end of the list, then slide the result back
+    down to index 0. Returns the length of the rewritten prefix.
+    """
+    kept = delete_bs(size, s)
+    result_start = reversed_replace(kept, s)
+    return leftshift(result_start, s)
 
 
 @enable_executor_hook
@@ -17,6 +87,11 @@ def replace_and_remove_wrapper(executor, size, s):
 
 
 if __name__ == '__main__':
+    """
+    # s = ["a", "b", "a", "b", "a", "b", "a", "b", "", "", "", "", "", "", ""]
+    # print(replace_and_remove(8, s))
+    # print(s)
+    """
     exit(
         generic_test.generic_test_main('replace_and_remove.py',
                                        'replace_and_remove.tsv',
diff --git a/elements-of-programming-interviews/python/spreadsheet_encoding.py b/elements-of-programming-interviews/python/spreadsheet_encoding.py
@@ -1,9 +1,26 @@
 from test_framework import generic_test
 
+"""
+Input: string (could be large, but fits in mem)
+Output: int (same constraints; represents string)
+
+- "" invalid
+- A = 1, Z = 26
+- AA = 1*(26^0) + 1*(26^1) = 27
+- No 0, no negatives
+"""
+import string
+
+# Digit value of each column letter: 'A' -> 1 ... 'Z' -> 26 (there is no zero).
+DIGITS = dict(zip(string.ascii_uppercase, range(1, 27)))
+
 
 def ss_decode_col_id(col: str) -> int:
-    # TODO - you fill in here.
-    return 0
+    """Convert a spreadsheet column label such as "AA" to its column number.
+
+    Letters are read from the right; the k-th letter from the right
+    contributes its digit value times 26**k. An empty label yields 0 and a
+    character outside A-Z raises KeyError.
+    """
+    return sum(
+        DIGITS[letter] * 26 ** power
+        for power, letter in enumerate(reversed(col))
+    )
 
 
 if __name__ == '__main__':
diff --git a/elements-of-programming-interviews/python/substring_match.py b/elements-of-programming-interviews/python/substring_match.py
@@ -1,9 +1,86 @@
 from test_framework import generic_test
 
 
+"""
+Brute force: check every substring in t of len(s). O(n^2) time.
+
+Naive approach; for each character in t, check if it's the first character in s. If so,
+see if the next in t is a match on the next in s. Continue this until a match or failure.
+If a failure occurs, don't go back to the start of t (we already checked all characters between
+it and the failure); continue from failure. O(n) time, O(c) space. Does this work? No. 
+
+This approach fails if a match attempt that starts at t[i] fails at t[i+n],
+so we skip over all chars between t[i] and t[i+n], but would've found a match if we started
+at t[i+m] where m < n. When might this occur? If all preceding n-1 characters matched,
+but the nth did not, and we would somehow have to be able to "slide P forward in T" and
+obtain a match. Example: P = aaacaaacd, T = aaacaaacaaacd.
+
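+Knuth-Morris-Pratt: Suppose we followed the naive approach, but instead of resetting p_idx
+to 0 each time a failure occurs, we "backtrack" p_idx only as far as the longest proper prefix
+of P that is also a suffix of the part we have already matched, and never move backwards in T.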
+
+"""
+
+
+def compute_lps(pattern):
+    """Build the KMP failure table for `pattern`.
+
+    Entry k of the returned list is the length of the longest proper prefix
+    of pattern[:k + 1] that is also a suffix of it. Entry 0 is always 0.
+    """
+    lps = [0] * len(pattern)
+    # Length of the prefix currently matched against the suffix ending just
+    # before `end`; it is also the index of the next prefix char to compare.
+    matched = 0
+    for end in range(1, len(pattern)):
+        # On a mismatch, fall back to the next-longest prefix that is still
+        # a suffix of what came before, until one can be extended or we are
+        # back at the empty prefix.
+        while matched and pattern[end] != pattern[matched]:
+            matched = lps[matched - 1]
+        if pattern[end] == pattern[matched]:
+            matched += 1
+        lps[end] = matched
+    return lps
+
+
+def kmp(t: str, s: str) -> int:
+    """Return the index in `t` where `s` first occurs, or -1 if it never does.
+
+    An empty `s` is reported as found at index 0. The text is scanned once,
+    left to right; the position in `t` never moves backwards.
+    """
+    if s == "":
+        return 0
+    fallback = compute_lps(s)
+    matched = 0  # how many leading chars of s are currently matched
+    for pos, ch in enumerate(t):
+        # Mismatch after a partial match: shrink the match to the longest
+        # prefix of s that is still a suffix of what we have consumed.
+        while matched and ch != s[matched]:
+            matched = fallback[matched - 1]
+        if ch == s[matched]:
+            matched += 1
+            if matched == len(s):
+                # pos is the last char of the occurrence.
+                return pos + 1 - matched
+    return -1
+
+
 def rabin_karp(t: str, s: str) -> int:
-    # TODO - you fill in here.
-    return 0
+    """Return the index of the first occurrence of `s` in `t`, or -1.
+
+    Despite the name, no rolling hash is involved: the search is delegated
+    to `kmp`.
+    """
+    return kmp(t, s)
 
 
 if __name__ == '__main__':