8/22/23

jsgoller1 · jsgoller1 · commit a35f0ba4b385 · 2023-08-23T15:12:29.000-04:00
- nearest_repeated_entries.py
- smallest_subarray_covering_set.py
diff --git a/elements-of-programming-interviews/problem_mapping.js b/elements-of-programming-interviews/problem_mapping.js
@@ -1520,7 +1520,7 @@ problem_mapping = {
                 "total": 505
             },
             "Python: nearest_repeated_entries.py": {
-                "passed": 0,
+                "passed": 505,
                 "total": 505
             }
         },
@@ -1534,7 +1534,7 @@ problem_mapping = {
                 "total": 904
             },
             "Python: smallest_subarray_covering_set.py": {
-                "passed": 0,
+                "passed": 904,
                 "total": 904
             }
         },
diff --git a/elements-of-programming-interviews/python/nearest_repeated_entries.py b/elements-of-programming-interviews/python/nearest_repeated_entries.py
@@ -2,13 +2,50 @@
 
 from test_framework import generic_test
 
+"""
+in: list of strs
+out: int, index distance between the two closest repetitions of the same word (-1 if none)
+
+edges:
+    - empty list
+    - no repetitions (return -1)
+    - all repetitions
+    - all equal distance 
+
+O(n) space, time:
+    - hash every word to the index of its most recent appearance
+    - hash every word to distance since it last occurred (inf if first occurrence)
+        - don't need this; just track the best (smallest) distance so far
+    - return min distance
+
+Can't do better than O(n) time; every word must be examined.
+Can we do better than O(n) space? Probably not: the last index of each distinct word must be remembered.
+"""
+import math
+
 
 def find_nearest_repetition(paragraph: List[str]) -> int:
-    # TODO - you fill in here.
-    return 0
+    best = float('inf')
+    last_idx = {}
+    for i, word in enumerate(paragraph):
+        if word in last_idx:
+            best = min(i-last_idx[word], best)
+        last_idx[word] = i
+    return best if not math.isclose(best, float('inf')) else -1
 
 
 if __name__ == '__main__':
+    """
+    cases = [
+        ([], -1),
+        (["no", "a", "b"], -1),
+        (["no", "a", "b", "no"], 3),
+        (["no", "no", "b", "no"], 1)
+    ]
+    for arg, expected in cases:
+        actual = find_nearest_repetition(arg)
+        assert actual == expected, f"{actual} != {expected}"
+    """
     exit(
         generic_test.generic_test_main('nearest_repeated_entries.py',
                                        'nearest_repeated_entries.tsv',
diff --git a/elements-of-programming-interviews/python/smallest_subarray_covering_set.py b/elements-of-programming-interviews/python/smallest_subarray_covering_set.py
@@ -8,11 +8,57 @@
 
 Subarray = collections.namedtuple('Subarray', ('start', 'end'))
 
+"""
+["a", "b", "c", "b", "a", "d", "c", "a", "e", "a", "a", "b", "e"]
+       |    |    |              |         |              |     |
+       1    2    1              2         3              1     3 
+
+   1 ... 2 ...3 ... 1,2,3 ... 3 ... 2 ... 1
+
+"""
+
+# b...... b c e ......... b
+# first trim words from the left/right ends that aren't in keywords
+
+# Edge cases: no keywords, empty paragraph, or no keywords in the paragraph?
+# Assume for now that none of these can happen.
+
+
+def find_smallest_subarray_covering_set_attempt_one(paragraph: List[str],
+                                                    keywords: Set[str]) -> Subarray:
+    subarr = collections.deque([])
+    current_kwords = collections.Counter()
+    left, right = 0, len(paragraph)-1
+    for i, word in enumerate(paragraph):
+        if word in keywords:
+            current_kwords[word] += 1
+            subarr.append(i)
+        while subarr and current_kwords[paragraph[subarr[0]]] > 1:
+            current_kwords[paragraph[subarr[0]]] -= 1
+            subarr.popleft()
+        if len(current_kwords) == len(keywords) and ((subarr[-1] - subarr[0]) <= (right-left)):
+            left, right = subarr[0], subarr[-1]
+    return Subarray(left, right)
+
 
 def find_smallest_subarray_covering_set(paragraph: List[str],
                                         keywords: Set[str]) -> Subarray:
-    # TODO - you fill in here.
-    return Subarray(0, 0)
+    counts = collections.Counter()
+    best = Subarray(0, len(paragraph)-1)
+    left = 0
+    for right, word in enumerate(paragraph):
+        if word not in keywords:
+            continue
+        counts[word] += 1
+
+        while left < len(paragraph) and not (counts[paragraph[left]] == 1):
+            if paragraph[left] in counts:
+                counts[paragraph[left]] -= 1
+            left += 1
+
+        if len(counts) == len(keywords) and ((right - left) <= (best.end - best.start)):
+            best = Subarray(left, right)
+    return best
 
 
 @enable_executor_hook
@@ -37,6 +83,19 @@ def find_smallest_subarray_covering_set_wrapper(executor, paragraph, keywords):
 
 
 if __name__ == '__main__':
+    """
+    cases = [
+        (["a", "b", "c", "b", "a", "d", "c", "a", "e", "a", "a", "b", "e"], set(["b", "c", "e"]), Subarray(6, 11)),
+        (["a", "b", "c", "e", "a"], set(["b", "c", "e"]), Subarray(1, 3)),
+        (["a", "b", "c", "e", "a", "e"], set(["b", "c", "e"]), Subarray(1, 3)),
+        (["b", "a", "b", "c", "e", "a", "e"], set(["b", "c", "e"]), Subarray(2, 4)),
+        (["b", "a", "c", "c", "c", "b", "c", "e", "a", "e"], set(["b", "c", "e"]), Subarray(5, 7))
+
+    ]
+    for paragraph, keywords, expected in cases:
+        actual = find_smallest_subarray_covering_set(paragraph, keywords)
+        assert expected == actual, f"\nparagraph: {paragraph}\nkeywords: {keywords}\n{expected} != {actual}"
+    """
     exit(
         generic_test.generic_test_main(
             'smallest_subarray_covering_set.py',