Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
fef1760
Add dict to stdlib (WIP)
dpdani Jun 4, 2025
b58db2e
changes
dpdani Jun 10, 2025
a2183b3
changes
dpdani Jun 15, 2025
73fe55b
Merge branch 'main' into feature/dict
dpdani Jun 15, 2025
53e67ec
changes
dpdani Jun 15, 2025
b9f2a86
it passes! 🎉
dpdani Jun 17, 2025
d2ec0e7
Merge branch 'main' into feature/dict
dpdani Oct 1, 2025
1c8a373
void -> None
dpdani Oct 1, 2025
70adc99
unhashable type prevents compilation 👌
dpdani Oct 1, 2025
721996b
use array of structs
dpdani Oct 1, 2025
17a6a47
tests!
dpdani Oct 1, 2025
ef5bfc4
tests pass!
dpdani Oct 2, 2025
460b083
Merge branch 'main' into feature/dict
dpdani Oct 2, 2025
9ac3f9f
actually implement resizing
dpdani Oct 3, 2025
ee1c276
.get(), but using `default` as a parameter is a big ouch
dpdani Oct 3, 2025
af75bac
cleanup
dpdani Oct 3, 2025
7839061
default -> default_
dpdani Oct 4, 2025
e6dab95
refactor index lookups to avoid duplicated code in inserts
dpdani Oct 4, 2025
690eb1c
cleanups
dpdani Oct 4, 2025
79eb71a
delete
dpdani Oct 4, 2025
0ec3e69
implement `__contains__`, `__eq__`, and `__fastiter__`
dpdani Oct 4, 2025
8ed4030
cleanup
dpdani Oct 4, 2025
246151f
module dict.spy -> _dict.spy
dpdani Oct 4, 2025
c0599f6
asserts
dpdani Oct 4, 2025
807d5c3
review
dpdani Oct 4, 2025
b6a13ea
removed comments
dpdani Oct 4, 2025
c4ebbd9
Merge branch 'refs/heads/main' into feature/dict
dpdani Oct 6, 2025
f689bde
reworked hash() builtin
dpdani Oct 6, 2025
d2d8884
fix
dpdani Oct 6, 2025
724a338
design comment
dpdani Oct 6, 2025
5df6c2d
fix
dpdani Oct 6, 2025
b88bfc6
move to static inline in the header
dpdani Oct 7, 2025
db39ed9
improve comment
dpdani Oct 7, 2025
2a3c805
Merge branch 'main' into feature/dict
dpdani Oct 7, 2025
b74e7b8
fix
dpdani Oct 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 196 additions & 0 deletions stdlib/dict.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
from hash import hash
from unsafe import gc_alloc, ptr


# Sentinel values stored in index slots (cf. CPython's dictobject.c, which
# uses negative DKIX_* sentinels for the same purpose).
DKIX_EMPTY = 0
# NOTE(review): the original comments say these were meant to be 2**32 - 1,
# 2**32 - 2 and 2**32 - 3, but `1 << 32 - n` parses as `1 << (32 - n)`, so
# the actual values are 2**31, 2**30 and 2**29 — confirm which was intended.
DKIX_DUMMY = 1 << (32 - 1)
DKIX_ERROR = 1 << (32 - 2)
DKIX_KEY_CHANGED = 1 << (32 - 3)

# Bounds for the index-size exponent: capacity is 1 << log_size.
MIN_LOG_SIZE = 6
MAX_LOG_SIZE = 31
# Grow once length reaches 2/3 of capacity (same ratio as CPython).
MAX_FILL_RATIO = 2 / 3


@blue.generic
def dict(Key, Value):
    # Blue-time generic: specializes a hash-map implementation for the given
    # Key/Value types (e.g. dict[i32, i32]) and returns the lifted _dict type.
    hash_key = hash[Key]  # hash function specialized for the Key type

@struct
class Entry:
    # One key/value slot in the dense `entries` array.
    # empty: bool
    empty: i32  # nonzero when the slot is unused; NOTE(review): declared i32 but assigned True/False elsewhere — confirm SPy bool/i32 coercion
    key: Key
    value: Value


@struct
class DictData:
    # Backing storage: an open-addressed `index` maps probe slots to
    # positions in the dense `entries` array (CPython-style compact dict).
    index: ptr[i32]
    log_size: i32 # log2 of the index capacity (capacity == 1 << log_size); NOTE(review): new_data stores `1 << log_size` here instead — confirm which is intended
    length: i32 # number of items stored
    entries: ptr[Entry]


def new_data(log_size: i32) -> ptr[DictData]:
    # Allocate a fresh DictData with an all-empty index and empty entries.
    # Two asserts because chained comparison is not supported (see review).
    assert MIN_LOG_SIZE <= log_size
    assert log_size <= MAX_LOG_SIZE
    data = gc_alloc(DictData)(1)
    # Bug fix: store the exponent itself, not `1 << log_size` —
    # capacity() and resize() both treat log_size as log2(capacity).
    data.log_size = log_size
    data.length = 0
    data.index = new_index(log_size)
    # Bug fix: entries was never allocated here, but resize() writes
    # through data.entries of the struct returned by this function.
    data.entries = new_entries(log_size)
    return data

def new_index(log_size: i32) -> ptr[i32]:
# assert MIN_LOG_SIZE <= log_size <= MAX_LOG_SIZE
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks to #240 we now have assert

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but the chained comparison doesn't work, and using and doesn't work either:

self = <spy.parser.Parser object at 0x106fbcf50>
primary = 'not implemented yet: BoolOp', secondary = 'this is not supported'
loc = <Loc: '/Users/dp/repos/spy/stdlib/_dict.spy 37:15 37:68'>

    def error(self, primary: str, secondary: str, loc: Loc) -> NoReturn:
>       raise SPyError.simple("W_ParseError", primary, secondary, loc)
E       spy.errors.SPyError: ParseError: not implemented yet: BoolOp
E          --> /Users/dp/repos/spy/stdlib/_dict.spy:37:16
E        37 |         assert MIN_LOG_SIZE <= log_size and log_size <= MAX_LOG_SIZE
E           |                |___________________________________________________| this is not supported

I'll split it into two asserts

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh right, I always forget about missing features.
I opened #248 and #249 so we don't forget.
Feel free to open issues whenever you find missing features.

    # Allocate 2**log_size index slots, all marked DKIX_EMPTY.
    index = gc_alloc(i32)(1 << log_size)
    i = 0
    while i < 1 << log_size:
        index[i] = DKIX_EMPTY
        i += 1
    return index

# @blue
def new_entries(log_size: i32) -> ptr[Entry]:
    # Allocate 2**log_size entries, each marked empty with cleared key/value.
    entries = gc_alloc(Entry)(1 << log_size)
    i = 0
    while i < 1 << log_size:
        entries[i].empty = True
        # ^ WIP: Cannot read struct by value: `dict::dict[i32, i32]::Entry`
        # [Antonio]: not sure what to do here, I'm leaning towards removing
        # the Entry struct in favor of having three different arrays.
        # It degrades cache friendliness, but that's fine.
        # Do you think there's a way around this WIP error now?
        # I also tried blueing the function, but it didn't help.
        entries[i].key = None
        # NOTE(review): key/value are typed Key/Value (e.g. i32); assigning
        # None may not typecheck for non-pointer types — confirm.
        entries[i].value = None
        i += 1
    return entries

def capacity(self: _dict) -> i32:
    # Total number of slots in the probe index (always a power of two).
    d: ptr[DictData] = self.__ll__
    return 1 << d.log_size

def mask(self: _dict) -> i32:
    # Bit mask used to wrap probe positions into the index.
    # Bug fix: `1 << data.log_size - 1` parses as `1 << (log_size - 1)`,
    # i.e. half the capacity; the wrap mask must be capacity - 1.
    data: ptr[DictData] = self.__ll__
    return (1 << data.log_size) - 1

def distance_0(self: _dict, key: Key) -> i32:
    # First index slot to probe when looking up `key`:
    # its hash, wrapped into the table by the mask.
    h = hash_key(key)
    return h & mask(self)

def get_entry_at(self: _dict, position: i32) -> Entry:
    # Fetch the entry stored at `position` in the dense entries array.
    # Bug fix: annotate as ptr[DictData] — every other helper reads
    # self.__ll__ as a pointer; the bare DictData annotation was inconsistent.
    data: ptr[DictData] = self.__ll__
    return data.entries[position]

def lookup(self: _dict, key: Key) -> i32:
    # Probe the index for `key`; return the position of its entry in the
    # dense entries array, or DKIX_EMPTY if the key is not present.
    # NOTE(review): DKIX_EMPTY == 0 is also a valid entry position, so a
    # key stored at position 0 is indistinguishable from "missing" — confirm
    # the sentinel choice (CPython uses negative sentinels).
    data: ptr[DictData] = self.__ll__
    d0 = distance_0(self, key)
    hash_lookup = hash_key(key)  # hoisted: loop-invariant; might raise
    distance = -1
    while distance < capacity(self):
        distance += 1
        # Bug fix: wrap with mask(self) (== capacity - 1); `& capacity(self)`
        # collapses every probe position to 0 or capacity (out of bounds),
        # since capacity is a power of two.
        ix = data.index[(d0 + distance) & mask(self)]
        if ix == DKIX_EMPTY:
            return DKIX_EMPTY  # never-used slot: key cannot be further along
        if ix == DKIX_DUMMY:
            # deleted slot: keep probing
            pass
        else:
            entry = get_entry_at(self, ix)
            if not entry.empty:
                hash_entry = hash_key(entry.key)  # might raise exception
                if not (hash_entry != hash_lookup):
                    if entry.key is key:
                        return ix  # identity fast path
                    cmp = entry.key == key  # might raise exception
                    if cmp:
                        return ix
    return DKIX_EMPTY

def insert(self: _dict, key: Key, value: Value) -> void:
    # Append (key, value) to the dense entries array and record its
    # position in the first free index slot along key's probe sequence.
    # Precondition: key is not already present (callers check via lookup).
    data: ptr[DictData] = self.__ll__
    entry = data.entries[data.length]
    # data.length += 1
    data.length = data.length + 1
    entry.key = key
    entry.value = value
    entry.empty = False
    d0 = distance_0(self, key)
    distance = -1
    while distance < capacity(self):
        distance += 1
        # Bug fix: wrap with mask(self) (== capacity - 1); `& capacity(self)`
        # collapses probe positions to 0 or capacity (out of bounds).
        slot = (d0 + distance) & mask(self)
        ix = data.index[slot]
        if ix == DKIX_EMPTY:
            # Bug fix: store into the same wrapped slot that was probed;
            # the original wrote to the unwrapped `d0 + distance`.
            # NOTE(review): data.length - 1 == 0 for the first item, which
            # collides with DKIX_EMPTY — confirm the sentinel choice.
            data.index[slot] = data.length - 1
            return
    raise Exception("aargh!")
    # assert False # must not loop through the entire index
    # without finding a free slot

def resize(self: _dict) -> i32:
    # Grow the dict to double capacity and rebuild the probe index.
    # The resize operation never decreases the memory used by dict;
    # i.e., we never resize to decrease the size of dict.
    # This follows the CPython implementation.
    old_data: ptr[DictData] = self.__ll__
    assert old_data.log_size < MAX_LOG_SIZE
    new = new_data(old_data.log_size + 1)
    new.length = old_data.length
    new_mask = (1 << new.log_size) - 1
    i = 0
    while i < old_data.length:
        entry = old_data.entries[i]
        new.entries[i] = entry
        # Bug fix: the index was never rebuilt after growing, so every
        # lookup after a resize would miss.  Re-probe each live entry
        # with the new mask and record its position.
        if not entry.empty:
            d0 = hash_key(entry.key) & new_mask
            distance = 0
            while new.index[(d0 + distance) & new_mask] != DKIX_EMPTY:
                distance += 1
            new.index[(d0 + distance) & new_mask] = i
        i += 1
    # Bug fix: DictData has no `data` field (the original wrote
    # `self.__ll__.data = new`); rebind the lifted pointer itself.
    self.__ll__ = new
    # Bug fix: declared `-> i32` but fell off the end without returning;
    # report the new log size (callers currently ignore the result).
    return new.log_size


@typelift
class _dict:
    # The user-visible dict type: a typelifted wrapper around ptr[DictData].
    __ll__: ptr[DictData]

    def __new__() -> _dict:
        # Build an empty dict with the minimum capacity.
        data = gc_alloc(DictData)(1)
        data.log_size = MIN_LOG_SIZE
        data.length = 0
        data.index = new_index(data.log_size)
        data.entries = new_entries(data.log_size)
        return _dict.__lift__(data)

    def __getitem__(self: _dict, key: Key) -> Value:
        # Return the value stored for `key`; raise KeyError if absent.
        # NOTE(review): DKIX_EMPTY == 0 is also entry position 0, so a key
        # whose entry sits at position 0 may be reported missing — confirm.
        data: ptr[DictData] = self.__ll__
        ix = lookup(self, key)
        if ix == DKIX_EMPTY:
            raise KeyError(key)
        return data.entries[ix].value

    def __setitem__(self: _dict, key: Key, value: Value) -> void:
        # Insert a new key or overwrite an existing one; grow the table
        # once the fill ratio exceeds MAX_FILL_RATIO.
        data: ptr[DictData] = self.__ll__
        ix = lookup(self, key)
        if ix == DKIX_EMPTY:
            insert(self, key, value)
            if data.length >= capacity(self) * MAX_FILL_RATIO:
                resize(self)
        else:
            entry = get_entry_at(self, ix)
            # assert not entry.empty
            entry.value = value

    def __delitem__(self: _dict, key: Key) -> void:
        # NOTE(review): deletion is a silent no-op — the key stays present
        # and __len__ is unchanged.  Either implement it (mark the index
        # slot DKIX_DUMMY and the entry empty) or raise until supported.
        pass

    def __len__(self: _dict) -> i32:
        # Number of live items currently stored.
        data: ptr[DictData] = self.__ll__
        return data.length


return _dict
9 changes: 9 additions & 0 deletions stdlib/hash.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@blue.generic
def hash(T):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have a better way to deal with hash nowadays 🎉.
Look how we implement len:

@BUILTINS.builtin_func(color='blue', kind='metafunc')
def w_len(vm: 'SPyVM', wam_obj: W_MetaArg) -> W_OpSpec:
w_T = wam_obj.w_static_T
if w_fn := w_T.lookup_func('__len__'):
w_opspec = vm.fast_metacall(w_fn, [wam_obj])
return w_opspec
t = w_T.fqn.human_name
raise SPyError.simple(
'W_TypeError',
f'cannot call len(`{t}`)',
f'this is `{t}`',
wam_obj.loc
)

We should do the same for hash.
We can do it in this PR, but I'm also happy to do it in a follow-up PR if you prefer.

    # Blue-time dispatch: return the hash function specialized for type T.
    def hash_i32(v: i32) -> i32:
        # this mimics the implementation of CPython's hash(int)
        return v
    if T == i32:
        return hash_i32

    # No hash implementation known for T.
    raise TypeError("unsupported type for hash()")
15 changes: 15 additions & 0 deletions stdlib/test_dict.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dict import dict


def test() -> void:
    # Smoke test: store one mapping in a specialized dict and read it back.
    m = dict[i32, i32]()
    m[1] = 1
    if m[1] == 1:
        print("✅")
    else:
        print("❌")


def main() -> void:
    # Entry point: run the single smoke test.
    test()