-
Notifications
You must be signed in to change notification settings - Fork 31
Add dict to stdlib #172
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add dict to stdlib #172
Changes from 2 commits
fef1760
b58db2e
a2183b3
73fe55b
53e67ec
b9f2a86
d2ec0e7
1c8a373
70adc99
721996b
17a6a47
ef5bfc4
460b083
9ac3f9f
ee1c276
af75bac
7839061
e6dab95
690eb1c
79eb71a
0ec3e69
8ed4030
246151f
c0599f6
807d5c3
b6a13ea
c4ebbd9
f689bde
d2d8884
724a338
5df6c2d
b88bfc6
db39ed9
2a3c805
b74e7b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,187 @@ | ||
| from hash import hash | ||
| from unsafe import gc_alloc, ptr | ||
|
|
||
|
|
||
# Sentinel values stored in the i32 index table in place of an entry position.
# They are negative so that they can never be confused with a valid position
# (positions start at 0) and so that they fit in an i32 slot.
# The previous spelling `1 << 32 - 1` parsed as `1 << (32 - 1)`, and
# DKIX_EMPTY = 0 was indistinguishable from "entry number 0".
DKIX_EMPTY = -1
DKIX_DUMMY = -2
DKIX_ERROR = -3
DKIX_KEY_CHANGED = -4

MIN_LOG_SIZE = 6
MAX_LOG_SIZE = 31
MAX_FILL_RATIO = 2 / 3


@blue.generic
def dict(Key, Value):
    # Build a hash-table type specialised for the given Key and Value types.
    #
    # Layout (same idea as CPython's compact dict):
    #   * `entries` holds the (key, value) pairs in insertion order;
    #   * `index` is a table of 1 << log_size slots, each holding either a
    #     position into `entries` or one of the negative DKIX_* sentinels.
    # Collisions are resolved by linear probing over `index`.
    hash_key = hash[Key]

    @struct
    class Entry:
        # empty: bool
        empty: i32      # 0 when the entry holds a live key/value pair
        key: Key
        value: Value

    @struct
    class DictData:
        index: ptr[i32]
        log_size: i32   # log2 of the number of index slots
        length: i32     # number of items stored
        entries: ptr[Entry]

    def new_data(log_size: i32) -> ptr[DictData]:
        # Allocate storage for 1 << log_size slots with every index slot
        # marked empty.
        # assert MIN_LOG_SIZE <= log_size <= MAX_LOG_SIZE
        size = 1 << log_size
        data = gc_alloc(DictData)(1)
        # Store the logarithm itself: capacity() and mask() shift by it.
        data.log_size = log_size
        data.length = 0
        data.index = gc_alloc(i32)(size)
        data.entries = gc_alloc(Entry)(size)
        i = 0
        while i < size:
            data.index[i] = DKIX_EMPTY
            i += 1
        return data

    def capacity(self: _dict) -> i32:
        # Number of slots in the index table.
        data: ptr[DictData] = self.__ll__.data
        return 1 << data.log_size

    def mask(self: _dict) -> i32:
        # Bit mask that wraps a probe position into the index table.
        # The subtraction must happen after the shift.
        return capacity(self) - 1

    def distance_0(self: _dict, key: Key) -> i32:
        # the first slot in the index to probe when looking up key
        return hash_key(key) & mask(self)

    def index_entry(data: ptr[DictData], position: i32) -> void:
        # Record `position` in the first free index slot on the probe
        # sequence of the key stored at entries[position].
        size = 1 << data.log_size
        m = size - 1
        d0 = hash_key(data.entries[position].key) & m
        distance = 0
        while distance < size:
            slot = (d0 + distance) & m
            if data.index[slot] == DKIX_EMPTY:
                data.index[slot] = position
                return
            distance += 1
        # must not loop through the entire index without finding a free slot
        raise Exception("aargh!")

    def lookup(self: _dict, key: Key) -> i32:
        # Return the position in `entries` of the entry whose key equals
        # `key`, or DKIX_EMPTY when there is none.
        data: ptr[DictData] = self.__ll__.data
        size = capacity(self)
        m = size - 1
        hash_lookup = hash_key(key)  # might raise exception
        d0 = hash_lookup & m
        distance = 0
        while distance < size:
            ix = data.index[(d0 + distance) & m]
            if ix == DKIX_EMPTY:
                # an empty slot ends the probe sequence: the key is absent
                return DKIX_EMPTY
            # the remaining negative values (e.g. DKIX_DUMMY) are sentinels:
            # skip them and keep probing
            if ix >= 0:
                if data.entries[ix].empty == 0:
                    hash_entry = hash_key(data.entries[ix].key)  # might raise exception
                    if hash_entry == hash_lookup:
                        if data.entries[ix].key == key:  # might raise exception
                            return ix
            distance += 1
        return DKIX_EMPTY

    def insert(self: _dict, key: Key, value: Value) -> void:
        # Append a new entry and link it from the index table.
        # The only caller, __setitem__, has already checked that `key` is
        # not present.
        #
        # Fields are accessed through the pointer: dereferencing with
        # `self.__ll__.data[0]` tripped an assertion in the unsafe memory
        # read (see the review discussion on this PR).
        data: ptr[DictData] = self.__ll__.data
        position = data.length
        # NOTE(review): assumes that indexing a ptr[Entry] yields a reference
        # to the element, as the original code relied on -- confirm.
        entry = data.entries[position]
        entry.key = key
        entry.value = value
        entry.empty = 0
        # data.length += 1
        data.length = position + 1
        index_entry(data, position)

    def resize(self: _dict) -> void:
        # The resize operation never decreases the memory used by dict;
        # i.e., we never resize to decrease the size of dict.
        # This follows the CPython implementation.
        old_data: ptr[DictData] = self.__ll__.data
        if old_data.log_size >= MAX_LOG_SIZE:
            raise Exception("dict cannot grow beyond MAX_LOG_SIZE")
        new = new_data(old_data.log_size + 1)
        i = 0
        while i < old_data.length:
            src = old_data.entries[i]
            dst = new.entries[i]
            dst.empty = src.empty
            dst.key = src.key
            dst.value = src.value
            # the mask changed, so every entry must be linked again
            index_entry(new, i)
            i += 1
        new.length = old_data.length
        self.__ll__.data = new

    @struct
    class __dict:
        data: ptr[DictData]

    @typelift
    class _dict:
        __ll__: ptr[__dict]

        def __new__() -> _dict:
            # Create an empty dict with the minimum table size.
            data = new_data(MIN_LOG_SIZE)
            d = gc_alloc(__dict)(1)
            d.data = data
            return _dict.__lift__(d)

        def __getitem__(self: _dict, key: Key) -> Value:
            # Return the value stored for `key`; raise KeyError if absent.
            data: ptr[DictData] = self.__ll__.data
            ix = lookup(self, key)
            if ix == DKIX_EMPTY:
                raise KeyError(key)
            return data.entries[ix].value

        def __setitem__(self: _dict, key: Key, value: Value) -> void:
            # Insert a new key or overwrite the value of an existing one.
            data: ptr[DictData] = self.__ll__.data
            ix = lookup(self, key)
            if ix == DKIX_EMPTY:
                insert(self, key, value)
                # `data` still points at the table that insert() updated;
                # grow once it is filled beyond MAX_FILL_RATIO
                if data.length >= capacity(self) * MAX_FILL_RATIO:
                    resize(self)
            else:
                entry = data.entries[ix]
                # assert not entry.empty
                entry.value = value

        def __delitem__(self: _dict, key: Key) -> void:
            # TODO: deletion is not implemented yet
            pass

        def __len__(self: _dict) -> i32:
            # Number of items stored.
            data: ptr[DictData] = self.__ll__.data
            return data.length

    return _dict
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,9 @@ | ||||||||||||||||||||||||||||||
| @blue.generic | ||||||||||||||||||||||||||||||
| def hash(T): | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
@BUILTINS.builtin_func(color='blue', kind='metafunc')
def w_len(vm: 'SPyVM', wam_obj: W_MetaArg) -> W_OpSpec:
    """Resolve ``len(obj)`` by dispatching to the static type's ``__len__``.

    Looks up ``__len__`` on the static type of ``wam_obj`` and returns the
    opspec produced by meta-calling it. Raises a ``W_TypeError`` SPyError,
    pointing at the argument's location, when the type has no ``__len__``.
    """
    w_T = wam_obj.w_static_T
    w_fn = w_T.lookup_func('__len__')
    if not w_fn:
        t = w_T.fqn.human_name
        raise SPyError.simple(
            'W_TypeError',
            f'cannot call len(`{t}`)',
            f'this is `{t}`',
            wam_obj.loc
        )
    return vm.fast_metacall(w_fn, [wam_obj])
We should do the same for hash.
We can do it in this PR, but I'm also happy to do it in a follow-up PR if you prefer.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| from dict import dict | ||
|
|
||
|
|
||
def test() -> void:
    # Smoke test: store one item and read it back, printing a check mark on
    # success and a cross on failure.
    # d = dict[int, int]()
    d = dict[i32, i32]()
    d[1] = 1
    if d[1] != 1:
        print("❌")
    else:
        print("✅")
|
|
||
|
|
||
def main() -> void:
    # Program entry point: run the dict smoke test.
    test()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks to #240 we now have `assert`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But the chained comparison doesn't work, and using `and` doesn't work either.
I'll split it into two asserts.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh right, I always forget about missing features.
I opened #248 and #249 so we don't forget.
Feel free to open issues whenever you find missing features.