-
Notifications
You must be signed in to change notification settings - Fork 31
Add dict to stdlib #172
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add dict to stdlib #172
Changes from 5 commits
fef1760
b58db2e
a2183b3
73fe55b
53e67ec
b9f2a86
d2ec0e7
1c8a373
70adc99
721996b
17a6a47
ef5bfc4
460b083
9ac3f9f
ee1c276
af75bac
7839061
e6dab95
690eb1c
79eb71a
0ec3e69
8ed4030
246151f
c0599f6
807d5c3
b6a13ea
c4ebbd9
f689bde
d2d8884
724a338
5df6c2d
b88bfc6
db39ed9
2a3c805
b74e7b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,196 @@ | ||
| from hash import hash | ||
| from unsafe import gc_alloc, ptr | ||
|
|
||
|
|
||
# Sentinel values stored in the hash index in place of an entry position.
#
# They are negative (same convention as CPython's compact dict) so that they
# can never be confused with a valid entry position: `insert` stores
# `data.length - 1` in the index, which is 0 for the very first entry, so 0
# cannot double as the "empty slot" marker.  Negative values also fit in the
# i32 index cells, unlike values near 2 ** 32.
DKIX_EMPTY = -1        # slot has never been used
DKIX_DUMMY = -2        # slot previously held an entry (tombstone)
DKIX_ERROR = -3        # reserved: lookup failed with an error
DKIX_KEY_CHANGED = -4  # reserved: key changed during lookup

# The index holds 1 << log_size slots; these bound log_size.
MIN_LOG_SIZE = 6
MAX_LOG_SIZE = 31
# __setitem__ triggers a resize once length >= capacity * MAX_FILL_RATIO.
MAX_FILL_RATIO = 2 / 3
|
|
||
|
|
||
| @blue.generic | ||
| def dict(Key, Value): | ||
| hash_key = hash[Key] | ||
|
|
||
@struct
class Entry:
    # One key/value record of the dict.
    #
    # `empty` is a flag telling whether the record is unused; it is declared
    # as i32 for now (a `bool` declaration was tried and left commented out
    # by the author).
    empty: i32
    key: Key
    value: Value
|
|
||
|
|
||
@struct
class DictData:
    # Low-level storage shared by all operations on one dict instance.
    index: ptr[i32]      # hash index: each slot holds an entry position or a DKIX_* sentinel
    log_size: i32        # base-2 logarithm of the capacity (capacity() is 1 << log_size)
    length: i32          # number of items stored
    entries: ptr[Entry]  # entry records, filled in insertion order
|
|
||
|
|
||
def new_data(log_size: i32) -> ptr[DictData]:
    # Allocate and initialise an empty DictData whose tables hold
    # 1 << log_size slots.
    #
    # `log_size` is stored unchanged: capacity() computes
    # `1 << data.log_size`, so storing the already-shifted value here would
    # make the reported capacity wildly too large.
    # assert MIN_LOG_SIZE <= log_size <= MAX_LOG_SIZE
    data = gc_alloc(DictData)(1)
    data.log_size = log_size
    data.length = 0
    # new_index() fills every slot with DKIX_EMPTY.
    data.index = new_index(log_size)
    # The entries array has to exist before anything is stored through it;
    # this mirrors what _dict.__new__ does.
    data.entries = new_entries(log_size)
    return data
|
|
||
def new_index(log_size: i32) -> ptr[i32]:
    # Allocate a hash index of 1 << log_size slots, all set to DKIX_EMPTY.
    # assert MIN_LOG_SIZE <= log_size <= MAX_LOG_SIZE
    slot_count = 1 << log_size
    table = gc_alloc(i32)(slot_count)
    slot = 0
    while slot < slot_count:
        table[slot] = DKIX_EMPTY
        slot = slot + 1
    return table
|
|
||
| # @blue | ||
def new_entries(log_size: i32) -> ptr[Entry]:
    # Allocate 1 << log_size entry records and mark each one as unused.
    entry_count = 1 << log_size
    entries = gc_alloc(Entry)(entry_count)
    pos = 0
    while pos < entry_count:
        entries[pos].empty = True
        # ^ WIP: Cannot read struct by value: `dict::dict[i32, i32]::Entry`
        # [Antonio]: not sure what to do here, I'm leaning towards removing
        # the Entry struct in favor of having three different arrays.
        # It degrades cache friendliness, but that's fine.
        # Do you think there's a way around this WIP error now?
        # I also tried blueing the function, but it didn't help.
        # (Review thread on the note above: marked as resolved by dpdani,
        # now outdated.)
        entries[pos].key = None
        entries[pos].value = None
        pos = pos + 1
    return entries
|
|
||
def capacity(self: _dict) -> i32:
    # Number of slots in the hash index: 2 raised to the stored log_size.
    storage: ptr[DictData] = self.__ll__
    log_size = storage.log_size
    return 1 << log_size
|
|
||
def mask(self: _dict) -> i32:
    # Bit mask that reduces a hash (or probe position) to a valid index
    # slot: capacity - 1, i.e. the low `log_size` bits set.
    #
    # The parentheses matter: `-` binds tighter than `<<`, so the
    # unparenthesised `1 << data.log_size - 1` evaluates to
    # `1 << (data.log_size - 1)`, a single bit rather than a mask.
    data: ptr[DictData] = self.__ll__
    return (1 << data.log_size) - 1
|
|
||
def distance_0(self: _dict, key: Key) -> i32:
    # Home slot of `key`: the first index slot probed when looking it up,
    # obtained by reducing the key's hash with the index mask.
    key_hash = hash_key(key)
    return key_hash & mask(self)
|
|
||
def get_entry_at(self: _dict, position: i32) -> Entry:
    # Return the entry record stored at `position` in the entries array.
    #
    # `self.__ll__` is declared as ptr[DictData] on _dict, and every other
    # helper annotates it that way; the annotation here now agrees.
    data: ptr[DictData] = self.__ll__
    return data.entries[position]
|
|
||
def lookup(self: _dict, key: Key) -> i32:
    # Find `key` with linear probing, starting at its home slot.
    #
    # Returns the position of the matching record in the entries array, or
    # DKIX_EMPTY when the key is not present.
    data: ptr[DictData] = self.__ll__
    cap = capacity(self)
    # Probe positions wrap around with capacity - 1.  Masking with the
    # capacity itself (a single bit) can only yield 0 or `cap`, and `cap`
    # is one past the end of the index.
    slot_mask = cap - 1
    d0 = distance_0(self, key)
    hash_lookup = hash_key(key)  # might raise exception
    distance = 0
    # Exactly `cap` probes visit every slot once.
    while distance < cap:
        ix = data.index[(d0 + distance) & slot_mask]
        if ix == DKIX_EMPTY:
            # A never-used slot ends the probe sequence: key is absent.
            return DKIX_EMPTY
        # DKIX_DUMMY marks a slot to skip over; keep probing past it.
        if ix != DKIX_DUMMY:
            entry = get_entry_at(self, ix)
            if not entry.empty:
                hash_entry = hash_key(entry.key)  # might raise exception
                if hash_entry == hash_lookup:
                    # Identity first, then equality.
                    if entry.key is key:
                        return ix
                    cmp = entry.key == key  # might raise exception
                    if cmp:
                        return ix
        distance = distance + 1
    return DKIX_EMPTY
|
|
||
def insert(self: _dict, key: Key, value: Value) -> void:
    # Append a new record for `key` and register its position in the index.
    #
    # The caller must already have checked that `key` is absent
    # (__setitem__ only calls this after lookup() returned DKIX_EMPTY).
    # Raises Exception if the whole index is probed without finding a free
    # slot, which must not happen while the fill ratio is respected.
    data: ptr[DictData] = self.__ll__
    pos = data.length
    # Write the fields through the array element so that the stored record
    # is what gets modified, not a by-value copy of it.
    data.entries[pos].key = key
    data.entries[pos].value = value
    data.entries[pos].empty = False
    # data.length += 1
    data.length = pos + 1
    cap = capacity(self)
    # Wrap probe positions with capacity - 1; masking with the capacity
    # itself can only produce 0 or an out-of-range slot.
    slot_mask = cap - 1
    d0 = distance_0(self, key)
    distance = 0
    while distance < cap:
        slot = (d0 + distance) & slot_mask
        if data.index[slot] == DKIX_EMPTY:
            # Store into the very slot that was tested (wrapped), never
            # into the unwrapped `d0 + distance`.
            data.index[slot] = pos
            return
        distance = distance + 1
    raise Exception("aargh!")
    # assert False  # must not loop through the entire index
    # without finding a free slot
| # assert False # must not loop through the entire index | ||
| # without finding a free slot | ||
|
|
||
def resize(self: _dict) -> i32:
    # Double the capacity of the dict and return the new log_size.
    #
    # The resize operation never decreases the memory used by dict;
    # i.e., we never resize to decrease the size of dict.
    # This follows the CPython implementation.
    #
    # The existing DictData is updated in place (DictData has no `data`
    # field to reassign), so `self.__ll__` keeps pointing at valid storage.
    data: ptr[DictData] = self.__ll__
    # assert data.log_size < MAX_LOG_SIZE
    new_log_size = data.log_size + 1
    slot_mask = (1 << new_log_size) - 1
    index = new_index(new_log_size)
    entries = new_entries(new_log_size)
    i = 0
    while i < data.length:
        # Copy field by field to avoid reading a whole struct by value.
        entries[i].empty = data.entries[i].empty
        entries[i].key = data.entries[i].key
        entries[i].value = data.entries[i].value
        if not data.entries[i].empty:
            # Home slots depend on the mask, so every live record has to be
            # registered again in the larger index.
            slot = hash_key(data.entries[i].key) & slot_mask
            # Terminates: the new index has more slots than live records.
            while index[slot] != DKIX_EMPTY:
                slot = (slot + 1) & slot_mask
            index[slot] = i
        i = i + 1
    data.index = index
    data.entries = entries
    data.log_size = new_log_size
    return new_log_size
|
|
||
|
|
||
@typelift
class _dict:
    # High-level dict type; all state lives in the DictData it points to.
    __ll__: ptr[DictData]

    def __new__() -> _dict:
        # Create an empty dict with 1 << MIN_LOG_SIZE index slots.
        data = gc_alloc(DictData)(1)
        data.log_size = MIN_LOG_SIZE
        data.length = 0
        data.index = new_index(data.log_size)
        data.entries = new_entries(data.log_size)
        return _dict.__lift__(data)

    def __getitem__(self: _dict, key: Key) -> Value:
        # Return the value stored for `key`; raise KeyError if it is absent.
        data: ptr[DictData] = self.__ll__
        ix = lookup(self, key)
        if ix == DKIX_EMPTY:
            raise KeyError(key)
        return data.entries[ix].value

    def __setitem__(self: _dict, key: Key, value: Value) -> void:
        # Insert `key` or overwrite the value of an existing `key`.
        data: ptr[DictData] = self.__ll__
        ix = lookup(self, key)
        if ix == DKIX_EMPTY:
            insert(self, key, value)
            # Grow once the fill ratio is reached so probing stays short.
            if data.length >= capacity(self) * MAX_FILL_RATIO:
                resize(self)
        else:
            # assert not data.entries[ix].empty
            # Assign through the array element: get_entry_at() returns an
            # Entry by value, so setting a field on its result would not be
            # guaranteed to reach the stored record.
            data.entries[ix].value = value

    def __delitem__(self: _dict, key: Key) -> void:
        # TODO: deletion is not implemented yet; this is currently a no-op.
        pass

    def __len__(self: _dict) -> i32:
        # Number of items stored in the dict.
        data: ptr[DictData] = self.__ll__
        return data.length
|
|
||
|
|
||
| return _dict | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,9 @@ | ||||||||||||||||||||||||||||||
| @blue.generic | ||||||||||||||||||||||||||||||
| def hash(T): | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
@BUILTINS.builtin_func(color='blue', kind='metafunc')
def w_len(vm: 'SPyVM', wam_obj: W_MetaArg) -> W_OpSpec:
    # Metafunction behind len(): dispatch to the `__len__` function of the
    # argument's static type, or report a type error if there is none.
    w_T = wam_obj.w_static_T
    w_fn = w_T.lookup_func('__len__')
    if not w_fn:
        t = w_T.fqn.human_name
        raise SPyError.simple(
            'W_TypeError',
            f'cannot call len(`{t}`)',
            f'this is `{t}`',
            wam_obj.loc
        )
    return vm.fast_metacall(w_fn, [wam_obj])
We should do the same for hash.
We can do it in this PR, but I'm also happy to do it in a follow-up PR if you prefer.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| from dict import dict | ||
|
|
||
|
|
||
def test() -> void:
    # Smoke test: store one item and read it back.
    # d = dict[int, int]()
    d = dict[i32, i32]()
    d[1] = 1
    if d[1] != 1:
        print("❌")
    else:
        print("✅")
|
|
||
|
|
||
def main() -> void:
    # Program entry point: run the dict smoke test.
    test()
Uh oh!
There was an error while loading. Please reload this page.