Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
fef1760
Add dict to stdlib (WIP)
dpdani Jun 4, 2025
b58db2e
changes
dpdani Jun 10, 2025
a2183b3
changes
dpdani Jun 15, 2025
73fe55b
Merge branch 'main' into feature/dict
dpdani Jun 15, 2025
53e67ec
changes
dpdani Jun 15, 2025
b9f2a86
it passes! 🎉
dpdani Jun 17, 2025
d2ec0e7
Merge branch 'main' into feature/dict
dpdani Oct 1, 2025
1c8a373
void -> None
dpdani Oct 1, 2025
70adc99
unhashable type prevents compilation 👌
dpdani Oct 1, 2025
721996b
use array of structs
dpdani Oct 1, 2025
17a6a47
tests!
dpdani Oct 1, 2025
ef5bfc4
tests pass!
dpdani Oct 2, 2025
460b083
Merge branch 'main' into feature/dict
dpdani Oct 2, 2025
9ac3f9f
actually implement resizing
dpdani Oct 3, 2025
ee1c276
.get(), but using `default` as a parameter is a big ouch
dpdani Oct 3, 2025
af75bac
cleanup
dpdani Oct 3, 2025
7839061
default -> default_
dpdani Oct 4, 2025
e6dab95
refactor index lookups to avoid duplicated code in inserts
dpdani Oct 4, 2025
690eb1c
cleanups
dpdani Oct 4, 2025
79eb71a
delete
dpdani Oct 4, 2025
0ec3e69
implement `__contains__`, `__eq__`, and `__fastiter__`
dpdani Oct 4, 2025
8ed4030
cleanup
dpdani Oct 4, 2025
246151f
module dict.spy -> _dict.spy
dpdani Oct 4, 2025
c0599f6
asserts
dpdani Oct 4, 2025
807d5c3
review
dpdani Oct 4, 2025
b6a13ea
removed comments
dpdani Oct 4, 2025
c4ebbd9
Merge branch 'refs/heads/main' into feature/dict
dpdani Oct 6, 2025
f689bde
reworked hash() builtin
dpdani Oct 6, 2025
d2d8884
fix
dpdani Oct 6, 2025
724a338
design comment
dpdani Oct 6, 2025
5df6c2d
fix
dpdani Oct 6, 2025
b88bfc6
move to static inline in the header
dpdani Oct 7, 2025
db39ed9
improve comment
dpdani Oct 7, 2025
2a3c805
Merge branch 'main' into feature/dict
dpdani Oct 7, 2025
b74e7b8
fix
dpdani Oct 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 187 additions & 0 deletions stdlib/dict.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
from hash import hash
from unsafe import gc_alloc, ptr


# Sentinel values for slots of the hash index.
# NOTE(review): insert() stores `data.length - 1` in the index, which is 0
# for the first entry and therefore equal to DKIX_EMPTY -- confirm that
# position 0 and "empty slot" cannot be confused.
DKIX_EMPTY = 0
# `-` binds tighter than `<<`, so the shift has to be parenthesised:
# `1 << 32 - 1` evaluates to `1 << 31`, not to `2 ** 32 - 1`.
# NOTE(review): these values do not fit a signed 32-bit integer; confirm how
# they are meant to compare against the `i32` slots of `DictData.index`.
DKIX_DUMMY = (1 << 32) - 1  # 2 ** 32 - 1
DKIX_ERROR = (1 << 32) - 2  # 2 ** 32 - 2
DKIX_KEY_CHANGED = (1 << 32) - 3  # 2 ** 32 - 3

# Bounds for the `log_size` argument of new_data().
MIN_LOG_SIZE = 6
MAX_LOG_SIZE = 31
# __setitem__ calls resize() once `length >= capacity * MAX_FILL_RATIO`.
MAX_FILL_RATIO = 2 / 3


@blue.generic
def dict(Key, Value):
hash_key = hash[Key]

# One stored key/value pair; `DictData.entries` points to an array of these.
@struct
class Entry:
# empty: bool
# Flag read by lookup() (`not entry.empty`) and written by insert();
# declared as i32 in place of the commented-out bool above.
empty: i32
# The key and value types are the parameters of the enclosing dict(Key, Value).
key: Key
value: Value


# Backing storage of one dict instance.
@struct
class DictData:
# Hash index: new_data() fills it with DKIX_EMPTY and insert() stores
# positions into `entries` in it.
index: ptr[i32]
# NOTE(review): new_data() stores `1 << log_size` in this field, while
# capacity() returns `1 << data.log_size` -- confirm whether the field is
# meant to hold the capacity or its base-2 logarithm.
log_size: i32 # capacity
length: i32 # number of items stored
# Array of entries, addressed by the positions kept in `index`.
entries: ptr[Entry]


# Allocate a DictData together with an index of `1 << log_size` i32 slots,
# mark every slot DKIX_EMPTY, set the length to 0 and return the pointer.
def new_data(log_size: i32) -> ptr[DictData]:
# assert MIN_LOG_SIZE <= log_size <= MAX_LOG_SIZE
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks to #240 we now have assert

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but the chained comparison doesn't work, and using and doesn't work either:

self = <spy.parser.Parser object at 0x106fbcf50>
primary = 'not implemented yet: BoolOp', secondary = 'this is not supported'
loc = <Loc: '/Users/dp/repos/spy/stdlib/_dict.spy 37:15 37:68'>

    def error(self, primary: str, secondary: str, loc: Loc) -> NoReturn:
>       raise SPyError.simple("W_ParseError", primary, secondary, loc)
E       spy.errors.SPyError: ParseError: not implemented yet: BoolOp
E          --> /Users/dp/repos/spy/stdlib/_dict.spy:37:16
E        37 |         assert MIN_LOG_SIZE <= log_size and log_size <= MAX_LOG_SIZE
E           |                |___________________________________________________| this is not supported

I'll split it into two asserts

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh right, I always forget about missing features.
I opened #248 and #249 so we don't forget.
Feel free to open issues whenever you find missing features.

data = gc_alloc(DictData)(1)
index = gc_alloc(i32)(1 << log_size)
data.index = index
# NOTE(review): this stores the slot count, not the logarithm, in a field
# named `log_size`; capacity() shifts by this field again -- confirm intent.
data.log_size = 1 << log_size
data.length = 0
# NOTE(review): `data.entries` is not assigned anywhere in this function,
# although insert() and resize() index into it -- confirm where it is
# allocated.
i = 0
# `<<` binds tighter than `<`, so the bound is `1 << log_size`.
while i < 1 << log_size:
data.index[i] = DKIX_EMPTY
# data.entries[i].empty = True
i += 1
return data

def capacity(self: _dict) -> i32:
    # Returns 1 shifted left by the `log_size` field of the dict's data.
    dict_data: ptr[DictData] = self.__ll__.data
    exponent = dict_data.log_size
    return 1 << exponent

def mask(self: _dict) -> i32:
    # Bit mask that reduces a hash to a slot number: `(1 << log_size) - 1`.
    # The shift is parenthesised because `-` binds tighter than `<<`; without
    # the parentheses the result is `1 << (log_size - 1)`, a single bit.
    data: ptr[DictData] = self.__ll__.data
    return (1 << data.log_size) - 1

def distance_0(self: _dict, key: Key) -> i32:
    # Home slot of `key`: its hash ANDed with the index mask. Probing for
    # the key starts at this slot.
    key_hash = hash_key(key)
    return key_hash & mask(self)

def get_entry_at(self: _dict, position: i32) -> Entry:
    # Return the entry stored at `position` of the entries array.
    # `__dict.data` is declared as ptr[DictData], so the local carries the
    # same annotation as in capacity(), mask() and lookup().
    # NOTE(review): the return type is Entry, not ptr[Entry]; confirm whether
    # a caller that assigns to a field of the result changes the stored
    # entry or only a copy.
    data: ptr[DictData] = self.__ll__.data
    return data.entries[position]

# Probe the index for `key`, starting at distance_0(self, key).
# Returns the position of the matching entry, or DKIX_EMPTY when an empty
# slot is reached or the loop ends without a match.
# NOTE(review): DKIX_EMPTY is 0, which is also the position insert() records
# for the first entry -- confirm the two cannot be confused.
def lookup(self: _dict, key: Key) -> i32:
data: ptr[DictData] = self.__ll__.data
d0 = distance_0(self, key)
# `distance` is incremented at the top of the loop body, so it takes the
# values 0 .. capacity(self) inclusive.
distance = -1
while distance < capacity(self):
distance += 1
# NOTE(review): the probe is ANDed with capacity(self), not mask(self);
# if the capacity is a power of two the result can only be 0 or the
# capacity itself -- confirm that mask(self) was intended.
ix = data.index[(d0 + distance) & capacity(self)]
# [Antonio]: another confusing error:
# File "/Users/dp/.pyenv/versions/3.13.2/envs/spy/lib/python3.13/site-packages/fixedint/base.py", line 391, in _f
# return nt(intfunc(int(self), int(other)))
# ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
# ZeroDivisionError: integer modulo by zero
# I guess this is saying that capacity() might return 0?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it says that someone tried to do x % 0 (not that it might do modulo 0... someone actually DID that).
You get the same tb with this toy program:

def main() -> void:
    print(2 % 0)

We should detect the case and raise a proper exception/panic, see #183.

That said, I cannot really understand where it comes from, because I don't see any modulo operator in your source code, and I cannot reproduce it.
Can you got this error with a different/previous version of the code?
Or, can you show me how to reproduce?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure! If you substitute &% on line 74 you should see that error

# An empty slot ends the probe sequence: the key is not present.
if ix == DKIX_EMPTY:
return DKIX_EMPTY
# A dummy slot is skipped and probing continues.
if ix == DKIX_DUMMY:
# continue
pass
else:
entry = get_entry_at(self, ix)
if not entry.empty:
# continue (I'd rather remove the `not` and use `continue` here)
hash_entry = hash_key(entry.key) # might raise exception
hash_lookup = hash_key(key) # might raise exception
# Keys are compared only when both hashes are equal.
if not (hash_entry != hash_lookup):
# continue (ibid.)
# Identity is tested first, then equality.
if entry.key is key:
return ix
cmp = entry.key == key # might raise exception
if cmp:
return ix
return DKIX_EMPTY

def insert(self: _dict, key: Key, value: Value) -> void:
    # Append a new entry for `key`/`value` and record its position in the
    # first empty slot of the index, probing from distance_0(self, key).
    # The caller (__setitem__) only calls this when lookup() did not find
    # the key. Raises Exception when no empty slot exists.
    data: DictData = self.__ll__.data[0]
    # [Antonio]: another confusing error
    # File "/Users/dp/repos/spy/spy/vm/function.py", line 265, in raw_call
    # w_res = self._pyfunc(vm, *args_w)
    # File "/Users/dp/repos/spy/spy/vm/modules/unsafe/mem.py", line 47, in w_mem_read_T
    # assert False
    # ^^^^^
    # AssertionError
    # does this mean that structs cannot be dereferenced?
    # NOTE(review): confirm that assigning to the fields of `entry` (and to
    # `data.length`) updates the stored structs rather than local copies.
    entry = data.entries[data.length]
    # data.length += 1
    data.length = data.length + 1
    entry.key = key
    entry.value = value
    entry.empty = False
    d0 = distance_0(self, key)
    # The slot that is tested and the slot that is written must be the same
    # one, and it must wrap around inside the index: AND with capacity - 1
    # (ANDing with the capacity itself keeps a single bit only).
    slots = capacity(self)
    slot_mask = slots - 1
    distance = 0
    while distance < slots:
        slot = (d0 + distance) & slot_mask
        if data.index[slot] == DKIX_EMPTY:
            data.index[slot] = data.length - 1
            return
        distance += 1
    raise Exception("aargh!")
    # assert False # must not loop through the entire index
    # without finding a free slot

# Replace the dict's data with a new DictData created by
# new_data(old_data.log_size + 1), copying `length` and the stored entries.
# NOTE(review): the signature declares `-> i32` but the body has no return
# statement; __setitem__ ignores the result.
def resize(self: _dict) -> i32:
# The resize operation never decreases the memory used by dict;
# i.e., we never resize to decrease the size of dict.
# This follows the CPython implementation.
# NOTE(review): annotated DictData here, while `__dict.data` is declared
# as ptr[DictData] -- confirm.
old_data: DictData = self.__ll__.data
# assert old_data.log_size < MAX_LOG_SIZE
new = new_data(old_data.log_size + 1)
new.length = old_data.length
# NOTE(review): only `entries` is copied; `new.index` keeps the DKIX_EMPTY
# fill written by new_data(), and new_data() does not assign `entries` --
# confirm whether the index has to be rebuilt and the array allocated here.
i = 0
while i < old_data.length:
entry = old_data.entries[i]
new.entries[i] = entry
i += 1
self.__ll__.data = new


# Low-level struct behind `_dict.__ll__`; holds the pointer to the current
# DictData, which resize() replaces.
@struct
class __dict:
data: ptr[DictData]


@typelift
class _dict:
    # Pointer to the low-level struct that holds the DictData pointer.
    __ll__: ptr[__dict]

    def __new__() -> _dict:
        # Create an empty dict whose data is built with MIN_LOG_SIZE.
        data = new_data(MIN_LOG_SIZE)
        d = gc_alloc(__dict)(1)
        d.data = data
        return _dict.__lift__(d)

    def __getitem__(self: _dict, key: Key) -> Value:
        # Return the value stored for `key`.
        # Raises KeyError when lookup() reports DKIX_EMPTY.
        # `__dict.data` is a ptr[DictData]; annotate the local accordingly,
        # as __setitem__ does.
        data: ptr[DictData] = self.__ll__.data
        ix = lookup(self, key)
        if ix == DKIX_EMPTY:
            raise KeyError(key)
        return data.entries[ix].value

    def __setitem__(self: _dict, key: Key, value: Value) -> void:
        # Store `value` under `key`: insert a new entry when the key is
        # not found, otherwise overwrite the value of the existing entry.
        data: ptr[DictData] = self.__ll__.data
        ix = lookup(self, key)
        if ix == DKIX_EMPTY:
            insert(self, key, value)
            # Grow once the fill ratio reaches MAX_FILL_RATIO.
            if data.length >= capacity(self) * MAX_FILL_RATIO:
                resize(self)
        else:
            # NOTE(review): get_entry_at() returns an Entry, not a
            # ptr[Entry]; confirm that this assignment reaches the stored
            # entry and not a copy.
            entry = get_entry_at(self, ix)
            # assert not entry.empty
            entry.value = value

    def __delitem__(self: _dict, key: Key) -> void:
        # Not implemented: deleting a key currently does nothing.
        pass

    def __len__(self: _dict) -> i32:
        # Number of items stored, read from the `length` field.
        data: ptr[DictData] = self.__ll__.data
        return data.length


return _dict
9 changes: 9 additions & 0 deletions stdlib/hash.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@blue.generic
def hash(T):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have a better way to deal with hash nowadays 🎉.
Look how we implement len:

@BUILTINS.builtin_func(color='blue', kind='metafunc')
def w_len(vm: 'SPyVM', wam_obj: W_MetaArg) -> W_OpSpec:
w_T = wam_obj.w_static_T
if w_fn := w_T.lookup_func('__len__'):
w_opspec = vm.fast_metacall(w_fn, [wam_obj])
return w_opspec
t = w_T.fqn.human_name
raise SPyError.simple(
'W_TypeError',
f'cannot call len(`{t}`)',
f'this is `{t}`',
wam_obj.loc
)

We should do the same for hash.
We can do it in this PR, but I'm also happy to do it in a follow-up PR if you prefer.

# Hash function for i32 values: returns the value itself.
def hash_i32(v: i32) -> i32:
# this mimics the implementation of CPython's hash(int)
return v
# Select the hash function for the requested type T; i32 is the only type
# handled, every other type raises TypeError.
if T == i32:
return hash_i32

raise TypeError("unsupported type for hash()")
15 changes: 15 additions & 0 deletions stdlib/test_dict.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dict import dict


def test() -> void:
    # Smoke test: store one pair in a dict[i32, i32], read it back and print
    # a check mark on success or a cross on failure.
    # (`dict[int, int]()` is the commented-out alternative spelling.)
    table = dict[i32, i32]()
    table[1] = 1
    if table[1] != 1:
        print("❌")
    else:
        print("✅")


# Program entry point: runs the single smoke test.
def main() -> void:
test()