Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
fef1760
Add dict to stdlib (WIP)
dpdani Jun 4, 2025
b58db2e
changes
dpdani Jun 10, 2025
a2183b3
changes
dpdani Jun 15, 2025
73fe55b
Merge branch 'main' into feature/dict
dpdani Jun 15, 2025
53e67ec
changes
dpdani Jun 15, 2025
b9f2a86
it passes! 🎉
dpdani Jun 17, 2025
d2ec0e7
Merge branch 'main' into feature/dict
dpdani Oct 1, 2025
1c8a373
void -> None
dpdani Oct 1, 2025
70adc99
unhashable type prevents compilation 👌
dpdani Oct 1, 2025
721996b
use array of structs
dpdani Oct 1, 2025
17a6a47
tests!
dpdani Oct 1, 2025
ef5bfc4
tests pass!
dpdani Oct 2, 2025
460b083
Merge branch 'main' into feature/dict
dpdani Oct 2, 2025
9ac3f9f
actually implement resizing
dpdani Oct 3, 2025
ee1c276
.get(), but using `default` as a parameter is a big ouch
dpdani Oct 3, 2025
af75bac
cleanup
dpdani Oct 3, 2025
7839061
default -> default_
dpdani Oct 4, 2025
e6dab95
refactor index lookups to avoid duplicated code in inserts
dpdani Oct 4, 2025
690eb1c
cleanups
dpdani Oct 4, 2025
79eb71a
delete
dpdani Oct 4, 2025
0ec3e69
implement `__contains__`, `__eq__`, and `__fastiter__`
dpdani Oct 4, 2025
8ed4030
cleanup
dpdani Oct 4, 2025
246151f
module dict.spy -> _dict.spy
dpdani Oct 4, 2025
c0599f6
asserts
dpdani Oct 4, 2025
807d5c3
review
dpdani Oct 4, 2025
b6a13ea
removed comments
dpdani Oct 4, 2025
c4ebbd9
Merge branch 'refs/heads/main' into feature/dict
dpdani Oct 6, 2025
f689bde
reworked hash() builtin
dpdani Oct 6, 2025
d2d8884
fix
dpdani Oct 6, 2025
724a338
design comment
dpdani Oct 6, 2025
5df6c2d
fix
dpdani Oct 6, 2025
b88bfc6
move to static inline in the header
dpdani Oct 7, 2025
db39ed9
improve comment
dpdani Oct 7, 2025
2a3c805
Merge branch 'main' into feature/dict
dpdani Oct 7, 2025
b74e7b8
fix
dpdani Oct 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 196 additions & 0 deletions stdlib/dict.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
from hash import hash
from unsafe import gc_alloc, ptr


# Sentinel values stored in index slots (cf. CPython's dictobject.c, which
# uses negative DKIX_* sentinels for the same purpose).
DKIX_EMPTY = 0
# NOTE(review): the original comments say these were meant to be 2**32 - 1,
# 2**32 - 2 and 2**32 - 3, but `1 << 32 - n` parses as `1 << (32 - n)`, so
# the actual values are 2**31, 2**30 and 2**29 — confirm which was intended.
DKIX_DUMMY = 1 << (32 - 1)
DKIX_ERROR = 1 << (32 - 2)
DKIX_KEY_CHANGED = 1 << (32 - 3)

# Bounds for the index-size exponent: capacity is 1 << log_size.
MIN_LOG_SIZE = 6
MAX_LOG_SIZE = 31
# Grow once length reaches 2/3 of capacity (same ratio as CPython).
MAX_FILL_RATIO = 2 / 3


@blue.generic
def dict(Key, Value):
    # Blue-time generic: specializes a hash-map implementation for the given
    # Key/Value types (e.g. dict[i32, i32]) and returns the lifted _dict type.
    hash_key = hash[Key]  # hash function specialized for the Key type

@struct
class Entry:
    # One key/value slot in the dense `entries` array.
    # empty: bool
    empty: i32  # nonzero when the slot is unused; NOTE(review): declared i32 but assigned True/False elsewhere — confirm SPy bool/i32 coercion
    key: Key
    value: Value


@struct
class DictData:
    # Backing storage: an open-addressed `index` maps probe slots to
    # positions in the dense `entries` array (CPython-style compact dict).
    index: ptr[i32]
    log_size: i32 # log2 of the index capacity (capacity == 1 << log_size); NOTE(review): new_data stores `1 << log_size` here instead — confirm which is intended
    length: i32 # number of items stored
    entries: ptr[Entry]


def new_data(log_size: i32) -> ptr[DictData]:
    # Allocate a fresh DictData with an all-empty index and empty entries.
    # Two asserts because chained comparison is not supported (see review).
    assert MIN_LOG_SIZE <= log_size
    assert log_size <= MAX_LOG_SIZE
    data = gc_alloc(DictData)(1)
    # Bug fix: store the exponent itself, not `1 << log_size` —
    # capacity() and resize() both treat log_size as log2(capacity).
    data.log_size = log_size
    data.length = 0
    data.index = new_index(log_size)
    # Bug fix: entries was never allocated here, but resize() writes
    # through data.entries of the struct returned by this function.
    data.entries = new_entries(log_size)
    return data

def new_index(log_size: i32) -> ptr[i32]:
# assert MIN_LOG_SIZE <= log_size <= MAX_LOG_SIZE
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks to #240 we now have assert

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but the chained comparison doesn't work, and using and doesn't work either:

self = <spy.parser.Parser object at 0x106fbcf50>
primary = 'not implemented yet: BoolOp', secondary = 'this is not supported'
loc = <Loc: '/Users/dp/repos/spy/stdlib/_dict.spy 37:15 37:68'>

    def error(self, primary: str, secondary: str, loc: Loc) -> NoReturn:
>       raise SPyError.simple("W_ParseError", primary, secondary, loc)
E       spy.errors.SPyError: ParseError: not implemented yet: BoolOp
E          --> /Users/dp/repos/spy/stdlib/_dict.spy:37:16
E        37 |         assert MIN_LOG_SIZE <= log_size and log_size <= MAX_LOG_SIZE
E           |                |___________________________________________________| this is not supported

I'll split it into two asserts

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh right, I always forget about missing features.
I opened #248 and #249 so we don't forget.
Feel free to open issues whenever you find missing features.

    # Allocate 2**log_size index slots, all marked DKIX_EMPTY.
    index = gc_alloc(i32)(1 << log_size)
    i = 0
    while i < 1 << log_size:
        index[i] = DKIX_EMPTY
        i += 1
    return index

# @blue
def new_entries(log_size: i32) -> ptr[Entry]:
    # Allocate 2**log_size entries, each marked empty with cleared key/value.
    entries = gc_alloc(Entry)(1 << log_size)
    i = 0
    while i < 1 << log_size:
        entries[i].empty = True
        # ^ WIP: Cannot read struct by value: `dict::dict[i32, i32]::Entry`
        # [Antonio]: not sure what to do here, I'm leaning towards removing
        # the Entry struct in favor of having three different arrays.
        # It degrades cache friendliness, but that's fine.
        # Do you think there's a way around this WIP error now?
        # I also tried blueing the function, but it didn't help.
        entries[i].key = None
        # NOTE(review): key/value are typed Key/Value (e.g. i32); assigning
        # None may not typecheck for non-pointer types — confirm.
        entries[i].value = None
        i += 1
    return entries

def capacity(self: _dict) -> i32:
    # Total number of slots in the probe index (always a power of two).
    d: ptr[DictData] = self.__ll__
    return 1 << d.log_size

def mask(self: _dict) -> i32:
    # Bit mask used to wrap probe positions into the index.
    # Bug fix: `1 << data.log_size - 1` parses as `1 << (log_size - 1)`,
    # i.e. half the capacity; the wrap mask must be capacity - 1.
    data: ptr[DictData] = self.__ll__
    return (1 << data.log_size) - 1

def distance_0(self: _dict, key: Key) -> i32:
    # First index slot to probe when looking up `key`:
    # its hash, wrapped into the table by the mask.
    h = hash_key(key)
    return h & mask(self)

def get_entry_at(self: _dict, position: i32) -> Entry:
    # Fetch the entry stored at `position` in the dense entries array.
    # Bug fix: annotate as ptr[DictData] — every other helper reads
    # self.__ll__ as a pointer; the bare DictData annotation was inconsistent.
    data: ptr[DictData] = self.__ll__
    return data.entries[position]

def lookup(self: _dict, key: Key) -> i32:
    # Probe the index for `key`; return the position of its entry in the
    # dense entries array, or DKIX_EMPTY if the key is not present.
    # NOTE(review): DKIX_EMPTY == 0 is also a valid entry position, so a
    # key stored at position 0 is indistinguishable from "missing" — confirm
    # the sentinel choice (CPython uses negative sentinels).
    data: ptr[DictData] = self.__ll__
    d0 = distance_0(self, key)
    hash_lookup = hash_key(key)  # hoisted: loop-invariant; might raise
    distance = -1
    while distance < capacity(self):
        distance += 1
        # Bug fix: wrap with mask(self) (== capacity - 1); `& capacity(self)`
        # collapses every probe position to 0 or capacity (out of bounds),
        # since capacity is a power of two.
        ix = data.index[(d0 + distance) & mask(self)]
        if ix == DKIX_EMPTY:
            return DKIX_EMPTY  # never-used slot: key cannot be further along
        if ix == DKIX_DUMMY:
            # deleted slot: keep probing
            pass
        else:
            entry = get_entry_at(self, ix)
            if not entry.empty:
                hash_entry = hash_key(entry.key)  # might raise exception
                if not (hash_entry != hash_lookup):
                    if entry.key is key:
                        return ix  # identity fast path
                    cmp = entry.key == key  # might raise exception
                    if cmp:
                        return ix
    return DKIX_EMPTY

def insert(self: _dict, key: Key, value: Value) -> void:
    # Append (key, value) to the dense entries array and record its
    # position in the first free index slot along key's probe sequence.
    # Precondition: key is not already present (callers check via lookup).
    data: ptr[DictData] = self.__ll__
    entry = data.entries[data.length]
    # data.length += 1
    data.length = data.length + 1
    entry.key = key
    entry.value = value
    entry.empty = False
    d0 = distance_0(self, key)
    distance = -1
    while distance < capacity(self):
        distance += 1
        # Bug fix: wrap with mask(self) (== capacity - 1); `& capacity(self)`
        # collapses probe positions to 0 or capacity (out of bounds).
        slot = (d0 + distance) & mask(self)
        ix = data.index[slot]
        if ix == DKIX_EMPTY:
            # Bug fix: store into the same wrapped slot that was probed;
            # the original wrote to the unwrapped `d0 + distance`.
            # NOTE(review): data.length - 1 == 0 for the first item, which
            # collides with DKIX_EMPTY — confirm the sentinel choice.
            data.index[slot] = data.length - 1
            return
    raise Exception("aargh!")
    # assert False # must not loop through the entire index
    # without finding a free slot

def resize(self: _dict) -> i32:
    # Grow the dict to double capacity and rebuild the probe index.
    # The resize operation never decreases the memory used by dict;
    # i.e., we never resize to decrease the size of dict.
    # This follows the CPython implementation.
    old_data: ptr[DictData] = self.__ll__
    assert old_data.log_size < MAX_LOG_SIZE
    new = new_data(old_data.log_size + 1)
    new.length = old_data.length
    new_mask = (1 << new.log_size) - 1
    i = 0
    while i < old_data.length:
        entry = old_data.entries[i]
        new.entries[i] = entry
        # Bug fix: the index was never rebuilt after growing, so every
        # lookup after a resize would miss.  Re-probe each live entry
        # with the new mask and record its position.
        if not entry.empty:
            d0 = hash_key(entry.key) & new_mask
            distance = 0
            while new.index[(d0 + distance) & new_mask] != DKIX_EMPTY:
                distance += 1
            new.index[(d0 + distance) & new_mask] = i
        i += 1
    # Bug fix: DictData has no `data` field (the original wrote
    # `self.__ll__.data = new`); rebind the lifted pointer itself.
    self.__ll__ = new
    # Bug fix: declared `-> i32` but fell off the end without returning;
    # report the new log size (callers currently ignore the result).
    return new.log_size


@typelift
class _dict:
    # The user-visible dict type: a typelifted wrapper around ptr[DictData].
    __ll__: ptr[DictData]

    def __new__() -> _dict:
        # Build an empty dict with the minimum capacity.
        data = gc_alloc(DictData)(1)
        data.log_size = MIN_LOG_SIZE
        data.length = 0
        data.index = new_index(data.log_size)
        data.entries = new_entries(data.log_size)
        return _dict.__lift__(data)

    def __getitem__(self: _dict, key: Key) -> Value:
        # Return the value stored for `key`; raise KeyError if absent.
        # NOTE(review): DKIX_EMPTY == 0 is also entry position 0, so a key
        # whose entry sits at position 0 may be reported missing — confirm.
        data: ptr[DictData] = self.__ll__
        ix = lookup(self, key)
        if ix == DKIX_EMPTY:
            raise KeyError(key)
        return data.entries[ix].value

    def __setitem__(self: _dict, key: Key, value: Value) -> void:
        # Insert a new key or overwrite an existing one; grow the table
        # once the fill ratio exceeds MAX_FILL_RATIO.
        data: ptr[DictData] = self.__ll__
        ix = lookup(self, key)
        if ix == DKIX_EMPTY:
            insert(self, key, value)
            if data.length >= capacity(self) * MAX_FILL_RATIO:
                resize(self)
        else:
            entry = get_entry_at(self, ix)
            # assert not entry.empty
            entry.value = value

    def __delitem__(self: _dict, key: Key) -> void:
        # NOTE(review): deletion is a silent no-op — the key stays present
        # and __len__ is unchanged.  Either implement it (mark the index
        # slot DKIX_DUMMY and the entry empty) or raise until supported.
        pass

    def __len__(self: _dict) -> i32:
        # Number of live items currently stored.
        data: ptr[DictData] = self.__ll__
        return data.length


return _dict
9 changes: 9 additions & 0 deletions stdlib/hash.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@blue.generic
def hash(T):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have a better way to deal with hash nowadays 🎉.
Look how we implement len:

@BUILTINS.builtin_func(color='blue', kind='metafunc')
def w_len(vm: 'SPyVM', wam_obj: W_MetaArg) -> W_OpSpec:
w_T = wam_obj.w_static_T
if w_fn := w_T.lookup_func('__len__'):
w_opspec = vm.fast_metacall(w_fn, [wam_obj])
return w_opspec
t = w_T.fqn.human_name
raise SPyError.simple(
'W_TypeError',
f'cannot call len(`{t}`)',
f'this is `{t}`',
wam_obj.loc
)

We should do the same for hash.
We can do it in this PR, but I'm also happy to do it in a follow-up PR if you prefer.

    # Blue-time dispatch: return the hash function specialized for type T.
    def hash_i32(v: i32) -> i32:
        # this mimics the implementation of CPython's hash(int)
        return v
    if T == i32:
        return hash_i32

    # No hash implementation known for T.
    raise TypeError("unsupported type for hash()")
15 changes: 15 additions & 0 deletions stdlib/test_dict.spy
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dict import dict


def test() -> void:
    # Smoke test: store one mapping in a specialized dict and read it back.
    m = dict[i32, i32]()
    m[1] = 1
    if m[1] == 1:
        print("✅")
    else:
        print("❌")


def main() -> void:
    # Entry point: run the single smoke test.
    test()