Skip to content

Commit 874520b

Browse files
committed
model: add support for index, and HNSW index objectbox#24
TODO: fix/extend index unit tests!
1 parent c8ad84a commit 874520b

File tree

4 files changed

+120
-58
lines changed

4 files changed

+120
-58
lines changed

objectbox/model/entity.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ def fill_properties(self):
9090
elif self.id_property._ob_type != OBXPropertyType_Long:
9191
raise Exception("ID property must be an int")
9292

93+
def get_property(self, name: str):
    """Return the property of this entity whose ``_name`` equals ``name``.

    ``self.properties`` is scanned in iteration order and the first match
    is returned.

    Raises:
        Exception: if no property has the given name.
    """
    for prop in self.properties:
        if prop._name == name:
            return prop
    raise Exception(f"Property \"{name}\" not found in Entity: \"{self.name}\"")
99+
93100
def get_value(self, object, prop: Property):
94101
# in case value is not overwritten on the object, it's the Property object itself (= as defined in the Class)
95102
val = getattr(object, prop._name)
@@ -228,12 +235,8 @@ def unmarshal(self, data: bytes):
228235
return obj
229236

230237

231-
def Entity(id: int = 0, uid: int = 0):
    """Entity decorator factory; use as ``@Entity(id=..., uid=...)``.

    Returns a decorator that wraps the decorated class in ``_Entity`` together
    with the given ``id`` and ``uid``. The factory takes no class argument, so
    the call parentheses are required; a bare ``@Entity`` is not supported.
    """
    def wrapper(cls):
        return _Entity(cls, id, uid)

    return wrapper

objectbox/model/model.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414

1515

1616
from objectbox.model.entity import _Entity
17+
from objectbox.model.properties import *
1718
from objectbox.c import *
19+
import logging
1820

1921

2022
class IdUid:
@@ -36,35 +38,58 @@ def __init__(self):
3638
self.last_index_id = IdUid(0, 0)
3739
self.last_relation_id = IdUid(0, 0)
3840

39-
def entity(self, entity: _Entity, last_property_id: IdUid):
41+
def _set_hnsw_params(self, index: HnswIndex):
    """Forward the HNSW parameters of ``index`` to the C model.

    Every parameter whose value is not ``None`` is passed to its
    ``obx_model_property_index_hnsw_*`` function together with
    ``self._c_model``; ``None`` values are skipped without a call.
    """
    # (C setter, value) pairs, listed in the order the calls are issued.
    # NOTE(review): presumably these apply to the property most recently added
    # to the model - confirm against the ObjectBox C API.
    params = (
        (obx_model_property_index_hnsw_dimensions, index.dimensions),
        (obx_model_property_index_hnsw_neighbors_per_node, index.neighbors_per_node),
        (obx_model_property_index_hnsw_indexing_search_count, index.indexing_search_count),
        (obx_model_property_index_hnsw_flags, index.flags),
        (obx_model_property_index_hnsw_distance_type, index.distance_type),
        (obx_model_property_index_hnsw_reparation_backlink_probability, index.reparation_backlink_probability),
        (obx_model_property_index_hnsw_vector_cache_hint_size_kb, index.vector_cache_hint_size_kb),
    )
    for setter, value in params:
        if value is not None:
            setter(self._c_model, value)
56+
57+
def entity(self, entity: _Entity, last_property_id: IdUid, last_index_id: Optional[IdUid] = None):
    """Add ``entity``, its properties and their indexes to the C model.

    Args:
        entity: The ``@Entity``-decorated type to register.
        last_property_id: Stored on ``entity`` and passed to the C model as
            the entity's last property ID.
        last_index_id: Accepted but not read anywhere in this method.
            NOTE(review): presumably meant to update ``self.last_index_id`` -
            confirm with the caller.

    Raises:
        Exception: if ``entity`` is not an ``_Entity`` instance.
    """
    if not isinstance(entity, _Entity):
        raise Exception("Given type is not an Entity. Are you passing an instance instead of a type or did you "
                        "forget the '@Entity' annotation?")

    entity.last_property_id = last_property_id

    obx_model_entity(self._c_model, c_str(entity.name), entity.id, entity.uid)

    logging.debug(f"Creating entity \"{entity.name}\" (ID={entity.id}, UID={entity.uid})")

    for property_ in entity.properties:
        obx_model_property(self._c_model, c_str(property_._name), property_._ob_type, property_._id, property_._uid)

        logging.debug(f"Creating property \"{property_._name}\" (ID={property_._id}, UID={property_._uid})")

        if property_._flags != 0:
            obx_model_property_flags(self._c_model, property_._flags)

        if property_._index is not None:
            index = property_._index
            if isinstance(index, HnswIndex):
                # HNSW parameters are sent before the index ID, as in the original call order.
                self._set_hnsw_params(index)
                logging.debug(f"  HNSW index (ID={index.id}, UID={index.uid}); Dimensions: {index.dimensions}")
            else:
                logging.debug(f"  Index (ID={index.id}, UID={index.uid}); Type: {index.type}")
            obx_model_property_index_id(self._c_model, index.id, index.uid)

    obx_model_entity_last_property_id(self._c_model, last_property_id.id, last_property_id.uid)
86+
87+
def _finish(self):  # Called by Builder
    """Pass the last relation, index and entity IDs to the C model.

    Each ID/UID pair is forwarded only when its ``IdUid`` object is truthy.
    """
    if self.last_relation_id:
        obx_model_last_relation_id(self._c_model, self.last_relation_id.id, self.last_relation_id.uid)

    if self.last_index_id:
        obx_model_last_index_id(self._c_model, self.last_index_id.id, self.last_index_id.uid)

    if self.last_entity_id:
        obx_model_last_entity_id(self._c_model, self.last_entity_id.id, self.last_entity_id.uid)

objectbox/model/properties.py

Lines changed: 59 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from objectbox.c import *
1919
import flatbuffers.number_types
2020
import numpy as np
21+
from dataclasses import dataclass
2122

2223

2324
class PropertyType(IntEnum):
@@ -72,42 +73,68 @@ class PropertyType(IntEnum):
7273

7374

7475
class IndexType(IntEnum):
    """Index kinds; each member's value is the matching ``OBXPropertyFlags_*`` constant."""
    # Upper-case member names: a member called ``value`` would collide with
    # the ``.value`` attribute that every enum member already has.
    VALUE = OBXPropertyFlags_INDEXED
    HASH = OBXPropertyFlags_INDEX_HASH
    HASH64 = OBXPropertyFlags_INDEX_HASH64
79+
80+
81+
@dataclass
class Index:
    """Index definition for a property: its ID, UID and index type."""
    id: int
    uid: int
    # TODO HNSW isn't a type but HASH and HASH64 are, remove type member and make HashIndex and Hash64Index classes?
    type: IndexType = IndexType.VALUE
87+
88+
89+
class HnswFlags(IntEnum):
    """Flags for an HNSW index."""
    # The values are distinct powers of two - presumably bit flags meant to be
    # OR-ed together; confirm against the ObjectBox C API.
    NONE = 0
    DEBUG_LOGS = 1
    DEBUG_LOGS_DETAILED = 2
    VECTOR_CACHE_SIMD_PADDING_OFF = 4
    REPARATION_LIMIT_CANDIDATES = 8
95+
96+
97+
class HnswDistanceType(IntEnum):
    """Distance types for an HNSW index, valued with the ``OBXHnswDistanceType_*`` constants."""
    # No trailing comma after a value: it would turn the value into a
    # one-element tuple, which IntEnum only unpacks back to an int by accident.
    UNKNOWN = OBXHnswDistanceType_UNKNOWN
    EUCLIDEAN = OBXHnswDistanceType_EUCLIDEAN
100+
101+
102+
@dataclass
class HnswIndex:
    """HNSW index definition for a property.

    ``id``, ``uid`` and ``dimensions`` are required. The remaining fields are
    optional tuning parameters; those defaulting to ``None`` are left unset
    unless a value is given.
    """
    id: int
    uid: int
    dimensions: int
    neighbors_per_node: Optional[int] = None
    indexing_search_count: Optional[int] = None
    flags: HnswFlags = HnswFlags.NONE
    distance_type: HnswDistanceType = HnswDistanceType.EUCLIDEAN
    reparation_backlink_probability: Optional[float] = None
    # NOTE(review): a size in KB is presumably an integer - confirm whether
    # this annotation should be Optional[int].
    vector_cache_hint_size_kb: Optional[float] = None
78113

79114

80115
class Property:
81-
def __init__(self, pytype: Type, **kwargs):
    """Create a property definition.

    Args:
        pytype: Python type of the property value.

    Keyword Args:
        id: Property ID (required).
        uid: Property UID (required).
        type: OBX property type; inferred from ``pytype`` when omitted or
            ``None``.
        index: Index definition for the property; ``None`` (the default)
            leaves the property unindexed.

    Raises:
        KeyError: if ``id`` or ``uid`` is missing.
    """
    self._id = kwargs['id']
    self._uid = kwargs['uid']
    self._name = ""  # set in Entity.fill_properties()

    self._py_type = pytype
    # An explicit ``type=None`` means "infer", same as omitting the argument;
    # otherwise ``None`` would be looked up in fb_type_map below.
    ob_type = kwargs.get('type')
    self._ob_type = ob_type if ob_type is not None else self._determine_ob_type()
    self._fb_type = fb_type_map[self._ob_type]

    self._is_id = isinstance(self, Id)
    self._flags = 0

    # FlatBuffers marshalling information
    self._fb_slot = self._id - 1
    self._fb_v_offset = 4 + 2 * self._fb_slot

    self._index = kwargs.get('index', None)

    # Must run after _is_id and _index are set: both are read by _set_flags().
    self._set_flags()
135+
136+
def _determine_ob_type(self) -> OBXPropertyType:
137+
""" Tries to infer the OBX property type from the Python type. """
111138
ts = self._py_type
112139
if ts == str:
113140
return OBXPropertyType_String
@@ -124,9 +151,15 @@ def __determine_ob_type(self) -> OBXPropertyType:
124151
else:
125152
raise Exception("unknown property type %s" % ts)
126153

127-
def __set_flags(self):
154+
def _set_flags(self):
128155
if self._is_id:
129-
self._flags = OBXPropertyFlags_ID
156+
self._flags |= OBXPropertyFlags_ID
157+
158+
if self._index is not None:
159+
self._flags |= OBXPropertyFlags_INDEXED
160+
if isinstance(self._index, Index): # Generic index
161+
self._flags |= self._index.type
162+
print("Flags set to", self._flags, bin(self._flags))
130163

131164
def op(self, op: _ConditionOp, value, case_sensitive: bool = True) -> QueryCondition:
    """Build a ``QueryCondition`` for this property's ID with the given operator and value."""
    return QueryCondition(self._id, op, value, case_sensitive)
@@ -165,4 +198,4 @@ def between(self, value_a, value_b) -> QueryCondition:
165198
# ID property (primary key)
class Id(Property):
    """Property subclass marking the ID (primary key) of an entity."""

    def __init__(self, py_type: type = int, id: int = 0, uid: int = 0):
        # Property.__init__ takes id/uid as keyword arguments only.
        super().__init__(py_type, id=id, uid=uid)

tests/model.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from objectbox.model import *
2-
from objectbox.model.properties import IndexType
2+
from objectbox.model.properties import *
33
import numpy as np
44
from datetime import datetime
55
from typing import Generic, Dict, Any
@@ -10,16 +10,17 @@ class TestEntity:
1010
id = Id(id=1, uid=1001)
1111
# TODO Enable indexing dynamically, e.g. have a constructor to enable index(es).
1212
# E.g. indexString=False (defaults to false). Same for bytes.
13-
str = Property(str, id=2, uid=1002, index=True)
13+
# TODO Test HASH and HASH64 indices (only supported for strings)
14+
str = Property(str, id=2, uid=1002, index=Index(id=1, uid=10001))
1415
bool = Property(bool, id=3, uid=1003)
15-
int64 = Property(int, type=PropertyType.long, id=4, uid=1004, index=True)
16-
int32 = Property(int, type=PropertyType.int, id=5, uid=1005, index=True, index_type=IndexType.hash)
17-
int16 = Property(int, type=PropertyType.short, id=6, uid=1006, index_type=IndexType.hash)
16+
int64 = Property(int, type=PropertyType.long, id=4, uid=1004, index=Index(id=2, uid=10002))
17+
int32 = Property(int, type=PropertyType.int, id=5, uid=1005)
18+
int16 = Property(int, type=PropertyType.short, id=6, uid=1006)
1819
int8 = Property(int, type=PropertyType.byte, id=7, uid=1007)
1920
float64 = Property(float, type=PropertyType.double, id=8, uid=1008)
2021
float32 = Property(float, type=PropertyType.float, id=9, uid=1009)
2122
bools = Property(np.ndarray, type=PropertyType.boolVector, id=10, uid=1010)
22-
bytes = Property(bytes, id=11, uid=1011, index_type=IndexType.hash64)
23+
bytes = Property(bytes, id=11, uid=1011)
2324
shorts = Property(np.ndarray, type=PropertyType.shortVector, id=12, uid=1012)
2425
chars = Property(np.ndarray, type=PropertyType.charVector, id=13, uid=1013)
2526
ints = Property(np.ndarray, type=PropertyType.intVector, id=14, uid=1014)

0 commit comments

Comments
 (0)