Skip to content

Commit cacd08e

Browse files
committed
Update expressions
1 parent db90226 commit cacd08e

File tree

6 files changed

+93
-125
lines changed

6 files changed

+93
-125
lines changed

chromadb/api/models/AsyncCollection.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -300,9 +300,9 @@ async def search(
300300
Args:
301301
searches: List of SearchPayload objects, each containing:
302302
- filter: Optional filter criteria (user_ids, where)
303-
- score: Scoring expression for hybrid search
304-
- limit: Optional limit configuration (skip, fetch)
305-
- project: Optional projection configuration (fields to return)
303+
- rank: Ranking expression for hybrid search
304+
- limit: Optional limit configuration (offset, limit)
305+
- select: Optional selection configuration (fields to return)
306306
307307
Returns:
308308
SearchResult: List of search results for each search payload.
@@ -313,15 +313,15 @@ async def search(
313313
314314
Example:
315315
from chromadb.execution.expression.operator import (
316-
DenseKnn, RankScore, Val, Sum, Filter, Limit, Project
316+
Knn, Val, Sum, Filter, Limit, Select
317317
)
318318
from chromadb.execution.expression.plan import SearchPayload
319319
320320
payload = SearchPayload(
321321
filter=Filter(where={"category": "science"}),
322-
score=RankScore(source=DenseKnn(embedding=[0.1, 0.2, 0.3], limit=100)),
323-
limit=Limit(skip=0, fetch=10),
324-
project=Project(fields={"$document", "$score", "$metadata"})
322+
rank=Knn(embedding=[0.1, 0.2, 0.3], limit=100),
323+
limit=Limit(offset=0, limit=10),
324+
select=Select(fields={"#document", "#score", "#metadata"})
325325
)
326326
327327
results = await collection.search([payload])

chromadb/api/models/Collection.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,9 @@ def search(
304304
Args:
305305
searches: List of SearchPayload objects, each containing:
306306
- filter: Optional filter criteria (user_ids, where)
307-
- score: Scoring expression for hybrid search
308-
- limit: Optional limit configuration (skip, fetch)
309-
- project: Optional projection configuration (fields to return)
307+
- rank: Ranking expression for hybrid search
308+
- limit: Optional limit configuration (offset, limit)
309+
- select: Optional selection configuration (fields to return)
310310
311311
Returns:
312312
SearchResult: List of search results for each search payload.
@@ -317,15 +317,15 @@ def search(
317317
318318
Example:
319319
from chromadb.execution.expression.operator import (
320-
DenseKnn, RankScore, Val, Sum, Filter, Limit, Project
320+
Knn, Val, Sum, Filter, Limit, Select
321321
)
322322
from chromadb.execution.expression.plan import SearchPayload
323323
324324
payload = SearchPayload(
325325
filter=Filter(where={"category": "science"}),
326-
score=RankScore(source=DenseKnn(embedding=[0.1, 0.2, 0.3], limit=100)),
327-
limit=Limit(skip=0, fetch=10),
328-
project=Project(fields={"$document", "$score", "$metadata"})
326+
rank=Knn(embedding=[0.1, 0.2, 0.3], limit=100),
327+
limit=Limit(offset=0, limit=10),
328+
select=Select(fields={"#document", "#score", "#metadata"})
329329
)
330330
331331
results = collection.search([payload])

chromadb/execution/executor/local.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ def get(self, plan: GetPlan) -> GetResult:
5858
where=plan.filter.where,
5959
where_document=plan.filter.where_document,
6060
ids=plan.filter.user_ids,
61-
limit=plan.limit.fetch,
62-
offset=plan.limit.skip,
61+
limit=plan.limit.limit,
62+
offset=plan.limit.offset,
6363
include_metadata=True,
6464
)
6565

chromadb/execution/expression/operator.py

Lines changed: 71 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from dataclasses import dataclass, field
2-
from typing import Optional, List, Dict, Set, Any
2+
from typing import Optional, List, Dict, Set, Any, Union
33

44
from chromadb.api.types import Embeddings, IDs, Include, SparseVector
55
from chromadb.types import (
@@ -50,14 +50,14 @@ class KNN:
5050

5151
@dataclass
5252
class Limit:
53-
skip: int = 0
54-
fetch: Optional[int] = None
53+
offset: int = 0
54+
limit: Optional[int] = None
5555

5656
def to_dict(self) -> Dict[str, Any]:
5757
"""Convert the Limit to a dictionary for JSON serialization"""
58-
result = {"skip": self.skip}
59-
if self.fetch is not None:
60-
result["fetch"] = self.fetch
58+
result = {"offset": self.offset}
59+
if self.limit is not None:
60+
result["limit"] = self.limit
6161
return result
6262

6363

@@ -85,178 +85,146 @@ def included(self) -> Include:
8585
return includes # type: ignore[return-value]
8686

8787

88-
# Score expression types for hybrid search
88+
# Rank expression types for hybrid search
8989
@dataclass
90-
class Score:
91-
"""Base class for Score expressions (algebraic data type)"""
90+
class Rank:
91+
"""Base class for Rank expressions (algebraic data type)"""
9292

9393
def to_dict(self) -> Dict[str, Any]:
9494
"""Convert the Rank expression to a dictionary for JSON serialization"""
9595
raise NotImplementedError("Subclasses must implement to_dict()")
9696

9797

9898
@dataclass
99-
class Abs(Score):
100-
"""Absolute value of a score"""
101-
score: Score
99+
class Abs(Rank):
100+
"""Absolute value of a rank"""
101+
rank: Rank
102102

103103
def to_dict(self) -> Dict[str, Any]:
104-
return {"$abs": self.score.to_dict()}
104+
return {"$abs": self.rank.to_dict()}
105105

106106

107107
@dataclass
108-
class Div(Score):
109-
"""Division of two scores"""
110-
left: Score
111-
right: Score
108+
class Div(Rank):
109+
"""Division of two ranks"""
110+
left: Rank
111+
right: Rank
112112

113113
def to_dict(self) -> Dict[str, Any]:
114114
return {"$div": {"left": self.left.to_dict(), "right": self.right.to_dict()}}
115115

116116

117117
@dataclass
118-
class Exp(Score):
119-
"""Exponentiation of a score"""
120-
score: Score
118+
class Exp(Rank):
119+
"""Exponentiation of a rank"""
120+
rank: Rank
121121

122122
def to_dict(self) -> Dict[str, Any]:
123-
return {"$exp": self.score.to_dict()}
123+
return {"$exp": self.rank.to_dict()}
124124

125125

126126
@dataclass
127-
class Log(Score):
128-
"""Logarithm of a score"""
129-
score: Score
127+
class Log(Rank):
128+
"""Logarithm of a rank"""
129+
rank: Rank
130130

131131
def to_dict(self) -> Dict[str, Any]:
132-
return {"$log": self.score.to_dict()}
132+
return {"$log": self.rank.to_dict()}
133133

134134

135135
@dataclass
136-
class Max(Score):
137-
"""Maximum of multiple scores"""
138-
scores: List[Score]
136+
class Max(Rank):
137+
"""Maximum of multiple ranks"""
138+
ranks: List[Rank]
139139

140140
def to_dict(self) -> Dict[str, Any]:
141-
return {"$max": [s.to_dict() for s in self.scores]}
141+
return {"$max": [r.to_dict() for r in self.ranks]}
142142

143143

144144
@dataclass
145-
class Min(Score):
146-
"""Minimum of multiple scores"""
147-
scores: List[Score]
145+
class Min(Rank):
146+
"""Minimum of multiple ranks"""
147+
ranks: List[Rank]
148148

149149
def to_dict(self) -> Dict[str, Any]:
150-
return {"$min": [s.to_dict() for s in self.scores]}
150+
return {"$min": [r.to_dict() for r in self.ranks]}
151151

152152

153153
@dataclass
154-
class Mul(Score):
155-
"""Multiplication of multiple scores"""
156-
scores: List[Score]
154+
class Mul(Rank):
155+
"""Multiplication of multiple ranks"""
156+
ranks: List[Rank]
157157

158158
def to_dict(self) -> Dict[str, Any]:
159-
return {"$mul": [s.to_dict() for s in self.scores]}
159+
return {"$mul": [r.to_dict() for r in self.ranks]}
160160

161161

162162
@dataclass
163-
class RankScore(Score):
164-
"""Score based on ranking"""
165-
source: 'Rank'
163+
class Knn(Rank):
164+
"""KNN-based ranking"""
165+
embedding: Union[List[float], SparseVector]
166+
key: str = "$chroma_embedding"
167+
limit: int = 1024
166168
default: Optional[float] = None
167169
ordinal: bool = False
168170

169171
def to_dict(self) -> Dict[str, Any]:
170-
result = {"source": self.source.to_dict()}
172+
# With untagged enum, embedding is serialized directly
173+
# (as a list for dense, or as a dict with indices/values for sparse)
174+
result = {
175+
"embedding": self.embedding,
176+
"key": self.key,
177+
"limit": self.limit
178+
}
179+
171180
if self.default is not None:
172181
result["default"] = self.default # type: ignore[assignment]
173182
if self.ordinal:
174-
result["ordinal"] = self.ordinal # type: ignore[assignment]
175-
return {"$rank": result}
183+
result["ordinal"] = self.ordinal
184+
185+
return {"$knn": result}
176186

177187

178188
@dataclass
179-
class Sub(Score):
180-
"""Subtraction of two scores"""
181-
left: Score
182-
right: Score
189+
class Sub(Rank):
190+
"""Subtraction of two ranks"""
191+
left: Rank
192+
right: Rank
183193

184194
def to_dict(self) -> Dict[str, Any]:
185195
return {"$sub": {"left": self.left.to_dict(), "right": self.right.to_dict()}}
186196

187197

188198
@dataclass
189-
class Sum(Score):
190-
"""Summation of multiple scores"""
191-
scores: List[Score]
199+
class Sum(Rank):
200+
"""Summation of multiple ranks"""
201+
ranks: List[Rank]
192202

193203
def to_dict(self) -> Dict[str, Any]:
194-
return {"$sum": [s.to_dict() for s in self.scores]}
204+
return {"$sum": [r.to_dict() for r in self.ranks]}
195205

196206

197207
@dataclass
198-
class Val(Score):
199-
"""Constant score value"""
208+
class Val(Rank):
209+
"""Constant rank value"""
200210
value: float
201211

202212
def to_dict(self) -> Dict[str, Any]:
203213
return {"$val": self.value}
204214

205-
206-
# Rank expression types for KNN search
207-
@dataclass
208-
class Rank:
209-
"""Base class for Rank expressions"""
210-
211-
def to_dict(self) -> Dict[str, Any]:
212-
"""Convert the Rank expression to a dictionary for JSON serialization"""
213-
raise NotImplementedError("Subclasses must implement to_dict()")
214-
215-
216-
@dataclass
217-
class DenseKnn(Rank):
218-
"""Dense KNN ranking"""
219-
embedding: List[float]
220-
key: str = "$chroma_embedding"
221-
limit: int = 1024
222-
223-
def to_dict(self) -> Dict[str, Any]:
224-
result = {"embedding": self.embedding}
225-
if self.key != "$chroma_embedding":
226-
result["key"] = self.key # type: ignore[assignment]
227-
if self.limit != 1024:
228-
result["limit"] = self.limit # type: ignore[assignment]
229-
return {"$dense-knn": result}
230-
231-
232-
@dataclass
233-
class SparseKnn(Rank):
234-
"""Sparse KNN ranking"""
235-
embedding: SparseVector # Sparse vector with indices and values
236-
key: str # No default for sparse KNN
237-
limit: int = 1024
238-
239-
def to_dict(self) -> Dict[str, Any]:
240-
# Convert SparseVector to the format expected by Rust API
241-
result = {"embedding": self.embedding, "key": self.key}
242-
if self.limit != 1024:
243-
result["limit"] = self.limit # type: ignore[assignment]
244-
return {"$sparse-knn": result}
245-
246-
247215
@dataclass
248-
class Project:
249-
"""Projection configuration for search results
216+
class Select:
217+
"""Selection configuration for search results
250218
251219
Fields can be:
252-
- "$document" - Project document field
253-
- "$embedding" - Project embedding field
254-
- "$metadata" - Project all metadata
255-
- "$score" - Project score field
256-
- Any other string - Project specific metadata property
220+
- "#document" - Select document field
221+
- "#embedding" - Select embedding field
222+
- "#metadata" - Select all metadata
223+
- "#score" - Select score field
224+
- Any other string - Select specific metadata property
257225
"""
258226
fields: Set[str] = field(default_factory=set)
259227

260228
def to_dict(self) -> Dict[str, Any]:
261-
"""Convert the Project to a dictionary for JSON serialization"""
229+
"""Convert the Select to a dictionary for JSON serialization"""
262230
return {"fields": list(self.fields)}

chromadb/execution/expression/plan.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from typing import List, Dict, Any
33

44
from chromadb.execution.expression.operator import (
5-
KNN, Filter, Limit, Projection, Scan, Score, Project, Val
5+
KNN, Filter, Limit, Projection, Scan, Rank, Select, Val
66
)
77

88

@@ -31,15 +31,15 @@ class KNNPlan:
3131
class SearchPayload:
3232
"""Payload for hybrid search operations"""
3333
filter: Filter = field(default_factory=Filter)
34-
score: Score = field(default_factory=lambda: Val(value=0.0))
34+
rank: Rank = field(default_factory=lambda: Val(value=0.0))
3535
limit: Limit = field(default_factory=Limit)
36-
project: Project = field(default_factory=Project)
36+
select: Select = field(default_factory=Select)
3737

3838
def to_dict(self) -> Dict[str, Any]:
3939
"""Convert the SearchPayload to a dictionary for JSON serialization"""
4040
return {
4141
"filter": self.filter.to_dict(),
42-
"score": self.score.to_dict(),
42+
"rank": self.rank.to_dict(),
4343
"limit": self.limit.to_dict(),
44-
"project": self.project.to_dict()
44+
"select": self.select.to_dict()
4545
}

chromadb/proto/convert.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ def to_proto_knn(knn: KNN) -> query_pb.KNNOperator:
613613

614614

615615
def to_proto_limit(limit: Limit) -> query_pb.LimitOperator:
616-
return query_pb.LimitOperator(skip=limit.skip, fetch=limit.fetch)
616+
return query_pb.LimitOperator(offset=limit.offset, limit=limit.limit)
617617

618618

619619
def to_proto_projection(projection: Projection) -> query_pb.ProjectionOperator:

0 commit comments

Comments
 (0)