|
1 | 1 | from dataclasses import dataclass, field
|
2 |
| -from typing import Optional, List, Dict, Set, Any |
| 2 | +from typing import Optional, List, Dict, Set, Any, Union |
3 | 3 |
|
4 | 4 | from chromadb.api.types import Embeddings, IDs, Include, SparseVector
|
5 | 5 | from chromadb.types import (
|
@@ -50,14 +50,14 @@ class KNN:
|
50 | 50 |
|
51 | 51 | @dataclass
|
52 | 52 | class Limit:
|
53 |
| - skip: int = 0 |
54 |
| - fetch: Optional[int] = None |
| 53 | + offset: int = 0 |
| 54 | + limit: Optional[int] = None |
55 | 55 |
|
56 | 56 | def to_dict(self) -> Dict[str, Any]:
|
57 | 57 | """Convert the Limit to a dictionary for JSON serialization"""
|
58 |
| - result = {"skip": self.skip} |
59 |
| - if self.fetch is not None: |
60 |
| - result["fetch"] = self.fetch |
| 58 | + result = {"offset": self.offset} |
| 59 | + if self.limit is not None: |
| 60 | + result["limit"] = self.limit |
61 | 61 | return result
|
62 | 62 |
|
63 | 63 |
|
@@ -85,178 +85,146 @@ def included(self) -> Include:
|
85 | 85 | return includes # type: ignore[return-value]
|
86 | 86 |
|
87 | 87 |
|
88 |
| -# Score expression types for hybrid search |
| 88 | +# Rank expression types for hybrid search |
89 | 89 | @dataclass
|
90 |
| -class Score: |
91 |
| - """Base class for Score expressions (algebraic data type)""" |
| 90 | +class Rank: |
| 91 | + """Base class for Rank expressions (algebraic data type)""" |
92 | 92 |
|
93 | 93 | def to_dict(self) -> Dict[str, Any]:
|
94 | 94 | """Convert the Rank expression to a dictionary for JSON serialization"""
|
95 | 95 | raise NotImplementedError("Subclasses must implement to_dict()")
|
96 | 96 |
|
97 | 97 |
|
98 | 98 | @dataclass
|
99 |
| -class Abs(Score): |
100 |
| - """Absolute value of a score""" |
101 |
| - score: Score |
| 99 | +class Abs(Rank): |
| 100 | + """Absolute value of a rank""" |
| 101 | + rank: Rank |
102 | 102 |
|
103 | 103 | def to_dict(self) -> Dict[str, Any]:
|
104 |
| - return {"$abs": self.score.to_dict()} |
| 104 | + return {"$abs": self.rank.to_dict()} |
105 | 105 |
|
106 | 106 |
|
107 | 107 | @dataclass
|
108 |
| -class Div(Score): |
109 |
| - """Division of two scores""" |
110 |
| - left: Score |
111 |
| - right: Score |
| 108 | +class Div(Rank): |
| 109 | + """Division of two ranks""" |
| 110 | + left: Rank |
| 111 | + right: Rank |
112 | 112 |
|
113 | 113 | def to_dict(self) -> Dict[str, Any]:
|
114 | 114 | return {"$div": {"left": self.left.to_dict(), "right": self.right.to_dict()}}
|
115 | 115 |
|
116 | 116 |
|
117 | 117 | @dataclass
|
118 |
| -class Exp(Score): |
119 |
| - """Exponentiation of a score""" |
120 |
| - score: Score |
| 118 | +class Exp(Rank): |
| 119 | + """Exponentiation of a rank""" |
| 120 | + rank: Rank |
121 | 121 |
|
122 | 122 | def to_dict(self) -> Dict[str, Any]:
|
123 |
| - return {"$exp": self.score.to_dict()} |
| 123 | + return {"$exp": self.rank.to_dict()} |
124 | 124 |
|
125 | 125 |
|
126 | 126 | @dataclass
|
127 |
| -class Log(Score): |
128 |
| - """Logarithm of a score""" |
129 |
| - score: Score |
| 127 | +class Log(Rank): |
| 128 | + """Logarithm of a rank""" |
| 129 | + rank: Rank |
130 | 130 |
|
131 | 131 | def to_dict(self) -> Dict[str, Any]:
|
132 |
| - return {"$log": self.score.to_dict()} |
| 132 | + return {"$log": self.rank.to_dict()} |
133 | 133 |
|
134 | 134 |
|
135 | 135 | @dataclass
|
136 |
| -class Max(Score): |
137 |
| - """Maximum of multiple scores""" |
138 |
| - scores: List[Score] |
| 136 | +class Max(Rank): |
| 137 | + """Maximum of multiple ranks""" |
| 138 | + ranks: List[Rank] |
139 | 139 |
|
140 | 140 | def to_dict(self) -> Dict[str, Any]:
|
141 |
| - return {"$max": [s.to_dict() for s in self.scores]} |
| 141 | + return {"$max": [r.to_dict() for r in self.ranks]} |
142 | 142 |
|
143 | 143 |
|
144 | 144 | @dataclass
|
145 |
| -class Min(Score): |
146 |
| - """Minimum of multiple scores""" |
147 |
| - scores: List[Score] |
| 145 | +class Min(Rank): |
| 146 | + """Minimum of multiple ranks""" |
| 147 | + ranks: List[Rank] |
148 | 148 |
|
149 | 149 | def to_dict(self) -> Dict[str, Any]:
|
150 |
| - return {"$min": [s.to_dict() for s in self.scores]} |
| 150 | + return {"$min": [r.to_dict() for r in self.ranks]} |
151 | 151 |
|
152 | 152 |
|
153 | 153 | @dataclass
|
154 |
| -class Mul(Score): |
155 |
| - """Multiplication of multiple scores""" |
156 |
| - scores: List[Score] |
| 154 | +class Mul(Rank): |
| 155 | + """Multiplication of multiple ranks""" |
| 156 | + ranks: List[Rank] |
157 | 157 |
|
158 | 158 | def to_dict(self) -> Dict[str, Any]:
|
159 |
| - return {"$mul": [s.to_dict() for s in self.scores]} |
| 159 | + return {"$mul": [r.to_dict() for r in self.ranks]} |
160 | 160 |
|
161 | 161 |
|
162 | 162 | @dataclass
|
163 |
| -class RankScore(Score): |
164 |
| - """Score based on ranking""" |
165 |
| - source: 'Rank' |
| 163 | +class Knn(Rank): |
| 164 | + """KNN-based ranking""" |
| 165 | + embedding: Union[List[float], SparseVector] |
| 166 | + key: str = "$chroma_embedding" |
| 167 | + limit: int = 1024 |
166 | 168 | default: Optional[float] = None
|
167 | 169 | ordinal: bool = False
|
168 | 170 |
|
169 | 171 | def to_dict(self) -> Dict[str, Any]:
|
170 |
| - result = {"source": self.source.to_dict()} |
| 172 | + # With untagged enum, embedding is serialized directly |
| 173 | + # (as a list for dense, or as a dict with indices/values for sparse) |
| 174 | + result = { |
| 175 | + "embedding": self.embedding, |
| 176 | + "key": self.key, |
| 177 | + "limit": self.limit |
| 178 | + } |
| 179 | + |
171 | 180 | if self.default is not None:
|
172 | 181 | result["default"] = self.default # type: ignore[assignment]
|
173 | 182 | if self.ordinal:
|
174 |
| - result["ordinal"] = self.ordinal # type: ignore[assignment] |
175 |
| - return {"$rank": result} |
| 183 | + result["ordinal"] = self.ordinal |
| 184 | + |
| 185 | + return {"$knn": result} |
176 | 186 |
|
177 | 187 |
|
178 | 188 | @dataclass
|
179 |
| -class Sub(Score): |
180 |
| - """Subtraction of two scores""" |
181 |
| - left: Score |
182 |
| - right: Score |
| 189 | +class Sub(Rank): |
| 190 | + """Subtraction of two ranks""" |
| 191 | + left: Rank |
| 192 | + right: Rank |
183 | 193 |
|
184 | 194 | def to_dict(self) -> Dict[str, Any]:
|
185 | 195 | return {"$sub": {"left": self.left.to_dict(), "right": self.right.to_dict()}}
|
186 | 196 |
|
187 | 197 |
|
188 | 198 | @dataclass
|
189 |
| -class Sum(Score): |
190 |
| - """Summation of multiple scores""" |
191 |
| - scores: List[Score] |
| 199 | +class Sum(Rank): |
| 200 | + """Summation of multiple ranks""" |
| 201 | + ranks: List[Rank] |
192 | 202 |
|
193 | 203 | def to_dict(self) -> Dict[str, Any]:
|
194 |
| - return {"$sum": [s.to_dict() for s in self.scores]} |
| 204 | + return {"$sum": [r.to_dict() for r in self.ranks]} |
195 | 205 |
|
196 | 206 |
|
197 | 207 | @dataclass
|
198 |
| -class Val(Score): |
199 |
| - """Constant score value""" |
| 208 | +class Val(Rank): |
| 209 | + """Constant rank value""" |
200 | 210 | value: float
|
201 | 211 |
|
202 | 212 | def to_dict(self) -> Dict[str, Any]:
|
203 | 213 | return {"$val": self.value}
|
204 | 214 |
|
205 |
| - |
206 |
| -# Rank expression types for KNN search |
207 |
| -@dataclass |
208 |
| -class Rank: |
209 |
| - """Base class for Rank expressions""" |
210 |
| - |
211 |
| - def to_dict(self) -> Dict[str, Any]: |
212 |
| - """Convert the Rank expression to a dictionary for JSON serialization""" |
213 |
| - raise NotImplementedError("Subclasses must implement to_dict()") |
214 |
| - |
215 |
| - |
216 |
| -@dataclass |
217 |
| -class DenseKnn(Rank): |
218 |
| - """Dense KNN ranking""" |
219 |
| - embedding: List[float] |
220 |
| - key: str = "$chroma_embedding" |
221 |
| - limit: int = 1024 |
222 |
| - |
223 |
| - def to_dict(self) -> Dict[str, Any]: |
224 |
| - result = {"embedding": self.embedding} |
225 |
| - if self.key != "$chroma_embedding": |
226 |
| - result["key"] = self.key # type: ignore[assignment] |
227 |
| - if self.limit != 1024: |
228 |
| - result["limit"] = self.limit # type: ignore[assignment] |
229 |
| - return {"$dense-knn": result} |
230 |
| - |
231 |
| - |
232 |
| -@dataclass |
233 |
| -class SparseKnn(Rank): |
234 |
| - """Sparse KNN ranking""" |
235 |
| - embedding: SparseVector # Sparse vector with indices and values |
236 |
| - key: str # No default for sparse KNN |
237 |
| - limit: int = 1024 |
238 |
| - |
239 |
| - def to_dict(self) -> Dict[str, Any]: |
240 |
| - # Convert SparseVector to the format expected by Rust API |
241 |
| - result = {"embedding": self.embedding, "key": self.key} |
242 |
| - if self.limit != 1024: |
243 |
| - result["limit"] = self.limit # type: ignore[assignment] |
244 |
| - return {"$sparse-knn": result} |
245 |
| - |
246 |
| - |
247 | 215 | @dataclass
|
248 |
| -class Project: |
249 |
| - """Projection configuration for search results |
| 216 | +class Select: |
| 217 | + """Selection configuration for search results |
250 | 218 |
|
251 | 219 | Fields can be:
|
252 |
| - - "$document" - Project document field |
253 |
| - - "$embedding" - Project embedding field |
254 |
| - - "$metadata" - Project all metadata |
255 |
| - - "$score" - Project score field |
256 |
| - - Any other string - Project specific metadata property |
| 220 | + - "#document" - Select document field |
| 221 | + - "#embedding" - Select embedding field |
| 222 | + - "#metadata" - Select all metadata |
| 223 | + - "#score" - Select score field |
| 224 | + - Any other string - Select specific metadata property |
257 | 225 | """
|
258 | 226 | fields: Set[str] = field(default_factory=set)
|
259 | 227 |
|
260 | 228 | def to_dict(self) -> Dict[str, Any]:
|
261 |
| - """Convert the Project to a dictionary for JSON serialization""" |
| 229 | + """Convert the Select to a dictionary for JSON serialization""" |
262 | 230 | return {"fields": list(self.fields)}
|
0 commit comments