Update expressions

Sicheng-Pan · Sicheng-Pan · commit cacd08e551ca · 2025-08-27T22:15:38.000-07:00
diff --git a/chromadb/api/models/AsyncCollection.py b/chromadb/api/models/AsyncCollection.py
@@ -300,9 +300,9 @@ async def search(
         Args:
             searches: List of SearchPayload objects, each containing:
                 - filter: Optional filter criteria (user_ids, where)
-                - score: Scoring expression for hybrid search
-                - limit: Optional limit configuration (skip, fetch)
-                - project: Optional projection configuration (fields to return)
+                - rank: Ranking expression for hybrid search
+                - limit: Optional limit configuration (offset, limit)
+                - select: Optional selection configuration (fields to return)
         
         Returns:
             SearchResult: List of search results for each search payload.
@@ -313,15 +313,15 @@ async def search(
         
         Example:
             from chromadb.execution.expression.operator import (
-                DenseKnn, RankScore, Val, Sum, Filter, Limit, Project
+                Knn, Val, Sum, Filter, Limit, Select
             )
             from chromadb.execution.expression.plan import SearchPayload
             
             payload = SearchPayload(
                 filter=Filter(where={"category": "science"}),
-                score=RankScore(source=DenseKnn(embedding=[0.1, 0.2, 0.3], limit=100)),
-                limit=Limit(skip=0, fetch=10),
-                project=Project(fields={"$document", "$score", "$metadata"})
+                rank=Knn(embedding=[0.1, 0.2, 0.3], limit=100),
+                limit=Limit(offset=0, limit=10),
+                select=Select(fields={"#document", "#score", "#metadata"})
             )
             
             results = await collection.search([payload])
diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py
@@ -304,9 +304,9 @@ def search(
         Args:
             searches: List of SearchPayload objects, each containing:
                 - filter: Optional filter criteria (user_ids, where)
-                - score: Scoring expression for hybrid search
-                - limit: Optional limit configuration (skip, fetch)
-                - project: Optional projection configuration (fields to return)
+                - rank: Ranking expression for hybrid search
+                - limit: Optional limit configuration (offset, limit)
+                - select: Optional selection configuration (fields to return)
         
         Returns:
             SearchResult: List of search results for each search payload.
@@ -317,15 +317,15 @@ def search(
         
         Example:
             from chromadb.execution.expression.operator import (
-                DenseKnn, RankScore, Val, Sum, Filter, Limit, Project
+                Knn, Val, Sum, Filter, Limit, Select
             )
             from chromadb.execution.expression.plan import SearchPayload
             
             payload = SearchPayload(
                 filter=Filter(where={"category": "science"}),
-                score=RankScore(source=DenseKnn(embedding=[0.1, 0.2, 0.3], limit=100)),
-                limit=Limit(skip=0, fetch=10),
-                project=Project(fields={"$document", "$score", "$metadata"})
+                rank=Knn(embedding=[0.1, 0.2, 0.3], limit=100),
+                limit=Limit(offset=0, limit=10),
+                select=Select(fields={"#document", "#score", "#metadata"})
             )
             
             results = collection.search([payload])
diff --git a/chromadb/execution/executor/local.py b/chromadb/execution/executor/local.py
@@ -58,8 +58,8 @@ def get(self, plan: GetPlan) -> GetResult:
             where=plan.filter.where,
             where_document=plan.filter.where_document,
             ids=plan.filter.user_ids,
-            limit=plan.limit.fetch,
-            offset=plan.limit.skip,
+            limit=plan.limit.limit,
+            offset=plan.limit.offset,
             include_metadata=True,
         )
 
diff --git a/chromadb/execution/expression/operator.py b/chromadb/execution/expression/operator.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import Optional, List, Dict, Set, Any
+from typing import Optional, List, Dict, Set, Any, Union
 
 from chromadb.api.types import Embeddings, IDs, Include, SparseVector
 from chromadb.types import (
@@ -50,14 +50,14 @@ class KNN:
 
 @dataclass
 class Limit:
-    skip: int = 0
-    fetch: Optional[int] = None
+    offset: int = 0
+    limit: Optional[int] = None
     
     def to_dict(self) -> Dict[str, Any]:
         """Convert the Limit to a dictionary for JSON serialization"""
-        result = {"skip": self.skip}
-        if self.fetch is not None:
-            result["fetch"] = self.fetch
+        result = {"offset": self.offset}
+        if self.limit is not None:
+            result["limit"] = self.limit
         return result
 
 
@@ -85,178 +85,146 @@ def included(self) -> Include:
         return includes # type: ignore[return-value] 
 
 
-# Score expression types for hybrid search
+# Rank expression types for hybrid search
 @dataclass
-class Score:
-    """Base class for Score expressions (algebraic data type)"""
+class Rank:
+    """Base class for Rank expressions (algebraic data type)"""
     
     def to_dict(self) -> Dict[str, Any]:
-        """Convert the Score expression to a dictionary for JSON serialization"""
+        """Convert the Rank expression to a dictionary for JSON serialization"""
         raise NotImplementedError("Subclasses must implement to_dict()")
 
 
 @dataclass
-class Abs(Score):
-    """Absolute value of a score"""
-    score: Score
+class Abs(Rank):
+    """Absolute value of a rank"""
+    rank: Rank
     
     def to_dict(self) -> Dict[str, Any]:
-        return {"$abs": self.score.to_dict()}
+        return {"$abs": self.rank.to_dict()}
 
 
 @dataclass
-class Div(Score):
-    """Division of two scores"""
-    left: Score
-    right: Score
+class Div(Rank):
+    """Division of two ranks"""
+    left: Rank
+    right: Rank
     
     def to_dict(self) -> Dict[str, Any]:
         return {"$div": {"left": self.left.to_dict(), "right": self.right.to_dict()}}
 
 
 @dataclass
-class Exp(Score):
-    """Exponentiation of a score"""
-    score: Score
+class Exp(Rank):
+    """Exponentiation of a rank"""
+    rank: Rank
     
     def to_dict(self) -> Dict[str, Any]:
-        return {"$exp": self.score.to_dict()}
+        return {"$exp": self.rank.to_dict()}
 
 
 @dataclass
-class Log(Score):
-    """Logarithm of a score"""
-    score: Score
+class Log(Rank):
+    """Logarithm of a rank"""
+    rank: Rank
     
     def to_dict(self) -> Dict[str, Any]:
-        return {"$log": self.score.to_dict()}
+        return {"$log": self.rank.to_dict()}
 
 
 @dataclass
-class Max(Score):
-    """Maximum of multiple scores"""
-    scores: List[Score]
+class Max(Rank):
+    """Maximum of multiple ranks"""
+    ranks: List[Rank]
     
     def to_dict(self) -> Dict[str, Any]:
-        return {"$max": [s.to_dict() for s in self.scores]}
+        return {"$max": [r.to_dict() for r in self.ranks]}
 
 
 @dataclass
-class Min(Score):
-    """Minimum of multiple scores"""
-    scores: List[Score]
+class Min(Rank):
+    """Minimum of multiple ranks"""
+    ranks: List[Rank]
     
     def to_dict(self) -> Dict[str, Any]:
-        return {"$min": [s.to_dict() for s in self.scores]}
+        return {"$min": [r.to_dict() for r in self.ranks]}
 
 
 @dataclass
-class Mul(Score):
-    """Multiplication of multiple scores"""
-    scores: List[Score]
+class Mul(Rank):
+    """Multiplication of multiple ranks"""
+    ranks: List[Rank]
     
     def to_dict(self) -> Dict[str, Any]:
-        return {"$mul": [s.to_dict() for s in self.scores]}
+        return {"$mul": [r.to_dict() for r in self.ranks]}
 
 
 @dataclass
-class RankScore(Score):
-    """Score based on ranking"""
-    source: 'Rank'
+class Knn(Rank):
+    """KNN-based ranking"""
+    embedding: Union[List[float], SparseVector]
+    key: str = "$chroma_embedding"
+    limit: int = 1024
     default: Optional[float] = None
     ordinal: bool = False
     
     def to_dict(self) -> Dict[str, Any]:
-        result = {"source": self.source.to_dict()}
+        # With untagged enum, embedding is serialized directly
+        # (as a list for dense, or as a dict with indices/values for sparse)
+        result = {
+            "embedding": self.embedding,
+            "key": self.key,
+            "limit": self.limit
+        }
+        
         if self.default is not None:
             result["default"] = self.default # type: ignore[assignment]
         if self.ordinal:
-            result["ordinal"] = self.ordinal # type: ignore[assignment]
-        return {"$rank": result}
+            result["ordinal"] = self.ordinal
+        
+        return {"$knn": result}
 
 
 @dataclass
-class Sub(Score):
-    """Subtraction of two scores"""
-    left: Score
-    right: Score
+class Sub(Rank):
+    """Subtraction of two ranks"""
+    left: Rank
+    right: Rank
     
     def to_dict(self) -> Dict[str, Any]:
         return {"$sub": {"left": self.left.to_dict(), "right": self.right.to_dict()}}
 
 
 @dataclass
-class Sum(Score):
-    """Summation of multiple scores"""
-    scores: List[Score]
+class Sum(Rank):
+    """Summation of multiple ranks"""
+    ranks: List[Rank]
     
     def to_dict(self) -> Dict[str, Any]:
-        return {"$sum": [s.to_dict() for s in self.scores]}
+        return {"$sum": [r.to_dict() for r in self.ranks]}
 
 
 @dataclass
-class Val(Score):
-    """Constant score value"""
+class Val(Rank):
+    """Constant rank value"""
     value: float
     
     def to_dict(self) -> Dict[str, Any]:
         return {"$val": self.value}
 
-
-# Rank expression types for KNN search
-@dataclass
-class Rank:
-    """Base class for Rank expressions"""
-    
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert the Rank expression to a dictionary for JSON serialization"""
-        raise NotImplementedError("Subclasses must implement to_dict()")
-
-
-@dataclass
-class DenseKnn(Rank):
-    """Dense KNN ranking"""
-    embedding: List[float]
-    key: str = "$chroma_embedding"
-    limit: int = 1024
-    
-    def to_dict(self) -> Dict[str, Any]:
-        result = {"embedding": self.embedding}
-        if self.key != "$chroma_embedding":
-            result["key"] = self.key # type: ignore[assignment]
-        if self.limit != 1024:
-            result["limit"] = self.limit # type: ignore[assignment]
-        return {"$dense-knn": result}
-
-
-@dataclass
-class SparseKnn(Rank):
-    """Sparse KNN ranking"""
-    embedding: SparseVector  # Sparse vector with indices and values
-    key: str  # No default for sparse KNN
-    limit: int = 1024
-    
-    def to_dict(self) -> Dict[str, Any]:
-        # Convert SparseVector to the format expected by Rust API
-        result = {"embedding": self.embedding, "key": self.key}
-        if self.limit != 1024:
-            result["limit"] = self.limit # type: ignore[assignment]
-        return {"$sparse-knn": result}
-
-
 @dataclass
-class Project:
-    """Projection configuration for search results
+class Select:
+    """Selection configuration for search results
     
     Fields can be:
-    - "$document" - Project document field
-    - "$embedding" - Project embedding field  
-    - "$metadata" - Project all metadata
-    - "$score" - Project score field
-    - Any other string - Project specific metadata property
+    - "#document" - Select document field
+    - "#embedding" - Select embedding field  
+    - "#metadata" - Select all metadata
+    - "#score" - Select score field
+    - Any other string - Select specific metadata property
     """
     fields: Set[str] = field(default_factory=set)
     
     def to_dict(self) -> Dict[str, Any]:
-        """Convert the Project to a dictionary for JSON serialization"""
+        """Convert the Select to a dictionary for JSON serialization"""
         return {"fields": list(self.fields)}
diff --git a/chromadb/execution/expression/plan.py b/chromadb/execution/expression/plan.py
@@ -2,7 +2,7 @@
 from typing import List, Dict, Any
 
 from chromadb.execution.expression.operator import (
-    KNN, Filter, Limit, Projection, Scan, Score, Project, Val
+    KNN, Filter, Limit, Projection, Scan, Rank, Select, Val
 )
 
 
@@ -31,15 +31,15 @@ class KNNPlan:
 class SearchPayload:
     """Payload for hybrid search operations"""
     filter: Filter = field(default_factory=Filter)
-    score: Score = field(default_factory=lambda: Val(value=0.0))
+    rank: Rank = field(default_factory=lambda: Val(value=0.0))
     limit: Limit = field(default_factory=Limit)
-    project: Project = field(default_factory=Project)
+    select: Select = field(default_factory=Select)
     
     def to_dict(self) -> Dict[str, Any]:
         """Convert the SearchPayload to a dictionary for JSON serialization"""
         return {
             "filter": self.filter.to_dict(),
-            "score": self.score.to_dict(),
+            "rank": self.rank.to_dict(),
             "limit": self.limit.to_dict(),
-            "project": self.project.to_dict()
+            "select": self.select.to_dict()
         }
diff --git a/chromadb/proto/convert.py b/chromadb/proto/convert.py
@@ -613,7 +613,7 @@ def to_proto_knn(knn: KNN) -> query_pb.KNNOperator:
 
 
 def to_proto_limit(limit: Limit) -> query_pb.LimitOperator:
-    return query_pb.LimitOperator(skip=limit.skip, fetch=limit.fetch)
+    return query_pb.LimitOperator(offset=limit.offset, limit=limit.limit)
 
 
 def to_proto_projection(projection: Projection) -> query_pb.ProjectionOperator: