Skip to content

Commit 3ef88f8

Browse files
authored
[ENH] Split knn orchestrators for reuse (#5347)
## Description of changes _Summarize the changes made by this PR._ - Improvements & Bug fixes - N/A - New functionality - Separate out the projection operator from `KnnOrchestrator` and `SpannKnnOrchestrator`, so that we can reuse these orchestrators for search endpoint ## Test plan _How are these changes tested?_ - [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust ## Migration plan _Are there any migrations, or any forwards/backwards compatibility changes needed in order to make sure this change deploys reliably?_ ## Observability plan _What is the plan to instrument and monitor this change?_ ## Documentation Changes _Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_
1 parent 0e3d03e commit 3ef88f8

File tree

30 files changed

+825
-311
lines changed

30 files changed

+825
-311
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

idl/chromadb/proto/query_executor.proto

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ message KNNOperator {
2525
}
2626

2727
message LimitOperator {
28-
uint32 skip = 1;
29-
optional uint32 fetch = 2;
28+
uint32 offset = 1;
29+
optional uint32 limit = 2;
3030
}
3131

3232
message ProjectionOperator {

rust/frontend/src/executor/local.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use chroma_system::ComponentHandle;
1212
use chroma_types::{
1313
operator::{
1414
CountResult, Filter, GetResult, KnnBatchResult, KnnProjectionOutput, KnnProjectionRecord,
15-
Limit, Projection, ProjectionRecord, RecordDistance, SearchResult,
15+
Limit, Projection, ProjectionRecord, RecordMeasure, SearchResult,
1616
},
1717
plan::{Count, Get, Knn, Search},
1818
CollectionAndSegments, CollectionUuid, ExecutorError, HnswSpace, SegmentType,
@@ -162,8 +162,8 @@ impl LocalExecutor {
162162
scan: plan.scan.clone(),
163163
filter: filter.clone(),
164164
limit: Limit {
165-
skip: 0,
166-
fetch: None,
165+
offset: 0,
166+
limit: None,
167167
},
168168
proj: Default::default(),
169169
};
@@ -234,7 +234,7 @@ impl LocalExecutor {
234234
.map_err(|err| ExecutorError::Internal(Box::new(err)))?;
235235

236236
let mut records = Vec::new();
237-
for RecordDistance { offset_id, measure } in distances {
237+
for RecordMeasure { offset_id, measure } in distances {
238238
let user_id = hnsw_reader
239239
.get_user_id_by_offset_id(offset_id)
240240
.await
@@ -268,8 +268,8 @@ impl LocalExecutor {
268268
where_clause: None,
269269
},
270270
limit: Limit {
271-
skip: 0,
272-
fetch: None,
271+
offset: 0,
272+
limit: None,
273273
},
274274
proj: Projection {
275275
document: plan.proj.projection.document,

rust/frontend/src/impls/in_memory_frontend.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -572,10 +572,7 @@ impl InMemoryFrontend {
572572
},
573573
},
574574
filter,
575-
limit: Limit {
576-
skip: offset,
577-
fetch: limit,
578-
},
575+
limit: Limit { offset, limit },
579576
proj: Projection {
580577
document: include.0.contains(&Include::Document),
581578
embedding: include.0.contains(&Include::Embedding),

rust/frontend/src/impls/service_based_frontend.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,8 +1035,8 @@ impl ServiceBasedFrontend {
10351035
},
10361036
filter,
10371037
limit: Limit {
1038-
skip: 0,
1039-
fetch: None,
1038+
offset: 0,
1039+
limit: None,
10401040
},
10411041
proj: Projection {
10421042
document: false,
@@ -1339,10 +1339,7 @@ impl ServiceBasedFrontend {
13391339
query_ids: ids,
13401340
where_clause: r#where,
13411341
},
1342-
limit: Limit {
1343-
skip: offset,
1344-
fetch: limit,
1345-
},
1342+
limit: Limit { offset, limit },
13461343
proj: Projection {
13471344
document: include.0.contains(&Include::Document),
13481345
embedding: include.0.contains(&Include::Embedding),

rust/garbage_collector/src/helper.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,8 @@ impl ChromaGrpcClients {
201201
where_document: None,
202202
}),
203203
limit: Some(LimitOperator {
204-
skip: 0,
205-
fetch: None,
204+
offset: 0,
205+
limit: None,
206206
}),
207207
projection: Some(ProjectionOperator {
208208
document: false, // include_documents,

rust/segment/src/local_hnsw.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use chroma_error::{ChromaError, ErrorCodes};
1010
use chroma_index::{HnswIndex, HnswIndexConfig, Index, IndexConfig, PersistentIndex};
1111
use chroma_sqlite::{db::SqliteDb, table::MaxSeqId};
1212
use chroma_types::{
13-
operator::RecordDistance, Chunk, Collection, HnswParametersFromSegmentError, LogRecord,
13+
operator::RecordMeasure, Chunk, Collection, HnswParametersFromSegmentError, LogRecord,
1414
Operation, OperationRecord, Segment, SegmentUuid,
1515
};
1616
use rayon::iter::{IntoParallelIterator, ParallelIterator};
@@ -277,7 +277,7 @@ impl LocalHnswSegmentReader {
277277
allowed_offset_ids: &[u32],
278278
embedding: Vec<f32>,
279279
k: u32,
280-
) -> Result<Vec<RecordDistance>, LocalHnswSegmentReaderError> {
280+
) -> Result<Vec<RecordMeasure>, LocalHnswSegmentReaderError> {
281281
let guard = self.index.inner.read().await;
282282
let len_with_deleted = guard.index.len_with_deleted();
283283
let actual_len = guard.index.len();
@@ -319,7 +319,7 @@ impl LocalHnswSegmentReader {
319319
.distance_function
320320
.distance(curr_embedding.as_slice(), embedding.as_slice());
321321
if max_heap.len() < k as usize {
322-
max_heap.push(RecordDistance {
322+
max_heap.push(RecordMeasure {
323323
offset_id: *curr_id as u32,
324324
measure: curr_distance,
325325
});
@@ -329,7 +329,7 @@ impl LocalHnswSegmentReader {
329329
let top = max_heap.peek().unwrap();
330330
if top.measure > curr_distance {
331331
max_heap.pop();
332-
max_heap.push(RecordDistance {
332+
max_heap.push(RecordMeasure {
333333
offset_id: *curr_id as u32,
334334
measure: curr_distance,
335335
});
@@ -358,7 +358,7 @@ impl LocalHnswSegmentReader {
358358
Ok(offset_ids
359359
.into_iter()
360360
.zip(distances)
361-
.map(|(offset_id, measure)| RecordDistance {
361+
.map(|(offset_id, measure)| RecordMeasure {
362362
offset_id: offset_id as u32,
363363
measure,
364364
})

rust/segment/src/sqlite_metadata.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -762,7 +762,7 @@ impl SqliteMetadataReader {
762762
query_ids,
763763
where_clause,
764764
},
765-
limit: Limit { skip, fetch },
765+
limit: Limit { offset, limit },
766766
proj: Projection {
767767
document, metadata, ..
768768
},
@@ -789,8 +789,8 @@ impl SqliteMetadataReader {
789789

790790
filter_limit_query
791791
.order_by((Embeddings::Table, Embeddings::Id), sea_query::Order::Asc)
792-
.offset(skip as u64)
793-
.limit(fetch.unwrap_or(u32::MAX) as u64);
792+
.offset(offset as u64)
793+
.limit(limit.unwrap_or(u32::MAX) as u64);
794794

795795
let alias = Alias::new(SUBQ_ALIAS);
796796
let mut projection_query = Query::select();
@@ -954,8 +954,8 @@ mod tests {
954954
where_clause: Some(where_clause.clause),
955955
},
956956
limit: Limit {
957-
skip: 3,
958-
fetch: Some(6),
957+
offset: 3,
958+
limit: Some(6),
959959
},
960960
proj: Projection {
961961
document: true,
@@ -1046,8 +1046,8 @@ mod tests {
10461046
where_clause: Some(where_clause),
10471047
},
10481048
limit: Limit {
1049-
skip: 0,
1050-
fetch: None,
1049+
offset: 0,
1050+
limit: None,
10511051
},
10521052
proj: Projection {
10531053
document: false,
@@ -1083,8 +1083,8 @@ mod tests {
10831083
where_clause: Some(where_clause2),
10841084
},
10851085
limit: Limit {
1086-
skip: 0,
1087-
fetch: None,
1086+
offset: 0,
1087+
limit: None,
10881088
},
10891089
proj: Projection {
10901090
document: false,
@@ -1188,8 +1188,8 @@ mod tests {
11881188
where_clause: Some(fts_where_clause),
11891189
},
11901190
limit: Limit {
1191-
skip: 0,
1192-
fetch: None,
1191+
offset: 0,
1192+
limit: None,
11931193
},
11941194
proj: Projection {
11951195
document: true,
@@ -1217,8 +1217,8 @@ mod tests {
12171217
where_clause: Some(metadata_where_clause),
12181218
},
12191219
limit: Limit {
1220-
skip: 0,
1221-
fetch: None,
1220+
offset: 0,
1221+
limit: None,
12221222
},
12231223
proj: Projection {
12241224
document: false,
@@ -1246,8 +1246,8 @@ mod tests {
12461246
where_clause: Some(hybrid_where_clause),
12471247
},
12481248
limit: Limit {
1249-
skip: 0,
1250-
fetch: None,
1249+
offset: 0,
1250+
limit: None,
12511251
},
12521252
proj: Projection {
12531253
document: true,

rust/segment/src/test.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,8 @@ impl TestReferenceSegment {
352352
result: ProjectionOutput {
353353
records: records
354354
.into_iter()
355-
.skip(plan.limit.skip as usize)
356-
.take(plan.limit.fetch.unwrap_or(u32::MAX) as usize)
355+
.skip(plan.limit.offset as usize)
356+
.take(plan.limit.limit.unwrap_or(u32::MAX) as usize)
357357
.map(|(_, mut rec)| {
358358
let Projection {
359359
document,

rust/types/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ validator = { workspace = true }
2222
regex = { workspace = true }
2323
regex-syntax = { workspace = true }
2424
utoipa = { workspace = true }
25+
sprs = { workspace = true }
2526

2627
# (Cross-crate testing dependencies)
2728
proptest = { workspace = true, optional = true }

0 commit comments

Comments
 (0)