Skip to content

Commit 969852e

Browse files
authored
Query caching (#84)
* Add union of query and query id as body * Refactor builder reader for field selection * Move Query into BuilderReader trait * Restructure query into body and block range * Undo query structure changes * Refactor capnp builder reader * Implement query hashing * Clippy warning for duplicate cfg * Clippy warnings * First impl of cache or retry * Implement request parser * Wip * Make traits pub * Add query parsing from just body and block range * Handle endpoint failure on cache query * Remove dbg logs * Add should_cache to the capnp body as opposed to the header and remove unused code * Remove dead struct * Remove testing on localhost * Bump versions * Fix clippy warnings
1 parent c5fb50f commit 969852e

File tree

17 files changed

+1856
-477
lines changed

17 files changed

+1856
-477
lines changed

examples/all_erc20/src/main.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
use std::{sync::Arc, time::Instant};
55

6-
use hypersync_client::{Client, ClientConfig, ColumnMapping, DataType, StreamConfig};
6+
use hypersync_client::{
7+
Client, ClientConfig, ColumnMapping, DataType, SerializationFormat, StreamConfig,
8+
};
79
use polars_arrow::{
810
array::{Array, Float64Array},
911
compute,
@@ -15,7 +17,13 @@ async fn main() {
1517
env_logger::init().unwrap();
1618

1719
// create default client, uses eth mainnet
18-
let client = Client::new(ClientConfig::default()).unwrap();
20+
let client = Client::new(ClientConfig {
21+
serialization_format: SerializationFormat::CapnProto {
22+
should_cache_queries: true,
23+
},
24+
..Default::default()
25+
})
26+
.unwrap();
1927

2028
let query = serde_json::from_value(serde_json::json!( {
2129
// start from block 10123123 and go to the end of the chain (we don't specify a toBlock).

hypersync-client/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "hypersync-client"
3-
version = "0.20.0-rc.2"
3+
version = "0.20.0-rc.3"
44
edition = "2021"
55
description = "client library for hypersync"
66
license = "MPL-2.0"
@@ -47,7 +47,7 @@ nohash-hasher = "0.2.0"
4747
ethers = { version = "2.0.14", optional = true }
4848
alloy-primitives = "1.1"
4949

50-
hypersync-net-types = { path = "../hypersync-net-types", version = "0.11.0-rc.2" }
50+
hypersync-net-types = { path = "../hypersync-net-types", version = "0.11.0-rc.3" }
5151
hypersync-format = { path = "../hypersync-format", version = "0.5.8" }
5252
hypersync-schema = { path = "../hypersync-schema", version = "0.3" }
5353

hypersync-client/src/config.rs

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,16 @@ pub struct ClientConfig {
2727
}
2828

2929
/// Determines query serialization format for HTTP requests.
30-
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
30+
#[derive(Default, Clone, Copy, Debug, Serialize, Deserialize)]
3131
pub enum SerializationFormat {
3232
/// Use JSON serialization (default)
33+
#[default]
3334
Json,
3435
/// Use Cap'n Proto binary serialization
35-
CapnProto,
36-
}
37-
38-
impl Default for SerializationFormat {
39-
fn default() -> Self {
40-
// Keep this the default until all hs instances are upgraded to use Cap'n Proto endpoint
41-
Self::Json
42-
}
36+
CapnProto {
37+
/// Whether to use query caching
38+
should_cache_queries: bool,
39+
},
4340
}
4441

4542
/// Config for hypersync event streaming.
@@ -78,18 +75,13 @@ pub struct StreamConfig {
7875
}
7976

8077
/// Determines format of Binary column
81-
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
78+
#[derive(Default, Clone, Copy, Debug, Serialize, Deserialize)]
8279
pub enum HexOutput {
8380
/// Binary column won't be formatted as hex
81+
#[default]
8482
NoEncode,
8583
/// Binary column is formatted as 0x-prefixed hex, e.g. 0xdeadbeef
8684
Prefixed,
8785
/// Binary column is formatted as hex without the 0x prefix, e.g. deadbeef
8886
NonPrefixed,
8987
}
90-
91-
impl Default for HexOutput {
92-
fn default() -> Self {
93-
Self::NoEncode
94-
}
95-
}

hypersync-client/src/lib.rs

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::{num::NonZeroU64, sync::Arc, time::Duration};
44

55
use anyhow::{anyhow, Context, Result};
6-
use hypersync_net_types::{ArchiveHeight, ChainId, Query};
6+
use hypersync_net_types::{hypersync_net_types_capnp, ArchiveHeight, ChainId, Query};
77
use polars_arrow::{array::Array, record_batch::RecordBatchT as Chunk};
88
use reqwest::Method;
99

@@ -40,6 +40,7 @@ pub use decode::Decoder;
4040
pub use decode_call::CallDecoder;
4141
pub use types::{ArrowBatch, ArrowResponse, ArrowResponseData, QueryResponse};
4242

43+
use crate::parse_response::read_query_response;
4344
use crate::simple_types::InternalEventJoinStrategy;
4445

4546
type ArrowChunk = Chunk<Box<dyn Array>>;
@@ -421,6 +422,15 @@ impl Client {
421422
Ok((res, bytes.len().try_into().unwrap()))
422423
}
423424

425+
fn should_cache_queries(&self) -> bool {
426+
matches!(
427+
self.serialization_format,
428+
SerializationFormat::CapnProto {
429+
should_cache_queries: true
430+
}
431+
)
432+
}
433+
424434
/// Executes query once and returns the result in (Arrow, size) format using Cap'n Proto serialization.
425435
async fn get_arrow_impl_capnp(&self, query: &Query) -> Result<(ArrowResponse, u64)> {
426436
let mut url = self.url.clone();
@@ -429,16 +439,89 @@ impl Client {
429439
segments.push("arrow-ipc");
430440
segments.push("capnp");
431441
std::mem::drop(segments);
432-
let mut req = self.http_client.request(Method::POST, url);
433442

443+
let should_cache = self.should_cache_queries();
444+
445+
if should_cache {
446+
let query_with_id = {
447+
let mut message = capnp::message::Builder::new_default();
448+
let mut query_builder =
449+
message.init_root::<hypersync_net_types_capnp::query::Builder>();
450+
451+
query_builder.build_query_id_from_query(query)?;
452+
let mut query_with_id = Vec::new();
453+
capnp::serialize_packed::write_message(&mut query_with_id, &message)?;
454+
query_with_id
455+
};
456+
457+
let mut req = self.http_client.request(Method::POST, url.clone());
458+
req = req.header("content-type", "application/x-capnp");
459+
req = req.header("x-hypersync-cache-queries", "true");
460+
if let Some(bearer_token) = &self.bearer_token {
461+
req = req.bearer_auth(bearer_token);
462+
}
463+
464+
let res = req
465+
.body(query_with_id)
466+
.send()
467+
.await
468+
.context("execute http req")?;
469+
470+
let status = res.status();
471+
if status.is_success() {
472+
let bytes = res.bytes().await.context("read response body bytes")?;
473+
474+
let mut opts = capnp::message::ReaderOptions::new();
475+
opts.nesting_limit(i32::MAX).traversal_limit_in_words(None);
476+
let message_reader = capnp::serialize_packed::read_message(bytes.as_ref(), opts)
477+
.context("create message reader")?;
478+
let query_response = message_reader
479+
.get_root::<hypersync_net_types_capnp::cached_query_response::Reader>()
480+
.context("get cached_query_response root")?;
481+
match query_response.get_either().which()? {
482+
hypersync_net_types_capnp::cached_query_response::either::Which::QueryResponse(
483+
query_response,
484+
) => {
485+
let res = tokio::task::block_in_place(|| {
486+
let res = query_response?;
487+
read_query_response(&res).context("parse query response cached")
488+
})?;
489+
return Ok((res, bytes.len().try_into().unwrap()));
490+
}
491+
hypersync_net_types_capnp::cached_query_response::either::Which::NotCached(()) => {
492+
log::trace!("query was not cached, retrying with full query");
493+
}
494+
}
495+
} else {
496+
let text = res.text().await.context("read text to see error")?;
497+
log::error!(
498+
"Failed cache query, will retry full query. {}, err body: {}",
499+
status,
500+
text
501+
);
502+
}
503+
};
504+
505+
let full_query_bytes = {
506+
let mut message = capnp::message::Builder::new_default();
507+
let mut query_builder =
508+
message.init_root::<hypersync_net_types_capnp::query::Builder>();
509+
510+
query_builder.build_full_query_from_query(query, should_cache)?;
511+
let mut bytes = Vec::new();
512+
capnp::serialize_packed::write_message(&mut bytes, &message)?;
513+
bytes
514+
};
515+
516+
let mut req = self.http_client.request(Method::POST, url);
517+
req = req.header("content-type", "application/x-capnp");
434518
if let Some(bearer_token) = &self.bearer_token {
435519
req = req.bearer_auth(bearer_token);
436520
}
437521

438-
let query_bytes = query.to_bytes().context("serialize query to bytes")?;
439522
let res = req
440523
.header("content-type", "application/x-capnp")
441-
.body(query_bytes)
524+
.body(full_query_bytes)
442525
.send()
443526
.await
444527
.context("execute http req")?;
@@ -467,7 +550,7 @@ impl Client {
467550
async fn get_arrow_impl(&self, query: &Query) -> Result<(ArrowResponse, u64)> {
468551
match self.serialization_format {
469552
SerializationFormat::Json => self.get_arrow_impl_json(query).await,
470-
SerializationFormat::CapnProto => self.get_arrow_impl_capnp(query).await,
553+
SerializationFormat::CapnProto { .. } => self.get_arrow_impl_capnp(query).await,
471554
}
472555
}
473556

hypersync-client/src/parse_response.rs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,9 @@ fn read_chunks(bytes: &[u8]) -> Result<Vec<ArrowBatch>> {
2626
Ok(chunks)
2727
}
2828

29-
pub fn parse_query_response(bytes: &[u8]) -> Result<ArrowResponse> {
30-
let mut opts = capnp::message::ReaderOptions::new();
31-
opts.nesting_limit(i32::MAX).traversal_limit_in_words(None);
32-
let message_reader =
33-
capnp::serialize_packed::read_message(bytes, opts).context("create message reader")?;
34-
35-
let query_response = message_reader
36-
.get_root::<hypersync_net_types_capnp::query_response::Reader>()
37-
.context("get root")?;
38-
29+
pub fn read_query_response(
30+
query_response: &hypersync_net_types_capnp::query_response::Reader,
31+
) -> Result<ArrowResponse> {
3932
let archive_height = match query_response.get_archive_height() {
4033
-1 => None,
4134
h => Some(
@@ -70,12 +63,13 @@ pub fn parse_query_response(bytes: &[u8]) -> Result<ArrowResponse> {
7063

7164
let data = query_response.get_data().context("read data")?;
7265

73-
let blocks = read_chunks(data.get_blocks().context("get data")?).context("parse block data")?;
74-
let transactions =
75-
read_chunks(data.get_transactions().context("get data")?).context("parse tx data")?;
76-
let logs = read_chunks(data.get_logs().context("get data")?).context("parse log data")?;
66+
let blocks =
67+
read_chunks(data.get_blocks().context("get block data")?).context("parse block data")?;
68+
let transactions = read_chunks(data.get_transactions().context("get transaction data")?)
69+
.context("parse tx data")?;
70+
let logs = read_chunks(data.get_logs().context("get log data")?).context("parse log data")?;
7771
let traces = if data.has_traces() {
78-
read_chunks(data.get_traces().context("get data")?).context("parse traces data")?
72+
read_chunks(data.get_traces().context("get trace data")?).context("parse traces data")?
7973
} else {
8074
Vec::new()
8175
};
@@ -94,3 +88,15 @@ pub fn parse_query_response(bytes: &[u8]) -> Result<ArrowResponse> {
9488
rollback_guard,
9589
})
9690
}
91+
92+
pub fn parse_query_response(bytes: &[u8]) -> Result<ArrowResponse> {
93+
let mut opts = capnp::message::ReaderOptions::new();
94+
opts.nesting_limit(i32::MAX).traversal_limit_in_words(None);
95+
let message_reader =
96+
capnp::serialize_packed::read_message(bytes, opts).context("create message reader")?;
97+
98+
let query_response = message_reader
99+
.get_root::<hypersync_net_types_capnp::query_response::Reader>()
100+
.context("get root")?;
101+
read_query_response(&query_response).context("read query response")
102+
}

hypersync-client/src/to_ethers.rs

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
//! This module implements conversions from hypersync types to the generic Provider types from ethers.
2-
#![cfg(feature = "ethers")]
3-
42
use crate::simple_types::{Block, Log, Trace, Transaction};
53
use ethers;
64
use ethers::prelude::{CallResult, CreateResult};
@@ -14,7 +12,7 @@ use ethers::types::{
1412
use hypersync_format::{AccessList, Address, Data, Hash, Quantity};
1513
use polars_arrow::array::ViewType;
1614
use std::default::Default;
17-
use std::fmt::{Display, Formatter, Write};
15+
use std::fmt::{Display, Formatter};
1816

1917
/// Error that occurred during hypersync -> 3rd-party type conversion
2018
#[derive(Debug, Clone)]
@@ -774,13 +772,7 @@ impl TryFrom<Log> for EtherLog {
774772
topics: value
775773
.topics
776774
.into_iter()
777-
.filter_map(|topic| {
778-
if let Some(topic) = topic {
779-
Some(topic.into())
780-
} else {
781-
None
782-
}
783-
})
775+
.filter_map(|topic| topic.map(H256::from))
784776
.collect::<Vec<H256>>(),
785777
data: Bytes::from_iter(
786778
value

hypersync-client/tests/api_test.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,9 @@ async fn test_api_capnp_client() {
538538
let client = Arc::new(
539539
Client::new(ClientConfig {
540540
url: Some("http://localhost:1131".parse().unwrap()),
541-
serialization_format: SerializationFormat::CapnProto,
541+
serialization_format: SerializationFormat::CapnProto {
542+
should_cache_queries: true,
543+
},
542544

543545
..Default::default()
544546
})

hypersync-net-types/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "hypersync-net-types"
3-
version = "0.11.0-rc.2"
3+
version = "0.11.0-rc.3"
44
edition = "2021"
55
description = "hypersync types for transport over network"
66
license = "MPL-2.0"
@@ -14,8 +14,8 @@ hypersync-format = { path = "../hypersync-format", version = "0.5.8" }
1414
schemars = "1.0.4"
1515
strum = "0.27.2"
1616
strum_macros = "0.27.2"
17-
zstd = "0.13.3"
1817
anyhow = "1.0.100"
18+
xxhash-rust = "0.8.15"
1919

2020
[dev-dependencies]
2121
hypersync-schema = { path = "../hypersync-schema" }
@@ -24,6 +24,7 @@ pretty_assertions = "1"
2424
sha3 = "0.10.8"
2525
flate2 = "1.1.5"
2626
lz4_flex = "0.11.5"
27+
zstd = "0.13.3"
2728
tabled = "0.20.0"
2829

2930
[[bench]]

0 commit comments

Comments
 (0)