From ff8e34391eaea8e728f02309e787cd3747e02e7d Mon Sep 17 00:00:00 2001 From: AlanPonnachan Date: Fri, 19 Sep 2025 20:02:48 +0530 Subject: [PATCH 1/6] add converse handling function in prompt_caching --- .../instrumentation/bedrock/prompt_caching.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py index b94dc66127..1c240b07ea 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py @@ -41,3 +41,39 @@ def prompt_caching_handling(headers, vendor, model, metric_params): ) if write_cached_tokens > 0: span.set_attribute(CacheSpanAttrs.CACHED, "write") + + +def prompt_caching_converse_handling(response, vendor, model, metric_params): + base_attrs = { + "gen_ai.system": vendor, + "gen_ai.response.model": model, + } + span = trace.get_current_span() + if not isinstance(span, trace.Span): + return + + usage = response.get("usage", {}) + read_cached_tokens = usage.get("cache_read_input_tokens", 0) + write_cached_tokens = usage.get("cache_creation_input_tokens", 0) + + if read_cached_tokens > 0: + if metric_params.prompt_caching: + metric_params.prompt_caching.add( + read_cached_tokens, + attributes={ + **base_attrs, + CacheSpanAttrs.TYPE: "read", + }, + ) + span.set_attribute(CacheSpanAttrs.CACHED, "read") + + if write_cached_tokens > 0: + if metric_params.prompt_caching: + metric_params.prompt_caching.add( + write_cached_tokens, + attributes={ + **base_attrs, + CacheSpanAttrs.TYPE: "write", + }, + ) + span.set_attribute(CacheSpanAttrs.CACHED, "write") From 83bdb0b0c828ad58676c56cf30c0135bcf344f3e Mon Sep 17 00:00:00 2001 From: AlanPonnachan Date: Fri, 19 Sep 2025 20:09:26 +0530 Subject: [PATCH 2/6] update init --- .../opentelemetry/instrumentation/bedrock/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py index 9b1ec775bd..3b5b7964cc 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py @@ -22,7 +22,10 @@ guardrail_converse, guardrail_handling, ) -from opentelemetry.instrumentation.bedrock.prompt_caching import prompt_caching_handling +from opentelemetry.instrumentation.bedrock.prompt_caching import ( + prompt_caching_converse_handling, + prompt_caching_handling, +) from opentelemetry.instrumentation.bedrock.reusable_streaming_body import ( ReusableStreamingBody, ) @@ -354,6 +357,7 @@ def _handle_call(span: Span, kwargs, response, metric_params, event_logger): def _handle_converse(span, kwargs, response, metric_params, event_logger): (provider, model_vendor, model) = _get_vendor_model(kwargs.get("modelId")) guardrail_converse(span, response, provider, model, metric_params) + prompt_caching_converse_handling(response, provider, model, metric_params) set_converse_model_span_attributes(span, provider, model, kwargs) @@ -394,7 +398,11 @@ def wrap(*args, **kwargs): role = event["messageStart"]["role"] elif "metadata" in event: # last message sent + metadata = event.get("metadata", {}) guardrail_converse(span, event["metadata"], provider, model, metric_params) + prompt_caching_converse_handling( + metadata, provider, model, metric_params + ) converse_usage_record(span, event["metadata"], metric_params) span.end() elif "messageStop" in event: From 4fa37924888b5501ff40e9becdb27551deb01de6 Mon Sep 17 00:00:00 2001 From: AlanPonnachan Date: Fri, 19 Sep 2025 20:11:34 +0530 Subject: [PATCH 3/6] add test --- ...bedrock_converse_prompt_caching_metrics.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_converse_prompt_caching_metrics.py diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_converse_prompt_caching_metrics.py b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_converse_prompt_caching_metrics.py new file mode 100644 index 0000000000..0775b5a0a2 --- /dev/null +++ b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_converse_prompt_caching_metrics.py @@ -0,0 +1,69 @@ +import pytest +from opentelemetry.instrumentation.bedrock import PromptCaching +from opentelemetry.instrumentation.bedrock.prompt_caching import CacheSpanAttrs + + +def call(brt): + return brt.converse( + modelId="anthropic.claude-3-haiku-20240307-v1:0", + messages=[ + { + "role": "user", + "content": [ + { + "text": "What is the capital of the USA?", + } + ], + } + ], + inferenceConfig={"maxTokens": 50, "temperature": 0.1}, + additionalModelRequestFields={"cacheControl": {"type": "ephemeral"}}, + ) + + +def get_metric(resource_metrics, name): + for rm in resource_metrics: + for sm in rm.scope_metrics: + for metric in sm.metrics: + if metric.name == name: + return metric + raise Exception(f"No metric found with name {name}") + + +def assert_metric(reader, usage): + metrics_data = reader.get_metrics_data() + resource_metrics = metrics_data.resource_metrics + assert len(resource_metrics) > 0 + + m = get_metric(resource_metrics, PromptCaching.LLM_BEDROCK_PROMPT_CACHING) + for data_point in m.data.data_points: + assert data_point.attributes[CacheSpanAttrs.TYPE] in [ + "read", + "write", + ] + if data_point.attributes[CacheSpanAttrs.TYPE] == "read": + assert data_point.value == usage["cache_read_input_tokens"] + else: + assert data_point.value == usage["cache_creation_input_tokens"] + + +@pytest.mark.vcr +def test_prompt_cache_converse(test_context, brt): + _, _, reader = test_context + + response = call(brt) + # assert first prompt writes a cache + usage = response["usage"] + assert usage["cache_read_input_tokens"] == 0 + assert usage["cache_creation_input_tokens"] > 0 + cumulative_workaround = usage["cache_creation_input_tokens"] + assert_metric(reader, usage) + + response = call(brt) + # assert second prompt reads from the cache + usage = response["usage"] + assert usage["cache_read_input_tokens"] > 0 + assert usage["cache_creation_input_tokens"] == 0 + # data is stored across reads of metric data due to the cumulative behavior + usage["cache_creation_input_tokens"] = cumulative_workaround + assert_metric(reader, usage) \ No newline at end of file From c8abf6090f4525285bec2ac5a9621b1632f1257b Mon Sep 17 00:00:00 2001 From: Alan Ponnachan <85491837+AlanPonnachan@users.noreply.github.com> Date: Fri, 19 Sep 2025 20:23:53 +0530 Subject: [PATCH 4/6] Update packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> --- .../opentelemetry/instrumentation/bedrock/prompt_caching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py index 1c240b07ea..c28ea5290e 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py @@ -49,7 +49,7 @@ def prompt_caching_converse_handling(response, vendor, model, metric_params): "gen_ai.response.model": model, } span = trace.get_current_span() - if not isinstance(span, trace.Span): + if not isinstance(span, trace.Span) or not span.is_recording(): return usage = response.get("usage", {}) From 7d65ff8b42c151fef1fd0218ac5ecdf61dc80d0d Mon Sep 17 00:00:00 2001 From: AlanPonnachan Date: Sat, 20 Sep 2025 01:49:21 +0530 Subject: [PATCH 5/6] correct lint test --- .../opentelemetry/instrumentation/bedrock/__init__.py | 2 +- .../metrics/test_bedrock_converse_prompt_caching_metrics.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py index 3b5b7964cc..ba97c54a9e 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py @@ -400,7 +400,7 @@ def wrap(*args, **kwargs): # last message sent metadata = event.get("metadata", {}) guardrail_converse(span, event["metadata"], provider, model, metric_params) - prompt_caching_converse_handling( + prompt_caching_converse_handling( metadata, provider, model, metric_params ) converse_usage_record(span, event["metadata"], metric_params) diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_converse_prompt_caching_metrics.py b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_converse_prompt_caching_metrics.py index 0775b5a0a2..81cb370c35 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_converse_prompt_caching_metrics.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_converse_prompt_caching_metrics.py @@ -66,4 +66,5 @@ def test_prompt_cache_converse(test_context, brt): assert usage["cache_creation_input_tokens"] == 0 # data is stored across reads of metric data due to the cumulative behavior usage["cache_creation_input_tokens"] = cumulative_workaround - assert_metric(reader, usage) \ No newline at end of file + assert_metric(reader, usage) + \ No newline at end of file From a3c94e3fdbc6b163a2a4664d896cf3490ee1a861 Mon Sep 17 00:00:00 2001 From: AlanPonnachan Date: Sat, 20 Sep 2025 03:36:58 +0530 Subject: [PATCH 6/6] add token data --- .../opentelemetry/instrumentation/bedrock/prompt_caching.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py index c28ea5290e..f98d57978b 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py @@ -66,6 +66,9 @@ def prompt_caching_converse_handling(response, vendor, model, metric_params): }, ) span.set_attribute(CacheSpanAttrs.CACHED, "read") + span.set_attribute( + "gen_ai.usage.cache_read_input_tokens", read_cached_tokens + ) if write_cached_tokens > 0: if metric_params.prompt_caching: @@ -77,3 +80,6 @@ def prompt_caching_converse_handling(response, vendor, model, metric_params): }, ) span.set_attribute(CacheSpanAttrs.CACHED, "write") + span.set_attribute( + "gen_ai.usage.cache_creation_input_tokens", write_cached_tokens + )