Commit bee16d4

openai: Fix missing or double spans when completion stream is used with context manager (#80)
* openai: Add conditional span closure logic depending on whether streamed completion uses context manager or not
* openai: Add conditional span closure logic for async streaming completion
* openai: Guard span ending based on the flag rather than the context manager
1 parent 4971007 commit bee16d4

File tree

3 files changed: +249 -0 lines changed


instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/wrappers.py

+18
@@ -67,8 +67,13 @@ def __init__(
         self.choices = []
         self.usage = None
         self.service_tier = None
+        self.ended = False

     def end(self, exc=None):
+        if self.ended:
+            return
+
+        self.ended = True
         if exc is not None:
             self.span.set_status(StatusCode.ERROR, str(exc))
             self.span.set_attribute(ERROR_TYPE, exc.__class__.__qualname__)

@@ -111,6 +116,12 @@ def process_chunk(self, chunk):
         if hasattr(chunk, "service_tier"):
             self.service_tier = chunk.service_tier

+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.end(exc_value)
+
     def __iter__(self):
         stream = self.__wrapped__
         try:

@@ -124,6 +135,13 @@ def __iter__(self):
             raise
         self.end()

+    async def __aenter__(self):
+        # No difference in behavior between sync and async context manager
+        return self.__enter__()
+
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        self.__exit__(exc_type, exc_value, traceback)
+
     async def __aiter__(self):
         stream = self.__wrapped__
         try:
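Taken together, these hunks make end() idempotent via the ended flag and let the wrapper itself serve as a sync and async context manager. Below is a minimal, runnable sketch of the same closure pattern; the StreamOfChunks class and the print call standing in for span closure are illustrative stand-ins, not the instrumentation's actual classes:

# Minimal sketch of the double-closure scenario this commit guards against.
class StreamOfChunks:
    def __init__(self, chunks):
        self._chunks = chunks
        self.ended = False

    def end(self, exc=None):
        if self.ended:  # the guard added in this commit: end() is idempotent
            return
        self.ended = True
        print("span ended", "with error" if exc else "cleanly")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end(exc_value)

    def __iter__(self):
        yield from self._chunks
        self.end()  # closure when iteration is exhausted


# Consuming the stream inside `with` reaches end() twice: once when the
# iterator is exhausted and once from __exit__. The flag makes the second
# call a no-op, so the span is ended exactly once.
with StreamOfChunks(["South", " Atlantic", " Ocean", "."]) as stream:
    print("".join(stream))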
New file: VCR cassette for the context-manager streaming tests

+114

@@ -0,0 +1,114 @@
+interactions:
+- request:
+    body: |-
+      {
+        "messages": [
+          {
+            "role": "user",
+            "content": "Answer in up to 3 words: Which ocean contains Bouvet Island?"
+          }
+        ],
+        "model": "gpt-4o-mini",
+        "stream": true
+      }
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      authorization:
+      - Bearer test_openai_api_key
+      connection:
+      - keep-alive
+      content-length:
+      - '147'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.66.5
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.66.5
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.6
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: |+
+        data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]}
+
+        data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"content":"South"},"logprobs":null,"finish_reason":null}]}
+
+        data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"content":" Atlantic"},"logprobs":null,"finish_reason":null}]}
+
+        data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"content":" Ocean"},"logprobs":null,"finish_reason":null}]}
+
+        data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]}
+
+        data: {"id":"chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL","object":"chat.completion.chunk","created":1745234787,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
+
+        data: [DONE]
+
+    headers:
+      CF-RAY:
+      - 933c86cb9ae5773e-LHR
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Mon, 21 Apr 2025 11:26:28 GMT
+      Server:
+      - cloudflare
+      Set-Cookie: test_set_cookie
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization: test_openai_org_id
+      openai-processing-ms:
+      - '460'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '200'
+      x-ratelimit-limit-tokens:
+      - '100000'
+      x-ratelimit-remaining-requests:
+      - '199'
+      x-ratelimit-remaining-tokens:
+      - '88447'
+      x-ratelimit-reset-requests:
+      - 7m12s
+      x-ratelimit-reset-tokens:
+      - 83h10m39.057s
+      x-request-id:
+      - req_a39b652ec0ccb45a968e10112e569bf4
+    status:
+      code: 200
+      message: OK
+version: 1
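As a cross-check, the delta payloads recorded above concatenate to exactly the string the new tests assert on. A small illustrative snippet (the events list copies the cassette's deltas, trimmed to the relevant fields; it is not part of the commit):

import json

# Re-assemble the assistant message from the recorded SSE deltas, the way
# the tests' list comprehensions do.
events = [
    '{"choices":[{"index":0,"delta":{"role":"assistant","content":""}}]}',
    '{"choices":[{"index":0,"delta":{"content":"South"}}]}',
    '{"choices":[{"index":0,"delta":{"content":" Atlantic"}}]}',
    '{"choices":[{"index":0,"delta":{"content":" Ocean"}}]}',
    '{"choices":[{"index":0,"delta":{"content":"."}}]}',
    '{"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}',
]

content = "".join(
    json.loads(e)["choices"][0]["delta"].get("content") or "" for e in events
)
assert content == "South Atlantic Ocean."  # the string asserted in both tests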

instrumentation/elastic-opentelemetry-instrumentation-openai/tests/test_chat_completions.py

+117
@@ -1103,6 +1103,62 @@ def test_chat_stream(default_openai_env, trace_exporter, metrics_reader, logs_exporter):
 )


+@pytest.mark.vcr()
+def test_chat_stream_with_context_manager(default_openai_env, trace_exporter, metrics_reader, logs_exporter):
+    client = openai.OpenAI()
+
+    messages = [
+        {
+            "role": "user",
+            "content": TEST_CHAT_INPUT,
+        }
+    ]
+
+    # Use a context manager for the streaming response
+    with client.chat.completions.create(model=TEST_CHAT_MODEL, messages=messages, stream=True) as chat_completion:
+        chunks = [chunk.choices[0].delta.content or "" for chunk in chat_completion if chunk.choices]
+        assert "".join(chunks) == "South Atlantic Ocean."
+
+    spans = trace_exporter.get_finished_spans()
+    assert len(spans) == 1
+
+    span = spans[0]
+    assert span.name == f"chat {TEST_CHAT_MODEL}"
+    assert span.kind == SpanKind.CLIENT
+    assert span.status.status_code == StatusCode.UNSET
+
+    address, port = address_and_port(client)
+    assert dict(span.attributes) == {
+        GEN_AI_OPERATION_NAME: "chat",
+        GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL,
+        GEN_AI_SYSTEM: "openai",
+        GEN_AI_RESPONSE_ID: "chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL",
+        GEN_AI_RESPONSE_MODEL: TEST_CHAT_RESPONSE_MODEL,
+        GEN_AI_RESPONSE_FINISH_REASONS: ("stop",),
+        SERVER_ADDRESS: address,
+        SERVER_PORT: port,
+        GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default",
+    }
+
+    logs = logs_exporter.get_finished_logs()
+    assert len(logs) == 2
+    log_records = logrecords_from_logs(logs)
+    user_message, choice = log_records
+    assert dict(user_message.attributes) == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"}
+    assert dict(user_message.body) == {}
+
+    assert_stop_log_record(choice)
+
+    (operation_duration_metric,) = get_sorted_metrics(metrics_reader)
+    attributes = {
+        GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL,
+        GEN_AI_RESPONSE_MODEL: TEST_CHAT_RESPONSE_MODEL,
+    }
+    assert_operation_duration_metric(
+        client, "chat", operation_duration_metric, attributes=attributes, min_data_point=0.006761051714420319
+    )
+
+
 @pytest.mark.skipif(OPENAI_VERSION < (1, 8, 0), reason="LegacyAPIResponse available")
 @pytest.mark.vcr()
 def test_chat_stream_with_raw_response(default_openai_env, trace_exporter, metrics_reader, logs_exporter):
@@ -2096,6 +2152,67 @@ async def test_chat_async_stream(default_openai_env, trace_exporter, metrics_reader, logs_exporter):
 )


+@pytest.mark.vcr()
+@pytest.mark.asyncio
+async def test_chat_async_stream_with_context_manager(
+    default_openai_env, trace_exporter, metrics_reader, logs_exporter
+):
+    client = openai.AsyncOpenAI()
+
+    messages = [
+        {
+            "role": "user",
+            "content": TEST_CHAT_INPUT,
+        }
+    ]
+
+    # Use a context manager for the asynchronous streaming response
+    async with await client.chat.completions.create(
+        model=TEST_CHAT_MODEL, messages=messages, stream=True
+    ) as chat_completion:
+        chunks = [chunk.choices[0].delta.content or "" async for chunk in chat_completion if chunk.choices]
+        assert "".join(chunks) == "South Atlantic Ocean."
+
+    spans = trace_exporter.get_finished_spans()
+    assert len(spans) == 1
+
+    span = spans[0]
+    assert span.name == f"chat {TEST_CHAT_MODEL}"
+    assert span.kind == SpanKind.CLIENT
+    assert span.status.status_code == StatusCode.UNSET
+
+    address, port = address_and_port(client)
+    assert dict(span.attributes) == {
+        GEN_AI_OPERATION_NAME: "chat",
+        GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL,
+        GEN_AI_SYSTEM: "openai",
+        GEN_AI_RESPONSE_ID: "chatcmpl-BOja7e365tj5upRjLFinadEB8ZoDL",
+        GEN_AI_RESPONSE_MODEL: TEST_CHAT_RESPONSE_MODEL,
+        GEN_AI_RESPONSE_FINISH_REASONS: ("stop",),
+        SERVER_ADDRESS: address,
+        SERVER_PORT: port,
+        GEN_AI_OPENAI_RESPONSE_SERVICE_TIER: "default",
+    }
+
+    logs = logs_exporter.get_finished_logs()
+    assert len(logs) == 2
+    log_records = logrecords_from_logs(logs)
+    user_message, choice = log_records
+    assert dict(user_message.attributes) == {"gen_ai.system": "openai", "event.name": "gen_ai.user.message"}
+    assert dict(user_message.body) == {}
+
+    assert_stop_log_record(choice)
+
+    (operation_duration_metric,) = get_sorted_metrics(metrics_reader)
+    attributes = {
+        GEN_AI_REQUEST_MODEL: TEST_CHAT_MODEL,
+        GEN_AI_RESPONSE_MODEL: TEST_CHAT_RESPONSE_MODEL,
+    }
+    assert_operation_duration_metric(
+        client, "chat", operation_duration_metric, attributes=attributes, min_data_point=0.006761051714420319
+    )
+
+
 @pytest.mark.skipif(OPENAI_VERSION < (1, 8, 0), reason="LegacyAPIResponse available")
 @pytest.mark.vcr()
 @pytest.mark.asyncio
