
add async query to improve latency #62

Open · wants to merge 38 commits into main

Commits (38)
1bc7370
add cleanlab-tlm as a dependency in pyproject.toml
elisno Mar 20, 2025
2529ae6
Add response validation functionality using TrustworthyRAG
elisno Mar 20, 2025
722d287
alt_answer -> expert_answer
elisno Mar 21, 2025
6f64a12
address comments
elisno Mar 21, 2025
a2c0ea5
have is_bad_response function take the BadResponseThreshold object in…
elisno Mar 21, 2025
b8a1e97
Enhance Validator with flexible thresholds and improved error handling
elisno Mar 22, 2025
db5fe24
move BadResponseThresholds
elisno Mar 22, 2025
29e231a
add prompt and form_prompt
elisno Mar 24, 2025
a741e15
fix formatting and type hints
elisno Mar 24, 2025
380b1ef
update docstrings
elisno Mar 24, 2025
4f40e3d
Add unit tests for Validator and BadResponseThresholds
elisno Mar 25, 2025
02b16e0
include type hints and fix formatting
elisno Mar 25, 2025
873f552
set "expert_answer" as first key
elisno Mar 25, 2025
b471371
clean up imports, type hints and docs
elisno Mar 25, 2025
be4745c
Update pyproject.toml
elisno Mar 26, 2025
54e866b
Update response_validation.py docstring to indicate module deprecatio…
elisno Mar 26, 2025
0a21649
add async query to improve latency
aditya1503 Mar 26, 2025
c632625
make remediate method private
elisno Mar 26, 2025
d422bcf
update docstrings
elisno Mar 26, 2025
d7bc592
revert and wait outside
aditya1503 Mar 26, 2025
2407b88
add event looping
aditya1503 Mar 26, 2025
0ac8e5d
add thread correctly
aditya1503 Mar 26, 2025
94c626a
add try catch
aditya1503 Mar 26, 2025
ae49baf
Merge branch 'validator' into async_query
aditya1503 Mar 26, 2025
86707d9
Update validator.py
aditya1503 Mar 27, 2025
d57e2c9
merge main
aditya1503 Apr 1, 2025
0f1b838
docstring
aditya1503 Apr 1, 2025
2556833
add tab to docstring
aditya1503 Apr 1, 2025
cee4f13
add bool run_async
aditya1503 Apr 2, 2025
84cc0f7
linting
aditya1503 Apr 2, 2025
640a194
typing
aditya1503 Apr 2, 2025
158e1b2
entry fix
aditya1503 Apr 2, 2025
c4330fd
format fix
aditya1503 Apr 2, 2025
c9e1357
add docstring
aditya1503 Apr 2, 2025
63d2614
simpler code
aditya1503 Apr 2, 2025
bc45c23
noqa
aditya1503 Apr 2, 2025
acb3beb
linting
aditya1503 Apr 2, 2025
573426d
Merge branch 'main' into async_query
aditya1503 Apr 2, 2025
41 changes: 36 additions & 5 deletions src/cleanlab_codex/validator.py
@@ -4,7 +4,8 @@

 from __future__ import annotations

-from typing import TYPE_CHECKING, Any, Callable, Optional, cast
+import asyncio
+from typing import TYPE_CHECKING, Any, Callable, Optional, Tuple, cast

 from cleanlab_tlm import TrustworthyRAG
 from pydantic import BaseModel, Field, field_validator
@@ -17,6 +18,7 @@

 from cleanlab_codex.project import Project

 if TYPE_CHECKING:
+    from cleanlab_codex.types.entry import Entry
     from cleanlab_codex.types.validator import ThresholdedTrustworthyRAGScore


@@ -94,6 +96,8 @@ def validate(
         query: str,
         context: str,
         response: str,
+        *,
+        run_async: bool = False,
         prompt: Optional[str] = None,
         form_prompt: Optional[Callable[[str, str], str]] = None,
     ) -> dict[str, Any]:
@@ -104,6 +108,7 @@
             query (str): The user query that was used to generate the response.
             context (str): The context that was retrieved from the RAG Knowledge Base and used to generate the response.
             response (str): A response from your LLM/RAG system.
+            run_async (bool): If True, runs the detection concurrently with the Codex lookup to reduce latency.
             prompt (str, optional): Optional prompt representing the actual inputs (combining query, context, and system instructions into one string) to the LLM that generated the response.
             form_prompt (Callable[[str, str], str], optional): Optional function to format the prompt based on query and context. Cannot be provided together with prompt, provide one or the other. This function should take query and context as parameters and return a formatted prompt string. If not provided, a default prompt formatter will be used. To include a system prompt or any other special instructions for your LLM, incorporate them directly in your custom form_prompt() function definition.

@@ -113,10 +118,32 @@
                 - 'is_bad_response': True if the response is flagged as potentially bad, False otherwise. When True, a Codex lookup is performed, which logs this query into the Codex Project for SMEs to answer.
                 - Additional keys from a [`ThresholdedTrustworthyRAGScore`](/codex/api/python/types.validator/#class-thresholdedtrustworthyragscore) dictionary: each corresponds to a [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag) evaluation metric, and points to the score for this evaluation as well as a boolean `is_bad` flagging whether the score falls below the corresponding threshold.
         """
-        scores, is_bad_response = self.detect(query, context, response, prompt, form_prompt)
-        expert_answer = None
-        if is_bad_response:
-            expert_answer = self._remediate(query)
+        if run_async:
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:  # No running loop
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+            expert_task = loop.create_task(self.remediate_async(query))
+            detect_task = loop.run_in_executor(None, self.detect, query, context, response, prompt, form_prompt)
+            expert_answer, maybe_entry = loop.run_until_complete(expert_task)
+            scores, is_bad_response = loop.run_until_complete(detect_task)
+            loop.close()
+            if is_bad_response:
+                if expert_answer is None:
+                    # TODO: Make this async as well
+                    project_id = self._project._id  # noqa: SLF001
+                    self._project._sdk_client.projects.entries.add_question(  # noqa: SLF001
+                        project_id,
+                        question=query,
+                    ).model_dump()
+            else:
+                expert_answer = None
+        else:
+            scores, is_bad_response = self.detect(query, context, response, prompt, form_prompt)
+            expert_answer = None
+            if is_bad_response:
+                expert_answer = self._remediate(query)

         return {
             "expert_answer": expert_answer,
@@ -181,6 +208,10 @@ def _remediate(self, query: str) -> str | None:
         codex_answer, _ = self._project.query(question=query)
         return codex_answer

+    async def remediate_async(self, query: str) -> Tuple[Optional[str], Optional[Entry]]:
+        codex_answer, entry = self._project.query(question=query, read_only=True)
+        return codex_answer, entry
+

 class BadResponseThresholds(BaseModel):
     """Config for determining if a response is bad.
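
The async path above overlaps the blocking detection call with a read-only Codex lookup by driving both from one event loop: the lookup coroutine is scheduled as a task, the blocking detect call is pushed to a worker thread via run_in_executor, and the loop then waits on both. Below is a minimal, self-contained sketch of that pattern, not the PR's actual code: blocking_detect and async_remediate are placeholder stand-ins, and the sketch assumes it is called from ordinary synchronous code with no event loop already running.

import asyncio
import time


def blocking_detect() -> str:
    """Stand-in for the synchronous detect() call (e.g. TrustworthyRAG scoring)."""
    time.sleep(1.0)
    return "scores"


async def async_remediate() -> str:
    """Stand-in for the read-only Codex lookup performed by remediate_async()."""
    await asyncio.sleep(1.0)
    return "expert answer"


def run_both() -> tuple[str, str]:
    # Reuse the running loop if one exists, otherwise create a fresh one,
    # mirroring the try/except in validate(). Note: run_until_complete would
    # raise if this thread already had a running loop (e.g. inside Jupyter);
    # this sketch assumes a plain synchronous caller.
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    remediate_task = loop.create_task(async_remediate())         # scheduled on the loop
    detect_future = loop.run_in_executor(None, blocking_detect)  # starts in a worker thread
    answer = loop.run_until_complete(remediate_task)
    scores = loop.run_until_complete(detect_future)
    loop.close()
    return answer, scores


if __name__ == "__main__":
    start = time.time()
    print(run_both(), f"took ~{time.time() - start:.1f}s")  # roughly 1s total, not 2s

The point is that run_in_executor hands the detection work to a thread before the loop starts waiting on the lookup coroutine, so the two waits overlap instead of adding up, which is where the latency improvement comes from.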
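
For completeness, a hedged usage sketch of the new flag, not part of this PR's diff. It assumes the Validator constructor accepts a codex_access_key, as in existing cleanlab-codex examples, and uses placeholder query/context/response strings.

from cleanlab_codex.validator import Validator

# Placeholder credentials and inputs; adjust to your project.
validator = Validator(codex_access_key="<your-project-access-key>")

result = validator.validate(
    query="What is the refund window?",
    context="Refunds are accepted within 30 days of purchase.",
    response="You can get a refund within 30 days of purchase.",
    run_async=True,  # overlap detection with the read-only Codex lookup
)

print(result["is_bad_response"])  # True if the response was flagged as bad
print(result["expert_answer"])    # SME answer from Codex, or None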