From 9059aa0a32c0f78aa4fd882864a4e97cf8e2c736 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Wed, 4 Dec 2024 17:44:05 +0800 Subject: [PATCH 01/17] feat: support alias for semantics generation --- .../generation/semantics_description.py | 29 +++++++++++-------- .../web/v1/routers/semantics_description.py | 8 ++++- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/semantics_description.py index d85826f987..0d860330f8 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/semantics_description.py @@ -46,7 +46,7 @@ def extract(model: dict) -> dict: return [ extract(model) for model in mdl.get("models", []) - if model.get("name", "") in selected_models + if model.get("name", "") in selected_models or "*" in selected_models ] @@ -90,6 +90,7 @@ def wrapper(text: str) -> str: ## End of Pipeline class ModelProperties(BaseModel): + alias: str description: str @@ -135,12 +136,12 @@ class SemanticResult(BaseModel): ] ``` -Your task is to update this JSON structure by adding a `description` field inside both the `properties` attribute of each `column` and the `model` itself. -Each `description` should be derived from a user-provided input that explains the purpose or context of the `model` and its respective columns. +Your task is to update this JSON structure by adding `description`, `alias` fields inside both the `properties` attribute of each `column` and the `model` itself. +Each `description`, `alias` should be derived from a user-provided input that explains the purpose or context of the `model` and its respective columns. Follow these steps: -1. **For the `model`**: Prompt the user to provide a brief description of the model's overall purpose or its context. Insert this description in the `properties` field of the `model`. -2. **For each `column`**: Ask the user to describe each column's role or significance. Each column's description should be added under its respective `properties` field in the format: `'description': 'user-provided text'`. -3. Ensure that the output is a well-formatted JSON structure, preserving the input's original format and adding the appropriate `description` fields. +1. **For the `model`**: Prompt the user to provide a brief description and alias of the model's overall purpose or its context. Insert this description and alias in the `properties` field of the `model`. +2. **For each `column`**: Ask the user to describe each column's role or significance. Each column's description and alias should be added under its respective `properties` field in the format: `'description': 'user-provided text'`, `'alias': 'user-provided text'`. +3. Ensure that the output is a well-formatted JSON structure, preserving the input's original format and adding the appropriate `description`, `alias` fields. 
### Output Format: @@ -153,25 +154,29 @@ class SemanticResult(BaseModel): { "name": "column_1", "properties": { + "alias": "", "description": "" } }, { "name": "column_2", "properties": { - "description": "" + "alias": "", + "description": "" } }, { "name": "column_3", "properties": { - "description": "" + "alias": "", + "description": "" } } ], "properties": { - "description": "" - } + "alias": "", + "description": "" + } } ] } @@ -186,7 +191,7 @@ class SemanticResult(BaseModel): Picked models: {{ picked_models }} Localization Language: {{ language }} -Please provide a brief description for the model and each column based on the user's prompt. +Please provide a brief description and alias for the model and each column based on the user's prompt. """ @@ -233,7 +238,7 @@ async def run( SemanticsDescription, "semantics_description", user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", - selected_models=[], mdl={}, + selected_models=["*"], language="en", ) diff --git a/wren-ai-service/src/web/v1/routers/semantics_description.py b/wren-ai-service/src/web/v1/routers/semantics_description.py index b8d47bcbf2..4aec029c49 100644 --- a/wren-ai-service/src/web/v1/routers/semantics_description.py +++ b/wren-ai-service/src/web/v1/routers/semantics_description.py @@ -31,7 +31,7 @@ "mdl": "{ ... }", # JSON string of the MDL (Model Definition Language) "project_id": "project-id", # Optional project ID "configuration": { # Optional configuration settings - "language": "English" # Optional language, defaults to "English" + "language": "en" # Optional language, defaults to "en" } } - Response: PostResponse @@ -52,9 +52,11 @@ "columns": [ { "name": "col1", + "alias": "col1_alias", "description": "Unique identifier for each record in the example model." } ], + "alias": "model1_alias", "description": "This model is used for analysis purposes, capturing key attributes of records." }, { @@ -62,9 +64,11 @@ "columns": [ { "name": "col1", + "alias": "col1_alias", "description": "Unique identifier for each record in the example model." } ], + "alias": "model2_alias", "description": "This model is used for analysis purposes, capturing key attributes of records." 
} ], @@ -154,10 +158,12 @@ def _formatter(response: Optional[dict]) -> Optional[list[dict]]: "columns": [ { "name": column["name"], + "alias": column["properties"].get("alias", ""), "description": column["properties"].get("description", ""), } for column in model_data["columns"] ], + "alias": model_data["properties"].get("alias", ""), "description": model_data["properties"].get("description", ""), } for model_name, model_data in response.items() From d24761dd71ebac61b4d66ea1739efdd8d48d3a24 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Wed, 4 Dec 2024 17:51:20 +0800 Subject: [PATCH 02/17] feat: asterisk for all models --- .../src/pipelines/generation/semantics_description.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/semantics_description.py index 0d860330f8..1cde86c83c 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/semantics_description.py @@ -43,11 +43,10 @@ def extract(model: dict) -> dict: }, } - return [ - extract(model) - for model in mdl.get("models", []) - if model.get("name", "") in selected_models or "*" in selected_models - ] + def model_picker(model: dict) -> bool: + return model.get("name", "") in selected_models or "*" in selected_models + + return [extract(model) for model in mdl.get("models", []) if model_picker(model)] @observe(capture_input=False) From 880f47785a71d610b3445bd8456934dc116f1543 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 14:47:11 +0800 Subject: [PATCH 03/17] feat: optimize the prompt --- .../generation/semantics_description.py | 111 ++++++++---------- 1 file changed, 47 insertions(+), 64 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/semantics_description.py index 1cde86c83c..561eeb46e4 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/semantics_description.py @@ -17,7 +17,7 @@ ## Start of Pipeline @observe(capture_input=False) -def picked_models(mdl: dict, selected_models: list[str]) -> list[dict]: +def picked_models(mdl: dict) -> list[dict]: def relation_filter(column: dict) -> bool: return "relationship" not in column @@ -27,6 +27,7 @@ def column_formatter(columns: list[dict]) -> list[dict]: "name": column["name"], "type": column["type"], "properties": { + "alias": column["properties"].get("displayName", ""), "description": column["properties"].get("description", ""), }, } @@ -35,18 +36,17 @@ def column_formatter(columns: list[dict]) -> list[dict]: ] def extract(model: dict) -> dict: + prop = model["properties"] return { "name": model["name"], "columns": column_formatter(model["columns"]), "properties": { - "description": model["properties"].get("description", ""), + "alias": prop.get("displayName", ""), + "description": prop.get("description", ""), }, } - def model_picker(model: dict) -> bool: - return model.get("name", "") in selected_models or "*" in selected_models - - return [extract(model) for model in mdl.get("models", []) if model_picker(model)] + return [extract(model) for model in mdl.get("models", [])] @observe(capture_input=False) @@ -119,69 +119,54 @@ class SemanticResult(BaseModel): } system_prompt = """ -I have a data model represented in JSON format, with the following structure: - -``` -[ - {'name': 'model', 'columns': [ - {'name': 
'column_1', 'type': 'type', 'properties': {} - }, - {'name': 'column_2', 'type': 'type', 'properties': {} - }, - {'name': 'column_3', 'type': 'type', 'properties': {} - } - ], 'properties': {} - } -] -``` - -Your task is to update this JSON structure by adding `description`, `alias` fields inside both the `properties` attribute of each `column` and the `model` itself. -Each `description`, `alias` should be derived from a user-provided input that explains the purpose or context of the `model` and its respective columns. -Follow these steps: -1. **For the `model`**: Prompt the user to provide a brief description and alias of the model's overall purpose or its context. Insert this description and alias in the `properties` field of the `model`. -2. **For each `column`**: Ask the user to describe each column's role or significance. Each column's description and alias should be added under its respective `properties` field in the format: `'description': 'user-provided text'`, `'alias': 'user-provided text'`. -3. Ensure that the output is a well-formatted JSON structure, preserving the input's original format and adding the appropriate `description`, `alias` fields. - -### Output Format: - -``` +You are a data model expert. Your task is to enrich a JSON data model with descriptive metadata. + +Input Format: +[{ + 'name': 'model', + 'columns': [{'name': 'column', 'type': 'type', 'properties': {'alias': 'alias', 'description': 'description'}}], + 'properties': {'alias': 'alias', 'description': 'description'} +}] + +For each model and column, you will: +1. Add a clear, concise alias that serves as a business-friendly name +2. Add a detailed description explaining its purpose and usage + +Guidelines: +- Descriptions should be clear, concise and business-focused +- Aliases should be intuitive and user-friendly +- Use the user's context to inform the descriptions +- Maintain technical accuracy while being accessible to non-technical users + +Output Format: { - "models": [ - { + "models": [{ "name": "model", - "columns": [ - { - "name": "column_1", - "properties": { - "alias": "", - "description": "" - } - }, - { - "name": "column_2", - "properties": { - "alias": "", - "description": "" - } - }, - { - "name": "column_3", - "properties": { - "alias": "", - "description": "" - } + "columns": [{ + "name": "column", + "properties": { + "alias": "User-friendly column name", + "description": "Clear explanation of column purpose" } - ], + }], "properties": { - "alias": "", - "description": "" + "alias": "User-friendly model name", + "description": "Clear explanation of model purpose" } - } - ] + }] +} + +Example: +Input model "orders" with column "created_at" might become: +{ + "name": "created_at", + "properties": { + "alias": "Order Creation Date", + "description": "Timestamp when the order was first created in the system" + } } -``` -Make sure that the descriptions are concise, informative, and contextually appropriate based on the input provided by the user. +Focus on providing business value through clear, accurate descriptions while maintaining JSON structure integrity. 
""" user_prompt_template = """ @@ -213,7 +198,6 @@ def __init__(self, llm_provider: LLMProvider, **_): async def run( self, user_prompt: str, - selected_models: list[str], mdl: dict, language: str = "en", ) -> dict: @@ -222,7 +206,6 @@ async def run( [self._final], inputs={ "user_prompt": user_prompt, - "selected_models": selected_models, "mdl": mdl, "language": language, **self._components, From 60b2215945d2fba5e10a2046adc240622f801973 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 15:07:53 +0800 Subject: [PATCH 04/17] feat: picking model in the service level --- .../src/web/v1/services/semantics_description.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/wren-ai-service/src/web/v1/services/semantics_description.py b/wren-ai-service/src/web/v1/services/semantics_description.py index 040f333dc4..aed141aa8e 100644 --- a/wren-ai-service/src/web/v1/services/semantics_description.py +++ b/wren-ai-service/src/web/v1/services/semantics_description.py @@ -65,24 +65,20 @@ def _chunking( "language": request.configuration.language, } + def _model_picker(model: dict, selected: list[str]) -> bool: + return model["name"] in selected or "*" in selected + chunks = [ { **model, "columns": model["columns"][i : i + chunk_size], } for model in mdl_dict["models"] - if model["name"] in request.selected_models + if _model_picker(model, request.selected_models) for i in range(0, len(model["columns"]), chunk_size) ] - return [ - { - **template, - "mdl": {"models": [chunk]}, - "selected_models": [chunk["name"]], - } - for chunk in chunks - ] + return [{**template, "mdl": {"models": [chunk]}} for chunk in chunks] async def _generate_task(self, request_id: str, chunk: dict): resp = await self._pipelines["semantics_description"].run(**chunk) From c65cb4dc079fbb368adaa6eac900404f7a5e5f07 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 15:51:14 +0800 Subject: [PATCH 05/17] chore: rename the semantics description to a more fitable name --- deployment/kustomizations/base/cm.yaml | 4 +- docker/config.example.yaml | 4 +- wren-ai-service/src/globals.py | 10 +-- .../src/pipelines/generation/__init__.py | 3 + ...tics_description.py => model_semantics.py} | 13 ++-- .../src/web/v1/routers/__init__.py | 4 + ...tics_description.py => model_semantics.py} | 10 +-- ...tics_description.py => model_semantics.py} | 18 +++-- wren-ai-service/tests/data/config.test.yaml | 2 +- ...description.py => test_model_semantics.py} | 74 +++++++++---------- .../tools/config/config.example.yaml | 4 +- wren-ai-service/tools/config/config.full.yaml | 4 +- 12 files changed, 78 insertions(+), 72 deletions(-) rename wren-ai-service/src/pipelines/generation/{semantics_description.py => model_semantics.py} (94%) rename wren-ai-service/src/web/v1/routers/{semantics_description.py => model_semantics.py} (95%) rename wren-ai-service/src/web/v1/services/{semantics_description.py => model_semantics.py} (87%) rename wren-ai-service/tests/pytest/services/{test_semantics_description.py => test_model_semantics.py} (79%) diff --git a/deployment/kustomizations/base/cm.yaml b/deployment/kustomizations/base/cm.yaml index c5a681383d..eab22e5dcd 100644 --- a/deployment/kustomizations/base/cm.yaml +++ b/deployment/kustomizations/base/cm.yaml @@ -143,8 +143,8 @@ data: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: semantics_description - llm: litellm_llm.gpt-4o-mini-2024-07-18 + - name: model_semantics + llm: openai_llm.gpt-4o-mini - name: 
relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/docker/config.example.yaml b/docker/config.example.yaml index 5f0557c462..122b0a7994 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -95,8 +95,8 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: semantics_description - llm: litellm_llm.gpt-4o-mini-2024-07-18 + - name: model_semantics + llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index 5555ffca5c..f65f52ebba 100644 --- a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -11,9 +11,9 @@ from src.web.v1.services.ask_details import AskDetailsService from src.web.v1.services.chart import ChartService from src.web.v1.services.chart_adjustment import ChartAdjustmentService +from src.web.v1.services.model_semantics import ModelSemantics from src.web.v1.services.question_recommendation import QuestionRecommendation from src.web.v1.services.relationship_recommendation import RelationshipRecommendation -from src.web.v1.services.semantics_description import SemanticsDescription from src.web.v1.services.semantics_preparation import SemanticsPreparationService from src.web.v1.services.sql_answer import SqlAnswerService from src.web.v1.services.sql_expansion import SqlExpansionService @@ -31,7 +31,7 @@ class ServiceContainer: ask_details_service: AskDetailsService question_recommendation: QuestionRecommendation relationship_recommendation: RelationshipRecommendation - semantics_description: SemanticsDescription + model_semantics: ModelSemantics semantics_preparation_service: SemanticsPreparationService chart_service: ChartService chart_adjustment_service: ChartAdjustmentService @@ -58,10 +58,10 @@ def create_service_container( "ttl": settings.query_cache_ttl, } return ServiceContainer( - semantics_description=SemanticsDescription( + model_semantics=ModelSemantics( pipelines={ - "semantics_description": generation.SemanticsDescription( - **pipe_components["semantics_description"], + "model_semantics": generation.ModelSemantics( + **pipe_components["model_semantics"], ) }, **query_cache, diff --git a/wren-ai-service/src/pipelines/generation/__init__.py b/wren-ai-service/src/pipelines/generation/__init__.py index 7d804e1255..d9e076c304 100644 --- a/wren-ai-service/src/pipelines/generation/__init__.py +++ b/wren-ai-service/src/pipelines/generation/__init__.py @@ -37,3 +37,6 @@ "SQLSummary", "SQLQuestion", ] +from .model_semantics import ModelSemantics + +__all__ = ["ModelSemantics"] diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/model_semantics.py similarity index 94% rename from wren-ai-service/src/pipelines/generation/semantics_description.py rename to wren-ai-service/src/pipelines/generation/model_semantics.py index 561eeb46e4..6169d2411d 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/model_semantics.py @@ -108,11 +108,11 @@ class SemanticResult(BaseModel): models: list[SemanticModel] -SEMANTICS_DESCRIPTION_MODEL_KWARGS = { +MODEL_SEMANTICS_KWARGS = { "response_format": { "type": "json_schema", "json_schema": { - "name": "semantic_description", + "name": "model_semantics", "schema": SemanticResult.model_json_schema(), }, } @@ -179,13 +179,13 @@ class 
SemanticResult(BaseModel): """ -class SemanticsDescription(BasicPipeline): +class ModelSemantics(BasicPipeline): def __init__(self, llm_provider: LLMProvider, **_): self._components = { "prompt_builder": PromptBuilder(template=user_prompt_template), "generator": llm_provider.get_generator( system_prompt=system_prompt, - generation_kwargs=SEMANTICS_DESCRIPTION_MODEL_KWARGS, + generation_kwargs=MODEL_SEMANTICS_KWARGS, ), } self._final = "normalize" @@ -201,7 +201,6 @@ async def run( mdl: dict, language: str = "en", ) -> dict: - logger.info("Semantics Description Generation pipeline is running...") return await self._pipe.execute( [self._final], inputs={ @@ -217,8 +216,8 @@ async def run( from src.pipelines.common import dry_run_pipeline dry_run_pipeline( - SemanticsDescription, - "semantics_description", + ModelSemantics, + "model_semantics", user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", mdl={}, selected_models=["*"], diff --git a/wren-ai-service/src/web/v1/routers/__init__.py b/wren-ai-service/src/web/v1/routers/__init__.py index 39a300aefc..5377d72c57 100644 --- a/wren-ai-service/src/web/v1/routers/__init__.py +++ b/wren-ai-service/src/web/v1/routers/__init__.py @@ -5,10 +5,12 @@ ask_details, chart, chart_adjustment, + model_semantics, question_recommendation, relationship_recommendation, semantics_description, semantics_preparation, + semantics_preparations, sql_answers, sql_expansions, sql_explanations, @@ -22,6 +24,8 @@ router.include_router(ask_details.router) router.include_router(question_recommendation.router) router.include_router(relationship_recommendation.router) +router.include_router(model_semantics.router) +router.include_router(semantics_preparations.router) router.include_router(semantics_description.router) router.include_router(semantics_preparation.router) router.include_router(sql_answers.router) diff --git a/wren-ai-service/src/web/v1/routers/semantics_description.py b/wren-ai-service/src/web/v1/routers/model_semantics.py similarity index 95% rename from wren-ai-service/src/web/v1/routers/semantics_description.py rename to wren-ai-service/src/web/v1/routers/model_semantics.py index 4aec029c49..70fe61874d 100644 --- a/wren-ai-service/src/web/v1/routers/semantics_description.py +++ b/wren-ai-service/src/web/v1/routers/model_semantics.py @@ -12,7 +12,7 @@ get_service_metadata, ) from src.web.v1.services import Configuration -from src.web.v1.services.semantics_description import SemanticsDescription +from src.web.v1.services.model_semantics import ModelSemantics router = APIRouter() @@ -113,10 +113,10 @@ async def generate( service_metadata: ServiceMetadata = Depends(get_service_metadata), ) -> PostResponse: id = str(uuid.uuid4()) - service = service_container.semantics_description + service = service_container.model_semantics - service[id] = SemanticsDescription.Resource(id=id) - input = SemanticsDescription.Input( + service[id] = ModelSemantics.Resource(id=id) + input = ModelSemantics.Input( id=id, selected_models=request.selected_models, user_prompt=request.user_prompt, @@ -146,7 +146,7 @@ async def get( id: str, service_container: ServiceContainer = Depends(get_service_container), ) -> GetResponse: - resource = service_container.semantics_description[id] + resource = service_container.model_semantics[id] def _formatter(response: Optional[dict]) -> Optional[list[dict]]: if response is None: diff --git a/wren-ai-service/src/web/v1/services/semantics_description.py 
b/wren-ai-service/src/web/v1/services/model_semantics.py similarity index 87% rename from wren-ai-service/src/web/v1/services/semantics_description.py rename to wren-ai-service/src/web/v1/services/model_semantics.py index aed141aa8e..64a54da735 100644 --- a/wren-ai-service/src/web/v1/services/semantics_description.py +++ b/wren-ai-service/src/web/v1/services/model_semantics.py @@ -14,7 +14,7 @@ logger = logging.getLogger("wren-ai-service") -class SemanticsDescription: +class ModelSemantics: class Input(BaseModel): id: str selected_models: list[str] @@ -40,7 +40,7 @@ def __init__( ttl: int = 120, ): self._pipelines = pipelines - self._cache: Dict[str, SemanticsDescription.Resource] = TTLCache( + self._cache: Dict[str, ModelSemantics.Resource] = TTLCache( maxsize=maxsize, ttl=ttl ) @@ -55,7 +55,7 @@ def _handle_exception( status="failed", error=self.Resource.Error(code=code, message=error_message), ) - logger.error(error_message) + logger.error(f"Project ID: {request.project_id}, {error_message}") def _chunking( self, mdl_dict: dict, request: Input, chunk_size: int = 50 @@ -81,7 +81,7 @@ def _model_picker(model: dict, selected: list[str]) -> bool: return [{**template, "mdl": {"models": [chunk]}} for chunk in chunks] async def _generate_task(self, request_id: str, chunk: dict): - resp = await self._pipelines["semantics_description"].run(**chunk) + resp = await self._pipelines["model_semantics"].run(**chunk) normalize = resp.get("normalize") current = self[request_id] @@ -94,10 +94,12 @@ async def _generate_task(self, request_id: str, chunk: dict): current.response[key]["columns"].extend(normalize[key]["columns"]) - @observe(name="Generate Semantics Description") + @observe(name="Generate Model Semantics") @trace_metadata async def generate(self, request: Input, **kwargs) -> Resource: - logger.info("Generate Semantics Description pipeline is running...") + logger.info( + f"Project ID: {request.project_id}, Generate Model Semantics pipeline is running..." + ) try: mdl_dict = orjson.loads(request.mdl) @@ -117,7 +119,7 @@ async def generate(self, request: Input, **kwargs) -> Resource: except Exception as e: self._handle_exception( request, - f"An error occurred during semantics description generation: {str(e)}", + f"An error occurred during model semantics generation: {str(e)}", ) return self[request.id].with_metadata() @@ -126,7 +128,7 @@ def __getitem__(self, id: str) -> Resource: response = self._cache.get(id) if response is None: - message = f"Semantics Description Resource with ID '{id}' not found." + message = f"Model Semantics Resource with ID '{id}' not found." 
logger.exception(message) return self.Resource( id=id, diff --git a/wren-ai-service/tests/data/config.test.yaml b/wren-ai-service/tests/data/config.test.yaml index ae366d17fc..fd973d739f 100644 --- a/wren-ai-service/tests/data/config.test.yaml +++ b/wren-ai-service/tests/data/config.test.yaml @@ -70,7 +70,7 @@ pipes: - name: sql_regeneration llm: openai_llm.gpt-4o-mini engine: wren_ui - - name: semantics_description + - name: model_semantics llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: openai_llm.gpt-4o-mini diff --git a/wren-ai-service/tests/pytest/services/test_semantics_description.py b/wren-ai-service/tests/pytest/services/test_model_semantics.py similarity index 79% rename from wren-ai-service/tests/pytest/services/test_semantics_description.py rename to wren-ai-service/tests/pytest/services/test_model_semantics.py index dc48e73396..31f82c7e4e 100644 --- a/wren-ai-service/tests/pytest/services/test_semantics_description.py +++ b/wren-ai-service/tests/pytest/services/test_model_semantics.py @@ -4,7 +4,7 @@ import orjson import pytest -from src.web.v1.services.semantics_description import SemanticsDescription +from src.web.v1.services.model_semantics import ModelSemantics @pytest.fixture @@ -19,16 +19,16 @@ def service(): } } - pipelines = {"semantics_description": mock_pipeline} - return SemanticsDescription(pipelines=pipelines) + pipelines = {"model_semantics": mock_pipeline} + return ModelSemantics(pipelines=pipelines) @pytest.mark.asyncio -async def test_generate_semantics_description( - service: SemanticsDescription, +async def test_generate_model_semantics( + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], @@ -50,11 +50,11 @@ async def test_generate_semantics_description( @pytest.mark.asyncio -async def test_generate_semantics_description_with_invalid_mdl( - service: SemanticsDescription, +async def test_generate_model_semantics_with_invalid_mdl( + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], @@ -72,20 +72,18 @@ async def test_generate_semantics_description_with_invalid_mdl( @pytest.mark.asyncio -async def test_generate_semantics_description_with_exception( - service: SemanticsDescription, +async def test_generate_model_semantics_with_exception( + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], mdl='{"models": [{"name": "model1", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}]}', ) - service._pipelines["semantics_description"].run.side_effect = Exception( - "Test exception" - ) + service._pipelines["model_semantics"].run.side_effect = Exception("Test exception") await service.generate(request) response = service[request.id] @@ -100,10 +98,10 @@ async def test_generate_semantics_description_with_exception( ) -def test_get_semantics_description_result( - service: 
SemanticsDescription, +def test_get_model_semantics_result( + service: ModelSemantics, ): - expected_response = SemanticsDescription.Resource( + expected_response = ModelSemantics.Resource( id="test_id", status="finished", response={"model1": {"description": "Test description"}}, @@ -115,8 +113,8 @@ def test_get_semantics_description_result( assert result == expected_response -def test_get_non_existent_semantics_description_result( - service: SemanticsDescription, +def test_get_non_existent_model_semantics_result( + service: ModelSemantics, ): result = service["non_existent_id"] @@ -129,10 +127,10 @@ def test_get_non_existent_semantics_description_result( @pytest.mark.asyncio async def test_batch_processing_with_multiple_models( - service: SemanticsDescription, + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3"], @@ -140,7 +138,7 @@ async def test_batch_processing_with_multiple_models( ) # Mock pipeline responses for each chunk - service._pipelines["semantics_description"].run.side_effect = [ + service._pipelines["model_semantics"].run.side_effect = [ {"normalize": {"model1": {"description": "Description 1"}}}, {"normalize": {"model2": {"description": "Description 2"}}}, {"normalize": {"model3": {"description": "Description 3"}}}, @@ -165,10 +163,10 @@ async def test_batch_processing_with_multiple_models( def test_batch_processing_with_custom_chunk_size( - service: SemanticsDescription, + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3", "model4"], @@ -188,10 +186,10 @@ def test_batch_processing_with_custom_chunk_size( @pytest.mark.asyncio async def test_batch_processing_partial_failure( - service: SemanticsDescription, + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2"], @@ -199,7 +197,7 @@ async def test_batch_processing_partial_failure( ) # Mock first chunk succeeds, second chunk fails - service._pipelines["semantics_description"].run.side_effect = [ + service._pipelines["model_semantics"].run.side_effect = [ {"normalize": {"model1": {"description": "Description 1"}}}, Exception("Failed processing model2"), ] @@ -215,12 +213,12 @@ async def test_batch_processing_partial_failure( @pytest.mark.asyncio async def test_concurrent_updates_no_race_condition( - service: SemanticsDescription, + service: ModelSemantics, ): test_id = "concurrent_test" - service[test_id] = SemanticsDescription.Resource(id=test_id) + service[test_id] = ModelSemantics.Resource(id=test_id) - request = SemanticsDescription.Input( + request = ModelSemantics.Input( id=test_id, user_prompt="Test concurrent updates", selected_models=["model1", "model2", "model3", "model4", "model5"], @@ -236,7 +234,7 @@ async def delayed_response(model_num, delay=0.1): } } - service._pipelines["semantics_description"].run.side_effect = [ 
+ service._pipelines["model_semantics"].run.side_effect = [ await delayed_response(1), await delayed_response(2), await delayed_response(3), diff --git a/wren-ai-service/tools/config/config.example.yaml b/wren-ai-service/tools/config/config.example.yaml index 093a80e6d2..4c92b7d899 100644 --- a/wren-ai-service/tools/config/config.example.yaml +++ b/wren-ai-service/tools/config/config.example.yaml @@ -109,8 +109,8 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: semantics_description - llm: litellm_llm.gpt-4o-mini-2024-07-18 + - name: model_semantics + llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.full.yaml b/wren-ai-service/tools/config/config.full.yaml index 962c1153be..3e0714b1b4 100644 --- a/wren-ai-service/tools/config/config.full.yaml +++ b/wren-ai-service/tools/config/config.full.yaml @@ -109,8 +109,8 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: semantics_description - llm: litellm_llm.gpt-4o-mini-2024-07-18 + - name: model_semantics + llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui From 5eff92da4fd50a5af76634570b7fc1dedb8dc1b1 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 16:06:53 +0800 Subject: [PATCH 06/17] feat: update test cases --- .../pytest/services/test_model_semantics.py | 91 +++++++++++++++---- 1 file changed, 75 insertions(+), 16 deletions(-) diff --git a/wren-ai-service/tests/pytest/services/test_model_semantics.py b/wren-ai-service/tests/pytest/services/test_model_semantics.py index 31f82c7e4e..354c487da1 100644 --- a/wren-ai-service/tests/pytest/services/test_model_semantics.py +++ b/wren-ai-service/tests/pytest/services/test_model_semantics.py @@ -13,8 +13,21 @@ def service(): mock_pipeline.run.return_value = { "normalize": { "model1": { - "columns": [], - "properties": {"description": "Test description"}, + "columns": [ + { + "name": "column1", + "type": "varchar", + "notNull": False, + "properties": { + "description": "Test description", + "alias": "column1_alias", + }, + } + ], + "properties": { + "description": "Test description", + "alias": "model1_alias", + }, } } } @@ -42,8 +55,21 @@ async def test_generate_model_semantics( assert response.status == "finished" assert response.response == { "model1": { - "columns": [], - "properties": {"description": "Test description"}, + "columns": [ + { + "name": "column1", + "type": "varchar", + "notNull": False, + "properties": { + "description": "Test description", + "alias": "column1_alias", + }, + } + ], + "properties": { + "description": "Test description", + "alias": "model1_alias", + }, } } assert response.error is None @@ -93,8 +119,7 @@ async def test_generate_model_semantics_with_exception( assert response.response is None assert response.error.code == "OTHERS" assert ( - "An error occurred during semantics description generation" - in response.error.message + "An error occurred during model semantics generation:" in response.error.message ) @@ -159,41 +184,75 @@ async def test_batch_processing_with_multiple_models( assert len(chunks) == 3 # Default chunk_size=1 assert all("user_prompt" in chunk for chunk in chunks) assert all("mdl" in chunk for chunk in chunks) - assert [len(chunk["selected_models"]) for chunk in chunks] == [1, 1, 1] def test_batch_processing_with_custom_chunk_size( service: 
ModelSemantics, ): + test_mdl = { + "models": [ + { + "name": "model1", + "columns": [{"name": "column1", "type": "varchar", "notNull": False}], + }, + { + "name": "model2", + "columns": [{"name": "column1", "type": "varchar", "notNull": False}], + }, + { + "name": "model3", + "columns": [{"name": "column1", "type": "varchar", "notNull": False}], + }, + { + "name": "model4", + "columns": [ + {"name": "column1", "type": "varchar", "notNull": False}, + {"name": "column2", "type": "varchar", "notNull": False}, + ], + }, + ] + } service["test_id"] = ModelSemantics.Resource(id="test_id") request = ModelSemantics.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3", "model4"], - mdl='{"models": [{"name": "model1", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}, {"name": "model2", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}, {"name": "model3", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}, {"name": "model4", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}]}', + mdl=orjson.dumps(test_mdl), ) # Test chunking with custom chunk size - chunks = service._chunking(orjson.loads(request.mdl), request, chunk_size=2) + chunks = service._chunking(orjson.loads(request.mdl), request, chunk_size=1) - assert len(chunks) == 4 - assert [len(chunk["selected_models"]) for chunk in chunks] == [1, 1, 1, 1] - assert chunks[0]["selected_models"] == ["model1"] - assert chunks[1]["selected_models"] == ["model2"] - assert chunks[2]["selected_models"] == ["model3"] - assert chunks[3]["selected_models"] == ["model4"] + assert len(chunks) == 5 + assert chunks[0]["mdl"]["models"][0]["name"] == "model1" + assert chunks[1]["mdl"]["models"][0]["name"] == "model2" + assert chunks[2]["mdl"]["models"][0]["name"] == "model3" + assert chunks[3]["mdl"]["models"][0]["name"] == "model4" + assert chunks[4]["mdl"]["models"][0]["name"] == "model4" @pytest.mark.asyncio async def test_batch_processing_partial_failure( service: ModelSemantics, ): + test_mdl = { + "models": [ + { + "name": "model1", + "columns": [{"name": "column1", "type": "varchar", "notNull": False}], + }, + { + "name": "model2", + "columns": [{"name": "column1", "type": "varchar", "notNull": False}], + }, + ] + } service["test_id"] = ModelSemantics.Resource(id="test_id") request = ModelSemantics.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2"], - mdl='{"models": [{"name": "model1", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}, {"name": "model2", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}]}', + mdl=orjson.dumps(test_mdl), ) # Mock first chunk succeeds, second chunk fails From 64306e492c9eab927de5d8adc60b6a95317f4cc2 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 16:08:07 +0800 Subject: [PATCH 07/17] feat: change the attr name for web sepc --- wren-ai-service/src/web/v1/routers/model_semantics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-ai-service/src/web/v1/routers/model_semantics.py b/wren-ai-service/src/web/v1/routers/model_semantics.py index 70fe61874d..079d8f7277 100644 --- a/wren-ai-service/src/web/v1/routers/model_semantics.py +++ b/wren-ai-service/src/web/v1/routers/model_semantics.py @@ -163,7 +163,7 @@ def _formatter(response: Optional[dict]) -> Optional[list[dict]]: } for column in model_data["columns"] ], - "alias": model_data["properties"].get("alias", ""), + 
"displayName": model_data["properties"].get("alias", ""), "description": model_data["properties"].get("description", ""), } for model_name, model_data in response.items() From c8594c8f0b1bc2199aecb50cabb0523d63cf7a3d Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 17:47:35 +0800 Subject: [PATCH 08/17] chore: rename the semantics description to a better name --- deployment/kustomizations/base/cm.yaml | 2 +- docker/config.example.yaml | 2 +- wren-ai-service/src/globals.py | 10 +-- .../src/pipelines/generation/__init__.py | 3 +- ...l_semantics.py => semantics_enrichment.py} | 12 +-- .../src/web/v1/routers/__init__.py | 4 +- ...l_semantics.py => semantics_enrichment.py} | 78 +++++++++---------- ...l_semantics.py => semantics_enrichment.py} | 14 ++-- wren-ai-service/tests/data/config.test.yaml | 2 +- ...antics.py => test_semantics_enrichment.py} | 78 +++++++++---------- .../tools/config/config.example.yaml | 2 +- wren-ai-service/tools/config/config.full.yaml | 2 +- 12 files changed, 105 insertions(+), 104 deletions(-) rename wren-ai-service/src/pipelines/generation/{model_semantics.py => semantics_enrichment.py} (96%) rename wren-ai-service/src/web/v1/routers/{model_semantics.py => semantics_enrichment.py} (73%) rename wren-ai-service/src/web/v1/services/{model_semantics.py => semantics_enrichment.py} (89%) rename wren-ai-service/tests/pytest/services/{test_model_semantics.py => test_semantics_enrichment.py} (81%) diff --git a/deployment/kustomizations/base/cm.yaml b/deployment/kustomizations/base/cm.yaml index eab22e5dcd..a065938b35 100644 --- a/deployment/kustomizations/base/cm.yaml +++ b/deployment/kustomizations/base/cm.yaml @@ -143,7 +143,7 @@ data: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 diff --git a/docker/config.example.yaml b/docker/config.example.yaml index 122b0a7994..ca781ced8d 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -95,7 +95,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index f65f52ebba..d267e43b7e 100644 --- a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -11,9 +11,9 @@ from src.web.v1.services.ask_details import AskDetailsService from src.web.v1.services.chart import ChartService from src.web.v1.services.chart_adjustment import ChartAdjustmentService -from src.web.v1.services.model_semantics import ModelSemantics from src.web.v1.services.question_recommendation import QuestionRecommendation from src.web.v1.services.relationship_recommendation import RelationshipRecommendation +from src.web.v1.services.semantics_enrichment import SemanticsEnrichment from src.web.v1.services.semantics_preparation import SemanticsPreparationService from src.web.v1.services.sql_answer import SqlAnswerService from src.web.v1.services.sql_expansion import SqlExpansionService @@ -31,7 +31,7 @@ class ServiceContainer: ask_details_service: AskDetailsService question_recommendation: QuestionRecommendation relationship_recommendation: RelationshipRecommendation - model_semantics: ModelSemantics + semantics_enrichment: SemanticsEnrichment 
semantics_preparation_service: SemanticsPreparationService chart_service: ChartService chart_adjustment_service: ChartAdjustmentService @@ -58,10 +58,10 @@ def create_service_container( "ttl": settings.query_cache_ttl, } return ServiceContainer( - model_semantics=ModelSemantics( + semantics_enrichment=SemanticsEnrichment( pipelines={ - "model_semantics": generation.ModelSemantics( - **pipe_components["model_semantics"], + "semantics_enrichment": generation.SemanticsEnrichment( + **pipe_components["semantics_enrichment"], ) }, **query_cache, diff --git a/wren-ai-service/src/pipelines/generation/__init__.py b/wren-ai-service/src/pipelines/generation/__init__.py index d9e076c304..78ee3494de 100644 --- a/wren-ai-service/src/pipelines/generation/__init__.py +++ b/wren-ai-service/src/pipelines/generation/__init__.py @@ -1,3 +1,4 @@ +from .semantics_enrichment import SemanticsEnrichment from .chart_adjustment import ChartAdjustment from .chart_generation import ChartGeneration from .data_assistance import DataAssistance @@ -39,4 +40,4 @@ ] from .model_semantics import ModelSemantics -__all__ = ["ModelSemantics"] +__all__ = ["SemanticsEnrichment"] diff --git a/wren-ai-service/src/pipelines/generation/model_semantics.py b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py similarity index 96% rename from wren-ai-service/src/pipelines/generation/model_semantics.py rename to wren-ai-service/src/pipelines/generation/semantics_enrichment.py index 6169d2411d..41fbbe5089 100644 --- a/wren-ai-service/src/pipelines/generation/model_semantics.py +++ b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py @@ -108,11 +108,11 @@ class SemanticResult(BaseModel): models: list[SemanticModel] -MODEL_SEMANTICS_KWARGS = { +semantics_enrichment_KWARGS = { "response_format": { "type": "json_schema", "json_schema": { - "name": "model_semantics", + "name": "semantics_enrichment", "schema": SemanticResult.model_json_schema(), }, } @@ -179,13 +179,13 @@ class SemanticResult(BaseModel): """ -class ModelSemantics(BasicPipeline): +class SemanticsEnrichment(BasicPipeline): def __init__(self, llm_provider: LLMProvider, **_): self._components = { "prompt_builder": PromptBuilder(template=user_prompt_template), "generator": llm_provider.get_generator( system_prompt=system_prompt, - generation_kwargs=MODEL_SEMANTICS_KWARGS, + generation_kwargs=semantics_enrichment_KWARGS, ), } self._final = "normalize" @@ -216,8 +216,8 @@ async def run( from src.pipelines.common import dry_run_pipeline dry_run_pipeline( - ModelSemantics, - "model_semantics", + SemanticsEnrichment, + "semantics_enrichment", user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", mdl={}, selected_models=["*"], diff --git a/wren-ai-service/src/web/v1/routers/__init__.py b/wren-ai-service/src/web/v1/routers/__init__.py index 5377d72c57..1df8f4568a 100644 --- a/wren-ai-service/src/web/v1/routers/__init__.py +++ b/wren-ai-service/src/web/v1/routers/__init__.py @@ -5,11 +5,11 @@ ask_details, chart, chart_adjustment, - model_semantics, question_recommendation, relationship_recommendation, semantics_description, semantics_preparation, + semantics_enrichment, semantics_preparations, sql_answers, sql_expansions, @@ -24,7 +24,7 @@ router.include_router(ask_details.router) router.include_router(question_recommendation.router) router.include_router(relationship_recommendation.router) -router.include_router(model_semantics.router) 
+router.include_router(semantics_enrichment.router) router.include_router(semantics_preparations.router) router.include_router(semantics_description.router) router.include_router(semantics_preparation.router) diff --git a/wren-ai-service/src/web/v1/routers/model_semantics.py b/wren-ai-service/src/web/v1/routers/semantics_enrichment.py similarity index 73% rename from wren-ai-service/src/web/v1/routers/model_semantics.py rename to wren-ai-service/src/web/v1/routers/semantics_enrichment.py index 079d8f7277..0068187a47 100644 --- a/wren-ai-service/src/web/v1/routers/model_semantics.py +++ b/wren-ai-service/src/web/v1/routers/semantics_enrichment.py @@ -12,18 +12,18 @@ get_service_metadata, ) from src.web.v1.services import Configuration -from src.web.v1.services.model_semantics import ModelSemantics +from src.web.v1.services.semantics_enrichment import SemanticsEnrichment router = APIRouter() """ -Semantics Description Router +Semantics Enrichment Router -This router handles endpoints related to generating and retrieving semantic descriptions. +This router handles endpoints related to generating and retrieving semantics enrichment for data models. Endpoints: -1. POST /semantics-descriptions - - Generates a new semantic description +1. POST /semantics-enrichment + - Generates a new semantics enrichment task for data models - Request body: PostRequest { "selected_models": ["model1", "model2"], # List of model names to describe @@ -39,46 +39,36 @@ "id": "unique-uuid" # Unique identifier for the generated description } -2. GET /semantics-descriptions/{id} - - Retrieves the status and result of a semantic description generation +2. GET /semantics-enrichment/{id} + - Retrieves the status and result of a semantics enrichment generation - Path parameter: id (str) - Response: GetResponse { "id": "unique-uuid", # Unique identifier of the description "status": "generating" | "finished" | "failed", - "response": [ # Present only if status is "finished" or "generating" - { - "name": "model1", - "columns": [ - { - "name": "col1", - "alias": "col1_alias", - "description": "Unique identifier for each record in the example model." - } - ], - "alias": "model1_alias", - "description": "This model is used for analysis purposes, capturing key attributes of records." - }, - { - "name": "model2", - "columns": [ - { - "name": "col1", - "alias": "col1_alias", - "description": "Unique identifier for each record in the example model." - } - ], - "alias": "model2_alias", - "description": "This model is used for analysis purposes, capturing key attributes of records." - } - ], + "response": { # Present only if status is "finished" or "generating" + "models": [ + { + "name": "model1", + "columns": [ + { + "name": "col1", + "displayName": "col1_alias", + "description": "Unique identifier for each record in the example model." + } + ], + "displayName": "model1_alias", + "description": "This model is used for analysis purposes, capturing key attributes of records." + } + ] + }, "error": { # Present only if status is "failed" "code": "OTHERS", "message": "Error description" } } -The semantic description generation is an asynchronous process. The POST endpoint +The semantics enrichment generation is an asynchronous process. The POST endpoint initiates the generation and returns immediately with an ID. The GET endpoint can then be used to check the status and retrieve the result when it's ready. 
@@ -102,9 +92,14 @@ class PostResponse(BaseModel): id: str +@router.post( + "/semantics-enrichment", + response_model=PostResponse, +) @router.post( "/semantics-descriptions", response_model=PostResponse, + deprecated=True, ) async def generate( request: PostRequest, @@ -113,10 +108,10 @@ async def generate( service_metadata: ServiceMetadata = Depends(get_service_metadata), ) -> PostResponse: id = str(uuid.uuid4()) - service = service_container.model_semantics + service = service_container.semantics_enrichment - service[id] = ModelSemantics.Resource(id=id) - input = ModelSemantics.Input( + service[id] = SemanticsEnrichment.Resource(id=id) + input = SemanticsEnrichment.Input( id=id, selected_models=request.selected_models, user_prompt=request.user_prompt, @@ -138,15 +133,20 @@ class GetResponse(BaseModel): error: Optional[dict] +@router.get( + "/semantics-enrichment/{id}", + response_model=GetResponse, +) @router.get( "/semantics-descriptions/{id}", response_model=GetResponse, + deprecated=True, ) async def get( id: str, service_container: ServiceContainer = Depends(get_service_container), ) -> GetResponse: - resource = service_container.model_semantics[id] + resource = service_container.semantics_enrichment[id] def _formatter(response: Optional[dict]) -> Optional[list[dict]]: if response is None: @@ -158,7 +158,7 @@ def _formatter(response: Optional[dict]) -> Optional[list[dict]]: "columns": [ { "name": column["name"], - "alias": column["properties"].get("alias", ""), + "displayName": column["properties"].get("alias", ""), "description": column["properties"].get("description", ""), } for column in model_data["columns"] diff --git a/wren-ai-service/src/web/v1/services/model_semantics.py b/wren-ai-service/src/web/v1/services/semantics_enrichment.py similarity index 89% rename from wren-ai-service/src/web/v1/services/model_semantics.py rename to wren-ai-service/src/web/v1/services/semantics_enrichment.py index 64a54da735..8ad0e1af13 100644 --- a/wren-ai-service/src/web/v1/services/model_semantics.py +++ b/wren-ai-service/src/web/v1/services/semantics_enrichment.py @@ -14,7 +14,7 @@ logger = logging.getLogger("wren-ai-service") -class ModelSemantics: +class SemanticsEnrichment: class Input(BaseModel): id: str selected_models: list[str] @@ -40,7 +40,7 @@ def __init__( ttl: int = 120, ): self._pipelines = pipelines - self._cache: Dict[str, ModelSemantics.Resource] = TTLCache( + self._cache: Dict[str, SemanticsEnrichment.Resource] = TTLCache( maxsize=maxsize, ttl=ttl ) @@ -81,7 +81,7 @@ def _model_picker(model: dict, selected: list[str]) -> bool: return [{**template, "mdl": {"models": [chunk]}} for chunk in chunks] async def _generate_task(self, request_id: str, chunk: dict): - resp = await self._pipelines["model_semantics"].run(**chunk) + resp = await self._pipelines["semantics_enrichment"].run(**chunk) normalize = resp.get("normalize") current = self[request_id] @@ -94,11 +94,11 @@ async def _generate_task(self, request_id: str, chunk: dict): current.response[key]["columns"].extend(normalize[key]["columns"]) - @observe(name="Generate Model Semantics") + @observe(name="Enrich Semantics") @trace_metadata async def generate(self, request: Input, **kwargs) -> Resource: logger.info( - f"Project ID: {request.project_id}, Generate Model Semantics pipeline is running..." + f"Project ID: {request.project_id}, Enrich Semantics pipeline is running..." 
) try: @@ -119,7 +119,7 @@ async def generate(self, request: Input, **kwargs) -> Resource: except Exception as e: self._handle_exception( request, - f"An error occurred during model semantics generation: {str(e)}", + f"An error occurred during semantics enrichment: {str(e)}", ) return self[request.id].with_metadata() @@ -128,7 +128,7 @@ def __getitem__(self, id: str) -> Resource: response = self._cache.get(id) if response is None: - message = f"Model Semantics Resource with ID '{id}' not found." + message = f"Semantics Enrichment Resource with ID '{id}' not found." logger.exception(message) return self.Resource( id=id, diff --git a/wren-ai-service/tests/data/config.test.yaml b/wren-ai-service/tests/data/config.test.yaml index fd973d739f..c80babc01e 100644 --- a/wren-ai-service/tests/data/config.test.yaml +++ b/wren-ai-service/tests/data/config.test.yaml @@ -70,7 +70,7 @@ pipes: - name: sql_regeneration llm: openai_llm.gpt-4o-mini engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: openai_llm.gpt-4o-mini diff --git a/wren-ai-service/tests/pytest/services/test_model_semantics.py b/wren-ai-service/tests/pytest/services/test_semantics_enrichment.py similarity index 81% rename from wren-ai-service/tests/pytest/services/test_model_semantics.py rename to wren-ai-service/tests/pytest/services/test_semantics_enrichment.py index 354c487da1..2d9c31f40a 100644 --- a/wren-ai-service/tests/pytest/services/test_model_semantics.py +++ b/wren-ai-service/tests/pytest/services/test_semantics_enrichment.py @@ -4,7 +4,7 @@ import orjson import pytest -from src.web.v1.services.model_semantics import ModelSemantics +from src.web.v1.services.semantics_enrichment import SemanticsEnrichment @pytest.fixture @@ -32,16 +32,16 @@ def service(): } } - pipelines = {"model_semantics": mock_pipeline} - return ModelSemantics(pipelines=pipelines) + pipelines = {"semantics_enrichment": mock_pipeline} + return SemanticsEnrichment(pipelines=pipelines) @pytest.mark.asyncio -async def test_generate_model_semantics( - service: ModelSemantics, +async def test_generate_semantics_enrichment( + service: SemanticsEnrichment, ): - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], @@ -76,11 +76,11 @@ async def test_generate_model_semantics( @pytest.mark.asyncio -async def test_generate_model_semantics_with_invalid_mdl( - service: ModelSemantics, +async def test_generate_semantics_enrichment_with_invalid_mdl( + service: SemanticsEnrichment, ): - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], @@ -98,18 +98,20 @@ async def test_generate_model_semantics_with_invalid_mdl( @pytest.mark.asyncio -async def test_generate_model_semantics_with_exception( - service: ModelSemantics, +async def test_generate_semantics_enrichment_with_exception( + service: SemanticsEnrichment, ): - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the 
model", selected_models=["model1"], mdl='{"models": [{"name": "model1", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}]}', ) - service._pipelines["model_semantics"].run.side_effect = Exception("Test exception") + service._pipelines["semantics_enrichment"].run.side_effect = Exception( + "Test exception" + ) await service.generate(request) response = service[request.id] @@ -118,15 +120,13 @@ async def test_generate_model_semantics_with_exception( assert response.status == "failed" assert response.response is None assert response.error.code == "OTHERS" - assert ( - "An error occurred during model semantics generation:" in response.error.message - ) + assert "An error occurred during semantics enrichment:" in response.error.message -def test_get_model_semantics_result( - service: ModelSemantics, +def test_get_semantics_enrichment_result( + service: SemanticsEnrichment, ): - expected_response = ModelSemantics.Resource( + expected_response = SemanticsEnrichment.Resource( id="test_id", status="finished", response={"model1": {"description": "Test description"}}, @@ -138,8 +138,8 @@ def test_get_model_semantics_result( assert result == expected_response -def test_get_non_existent_model_semantics_result( - service: ModelSemantics, +def test_get_non_existent_semantics_enrichment_result( + service: SemanticsEnrichment, ): result = service["non_existent_id"] @@ -152,10 +152,10 @@ def test_get_non_existent_model_semantics_result( @pytest.mark.asyncio async def test_batch_processing_with_multiple_models( - service: ModelSemantics, + service: SemanticsEnrichment, ): - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3"], @@ -163,7 +163,7 @@ async def test_batch_processing_with_multiple_models( ) # Mock pipeline responses for each chunk - service._pipelines["model_semantics"].run.side_effect = [ + service._pipelines["semantics_enrichment"].run.side_effect = [ {"normalize": {"model1": {"description": "Description 1"}}}, {"normalize": {"model2": {"description": "Description 2"}}}, {"normalize": {"model3": {"description": "Description 3"}}}, @@ -187,7 +187,7 @@ async def test_batch_processing_with_multiple_models( def test_batch_processing_with_custom_chunk_size( - service: ModelSemantics, + service: SemanticsEnrichment, ): test_mdl = { "models": [ @@ -212,8 +212,8 @@ def test_batch_processing_with_custom_chunk_size( }, ] } - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3", "model4"], @@ -233,7 +233,7 @@ def test_batch_processing_with_custom_chunk_size( @pytest.mark.asyncio async def test_batch_processing_partial_failure( - service: ModelSemantics, + service: SemanticsEnrichment, ): test_mdl = { "models": [ @@ -247,8 +247,8 @@ async def test_batch_processing_partial_failure( }, ] } - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2"], @@ -256,7 +256,7 @@ async def 
test_batch_processing_partial_failure( ) # Mock first chunk succeeds, second chunk fails - service._pipelines["model_semantics"].run.side_effect = [ + service._pipelines["semantics_enrichment"].run.side_effect = [ {"normalize": {"model1": {"description": "Description 1"}}}, Exception("Failed processing model2"), ] @@ -272,12 +272,12 @@ async def test_batch_processing_partial_failure( @pytest.mark.asyncio async def test_concurrent_updates_no_race_condition( - service: ModelSemantics, + service: SemanticsEnrichment, ): test_id = "concurrent_test" - service[test_id] = ModelSemantics.Resource(id=test_id) + service[test_id] = SemanticsEnrichment.Resource(id=test_id) - request = ModelSemantics.Input( + request = SemanticsEnrichment.Input( id=test_id, user_prompt="Test concurrent updates", selected_models=["model1", "model2", "model3", "model4", "model5"], @@ -293,7 +293,7 @@ async def delayed_response(model_num, delay=0.1): } } - service._pipelines["model_semantics"].run.side_effect = [ + service._pipelines["semantics_enrichment"].run.side_effect = [ await delayed_response(1), await delayed_response(2), await delayed_response(3), diff --git a/wren-ai-service/tools/config/config.example.yaml b/wren-ai-service/tools/config/config.example.yaml index 4c92b7d899..d7bd4d20f2 100644 --- a/wren-ai-service/tools/config/config.example.yaml +++ b/wren-ai-service/tools/config/config.example.yaml @@ -109,7 +109,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 diff --git a/wren-ai-service/tools/config/config.full.yaml b/wren-ai-service/tools/config/config.full.yaml index 3e0714b1b4..b113c1c8a0 100644 --- a/wren-ai-service/tools/config/config.full.yaml +++ b/wren-ai-service/tools/config/config.full.yaml @@ -109,7 +109,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 From 23abc9cefc184f38871b005b2bbac56e413be5ee Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 17:54:01 +0800 Subject: [PATCH 09/17] feat: remove unused parameter --- wren-ai-service/src/pipelines/generation/semantics_enrichment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_enrichment.py b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py index 41fbbe5089..d824ae123a 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_enrichment.py +++ b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py @@ -220,6 +220,5 @@ async def run( "semantics_enrichment", user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", mdl={}, - selected_models=["*"], language="en", ) From 153d8ffbde60ca8551182b518fc2c0299db43562 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Fri, 6 Dec 2024 15:36:31 +0800 Subject: [PATCH 10/17] chore: expose the class from package level for generation pipe --- wren-ai-service/src/globals.py | 2 +- .../src/pipelines/generation/__init__.py | 26 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index d267e43b7e..901aaccc5e 100644 --- 
a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -1,4 +1,4 @@ -import logging +import logging # noqa: I001 from dataclasses import asdict, dataclass import toml diff --git a/wren-ai-service/src/pipelines/generation/__init__.py b/wren-ai-service/src/pipelines/generation/__init__.py index 78ee3494de..d6079f0fe6 100644 --- a/wren-ai-service/src/pipelines/generation/__init__.py +++ b/wren-ai-service/src/pipelines/generation/__init__.py @@ -1,3 +1,10 @@ +from .chart_adjustment import ChartAdjustment +from .chart_generation import ChartGeneration +from .data_assistance import DataAssistance +from .followup_sql_generation import FollowUpSQLGeneration +from .intent_classification import IntentClassification +from .question_recommendation import QuestionRecommendation +from .relationship_recommendation import RelationshipRecommendation from .semantics_enrichment import SemanticsEnrichment from .chart_adjustment import ChartAdjustment from .chart_generation import ChartGeneration @@ -40,4 +47,21 @@ ] from .model_semantics import ModelSemantics -__all__ = ["SemanticsEnrichment"] +__all__ = [ + "ChartAdjustment", + "ChartGeneration", + "DataAssistance", + "FollowUpSQLGeneration", + "IntentClassification", + "QuestionRecommendation", + "RelationshipRecommendation", + "SemanticsEnrichment", + "SQLAnswer", + "SQLBreakdown", + "SQLCorrection", + "SQLExpansion", + "SQLExplanation", + "SQLGeneration", + "SQLRegeneration", + "SQLSummary", +] From c68a90a8e88962c186a25c8ffbd9b1c2e0e1d6c6 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 12 Dec 2024 15:46:56 +0800 Subject: [PATCH 11/17] chore: change to litellm llm provider for deployment file --- deployment/kustomizations/base/cm.yaml | 2 +- docker/config.example.yaml | 2 +- wren-ai-service/tools/config/config.example.yaml | 2 +- wren-ai-service/tools/config/config.full.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deployment/kustomizations/base/cm.yaml b/deployment/kustomizations/base/cm.yaml index a065938b35..2d659d94cf 100644 --- a/deployment/kustomizations/base/cm.yaml +++ b/deployment/kustomizations/base/cm.yaml @@ -144,7 +144,7 @@ data: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: openai_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/docker/config.example.yaml b/docker/config.example.yaml index ca781ced8d..4b2f0ec6cf 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -96,7 +96,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: openai_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.example.yaml b/wren-ai-service/tools/config/config.example.yaml index d7bd4d20f2..1faf5145e4 100644 --- a/wren-ai-service/tools/config/config.example.yaml +++ b/wren-ai-service/tools/config/config.example.yaml @@ -110,7 +110,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: openai_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.full.yaml b/wren-ai-service/tools/config/config.full.yaml index b113c1c8a0..b1cf708716 100644 --- 
a/wren-ai-service/tools/config/config.full.yaml +++ b/wren-ai-service/tools/config/config.full.yaml @@ -110,7 +110,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: openai_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui From 53104849cc9953c5424e05a717f4145845c6763d Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Tue, 17 Dec 2024 18:39:11 +0800 Subject: [PATCH 12/17] chore: remove unnecessary comment --- wren-ai-service/src/globals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index 901aaccc5e..d267e43b7e 100644 --- a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -1,4 +1,4 @@ -import logging # noqa: I001 +import logging from dataclasses import asdict, dataclass import toml From 161c270f48f4a5e4c5a828af61d9b60cd7b36ade Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Mon, 23 Dec 2024 16:32:32 +0800 Subject: [PATCH 13/17] chore: specify the dated model snapshot id --- deployment/kustomizations/base/cm.yaml | 2 +- docker/config.example.yaml | 2 +- wren-ai-service/tools/config/config.example.yaml | 2 +- wren-ai-service/tools/config/config.full.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deployment/kustomizations/base/cm.yaml b/deployment/kustomizations/base/cm.yaml index 2d659d94cf..d1f6929556 100644 --- a/deployment/kustomizations/base/cm.yaml +++ b/deployment/kustomizations/base/cm.yaml @@ -144,7 +144,7 @@ data: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: litellm_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini-2024-07-18 - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/docker/config.example.yaml b/docker/config.example.yaml index 4b2f0ec6cf..89cd53d417 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -96,7 +96,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: litellm_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini-2024-07-18 - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.example.yaml b/wren-ai-service/tools/config/config.example.yaml index 1faf5145e4..dac474443d 100644 --- a/wren-ai-service/tools/config/config.example.yaml +++ b/wren-ai-service/tools/config/config.example.yaml @@ -110,7 +110,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: litellm_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini-2024-07-18 - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.full.yaml b/wren-ai-service/tools/config/config.full.yaml index b1cf708716..d79ddd573f 100644 --- a/wren-ai-service/tools/config/config.full.yaml +++ b/wren-ai-service/tools/config/config.full.yaml @@ -110,7 +110,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: litellm_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini-2024-07-18 - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui From 8ae8d5f13ae47a8bf2dc5b09591fdc49fe59e29b Mon Sep 17 00:00:00 2001 From: Pao Sheng Date: Fri, 24 Jan 2025 10:59:37 +0800 Subject: [PATCH 14/17] chore: solve the codebase 
change after rebasing the main branch --- .../config.google_ai_studio.yaml | 2 +- .../docs/config_examples/config.groq.yaml | 2 +- .../docs/config_examples/config.ollama.yaml | 2 +- wren-ai-service/src/globals.py | 6 ++-- .../src/pipelines/generation/__init__.py | 32 ++----------------- .../src/pipelines/indexing/__init__.py | 4 +-- .../src/web/v1/routers/__init__.py | 6 +--- 7 files changed, 11 insertions(+), 43 deletions(-) diff --git a/wren-ai-service/docs/config_examples/config.google_ai_studio.yaml b/wren-ai-service/docs/config_examples/config.google_ai_studio.yaml index e2ecdbbbb8..d3118e0152 100644 --- a/wren-ai-service/docs/config_examples/config.google_ai_studio.yaml +++ b/wren-ai-service/docs/config_examples/config.google_ai_studio.yaml @@ -80,7 +80,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.gemini/gemini-2.0-flash-exp engine: wren_ui - - name: semantics_description + - name: semantics_enrichment llm: litellm_llm.gemini/gemini-2.0-flash-exp - name: relationship_recommendation llm: litellm_llm.gemini/gemini-2.0-flash-exp diff --git a/wren-ai-service/docs/config_examples/config.groq.yaml b/wren-ai-service/docs/config_examples/config.groq.yaml index 13bead784a..85658a8513 100644 --- a/wren-ai-service/docs/config_examples/config.groq.yaml +++ b/wren-ai-service/docs/config_examples/config.groq.yaml @@ -82,7 +82,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.groq/llama-3.3-70b-specdec engine: wren_ui - - name: semantics_description + - name: semantics_enrichment llm: litellm_llm.groq/llama-3.3-70b-specdec - name: relationship_recommendation llm: litellm_llm.groq/llama-3.3-70b-specdec diff --git a/wren-ai-service/docs/config_examples/config.ollama.yaml b/wren-ai-service/docs/config_examples/config.ollama.yaml index de09ff16ee..4e0d174330 100644 --- a/wren-ai-service/docs/config_examples/config.ollama.yaml +++ b/wren-ai-service/docs/config_examples/config.ollama.yaml @@ -80,7 +80,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.openai/phi4:14b engine: wren_ui - - name: semantics_description + - name: semantics_enrichment llm: litellm_llm.openai/phi4:14b - name: relationship_recommendation llm: litellm_llm.openai/phi4:14b diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index d267e43b7e..c0a40f4842 100644 --- a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -29,18 +29,18 @@ class ServiceContainer: ask_service: AskService ask_details_service: AskDetailsService + chart_service: ChartService + chart_adjustment_service: ChartAdjustmentService question_recommendation: QuestionRecommendation relationship_recommendation: RelationshipRecommendation semantics_enrichment: SemanticsEnrichment semantics_preparation_service: SemanticsPreparationService - chart_service: ChartService - chart_adjustment_service: ChartAdjustmentService sql_answer_service: SqlAnswerService sql_expansion_service: SqlExpansionService sql_explanation_service: SqlExplanationService - sql_regeneration_service: SqlRegenerationService sql_pairs_preparation_service: SqlPairsPreparationService sql_question_service: SqlQuestionService + sql_regeneration_service: SqlRegenerationService @dataclass diff --git a/wren-ai-service/src/pipelines/generation/__init__.py b/wren-ai-service/src/pipelines/generation/__init__.py index d6079f0fe6..ea1763d069 100644 --- a/wren-ai-service/src/pipelines/generation/__init__.py +++ b/wren-ai-service/src/pipelines/generation/__init__.py @@ -6,14 +6,6 @@ from .question_recommendation import QuestionRecommendation from 
.relationship_recommendation import RelationshipRecommendation from .semantics_enrichment import SemanticsEnrichment -from .chart_adjustment import ChartAdjustment -from .chart_generation import ChartGeneration -from .data_assistance import DataAssistance -from .followup_sql_generation import FollowUpSQLGeneration -from .intent_classification import IntentClassification -from .question_recommendation import QuestionRecommendation -from .relationship_recommendation import RelationshipRecommendation -from .semantics_description import SemanticsDescription from .sql_answer import SQLAnswer from .sql_breakdown import SQLBreakdown from .sql_correction import SQLCorrection @@ -25,28 +17,6 @@ from .sql_regeneration import SQLRegeneration from .sql_summary import SQLSummary -__all__ = [ - "SQLRegeneration", - "ChartGeneration", - "ChartAdjustment", - "DataAssistance", - "FollowUpSQLGeneration", - "IntentClassification", - "QuestionRecommendation", - "RelationshipRecommendation", - "SemanticsDescription", - "SQLAnswer", - "SQLBreakdown", - "SQLCorrection", - "SQLExpansion", - "SQLExplanation", - "SQLGeneration", - "SQLGenerationReasoning", - "SQLSummary", - "SQLQuestion", -] -from .model_semantics import ModelSemantics - __all__ = [ "ChartAdjustment", "ChartGeneration", @@ -62,6 +32,8 @@ "SQLExpansion", "SQLExplanation", "SQLGeneration", + "SQLGenerationReasoning", + "SQLQuestion", "SQLRegeneration", "SQLSummary", ] diff --git a/wren-ai-service/src/pipelines/indexing/__init__.py b/wren-ai-service/src/pipelines/indexing/__init__.py index bf138f49c7..9f7bac966e 100644 --- a/wren-ai-service/src/pipelines/indexing/__init__.py +++ b/wren-ai-service/src/pipelines/indexing/__init__.py @@ -120,8 +120,8 @@ async def run( __all__ = [ "DBSchema", - "TableDescription", "HistoricalQuestion", - "SqlPairsDeletion", "SqlPairs", + "SqlPairsDeletion", + "TableDescription", ] diff --git a/wren-ai-service/src/web/v1/routers/__init__.py b/wren-ai-service/src/web/v1/routers/__init__.py index 1df8f4568a..ad9831f9a4 100644 --- a/wren-ai-service/src/web/v1/routers/__init__.py +++ b/wren-ai-service/src/web/v1/routers/__init__.py @@ -7,10 +7,8 @@ chart_adjustment, question_recommendation, relationship_recommendation, - semantics_description, - semantics_preparation, semantics_enrichment, - semantics_preparations, + semantics_preparation, sql_answers, sql_expansions, sql_explanations, @@ -25,8 +23,6 @@ router.include_router(question_recommendation.router) router.include_router(relationship_recommendation.router) router.include_router(semantics_enrichment.router) -router.include_router(semantics_preparations.router) -router.include_router(semantics_description.router) router.include_router(semantics_preparation.router) router.include_router(sql_answers.router) router.include_router(sql_expansions.router) From 56b37433f09823666802d522508d55943e076b5f Mon Sep 17 00:00:00 2001 From: Pao Sheng Date: Fri, 24 Jan 2025 11:27:56 +0800 Subject: [PATCH 15/17] chore: change the image version for ibis to avoid duckdb module lacking issue and default platform to arm --- wren-ai-service/tools/dev/.env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wren-ai-service/tools/dev/.env b/wren-ai-service/tools/dev/.env index 56568537bf..8c0ce5c044 100644 --- a/wren-ai-service/tools/dev/.env +++ b/wren-ai-service/tools/dev/.env @@ -1,5 +1,5 @@ COMPOSE_PROJECT_NAME=wren -PLATFORM=linux/amd64 +PLATFORM=linux/arm64 # service port WREN_ENGINE_PORT=8080 @@ -14,7 +14,7 @@ WREN_PRODUCT_VERSION=development WREN_ENGINE_VERSION=latest 
WREN_AI_SERVICE_VERSION=latest WREN_UI_VERSION=latest -IBIS_SERVER_VERSION=latest +IBIS_SERVER_VERSION=sha-ce21e44 WREN_BOOTSTRAP_VERSION=latest LAUNCH_CLI_PATH=./launch-cli.sh From ec6e0ded703caf23be9cee3681493bfdede549dc Mon Sep 17 00:00:00 2001 From: Pao Sheng Date: Fri, 24 Jan 2025 14:33:18 +0800 Subject: [PATCH 16/17] chore: add a missing required parameter --- wren-ai-service/src/pipelines/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-ai-service/src/pipelines/common.py b/wren-ai-service/src/pipelines/common.py index f11ab52468..10a74036fa 100644 --- a/wren-ai-service/src/pipelines/common.py +++ b/wren-ai-service/src/pipelines/common.py @@ -41,7 +41,7 @@ def dry_run_pipeline(pipeline_cls: BasicPipeline, pipeline_name: str, **kwargs): from src.providers import generate_components from src.utils import init_langfuse, setup_custom_logger - setup_custom_logger("wren-ai-service", level_str=settings.logging_level) + setup_custom_logger("wren-ai-service", level_str=settings.logging_level, is_dev=True) pipe_components = generate_components(settings.components) pipeline = pipeline_cls(**pipe_components[pipeline_name]) From d83d1a577a59376162218acceb409932352260fc Mon Sep 17 00:00:00 2001 From: Pao Sheng Date: Fri, 24 Jan 2025 14:39:30 +0800 Subject: [PATCH 17/17] feat: thin the prompt and correct some variable --- .../generation/semantics_enrichment.py | 41 ++++--------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_enrichment.py b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py index d824ae123a..9c328b450c 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_enrichment.py +++ b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py @@ -88,27 +88,27 @@ def wrapper(text: str) -> str: ## End of Pipeline -class ModelProperties(BaseModel): +class Properties(BaseModel): alias: str description: str class ModelColumns(BaseModel): name: str - properties: ModelProperties + properties: Properties class SemanticModel(BaseModel): name: str columns: list[ModelColumns] - properties: ModelProperties + properties: Properties class SemanticResult(BaseModel): models: list[SemanticModel] -semantics_enrichment_KWARGS = { +SEMANTICS_ENRICHMENT_KWARGS = { "response_format": { "type": "json_schema", "json_schema": { @@ -137,34 +137,9 @@ class SemanticResult(BaseModel): - Aliases should be intuitive and user-friendly - Use the user's context to inform the descriptions - Maintain technical accuracy while being accessible to non-technical users - -Output Format: -{ - "models": [{ - "name": "model", - "columns": [{ - "name": "column", - "properties": { - "alias": "User-friendly column name", - "description": "Clear explanation of column purpose" - } - }], - "properties": { - "alias": "User-friendly model name", - "description": "Clear explanation of model purpose" - } - }] -} - -Example: -Input model "orders" with column "created_at" might become: -{ - "name": "created_at", - "properties": { - "alias": "Order Creation Date", - "description": "Timestamp when the order was first created in the system" - } -} +- IMPORTANT: Never modify the model/table and column names in the 'name' field as this will invalidate the data model +- Only update the 'alias' field to provide user-friendly display names +- When the user prompt includes operators to modify names, apply those modifications to the alias field only Focus on providing business value through clear, accurate descriptions while 
maintaining JSON structure integrity. """ @@ -185,7 +160,7 @@ def __init__(self, llm_provider: LLMProvider, **_): "prompt_builder": PromptBuilder(template=user_prompt_template), "generator": llm_provider.get_generator( system_prompt=system_prompt, - generation_kwargs=semantics_enrichment_KWARGS, + generation_kwargs=SEMANTICS_ENRICHMENT_KWARGS, ), } self._final = "normalize"
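
For reference, a minimal sketch of the structured payload that the `json_schema` in `SEMANTICS_ENRICHMENT_KWARGS` asks the generator to return, validated with the same Pydantic classes defined in `semantics_enrichment.py` above. The `orders` model and all field values in it are illustrative assumptions, not real MDL; only the `alias` and `description` properties are filled in, while each `name` field is left untouched, matching the guideline in the system prompt.

```python
# Minimal sketch: validating an example of the structured output that the
# json_schema in SEMANTICS_ENRICHMENT_KWARGS constrains the LLM to produce.
# The classes mirror those in semantics_enrichment.py; the "orders" model and
# its field values below are illustrative assumptions, not real project data.
from pydantic import BaseModel


class Properties(BaseModel):
    alias: str
    description: str


class ModelColumns(BaseModel):
    name: str
    properties: Properties


class SemanticModel(BaseModel):
    name: str
    columns: list[ModelColumns]
    properties: Properties


class SemanticResult(BaseModel):
    models: list[SemanticModel]


payload = {
    "models": [
        {
            # "name" is never rewritten by the pipeline; only properties change.
            "name": "orders",
            "columns": [
                {
                    "name": "created_at",
                    "properties": {
                        "alias": "Order Creation Date",
                        "description": "Timestamp when the order was first created.",
                    },
                }
            ],
            "properties": {
                "alias": "Customer Orders",
                "description": "Orders placed by customers, used for sales analysis.",
            },
        }
    ]
}

enriched = SemanticResult(**payload)  # raises ValidationError if a field is missing
print(enriched.models[0].columns[0].properties.alias)  # Order Creation Date
```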