Merge pull request #273 from zszabo-rh/feedback_category

tisnik · web-flow · commit 6459aac29023 · 2025-07-25T08:59:14.000+02:00
MGMT-21204: add categories to feedback API
diff --git a/docs/openapi.json b/docs/openapi.json
@@ -957,6 +957,27 @@
                 "title": "DataCollectorConfiguration",
                 "description": "Data collector configuration for sending data to ingress server."
             },
+            "FeedbackCategory": {
+                "type": "string",
+                "enum": [
+                    "incorrect",
+                    "not_relevant",
+                    "incomplete",
+                    "outdated_information",
+                    "unsafe",
+                    "other"
+                ],
+                "x-enum-varnames": [
+                    "INCORRECT",
+                    "NOT_RELEVANT",
+                    "INCOMPLETE",
+                    "OUTDATED_INFORMATION",
+                    "UNSAFE",
+                    "OTHER"
+                ],
+                "title": "FeedbackCategory",
+                "description": "Enum representing predefined feedback categories for AI responses.\n\nThese categories help provide structured feedback about AI inference quality when users provide negative feedback (thumbs down). Multiple categories can be selected to provide comprehensive feedback about response issues."
+            },
             "FeedbackRequest": {
                 "properties": {
                     "conversation_id": {
@@ -997,6 +1018,27 @@
                         "examples": [
                             "I'm not satisfied with the response because it is too vague."
                         ]
+                    },
+                    "categories": {
+                        "anyOf": [
+                            {
+                                "items": {
+                                    "$ref": "#/components/schemas/FeedbackCategory"
+                                },
+                                "type": "array"
+                            },
+                            {
+                                "type": "null"
+                            }
+                        ],
+                        "title": "Categories",
+                        "description": "List of feedback categories that apply to the LLM response.",
+                        "examples": [
+                            [
+                                "incorrect",
+                                "incomplete"
+                            ]
+                        ]
                     }
                 },
                 "type": "object",
@@ -1006,14 +1048,25 @@
                     "llm_response"
                 ],
                 "title": "FeedbackRequest",
-                "description": "Model representing a feedback request.\n\nAttributes:\n    conversation_id: The required conversation ID (UUID).\n    user_question: The required user question.\n    llm_response: The required LLM response.\n    sentiment: The optional sentiment.\n    user_feedback: The optional user feedback.\n\nExample:\n    ```python\n    feedback_request = FeedbackRequest(\n        conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n        user_question=\"what are you doing?\",\n        user_feedback=\"Great service!\",\n        llm_response=\"I don't know\",\n        sentiment=-1,\n    )\n    ```",
+                "description": "Model representing a feedback request.\n\nAttributes:\n    conversation_id: The required conversation ID (UUID).\n    user_question: The required user question.\n    llm_response: The required LLM response.\n    sentiment: The optional sentiment.\n    user_feedback: The optional user feedback.\n    categories: The optional list of feedback categories (multi-select).\n\nExample:\n    ```python\n    feedback_request = FeedbackRequest(\n        conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n        user_question=\"what are you doing?\",\n        user_feedback=\"Great service!\",\n        llm_response=\"I don't know\",\n        sentiment=-1,\n        categories=[FeedbackCategory.INCOMPLETE, FeedbackCategory.UNSAFE]\n    )\n    ```",
                 "examples": [
                     {
                         "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
                         "llm_response": "bar",
                         "sentiment": 1,
                         "user_feedback": "Great service!",
                         "user_question": "foo"
+                    },
+                    {
+                        "categories": [
+                            "incomplete",
+                            "not_relevant"
+                        ],
+                        "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
+                        "llm_response": "You need to use Docker and Kubernetes for everything.",
+                        "sentiment": -1,
+                        "user_feedback": "This response is too general and doesn't provide specific steps.",
+                        "user_question": "How do I deploy a web app?"
                     }
                 ]
             },
diff --git a/docs/openapi.md b/docs/openapi.md
@@ -457,6 +457,24 @@ Data collector configuration for sending data to ingress server.
 | connection_timeout | integer |  |
 
 
+## FeedbackCategory
+
+
+Enum representing predefined feedback categories for AI responses.
+
+These categories help provide structured feedback about AI inference quality.
+Multiple categories can be selected to provide comprehensive feedback.
+
+| Value | Description |
+|-------|-------------|
+| incorrect | The answer provided is completely wrong |
+| not_relevant | This answer doesn't address my question at all |
+| incomplete | The answer only covers part of what I asked about |
+| outdated_information | This information is from several years ago and no longer accurate |
+| unsafe | This response could be harmful or dangerous if followed |
+| other | The response has issues not covered by other categories |
+
+
 ## FeedbackRequest
 
 
@@ -468,15 +486,27 @@ Attributes:
     llm_response: The required LLM response.
     sentiment: The optional sentiment.
     user_feedback: The optional user feedback.
+    categories: The optional list of feedback categories (multi-select).
 
-Example:
+Examples:
     ```python
+    # Basic feedback
     feedback_request = FeedbackRequest(
         conversation_id="12345678-abcd-0000-0123-456789abcdef",
         user_question="what are you doing?",
         user_feedback="Great service!",
         llm_response="I don't know",
+        sentiment=1
+    )
+    
+    # Feedback with categories
+    feedback_request = FeedbackRequest(
+        conversation_id="12345678-abcd-0000-0123-456789abcdef",
+        user_question="How do I deploy a web app?",
+        llm_response="You need to use Docker and Kubernetes for everything.",
+        user_feedback="This response is too general and doesn't provide specific steps.",
         sentiment=-1,
+        categories=["incomplete", "not_relevant"]
     )
     ```
 
@@ -488,6 +518,7 @@ Example:
 | llm_response | string |  |
 | sentiment |  |  |
 | user_feedback |  | Feedback on the LLM response. |
+| categories | array[FeedbackCategory] | List of feedback categories that apply to the LLM response. |
 
 
 ## FeedbackResponse
diff --git a/src/models/requests.py b/src/models/requests.py
@@ -1,6 +1,7 @@
 """Model for service requests."""
 
 from typing import Optional, Self
+from enum import Enum
 
 from pydantic import BaseModel, model_validator, field_validator, Field
 from llama_stack_client.types.agents.turn_create_params import Document
@@ -146,6 +147,22 @@ def validate_media_type(self) -> Self:
         return self
 
 
+class FeedbackCategory(str, Enum):
+    """Enum representing predefined feedback categories for AI responses.
+
+    These categories help provide structured feedback about AI inference quality
+    when users provide negative feedback (thumbs down). Multiple categories can
+    be selected to provide comprehensive feedback about response issues.
+    """
+
+    INCORRECT = "incorrect"  # "The answer provided is completely wrong"
+    NOT_RELEVANT = "not_relevant"  # "This answer doesn't address my question at all"
+    INCOMPLETE = "incomplete"  # "The answer only covers part of what I asked about"
+    OUTDATED_INFORMATION = "outdated_information"  # "This information is from several years ago and no longer accurate"  # pylint: disable=line-too-long
+    UNSAFE = "unsafe"  # "This response could be harmful or dangerous if followed"
+    OTHER = "other"  # "The response has issues not covered by other categories"
+
+
 class FeedbackRequest(BaseModel):
     """Model representing a feedback request.
 
@@ -155,15 +172,17 @@ class FeedbackRequest(BaseModel):
         llm_response: The required LLM response.
         sentiment: The optional sentiment.
         user_feedback: The optional user feedback.
+        categories: The optional list of feedback categories (multi-select for negative feedback).
 
     Example:
         ```python
         feedback_request = FeedbackRequest(
             conversation_id="12345678-abcd-0000-0123-456789abcdef",
             user_question="what are you doing?",
-            user_feedback="Great service!",
+            user_feedback="This response is not helpful",
             llm_response="I don't know",
             sentiment=-1,
+            categories=[FeedbackCategory.INCORRECT, FeedbackCategory.INCOMPLETE]
         )
         ```
     """
@@ -179,6 +198,15 @@ class FeedbackRequest(BaseModel):
         description="Feedback on the LLM response.",
         examples=["I'm not satisfied with the response because it is too vague."],
     )
+    # Optional list of predefined feedback categories for negative feedback
+    categories: Optional[list[FeedbackCategory]] = Field(
+        default=None,
+        description=(
+            "List of feedback categories that describe issues with the LLM response "
+            "(for negative feedback)."
+        ),
+        examples=[["incorrect", "incomplete"]],
+    )
 
     # provides examples for /docs endpoint
     model_config = {
@@ -188,9 +216,26 @@ class FeedbackRequest(BaseModel):
                     "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
                     "user_question": "foo",
                     "llm_response": "bar",
-                    "user_feedback": "Great service!",
-                    "sentiment": 1,
-                }
+                    "user_feedback": "Not satisfied with the response quality.",
+                    "sentiment": -1,
+                },
+                {
+                    "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
+                    "user_question": "What is the capital of France?",
+                    "llm_response": "The capital of France is Berlin.",
+                    "sentiment": -1,
+                    "categories": ["incorrect"],
+                },
+                {
+                    "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
+                    "user_question": "How do I deploy a web app?",
+                    "llm_response": "Use Docker.",
+                    "user_feedback": (
+                        "This response is too general and doesn't provide specific steps."
+                    ),
+                    "sentiment": -1,
+                    "categories": ["incomplete", "not_relevant"],
+                },
             ]
         }
     }
@@ -213,9 +258,34 @@ def check_sentiment(cls, value: Optional[int]) -> Optional[int]:
             )
         return value
 
+    @field_validator("categories")
+    @classmethod
+    def validate_categories(
+        cls, value: Optional[list[FeedbackCategory]]
+    ) -> Optional[list[FeedbackCategory]]:
+        """Validate feedback categories list."""
+        if value is None:
+            return value
+
+        if not isinstance(value, list):
+            raise ValueError("Categories must be a list")
+
+        if len(value) == 0:
+            return None  # Convert empty list to None for consistency
+
+        unique_categories = list(set(value))
+        return unique_categories
+
     @model_validator(mode="after")
-    def check_sentiment_or_user_feedback_set(self) -> Self:
-        """Ensure that either 'sentiment' or 'user_feedback' is set."""
-        if self.sentiment is None and self.user_feedback is None:
-            raise ValueError("Either 'sentiment' or 'user_feedback' must be set")
+    def check_feedback_provided(self) -> Self:
+        """Ensure that at least one form of feedback is provided."""
+        if (
+            self.sentiment is None
+            and self.user_feedback is None
+            and self.categories is None
+        ):
+            raise ValueError(
+                "At least one form of feedback must be provided: "
+                "'sentiment', 'user_feedback', or 'categories'"
+            )
         return self
diff --git a/tests/unit/app/endpoints/test_feedback.py b/tests/unit/app/endpoints/test_feedback.py
@@ -48,15 +48,31 @@ async def test_assert_feedback_enabled(mocker):
     await assert_feedback_enabled(mocker.Mock())
 
 
-def test_feedback_endpoint_handler(mocker):
-    """Test that feedback_endpoint_handler processes feedback correctly."""
+@pytest.mark.parametrize(
+    "feedback_request_data",
+    [
+        {},
+        {
+            "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
+            "user_question": "What is Kubernetes?",
+            "llm_response": "It's some computer thing.",
+            "sentiment": -1,
+            "categories": ["incorrect", "incomplete"],
+        },
+    ],
+    ids=["no_categories", "with_negative_categories"],
+)
+def test_feedback_endpoint_handler(mocker, feedback_request_data):
+    """Test that feedback_endpoint_handler processes feedback for different payloads."""
+
     # Mock the dependencies
     mocker.patch("app.endpoints.feedback.assert_feedback_enabled", return_value=None)
     mocker.patch("utils.common.retrieve_user_id", return_value="test_user_id")
     mocker.patch("app.endpoints.feedback.store_feedback", return_value=None)
 
-    # Mock the feedback request
+    # Prepare the feedback request mock
     feedback_request = mocker.Mock()
+    feedback_request.model_dump.return_value = feedback_request_data
 
     # Call the endpoint handler
     result = feedback_endpoint_handler(
@@ -65,7 +81,7 @@ def test_feedback_endpoint_handler(mocker):
         _ensure_feedback_enabled=assert_feedback_enabled,
     )
 
-    # Assert that the response is as expected
+    # Assert that the expected response is returned
     assert result.response == "feedback received"
 
 
@@ -94,35 +110,50 @@ def test_feedback_endpoint_handler_error(mocker):
     assert exc_info.value.detail["response"] == "Error storing user feedback"
 
 
-def test_store_feedback(mocker):
-    """Test that store_feedback calls the correct storage function."""
+@pytest.mark.parametrize(
+    "feedback_request_data",
+    [
+        {
+            "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
+            "user_question": "What is OpenStack?",
+            "llm_response": "It's some cloud thing.",
+            "user_feedback": "This response is not helpful!",
+            "sentiment": -1,
+        },
+        {
+            "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
+            "user_question": "What is Kubernetes?",
+            "llm_response": "K8s.",
+            "sentiment": -1,
+            "categories": ["incorrect", "not_relevant", "incomplete"],
+        },
+    ],
+    ids=["negative_text_feedback", "negative_feedback_with_categories"],
+)
+def test_store_feedback(mocker, feedback_request_data):
+    """Test that store_feedback correctly stores various feedback payloads."""
+
     configuration.user_data_collection_configuration.feedback_storage = "fake-path"
 
+    # Patch filesystem and helpers
     mocker.patch("builtins.open", mocker.mock_open())
     mocker.patch("app.endpoints.feedback.Path", return_value=mocker.MagicMock())
     mocker.patch("app.endpoints.feedback.get_suid", return_value="fake-uuid")
 
-    # Mock the JSON to assert the data is stored correctly
+    # Patch json to inspect stored data
     mock_json = mocker.patch("app.endpoints.feedback.json")
 
     user_id = "test_user_id"
-    feedback_request = {
-        "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
-        "user_question": "What is OpenStack?",
-        "llm_response": "OpenStack is a cloud computing platform.",
-        "user_feedback": "Great service!",
-        "sentiment": 1,
+
+    store_feedback(user_id, feedback_request_data)
+
+    expected_data = {
+        "user_id": user_id,
+        "timestamp": mocker.ANY,
+        **feedback_request_data,
     }
 
-    store_feedback(user_id, feedback_request)
-    mock_json.dump.assert_called_once_with(
-        {
-            "user_id": user_id,
-            "timestamp": mocker.ANY,
-            **feedback_request,
-        },
-        mocker.ANY,
-    )
+    mock_json.dump.assert_called_once_with(expected_data, mocker.ANY)
 
 
 def test_feedback_status():
diff --git a/tests/unit/models/test_requests.py b/tests/unit/models/test_requests.py