diff --git a/src/vuln_analysis/utils/checklist_prompt_generator.py b/src/vuln_analysis/utils/checklist_prompt_generator.py index 6eb7abfd..d9317854 100644 --- a/src/vuln_analysis/utils/checklist_prompt_generator.py +++ b/src/vuln_analysis/utils/checklist_prompt_generator.py @@ -14,7 +14,9 @@ # limitations under the License. import ast +import json import logging +import re from jinja2 import Template from langchain_core.language_models.base import BaseLanguageModel @@ -66,15 +68,30 @@ async def _parse_list(text: list[str]) -> list[list[str]]: # Remove newline characters that can cause incorrect string escaping in the next step x = x.replace("\n", "") - # Ensure backslashes are escaped - x = x.replace("\\", "\\\\") - # Try to do some very basic string cleanup to fix unescaped quotes x = attempt_fix_list_string(x) - # Only proceed if the input is a valid Python literal - # This isn't really dangerous, literal_eval only evaluates a small subset of python - current = ast.literal_eval(x) + # Handle backslash sequences that break JSON parsing + x = x.replace('\\\\..\\\\', '\\..\\') # Fix \\..\\ patterns + x = x.replace('\\\\"', '\\"') # Fix \\" patterns + + # Try JSON parsing first + try: + current = json.loads(x) + except json.JSONDecodeError: + # Use regex to extract list items manually + pattern = r'\"([^\"]*(?:\\.[^\"]*)*)\"' + matches = re.findall(pattern, x) + if matches: + # Clean up the matches by removing extra escaping + current = [] + for match in matches: + cleaned = match.replace('\\\\', '\\') + current.append(cleaned) + else: + # Fall back to ast.literal_eval + x = x.replace("\\", "\\\\") + current = ast.literal_eval(x) # Ensure that the parsed data is a list if not isinstance(current, list):