Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions src/vuln_analysis/utils/checklist_prompt_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
# limitations under the License.

import ast
import json
import logging
import re

from jinja2 import Template
from langchain_core.language_models.base import BaseLanguageModel
Expand Down Expand Up @@ -66,15 +68,30 @@ async def _parse_list(text: list[str]) -> list[list[str]]:
# Remove newline characters that can cause incorrect string escaping in the next step
x = x.replace("\n", "")

# Ensure backslashes are escaped
x = x.replace("\\", "\\\\")

# Try to do some very basic string cleanup to fix unescaped quotes
x = attempt_fix_list_string(x)

# Only proceed if the input is a valid Python literal
# This isn't really dangerous, literal_eval only evaluates a small subset of python
current = ast.literal_eval(x)
# Handle backslash sequences that break JSON parsing
x = x.replace('\\\\..\\\\', '\\..\\') # Fix \\..\\ patterns
x = x.replace('\\\\"', '\\"') # Fix \\" patterns

# Try JSON parsing first
try:
current = json.loads(x)
except json.JSONDecodeError:
# Use regex to extract list items manually
pattern = r'\"([^\"]*(?:\\.[^\"]*)*)\"'
matches = re.findall(pattern, x)
if matches:
# Clean up the matches by removing extra escaping
current = []
for match in matches:
cleaned = match.replace('\\\\', '\\')
current.append(cleaned)
else:
# Fall back to ast.literal_eval
x = x.replace("\\", "\\\\")
current = ast.literal_eval(x)

# Ensure that the parsed data is a list
if not isinstance(current, list):
Expand Down