From 6be9b98a2c2cf27ab604e26c2244e69a3f0f41c9 Mon Sep 17 00:00:00 2001 From: TeslaZY Date: Fri, 26 Sep 2025 23:48:39 +0800 Subject: [PATCH 1/2] Fix: image parser raises KeyError: 'llm_id' ### What problem does this PR solve? [Bug]: ERROR: KeyError: 'llm_id' Change: read the model name from the correct config key ("parse_method" instead of "llm_id") ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/flow/parser/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index dcdd2c4de3a..d0f4891f438 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -329,7 +329,7 @@ def _image(self, name, blob): else: lang = conf["lang"] # use VLM to describe the picture - cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"], lang=lang) + cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["parse_method"], lang=lang) img_binary = io.BytesIO() img.save(img_binary, format="JPEG") img_binary.seek(0) From b1263b2e621653d9b87c62dab9018390e4d3a139 Mon Sep 17 00:00:00 2001 From: TeslaZY Date: Sat, 27 Sep 2025 17:08:02 +0800 Subject: [PATCH 2/2] ### What problem does this PR solve? 
Feat: The prompt of the describe picture in cv_model supports customization #10320 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- rag/flow/parser/parser.py | 10 ++++++++-- web/src/locales/en.ts | 3 +++ web/src/locales/zh.ts | 3 +++ web/src/pages/data-flow/constant.tsx | 1 + .../data-flow/form/parser-form/image-form-fields.tsx | 12 ++++++++++++ web/src/pages/data-flow/form/parser-form/index.tsx | 1 + web/src/pages/data-flow/utils.ts | 1 + 7 files changed, 29 insertions(+), 2 deletions(-) diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index d0f4891f438..30bd4e329d9 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -108,8 +108,9 @@ def __init__(self): "parse_method": "ocr", "llm_id": "", "lang": "Chinese", + "system_prompt": "", "suffix": ["jpg", "jpeg", "png", "gif"], - "output_format": "json", + "output_format": "text", }, "email": { "suffix": [ @@ -333,7 +334,12 @@ def _image(self, name, blob): img_binary = io.BytesIO() img.save(img_binary, format="JPEG") img_binary.seek(0) - txt = cv_model.describe(img_binary.read()) + + system_prompt = conf.get("system_prompt") + if system_prompt: + txt = cv_model.describe_with_prompt(img_binary.read(), system_prompt) + else: + txt = cv_model.describe(img_binary.read()) self.set_output("text", txt) diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 4e9686fb194..9e2553f772a 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1708,6 +1708,9 @@ This delimiter is used to split the input text into several text pieces echo of filenameEmbdWeight: 'Filename embd weight', begin: 'File', parserMethod: 'Parser method', + systemPrompt: 'System Prompt', + systemPromptPlaceholder: + 'Enter system prompt for image analysis, if empty the system default value will be used', exportJson: 'Export JSON', viewResult: 'View Result', running: 'Running', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 030e27ee289..4dba7b4aa93 
100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1626,6 +1626,9 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 filenameEmbdWeight: '文件名嵌入权重', begin: '文件', parserMethod: '解析方法', + systemPrompt: '系统提示词', + systemPromptPlaceholder: + '请输入用于图像分析的系统提示词,若为空则使用系统缺省值', exportJson: '导出 JSON', viewResult: '查看结果', running: '运行中', diff --git a/web/src/pages/data-flow/constant.tsx b/web/src/pages/data-flow/constant.tsx index 6b2fd10947d..e0096b609ac 100644 --- a/web/src/pages/data-flow/constant.tsx +++ b/web/src/pages/data-flow/constant.tsx @@ -250,6 +250,7 @@ export const initialParserValues = { fileFormat: FileType.Image, output_format: ImageOutputFormat.Text, parse_method: ImageParseMethod.OCR, + system_prompt: '', }, { fileFormat: FileType.Email, diff --git a/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx b/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx index 7b15eda1928..4cff99ea770 100644 --- a/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx +++ b/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx @@ -1,7 +1,10 @@ +import { RAGFlowFormItem } from '@/components/ragflow-form'; +import { Textarea } from '@/components/ui/textarea'; import { buildOptions } from '@/utils/form'; import { isEmpty } from 'lodash'; import { useEffect, useMemo } from 'react'; import { useFormContext, useWatch } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; import { ImageParseMethod } from '../../constant'; import { LanguageFormField, ParserMethodFormField } from './common-form-fields'; import { CommonProps } from './interface'; @@ -11,6 +14,7 @@ import { buildFieldNameWithPrefix } from './utils'; const options = buildOptions(ImageParseMethod); export function ImageFormFields({ prefix }: CommonProps) { + const { t } = useTranslation(); const form = useFormContext(); const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix); @@ -40,6 +44,14 @@ export function ImageFormFields({ 
prefix }: CommonProps) { optionsWithoutLLM={options} > {languageShown && } + {languageShown && ( + +