From 6be9b98a2c2cf27ab604e26c2244e69a3f0f41c9 Mon Sep 17 00:00:00 2001
From: TeslaZY <TeslaZY@outlook.com>
Date: Fri, 26 Sep 2025 23:48:39 +0800
Subject: [PATCH 1/2] Fix: image parser raises KeyError: 'llm_id'

### What problem does this PR solve?
[Bug]: the image parser fails with KeyError: 'llm_id'.
Change: read the model name from the correct key, conf["parse_method"], instead of conf["llm_id"].

### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---
 rag/flow/parser/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py
index dcdd2c4de3a..d0f4891f438 100644
--- a/rag/flow/parser/parser.py
+++ b/rag/flow/parser/parser.py
@@ -329,7 +329,7 @@ def _image(self, name, blob):
         else:
             lang = conf["lang"]
             # use VLM to describe the picture
-            cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"], lang=lang)
+            cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["parse_method"], lang=lang)
             img_binary = io.BytesIO()
             img.save(img_binary, format="JPEG")
             img_binary.seek(0)

From b1263b2e621653d9b87c62dab9018390e4d3a139 Mon Sep 17 00:00:00 2001
From: TeslaZY <TeslaZY@outlook.com>
Date: Sat, 27 Sep 2025 17:08:02 +0800
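Subject: [PATCH 2/2] Feat: support customizing the prompt cv_model uses to
 describe pictures #10320

### What problem does this PR solve?
The image parser always called cv_model.describe() with its default prompt; a custom system prompt can now be supplied, and the default is used when it is empty.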

### Type of change
- [x] New Feature (non-breaking change which adds functionality)
---
 rag/flow/parser/parser.py                            | 10 ++++++++--
 web/src/locales/en.ts                                |  3 +++
 web/src/locales/zh.ts                                |  3 +++
 web/src/pages/data-flow/constant.tsx                 |  1 +
 .../data-flow/form/parser-form/image-form-fields.tsx | 12 ++++++++++++
 web/src/pages/data-flow/form/parser-form/index.tsx   |  1 +
 web/src/pages/data-flow/utils.ts                     |  1 +
 7 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py
index d0f4891f438..30bd4e329d9 100644
--- a/rag/flow/parser/parser.py
+++ b/rag/flow/parser/parser.py
@@ -108,8 +108,9 @@ def __init__(self):
                 "parse_method": "ocr",
                 "llm_id": "",
                 "lang": "Chinese",
+                "system_prompt": "",
                 "suffix": ["jpg", "jpeg", "png", "gif"],
-                "output_format": "json",
+                "output_format": "text",
             },
             "email": {
                 "suffix": [
@@ -333,7 +334,12 @@ def _image(self, name, blob):
             img_binary = io.BytesIO()
             img.save(img_binary, format="JPEG")
             img_binary.seek(0)
-            txt = cv_model.describe(img_binary.read())
+
+            system_prompt = conf.get("system_prompt")
+            if system_prompt:
+                txt = cv_model.describe_with_prompt(img_binary.read(), system_prompt)
+            else:
+                txt = cv_model.describe(img_binary.read())
 
         self.set_output("text", txt)
 
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index 4e9686fb194..9e2553f772a 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -1708,6 +1708,9 @@ This delimiter is used to split the input text into several text pieces echo of
       filenameEmbdWeight: 'Filename embd weight',
       begin: 'File',
       parserMethod: 'Parser method',
+      systemPrompt: 'System Prompt',
+      systemPromptPlaceholder:
+        'Enter system prompt for image analysis, if empty the system default value will be used',
       exportJson: 'Export JSON',
       viewResult: 'View Result',
       running: 'Running',
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index 030e27ee289..4dba7b4aa93 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -1626,6 +1626,9 @@ General：实体和关系提取提示来自 GitHub - microsoft/graphrag：基于
       filenameEmbdWeight: '文件名嵌入权重',
       begin: '文件',
       parserMethod: '解析方法',
+      systemPrompt: '系统提示词',
+      systemPromptPlaceholder:
+        '请输入用于图像分析的系统提示词，若为空则使用系统缺省值',
       exportJson: '导出 JSON',
       viewResult: '查看结果',
       running: '运行中',
diff --git a/web/src/pages/data-flow/constant.tsx b/web/src/pages/data-flow/constant.tsx
index 6b2fd10947d..e0096b609ac 100644
--- a/web/src/pages/data-flow/constant.tsx
+++ b/web/src/pages/data-flow/constant.tsx
@@ -250,6 +250,7 @@ export const initialParserValues = {
       fileFormat: FileType.Image,
       output_format: ImageOutputFormat.Text,
       parse_method: ImageParseMethod.OCR,
+      system_prompt: '',
     },
     {
       fileFormat: FileType.Email,
diff --git a/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx b/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx
index 7b15eda1928..4cff99ea770 100644
--- a/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx
+++ b/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx
@@ -1,7 +1,10 @@
+import { RAGFlowFormItem } from '@/components/ragflow-form';
+import { Textarea } from '@/components/ui/textarea';
 import { buildOptions } from '@/utils/form';
 import { isEmpty } from 'lodash';
 import { useEffect, useMemo } from 'react';
 import { useFormContext, useWatch } from 'react-hook-form';
+import { useTranslation } from 'react-i18next';
 import { ImageParseMethod } from '../../constant';
 import { LanguageFormField, ParserMethodFormField } from './common-form-fields';
 import { CommonProps } from './interface';
@@ -11,6 +14,7 @@ import { buildFieldNameWithPrefix } from './utils';
 const options = buildOptions(ImageParseMethod);
 
 export function ImageFormFields({ prefix }: CommonProps) {
+  const { t } = useTranslation();
   const form = useFormContext();
   const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
 
@@ -40,6 +44,14 @@ export function ImageFormFields({ prefix }: CommonProps) {
         optionsWithoutLLM={options}
       ></ParserMethodFormField>
       {languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
+      {languageShown && (
+        <RAGFlowFormItem
+          name={buildFieldNameWithPrefix('system_prompt', prefix)}
+          label={t('dataflow.systemPrompt')}
+        >
+          <Textarea placeholder={t('dataflow.systemPromptPlaceholder')} />
+        </RAGFlowFormItem>
+      )}
     </>
   );
 }
diff --git a/web/src/pages/data-flow/form/parser-form/index.tsx b/web/src/pages/data-flow/form/parser-form/index.tsx
index df213e2155c..2d5f540e203 100644
--- a/web/src/pages/data-flow/form/parser-form/index.tsx
+++ b/web/src/pages/data-flow/form/parser-form/index.tsx
@@ -64,6 +64,7 @@ export const FormSchema = z.object({
       lang: z.string().optional(),
       fields: z.array(z.string()).optional(),
       llm_id: z.string().optional(),
+      system_prompt: z.string().optional(),
     }),
   ),
 });
diff --git a/web/src/pages/data-flow/utils.ts b/web/src/pages/data-flow/utils.ts
index 7fe7e1c842e..e766e085117 100644
--- a/web/src/pages/data-flow/utils.ts
+++ b/web/src/pages/data-flow/utils.ts
@@ -100,6 +100,7 @@ function transformParserParams(params: ParserFormSchemaType) {
             ...filteredSetup,
             parse_method: cur.parse_method,
             lang: cur.lang,
+            system_prompt: cur.system_prompt,
           };
           break;
         case FileType.Email: