diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index dcdd2c4de3a..30bd4e329d9 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -108,8 +108,9 @@ def __init__(self): "parse_method": "ocr", "llm_id": "", "lang": "Chinese", + "system_prompt": "", "suffix": ["jpg", "jpeg", "png", "gif"], - "output_format": "json", + "output_format": "text", }, "email": { "suffix": [ @@ -329,11 +330,16 @@ def _image(self, name, blob): else: lang = conf["lang"] # use VLM to describe the picture - cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"], lang=lang) + cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["parse_method"], lang=lang) img_binary = io.BytesIO() img.save(img_binary, format="JPEG") img_binary.seek(0) - txt = cv_model.describe(img_binary.read()) + + system_prompt = conf.get("system_prompt") + if system_prompt: + txt = cv_model.describe_with_prompt(img_binary.read(), system_prompt) + else: + txt = cv_model.describe(img_binary.read()) self.set_output("text", txt) diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 4e9686fb194..9e2553f772a 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1708,6 +1708,9 @@ This delimiter is used to split the input text into several text pieces echo of filenameEmbdWeight: 'Filename embd weight', begin: 'File', parserMethod: 'Parser method', + systemPrompt: 'System Prompt', + systemPromptPlaceholder: + 'Enter system prompt for image analysis, if empty the system default value will be used', exportJson: 'Export JSON', viewResult: 'View Result', running: 'Running', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 030e27ee289..4dba7b4aa93 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1626,6 +1626,9 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 filenameEmbdWeight: '文件名嵌入权重', begin: '文件', parserMethod: '解析方法', + systemPrompt: '系统提示词', + systemPromptPlaceholder: + '请输入用于图像分析的系统提示词,若为空则使用系统缺省值', exportJson: '导出 JSON', viewResult: '查看结果', running: '运行中', diff --git a/web/src/pages/data-flow/constant.tsx b/web/src/pages/data-flow/constant.tsx index 6b2fd10947d..e0096b609ac 100644 --- a/web/src/pages/data-flow/constant.tsx +++ b/web/src/pages/data-flow/constant.tsx @@ -250,6 +250,7 @@ export const initialParserValues = { fileFormat: FileType.Image, output_format: ImageOutputFormat.Text, parse_method: ImageParseMethod.OCR, + system_prompt: '', }, { fileFormat: FileType.Email, diff --git a/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx b/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx index 7b15eda1928..4cff99ea770 100644 --- a/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx +++ b/web/src/pages/data-flow/form/parser-form/image-form-fields.tsx @@ -1,7 +1,10 @@ +import { RAGFlowFormItem } from '@/components/ragflow-form'; +import { Textarea } from '@/components/ui/textarea'; import { buildOptions } from '@/utils/form'; import { isEmpty } from 'lodash'; import { useEffect, useMemo } from 'react'; import { useFormContext, useWatch } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; import { ImageParseMethod } from '../../constant'; import { LanguageFormField, ParserMethodFormField } from './common-form-fields'; import { CommonProps } from './interface'; @@ -11,6 +14,7 @@ import { buildFieldNameWithPrefix } from './utils'; const options = buildOptions(ImageParseMethod); export function ImageFormFields({ prefix }: CommonProps) { + const { t } = useTranslation(); const form = useFormContext(); const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix); @@ -40,6 +44,14 @@ export function ImageFormFields({ prefix }: CommonProps) { optionsWithoutLLM={options} > {languageShown && } + {languageShown && ( + +