Skip to content

Commit 4996dcb

Browse files
authored
Fix bug of image parser and prompt of parser supports customization (#10319)
### What problem does this PR solve?

Bug fix: the image parser raised `KeyError: 'llm_id'`.

Feature: the prompt that cv_model uses to describe a picture can now be customized (#10320).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
1 parent 3521eb6 commit 4996dcb

File tree

7 files changed

+30
-3
lines changed

7 files changed

+30
-3
lines changed

rag/flow/parser/parser.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,9 @@ def __init__(self):
108108
"parse_method": "ocr",
109109
"llm_id": "",
110110
"lang": "Chinese",
111+
"system_prompt": "",
111112
"suffix": ["jpg", "jpeg", "png", "gif"],
112-
"output_format": "json",
113+
"output_format": "text",
113114
},
114115
"email": {
115116
"suffix": [
@@ -329,11 +330,16 @@ def _image(self, name, blob):
329330
else:
330331
lang = conf["lang"]
331332
# use VLM to describe the picture
332-
cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"], lang=lang)
333+
cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["parse_method"], lang=lang)
333334
img_binary = io.BytesIO()
334335
img.save(img_binary, format="JPEG")
335336
img_binary.seek(0)
336-
txt = cv_model.describe(img_binary.read())
337+
338+
system_prompt = conf.get("system_prompt")
339+
if system_prompt:
340+
txt = cv_model.describe_with_prompt(img_binary.read(), system_prompt)
341+
else:
342+
txt = cv_model.describe(img_binary.read())
337343

338344
self.set_output("text", txt)
339345

web/src/locales/en.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1708,6 +1708,9 @@ This delimiter is used to split the input text into several text pieces echo of
17081708
filenameEmbdWeight: 'Filename embd weight',
17091709
begin: 'File',
17101710
parserMethod: 'Parser method',
1711+
systemPrompt: 'System Prompt',
1712+
systemPromptPlaceholder:
1713+
'Enter system prompt for image analysis, if empty the system default value will be used',
17111714
exportJson: 'Export JSON',
17121715
viewResult: 'View Result',
17131716
running: 'Running',

web/src/locales/zh.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,6 +1626,9 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
16261626
filenameEmbdWeight: '文件名嵌入权重',
16271627
begin: '文件',
16281628
parserMethod: '解析方法',
1629+
systemPrompt: '系统提示词',
1630+
systemPromptPlaceholder:
1631+
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
16291632
exportJson: '导出 JSON',
16301633
viewResult: '查看结果',
16311634
running: '运行中',

web/src/pages/data-flow/constant.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ export const initialParserValues = {
250250
fileFormat: FileType.Image,
251251
output_format: ImageOutputFormat.Text,
252252
parse_method: ImageParseMethod.OCR,
253+
system_prompt: '',
253254
},
254255
{
255256
fileFormat: FileType.Email,

web/src/pages/data-flow/form/parser-form/image-form-fields.tsx

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import { RAGFlowFormItem } from '@/components/ragflow-form';
2+
import { Textarea } from '@/components/ui/textarea';
13
import { buildOptions } from '@/utils/form';
24
import { isEmpty } from 'lodash';
35
import { useEffect, useMemo } from 'react';
46
import { useFormContext, useWatch } from 'react-hook-form';
7+
import { useTranslation } from 'react-i18next';
58
import { ImageParseMethod } from '../../constant';
69
import { LanguageFormField, ParserMethodFormField } from './common-form-fields';
710
import { CommonProps } from './interface';
@@ -11,6 +14,7 @@ import { buildFieldNameWithPrefix } from './utils';
1114
const options = buildOptions(ImageParseMethod);
1215

1316
export function ImageFormFields({ prefix }: CommonProps) {
17+
const { t } = useTranslation();
1418
const form = useFormContext();
1519
const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
1620

@@ -40,6 +44,14 @@ export function ImageFormFields({ prefix }: CommonProps) {
4044
optionsWithoutLLM={options}
4145
></ParserMethodFormField>
4246
{languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
47+
{languageShown && (
48+
<RAGFlowFormItem
49+
name={buildFieldNameWithPrefix('system_prompt', prefix)}
50+
label={t('dataflow.systemPrompt')}
51+
>
52+
<Textarea placeholder={t('dataflow.systemPromptPlaceholder')} />
53+
</RAGFlowFormItem>
54+
)}
4355
</>
4456
);
4557
}

web/src/pages/data-flow/form/parser-form/index.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ export const FormSchema = z.object({
6464
lang: z.string().optional(),
6565
fields: z.array(z.string()).optional(),
6666
llm_id: z.string().optional(),
67+
system_prompt: z.string().optional(),
6768
}),
6869
),
6970
});

web/src/pages/data-flow/utils.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ function transformParserParams(params: ParserFormSchemaType) {
100100
...filteredSetup,
101101
parse_method: cur.parse_method,
102102
lang: cur.lang,
103+
system_prompt: cur.system_prompt,
103104
};
104105
break;
105106
case FileType.Email:

0 commit comments

Comments
 (0)