Skip to content

Commit addf44e

Browse files
committed
feat: 新增异步任务提取服务
1 parent f24cd81 commit addf44e

File tree

21 files changed

+1216
-109
lines changed

21 files changed

+1216
-109
lines changed

.env.example

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,18 @@ COS_ENABLE_OLD_DOMAIN=true
121121
# 如果解析网络连接使用Web代理,需要配置以下参数
122122
# WEB_PROXY=your_web_proxy
123123

124+
# 如果使用Neo4j作为图数据库,需要配置以下参数
125+
# ENABLE_NEO4J_GRAPH=true
126+
127+
# Neo4j的访问地址
128+
# NEO4J_URI=neo4j://localhost:7687
129+
130+
# Neo4j的用户名
131+
# NEO4J_USERNAME=neo4j
132+
133+
# Neo4j的密码
134+
# NEO4J_PASSWORD=password
135+
124136
##############################################################
125137

126138
###### 注意: 以下配置不再生效,已在Web“配置初始化”阶段完成 #########

config/config.yaml

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ server:
77
conversation:
88
max_rounds: 5
99
keyword_threshold: 0.3
10-
embedding_top_k: 10
10+
embedding_top_k: 5
1111
vector_threshold: 0.5
1212
rerank_threshold: 0.5
1313
rerank_top_k: 5
@@ -534,3 +534,37 @@ knowledge_base:
534534
split_markers: ["\n\n", "\n", "。"]
535535
image_processing:
536536
enable_multimodal: true
537+
538+
extract_graph:
539+
description: "依据给定文本,按逻辑顺序提取关键信息作为实体,全面补充实体的详细属性。在此基础上,准确提取实体间有效关系,并根据需要补充实体相关属性,确保信息完整、准确且逻辑清晰。同时,务必准确识别出关系涉及的两个主体,明确它们分别是谁。"
540+
examples:
541+
- text: "《红楼梦》,又名《石头记》,是清代作家曹雪芹创作的中国古典四大名著之一,被誉为中国封建社会的百科全书。该书前80回由曹雪芹所著,后40回一般认为是高鹗所续。小说以贾、史、王、薛四大家族的兴衰为背景,以贾宝玉、林黛玉和薛宝钗的爱情悲剧为主线,刻画了以贾宝玉和金陵十二钗为中心的正邪两赋、贤愚并出的高度复杂的人物群像。成书于乾隆年间(1743年前后),是中国文学史上现实主义的高峰,对后世影响深远。"
542+
node:
543+
- name: "红楼梦"
544+
attributes:
545+
作者: "曹雪芹"
546+
地位: "中国古典四大名著之一"
547+
- name: "曹雪芹"
548+
attributes:
549+
职业: "作者"
550+
介绍: "曹雪芹是清代作家,红楼梦的作者,创作了前80回"
551+
relation:
552+
- source:
553+
name: "红楼梦"
554+
target:
555+
name: "曹雪芹"
556+
type: "作者与作品"
557+
attributes:
558+
关系: "曹雪芹是红楼梦的主要作者,创作了前80回"
559+
extract_entity:
560+
description: |
561+
请基于用户给的问题,按以下步骤处理关键信息提取任务:
562+
1. 梳理逻辑关联:首先完整分析文本内容,明确其核心逻辑关系,并简要标注该核心逻辑类型;
563+
2. 提取关键实体:围绕梳理出的逻辑关系,精准提取文本中的关键信息并归类为明确实体,确保不遗漏核心信息、不添加冗余内容;
564+
3. 排序实体优先级:按实体与文本核心主题的关联紧密程度排序,优先呈现对理解文本主旨最重要的实体。
565+
examples:
566+
- text: "《红楼梦》,又名《石头记》,是清代作家曹雪芹创作的中国古典四大名著之一,被誉为中国封建社会的百科全书。"
567+
node:
568+
- name: "红楼梦"
569+
- name: "曹雪芹"
570+
- name: "中国古典四大名著"

docker-compose.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ services:
5454
- REDIS_DB=${REDIS_DB:-}
5555
- REDIS_PREFIX=${REDIS_PREFIX:-}
5656
- ENABLE_GRAPH_RAG=${ENABLE_GRAPH_RAG:-}
57+
- ENABLE_NEO4J_GRAPH=${ENABLE_NEO4J_GRAPH:-}
58+
- NEO4J_URI=${NEO4J_URI:-}
59+
- NEO4J_USERNAME=${NEO4J_USERNAME:-}
60+
- NEO4J_PASSWORD=${NEO4J_PASSWORD:-}
5761
- TENANT_AES_KEY=${TENANT_AES_KEY:-}
5862
- CONCURRENCY_POOL_SIZE=${CONCURRENCY_POOL_SIZE:-5}
5963
- INIT_LLM_MODEL_NAME=${INIT_LLM_MODEL_NAME:-}

go.mod

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ require (
1414
github.com/google/uuid v1.6.0
1515
github.com/hibiken/asynq v0.25.1
1616
github.com/minio/minio-go/v7 v7.0.90
17+
github.com/neo4j/neo4j-go-driver/v6 v6.0.0-alpha.1
1718
github.com/ollama/ollama v0.11.4
1819
github.com/panjf2000/ants/v2 v2.11.2
1920
github.com/parquet-go/parquet-go v0.25.0
2021
github.com/pgvector/pgvector-go v0.3.0
21-
github.com/redis/go-redis/v9 v9.7.3
22+
github.com/redis/go-redis/v9 v9.14.0
2223
github.com/sashabaranov/go-openai v1.40.5
2324
github.com/sirupsen/logrus v1.9.3
2425
github.com/spf13/viper v1.20.1
@@ -35,7 +36,7 @@ require (
3536
golang.org/x/crypto v0.42.0
3637
golang.org/x/sync v0.17.0
3738
google.golang.org/grpc v1.73.0
38-
google.golang.org/protobuf v1.36.6
39+
google.golang.org/protobuf v1.36.9
3940
gorm.io/driver/postgres v1.5.11
4041
gorm.io/gorm v1.25.12
4142
)
@@ -92,7 +93,7 @@ require (
9293
github.com/sagikazarmark/locafero v0.7.0 // indirect
9394
github.com/sourcegraph/conc v0.3.0 // indirect
9495
github.com/spf13/afero v1.12.0 // indirect
95-
github.com/spf13/cast v1.7.1 // indirect
96+
github.com/spf13/cast v1.10.0 // indirect
9697
github.com/spf13/pflag v1.0.6 // indirect
9798
github.com/subosito/gotenv v1.6.0 // indirect
9899
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
@@ -105,7 +106,7 @@ require (
105106
golang.org/x/net v0.43.0 // indirect
106107
golang.org/x/sys v0.36.0 // indirect
107108
golang.org/x/text v0.29.0 // indirect
108-
golang.org/x/time v0.11.0 // indirect
109+
golang.org/x/time v0.13.0 // indirect
109110
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect
110111
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect
111112
gopkg.in/yaml.v3 v3.0.1 // indirect

go.sum

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
141141
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
142142
github.com/mozillazg/go-httpheader v0.2.1 h1:geV7TrjbL8KXSyvghnFm+NyTux/hxwueTSrwhe88TQQ=
143143
github.com/mozillazg/go-httpheader v0.2.1/go.mod h1:jJ8xECTlalr6ValeXYdOF8fFUISeBAdw6E61aqQma60=
144+
github.com/neo4j/neo4j-go-driver/v6 v6.0.0-alpha.1 h1:nV3ZdYJTi73jel0mm3dpWumNY3i3nwyo25y69SPGwyg=
145+
github.com/neo4j/neo4j-go-driver/v6 v6.0.0-alpha.1/go.mod h1:hzSTfNfM31p1uRSzL1F/BAYOgaiTarE6OAQBajfsm+I=
144146
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
145147
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
146148
github.com/ollama/ollama v0.11.4 h1:6xLYLEPTKtw6N20qQecyEL/rrBktPO4o5U05cnvkSmI=
@@ -157,8 +159,8 @@ github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ
157159
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
158160
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
159161
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
160-
github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM=
161-
github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA=
162+
github.com/redis/go-redis/v9 v9.14.0 h1:u4tNCjXOyzfgeLN+vAZaW1xUooqWDqVEsZN0U01jfAE=
163+
github.com/redis/go-redis/v9 v9.14.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw=
162164
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
163165
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
164166
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
@@ -178,8 +180,8 @@ github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9yS
178180
github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
179181
github.com/spf13/afero v1.12.0 h1:UcOPyRBYczmFn6yvphxkn9ZEOY65cpwGKb5mL36mrqs=
180182
github.com/spf13/afero v1.12.0/go.mod h1:ZTlWwG4/ahT8W7T0WQ5uYmjI9duaLQGy3Q2OAl4sk/4=
181-
github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
182-
github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
183+
github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
184+
github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
183185
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
184186
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
185187
github.com/spf13/viper v1.20.1 h1:ZMi+z/lvLyPSCoNtFCpqjy0S4kPbirhpTMwl8BkW9X4=
@@ -269,17 +271,17 @@ golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ=
269271
golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA=
270272
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
271273
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
272-
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
273-
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
274+
golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI=
275+
golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
274276
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
275277
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 h1:oWVWY3NzT7KJppx2UKhKmzPq4SRe0LdCijVRwvGeikY=
276278
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822/go.mod h1:h3c4v36UTKzUiuaOKQ6gr3S+0hovBtUrXzTG/i3+XEc=
277279
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 h1:fc6jSaCT0vBduLYZHYrBBNY4dsWuvgyff9noRNDdBeE=
278280
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
279281
google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok=
280282
google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc=
281-
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
282-
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
283+
google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
284+
google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
283285
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
284286
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
285287
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=

0 commit comments

Comments
 (0)