Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions components/indexer/pinecone/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Pinecone Indexer

English | [简体中文](README_zh.md)

A Pinecone indexer implementation for [Eino](https://github.com/cloudwego/eino) that implements the `Indexer` interface. This enables seamless integration with Eino's vector storage and retrieval system for enhanced semantic search capabilities.

## Quick Start

### Installation

This component requires v3.x of the go-pinecone client:

```bash
go get github.com/pinecone-io/go-pinecone/v3@latest
go get github.com/cloudwego/eino-ext/components/indexer/pinecone@latest
```

### Create the Pinecone Indexer

```go
package main

import (
"context"
"log"
"os"

pc "github.com/pinecone-io/go-pinecone/v3/pinecone"
"github.com/cloudwego/eino-ext/components/indexer/pinecone"
"github.com/cloudwego/eino/components/embedding"
"github.com/cloudwego/eino/schema"
)

// main demonstrates creating a Pinecone-backed Eino indexer and storing two
// documents with it. The API key is read from the PINECONE_APIKEY environment
// variable; any failure terminates the program through log.Fatal/log.Fatalf.
func main() {
	// Load configuration from environment variables
	apiKey := os.Getenv("PINECONE_APIKEY")
	if apiKey == "" {
		log.Fatal("PINECONE_APIKEY environment variable is required")
	}

	// Initialize Pinecone client
	client, err := pc.NewClient(pc.NewClientParams{
		ApiKey: apiKey,
	})
	if err != nil {
		log.Fatalf("Failed to create Pinecone client: %v", err)
	}

	// Create Pinecone indexer config
	config := pinecone.IndexerConfig{
		Client:    client,
		Dimension: 2560, // Set according to your embedding model
		Embedding: &mockEmbedding{},
	}

	// Create an indexer
	ctx := context.Background()
	indexer, err := pinecone.NewIndexer(ctx, &config)
	if err != nil {
		log.Fatalf("Failed to create Pinecone indexer: %v", err)
	}
	log.Println("Indexer created successfully")

	// Store documents
	docs := []*schema.Document{
		{
			ID:      "pinecone-1",
			Content: "pinecone is a vector database",
			MetaData: map[string]any{
				"tag1": "pinecone",
				"tag2": "vector",
				"tag3": "database",
			},
		},
		{
			ID:      "pinecone-2",
			Content: "Pinecone is a vector database for building accurate and performant AI applications.",
		},
	}

	ids, err := indexer.Store(ctx, docs)
	if err != nil {
		// log.Fatalf exits the process, so no return is needed after it.
		log.Fatalf("Failed to store documents: %v", err)
	}
	log.Printf("Stored document ids: %v", ids)
}

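// mockEmbedding is a placeholder for your embedding implementation.
// Replace it with your actual embedding model: main references mockEmbedding,
// so this example will not compile until a type like the sketch below is defined.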

// type mockEmbedding struct{}
// func (m *mockEmbedding) EmbedStrings(ctx context.Context, texts []string, opts ...embedding.Option) ([][]float64, error) {
// // Implement your embedding logic here
// }
```

## Configuration

The following configuration options are available in `IndexerConfig`:

| Field | Type | Description | Default |
|---------------------|-------------------------|------------------------------------------------------------------|-----------------|
| Client | *pinecone.Client | Pinecone client instance (required) | - |
| IndexName | string | Name of the Pinecone index | "eino-index" |
| Cloud | pinecone.Cloud | Cloud provider (e.g., "aws") | "aws" |
| Region | string | Cloud region (e.g., "us-east-1") | "us-east-1" |
| Metric | pinecone.IndexMetric | Distance metric: "cosine", "euclidean", "dotproduct" | "cosine" |
| Dimension | int32 | Vector dimension | 1536 |
| VectorType | string | Type of vectors (e.g., "dense") | "dense" |
| Namespace | string | Namespace within the index | "eino_space" |
| Field | string | Field to store content text | "__content__" |
| Tags | *pinecone.IndexTags | Metadata tags | (optional) |
| DeletionProtection | pinecone.DeletionProtection | Deletion protection | "disabled" |
| DocumentConverter | func | Custom document converter | (optional) |
| BatchSize | int | Batch size for upserts | 200 |
| MaxConcurrency | int | Max concurrency for upserts | 100 |
| Embedding | embedding.Embedder | Embedding model instance | (required) |

## License

Apache 2.0. See [LICENSE](../../LICENSE) for details.
122 changes: 122 additions & 0 deletions components/indexer/pinecone/README_zh.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Pinecone 存储

[English](README.md) | 简体中文

基于 Pinecone 的向量存储实现,为 [Eino](https://github.com/cloudwego/eino) 提供了符合 `Indexer` 接口的存储方案。该组件可无缝集成到 Eino 的向量存储和检索系统中,增强语义搜索能力。

## 快速开始

### 安装

需要 go-pinecone v3.x 客户端:

```bash
go get github.com/pinecone-io/go-pinecone/v3@latest
go get github.com/cloudwego/eino-ext/components/indexer/pinecone@latest
```

### 创建 Pinecone 存储

```go
package main

import (
"context"
"log"
"os"

pc "github.com/pinecone-io/go-pinecone/v3/pinecone"
"github.com/cloudwego/eino-ext/components/indexer/pinecone"
"github.com/cloudwego/eino/components/embedding"
"github.com/cloudwego/eino/schema"
)

// main demonstrates creating a Pinecone-backed Eino indexer and storing two
// documents with it. The API key is read from the PINECONE_APIKEY environment
// variable; any failure terminates the program through log.Fatal/log.Fatalf.
func main() {
	// Load configuration from environment variables
	apiKey := os.Getenv("PINECONE_APIKEY")
	if apiKey == "" {
		log.Fatal("PINECONE_APIKEY 环境变量必填")
	}

	// Initialize the Pinecone client
	client, err := pc.NewClient(pc.NewClientParams{
		ApiKey: apiKey,
	})
	if err != nil {
		log.Fatalf("创建 Pinecone 客户端失败: %v", err)
	}

	// Create the Pinecone indexer config
	config := pinecone.IndexerConfig{
		Client:    client,
		Dimension: 2560, // Set according to your embedding dimension
		Embedding: &mockEmbedding{},
	}

	// Create the Indexer
	ctx := context.Background()
	indexer, err := pinecone.NewIndexer(ctx, &config)
	if err != nil {
		log.Fatalf("创建 Pinecone Indexer 失败: %v", err)
	}
	log.Println("Indexer 创建成功")

	// Store documents
	docs := []*schema.Document{
		{
			ID:      "pinecone-1",
			Content: "pinecone 是一个向量数据库",
			MetaData: map[string]any{
				"tag1": "pinecone",
				"tag2": "vector",
				"tag3": "database",
			},
		},
		{
			ID:      "pinecone-2",
			Content: "Pinecone 是为 AI 应用构建的高性能向量数据库。",
		},
	}

	ids, err := indexer.Store(ctx, docs)
	if err != nil {
		// log.Fatalf exits the process, so no return is needed after it.
		log.Fatalf("存储文档失败: %v", err)
	}
	log.Printf("已存储文档 ids: %v", ids)
}

// mockEmbedding 是 embedding 实现的占位符
// 请替换为你自己的 embedding 模型

// type mockEmbedding struct{}
// func (m *mockEmbedding) EmbedStrings(ctx context.Context, texts []string, opts ...embedding.Option) ([][]float64, error) {
// // 实现 embedding 逻辑
// }
```

## 配置说明

`IndexerConfig` 支持如下配置项:

| 字段 | 类型 | 说明 | 默认值 |
|--------------------|-----------------------------|----------------------------------------|----------------|
| Client | *pinecone.Client | Pinecone 客户端实例(必填) | - |
| IndexName | string | Pinecone 索引名称 | "eino-index" |
| Cloud | pinecone.Cloud | 云服务商(如 "aws") | "aws" |
| Region | string | 区域(如 "us-east-1") | "us-east-1" |
| Metric | pinecone.IndexMetric | 距离度量:"cosine"、"euclidean"、"dotproduct" | "cosine" |
| Dimension | int32 | 向量维度 | 1536 |
| VectorType | string | 向量类型(如 "dense") | "dense" |
| Namespace | string | Pinecone 命名空间 | "eino_space" |
| Field | string | 存储内容文本的字段 | "__content__" |
| Tags | *pinecone.IndexTags | 元数据标签 | (可选) |
| DeletionProtection | pinecone.DeletionProtection | 删除保护 | "disabled" |
| DocumentConverter | func | 自定义文档转换器 | (可选) |
| BatchSize | int | 批量 upsert 的大小 | 200 |
| MaxConcurrency | int | 并发 upsert 的最大协程数 | 100 |
| Embedding | embedding.Embedder | embedding 模型实例 | (必填) |

## 许可证

Apache 2.0。详见 [LICENSE](../../LICENSE)。
37 changes: 37 additions & 0 deletions components/indexer/pinecone/consts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright 2025 CloudWeGo Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package pinecone

import "github.com/pinecone-io/go-pinecone/v3/pinecone"

// Package-level defaults for the Pinecone indexer.
// NOTE(review): presumably substituted for IndexerConfig fields the caller
// leaves unset; the code that applies them is outside this file — confirm.
const (
	// typ is the identifier string for this implementation.
	typ = "pinecone"

	// Index identity and placement.
	defaultIndexName = "eino-index"
	defaultNamespace = "eino_space"
	defaultCloud     = pinecone.Aws
	defaultRegion    = "us-east-1"

	// Vector layout and similarity metric.
	defaultVectorType = "dense"
	defaultDimension  = int32(1536)
	defaultMetric     = pinecone.Cosine

	// Content field name and index deletion protection.
	defaultField              = "__content__"
	defaultDeletionProtection = pinecone.DeletionProtectionDisabled
)

// defaultBatchSize is the default batch size.
// NOTE(review): presumably the number of documents sent per upsert call — confirm against the indexer.
const defaultBatchSize = 200

// defaultMaxConcurrency is the default concurrency limit.
// NOTE(review): presumably the cap on concurrently running upsert batches — confirm against the indexer.
const defaultMaxConcurrency = 100
Loading