diff --git a/app_llm.go b/app_llm.go
index 2d3fe9ec..62b7986d 100644
--- a/app_llm.go
+++ b/app_llm.go
@@ -165,17 +165,34 @@ func (app *App) getSuggestedTags(
}
// getSuggestedTitle generates a suggested title for a document using the LLM
-func (app *App) getSuggestedTitle(ctx context.Context, content string, originalTitle string, logger *logrus.Entry) (string, error) {
+func (app *App) getSuggestedTitle(ctx context.Context, documentID int, content string, originalTitle string, logger *logrus.Entry) (string, error) {
likelyLanguage := getLikelyLanguage()
+ // Fetch similar documents to help with title consistency
+ var similarDocumentTitles []string
+ similarDocs, err := app.Client.GetSimilarDocuments(ctx, documentID, 5) // Get up to 5 similar documents
+ if err != nil {
+ // Log the error but don't fail the title generation
+ logger.Debugf("Failed to fetch similar documents for title consistency: %v", err)
+ } else {
+ // Extract titles from similar documents
+ for _, doc := range similarDocs {
+ if doc.Title != "" && doc.Title != originalTitle {
+ similarDocumentTitles = append(similarDocumentTitles, doc.Title)
+ }
+ }
+ logger.Debugf("Found %d similar documents for title consistency", len(similarDocumentTitles))
+ }
+
templateMutex.RLock()
defer templateMutex.RUnlock()
// Get available tokens for content
templateData := map[string]interface{}{
- "Language": likelyLanguage,
- "Content": content,
- "Title": originalTitle,
+ "Language": likelyLanguage,
+ "Content": content,
+ "Title": originalTitle,
+ "SimilarDocumentTitles": similarDocumentTitles,
}
availableTokens, err := getAvailableTokensForContent(titleTemplate, templateData)
@@ -448,7 +465,7 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
var suggestedCustomFields []CustomFieldSuggestion
if suggestionRequest.GenerateTitles {
- suggestedTitle, err = app.getSuggestedTitle(ctx, content, suggestedTitle, docLogger)
+ suggestedTitle, err = app.getSuggestedTitle(ctx, documentID, content, suggestedTitle, docLogger)
if err != nil {
mu.Lock()
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
diff --git a/app_llm_test.go b/app_llm_test.go
index 57833be1..b9b97f1c 100644
--- a/app_llm_test.go
+++ b/app_llm_test.go
@@ -104,8 +104,10 @@ func TestPromptTokenLimits(t *testing.T) {
// Create a test app with mock LLM
mockLLM := &mockLLM{}
+ mockClient := &mockPaperlessClient{}
app := &App{
- LLM: mockLLM,
+ LLM: mockLLM,
+ Client: mockClient,
}
// Set up test template
@@ -157,7 +159,7 @@ Content: {{.Content}}
// Test with the app's LLM
ctx := context.Background()
- _, err = app.getSuggestedTitle(ctx, truncatedContent, "Test Title", testLogger)
+ _, err = app.getSuggestedTitle(ctx, 1, truncatedContent, "Test Title", testLogger)
require.NoError(t, err)
// Verify truncation
@@ -269,8 +271,10 @@ func TestTokenLimitInTitleGeneration(t *testing.T) {
// Create a test app with mock LLM
mockLLM := &mockLLM{}
+ mockClient := &mockPaperlessClient{}
app := &App{
- LLM: mockLLM,
+ LLM: mockLLM,
+ Client: mockClient,
}
// Test content that would exceed reasonable token limits
@@ -284,7 +288,7 @@ func TestTokenLimitInTitleGeneration(t *testing.T) {
// Call getSuggestedTitle
ctx := context.Background()
- _, err := app.getSuggestedTitle(ctx, longContent, "Original Title", testLogger)
+ _, err := app.getSuggestedTitle(ctx, 1, longContent, "Original Title", testLogger)
require.NoError(t, err)
// Verify the final prompt size
@@ -360,8 +364,9 @@ func TestStripReasoning(t *testing.T) {
// mockPaperlessClient is a mock implementation of the ClientInterface for testing.
type mockPaperlessClient struct {
- CustomFields []CustomField
- Error error
+ CustomFields []CustomField
+ SimilarDocuments []Document
+ Error error
}
func (m *mockPaperlessClient) GetCustomFields(ctx context.Context) ([]CustomField, error) {
@@ -406,6 +411,12 @@ func (m *mockPaperlessClient) GetTaskStatus(ctx context.Context, taskID string)
return nil, nil
}
func (m *mockPaperlessClient) DeleteDocument(ctx context.Context, documentID int) error { return nil }
+// GetSimilarDocuments is the mock's similar-document lookup: it fails with m.Error when
+// one is configured and otherwise hands back the canned m.SimilarDocuments. The
+// documentID and count arguments are ignored.
+func (m *mockPaperlessClient) GetSimilarDocuments(ctx context.Context, documentID int, count int) ([]Document, error) {
+	if m.Error == nil {
+		return m.SimilarDocuments, nil
+	}
+	return nil, m.Error
+}
func TestGetSuggestedCustomFields(t *testing.T) {
// 1. Setup
@@ -484,3 +495,135 @@ func findFieldByID(fields []CustomFieldSuggestion, id int) (CustomFieldSuggestio
}
return CustomFieldSuggestion{}, false
}
+
+// TestGetSuggestedTitle_WithSimilarDocuments checks that the titles of similar documents
+// reported by the client are rendered into the prompt getSuggestedTitle sends to the LLM,
+// and that the LLM response is returned as the suggested title.
+func TestGetSuggestedTitle_WithSimilarDocuments(t *testing.T) {
+	testLogger := logrus.WithField("test", "test")
+
+	// Set higher token limit for this test, and afterwards put the environment back
+	// exactly as it was: a previously unset variable is unset again rather than being
+	// left behind as an empty string.
+	originalLimit, hadLimit := os.LookupEnv("TOKEN_LIMIT")
+	os.Setenv("TOKEN_LIMIT", "200")
+	resetTokenLimit()
+	defer func() {
+		if hadLimit {
+			os.Setenv("TOKEN_LIMIT", originalLimit)
+		} else {
+			os.Unsetenv("TOKEN_LIMIT")
+		}
+		resetTokenLimit()
+	}()
+
+	// titleTemplate is package-level state that this test overwrites below; restore the
+	// previous template so tests that run afterwards are not affected.
+	originalTemplate := titleTemplate
+	defer func() { titleTemplate = originalTemplate }()
+
+	// Create a mock client that returns similar documents
+	mockClient := &mockPaperlessClient{
+		SimilarDocuments: []Document{
+			{ID: 2, Title: "Invoice January 2023 - Company ABC"},
+			{ID: 3, Title: "Invoice February 2023 - Company ABC"},
+			{ID: 4, Title: "Receipt March 2023 - Company XYZ"},
+		},
+	}
+
+	mockLLM := &mockLLM{Response: "Invoice March 2023 - Company ABC"}
+	app := &App{
+		LLM:    mockLLM,
+		Client: mockClient,
+	}
+
+	// Set up title template
+	var err error
+	titleTemplate, err = template.New("title").Parse(`I will provide you with the content of a document.
+Your task is to find a suitable document title.
+{{if .SimilarDocumentTitles}}I have found some similar documents with the following titles:
+{{range .SimilarDocumentTitles}}- {{.}}
+{{end}}Please try to be consistent with the naming pattern.{{end}}
+
+{{.Title}}
+{{.Content}}`)
+	require.NoError(t, err)
+
+	ctx := context.Background()
+	content := "This is an invoice from Company ABC for March 2023 services."
+	originalTitle := "document.pdf"
+
+	title, err := app.getSuggestedTitle(ctx, 1, content, originalTitle, testLogger)
+	require.NoError(t, err)
+	assert.Equal(t, "Invoice March 2023 - Company ABC", title)
+
+	// Verify that the prompt included similar document titles
+	assert.Contains(t, mockLLM.lastPrompt, "Invoice January 2023 - Company ABC")
+	assert.Contains(t, mockLLM.lastPrompt, "Invoice February 2023 - Company ABC")
+	assert.Contains(t, mockLLM.lastPrompt, "Receipt March 2023 - Company XYZ")
+	assert.Contains(t, mockLLM.lastPrompt, "Please try to be consistent with the naming pattern")
+}
+
+// TestGetSuggestedTitle_NoSimilarDocuments checks that when the client reports no similar
+// documents, the title is still generated and the similar-documents section of the
+// template is left out of the prompt.
+func TestGetSuggestedTitle_NoSimilarDocuments(t *testing.T) {
+	testLogger := logrus.WithField("test", "test")
+
+	// titleTemplate is package-level state that this test overwrites below; restore the
+	// previous template so tests that run afterwards are not affected.
+	originalTemplate := titleTemplate
+	defer func() { titleTemplate = originalTemplate }()
+
+	// Create a mock client that returns no similar documents
+	mockClient := &mockPaperlessClient{
+		SimilarDocuments: []Document{},
+	}
+
+	mockLLM := &mockLLM{Response: "Contract Agreement 2023"}
+	app := &App{
+		LLM:    mockLLM,
+		Client: mockClient,
+	}
+
+	// Set up title template
+	var err error
+	titleTemplate, err = template.New("title").Parse(`I will provide you with the content of a document.
+Your task is to find a suitable document title.
+{{if .SimilarDocumentTitles}}I have found some similar documents with the following titles:
+{{range .SimilarDocumentTitles}}- {{.}}
+{{end}}Please try to be consistent with the naming pattern.{{end}}
+
+{{.Title}}
+{{.Content}}`)
+	require.NoError(t, err)
+
+	ctx := context.Background()
+	content := "This is a contract agreement document."
+	originalTitle := "document.pdf"
+
+	title, err := app.getSuggestedTitle(ctx, 1, content, originalTitle, testLogger)
+	require.NoError(t, err)
+	assert.Equal(t, "Contract Agreement 2023", title)
+
+	// Verify that the prompt did not include the similar documents section
+	assert.NotContains(t, mockLLM.lastPrompt, "I have found some similar documents")
+	assert.NotContains(t, mockLLM.lastPrompt, "Please try to be consistent with the naming pattern")
+}
+
+// TestGetSuggestedTitle_SimilarDocumentsError checks that a failing similar-documents
+// lookup does not fail title generation: the LLM title is still returned and the prompt
+// contains no similar-documents section.
+func TestGetSuggestedTitle_SimilarDocumentsError(t *testing.T) {
+	testLogger := logrus.WithField("test", "test")
+
+	// titleTemplate is package-level state that this test overwrites below; restore the
+	// previous template so tests that run afterwards are not affected.
+	originalTemplate := titleTemplate
+	defer func() { titleTemplate = originalTemplate }()
+
+	// Create a mock client that returns an error for similar documents
+	mockClient := &mockPaperlessClient{
+		Error: fmt.Errorf("API error"),
+	}
+
+	mockLLM := &mockLLM{Response: "Generated Title"}
+	app := &App{
+		LLM:    mockLLM,
+		Client: mockClient,
+	}
+
+	// Set up title template
+	var err error
+	titleTemplate, err = template.New("title").Parse(`I will provide you with the content of a document.
+Your task is to find a suitable document title.
+{{if .SimilarDocumentTitles}}I have found some similar documents with the following titles:
+{{range .SimilarDocumentTitles}}- {{.}}
+{{end}}Please try to be consistent with the naming pattern.{{end}}
+
+{{.Title}}
+{{.Content}}`)
+	require.NoError(t, err)
+
+	ctx := context.Background()
+	content := "This is a document."
+	originalTitle := "document.pdf"
+
+	// Should still work even if similar documents lookup fails
+	title, err := app.getSuggestedTitle(ctx, 1, content, originalTitle, testLogger)
+	require.NoError(t, err)
+	assert.Equal(t, "Generated Title", title)
+
+	// Verify that the prompt did not include the similar documents section
+	assert.NotContains(t, mockLLM.lastPrompt, "I have found some similar documents")
+}
diff --git a/default_prompts/title_prompt.tmpl b/default_prompts/title_prompt.tmpl
index df6c7f16..d128ccee 100644
--- a/default_prompts/title_prompt.tmpl
+++ b/default_prompts/title_prompt.tmpl
@@ -1,6 +1,9 @@
I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
Your task is to find a suitable document title that I can use as the title in the paperless-ngx program.
If the original title is already adding value and not just a technical filename you can use it as extra information to enhance your suggestion.
+{{if .SimilarDocumentTitles}}I have found some similar documents in the database with the following titles that might help you maintain consistency:
+{{range .SimilarDocumentTitles}}- {{.}}
+{{end}}Please try to be consistent with the naming pattern of these similar documents if they provide informative titles.{{end}}
Respond only with the title, without any additional information. The content is likely in {{.Language}}.
The data will be provided using an XML-like format for clarity:
diff --git a/paperless.go b/paperless.go
index fd4b91f2..e7335290 100644
--- a/paperless.go
+++ b/paperless.go
@@ -395,6 +395,105 @@ func (client *PaperlessClient) GetDocument(ctx context.Context, documentID int)
}, nil
}
+// GetSimilarDocuments retrieves up to maxResults documents that the server reports as
+// similar to the document with the given ID (a more_like_id query ordered by descending
+// score, with truncated content).
+//
+// Documents carrying the manualTag or autoTag tag are excluded from the search, and the
+// reference document itself is dropped if the server returns it. Tag and correspondent
+// IDs in the results are resolved to names; tag IDs missing from the tag list are
+// omitted, and an unknown correspondent is left as an empty string.
+//
+// An error is returned when the tag or correspondent lookup fails, when the HTTP request
+// fails or is answered with a non-200 status, or when the response body cannot be parsed.
+func (client *PaperlessClient) GetSimilarDocuments(ctx context.Context, documentID int, maxResults int) ([]Document, error) {
+	// The tag list is needed twice: to find the tag IDs to exclude and to resolve the
+	// tag IDs of the results back to names.
+	allTags, err := client.GetAllTags(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get tags for exclusion: %w", err)
+	}
+
+	// Look the paperless-gpt tags up by name instead of ranging over the map, so the
+	// exclusion list (and therefore the query string) has a deterministic order.
+	var excludeTagIDs []string
+	if tagID, ok := allTags[manualTag]; ok {
+		excludeTagIDs = append(excludeTagIDs, fmt.Sprintf("%d", tagID))
+	}
+	// Guard against both settings naming the same tag so its ID is not listed twice.
+	if tagID, ok := allTags[autoTag]; ok && autoTag != manualTag {
+		excludeTagIDs = append(excludeTagIDs, fmt.Sprintf("%d", tagID))
+	}
+
+	// Build the query path with tag exclusions
+	path := fmt.Sprintf("api/documents/?ordering=-score&truncate_content=true&more_like_id=%d&page_size=%d", documentID, maxResults)
+	if len(excludeTagIDs) > 0 {
+		path += "&tags__id__none=" + strings.Join(excludeTagIDs, ",")
+	}
+
+	resp, err := client.Do(ctx, "GET", path, nil)
+	if err != nil {
+		return nil, fmt.Errorf("HTTP request failed in GetSimilarDocuments: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Read the whole body up front: it is needed for both error reporting and parsing.
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		log.WithFields(logrus.Fields{
+			"status_code": resp.StatusCode,
+			"path":        path,
+			"response":    string(bodyBytes),
+			"headers":     resp.Header,
+		}).Error("Error response from server in GetSimilarDocuments")
+		return nil, fmt.Errorf("error searching similar documents: status=%d, body=%s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var documentsResponse GetDocumentsApiResponse
+	if err := json.Unmarshal(bodyBytes, &documentsResponse); err != nil {
+		log.WithFields(logrus.Fields{
+			"response_body": string(bodyBytes),
+			"error":         err,
+		}).Error("Failed to parse JSON response in GetSimilarDocuments")
+		return nil, fmt.Errorf("failed to parse JSON response: %w", err)
+	}
+
+	documents := make([]Document, 0, len(documentsResponse.Results))
+	if len(documentsResponse.Results) == 0 {
+		// Nothing to resolve, so skip the correspondents round trip entirely.
+		return documents, nil
+	}
+
+	allCorrespondents, err := client.GetAllCorrespondents(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get correspondents for similar documents: %w", err)
+	}
+
+	// Invert the name->ID tag map once so each tag of each result is a single lookup
+	// instead of a scan over all tags.
+	tagNameByID := make(map[int]string, len(allTags))
+	for tagName, tagID := range allTags {
+		tagNameByID[tagID] = tagName
+	}
+
+	for _, result := range documentsResponse.Results {
+		// Skip the document itself if it appears in the results
+		if result.ID == documentID {
+			continue
+		}
+
+		// Only keep tags whose ID is known; an unknown ID would otherwise show up as
+		// an empty tag name.
+		tagNames := make([]string, 0, len(result.Tags))
+		for _, resultTagID := range result.Tags {
+			if tagName, ok := tagNameByID[resultTagID]; ok {
+				tagNames = append(tagNames, tagName)
+			}
+		}
+
+		// A correspondent ID of 0 means the document has none assigned.
+		correspondentName := ""
+		if result.Correspondent != 0 {
+			for name, id := range allCorrespondents {
+				if result.Correspondent == id {
+					correspondentName = name
+					break
+				}
+			}
+		}
+
+		documents = append(documents, Document{
+			ID:            result.ID,
+			Title:         result.Title,
+			Content:       result.Content,
+			Correspondent: correspondentName,
+			Tags:          tagNames,
+			CreatedDate:   result.CreatedDate,
+		})
+	}
+
+	return documents, nil
+}
+
// UpdateDocuments updates the specified documents with suggested changes
func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion, db *gorm.DB, isUndo bool) error {
availableTags, err := client.GetAllTags(ctx)
@@ -441,7 +540,7 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []
if !hasSameTags(originalDoc.Tags, finalTagNames) {
originalFields["tags"] = originalDoc.Tags
- var newTagIDs []int
+ var newTagIDs []int = []int{}
for _, tagName := range finalTagNames {
if tagID, exists := availableTags[tagName]; exists {
newTagIDs = append(newTagIDs, tagID)
diff --git a/paperless_test.go b/paperless_test.go
index ad4e595a..105ca36a 100644
--- a/paperless_test.go
+++ b/paperless_test.go
@@ -8,6 +8,7 @@ import (
"net/http"
"net/http/httptest"
"os"
+ "strings"
"testing"
"github.com/stretchr/testify/assert"
@@ -476,3 +477,269 @@ func TestDownloadDocumentAsPDF(t *testing.T) {
// Testing with splitting=true would be more complex so we'll skip that for simplicity
}
+
+// TestGetSimilarDocuments covers the successful path: the documents endpoint receives
+// the expected query parameters and the returned results are converted to Documents in
+// the order the server sent them.
+func TestGetSimilarDocuments(t *testing.T) {
+	env := newTestEnv(t)
+	defer env.teardown()
+
+	// Mock response for similar documents API
+	similarDocs := []GetDocumentApiResponseResult{
+		{
+			ID:    2,
+			Title: "Invoice January 2023 - Company ABC",
+		},
+		{
+			ID:    3,
+			Title: "Invoice February 2023 - Company ABC",
+		},
+		{
+			ID:    4,
+			Title: "Receipt March 2023 - Company XYZ",
+		},
+	}
+
+	response := GetDocumentsApiResponse{
+		Count:   3,
+		Results: similarDocs,
+	}
+
+	env.mockResponses["/api/documents/"] = func(w http.ResponseWriter, r *http.Request) {
+		// Verify query parameters
+		assert.Equal(t, "GET", r.Method)
+		assert.Equal(t, "-score", r.URL.Query().Get("ordering"))
+		assert.Equal(t, "true", r.URL.Query().Get("truncate_content"))
+		assert.Equal(t, "1", r.URL.Query().Get("more_like_id"))
+		assert.Equal(t, "5", r.URL.Query().Get("page_size"))
+
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(response)
+	}
+
+	// Mock the tags endpoint that GetSimilarDocuments calls (no correspondents handler
+	// is registered in this test).
+	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(map[string]interface{}{
+			"results": []map[string]interface{}{
+				{"id": 1, "name": "tag1"},
+			},
+			"next": nil,
+		})
+	})
+
+	// Test successful case
+	ctx := context.Background()
+	documents, err := env.client.GetSimilarDocuments(ctx, 1, 5)
+	require.NoError(t, err)
+	// require (not assert): the index expressions below would panic on a short slice.
+	require.Len(t, documents, 3)
+	assert.Equal(t, "Invoice January 2023 - Company ABC", documents[0].Title)
+	assert.Equal(t, "Invoice February 2023 - Company ABC", documents[1].Title)
+	assert.Equal(t, "Receipt March 2023 - Company XYZ", documents[2].Title)
+}
+
+// TestGetSimilarDocuments_NoResults covers an empty result page: the call succeeds and
+// yields a document list of length zero.
+func TestGetSimilarDocuments_NoResults(t *testing.T) {
+	env := newTestEnv(t)
+	defer env.teardown()
+
+	// Tags endpoint answers with an empty, single-page tag list.
+	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(map[string]interface{}{
+			"results": []map[string]interface{}{},
+			"next":    nil,
+		})
+	})
+
+	// Documents endpoint answers with a page that contains no results.
+	emptyPage := GetDocumentsApiResponse{
+		Count:   0,
+		Results: []GetDocumentApiResponseResult{},
+	}
+	env.mockResponses["/api/documents/"] = func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(emptyPage)
+	}
+
+	found, err := env.client.GetSimilarDocuments(context.Background(), 1, 5)
+	require.NoError(t, err)
+	assert.Len(t, found, 0)
+}
+
+// TestGetSimilarDocuments_Error covers a 500 response from the documents endpoint: the
+// call must fail with the "error searching similar documents" message and return no
+// documents.
+func TestGetSimilarDocuments_Error(t *testing.T) {
+	env := newTestEnv(t)
+	defer env.teardown()
+
+	// Add required mocks for tags (since GetSimilarDocuments calls GetAllTags first)
+	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(map[string]interface{}{
+			"results": []map[string]interface{}{},
+			"next":    nil,
+		})
+	})
+
+	env.mockResponses["/api/documents/"] = func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+		w.Write([]byte("Internal Server Error"))
+	}
+
+	ctx := context.Background()
+	documents, err := env.client.GetSimilarDocuments(ctx, 1, 5)
+	// require (not assert): err.Error() below would dereference nil if no error came back.
+	require.Error(t, err)
+	assert.Nil(t, documents)
+	assert.Contains(t, err.Error(), "error searching similar documents")
+}
+
+// TestGetSimilarDocuments_TagsError covers a failing tags endpoint: the call must fail
+// with the "failed to get tags for exclusion" message and return no documents.
+func TestGetSimilarDocuments_TagsError(t *testing.T) {
+	env := newTestEnv(t)
+	defer env.teardown()
+
+	// Mock tags endpoint to return an error
+	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+		w.Write([]byte("Tags API Error"))
+	})
+
+	ctx := context.Background()
+	documents, err := env.client.GetSimilarDocuments(ctx, 1, 5)
+	// require (not assert): err.Error() below would dereference nil if no error came back.
+	require.Error(t, err)
+	assert.Nil(t, documents)
+	assert.Contains(t, err.Error(), "failed to get tags for exclusion")
+}
+
+// TestGetSimilarDocuments_ExcludesPaperlessGPTTags checks that when the tag list contains
+// the manual and auto paperless-gpt tags, their IDs are sent as a tags__id__none filter
+// on the similar-documents query.
+func TestGetSimilarDocuments_ExcludesPaperlessGPTTags(t *testing.T) {
+	// Remember the environment and the package-level tag names so both can be put back
+	// exactly as they were (an unset variable is unset again, not set to "").
+	originalManualTag, hadManualTag := os.LookupEnv("MANUAL_TAG")
+	originalAutoTag, hadAutoTag := os.LookupEnv("AUTO_TAG")
+	previousManualTag, previousAutoTag := manualTag, autoTag
+	defer func() {
+		manualTag, autoTag = previousManualTag, previousAutoTag
+		if hadManualTag {
+			os.Setenv("MANUAL_TAG", originalManualTag)
+		} else {
+			os.Unsetenv("MANUAL_TAG")
+		}
+		if hadAutoTag {
+			os.Setenv("AUTO_TAG", originalAutoTag)
+		} else {
+			os.Unsetenv("AUTO_TAG")
+		}
+	}()
+
+	// Set the tag values and reinitialize the global variables
+	os.Setenv("MANUAL_TAG", "paperless-gpt")
+	os.Setenv("AUTO_TAG", "paperless-gpt-auto")
+	manualTag = "paperless-gpt"
+	autoTag = "paperless-gpt-auto"
+
+	env := newTestEnv(t)
+	defer env.teardown()
+
+	// Mock similar documents
+	similarDocs := []GetDocumentApiResponseResult{
+		{
+			ID:    2,
+			Title: "Test Document 1",
+		},
+	}
+
+	response := GetDocumentsApiResponse{
+		Count:   1,
+		Results: similarDocs,
+	}
+
+	// Track the received query parameters
+	var receivedQuery string
+	env.mockResponses["/api/documents/"] = func(w http.ResponseWriter, r *http.Request) {
+		receivedQuery = r.URL.RawQuery
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(response)
+	}
+
+	// Add required mocks for tags (include paperless-gpt tags)
+	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(map[string]interface{}{
+			"results": []map[string]interface{}{
+				{"id": 1, "name": "regular-tag"},
+				{"id": 2, "name": "paperless-gpt"},      // manualTag
+				{"id": 3, "name": "paperless-gpt-auto"}, // autoTag
+			},
+			"next": nil,
+		})
+	})
+
+	ctx := context.Background()
+	documents, err := env.client.GetSimilarDocuments(ctx, 1, 5)
+	require.NoError(t, err)
+	assert.Len(t, documents, 1)
+
+	// Verify that the query excludes the paperless-gpt tags
+	assert.Contains(t, receivedQuery, "ordering=-score")
+	assert.Contains(t, receivedQuery, "truncate_content=true")
+	assert.Contains(t, receivedQuery, "more_like_id=1")
+	assert.Contains(t, receivedQuery, "page_size=5")
+	// Check that tag exclusion is present (order may vary)
+	assert.True(t,
+		strings.Contains(receivedQuery, "tags__id__none=2,3") || strings.Contains(receivedQuery, "tags__id__none=3,2"),
+		"Should exclude paperless-gpt tags with IDs 2 and 3 (in any order), got: %s", receivedQuery)
+}
+
+// TestGetSimilarDocuments_NoPaperlessGPTTagsToExclude checks that when the tag list does
+// not contain the manual or auto paperless-gpt tags, the similar-documents query carries
+// no tags__id__none filter.
+func TestGetSimilarDocuments_NoPaperlessGPTTagsToExclude(t *testing.T) {
+	// Remember the environment and the package-level tag names so both can be put back
+	// exactly as they were (an unset variable is unset again, not set to "").
+	originalManualTag, hadManualTag := os.LookupEnv("MANUAL_TAG")
+	originalAutoTag, hadAutoTag := os.LookupEnv("AUTO_TAG")
+	previousManualTag, previousAutoTag := manualTag, autoTag
+	defer func() {
+		manualTag, autoTag = previousManualTag, previousAutoTag
+		if hadManualTag {
+			os.Setenv("MANUAL_TAG", originalManualTag)
+		} else {
+			os.Unsetenv("MANUAL_TAG")
+		}
+		if hadAutoTag {
+			os.Setenv("AUTO_TAG", originalAutoTag)
+		} else {
+			os.Unsetenv("AUTO_TAG")
+		}
+	}()
+
+	// Set the tag values and reinitialize the global variables
+	os.Setenv("MANUAL_TAG", "paperless-gpt")
+	os.Setenv("AUTO_TAG", "paperless-gpt-auto")
+	manualTag = "paperless-gpt"
+	autoTag = "paperless-gpt-auto"
+
+	env := newTestEnv(t)
+	defer env.teardown()
+
+	// Mock similar documents
+	similarDocs := []GetDocumentApiResponseResult{
+		{
+			ID:    2,
+			Title: "Test Document 1",
+		},
+	}
+
+	response := GetDocumentsApiResponse{
+		Count:   1,
+		Results: similarDocs,
+	}
+
+	// Track the received query parameters
+	var receivedQuery string
+	env.mockResponses["/api/documents/"] = func(w http.ResponseWriter, r *http.Request) {
+		receivedQuery = r.URL.RawQuery
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(response)
+	}
+
+	// Add required mocks for tags (no paperless-gpt tags this time)
+	env.setMockResponse("/api/tags/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(map[string]interface{}{
+			"results": []map[string]interface{}{
+				{"id": 1, "name": "regular-tag"},
+				{"id": 2, "name": "other-tag"},
+			},
+			"next": nil,
+		})
+	})
+
+	ctx := context.Background()
+	documents, err := env.client.GetSimilarDocuments(ctx, 1, 5)
+	require.NoError(t, err)
+	assert.Len(t, documents, 1)
+
+	// Verify that the query does not include tag exclusions when no paperless-gpt tags exist
+	assert.Contains(t, receivedQuery, "ordering=-score")
+	assert.Contains(t, receivedQuery, "truncate_content=true")
+	assert.Contains(t, receivedQuery, "more_like_id=1")
+	assert.Contains(t, receivedQuery, "page_size=5")
+	assert.NotContains(t, receivedQuery, "tags__id__none", "Should not include tag exclusions when no paperless-gpt tags exist")
+}
diff --git a/types.go b/types.go
index ffeec83c..fa17700c 100644
--- a/types.go
+++ b/types.go
@@ -161,6 +161,7 @@ type ClientInterface interface {
UploadDocument(ctx context.Context, data []byte, filename string, metadata map[string]interface{}) (string, error)
GetTaskStatus(ctx context.Context, taskID string) (map[string]interface{}, error)
DeleteDocument(ctx context.Context, documentID int) error
+ GetSimilarDocuments(ctx context.Context, documentID int, count int) ([]Document, error)
}
// DocumentProcessor defines the interface for processing documents with OCR