Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions app_llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,17 +165,34 @@ func (app *App) getSuggestedTags(
}

// getSuggestedTitle generates a suggested title for a document using the LLM
func (app *App) getSuggestedTitle(ctx context.Context, content string, originalTitle string, logger *logrus.Entry) (string, error) {
func (app *App) getSuggestedTitle(ctx context.Context, documentID int, content string, originalTitle string, logger *logrus.Entry) (string, error) {
likelyLanguage := getLikelyLanguage()

// Fetch similar documents to help with title consistency
var similarDocumentTitles []string
similarDocs, err := app.Client.GetSimilarDocuments(ctx, documentID, 5) // Get up to 5 similar documents
if err != nil {
// Log the error but don't fail the title generation
logger.Debugf("Failed to fetch similar documents for title consistency: %v", err)
} else {
// Extract titles from similar documents
for _, doc := range similarDocs {
if doc.Title != "" && doc.Title != originalTitle {
similarDocumentTitles = append(similarDocumentTitles, doc.Title)
}
}
logger.Debugf("Found %d similar documents for title consistency", len(similarDocumentTitles))
}

Comment on lines +168 to +186
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Nil-pointer risk when Client is unset; also dedup similar titles

If App.Client is nil, the call to app.Client.GetSimilarDocuments will panic. Guard the call with a nil check, and deduplicate the collected titles to reduce prompt noise.

-	// Fetch similar documents to help with title consistency
-	var similarDocumentTitles []string
-	similarDocs, err := app.Client.GetSimilarDocuments(ctx, documentID, 5) // Get up to 5 similar documents
-	if err != nil {
-		// Log the error but don't fail the title generation
-		logger.Debugf("Failed to fetch similar documents for title consistency: %v", err)
-	} else {
-		// Extract titles from similar documents
-		for _, doc := range similarDocs {
-			if doc.Title != "" && doc.Title != originalTitle {
-				similarDocumentTitles = append(similarDocumentTitles, doc.Title)
-			}
-		}
-		logger.Debugf("Found %d similar documents for title consistency", len(similarDocumentTitles))
-	}
+	// Fetch similar documents to help with title consistency
+	var similarDocumentTitles []string
+	if app.Client != nil {
+		similarDocs, err := app.Client.GetSimilarDocuments(ctx, documentID, 5) // Get up to 5 similar documents
+		if err != nil {
+			// Log the error but don't fail the title generation
+			logger.Debugf("Failed to fetch similar documents for title consistency: %v", err)
+		} else {
+			seen := make(map[string]struct{}, len(similarDocs))
+			for _, doc := range similarDocs {
+				title := strings.TrimSpace(doc.Title)
+				if title == "" || title == originalTitle {
+					continue
+				}
+				if _, ok := seen[title]; ok {
+					continue
+				}
+				seen[title] = struct{}{}
+				similarDocumentTitles = append(similarDocumentTitles, title)
+			}
+			logger.Debugf("Found %d similar documents for title consistency", len(similarDocumentTitles))
+		}
+	} else {
+		logger.Debug("Paperless client is nil; skipping similar document lookup for title consistency")
+	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
func (app *App) getSuggestedTitle(ctx context.Context, documentID int, content string, originalTitle string, logger *logrus.Entry) (string, error) {
likelyLanguage := getLikelyLanguage()
// Fetch similar documents to help with title consistency
var similarDocumentTitles []string
similarDocs, err := app.Client.GetSimilarDocuments(ctx, documentID, 5) // Get up to 5 similar documents
if err != nil {
// Log the error but don't fail the title generation
logger.Debugf("Failed to fetch similar documents for title consistency: %v", err)
} else {
// Extract titles from similar documents
for _, doc := range similarDocs {
if doc.Title != "" && doc.Title != originalTitle {
similarDocumentTitles = append(similarDocumentTitles, doc.Title)
}
}
logger.Debugf("Found %d similar documents for title consistency", len(similarDocumentTitles))
}
func (app *App) getSuggestedTitle(
ctx context.Context,
documentID int,
content string,
originalTitle string,
logger *logrus.Entry,
) (string, error) {
likelyLanguage := getLikelyLanguage()
// Fetch similar documents to help with title consistency
var similarDocumentTitles []string
if app.Client != nil {
similarDocs, err := app.Client.GetSimilarDocuments(ctx, documentID, 5) // Get up to 5 similar documents
if err != nil {
// Log the error but don't fail the title generation
logger.Debugf("Failed to fetch similar documents for title consistency: %v", err)
} else {
seen := make(map[string]struct{}, len(similarDocs))
for _, doc := range similarDocs {
title := strings.TrimSpace(doc.Title)
if title == "" || title == originalTitle {
continue
}
if _, ok := seen[title]; ok {
continue
}
seen[title] = struct{}{}
similarDocumentTitles = append(similarDocumentTitles, title)
}
logger.Debugf("Found %d similar documents for title consistency", len(similarDocumentTitles))
}
} else {
logger.Debug("Paperless client is nil; skipping similar document lookup for title consistency")
}
// ... remainder of getSuggestedTitle ...
}
🤖 Prompt for AI Agents
In app_llm.go around lines 168 to 186, the code risks a nil-pointer panic if
App.Client is unset and may append duplicate similar titles; first guard the
call by checking if app.Client != nil before calling GetSimilarDocuments and log
a debug/warn when the client is nil and skip fetching, and second deduplicate
the collected similarDocumentTitles (e.g., use a map to ensure each title is
only added once) before using or logging them to reduce prompt noise.

templateMutex.RLock()
defer templateMutex.RUnlock()

// Get available tokens for content
templateData := map[string]interface{}{
"Language": likelyLanguage,
"Content": content,
"Title": originalTitle,
"Language": likelyLanguage,
"Content": content,
"Title": originalTitle,
"SimilarDocumentTitles": similarDocumentTitles,
}

availableTokens, err := getAvailableTokensForContent(titleTemplate, templateData)
Expand Down Expand Up @@ -448,7 +465,7 @@ func (app *App) generateDocumentSuggestions(ctx context.Context, suggestionReque
var suggestedCustomFields []CustomFieldSuggestion

if suggestionRequest.GenerateTitles {
suggestedTitle, err = app.getSuggestedTitle(ctx, content, suggestedTitle, docLogger)
suggestedTitle, err = app.getSuggestedTitle(ctx, documentID, content, suggestedTitle, docLogger)
if err != nil {
mu.Lock()
errorsList = append(errorsList, fmt.Errorf("Document %d: %v", documentID, err))
Expand Down
155 changes: 149 additions & 6 deletions app_llm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,10 @@ func TestPromptTokenLimits(t *testing.T) {

// Create a test app with mock LLM
mockLLM := &mockLLM{}
mockClient := &mockPaperlessClient{}
app := &App{
LLM: mockLLM,
LLM: mockLLM,
Client: mockClient,
}

// Set up test template
Expand Down Expand Up @@ -157,7 +159,7 @@ Content: {{.Content}}

// Test with the app's LLM
ctx := context.Background()
_, err = app.getSuggestedTitle(ctx, truncatedContent, "Test Title", testLogger)
_, err = app.getSuggestedTitle(ctx, 1, truncatedContent, "Test Title", testLogger)
require.NoError(t, err)

// Verify truncation
Expand Down Expand Up @@ -269,8 +271,10 @@ func TestTokenLimitInTitleGeneration(t *testing.T) {

// Create a test app with mock LLM
mockLLM := &mockLLM{}
mockClient := &mockPaperlessClient{}
app := &App{
LLM: mockLLM,
LLM: mockLLM,
Client: mockClient,
}

// Test content that would exceed reasonable token limits
Expand All @@ -284,7 +288,7 @@ func TestTokenLimitInTitleGeneration(t *testing.T) {
// Call getSuggestedTitle
ctx := context.Background()

_, err := app.getSuggestedTitle(ctx, longContent, "Original Title", testLogger)
_, err := app.getSuggestedTitle(ctx, 1, longContent, "Original Title", testLogger)
require.NoError(t, err)

// Verify the final prompt size
Expand Down Expand Up @@ -360,8 +364,9 @@ func TestStripReasoning(t *testing.T) {

// mockPaperlessClient is a mock implementation of the ClientInterface for testing.
type mockPaperlessClient struct {
CustomFields []CustomField
Error error
CustomFields []CustomField
SimilarDocuments []Document
Error error
}

func (m *mockPaperlessClient) GetCustomFields(ctx context.Context) ([]CustomField, error) {
Expand Down Expand Up @@ -406,6 +411,12 @@ func (m *mockPaperlessClient) GetTaskStatus(ctx context.Context, taskID string)
return nil, nil
}
func (m *mockPaperlessClient) DeleteDocument(ctx context.Context, documentID int) error { return nil }
// GetSimilarDocuments is the test double for the similar-document lookup.
// The ctx, documentID and count arguments are ignored: when m.Error is set the
// call fails with (nil, m.Error); otherwise the preconfigured
// m.SimilarDocuments slice is handed back unchanged.
func (m *mockPaperlessClient) GetSimilarDocuments(ctx context.Context, documentID int, count int) ([]Document, error) {
	if failure := m.Error; failure != nil {
		return nil, failure
	}
	canned := m.SimilarDocuments
	return canned, nil
}

func TestGetSuggestedCustomFields(t *testing.T) {
// 1. Setup
Expand Down Expand Up @@ -484,3 +495,135 @@ func findFieldByID(fields []CustomFieldSuggestion, id int) (CustomFieldSuggestio
}
return CustomFieldSuggestion{}, false
}

// TestGetSuggestedTitle_WithSimilarDocuments verifies that the titles of the
// similar documents returned by the Paperless client are rendered into the
// title prompt, and that the LLM response is returned as the suggested title.
func TestGetSuggestedTitle_WithSimilarDocuments(t *testing.T) {
	testLogger := logrus.WithField("test", "test")

	// Set higher token limit for this test. Remember whether TOKEN_LIMIT was set
	// at all, so an originally-unset variable is unset again afterwards instead
	// of being left behind as an empty string for the rest of the test binary.
	// NOTE(review): resetTokenLimit is called after every change, presumably so
	// the limit is re-read from the environment — confirm against its definition.
	originalLimit, hadLimit := os.LookupEnv("TOKEN_LIMIT")
	os.Setenv("TOKEN_LIMIT", "200")
	resetTokenLimit()
	defer func() {
		if hadLimit {
			os.Setenv("TOKEN_LIMIT", originalLimit)
		} else {
			os.Unsetenv("TOKEN_LIMIT")
		}
		resetTokenLimit()
	}()

	// Create a mock client that returns similar documents
	mockClient := &mockPaperlessClient{
		SimilarDocuments: []Document{
			{ID: 2, Title: "Invoice January 2023 - Company ABC"},
			{ID: 3, Title: "Invoice February 2023 - Company ABC"},
			{ID: 4, Title: "Receipt March 2023 - Company XYZ"},
		},
	}

	mockLLM := &mockLLM{Response: "Invoice March 2023 - Company ABC"}
	app := &App{
		LLM:    mockLLM,
		Client: mockClient,
	}

	// Set up title template. The similar-documents section is wrapped in
	// {{if .SimilarDocumentTitles}}, so it is only rendered when titles exist.
	var err error
	titleTemplate, err = template.New("title").Parse(`I will provide you with the content of a document.
Your task is to find a suitable document title.
{{if .SimilarDocumentTitles}}I have found some similar documents with the following titles:
{{range .SimilarDocumentTitles}}- {{.}}
{{end}}Please try to be consistent with the naming pattern.{{end}}

<original_title>{{.Title}}</original_title>
<content>{{.Content}}</content>`)
	require.NoError(t, err)

	ctx := context.Background()
	content := "This is an invoice from Company ABC for March 2023 services."
	originalTitle := "document.pdf"

	title, err := app.getSuggestedTitle(ctx, 1, content, originalTitle, testLogger)
	require.NoError(t, err)
	assert.Equal(t, "Invoice March 2023 - Company ABC", title)

	// Verify that the prompt included similar document titles
	assert.Contains(t, mockLLM.lastPrompt, "Invoice January 2023 - Company ABC")
	assert.Contains(t, mockLLM.lastPrompt, "Invoice February 2023 - Company ABC")
	assert.Contains(t, mockLLM.lastPrompt, "Receipt March 2023 - Company XYZ")
	assert.Contains(t, mockLLM.lastPrompt, "Please try to be consistent with the naming pattern")
}

func TestGetSuggestedTitle_NoSimilarDocuments(t *testing.T) {
testLogger := logrus.WithField("test", "test")

// Create a mock client that returns no similar documents
mockClient := &mockPaperlessClient{
SimilarDocuments: []Document{},
}

mockLLM := &mockLLM{Response: "Contract Agreement 2023"}
app := &App{
LLM: mockLLM,
Client: mockClient,
}

// Set up title template
var err error
titleTemplate, err = template.New("title").Parse(`I will provide you with the content of a document.
Your task is to find a suitable document title.
{{if .SimilarDocumentTitles}}I have found some similar documents with the following titles:
{{range .SimilarDocumentTitles}}- {{.}}
{{end}}Please try to be consistent with the naming pattern.{{end}}

<original_title>{{.Title}}</original_title>
<content>{{.Content}}</content>`)
require.NoError(t, err)

ctx := context.Background()
content := "This is a contract agreement document."
originalTitle := "document.pdf"

title, err := app.getSuggestedTitle(ctx, 1, content, originalTitle, testLogger)
require.NoError(t, err)
assert.Equal(t, "Contract Agreement 2023", title)

// Verify that the prompt did not include the similar documents section
assert.NotContains(t, mockLLM.lastPrompt, "I have found some similar documents")
assert.NotContains(t, mockLLM.lastPrompt, "Please try to be consistent with the naming pattern")
}

func TestGetSuggestedTitle_SimilarDocumentsError(t *testing.T) {
testLogger := logrus.WithField("test", "test")

// Create a mock client that returns an error for similar documents
mockClient := &mockPaperlessClient{
Error: fmt.Errorf("API error"),
}

mockLLM := &mockLLM{Response: "Generated Title"}
app := &App{
LLM: mockLLM,
Client: mockClient,
}

// Set up title template
var err error
titleTemplate, err = template.New("title").Parse(`I will provide you with the content of a document.
Your task is to find a suitable document title.
{{if .SimilarDocumentTitles}}I have found some similar documents with the following titles:
{{range .SimilarDocumentTitles}}- {{.}}
{{end}}Please try to be consistent with the naming pattern.{{end}}

<original_title>{{.Title}}</original_title>
<content>{{.Content}}</content>`)
require.NoError(t, err)

ctx := context.Background()
content := "This is a document."
originalTitle := "document.pdf"

// Should still work even if similar documents lookup fails
title, err := app.getSuggestedTitle(ctx, 1, content, originalTitle, testLogger)
require.NoError(t, err)
assert.Equal(t, "Generated Title", title)

// Verify that the prompt did not include the similar documents section
assert.NotContains(t, mockLLM.lastPrompt, "I have found some similar documents")
}
3 changes: 3 additions & 0 deletions default_prompts/title_prompt.tmpl
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
I will provide you with the content of a document that has been partially read by OCR (so it may contain errors).
Your task is to find a suitable document title that I can use as the title in the paperless-ngx program.
If the original title is already adding value and not just a technical filename you can use it as extra information to enhance your suggestion.
{{if .SimilarDocumentTitles}}I have found some similar documents in the database with the following titles that might help you maintain consistency:
{{range .SimilarDocumentTitles}}- {{.}}
{{end}}Please try to be consistent with the naming pattern of these similar documents if they provide informative titles.{{end}}
Respond only with the title, without any additional information. The content is likely in {{.Language}}.

The data will be provided using an XML-like format for clarity:
Expand Down
101 changes: 100 additions & 1 deletion paperless.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,105 @@ func (client *PaperlessClient) GetDocument(ctx context.Context, documentID int)
}, nil
}

// GetSimilarDocuments retrieves documents that are similar to the specified document
func (client *PaperlessClient) GetSimilarDocuments(ctx context.Context, documentID int, maxResults int) ([]Document, error) {
// Get all tags to find the IDs of paperless-gpt tags to exclude
allTags, err := client.GetAllTags(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get tags for exclusion: %w", err)
}

// Find the tag IDs for paperless-gpt tags that should be excluded
var excludeTagIDs []string
for tagName, tagID := range allTags {
if tagName == manualTag || tagName == autoTag {
excludeTagIDs = append(excludeTagIDs, fmt.Sprintf("%d", tagID))
}
}

Comment on lines +407 to +413
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Confirm filter semantics for tags__id__none and ordering with more_like_id

Please verify that:

  • tags__id__none excludes any document having any of the provided tag IDs.
  • ordering=-score is effective when using more_like_id (or unnecessary because similarity already sorts by score).
  • truncate_content is supported on this endpoint.

🌐 Web query:

Check Paperless‑ngx documents API for:
1) Whether the query parameter "tags__id__none" is supported and its semantics.
2) Whether "more_like_id" results are sortable or already sorted by a "score" and whether "ordering=-score" is valid.
3) Whether "truncate_content" is a valid query parameter on /api/documents/.

💡 Result:

Short answers with sources:

  1. tags__id__none — Not documented / not a supported query parameter. The official API docs list available filters (and examples) but do not mention a tags__id__none operator; issues show tag-filtering is done via the usual field lookups and that tag-exclusion has been a separate bug, implying no special "none" operator. (docs.paperless-ngx.com)

  2. more_like_id / score / ordering — full‑text and "more like" results are returned with a search_hit containing score, rank and highlights, and the docs state full‑text results are always sorted by search score (i.e. best match first). That implies you cannot rely on (or override) sorting by an API ordering like ordering=-score for full‑text/more_like queries. (docs.paperless-ngx.com)

  3. truncate_content on /api/documents/ — Not documented. The official REST API reference for /api/documents/ does not list a truncate_content query parameter, so it should not be considered a supported parameter. (docs.paperless-ngx.com)

If you want, I can: a) show exact example queries to get documents with/without tags using supported filters, or b) try a live request against a specific paperless‑ngx instance to confirm behavior.

Citations:


Remove unsupported API parameters from document queries

  • The tags__id__none filter isn’t supported by the /api/documents/ endpoint—drop it or implement exclusion client-side.
  • Full-text queries (including more_like_id) are intrinsically sorted by score; ordering=-score is ignored and should be removed.
  • truncate_content isn’t a valid query parameter—remove it.
🤖 Prompt for AI Agents
In paperless.go around lines 407 to 413, the code builds query parameters using
unsupported API filters: remove any use of the tags__id__none parameter, drop
ordering=-score when full-text/more_like_id is used, and remove truncate_content
from the request; instead pass only supported parameters to /api/documents/ and
implement exclusion client-side by collecting manual/auto tag IDs (as you
already do) and filtering the returned documents to exclude any document whose
tag IDs contain those IDs; ensure you no longer send tags__id__none,
ordering=-score, or truncate_content in the HTTP query.

// Build the query path with tag exclusions
path := fmt.Sprintf("api/documents/?ordering=-score&truncate_content=true&more_like_id=%d&page_size=%d", documentID, maxResults)
if len(excludeTagIDs) > 0 {
path += "&tags__id__none=" + strings.Join(excludeTagIDs, ",")
}

resp, err := client.Do(ctx, "GET", path, nil)
if err != nil {
return nil, fmt.Errorf("HTTP request failed in GetSimilarDocuments: %w", err)
}
defer resp.Body.Close()

// Read the response body
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response body: %w", err)
}

if resp.StatusCode != http.StatusOK {
log.WithFields(logrus.Fields{
"status_code": resp.StatusCode,
"path": path,
"response": string(bodyBytes),
"headers": resp.Header,
}).Error("Error response from server in GetSimilarDocuments")
return nil, fmt.Errorf("error searching similar documents: status=%d, body=%s", resp.StatusCode, string(bodyBytes))
}

var documentsResponse GetDocumentsApiResponse
err = json.Unmarshal(bodyBytes, &documentsResponse)
if err != nil {
log.WithFields(logrus.Fields{
"response_body": string(bodyBytes),
"error": err,
}).Error("Failed to parse JSON response in GetSimilarDocuments")
return nil, fmt.Errorf("failed to parse JSON response: %w", err)
}

allCorrespondents, err := client.GetAllCorrespondents(ctx)
if err != nil {
return nil, err
}

documents := make([]Document, 0, len(documentsResponse.Results))
for _, result := range documentsResponse.Results {
// Skip the document itself if it appears in the results
if result.ID == documentID {
continue
}

tagNames := make([]string, len(result.Tags))
for i, resultTagID := range result.Tags {
for tagName, tagID := range allTags {
if resultTagID == tagID {
tagNames[i] = tagName
break
}
}
}

correspondentName := ""
if result.Correspondent != 0 {
for name, id := range allCorrespondents {
if result.Correspondent == id {
correspondentName = name
break
}
}
}

documents = append(documents, Document{
ID: result.ID,
Title: result.Title,
Content: result.Content,
Correspondent: correspondentName,
Tags: tagNames,
CreatedDate: result.CreatedDate,
})
}

return documents, nil
}

// UpdateDocuments updates the specified documents with suggested changes
func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion, db *gorm.DB, isUndo bool) error {
availableTags, err := client.GetAllTags(ctx)
Expand Down Expand Up @@ -441,7 +540,7 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []

if !hasSameTags(originalDoc.Tags, finalTagNames) {
originalFields["tags"] = originalDoc.Tags
var newTagIDs []int
var newTagIDs []int = []int{}
for _, tagName := range finalTagNames {
if tagID, exists := availableTags[tagName]; exists {
newTagIDs = append(newTagIDs, tagID)
Expand Down
Loading
Loading