-
-
Notifications
You must be signed in to change notification settings - Fork 79
Include Similar Documents in Title Generation Prompt for Improved Naming Consistency #660
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -395,6 +395,105 @@ func (client *PaperlessClient) GetDocument(ctx context.Context, documentID int) | |
}, nil | ||
} | ||
|
||
// GetSimilarDocuments retrieves documents that are similar to the specified document | ||
func (client *PaperlessClient) GetSimilarDocuments(ctx context.Context, documentID int, maxResults int) ([]Document, error) { | ||
// Get all tags to find the IDs of paperless-gpt tags to exclude | ||
allTags, err := client.GetAllTags(ctx) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to get tags for exclusion: %w", err) | ||
} | ||
|
||
// Find the tag IDs for paperless-gpt tags that should be excluded | ||
var excludeTagIDs []string | ||
for tagName, tagID := range allTags { | ||
if tagName == manualTag || tagName == autoTag { | ||
excludeTagIDs = append(excludeTagIDs, fmt.Sprintf("%d", tagID)) | ||
} | ||
} | ||
|
||
Comment on lines
+407
to
+413
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chainConfirm filter semantics for tags__id__none and ordering with more_like_id Please verify that:
🌐 Web query:
💡 Result: Short answers with sources:
If you want, I can: a) show exact example queries to get documents with/without tags using supported filters, or b) try a live request against a specific paperless‑ngx instance to confirm behavior. Citations:
Remove unsupported API parameters from document queries
🤖 Prompt for AI Agents
|
||
// Build the query path with tag exclusions | ||
path := fmt.Sprintf("api/documents/?ordering=-score&truncate_content=true&more_like_id=%d&page_size=%d", documentID, maxResults) | ||
if len(excludeTagIDs) > 0 { | ||
path += "&tags__id__none=" + strings.Join(excludeTagIDs, ",") | ||
} | ||
|
||
resp, err := client.Do(ctx, "GET", path, nil) | ||
if err != nil { | ||
return nil, fmt.Errorf("HTTP request failed in GetSimilarDocuments: %w", err) | ||
} | ||
defer resp.Body.Close() | ||
|
||
// Read the response body | ||
bodyBytes, err := io.ReadAll(resp.Body) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to read response body: %w", err) | ||
} | ||
|
||
if resp.StatusCode != http.StatusOK { | ||
log.WithFields(logrus.Fields{ | ||
"status_code": resp.StatusCode, | ||
"path": path, | ||
"response": string(bodyBytes), | ||
"headers": resp.Header, | ||
}).Error("Error response from server in GetSimilarDocuments") | ||
return nil, fmt.Errorf("error searching similar documents: status=%d, body=%s", resp.StatusCode, string(bodyBytes)) | ||
} | ||
|
||
var documentsResponse GetDocumentsApiResponse | ||
err = json.Unmarshal(bodyBytes, &documentsResponse) | ||
if err != nil { | ||
log.WithFields(logrus.Fields{ | ||
"response_body": string(bodyBytes), | ||
"error": err, | ||
}).Error("Failed to parse JSON response in GetSimilarDocuments") | ||
return nil, fmt.Errorf("failed to parse JSON response: %w", err) | ||
} | ||
|
||
allCorrespondents, err := client.GetAllCorrespondents(ctx) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
documents := make([]Document, 0, len(documentsResponse.Results)) | ||
for _, result := range documentsResponse.Results { | ||
// Skip the document itself if it appears in the results | ||
if result.ID == documentID { | ||
continue | ||
} | ||
|
||
tagNames := make([]string, len(result.Tags)) | ||
for i, resultTagID := range result.Tags { | ||
for tagName, tagID := range allTags { | ||
if resultTagID == tagID { | ||
tagNames[i] = tagName | ||
break | ||
} | ||
} | ||
} | ||
|
||
correspondentName := "" | ||
if result.Correspondent != 0 { | ||
for name, id := range allCorrespondents { | ||
if result.Correspondent == id { | ||
correspondentName = name | ||
break | ||
} | ||
} | ||
} | ||
|
||
documents = append(documents, Document{ | ||
ID: result.ID, | ||
Title: result.Title, | ||
Content: result.Content, | ||
Correspondent: correspondentName, | ||
Tags: tagNames, | ||
CreatedDate: result.CreatedDate, | ||
}) | ||
} | ||
|
||
return documents, nil | ||
} | ||
|
||
// UpdateDocuments updates the specified documents with suggested changes | ||
func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion, db *gorm.DB, isUndo bool) error { | ||
availableTags, err := client.GetAllTags(ctx) | ||
|
@@ -441,7 +540,7 @@ func (client *PaperlessClient) UpdateDocuments(ctx context.Context, documents [] | |
|
||
if !hasSameTags(originalDoc.Tags, finalTagNames) { | ||
originalFields["tags"] = originalDoc.Tags | ||
var newTagIDs []int | ||
var newTagIDs []int = []int{} | ||
for _, tagName := range finalTagNames { | ||
if tagID, exists := availableTags[tagName]; exists { | ||
newTagIDs = append(newTagIDs, tagID) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nil-pointer risk when Client is unset; also deduplicate similar titles
If App.Client is nil, this call will panic. Guard against a nil client, and deduplicate the similar titles to reduce prompt noise.
📝 Committable suggestion
🤖 Prompt for AI Agents