From c14d4c446606d0d4fd01673eb1422fd9b14b7a77 Mon Sep 17 00:00:00 2001 From: Dawid Kulpa Date: Sun, 4 May 2025 17:00:41 +0200 Subject: [PATCH] - fixes OCR progress reporting - introduces re-OCRing specific pages from UI (partially answers #223) - adds more options for LLM tweaking - allows setting temperature to workaround GPT-5 issue (fixes #448) adds tweaks for OCR: configuring ollama model, showing per page result, allowing re-ocring single page fixes re-ocring additional fixes some more fixes some more fixes some more fixes some more fixes some more fixes some more fixes some more fixes --- README.md | 8 + app_http_handlers.go | 179 ++++++++++++++++++- background.go | 16 +- background_test.go | 2 +- go.sum | 102 +---------- jobs.go | 46 ++++- local_db.go | 58 ++++++- main.go | 68 +++++++- ocr.go | 54 +++++- ocr/llm_provider.go | 71 ++++++-- ocr/provider.go | 14 ++ types.go | 3 +- web-app/src/ExperimentalOCR.tsx | 298 ++++++++++++++++++++++++++++---- web-app/src/ocrStatus.ts | 44 +++++ 14 files changed, 794 insertions(+), 169 deletions(-) create mode 100644 web-app/src/ocrStatus.ts diff --git a/README.md b/README.md index 72201929..a88f964d 100644 --- a/README.md +++ b/README.md @@ -162,6 +162,7 @@ services: # LLM_PROVIDER: "ollama" # LLM_MODEL: "qwen3:8b" # OLLAMA_HOST: "http://host.docker.internal:11434" + # OLLAMA_CONTEXT_LENGTH: "8192" # Sets Ollama NumCtx (context window) # TOKEN_LIMIT: 1000 # Recommended for smaller models # Optional LLM Settings @@ -535,6 +536,10 @@ For best results with the enhanced OCR features: | `VISION_LLM_REQUESTS_PER_MINUTE` | Maximum requests per minute for the Vision LLM. Useful for managing API costs or local LLM load. | No | 120 | | `VISION_LLM_MAX_RETRIES` | Maximum retry attempts for failed Vision LLM requests. | No | 3 | | `VISION_LLM_BACKOFF_MAX_WAIT` | Maximum wait time between retries for the Vision LLM (e.g., `30s`). | No | 30s | +| `VISION_LLM_MAX_TOKENS` | Maximum tokens for Vision LLM OCR output. | No | | +| `VISION_LLM_TEMPERATURE` | Sampling temperature for Vision OCR generation. Lower is more deterministic. Important: For OpenAI GPT-5 it must be explicitly set to `1.0`. | No | | +| `OLLAMA_CONTEXT_LENGTH` | (Ollama only) Integer. Sets NumCtx (context window) for the Ollama runner. If unset or 0, the model default is used. | No | | +| `OLLAMA_OCR_TOP_K` | (Ollama only) Top-k token sampling for Vision OCR. Lower favors more likely tokens; higher increases diversity. | No | | | `AZURE_DOCAI_ENDPOINT` | Azure Document Intelligence endpoint. Required if OCR_PROVIDER is `azure`. | Cond. | | | `AZURE_DOCAI_KEY` | Azure Document Intelligence API key. Required if OCR_PROVIDER is `azure`. | Cond. | | | `AZURE_DOCAI_MODEL_ID` | Azure Document Intelligence model ID. Optional if using `azure` provider. | No | prebuilt-read | @@ -867,6 +872,7 @@ When using local LLMs (like those through Ollama), you might need to adjust cert #### Token Management - Use `TOKEN_LIMIT` environment variable to control the maximum number of tokens sent to the LLM +- For Ollama, set `OLLAMA_CONTEXT_LENGTH` to control the model's context window (NumCtx). This is independent of `TOKEN_LIMIT` and configures the server-side KV cache size. If unset or 0, the model default is used. Choose a value within the model's supported window (e.g., 8192). - Smaller models might truncate content unexpectedly if given too much text - Start with a conservative limit (e.g., 1000 tokens) and adjust based on your model's capabilities - Set to `0` to disable the limit (use with caution) @@ -876,6 +882,7 @@ Example configuration for smaller models: ```yaml environment: TOKEN_LIMIT: "2000" # Adjust based on your model's context window + OLLAMA_CONTEXT_LENGTH: "4096" # Controls Ollama NumCtx (context window); if unset, model default is used LLM_PROVIDER: "ollama" LLM_MODEL: "qwen3:8b" # Or other local model ``` @@ -883,6 +890,7 @@ environment: Common issues and solutions: - If you see truncated or incomplete responses, try lowering the `TOKEN_LIMIT` +- On Ollama, if you hit "context length exceeded" or memory issues, reduce `OLLAMA_CONTEXT_LENGTH` or choose a smaller model/context size. - If processing is too limited, gradually increase the limit while monitoring performance - For models with larger context windows, you can increase the limit or disable it entirely diff --git a/app_http_handlers.go b/app_http_handlers.go index 85a944ec..e5abc68e 100644 --- a/app_http_handlers.go +++ b/app_http_handlers.go @@ -1,7 +1,9 @@ package main import ( + "context" "encoding/json" + "errors" "fmt" "net/http" "os" @@ -185,11 +187,12 @@ func (app *App) getJobStatusHandler(c *gin.Context) { } response := gin.H{ - "job_id": job.ID, - "status": job.Status, - "created_at": job.CreatedAt, - "updated_at": job.UpdatedAt, - "pages_done": job.PagesDone, + "job_id": job.ID, + "status": job.Status, + "created_at": job.CreatedAt, + "updated_at": job.UpdatedAt, + "pages_done": job.PagesDone, + "total_pages": job.TotalPages, } if job.Status == "completed" { @@ -226,6 +229,20 @@ func (app *App) getAllJobsHandler(c *gin.Context) { c.JSON(http.StatusOK, jobList) } +// POST /api/ocr/jobs/:job_id/stop +func (app *App) stopOCRJobHandler(c *gin.Context) { + jobID := c.Param("job_id") + jobCancellersMu.Lock() + cancel, exists := jobCancellers[jobID] + jobCancellersMu.Unlock() + if !exists { + c.JSON(http.StatusNotFound, gin.H{"error": "No running job with this ID"}) + return + } + cancel() + c.Status(http.StatusNoContent) +} + // getDocumentHandler handles the retrieval of a document by its ID func (app *App) getDocumentHandler() gin.HandlerFunc { return func(c *gin.Context) { @@ -245,6 +262,158 @@ func (app *App) getDocumentHandler() gin.HandlerFunc { } } +// getOCRPagesHandler returns per-page OCR results for a document +func (app *App) getOCRPagesHandler(c *gin.Context) { + id := c.Param("id") + parsedID, err := strconv.Atoi(id) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid document ID"}) + return + } + + dbResults, err := GetOcrPageResults(app.Database, parsedID) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to fetch OCR page results"}) + return + } + + type OCRPageResult struct { + Text string `json:"text"` + OcrLimitHit bool `json:"ocrLimitHit"` + GenerationInfo map[string]interface{} `json:"generationInfo,omitempty"` + } + + var pages []OCRPageResult + for _, res := range dbResults { + var genInfo map[string]interface{} + if res.GenerationInfo != "" { + _ = json.Unmarshal([]byte(res.GenerationInfo), &genInfo) + } + pages = append(pages, OCRPageResult{ + Text: res.Text, + OcrLimitHit: res.OcrLimitHit, + GenerationInfo: genInfo, + }) + } + + c.JSON(http.StatusOK, gin.H{ + "pages": pages, + }) +} + +func (app *App) reOCRPageHandler(c *gin.Context) { + id := c.Param("id") + pageIdxStr := c.Param("pageIndex") + parsedID, err := strconv.Atoi(id) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid document ID"}) + return + } + pageIdx, err := strconv.Atoi(pageIdxStr) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid page index"}) + return + } + + // Download all images for the document, but only process the requested page + imagePaths, _, err := app.Client.DownloadDocumentAsImages(c.Request.Context(), parsedID, limitOcrPages) + if err != nil || pageIdx < 0 || pageIdx >= len(imagePaths) { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid page index or failed to download images"}) + return + } + imageContent, err := os.ReadFile(imagePaths[pageIdx]) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read image file"}) + return + } + + cancelKey := fmt.Sprintf("%d-%d", parsedID, pageIdx) + reOcrCtx, cancelReOcr := context.WithCancel(c.Request.Context()) + defer cancelReOcr() + + reOcrCancellersMu.Lock() + if existingCancel, ok := reOcrCancellers[cancelKey]; ok { + existingCancel() + } + reOcrCancellers[cancelKey] = cancelReOcr + reOcrCancellersMu.Unlock() + + defer func() { + reOcrCancellersMu.Lock() + delete(reOcrCancellers, cancelKey) + reOcrCancellersMu.Unlock() + }() + + result, err := app.ocrProvider.ProcessImage(reOcrCtx, imageContent, pageIdx+1) + + if err != nil { + if errors.Is(err, context.Canceled) { + log.Infof("Re-OCR for doc %d page %d cancelled.", parsedID, pageIdx) + c.Status(499) + } else { + log.Errorf("Failed to re-OCR doc %d page %d: %v", parsedID, pageIdx, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to re-OCR page"}) + } + return + } + if result == nil { + log.Errorf("Re-OCR for doc %d page %d returned nil result.", parsedID, pageIdx) + c.JSON(http.StatusInternalServerError, gin.H{"error": "Re-OCR returned no result"}) + return + } + + var genInfoJSON string + if result.GenerationInfo != nil { + if b, err := json.Marshal(result.GenerationInfo); err == nil { + genInfoJSON = string(b) + } + } + saveErr := SaveSingleOcrPageResult(app.Database, parsedID, pageIdx, result.Text, result.OcrLimitHit, genInfoJSON) + if saveErr != nil { + log.Errorf("Failed to save re-OCR result for doc %d page %d: %v", parsedID, pageIdx, saveErr) + } + + c.JSON(http.StatusOK, gin.H{ + "text": result.Text, + "ocrLimitHit": result.OcrLimitHit, + "generationInfo": result.GenerationInfo, + }) +} + +// cancelReOCRPageHandler handles the DELETE request to cancel an ongoing re-OCR for a specific page. +func (app *App) cancelReOCRPageHandler(c *gin.Context) { + id := c.Param("id") + pageIdxStr := c.Param("pageIndex") + parsedID, err := strconv.Atoi(id) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid document ID"}) + return + } + pageIdx, err := strconv.Atoi(pageIdxStr) + if err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid page index"}) + return + } + + cancelKey := fmt.Sprintf("%d-%d", parsedID, pageIdx) + + reOcrCancellersMu.Lock() + cancel, exists := reOcrCancellers[cancelKey] + if exists { + delete(reOcrCancellers, cancelKey) + } + reOcrCancellersMu.Unlock() + + if exists { + cancel() + log.Infof("Cancellation requested for re-OCR doc %d page %d", parsedID, pageIdx) + c.Status(http.StatusNoContent) + } else { + log.Warnf("No active re-OCR found to cancel for doc %d page %d", parsedID, pageIdx) + c.JSON(http.StatusNotFound, gin.H{"error": "No active re-OCR operation found for this page"}) + } +} + // Section for local-db actions func (app *App) getModificationHistoryHandler(c *gin.Context) { diff --git a/background.go b/background.go index 640861d2..0f3b86ce 100644 --- a/background.go +++ b/background.go @@ -210,16 +210,21 @@ func (app *App) processAutoOcrTagDocuments(ctx context.Context) (int, error) { var err error if app.docProcessor != nil { // Use injected processor if available - processedDoc, err = app.docProcessor.ProcessDocumentOCR(ctx, document.ID, options) + processedDoc, err = app.docProcessor.ProcessDocumentOCR(ctx, document.ID, options, "") } else { // Use the app's own implementation if no processor is injected - processedDoc, err = app.ProcessDocumentOCR(ctx, document.ID, options) + processedDoc, err = app.ProcessDocumentOCR(ctx, document.ID, options, "") } + if err != nil { docLogger.Errorf("OCR processing failed: %v", err) errs = append(errs, fmt.Errorf("document %d OCR error: %w", document.ID, err)) continue } + if processedDoc == nil { + docLogger.Info("OCR processing skipped for document") + continue + } docLogger.Debug("OCR processing completed") documentSuggestion := DocumentSuggestion{ @@ -227,6 +232,13 @@ func (app *App) processAutoOcrTagDocuments(ctx context.Context) (int, error) { OriginalDocument: document, SuggestedContent: processedDoc.Text, RemoveTags: []string{autoOcrTag}, + // Add OCR complete tag if tagging is enabled and PDF wasn't uploaded (upload handles tagging) + AddTags: func() []string { + if app.pdfOCRTagging && !options.UploadPDF { + return []string{app.pdfOCRCompleteTag} + } + return nil + }(), } if (app.pdfOCRTagging) && app.pdfOCRCompleteTag != "" { diff --git a/background_test.go b/background_test.go index d99b1d64..9243dcf8 100644 --- a/background_test.go +++ b/background_test.go @@ -40,7 +40,7 @@ type mockDocumentProcessor struct { mockText string } -func (m *mockDocumentProcessor) ProcessDocumentOCR(ctx context.Context, documentID int, options OCROptions) (*ProcessedDocument, error) { +func (m *mockDocumentProcessor) ProcessDocumentOCR(ctx context.Context, documentID int, options OCROptions, jobID string) (*ProcessedDocument, error) { return &ProcessedDocument{ ID: documentID, Text: m.mockText, diff --git a/go.sum b/go.sum index 376a33b4..04ec402d 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,5 @@ cloud.google.com/go v0.120.0 h1:wc6bgG9DHyKqF5/vQvX1CiZrtHnxJjBlKUyF9nP6meA= cloud.google.com/go v0.120.0/go.mod h1:/beW32s8/pGRuj4IILWQNd4uuebeT4dkOhKmkfit64Q= -cloud.google.com/go/auth v0.16.1 h1:XrXauHMd30LhQYVRHLGvJiYeczweKQXZxsTbV9TiguU= -cloud.google.com/go/auth v0.16.1/go.mod h1:1howDHJ5IETh/LwYs3ZxvlkXF48aSqqJUM+5o02dNOI= -cloud.google.com/go/auth v0.16.2 h1:QvBAGFPLrDeoiNjyfVunhQ10HKNYuOwZ5noee0M5df4= -cloud.google.com/go/auth v0.16.2/go.mod h1:sRBas2Y1fB1vZTdurouM0AzuYQBMZinrUYL8EufhtEA= cloud.google.com/go/auth v0.16.3 h1:kabzoQ9/bobUmnseYnBO6qQG7q4a/CffFRlJSxv2wCc= cloud.google.com/go/auth v0.16.3/go.mod h1:NucRGjaXfzP1ltpcQ7On/VTZ0H4kWB5Jy+Y9Dnm76fA= cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= @@ -39,8 +35,6 @@ github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1 github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ= github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= -github.com/ebitengine/purego v0.8.2 h1:jPPGWs2sZ1UgOSgD2bClL0MJIqu58nOmIcBuXr62z1I= -github.com/ebitengine/purego v0.8.2/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw= github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= @@ -55,8 +49,6 @@ github.com/gage-technologies/mistral-go v1.1.0 h1:POv1wM9jA/9OBXGV2YdPi9Y/h09+Mj github.com/gage-technologies/mistral-go v1.1.0/go.mod h1:tF++Xt7U975GcLlzhrjSQb8l/x+PrriO9QEdsgm9l28= github.com/gardar/ocrchestra v0.0.0-20250521145628-aaae7e4d40e9 h1:5akDtE0utMq6MrxfK8ZQ7mKjZPQAhAc/U/kY958UtRA= github.com/gardar/ocrchestra v0.0.0-20250521145628-aaae7e4d40e9/go.mod h1:KyEcdoxMqdFEld5EYXr0nXSl/AaiWmz0IVD7ITHltUc= -github.com/gen2brain/go-fitz v1.24.14 h1:09weRkjVtLYNGo7l0J7DyOwBExbwi8SJ9h8YPhw9WEo= -github.com/gen2brain/go-fitz v1.24.14/go.mod h1:0KaZeQgASc20Yp5R/pFzyy7SmP01XcoHKNF842U2/S4= github.com/gen2brain/go-fitz v1.24.15 h1:sJNB1MOWkqnzzENPHggFpgxTwW0+S5WF/rM5wUBpJWo= github.com/gen2brain/go-fitz v1.24.15/go.mod h1:SftkiVbTHqF141DuiLwBBM65zP7ig6AVDQpf2WlHamo= github.com/gin-contrib/sse v1.0.0 h1:y3bT1mUWUxDpW4JLQg/HnTqV4rozuW4tC9eFKTxYI9E= @@ -64,8 +56,6 @@ github.com/gin-contrib/sse v1.0.0/go.mod h1:zNuFdwarAygJBht0NTKiSi3jRf6RbqeILZ9S github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ= github.com/gin-gonic/gin v1.10.1/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -91,8 +81,6 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.6 h1:GW/XbdyBFQ8Qe+YAmFU9uHLo7OnF5tL52HFAgMmyrf4= github.com/googleapis/enterprise-certificate-proxy v0.3.6/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA= -github.com/googleapis/gax-go/v2 v2.14.2 h1:eBLnkZ9635krYIPD+ag1USrOAI0Nr0QYF3+/3GqO0k0= -github.com/googleapis/gax-go/v2 v2.14.2/go.mod h1:ON64QhlJkhVtSqp4v1uaK92VyZ2gmvDQsweuyLV+8+w= github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81vgd/bo= github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= @@ -101,8 +89,6 @@ github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9n github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= -github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= -github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-retryablehttp v0.7.8 h1:ylXZWnqa7Lhqpk0L1P1LzDtGcCR0rPVUrx/c8Unxc48= github.com/hashicorp/go-retryablehttp v0.7.8/go.mod h1:rjiScheydd+CxvumBsIrFKlx3iS0jrZ7LvzFGFmuKbw= github.com/hhrutter/lzw v1.0.0 h1:laL89Llp86W3rRs83LvKbwYRx6INE8gDn0XNb1oXtm0= @@ -119,8 +105,6 @@ github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/jupiterrider/ffi v0.4.0 h1:7mhlrfiBZa0kHhh2DV7mGAdXN/D8zDeu8UlaBO+ZSko= -github.com/jupiterrider/ffi v0.4.0/go.mod h1:1QCaf2VVPpGyIeU3RqQ2rHYrAPT8m9l0GhQupVYQB24= github.com/jupiterrider/ffi v0.5.0 h1:j2nSgpabbV1JOwgP4Kn449sJUHq3cVLAZVBoOYn44V8= github.com/jupiterrider/ffi v0.5.0/go.mod h1:x7xdNKo8h0AmLuXfswDUBxUsd2OqUP4ekC8sCnsmbvo= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -208,126 +192,54 @@ gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJW gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 h1:x7wzEgXfnzJcHDwStJT+mxOz4etr2EcexjqhBvmoakw= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0/go.mod h1:rg+RlpR5dKwaS95IyyZqj5Wd4E13lk/msnTS0Xl9lJM= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 h1:q4XOmH/0opmeuJtPsbFNivyl7bCt7yRBbeEm2sC/XtQ= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0/go.mod h1:snMWehoOh2wsEwnvvwtDyFCxVeDAODenXHtn5vzrKjo= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= -go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ= -go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y= go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= -go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= -go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= -go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= -go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= -go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= -go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= -go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs= -go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= +go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs= +go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY= +go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis= +go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4= go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= golang.org/x/arch v0.15.0/go.mod h1:JmwW7aLIoRUKgaTzhkiEFxvcEiQGyOg9BMonBJUS7EE= -golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= -golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= -golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= -golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM= golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY= golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.27.0 h1:C8gA4oWU/tKkdCfYT6T2u4faJu3MeNS5O8UPWlPF61w= golang.org/x/image v0.27.0/go.mod h1:xbdrClrAUway1MUTEZDq9mz/UpRwYAkFFNUslZtcB+g= -golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= -golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= -golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= -golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= -golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= -golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= -golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= -golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= -golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -google.golang.org/api v0.234.0 h1:d3sAmYq3E9gdr2mpmiWGbm9pHsA/KJmyiLkwKfHBqU4= -google.golang.org/api v0.234.0/go.mod h1:QpeJkemzkFKe5VCE/PMv7GsUfn9ZF+u+q1Q7w6ckxTg= -google.golang.org/api v0.238.0 h1:+EldkglWIg/pWjkq97sd+XxH7PxakNYoe/rkSTbnvOs= -google.golang.org/api v0.238.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= -google.golang.org/api v0.240.0 h1:PxG3AA2UIqT1ofIzWV2COM3j3JagKTKSwy7L6RHNXNU= -google.golang.org/api v0.240.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= -google.golang.org/api v0.241.0 h1:QKwqWQlkc6O895LchPEDUSYr22Xp3NCxpQRiWTB6avE= -google.golang.org/api v0.241.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= -google.golang.org/api v0.242.0 h1:7Lnb1nfnpvbkCiZek6IXKdJ0MFuAZNAJKQfA1ws62xg= -google.golang.org/api v0.242.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= -google.golang.org/api v0.243.0 h1:sw+ESIJ4BVnlJcWu9S+p2Z6Qq1PjG77T8IJ1xtp4jZQ= -google.golang.org/api v0.243.0/go.mod h1:GE4QtYfaybx1KmeHMdBnNnyLzBZCVihGBXAmJu/uUr8= -google.golang.org/api v0.244.0 h1:lpkP8wVibSKr++NCD36XzTk/IzeKJ3klj7vbj+XU5pE= -google.golang.org/api v0.244.0/go.mod h1:dMVhVcylamkirHdzEBAIQWUCgqY885ivNeZYd7VAVr8= -google.golang.org/api v0.245.0 h1:YliGvz1rjXB+sTLNIST6Ffeji9WlRdLQ+LPl9ruSa5Y= -google.golang.org/api v0.245.0/go.mod h1:dMVhVcylamkirHdzEBAIQWUCgqY885ivNeZYd7VAVr8= google.golang.org/api v0.246.0 h1:H0ODDs5PnMZVZAEtdLMn2Ul2eQi7QNjqM2DIFp8TlTM= google.golang.org/api v0.246.0/go.mod h1:dMVhVcylamkirHdzEBAIQWUCgqY885ivNeZYd7VAVr8= -google.golang.org/genai v1.7.0 h1:DMOaygzMDUapj280sXBDvkaoY8kvSCJqsdH0iHbQBKA= -google.golang.org/genai v1.7.0/go.mod h1:TyfOKRz/QyCaj6f/ZDt505x+YreXnY40l2I6k8TvgqY= -google.golang.org/genai v1.13.0 h1:LRhwx5PU+bXhfnXyPEHu2kt9yc+MpvuYbajxSorOJjg= -google.golang.org/genai v1.13.0/go.mod h1:QPj5NGJw+3wEOHg+PrsWwJKvG6UC84ex5FR7qAYsN/M= -google.golang.org/genai v1.15.0 h1:zFaM+1JfGa0KCGDqrZdwVMucEu9n5AJEKkWcSPw0qro= -google.golang.org/genai v1.15.0/go.mod h1:QPj5NGJw+3wEOHg+PrsWwJKvG6UC84ex5FR7qAYsN/M= -google.golang.org/genai v1.16.0 h1:MkPOZt7MFGeOL2lTpox4GyLfSKIISbxzjuQ8b/G/qBk= -google.golang.org/genai v1.16.0/go.mod h1:QPj5NGJw+3wEOHg+PrsWwJKvG6UC84ex5FR7qAYsN/M= -google.golang.org/genai v1.17.0 h1:lXYSnWShPYjxTouxRj0zF8RsNmSF+SKo7SQ7dM35NlI= -google.golang.org/genai v1.17.0/go.mod h1:QPj5NGJw+3wEOHg+PrsWwJKvG6UC84ex5FR7qAYsN/M= -google.golang.org/genai v1.18.0 h1:fTmK7y30CO0CL8xRyyFSjTkd1MNbYUeFUehvDyU/2gQ= -google.golang.org/genai v1.18.0/go.mod h1:QPj5NGJw+3wEOHg+PrsWwJKvG6UC84ex5FR7qAYsN/M= google.golang.org/genai v1.19.0 h1:zNYUCVwwUmc+jCund9yFphKZdbbso6XUZxo0c5COI48= google.golang.org/genai v1.19.0/go.mod h1:QPj5NGJw+3wEOHg+PrsWwJKvG6UC84ex5FR7qAYsN/M= -google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 h1:1tXaIXCracvtsRxSBsYDiSBN0cuJvM7QYW+MrpIRY78= -google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2/go.mod h1:49MsLSx0oWMOZqcpB3uL8ZOkAh1+TndpJ8ONoCBWiZk= google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4= google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s= -google.golang.org/genproto/googleapis/api v0.0.0-20250505200425-f936aa4a68b2 h1:vPV0tzlsK6EzEDHNNH5sa7Hs9bd7iXR7B1tSiPepkV0= -google.golang.org/genproto/googleapis/api v0.0.0-20250505200425-f936aa4a68b2/go.mod h1:pKLAc5OolXC3ViWGI62vvC0n10CpwAtRcTNCFwTKBEw= google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 h1:oWVWY3NzT7KJppx2UKhKmzPq4SRe0LdCijVRwvGeikY= google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822/go.mod h1:h3c4v36UTKzUiuaOKQ6gr3S+0hovBtUrXzTG/i3+XEc= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9 h1:IkAfh6J/yllPtpYFU0zZN1hUPYdT0ogkBT/9hMxHjvg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250512202823-5a2f75b736a9/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 h1:fc6jSaCT0vBduLYZHYrBBNY4dsWuvgyff9noRNDdBeE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250715232539-7130f93afb79 h1:1ZwqphdOdWYXsUHgMpU/101nCtf/kSp9hOrcvFsnl10= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250715232539-7130f93afb79/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 h1:MAKi5q709QWfnkkpNQ0M12hYJ1+e8qYVDyowc4U1XZM= google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= -google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA= -google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= -google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= -google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= google.golang.org/grpc v1.74.2 h1:WoosgB65DlWVC9FqI82dGsZhWFNBSLjQ84bjROOpMu4= google.golang.org/grpc v1.74.2/go.mod h1:CtQ+BGjaAIXHs/5YS3i473GqwBBa1zGQNevxdeBEXrM= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= @@ -340,14 +252,8 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gorm.io/driver/sqlite v1.5.7 h1:8NvsrhP0ifM7LX9G4zPB97NwovUakUxc+2V2uuf3Z1I= -gorm.io/driver/sqlite v1.5.7/go.mod h1:U+J8craQU6Fzkcvu8oLeAQmi50TkwPEhHDEjQZXDah4= gorm.io/driver/sqlite v1.6.0 h1:WHRRrIiulaPiPFmDcod6prc4l2VGVWHz80KspNsxSfQ= gorm.io/driver/sqlite v1.6.0/go.mod h1:AO9V1qIQddBESngQUKWL9yoH93HIeA1X6V633rBwyT8= -gorm.io/gorm v1.26.1 h1:ghB2gUI9FkS46luZtn6DLZ0f6ooBJ5IbVej2ENFDjRw= -gorm.io/gorm v1.26.1/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE= -gorm.io/gorm v1.30.0 h1:qbT5aPv1UH8gI99OsRlvDToLxW5zR7FzS9acZDOZcgs= -gorm.io/gorm v1.30.0/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE= gorm.io/gorm v1.30.1 h1:lSHg33jJTBxs2mgJRfRZeLDG+WZaHYCk3Wtfl6Ngzo4= gorm.io/gorm v1.30.1/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= diff --git a/jobs.go b/jobs.go index 0b0873cc..ed9df27b 100644 --- a/jobs.go +++ b/jobs.go @@ -11,15 +11,24 @@ import ( "github.com/sirupsen/logrus" ) +var ( + jobCancellersMu sync.Mutex + jobCancellers = make(map[string]context.CancelFunc) + + reOcrCancellersMu sync.Mutex + reOcrCancellers = make(map[string]context.CancelFunc) +) + // Job represents an OCR job type Job struct { ID string DocumentID int - Status string // "pending", "in_progress", "completed", "failed" - Result string // OCR result or error message + Status string // "pending", "in_progress", "completed", "failed", "cancelled" + Result string // OCR result (combined text) or error message CreatedAt time.Time UpdatedAt time.Time PagesDone int // Number of pages processed + TotalPages int // Total number of pages in the document Options OCROptions // OCR processing options } @@ -122,7 +131,22 @@ func startWorkerPool(app *App, numWorkers int) { func processJob(app *App, job *Job) { jobStore.updateJobStatus(job.ID, "in_progress", "") - ctx := context.Background() + jobCtx, cancel := context.WithCancel(context.Background()) + jobCancellersMu.Lock() + jobCancellers[job.ID] = cancel + jobCancellersMu.Unlock() + defer func() { + cancel() + jobCancellersMu.Lock() + delete(jobCancellers, job.ID) + jobCancellersMu.Unlock() + }() + + // Delete old OCR page results for this document before starting new OCR + if err := DeleteOcrPageResults(app.Database, job.DocumentID); err != nil { + logger.Errorf("Failed to delete old OCR page results for document %d: %v", job.DocumentID, err) + // Continue processing even if deletion fails + } // Create OCR options from job options or app defaults options := job.Options @@ -136,10 +160,20 @@ func processJob(app *App, job *Job) { } } - processedDoc, err := app.ProcessDocumentOCR(ctx, job.DocumentID, options) + processedDoc, err := app.ProcessDocumentOCR(jobCtx, job.DocumentID, options, job.ID) if err != nil { - logger.Errorf("Error processing document OCR for job %s: %v", job.ID, err) - jobStore.updateJobStatus(job.ID, "failed", err.Error()) + if jobCtx.Err() == context.Canceled { + jobStore.updateJobStatus(job.ID, "cancelled", "Job cancelled by user") + logger.Infof("Job cancelled: %s", job.ID) + } else { + logger.Errorf("Error processing document OCR for job %s: %v", job.ID, err) + jobStore.updateJobStatus(job.ID, "failed", err.Error()) + } + return + } + if processedDoc == nil { + logger.Infof("OCR processing skipped for job %s (document %d)", job.ID, job.DocumentID) + jobStore.updateJobStatus(job.ID, "completed", "Skipped (already processed or other reason)") return } diff --git a/local_db.go b/local_db.go index 2d696f2c..ecf642d2 100644 --- a/local_db.go +++ b/local_db.go @@ -1,6 +1,7 @@ package main import ( + "errors" "os" "path/filepath" "time" @@ -21,6 +22,17 @@ type ModificationHistory struct { UndoneDate string `gorm:"default:null"` // Date and time of undoing the modification } +type OCRPageResult struct { + ID uint `gorm:"primaryKey"` + DocumentID int `gorm:"index;not null"` + PageIndex int `gorm:"not null"` + Text string `gorm:"size:1048576"` + OcrLimitHit bool + GenerationInfo string `gorm:"type:TEXT"` + CreatedAt time.Time + UpdatedAt time.Time +} + // InitializeDB initializes the SQLite database and migrates the schema func InitializeDB() *gorm.DB { // Ensure db directory exists @@ -37,8 +49,8 @@ func InitializeDB() *gorm.DB { log.Fatalf("Failed to connect to database: %v", err) } - // Migrate the schema (create the table if it doesn't exist) - err = db.AutoMigrate(&ModificationHistory{}) + // Migrate the schema (create the tables if they don't exist) + err = db.AutoMigrate(&ModificationHistory{}, &OCRPageResult{}) if err != nil { log.Fatalf("Failed to migrate database schema: %v", err) } @@ -99,3 +111,45 @@ func SetModificationUndone(db *gorm.DB, record *ModificationHistory) error { result := db.Save(&record) // GORM's Save method return result.Error } + +// SaveSingleOcrPageResult saves or updates the OCR result for a single page, including GenerationInfo as JSON +func SaveSingleOcrPageResult(db *gorm.DB, docID int, pageIdx int, text string, ocrLimitHit bool, generationInfoJSON string) error { + var result OCRPageResult + tx := db.Where("document_id = ? AND page_index = ?", docID, pageIdx).First(&result) + if tx.Error == nil { + result.Text = text + result.OcrLimitHit = ocrLimitHit + result.GenerationInfo = generationInfoJSON + return db.Save(&result).Error + } else if tx.Error != nil { + log.Debugf("SaveSingleOcrPageResult: db.First error: %v (is gorm.ErrRecordNotFound: %v)", tx.Error, errors.Is(tx.Error, gorm.ErrRecordNotFound)) + if errors.Is(tx.Error, gorm.ErrRecordNotFound) { + result = OCRPageResult{ + DocumentID: docID, + PageIndex: pageIdx, + Text: text, + OcrLimitHit: ocrLimitHit, + GenerationInfo: generationInfoJSON, + } + return db.Create(&result).Error + } else { + log.Errorf("Unexpected DB error in SaveSingleOcrPageResult: %v", tx.Error) + return tx.Error + } + } + return nil +} + +func GetOcrPageResults(db *gorm.DB, docID int) ([]OCRPageResult, error) { + var results []OCRPageResult + tx := db.Where("document_id = ?", docID).Order("page_index ASC").Find(&results) + return results, tx.Error +} + +func UpdateOcrPageResult(db *gorm.DB, docID int, pageIdx int, text string, ocrLimitHit bool, generationInfoJSON string) error { + return SaveSingleOcrPageResult(db, docID, pageIdx, text, ocrLimitHit, generationInfoJSON) +} + +func DeleteOcrPageResults(db *gorm.DB, docID int) error { + return db.Where("document_id = ?", docID).Delete(&OCRPageResult{}).Error +} diff --git a/main.go b/main.go index 7793b414..f861bd4f 100644 --- a/main.go +++ b/main.go @@ -235,6 +235,42 @@ func main() { ocrPrompt := promptBuffer.String() + var visionLlmMaxTokens int + if maxTokensStr := os.Getenv("VISION_LLM_MAX_TOKENS"); maxTokensStr != "" { + if parsed, err := strconv.Atoi(maxTokensStr); err == nil { + visionLlmMaxTokens = parsed + } else { + log.Warnf("Invalid VISION_LLM_MAX_TOKENS value: %v, using default (0)", err) + } + } + + var visionLlmTemperature *float64 + if tempStr := os.Getenv("VISION_LLM_TEMPERATURE"); tempStr != "" { + if parsed, err := strconv.ParseFloat(tempStr, 64); err == nil { + visionLlmTemperature = &parsed + } else { + log.Warnf("Invalid VISION_LLM_TEMPERATURE value: %v, ignoring", err) + } + } + + var ollamaOcrTopK *int + if topKStr := os.Getenv("OLLAMA_OCR_TOP_K"); topKStr != "" { + if parsed, err := strconv.Atoi(topKStr); err == nil { + ollamaOcrTopK = &parsed + } else { + log.Warnf("Invalid OLLAMA_OCR_TOP_K value: %v, ignoring", err) + } + } + + var ollamaContextLength int + if ctxLenStr := os.Getenv("OLLAMA_CONTEXT_LENGTH"); ctxLenStr != "" { + if parsed, err := strconv.Atoi(ctxLenStr); err == nil { + ollamaContextLength = parsed + } else { + log.Warnf("Invalid OLLAMA_CONTEXT_LENGTH value: %v, ignoring", err) + } + } + ocrConfig := ocr.Config{ Provider: providerType, GoogleProjectID: os.Getenv("GOOGLE_PROJECT_ID"), @@ -252,6 +288,10 @@ func main() { DoclingURL: doclingURL, DoclingImageExportMode: doclingImageExportMode, EnableHOCR: true, // Always generate hOCR struct if provider supports it + VisionLLMMaxTokens: visionLlmMaxTokens, + VisionLLMTemperature: visionLlmTemperature, + OllamaOcrTopK: ollamaOcrTopK, + OllamaContextLength: ollamaContextLength, } // Parse Azure timeout if set @@ -340,8 +380,12 @@ func main() { // OCR endpoints api.POST("/documents/:id/ocr", app.submitOCRJobHandler) + api.GET("/documents/:id/ocr_pages", app.getOCRPagesHandler) + api.POST("/documents/:id/ocr_pages/:pageIndex/reocr", app.reOCRPageHandler) + api.DELETE("/documents/:id/ocr_pages/:pageIndex/reocr", app.cancelReOCRPageHandler) api.GET("/jobs/ocr/:job_id", app.getJobStatusHandler) api.GET("/jobs/ocr", app.getAllJobsHandler) + api.POST("/ocr/jobs/:job_id/stop", app.stopOCRJobHandler) // Endpoint to see if user enabled OCR api.GET("/experimental/ocr", func(c *gin.Context) { @@ -852,10 +896,18 @@ func createLLM() (llms.Model, error) { if host == "" { host = "http://127.0.0.1:11434" } - llm, err := ollama.New( + opts := []ollama.Option{ ollama.WithModel(llmModel), ollama.WithServerURL(host), - ) + } + if ctxLenStr := os.Getenv("OLLAMA_CONTEXT_LENGTH"); ctxLenStr != "" { + if parsed, err := strconv.Atoi(ctxLenStr); err == nil && parsed > 0 { + opts = append(opts, ollama.WithRunnerNumCtx(parsed)) + } else if err != nil { + log.Warnf("Invalid OLLAMA_CONTEXT_LENGTH value: %v, ignoring", err) + } + } + llm, err := ollama.New(opts...) if err != nil { return nil, err } @@ -937,10 +989,18 @@ func createVisionLLM() (llms.Model, error) { if host == "" { host = "http://127.0.0.1:11434" } - llm, err := ollama.New( + opts := []ollama.Option{ ollama.WithModel(visionLlmModel), ollama.WithServerURL(host), - ) + } + if ctxLenStr := os.Getenv("OLLAMA_CONTEXT_LENGTH"); ctxLenStr != "" { + if parsed, err := strconv.Atoi(ctxLenStr); err == nil && parsed > 0 { + opts = append(opts, ollama.WithRunnerNumCtx(parsed)) + } else if err != nil { + log.Warnf("Invalid OLLAMA_CONTEXT_LENGTH value: %v, ignoring", err) + } + } + llm, err := ollama.New(opts...) if err != nil { return nil, err } diff --git a/ocr.go b/ocr.go index 0e550adf..4ef5a325 100644 --- a/ocr.go +++ b/ocr.go @@ -2,12 +2,15 @@ package main import ( "context" + "encoding/json" "fmt" "os" "path/filepath" "strings" "time" + "paperless-gpt/ocr" + "github.com/gardar/ocrchestra/pkg/hocr" "github.com/gardar/ocrchestra/pkg/pdfocr" "github.com/sirupsen/logrus" @@ -38,13 +41,16 @@ type HOCRCapable interface { } // ProcessDocumentOCR processes a document through OCR and returns the combined text, hOCR and PDF -func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int, options OCROptions) (*ProcessedDocument, error) { +func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int, options OCROptions, jobID string) (*ProcessedDocument, error) { // Validate options for safety if !options.UploadPDF && options.ReplaceOriginal { return nil, fmt.Errorf("invalid OCROptions: cannot set ReplaceOriginal=true when UploadPDF=false") } docLogger := documentLogger(documentID) + if jobID != "" { + docLogger = docLogger.WithField("job_id", jobID) + } docLogger.Info("Starting OCR processing") // Determine the actual process mode to use @@ -128,6 +134,7 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int, options var originalPDFData []byte var totalPdfPages int var imagePaths []string + var ocrResults []*ocr.OCRResult // Default process mode to app's ocrProcessMode if not set in options processMode = options.ProcessMode @@ -185,6 +192,14 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int, options originalPDFData = pdfData totalPdfPages = pdfPageCount + if jobID != "" { + jobStore.Lock() + if job, exists := jobStore.jobs[jobID]; exists { + job.TotalPages = totalPdfPages + } + jobStore.Unlock() + } + // Log the page count information docLogger.WithFields(logrus.Fields{ "processed_page_count": len(pdfPaths), @@ -233,6 +248,14 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int, options totalPdfPages = imgPageCount + if jobID != "" { + jobStore.Lock() + if job, exists := jobStore.jobs[jobID]; exists { + job.TotalPages = totalPdfPages + } + jobStore.Unlock() + } + // Log the page count information docLogger.WithFields(logrus.Fields{ "processed_page_count": len(imagePaths), @@ -241,6 +264,17 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int, options }).Debug("Downloaded document images") for i, imagePath := range imagePaths { + select { + case <-ctx.Done(): + docLogger.Info("Job cancelled before processing page") + // Return partial results if cancelled + return &ProcessedDocument{ + ID: documentID, + Text: strings.Join(ocrTexts, "\n\n"), + }, ctx.Err() + default: + } + pageLogger := docLogger.WithField("page", i+1) pageLogger.Debug("Processing page") @@ -262,11 +296,29 @@ func (app *App) ProcessDocumentOCR(ctx context.Context, documentID int, options return nil, fmt.Errorf("error performing OCR for document %d, page %d: nil result", documentID, i+1) } + if jobID != "" { + jobStore.updatePagesDone(jobID, i+1) + } + pageLogger.WithField("has_hocr_page", result.HOCRPage != nil). WithField("metadata", result.Metadata). Debug("OCR completed for page") ocrTexts = append(ocrTexts, result.Text) + ocrResults = append(ocrResults, result) + + var genInfoJSON string + if result.GenerationInfo != nil { + if b, err := json.Marshal(result.GenerationInfo); err == nil { + genInfoJSON = string(b) + } + } + + saveErr := SaveSingleOcrPageResult(app.Database, documentID, i, result.Text, result.OcrLimitHit, genInfoJSON) + if saveErr != nil { + pageLogger.WithError(saveErr).Error("Failed to save OCR page result to database") + // Continue processing other pages even if saving fails for one + } } } diff --git a/ocr/llm_provider.go b/ocr/llm_provider.go index 476f20dd..e42ca9a4 100644 --- a/ocr/llm_provider.go +++ b/ocr/llm_provider.go @@ -20,10 +20,13 @@ import ( // LLMProvider implements OCR using LLM vision models type LLMProvider struct { - provider string - model string - llm llms.Model - prompt string // OCR prompt template + provider string + model string + llm llms.Model + prompt string + maxTokens int + temperature *float64 + ollamaTopK *int } func newLLMProvider(config Config) (*LLMProvider, error) { @@ -57,10 +60,13 @@ func newLLMProvider(config Config) (*LLMProvider, error) { logger.Info("Successfully initialized LLM OCR provider") return &LLMProvider{ - provider: config.VisionLLMProvider, - model: config.VisionLLMModel, - llm: model, - prompt: config.VisionLLMPrompt, + provider: config.VisionLLMProvider, + model: config.VisionLLMModel, + llm: model, + prompt: config.VisionLLMPrompt, + maxTokens: config.VisionLLMMaxTokens, + temperature: config.VisionLLMTemperature, + ollamaTopK: config.OllamaOcrTopK, }, nil } @@ -88,7 +94,6 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte, pag // Prepare content parts based on provider type var parts []llms.ContentPart - var imagePart llms.ContentPart providerName := strings.ToLower(p.provider) @@ -105,6 +110,17 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte, pag llms.TextPart(p.prompt), } + var callOpts []llms.CallOption + if p.maxTokens > 0 { + callOpts = append(callOpts, llms.WithMaxTokens(p.maxTokens)) + } + if p.temperature != nil { + callOpts = append(callOpts, llms.WithTemperature(*p.temperature)) + } + if providerName == "ollama" && p.ollamaTopK != nil { + callOpts = append(callOpts, llms.WithTopK(*p.ollamaTopK)) + } + // Convert the image to text logger.Debug("Sending request to vision model") completion, err := p.llm.GenerateContent(ctx, []llms.MessageContent{ @@ -112,20 +128,43 @@ func (p *LLMProvider) ProcessImage(ctx context.Context, imageContent []byte, pag Parts: parts, Role: llms.ChatMessageTypeHuman, }, - }) + }, callOpts...) if err != nil { logger.WithError(err).Error("Failed to get response from vision model") return nil, fmt.Errorf("error getting response from LLM: %w", err) } + text := completion.Choices[0].Content + limitHit := false + tokenCount := -1 + + if p.maxTokens > 0 { + genInfo := completion.Choices[0].GenerationInfo + if genInfo != nil && genInfo["TotalTokens"] != nil { + if v, ok := genInfo["TotalTokens"].(int); ok { + tokenCount = v + } + } + // Fallback: count tokens using langchaingo (might not be accurate for all models) + if tokenCount < 0 { + tokenCount = llms.CountTokens(p.model, text) + } + if tokenCount >= p.maxTokens { + limitHit = true + } + } + result := &OCRResult{ - Text: completion.Choices[0].Content, + Text: text, Metadata: map[string]string{ "provider": p.provider, "model": p.model, }, + OcrLimitHit: limitHit, + GenerationInfo: completion.Choices[0].GenerationInfo, } - logger.WithField("content_length", len(result.Text)).Info("Successfully processed image") + + logger.WithField("content_length", len(result.Text)).WithFields(completion.Choices[0].GenerationInfo).Info("Successfully processed image") return result, nil } @@ -147,10 +186,14 @@ func createOllamaClient(config Config) (llms.Model, error) { if host == "" { host = "http://127.0.0.1:11434" } - return ollama.New( + opts := []ollama.Option{ ollama.WithModel(config.VisionLLMModel), ollama.WithServerURL(host), - ) + } + if config.OllamaContextLength > 0 { + opts = append(opts, ollama.WithRunnerNumCtx(config.OllamaContextLength)) + } + return ollama.New(opts...) } // createMistralClient creates a new Mistral vision model client diff --git a/ocr/provider.go b/ocr/provider.go index acf330ad..68161e1b 100644 --- a/ocr/provider.go +++ b/ocr/provider.go @@ -20,6 +20,12 @@ type OCRResult struct { // Additional provider-specific metadata Metadata map[string]string + + // Indicates if the OCR output hit the max token limit + OcrLimitHit bool + + // LLM GenerationInfo (arbitrary metadata, per page) + GenerationInfo map[string]interface{} } // Provider defines the interface for OCR processing @@ -36,6 +42,14 @@ type Config struct { MistralAPIKey string MistralModel string // Optional, defaults to "mistral-ocr-latest" + // Generic Vision LLM settings + VisionLLMMaxTokens int + VisionLLMTemperature *float64 + + // Ollama OCR-specific settings + OllamaOcrTopK *int + OllamaContextLength int + // Google Document AI settings GoogleProjectID string GoogleLocation string diff --git a/types.go b/types.go index 156be35f..e750ed23 100644 --- a/types.go +++ b/types.go @@ -98,6 +98,7 @@ type DocumentSuggestion struct { SuggestedCreatedDate string `json:"suggested_created_date,omitempty"` KeepOriginalTags bool `json:"keep_original_tags,omitempty"` RemoveTags []string `json:"remove_tags,omitempty"` + AddTags []string `json:"add_tags,omitempty"` } type Correspondent struct { @@ -144,5 +145,5 @@ type ClientInterface interface { // DocumentProcessor defines the interface for processing documents with OCR type DocumentProcessor interface { - ProcessDocumentOCR(ctx context.Context, documentID int, options OCROptions) (*ProcessedDocument, error) + ProcessDocumentOCR(ctx context.Context, documentID int, options OCROptions, jobID string) (*ProcessedDocument, error) } diff --git a/web-app/src/ExperimentalOCR.tsx b/web-app/src/ExperimentalOCR.tsx index ef613c0f..ff09838e 100644 --- a/web-app/src/ExperimentalOCR.tsx +++ b/web-app/src/ExperimentalOCR.tsx @@ -1,18 +1,46 @@ import axios from 'axios'; -import React, { useCallback, useEffect, useState } from 'react'; +import React, { useCallback, useEffect, useState, useRef } from 'react'; import { FaSpinner } from 'react-icons/fa'; import { Document, DocumentSuggestion } from './DocumentProcessor'; +import { Tooltip } from 'react-tooltip'; +import { ClientStatus, OCRJobStatus, getStatusViewOptions, mapJobStatus } from './ocrStatus'; + +type OCRPageResult = { + text: string; + ocrLimitHit: boolean; + generationInfo?: Record; +}; +type OCRCombinedResult = { combinedText: string; perPageResults: OCRPageResult[] }; const ExperimentalOCR: React.FC = () => { const refreshInterval = 1000; // Refresh interval in milliseconds const [documentId, setDocumentId] = useState(0); const [jobId, setJobId] = useState(''); const [ocrResult, setOcrResult] = useState(''); - const [status, setStatus] = useState(''); - const [error, setError] = useState(''); - const [pagesDone, setPagesDone] = useState(0); // New state for pages done - const [saving, setSaving] = useState(false); // New state for saving - const [documentDetails, setDocumentDetails] = useState(null); // New state for document details + const [jobStatus, setJobStatus] = useState('idle'); + const [clientStatus, setClientStatus] = useState('idle'); + const [error, setError] = useState(null); + const [message, setMessage] = useState(null); + const [pagesDone, setPagesDone] = useState(0); + const [totalPages, setTotalPages] = useState(null); + const [saving, setSaving] = useState(false); + const [documentDetails, setDocumentDetails] = useState(null); + const [perPageResults, setPerPageResults] = useState([]); + const lastFetchedPagesDoneRef = useRef(0); + + const [reOcrLoading, setReOcrLoading] = useState<{ [pageIdx: number]: boolean }>({}); + const [reOcrErrors, setReOcrErrors] = useState<{ [pageIdx: number]: string }>({}); + const [reOcrAbortControllers, setReOcrAbortControllers] = useState<{ [pageIdx: number]: AbortController | null }>({}); + + const stopOCRJob = async () => { + if (!jobId) return; + try { + await axios.post(`/api/ocr/jobs/${jobId}/stop`); + setJobStatus('cancelled'); + } catch (err) { + setError('Failed to stop OCR job.'); + } + }; const fetchDocumentDetails = useCallback(async () => { if (!documentId) return; @@ -26,24 +54,40 @@ const ExperimentalOCR: React.FC = () => { } }, [documentId]); + const fetchPerPageResults = useCallback(async () => { + if (!documentId) return; + try { + const response = await axios.get<{ pages: OCRPageResult[] }>(`/api/documents/${documentId}/ocr_pages`); + setPerPageResults(response.data.pages); + } catch (err) { + console.error("Error fetching per-page OCR results:", err); + setError("Failed to fetch per-page OCR results."); + } + }, [documentId]); + const submitOCRJob = async () => { - setStatus(''); - setError(''); + setError(null); + setMessage(null); setJobId(''); setOcrResult(''); - setPagesDone(0); // Reset pages done + setPagesDone(0); + setPerPageResults([]); + setJobStatus('idle'); + setClientStatus('fetching_details'); + lastFetchedPagesDoneRef.current = 0; try { - setStatus('Fetching document details...'); - await fetchDocumentDetails(); // Fetch document details before submitting the job + await fetchDocumentDetails(); - setStatus('Submitting OCR job...'); + setClientStatus('submitting'); const response = await axios.post(`./api/documents/${documentId}/ocr`); setJobId(response.data.job_id); - setStatus('Job submitted. Processing...'); + setJobStatus('pending'); + setClientStatus('idle'); } catch (err) { console.error(err); setError('Failed to submit OCR job.'); + setClientStatus('idle'); } }; @@ -52,23 +96,38 @@ const ExperimentalOCR: React.FC = () => { try { const response = await axios.get(`./api/jobs/ocr/${jobId}`); - const jobStatus = response.data.status; - setPagesDone(response.data.pages_done); // Update pages done - if (jobStatus === 'completed') { - setOcrResult(response.data.result); - setStatus('OCR completed successfully.'); - } else if (jobStatus === 'failed') { + const newJobStatus = mapJobStatus(response.data.status); + setJobStatus(newJobStatus); + const newPagesDone = response.data.pages_done; + setPagesDone(newPagesDone); + setTotalPages(response.data.total_pages ?? null); + + if (newPagesDone > lastFetchedPagesDoneRef.current) { + await fetchPerPageResults(); + lastFetchedPagesDoneRef.current = newPagesDone; + } + + if (newJobStatus === 'completed') { + let parsedResult: OCRCombinedResult | null = null; + try { + parsedResult = JSON.parse(response.data.result); + } catch (e) { + setOcrResult(response.data.result); + return; + } + if (parsedResult) { + setOcrResult(parsedResult.combinedText); + setPerPageResults(parsedResult.perPageResults); + } + } else if (newJobStatus === 'failed') { setError(response.data.error); - setStatus('OCR failed.'); } else { - setStatus(`Job status: ${jobStatus}. This may take a few minutes.`); - // Automatically check again after a delay - setTimeout(checkJobStatus, refreshInterval); + setTimeout(() => checkJobStatus(), refreshInterval); } } catch (err) { console.error(err); setError('Failed to check job status.'); - } + } }; const handleSaveContent = async () => { @@ -81,12 +140,12 @@ const ExperimentalOCR: React.FC = () => { } const requestPayload: DocumentSuggestion = { id: documentId, - original_document: documentDetails, // Use fetched document details + original_document: documentDetails, suggested_content: ocrResult, }; await axios.patch("./api/update-documents", [requestPayload]); - setStatus('Content saved successfully.'); + setMessage('Content saved successfully.'); } catch (err) { console.error("Error saving content:", err); setError("Failed to save content."); @@ -95,14 +154,82 @@ const ExperimentalOCR: React.FC = () => { } }; - // Start checking job status when jobId is set + const handleReOcrPage = async (pageIdx: number) => { + if (!perPageResults[pageIdx]) { + setReOcrErrors((prev) => ({ ...prev, [pageIdx]: "Page data not available." })); + return; + } + + setReOcrLoading((prev) => ({ ...prev, [pageIdx]: true })); + setReOcrErrors((prev) => ({ ...prev, [pageIdx]: "" })); + + const controller = new AbortController(); + setReOcrAbortControllers((prev) => ({ ...prev, [pageIdx]: controller })); + + try { + const response = await axios.post( + `/api/documents/${documentId}/ocr_pages/${pageIdx}/reocr`, + {}, + { signal: controller.signal } + ); + + setPerPageResults((prev) => + prev.map((res, idx) => + idx === pageIdx + ? { + text: response.data.text, + ocrLimitHit: response.data.ocrLimitHit, + generationInfo: response.data.generationInfo, + } + : res + ) + ); + + if (pageIdx + 1 > lastFetchedPagesDoneRef.current) { + lastFetchedPagesDoneRef.current = pageIdx + 1; + } + } catch (err: any) { + if (err.name === 'CanceledError' || err.code === 'ERR_CANCELED') { + setReOcrErrors((prev) => ({ + ...prev, + [pageIdx]: "Re-OCR cancelled.", + })); + } else { + setReOcrErrors((prev) => ({ + ...prev, + [pageIdx]: "Failed to re-OCR page.", + })); + } + } finally { + setReOcrLoading((prev) => ({ ...prev, [pageIdx]: false })); + setReOcrAbortControllers((prev) => ({ ...prev, [pageIdx]: null })); + } + }; + + const handleCancelReOcrPage = async (pageIdx: number) => { + const controller = reOcrAbortControllers[pageIdx]; + if (controller) { + controller.abort(); + } + + try { + await axios.delete(`/api/documents/${documentId}/ocr_pages/${pageIdx}/reocr`); + console.log(`Cancellation request sent for page ${pageIdx}`); + } catch (err) { + console.error(`Failed to send cancellation request for page ${pageIdx}:`, err); + } + }; + useEffect(() => { if (jobId) { + lastFetchedPagesDoneRef.current = 0; checkJobStatus(); } // eslint-disable-next-line react-hooks/exhaustive-deps }, [jobId]); + const statusViewOptions = getStatusViewOptions(jobStatus, clientStatus); + return (

OCR via LLMs (Experimental)

@@ -128,7 +255,7 @@ const ExperimentalOCR: React.FC = () => { className="w-full bg-blue-600 hover:bg-blue-700 text-white font-semibold py-2 px-4 rounded transition duration-200" disabled={!documentId} > - {status.startsWith('Submitting') ? ( + {clientStatus === 'submitting' ? ( Submitting... @@ -137,20 +264,31 @@ const ExperimentalOCR: React.FC = () => { 'Submit OCR Job' )} - {status && ( + {(statusViewOptions.label || pagesDone > 0) && (
- {status.includes('in_progress') && ( + {statusViewOptions.showSpinner ? ( - {status} + {statusViewOptions.label} + ) : ( + statusViewOptions.label )} - {!status.includes('in_progress') && status} {pagesDone > 0 && (
- Pages processed: {pagesDone} + {totalPages && totalPages > 1 + ? `Pages processed: ${pagesDone} / ${totalPages}` + : `Pages processed: ${pagesDone}`}
)} + {jobId && statusViewOptions.canStop && ( + + )}
)} {error && ( @@ -158,9 +296,99 @@ const ExperimentalOCR: React.FC = () => { {error}
)} + {message && ( +
+ {message} +
+ )} + {perPageResults.length > 0 && totalPages && totalPages > 1 && ( +
+

Per-Page OCR Results:

+ {perPageResults.map((page, idx) => ( +
+
+ Page {idx + 1} + {page.ocrLimitHit && ( + + Token Limit Hit + + )} + {page.generationInfo && Object.keys(page.generationInfo).length > 0 && ( + <> + + + + + + ( +
+ + + {Object.entries(page.generationInfo ?? {}).map(([key, value]) => ( + + + + + ))} + +
{key}:{typeof value === 'object' ? JSON.stringify(value) : String(value)}
+
+ )} + /> + + )} +
+
+                  {page.text}
+                
+
+
+ + {reOcrLoading[idx] && ( + + )} +
+ {reOcrErrors[idx] && ( + {reOcrErrors[idx]} + )} +
+
+ ))} +
+ )} {ocrResult && (
-

OCR Result:

+

Combined OCR Result:

{ocrResult}
@@ -185,4 +413,4 @@ const ExperimentalOCR: React.FC = () => { ); }; -export default ExperimentalOCR; \ No newline at end of file +export default ExperimentalOCR; diff --git a/web-app/src/ocrStatus.ts b/web-app/src/ocrStatus.ts new file mode 100644 index 00000000..bf931146 --- /dev/null +++ b/web-app/src/ocrStatus.ts @@ -0,0 +1,44 @@ +export type OCRJobStatus = 'idle' | 'pending' | 'in_progress' | 'completed' | 'failed' | 'cancelled'; +export type ClientStatus = 'idle' | 'fetching_details' | 'submitting'; + +export type StatusViewOptions = { + label: string; + showSpinner: boolean; + canStop: boolean; +}; + +export const mapJobStatus = (raw: string | null | undefined): OCRJobStatus => { + switch (raw) { + case 'pending': + case 'in_progress': + case 'completed': + case 'failed': + case 'cancelled': + return raw; + default: + throw new Error(`Unknown job status: ${raw}`); + } +}; + +export const getStatusViewOptions = (job: OCRJobStatus, clientStatus: ClientStatus): StatusViewOptions => { + if (clientStatus === 'fetching_details') { + return { label: 'Fetching document details...', showSpinner: true, canStop: false }; + } + if (clientStatus === 'submitting') { + return { label: 'Submitting OCR job...', showSpinner: true, canStop: false }; + } + switch (job) { + case 'idle': + return { label: '', showSpinner: false, canStop: false }; + case 'pending': + return { label: 'Job status: pending. This may take a few minutes.', showSpinner: true, canStop: true }; + case 'in_progress': + return { label: 'Job status: in progress. This may take a few minutes.', showSpinner: true, canStop: true }; + case 'completed': + return { label: 'OCR completed successfully.', showSpinner: false, canStop: false }; + case 'failed': + return { label: 'OCR failed.', showSpinner: false, canStop: false }; + case 'cancelled': + return { label: 'Job cancelled by user.', showSpinner: false, canStop: false }; + } +};