29 commits
0965ed4
getdocuments request / response zod schema
thierryverse Nov 20, 2025
2cb1e63
createdocuments request / response zod schema
thierryverse Nov 20, 2025
2e7a1fa
add upload document request / response zod schema
thierryverse Nov 20, 2025
a8d8eea
add search documents request / response zod schema
thierryverse Nov 20, 2025
e83a887
add getdocuments types counts request / response zod schema
thierryverse Nov 20, 2025
9d53ace
add get document by chunks request / response zod schema
thierryverse Nov 20, 2025
495ade8
add get document request / response zod schema
thierryverse Nov 20, 2025
8198cb2
add update document request / response zod schema
thierryverse Nov 20, 2025
5c8817a
add delete document request / response zod schema
thierryverse Nov 20, 2025
629ebce
add documents api service
thierryverse Nov 20, 2025
c94f9a2
add documents queries & mutations atoms
thierryverse Nov 20, 2025
2f76f8d
fix (migration 40): Add an 'if not exists' guard
CREDO23 Dec 2, 2025
fde87f5
fix (migration 43): Add an 'if not exists' guard
CREDO23 Dec 2, 2025
5de0383
fix (migration 1): add 'if not exists' guard
CREDO23 Dec 2, 2025
259b3e6
Refactor request validation in getDocuments method
CREDO23 Dec 3, 2025
af344d8
Refactor request validation in getDocument method
CREDO23 Dec 3, 2025
4e1be68
Clean up comments in createDocument method
CREDO23 Dec 3, 2025
58ce20e
Improve request validation for uploadDocument
CREDO23 Dec 3, 2025
e95bab4
Improve request validation in searchDocuments
CREDO23 Dec 3, 2025
b961050
Merge branch 'MODSetter:main' into feat/add-jotai-tanstack
CREDO23 Dec 4, 2025
5d34e03
chore: add .pnpm-store to .gitignore
CREDO23 Dec 4, 2025
60815cd
feat: add useQuery implementation with centralized cache key manageme…
CREDO23 Dec 4, 2025
6ed0b8e
refactor: use descriptive variable names for useQuery destructuring i…
CREDO23 Dec 4, 2025
19fde39
refactor: replace useDocumentByChunk with useQuery implementation in …
CREDO23 Dec 4, 2025
ffcf88f
feat: migrate document upload to use jotai + tanstack query mutation …
CREDO23 Dec 4, 2025
339f64a
refactor: update SourceDetailSheet to use useQuery with centralized c…
CREDO23 Dec 4, 2025
22984b9
refactor: migrate YouTube tab to use jotai + tanstack query mutation
CREDO23 Dec 4, 2025
8c3b56c
refactor: remove unused use-document-by-chunk hook
CREDO23 Dec 4, 2025
aff7ca0
refactor: migrate document types fetch to jotai + tanstack query
CREDO23 Dec 4, 2025
4 changes: 3 additions & 1 deletion .gitignore
@@ -2,4 +2,6 @@
./surfsense_backend/podcasts/
.env
node_modules/
.ruff_cache/
.ruff_cache/
.venv
.pnpm-store
72 changes: 41 additions & 31 deletions surfsense_backend/alembic/versions/1_add_github_connector_enum.py
@@ -2,17 +2,12 @@

Revision ID: 1
Revises:

"""

from collections.abc import Sequence

from alembic import op

# Import pgvector if needed for other types, though not for this ENUM change
# import pgvector


# revision identifiers, used by Alembic.
revision: str = "1"
down_revision: str | None = None
@@ -21,10 +16,24 @@


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
# Ensure the enum type exists
op.execute(
"""
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'searchsourceconnectortype') THEN
CREATE TYPE searchsourceconnectortype AS ENUM(
'SERPER_API',
'TAVILY_API',
'SLACK_CONNECTOR',
'NOTION_CONNECTOR'
);
END IF;
END$$;
"""
)

# Manually add the command to add the enum value
# Note: It's generally better to let autogenerate handle this, but we're bypassing it
# Add the new enum value if it doesn't exist
op.execute(
"""
DO $$
@@ -43,30 +52,31 @@ def upgrade() -> None:
"""
)

# Pass for the rest, as autogenerate didn't run to add other schema details
pass
# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###

# Downgrading removal of an enum value is complex and potentially dangerous
# if the value is in use. Often omitted or requires manual SQL based on context.
# For now, we'll just pass. If you needed to reverse this, you'd likely
# have to manually check if 'GITHUB_CONNECTOR' is used in the table
# and then potentially recreate the type without it.
# Removing an enum value safely requires recreating the type
op.execute(
"ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old"
)
op.execute(
"CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR')"
)
op.execute(
"ALTER TABLE search_source_connectors ALTER COLUMN connector_type TYPE searchsourceconnectortype USING "
"connector_type::text::searchsourceconnectortype"
)
op.execute("DROP TYPE searchsourceconnectortype_old")
"""
DO $$
BEGIN
-- Rename existing type
ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old;

-- Create new type without GITHUB_CONNECTOR
CREATE TYPE searchsourceconnectortype AS ENUM(
'SERPER_API',
'TAVILY_API',
'SLACK_CONNECTOR',
'NOTION_CONNECTOR'
);

-- Update table columns to use new type
ALTER TABLE search_source_connectors
ALTER COLUMN connector_type TYPE searchsourceconnectortype
USING connector_type::text::searchsourceconnectortype;

pass
# ### end Alembic commands ###
-- Drop old type
DROP TYPE searchsourceconnectortype_old;
END$$;
"""
)
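Note: the DO block that actually adds the new enum value is collapsed in this view. Judging from the guard comment above it and the downgrade path (which rebuilds the type without GITHUB_CONNECTOR), it presumably checks pg_enum before running ALTER TYPE. A minimal sketch of such a guard, not necessarily the collapsed hunk verbatim:

op.execute(
    """
    DO $$
    BEGIN
        -- Sketch only: add GITHUB_CONNECTOR if the label is not already present.
        -- The value name is inferred from the downgrade, which recreates the
        -- type without it; the collapsed hunk may differ in detail.
        IF NOT EXISTS (
            SELECT 1
            FROM pg_enum e
            JOIN pg_type t ON t.oid = e.enumtypid
            WHERE t.typname = 'searchsourceconnectortype'
              AND e.enumlabel = 'GITHUB_CONNECTOR'
        ) THEN
            ALTER TYPE searchsourceconnectortype ADD VALUE 'GITHUB_CONNECTOR';
        END IF;
    END$$;
    """
)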
@@ -1,19 +1,6 @@
"""Move LLM preferences from user-level to search space level

Revision ID: 40
Revises: 39
Create Date: 2024-11-27

This migration moves LLM preferences (long_context_llm_id, fast_llm_id, strategic_llm_id)
from the user_search_space_preferences table to the searchspaces table itself.

This change supports the RBAC model where LLM preferences are shared by all members
of a search space, rather than being per-user.
"""

import sqlalchemy as sa

from alembic import op
from sqlalchemy import inspect

# revision identifiers, used by Alembic.
revision = "40"
@@ -23,26 +10,32 @@


def upgrade():
# Add LLM preference columns to searchspaces table
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)
conn = op.get_bind()
inspector = inspect(conn)

existing_cols = {col["name"] for col in inspector.get_columns("searchspaces")}

# Add columns only if they don't already exist
if "long_context_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)

if "fast_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)

# Migrate existing preferences from user_search_space_preferences to searchspaces
# We take the owner's preferences (the user who created the search space)
connection = op.get_bind()
if "strategic_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)

# Get all search spaces and their owner's preferences
connection.execute(
# Migrate existing data
conn.execute(
sa.text("""
UPDATE searchspaces ss
SET
@@ -57,7 +50,16 @@ def upgrade():


def downgrade():
# Remove LLM preference columns from searchspaces table
op.drop_column("searchspaces", "strategic_llm_id")
op.drop_column("searchspaces", "fast_llm_id")
op.drop_column("searchspaces", "long_context_llm_id")
conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {col["name"] for col in inspector.get_columns("searchspaces")}

# Drop columns only if they exist
if "strategic_llm_id" in existing_cols:
op.drop_column("searchspaces", "strategic_llm_id")

if "fast_llm_id" in existing_cols:
op.drop_column("searchspaces", "fast_llm_id")

if "long_context_llm_id" in existing_cols:
op.drop_column("searchspaces", "long_context_llm_id")
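Migrations 40 and 43 repeat the same inspect-then-guard pattern. For illustration only, the check could be factored into a helper like the one below; add_column_if_absent is a hypothetical name, not something this PR introduces:

import sqlalchemy as sa
from alembic import op
from sqlalchemy import inspect


def add_column_if_absent(table: str, column: sa.Column) -> None:
    # Hypothetical helper mirroring the guards in migrations 40 and 43:
    # add the column only when the live schema does not already have it.
    inspector = inspect(op.get_bind())
    existing_cols = {c["name"] for c in inspector.get_columns(table)}
    if column.name not in existing_cols:
        op.add_column(table, column)


# Usage, matching migration 40:
# add_column_if_absent(
#     "searchspaces", sa.Column("fast_llm_id", sa.Integer(), nullable=True)
# )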
@@ -16,6 +16,7 @@
from sqlalchemy.dialects import postgresql

from alembic import op
from sqlalchemy import inspect

# revision identifiers, used by Alembic.
revision: str = "43"
@@ -25,51 +26,57 @@


def upgrade() -> None:
"""Upgrade schema - Add BlockNote fields and trigger population task."""

# Add the columns
op.add_column(
"documents",
sa.Column(
"blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
)
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)

# Trigger the Celery task to populate blocknote_document for existing documents
try:
from app.tasks.celery_tasks.blocknote_migration_tasks import (
populate_blocknote_for_documents_task,
"""Upgrade schema - Add BlockNote fields (idempotent)."""

conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {c["name"] for c in inspector.get_columns("documents")}

# Add blocknote_document (JSONB) if it does not exist
if "blocknote_document" not in existing_cols:
op.add_column(
"documents",
sa.Column(
"blocknote_document",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)

# Queue the task to run asynchronously
populate_blocknote_for_documents_task.apply_async()
print(
"✓ Queued Celery task to populate blocknote_document for existing documents"
# Add content_needs_reindexing (boolean) if it does not exist
if "content_needs_reindexing" not in existing_cols:
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
except Exception as e:
# If Celery is not available or task queueing fails, log but don't fail the migration
print(f"⚠ Warning: Could not queue blocknote population task: {e}")
print(" You can manually trigger it later with:")
print(
" celery -A app.celery_app call app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task"

# Add last_edited_at (timestamp with tz) if it does not exist
if "last_edited_at" not in existing_cols:
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)

# NOTE: We intentionally do NOT import or queue Celery tasks here.
# Running background jobs during migrations causes hard-to-debug failures.
# After running migrations, trigger the backfill task manually (instructions below).


def downgrade() -> None:
"""Downgrade schema - Remove BlockNote fields."""
op.drop_column("documents", "last_edited_at")
op.drop_column("documents", "content_needs_reindexing")
op.drop_column("documents", "blocknote_document")
"""Downgrade schema - Remove BlockNote fields (only if present)."""

conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {c["name"] for c in inspector.get_columns("documents")}

if "last_edited_at" in existing_cols:
op.drop_column("documents", "last_edited_at")
if "content_needs_reindexing" in existing_cols:
op.drop_column("documents", "content_needs_reindexing")
if "blocknote_document" in existing_cols:
op.drop_column("documents", "blocknote_document")
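The new NOTE defers the backfill out of the migration, but the promised instructions live in the lines this PR removes. After `alembic upgrade head` completes, the task can be queued from a one-off script (task path taken from the removed import) or via the CLI command the old warning printed:

# One-off script to queue the BlockNote backfill after migrations finish,
# instead of from inside the migration. The task path comes from the import
# that this PR removes from the migration file.
from app.tasks.celery_tasks.blocknote_migration_tasks import (
    populate_blocknote_for_documents_task,
)

populate_blocknote_for_documents_task.apply_async()

# CLI equivalent, as printed by the removed warning message:
# celery -A app.celery_app call \
#     app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task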
@@ -1,15 +1,15 @@
"use client";

import { type CreateMessage, type Message, useChat } from "@ai-sdk/react";
import { useAtomValue } from "jotai";
import { useAtom, useAtomValue } from "jotai";
import { useParams, useRouter } from "next/navigation";
import { useEffect, useMemo, useRef } from "react";
import { createChatMutationAtom, updateChatMutationAtom } from "@/atoms/chats/chat-mutation.atoms";
import { activeChatAtom } from "@/atoms/chats/chat-query.atoms";
import { activeChatIdAtom } from "@/atoms/chats/ui.atoms";
import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms";
import ChatInterface from "@/components/chat/ChatInterface";
import { useChatState } from "@/hooks/use-chat";
import { useDocumentTypes } from "@/hooks/use-document-types";
import type { Document } from "@/hooks/use-documents";
import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors";

@@ -45,7 +45,19 @@ export default function ResearcherPage() {
});

// Fetch all available sources (document types + live search connectors)
const { documentTypes } = useDocumentTypes(Number(search_space_id));
// Use the documentTypeCountsAtom for fetching document types
const [documentTypeCountsQuery] = useAtom(documentTypeCountsAtom);
const { data: documentTypeCountsData } = documentTypeCountsQuery;

// Transform the response into the expected format
const documentTypes = useMemo(() => {
if (!documentTypeCountsData) return [];
return Object.entries(documentTypeCountsData).map(([type, count]) => ({
type,
count,
}));
}, [documentTypeCountsData]);

const { connectors: searchConnectors } = useSearchSourceConnectors(
false,
Number(search_space_id)