29 commits
0965ed4
getdocuments request / response zod schema
thierryverse Nov 20, 2025
2cb1e63
createdocuments request / response zod schema
thierryverse Nov 20, 2025
2e7a1fa
add upload document request / response zod schema
thierryverse Nov 20, 2025
a8d8eea
add search documents request / response zod schema
thierryverse Nov 20, 2025
e83a887
add getdocuments types counts request / response zod schema
thierryverse Nov 20, 2025
9d53ace
add get document by chunks request / response zod schema
thierryverse Nov 20, 2025
495ade8
add get document request / response zod schema
thierryverse Nov 20, 2025
8198cb2
add update document request / response zod schema
thierryverse Nov 20, 2025
5c8817a
add delete document request / response zod schema
thierryverse Nov 20, 2025
629ebce
add documents api service
thierryverse Nov 20, 2025
c94f9a2
add documents queries & mutations atoms
thierryverse Nov 20, 2025
2f76f8d
fix (migration 40): Add an 'if not exists' guard
CREDO23 Dec 2, 2025
fde87f5
fix (migration 43): Add an 'if not exists' guard
CREDO23 Dec 2, 2025
5de0383
fix (migration 1): add 'if not exists' guard
CREDO23 Dec 2, 2025
259b3e6
Refactor request validation in getDocuments method
CREDO23 Dec 3, 2025
af344d8
Refactor request validation in getDocument method
CREDO23 Dec 3, 2025
4e1be68
Clean up comments in createDocument method
CREDO23 Dec 3, 2025
58ce20e
Improve request validation for uploadDocument
CREDO23 Dec 3, 2025
e95bab4
Improve request validation in searchDocuments
CREDO23 Dec 3, 2025
b961050
Merge branch 'MODSetter:main' into feat/add-jotai-tanstack
CREDO23 Dec 4, 2025
5d34e03
chore: add .pnpm-store to .gitignore
CREDO23 Dec 4, 2025
60815cd
feat: add useQuery implementation with centralized cache key manageme…
CREDO23 Dec 4, 2025
6ed0b8e
refactor: use descriptive variable names for useQuery destructuring i…
CREDO23 Dec 4, 2025
19fde39
refactor: replace useDocumentByChunk with useQuery implementation in …
CREDO23 Dec 4, 2025
ffcf88f
feat: migrate document upload to use jotai + tanstack query mutation …
CREDO23 Dec 4, 2025
339f64a
refactor: update SourceDetailSheet to use useQuery with centralized c…
CREDO23 Dec 4, 2025
22984b9
refactor: migrate YouTube tab to use jotai + tanstack query mutation
CREDO23 Dec 4, 2025
8c3b56c
refactor: remove unused use-document-by-chunk hook
CREDO23 Dec 4, 2025
aff7ca0
refactor: migrate document types fetch to jotai + tanstack query
CREDO23 Dec 4, 2025
4 changes: 3 additions & 1 deletion .gitignore
@@ -2,4 +2,6 @@
./surfsense_backend/podcasts/
.env
node_modules/
.ruff_cache/
.ruff_cache/
.venv
.pnpm-store
72 changes: 41 additions & 31 deletions surfsense_backend/alembic/versions/1_add_github_connector_enum.py
@@ -2,17 +2,12 @@

Revision ID: 1
Revises:

"""

from collections.abc import Sequence

from alembic import op

# Import pgvector if needed for other types, though not for this ENUM change
# import pgvector


# revision identifiers, used by Alembic.
revision: str = "1"
down_revision: str | None = None
@@ -21,10 +16,24 @@


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
# Ensure the enum type exists
op.execute(
"""
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'searchsourceconnectortype') THEN
CREATE TYPE searchsourceconnectortype AS ENUM(
'SERPER_API',
'TAVILY_API',
'SLACK_CONNECTOR',
'NOTION_CONNECTOR'
);
END IF;
END$$;
"""
)

# Manually add the command to add the enum value
# Note: It's generally better to let autogenerate handle this, but we're bypassing it
# Add the new enum value if it doesn't exist
op.execute(
"""
DO $$
@@ -43,30 +52,31 @@ def upgrade() -> None:
"""
)

# Pass for the rest, as autogenerate didn't run to add other schema details
pass
# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###

# Downgrading removal of an enum value is complex and potentially dangerous
# if the value is in use. Often omitted or requires manual SQL based on context.
# For now, we'll just pass. If you needed to reverse this, you'd likely
# have to manually check if 'GITHUB_CONNECTOR' is used in the table
# and then potentially recreate the type without it.
# Removing an enum value safely requires recreating the type
op.execute(
"ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old"
)
op.execute(
"CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR')"
)
op.execute(
"ALTER TABLE search_source_connectors ALTER COLUMN connector_type TYPE searchsourceconnectortype USING "
"connector_type::text::searchsourceconnectortype"
)
op.execute("DROP TYPE searchsourceconnectortype_old")
"""
DO $$
BEGIN
-- Rename existing type
ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old;

-- Create new type without GITHUB_CONNECTOR
CREATE TYPE searchsourceconnectortype AS ENUM(
'SERPER_API',
'TAVILY_API',
'SLACK_CONNECTOR',
'NOTION_CONNECTOR'
);

-- Update table columns to use new type
ALTER TABLE search_source_connectors
ALTER COLUMN connector_type TYPE searchsourceconnectortype
USING connector_type::text::searchsourceconnectortype;

pass
# ### end Alembic commands ###
-- Drop old type
DROP TYPE searchsourceconnectortype_old;
END$$;
"""
)
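Note: the DO block that actually adds the new enum value is collapsed in this view. Judging from the guard comment above it and the downgrade path (which rebuilds the type without GITHUB_CONNECTOR), it presumably checks pg_enum before running ALTER TYPE. A minimal sketch of such a guard, not necessarily the collapsed hunk verbatim:

op.execute(
    """
    DO $$
    BEGIN
        -- Sketch only: add GITHUB_CONNECTOR if the label is not already present.
        -- The value name is inferred from the downgrade, which recreates the
        -- type without it; the collapsed hunk may differ in detail.
        IF NOT EXISTS (
            SELECT 1
            FROM pg_enum e
            JOIN pg_type t ON t.oid = e.enumtypid
            WHERE t.typname = 'searchsourceconnectortype'
              AND e.enumlabel = 'GITHUB_CONNECTOR'
        ) THEN
            ALTER TYPE searchsourceconnectortype ADD VALUE 'GITHUB_CONNECTOR';
        END IF;
    END$$;
    """
)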
@@ -1,19 +1,6 @@
"""Move LLM preferences from user-level to search space level

Revision ID: 40
Revises: 39
Create Date: 2024-11-27

This migration moves LLM preferences (long_context_llm_id, fast_llm_id, strategic_llm_id)
from the user_search_space_preferences table to the searchspaces table itself.

This change supports the RBAC model where LLM preferences are shared by all members
of a search space, rather than being per-user.
"""

import sqlalchemy as sa

from alembic import op
from sqlalchemy import inspect

# revision identifiers, used by Alembic.
revision = "40"
@@ -23,26 +10,32 @@


def upgrade():
# Add LLM preference columns to searchspaces table
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)
conn = op.get_bind()
inspector = inspect(conn)

existing_cols = {col["name"] for col in inspector.get_columns("searchspaces")}

# Add columns only if they don't already exist
if "long_context_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)

if "fast_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)

# Migrate existing preferences from user_search_space_preferences to searchspaces
# We take the owner's preferences (the user who created the search space)
connection = op.get_bind()
if "strategic_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)

# Get all search spaces and their owner's preferences
connection.execute(
# Migrate existing data
conn.execute(
sa.text("""
UPDATE searchspaces ss
SET
@@ -57,7 +50,16 @@ def upgrade():


def downgrade():
# Remove LLM preference columns from searchspaces table
op.drop_column("searchspaces", "strategic_llm_id")
op.drop_column("searchspaces", "fast_llm_id")
op.drop_column("searchspaces", "long_context_llm_id")
conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {col["name"] for col in inspector.get_columns("searchspaces")}

# Drop columns only if they exist
if "strategic_llm_id" in existing_cols:
op.drop_column("searchspaces", "strategic_llm_id")

if "fast_llm_id" in existing_cols:
op.drop_column("searchspaces", "fast_llm_id")

if "long_context_llm_id" in existing_cols:
op.drop_column("searchspaces", "long_context_llm_id")
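Migrations 40 and 43 repeat the same inspect-then-guard pattern. For illustration only, the check could be factored into a helper like the one below; add_column_if_absent is a hypothetical name, not something this PR introduces:

import sqlalchemy as sa
from alembic import op
from sqlalchemy import inspect


def add_column_if_absent(table: str, column: sa.Column) -> None:
    # Hypothetical helper mirroring the guards in migrations 40 and 43:
    # add the column only when the live schema does not already have it.
    inspector = inspect(op.get_bind())
    existing_cols = {c["name"] for c in inspector.get_columns(table)}
    if column.name not in existing_cols:
        op.add_column(table, column)


# Usage, matching migration 40:
# add_column_if_absent(
#     "searchspaces", sa.Column("fast_llm_id", sa.Integer(), nullable=True)
# )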
@@ -16,6 +16,7 @@
from sqlalchemy.dialects import postgresql

from alembic import op
from sqlalchemy import inspect

# revision identifiers, used by Alembic.
revision: str = "43"
@@ -25,51 +26,57 @@


def upgrade() -> None:
"""Upgrade schema - Add BlockNote fields and trigger population task."""

# Add the columns
op.add_column(
"documents",
sa.Column(
"blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
)
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)

# Trigger the Celery task to populate blocknote_document for existing documents
try:
from app.tasks.celery_tasks.blocknote_migration_tasks import (
populate_blocknote_for_documents_task,
"""Upgrade schema - Add BlockNote fields (idempotent)."""

conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {c["name"] for c in inspector.get_columns("documents")}

# Add blocknote_document (JSONB) if it does not exist
if "blocknote_document" not in existing_cols:
op.add_column(
"documents",
sa.Column(
"blocknote_document",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)

# Queue the task to run asynchronously
populate_blocknote_for_documents_task.apply_async()
print(
"✓ Queued Celery task to populate blocknote_document for existing documents"
# Add content_needs_reindexing (boolean) if it does not exist
if "content_needs_reindexing" not in existing_cols:
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
except Exception as e:
# If Celery is not available or task queueing fails, log but don't fail the migration
print(f"⚠ Warning: Could not queue blocknote population task: {e}")
print(" You can manually trigger it later with:")
print(
" celery -A app.celery_app call app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task"

# Add last_edited_at (timestamp with tz) if it does not exist
if "last_edited_at" not in existing_cols:
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)

# NOTE: We intentionally do NOT import or queue Celery tasks here.
# Running background jobs during migrations causes hard-to-debug failures.
# After running migrations, trigger the backfill task manually (instructions below).


def downgrade() -> None:
"""Downgrade schema - Remove BlockNote fields."""
op.drop_column("documents", "last_edited_at")
op.drop_column("documents", "content_needs_reindexing")
op.drop_column("documents", "blocknote_document")
"""Downgrade schema - Remove BlockNote fields (only if present)."""

conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {c["name"] for c in inspector.get_columns("documents")}

if "last_edited_at" in existing_cols:
op.drop_column("documents", "last_edited_at")
if "content_needs_reindexing" in existing_cols:
op.drop_column("documents", "content_needs_reindexing")
if "blocknote_document" in existing_cols:
op.drop_column("documents", "blocknote_document")
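The new NOTE defers the backfill out of the migration, but the promised instructions live in the lines this PR removes. After `alembic upgrade head` completes, the task can be queued from a one-off script (task path taken from the removed import) or via the CLI command the old warning printed:

# One-off script to queue the BlockNote backfill after migrations finish,
# instead of from inside the migration. The task path comes from the import
# that this PR removes from the migration file.
from app.tasks.celery_tasks.blocknote_migration_tasks import (
    populate_blocknote_for_documents_task,
)

populate_blocknote_for_documents_task.apply_async()

# CLI equivalent, as printed by the removed warning message:
# celery -A app.celery_app call \
#     app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task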
@@ -1,15 +1,15 @@
"use client";

import { type CreateMessage, type Message, useChat } from "@ai-sdk/react";
import { useAtomValue } from "jotai";
import { useAtom, useAtomValue } from "jotai";
import { useParams, useRouter } from "next/navigation";
import { useEffect, useMemo, useRef } from "react";
import { createChatMutationAtom, updateChatMutationAtom } from "@/atoms/chats/chat-mutation.atoms";
import { activeChatAtom } from "@/atoms/chats/chat-query.atoms";
import { activeChatIdAtom } from "@/atoms/chats/ui.atoms";
import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms";
import ChatInterface from "@/components/chat/ChatInterface";
import { useChatState } from "@/hooks/use-chat";
import { useDocumentTypes } from "@/hooks/use-document-types";
import type { Document } from "@/hooks/use-documents";
import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors";

@@ -45,7 +45,19 @@ export default function ResearcherPage() {
});

// Fetch all available sources (document types + live search connectors)
const { documentTypes } = useDocumentTypes(Number(search_space_id));
// Use the documentTypeCountsAtom for fetching document types
const [documentTypeCountsQuery] = useAtom(documentTypeCountsAtom);
const { data: documentTypeCountsData } = documentTypeCountsQuery;

// Transform the response into the expected format
const documentTypes = useMemo(() => {
if (!documentTypeCountsData) return [];
return Object.entries(documentTypeCountsData).map(([type, count]) => ({
type,
count,
}));
}, [documentTypeCountsData]);

const { connectors: searchConnectors } = useSearchSourceConnectors(
false,
Number(search_space_id)