johnhuang316 · marksolly · Aug 8, 2025 · Aug 8, 2025 · Aug 8, 2025
diff --git a/README.md b/README.md
@@ -52,6 +52,7 @@ Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) se
 - **Persistent Caching**: Stores indexes for lightning-fast subsequent access
 - **Lazy Loading**: Tools detected only when needed for optimal startup
 - **Memory Efficient**: Intelligent caching strategies for large codebases
+- **🚫 `.indexerignore` Support**: Exclude files and directories from indexing and search using gitignore-style patterns
 
 ## Supported File Types
 
@@ -128,6 +129,60 @@ Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) se
 
 </details>
 
+## 🚫 `.indexerignore` Support
+
+Code Index MCP supports `.indexerignore` files to exclude specific files and directories from indexing and search operations. This feature works similarly to `.gitignore` files and helps keep your index focused on relevant code.
+
+### How It Works
+
+1. **Create a `.indexerignore` file** in your project root directory
+2. **Add patterns** using gitignore-style syntax (parsed with the `pathspec` library's `gitwildmatch` rules)
+3. **The indexer will automatically** skip matching files and directories
+
+### Example `.indexerignore` File
+
+```
+# Ignore build directories
+build/
+dist/
+target/
+
+# Ignore dependency directories
+node_modules/
+venv/
+.env/
+
+# Ignore temporary files
+*.tmp
+*.log
+.DS_Store
+
+# Ignore test files (if not needed for analysis)
+tests/
+__tests__/
+test/
+```
+
+### Pattern Syntax
+
+- `*.py` - Ignore all Python files
+- `build/` - Ignore the build directory and all its contents
+- `!important.py` - Include `important.py` even if it would be ignored by a previous pattern
+- `docs/*.md` - Ignore all Markdown files in the docs directory
+
+### Debugging
+
+If you're having issues with the `.indexerignore` functionality:
+- Call `set_project_path` with `generate_log_file` enabled, then check the `.indexer.log` file written to your project root during indexing
+- This log shows which files are included (+) and excluded (-) from the index
+- Look for patterns that might not be working as expected
+
+### Integration
+
+- **Indexing**: Files matching `.indexerignore` patterns are skipped during project scanning
+- **Searching**: Search operations automatically exclude ignored files
+- **File Watcher**: The auto-refresh feature also respects ignore patterns
+
 ## Quick Start
 
 ### 🚀 **Recommended Setup (Most Users)**

diff --git a/pyproject.toml b/pyproject.toml
@@ -15,6 +15,7 @@ authors = [
 dependencies = [
     "mcp>=0.3.0",
     "watchdog>=3.0.0",
+    "pathspec>=0.12.1",
 ]
 
 [project.urls]

diff --git a/src/code_index_mcp/indexing/builder.py b/src/code_index_mcp/indexing/builder.py
@@ -31,7 +31,7 @@ def __init__(self, max_workers: Optional[int] = None):
         self.relationship_tracker = RelationshipTracker()
         self.project_path = ""  # Initialize project_path
 
-    def build_index(self, project_path: str) -> CodeIndex:
+    def build_index(self, project_path: str, generate_log_file: bool = False) -> CodeIndex:
         """
         Build complete code index for a project.
 
@@ -46,7 +46,7 @@ def build_index(self, project_path: str) -> CodeIndex:
 
         try:
             # Step 1: Scan project directory
-            scanner = ProjectScanner(project_path)
+            scanner = ProjectScanner(project_path, generate_log_file)
             scan_result = scanner.scan_project()
 
             # Step 2: Read file contents and analyze in parallel

diff --git a/src/code_index_mcp/indexing/scanner.py b/src/code_index_mcp/indexing/scanner.py
@@ -4,12 +4,13 @@
 This module handles scanning project directories, building directory trees,
 and categorizing special files like configuration, documentation, and build files.
 """
-
+ 
 import os
 import glob
+import pathspec
 from pathlib import Path
 from datetime import datetime
-from typing import Dict, List, Any
+from typing import Dict, List, Any, Optional
 from .models import FileInfo, ProjectScanResult, SpecialFiles
 from .qualified_names import normalize_file_path
 from code_index_mcp.constants import SUPPORTED_EXTENSIONS
@@ -69,9 +70,17 @@ class ProjectScanner:
 
     }
 
-    def __init__(self, base_path: str):
+    def __init__(self, base_path: str, generate_log_file: bool = False):
         self.base_path = Path(base_path).resolve()
         self.file_id_counter = 0
+        self.generate_log_file = generate_log_file
+        self.log_file = self.base_path / '.indexer.log' if self.generate_log_file else None
+
+        # Clear log file at the start of each scan if it's enabled
+        if self.log_file and self.log_file.exists():
+            self.log_file.unlink()
+
+        self.ignore_spec = self._load_ignore_spec()
 
     def scan_project(self) -> ProjectScanResult:
         """
@@ -103,25 +112,64 @@ def scan_project(self) -> ProjectScanResult:
             project_metadata=project_metadata
         )
 
+    def _load_ignore_spec(self) -> Optional[pathspec.PathSpec]:
+        """Return a PathSpec for .indexerignore, or None if it is missing, unreadable, or malformed."""
+        ignore_file = self.base_path / '.indexerignore'
+        if not ignore_file.is_file():
+            return None
+        try:
+            with open(ignore_file, 'r', encoding='utf-8') as f:
+                return pathspec.PathSpec.from_lines('gitwildmatch', f)
+        except (OSError, ValueError):  # ValueError: bad encoding (UnicodeDecodeError) or an invalid pattern
+            return None
+
     def _discover_files(self) -> List[str]:
         """Discover all files in the project directory."""
         files = []
 
-        for root, dirs, filenames in os.walk(self.base_path):
-            # Skip common directories that shouldn't be indexed
-            dirs[:] = [d for d in dirs if not self._should_skip_directory(d)]
-
-            for filename in filenames:
-                if not self._should_skip_file(filename):
-                    file_path = os.path.join(root, filename)
-                    # Convert to relative path from base_path
-                    rel_path = os.path.relpath(file_path, self.base_path)
-                    files.append(normalize_file_path(rel_path))  # Normalize path separators
-
+        def process_directory(log_f):
+            for root, dirs, filenames in os.walk(self.base_path, topdown=True):
+                # Convert root to be relative to base_path for pathspec matching
+                relative_root = os.path.relpath(root, self.base_path)
+                if relative_root == '.':
+                    relative_root = ''
+
+                # Filter directories in-place
+                original_dirs = list(dirs)
+
+                # Log and filter directories
+                kept_dirs = []
+                for d in original_dirs:
+                    should_skip = self._should_skip_directory(d, relative_root)
+                    if log_f:
+                        log_f.write(f"{'-' if should_skip else '+'} {os.path.join(relative_root, d)}\n")
+                    if not should_skip:
+                        kept_dirs.append(d)
+                dirs[:] = kept_dirs
+
+                for filename in filenames:
+                    relative_path = os.path.join(relative_root, filename)
+                    should_skip = self._should_skip_file(filename, relative_path)
+                    if log_f:
+                        log_f.write(f"{'-' if should_skip else '+'} {relative_path}\n")
+                    if not should_skip:
+                        files.append(normalize_file_path(relative_path))
+
+        if self.log_file:
+            with open(self.log_file, 'a', encoding='utf-8') as f:
+                process_directory(f)
+        else:
+            process_directory(None)
+
         return files
 
-    def _should_skip_directory(self, dirname: str) -> bool:
+    def _should_skip_directory(self, dirname: str, relative_root: str) -> bool:
         """Check if directory should be skipped during scanning."""
+        # Check .indexerignore first; the trailing '/' lets directory-only patterns such as `build/` match
+        dir_path = os.path.join(relative_root, dirname)
+        if self.ignore_spec and self.ignore_spec.match_file(dir_path + '/'):
+            return True
+
         skip_dirs = {
             '__pycache__', '.pytest_cache', '.mypy_cache',
             'node_modules', '.npm', '.yarn',
@@ -137,10 +185,14 @@ def _should_skip_directory(self, dirname: str) -> bool:
         }
         return dirname in skip_dirs or dirname.startswith('.')
 
-    def _should_skip_file(self, filename: str) -> bool:
+    def _should_skip_file(self, filename: str, relative_path: str) -> bool:
         """Check if file should be skipped during scanning."""
+        # Check against .indexerignore first
+        if self.ignore_spec and self.ignore_spec.match_file(relative_path):
+            return True
+
         # Skip hidden files and common non-code files
-        if filename.startswith('.') and filename not in {'.gitignore', '.gitattributes'}:
+        if filename.startswith('.') and filename not in {'.gitignore', '.gitattributes', '.indexerignore'}:
             return True
 
         # Skip common binary and temporary files
@@ -319,4 +371,4 @@ def _create_project_metadata(self, file_list: List[FileInfo]) -> Dict[str, Any]:
             'indexed_at': datetime.now(),
             'total_files': len(file_list),
             'total_lines': total_lines
-        }
+        }
diff --git a/src/code_index_mcp/search/ag.py b/src/code_index_mcp/search/ag.py
@@ -1,6 +1,7 @@
 """
 Search Strategy for The Silver Searcher (ag)
 """
+import os
 import shutil
 import subprocess
 from typing import Dict, List, Optional, Tuple
@@ -93,6 +94,19 @@ def search(
 
             cmd.extend(['-G', regex_pattern])
 
+        # Exclude files and directories from .indexerignore
+        ignore_file = os.path.join(base_path, '.indexerignore')
+        if os.path.exists(ignore_file):
+            with open(ignore_file, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith('#'):
+                        continue
+                    if line.endswith('/'):
+                        cmd.append(f'--ignore-dir={line[:-1]}')
+                    else:
+                        cmd.append(f'--ignore={line}')
+
         # Add -- to treat pattern as a literal argument, preventing injection
         cmd.append('--')
         cmd.append(search_pattern)
@@ -122,4 +136,4 @@ def search(
             raise RuntimeError("'ag' (The Silver Searcher) not found. Please install it and ensure it's in your PATH.")
         except Exception as e:
             # Re-raise other potential exceptions like permission errors
-            raise RuntimeError(f"An error occurred while running ag: {e}") 
+            raise RuntimeError(f"An error occurred while running ag: {e}")
diff --git a/src/code_index_mcp/search/grep.py b/src/code_index_mcp/search/grep.py
@@ -1,6 +1,7 @@
 """
 Search Strategy for standard grep
 """
+import os
 import shutil
 import subprocess
 from typing import Dict, List, Optional, Tuple
@@ -81,6 +82,19 @@ def search(
             # Note: grep's --include uses glob patterns, not regex
             cmd.append(f'--include={file_pattern}')
 
+        # Exclude files and directories from .indexerignore
+        ignore_file = os.path.join(base_path, '.indexerignore')
+        if os.path.exists(ignore_file):
+            with open(ignore_file, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith('#'):
+                        continue
+                    if line.endswith('/'):
+                        cmd.append(f'--exclude-dir={line[:-1]}')
+                    else:
+                        cmd.append(f'--exclude={line}')
+
         # Add -- to treat pattern as a literal argument, preventing injection
         cmd.append('--')
         cmd.append(search_pattern)
@@ -107,4 +121,4 @@ def search(
         except FileNotFoundError:
             raise RuntimeError("'grep' not found. Please install it and ensure it's in your PATH.")
         except Exception as e:
-            raise RuntimeError(f"An error occurred while running grep: {e}") 
+            raise RuntimeError(f"An error occurred while running grep: {e}")
diff --git a/src/code_index_mcp/search/ripgrep.py b/src/code_index_mcp/search/ripgrep.py
@@ -1,6 +1,7 @@
 """
 Search Strategy for ripgrep
 """
+import os
 import shutil
 import subprocess
 from typing import Dict, List, Optional, Tuple
@@ -67,6 +68,11 @@ def search(
         if file_pattern:
             cmd.extend(['--glob', file_pattern])
 
+        # Exclude files and directories from .indexerignore
+        ignore_file = os.path.join(base_path, '.indexerignore')
+        if os.path.exists(ignore_file):
+            cmd.append(f'--ignore-file={ignore_file}')
+
         # Add -- to treat pattern as a literal argument, preventing injection
         cmd.append('--')
         cmd.append(search_pattern)
@@ -93,4 +99,4 @@ def search(
             raise RuntimeError("ripgrep (rg) not found. Please install it and ensure it's in your PATH.")
         except Exception as e:
             # Re-raise other potential exceptions like permission errors
-            raise RuntimeError(f"An error occurred while running ripgrep: {e}") 
+            raise RuntimeError(f"An error occurred while running ripgrep: {e}")
diff --git a/src/code_index_mcp/search/ugrep.py b/src/code_index_mcp/search/ugrep.py
@@ -1,6 +1,7 @@
 """
 Search Strategy for ugrep
 """
+import os
 import shutil
 import subprocess
 from typing import Dict, List, Optional, Tuple
@@ -67,6 +68,11 @@ def search(
         if file_pattern:
             cmd.extend(['-g', file_pattern])  # Correct parameter for file patterns
 
+        # Exclude files and directories from .indexerignore
+        ignore_file = os.path.join(base_path, '.indexerignore')
+        if os.path.exists(ignore_file):
+            cmd.append(f'--ignore-files={ignore_file}')
+
         # Add '--' to treat pattern as a literal argument, preventing injection
         cmd.append('--')
         cmd.append(pattern)

diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py
@@ -109,9 +109,15 @@ def get_settings_stats() -> str:
 
 @mcp.tool()
 @handle_mcp_tool_errors(return_type='str')
-def set_project_path(path: str, ctx: Context) -> str:
-    """Set the base project path for indexing."""
-    return ProjectService(ctx).initialize_project(path)
+def set_project_path(path: str, ctx: Context, generate_log_file: bool = False) -> str:
+    """
+    Set the base project path for indexing and initialize the index.
+
+    Args:
+        path: The project directory path to initialize. Must be a full path, not relative.
+        generate_log_file: Enable to generate a .indexer.log file in the project root listing every scanned file and directory with its included (+) or ignored (-) status.
+    """
+    return ProjectService(ctx).initialize_project(path, generate_log_file)
 
 @mcp.tool()
 @handle_mcp_tool_errors(return_type='dict')