Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) se
- **Persistent Caching**: Stores indexes for lightning-fast subsequent access
- **Lazy Loading**: Tools detected only when needed for optimal startup
- **Memory Efficient**: Intelligent caching strategies for large codebases
- **🚫 `.indexerignore` Support**: Exclude files and directories from indexing and search using gitignore-style patterns

## Supported File Types

Expand Down Expand Up @@ -128,6 +129,60 @@ Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) se

</details>

## 🚫 `.indexerignore` Support

Code Index MCP supports `.indexerignore` files to exclude specific files and directories from indexing and search operations. This feature works similarly to `.gitignore` files and helps keep your index focused on relevant code.

### How It Works

1. **Create a `.indexerignore` file** in your project root directory
2. **Add patterns** using gitwildmatch syntax (similar to gitignore)
3. **The indexer will automatically** skip matching files and directories

### Example `.indexerignore` File

```
# Ignore build directories
build/
dist/
target/

# Ignore dependency directories
node_modules/
venv/
.env/

# Ignore temporary files
*.tmp
*.log
.DS_Store

# Ignore test files (if not needed for analysis)
tests/
__tests__/
test/
```

### Pattern Syntax

- `*.py` - Ignore all Python files
- `build/` - Ignore the build directory and all its contents
- `!important.py` - Include `important.py` even if it would be ignored by a previous pattern
- `docs/*.md` - Ignore all Markdown files in the docs directory

### Debugging

If you're having issues with the `.indexerignore` functionality:
- Check the `.indexer.log` file written to your project root during indexing (it is only generated when `set_project_path` is called with `generate_log_file=True`)
- This log marks each scanned file and directory as included (`+`) or excluded (`-`) from the index
- Look for patterns that might not be working as expected

### Integration

- **Indexing**: Files matching `.indexerignore` patterns are skipped during project scanning
- **Searching**: Search operations automatically exclude ignored files
- **File Watcher**: The auto-refresh feature also respects ignore patterns

## Quick Start

### 🚀 **Recommended Setup (Most Users)**
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ authors = [
dependencies = [
"mcp>=0.3.0",
"watchdog>=3.0.0",
"pathspec>=0.12.1",
]

[project.urls]
Expand Down
4 changes: 2 additions & 2 deletions src/code_index_mcp/indexing/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, max_workers: Optional[int] = None):
self.relationship_tracker = RelationshipTracker()
self.project_path = "" # Initialize project_path

def build_index(self, project_path: str) -> CodeIndex:
def build_index(self, project_path: str, generate_log_file: bool = False) -> CodeIndex:
"""
Build complete code index for a project.

Expand All @@ -46,7 +46,7 @@ def build_index(self, project_path: str) -> CodeIndex:

try:
# Step 1: Scan project directory
scanner = ProjectScanner(project_path)
scanner = ProjectScanner(project_path, generate_log_file)
scan_result = scanner.scan_project()

# Step 2: Read file contents and analyze in parallel
Expand Down
88 changes: 70 additions & 18 deletions src/code_index_mcp/indexing/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
This module handles scanning project directories, building directory trees,
and categorizing special files like configuration, documentation, and build files.
"""

import os
import glob
import pathspec
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any
from typing import Dict, List, Any, Optional
from .models import FileInfo, ProjectScanResult, SpecialFiles
from .qualified_names import normalize_file_path
from code_index_mcp.constants import SUPPORTED_EXTENSIONS
Expand Down Expand Up @@ -69,9 +70,17 @@ class ProjectScanner:

}

def __init__(self, base_path: str):
def __init__(self, base_path: str, generate_log_file: bool = False):
    """Set up a scanner rooted at ``base_path``.

    Args:
        base_path: Project directory to scan; stored as a resolved ``Path``.
        generate_log_file: When true, ``log_file`` points at ``.indexer.log``
            under the resolved base path; otherwise ``log_file`` is ``None``.
    """
    self.base_path = Path(base_path).resolve()
    self.file_id_counter = 0
    self.generate_log_file = generate_log_file

    if self.generate_log_file:
        self.log_file = self.base_path / '.indexer.log'
    else:
        self.log_file = None

    # Remove a log left over from an earlier scan so this scan starts clean.
    if self.log_file is not None and self.log_file.exists():
        self.log_file.unlink()

    self.ignore_spec = self._load_ignore_spec()

def scan_project(self) -> ProjectScanResult:
"""
Expand Down Expand Up @@ -103,25 +112,64 @@ def scan_project(self) -> ProjectScanResult:
project_metadata=project_metadata
)

def _load_ignore_spec(self) -> Optional[pathspec.PathSpec]:
    """Compile the project's ``.indexerignore`` file into a ``PathSpec``.

    Returns:
        A ``PathSpec`` built from the file's ``gitwildmatch`` patterns, or
        ``None`` when the file is missing, cannot be read, or is not valid
        UTF-8.
    """
    candidate = self.base_path / '.indexerignore'
    if not candidate.is_file():
        return None

    try:
        with candidate.open('r', encoding='utf-8') as handle:
            # The handle itself is passed as the iterable of pattern lines.
            return pathspec.PathSpec.from_lines('gitwildmatch', handle)
    except (OSError, UnicodeDecodeError):
        # Best effort: an unreadable ignore file means no ignore rules.
        return None

def _discover_files(self) -> List[str]:
"""Discover all files in the project directory."""
files = []

for root, dirs, filenames in os.walk(self.base_path):
# Skip common directories that shouldn't be indexed
dirs[:] = [d for d in dirs if not self._should_skip_directory(d)]

for filename in filenames:
if not self._should_skip_file(filename):
file_path = os.path.join(root, filename)
# Convert to relative path from base_path
rel_path = os.path.relpath(file_path, self.base_path)
files.append(normalize_file_path(rel_path)) # Normalize path separators

def process_directory(log_f):
for root, dirs, filenames in os.walk(self.base_path, topdown=True):
# Convert root to be relative to base_path for pathspec matching
relative_root = os.path.relpath(root, self.base_path)
if relative_root == '.':
relative_root = ''

# Filter directories in-place
original_dirs = list(dirs)

# Log and filter directories
kept_dirs = []
for d in original_dirs:
should_skip = self._should_skip_directory(d, relative_root)
if log_f:
log_f.write(f"{'-' if should_skip else '+'} {os.path.join(relative_root, d)}\n")
if not should_skip:
kept_dirs.append(d)
dirs[:] = kept_dirs

for filename in filenames:
relative_path = os.path.join(relative_root, filename)
should_skip = self._should_skip_file(filename, relative_path)
if log_f:
log_f.write(f"{'-' if should_skip else '+'} {relative_path}\n")
if not should_skip:
files.append(normalize_file_path(relative_path))

if self.log_file:
with open(self.log_file, 'a', encoding='utf-8') as f:
process_directory(f)
else:
process_directory(None)

return files

def _should_skip_directory(self, dirname: str) -> bool:
def _should_skip_directory(self, dirname: str, relative_root: str) -> bool:
"""Check if directory should be skipped during scanning."""
# Check against .indexerignore first
dir_path = os.path.join(relative_root, dirname)
if self.ignore_spec and self.ignore_spec.match_file(dir_path):
return True

skip_dirs = {
'__pycache__', '.pytest_cache', '.mypy_cache',
'node_modules', '.npm', '.yarn',
Expand All @@ -137,10 +185,14 @@ def _should_skip_directory(self, dirname: str) -> bool:
}
return dirname in skip_dirs or dirname.startswith('.')

def _should_skip_file(self, filename: str) -> bool:
def _should_skip_file(self, filename: str, relative_path: str) -> bool:
"""Check if file should be skipped during scanning."""
# Check against .indexerignore first
if self.ignore_spec and self.ignore_spec.match_file(relative_path):
return True

# Skip hidden files and common non-code files
if filename.startswith('.') and filename not in {'.gitignore', '.gitattributes'}:
if filename.startswith('.') and filename not in {'.gitignore', '.gitattributes', '.indexerignore'}:
return True

# Skip common binary and temporary files
Expand Down Expand Up @@ -319,4 +371,4 @@ def _create_project_metadata(self, file_list: List[FileInfo]) -> Dict[str, Any]:
'indexed_at': datetime.now(),
'total_files': len(file_list),
'total_lines': total_lines
}
}
16 changes: 15 additions & 1 deletion src/code_index_mcp/search/ag.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Search Strategy for The Silver Searcher (ag)
"""
import os
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple
Expand Down Expand Up @@ -93,6 +94,19 @@ def search(

cmd.extend(['-G', regex_pattern])

# Exclude files and directories from .indexerignore
ignore_file = os.path.join(base_path, '.indexerignore')
if os.path.exists(ignore_file):
with open(ignore_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
if line.endswith('/'):
cmd.append(f'--ignore-dir={line[:-1]}')
else:
cmd.append(f'--ignore={line}')

# Add -- to treat pattern as a literal argument, preventing injection
cmd.append('--')
cmd.append(search_pattern)
Expand Down Expand Up @@ -122,4 +136,4 @@ def search(
raise RuntimeError("'ag' (The Silver Searcher) not found. Please install it and ensure it's in your PATH.")
except Exception as e:
# Re-raise other potential exceptions like permission errors
raise RuntimeError(f"An error occurred while running ag: {e}")
raise RuntimeError(f"An error occurred while running ag: {e}")
16 changes: 15 additions & 1 deletion src/code_index_mcp/search/grep.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Search Strategy for standard grep
"""
import os
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple
Expand Down Expand Up @@ -81,6 +82,19 @@ def search(
# Note: grep's --include uses glob patterns, not regex
cmd.append(f'--include={file_pattern}')

# Exclude files and directories from .indexerignore
ignore_file = os.path.join(base_path, '.indexerignore')
if os.path.exists(ignore_file):
with open(ignore_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
if line.endswith('/'):
cmd.append(f'--exclude-dir={line[:-1]}')
else:
cmd.append(f'--exclude={line}')

# Add -- to treat pattern as a literal argument, preventing injection
cmd.append('--')
cmd.append(search_pattern)
Expand All @@ -107,4 +121,4 @@ def search(
except FileNotFoundError:
raise RuntimeError("'grep' not found. Please install it and ensure it's in your PATH.")
except Exception as e:
raise RuntimeError(f"An error occurred while running grep: {e}")
raise RuntimeError(f"An error occurred while running grep: {e}")
8 changes: 7 additions & 1 deletion src/code_index_mcp/search/ripgrep.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Search Strategy for ripgrep
"""
import os
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple
Expand Down Expand Up @@ -67,6 +68,11 @@ def search(
if file_pattern:
cmd.extend(['--glob', file_pattern])

# Exclude files and directories from .indexerignore
ignore_file = os.path.join(base_path, '.indexerignore')
if os.path.exists(ignore_file):
cmd.append(f'--ignore-file={ignore_file}')

# Add -- to treat pattern as a literal argument, preventing injection
cmd.append('--')
cmd.append(search_pattern)
Expand All @@ -93,4 +99,4 @@ def search(
raise RuntimeError("ripgrep (rg) not found. Please install it and ensure it's in your PATH.")
except Exception as e:
# Re-raise other potential exceptions like permission errors
raise RuntimeError(f"An error occurred while running ripgrep: {e}")
raise RuntimeError(f"An error occurred while running ripgrep: {e}")
6 changes: 6 additions & 0 deletions src/code_index_mcp/search/ugrep.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Search Strategy for ugrep
"""
import os
import shutil
import subprocess
from typing import Dict, List, Optional, Tuple
Expand Down Expand Up @@ -67,6 +68,11 @@ def search(
if file_pattern:
cmd.extend(['-g', file_pattern]) # Correct parameter for file patterns

# Exclude files and directories from .indexerignore
ignore_file = os.path.join(base_path, '.indexerignore')
if os.path.exists(ignore_file):
cmd.append(f'--ignore-files={ignore_file}')

# Add '--' to treat pattern as a literal argument, preventing injection
cmd.append('--')
cmd.append(pattern)
Expand Down
12 changes: 9 additions & 3 deletions src/code_index_mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,15 @@ def get_settings_stats() -> str:

@mcp.tool()
@handle_mcp_tool_errors(return_type='str')
def set_project_path(path: str, ctx: Context) -> str:
"""Set the base project path for indexing."""
return ProjectService(ctx).initialize_project(path)
def set_project_path(path: str, ctx: Context, generate_log_file: bool = False) -> str:
    """
    Set the base project path for indexing and initialize the index.

    Args:
        path: The project directory path to initialize. Must be a full path, not relative.
        ctx: The request context, passed through to ProjectService.
        generate_log_file: Enable to generate a .indexer.log file containing a list of all
            files and directories and their included (+) / ignored (-) status.

    Returns:
        The string result of ProjectService.initialize_project.
    """
    return ProjectService(ctx).initialize_project(path, generate_log_file)

@mcp.tool()
@handle_mcp_tool_errors(return_type='dict')
Expand Down
Loading