diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 0a2dd89..d643504 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -10,7 +10,7 @@ on: jobs: pre-commit: runs-on: ubuntu-latest - name: Run pre-commit hooks on Go, Rust, JavaScripts and Python files + name: Run pre-commit hooks on Go, Rust, JavaScript, Markdown and Python files steps: - name: Check out the repo @@ -45,7 +45,8 @@ jobs: sudo apt-get install -y \ make \ build-essential \ - pkg-config + pkg-config + npm install -g markdownlint-cli - name: Cache Rust dependencies uses: actions/cache@v4 @@ -81,10 +82,10 @@ jobs: - name: Install pre-commit run: pip install pre-commit - - name: Run pre-commit on Go, Rust, JavaScript and Python files + - name: Run pre-commit on Go, Rust, JavaScript, Markdown and Python files run: | - # Find all Go, Rust, JavaScripts and Python files (excluding vendored/generated code) - FILES=$(find . -type f \( -name "*.go" -o -name "*.rs" -o -name "*.py" -o -name "*.js" \) \ + # Find all Go, Rust, JavaScript, Markdown and Python files (excluding vendored/generated code) + FILES=$(find . -type f \( -name "*.go" -o -name "*.rs" -o -name "*.py" -o -name "*.js" -o -name "*.md" \) \ ! -path "./target/*" \ ! -path "./candle-binding/target/*" \ ! -path "./.git/*" \ @@ -99,7 +100,7 @@ jobs: echo "Running pre-commit on files: $FILES" pre-commit run --files $FILES else - echo "No Go, Rust, JavaScript or Python files found to check" + echo "No Go, Rust, JavaScript, Markdown or Python files found to check" fi - name: Show pre-commit results diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1af836e..38d36c9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: -# Basic hooks for Go, Rust, Python files only +# Basic hooks for Go, Rust, Python and JavaScript files only - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: @@ -22,6 +22,16 @@ repos: language: system files: \.go$ +# Markdown specific hooks +- repo: local + hooks: + - id: md-fmt + name: md fmt + entry: bash -c "make markdown-lint" + language: system + files: \.md$ + exclude: ^(node_modules/) + # JavaScript specific hooks - repo: local hooks: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 03f4f3e..01f9129 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,18 +29,22 @@ Before you begin, ensure you have the following installed: ### Initial Setup 1. **Clone the repository:** + ```bash git clone https://github.com/vllm-project/semantic-router.git cd semantic-router ``` 2. **Download required models:** + ```bash make download-models ``` + This downloads the pre-trained classification models from Hugging Face. 3. **Install Python dependencies(Optional):** + ```bash # For training and development pip install -r requirements.txt @@ -54,6 +58,7 @@ Before you begin, ensure you have the following installed: The project consists of multiple components that need to be built in order: ### Build Everything + ```bash make build ``` @@ -61,11 +66,13 @@ make build ### Build Individual Components 1. **Rust library (Candle binding):** + ```bash make rust ``` 2. **Go router:** + ```bash make build-router ``` @@ -73,11 +80,13 @@ make build ### Running the System 1. **Start Envoy proxy** (in one terminal): + ```bash make run-envoy ``` 2.
**Start the semantic router** (in another terminal): + ```bash make run-router ``` @@ -87,16 +96,19 @@ make build ### Unit Tests 1. **Test Rust bindings:** + ```bash make test-binding ``` 2. **Test Go semantic router:** + ```bash make test-semantic-router ``` 3. **Test individual classifiers:** + ```bash make test-category-classifier make test-pii-classifier @@ -141,18 +153,19 @@ python e2e-tests/run_all_tests.py --check-only The test suite includes: -+ Basic client request tests -+ Envoy ExtProc interaction tests -+ Router classification tests -+ Semantic cache tests -+ Category-specific tests -+ Metrics validation tests +- Basic client request tests +- Envoy ExtProc interaction tests +- Router classification tests +- Semantic cache tests +- Category-specific tests +- Metrics validation tests ## Development Workflow ### Making Changes 1. **Create a feature branch:** + ```bash git checkout -b feature/your-feature-name ``` @@ -160,6 +173,7 @@ The test suite includes: 2. **Make your changes** following the project structure and coding standards. 3. **Build and test:** + ```bash make clean make build @@ -167,6 +181,7 @@ The test suite includes: ``` 4. **Run end-to-end tests:** + ```bash # Start services make run-envoy & @@ -179,6 +194,7 @@ The test suite includes: 5. **Commit your changes:** Commit your changes with a clear message, making sure to **sign off** on your work using the `-s` flag. This is required by the project's **Developer Certificate of Origin (DCO)**. + ```bash git add . git commit -s -m "feat: add your feature description" @@ -197,6 +213,7 @@ The test suite includes: Before submitting a PR, please run the pre-commit hooks to ensure code quality and consistency. **These checks are mandatory** and will be automatically run on every commit once installed. **Step 1: Install pre-commit tool** + ```bash # Using pip (recommended) pip install pre-commit @@ -209,6 +226,7 @@ brew install pre-commit ``` **Step 2: Install pre-commit hooks for this repository** + ```bash # Install pre-commit hooks pre-commit install @@ -218,6 +236,7 @@ pre-commit run --all-files ``` ### Go Code + - Follow standard Go formatting (`gofmt`) - Use meaningful variable and function names - Add comments for exported functions and types @@ -228,12 +247,14 @@ pre-commit run --all-files - The CI will automatically check that `go.mod` and `go.sum` files are tidy using `make check-go-mod-tidy` ### Rust Code + - Follow Rust formatting (`cargo fmt`) - Use `cargo clippy` for linting - Handle errors appropriately with `Result` types - Document public APIs ### Python Code + - Follow PEP 8 style guidelines - Use type hints where appropriate - Write docstrings for functions and classes @@ -241,6 +262,7 @@ pre-commit run --all-files ## Submitting Changes 1. **Ensure all tests pass:** + ```bash make test python e2e-tests/run_all_tests.py diff --git a/Makefile b/Makefile index 0e5bc1b..510e019 100644 --- a/Makefile +++ b/Makefile @@ -343,3 +343,11 @@ docs-lint: docs-lint-fix: @echo "Fixing documentation lint issues..." cd website && npm run lint:fix + +markdown-lint: + @echo "Linting markdown files..." + markdownlint -c markdownlint.yaml "**/*.md" --ignore node_modules --ignore website/node_modules + +markdown-lint-fix: + @echo "Fixing markdown lint issues..." 
+ markdownlint -c markdownlint.yaml "**/*.md" --ignore node_modules --ignore website/node_modules --fix diff --git a/README.md b/README.md index b590103..d464f3b 100644 --- a/README.md +++ b/README.md @@ -6,17 +6,17 @@ [![Hugging Face](https://img.shields.io/badge/πŸ€—%20Hugging%20Face-Community-yellow)](https://huggingface.co/LLM-Semantic-Router) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE) [![Crates.io](https://img.shields.io/crates/v/candle-semantic-router.svg)](https://crates.io/crates/candle-semantic-router) -![](https://github.com/vllm-project/semantic-router/workflows/Test%20And%20Build/badge.svg) +![Test And Build](https://github.com/vllm-project/semantic-router/workflows/Test%20And%20Build/badge.svg) **πŸ“š [Complete Documentation](https://vllm-semantic-router.com) | πŸš€ [Quick Start](https://vllm-semantic-router.com/docs/getting-started/installation) | πŸ“£ [Blog](https://vllm-semantic-router.com/blog/) | πŸ“– [API Reference](https://vllm-semantic-router.com/docs/api/router/)** -![](./website/static/img/code.png) +![code](./website/static/img/code.png) ## Innovations ✨ -![](./website/static/img/architecture.png) +![architecture](./website/static/img/architecture.png) ### Intelligent Routing 🧠 @@ -35,6 +35,7 @@ The screenshot below shows the LLM Router dashboard in Grafana. ![LLM Router Dashboard](./website/static/img/grafana_screenshot.png) The router is implemented in two ways: + - Golang (with Rust FFI based on the [candle](https://github.com/huggingface/candle) rust ML framework) - Python Benchmarking will be conducted to determine the best implementation. @@ -64,6 +65,7 @@ For comprehensive documentation including detailed setup instructions, architect **πŸ‘‰ [Complete Documentation at Read the Docs](https://vllm-semantic-router.com/)** The documentation includes: + - **[Installation Guide](https://vllm-semantic-router.com/docs/getting-started/installation/)** - Complete setup instructions - **[System Architecture](https://vllm-semantic-router.com/docs/architecture/system-architecture/)** - Technical deep dive - **[Model Training](https://vllm-semantic-router.com/docs/training/training-overview/)** - How classification models work @@ -90,4 +92,4 @@ If you find Semantic Router helpful in your research or projects, please conside We opened the project at Aug 31, 2025. We love open source and collaboration ❀️ -[![Star History Chart](https://api.star-history.com/svg?repos=vllm-project/semantic-router&type=Date)](https://www.star-history.com/#vllm-project/semantic-router&Date) \ No newline at end of file +[![Star History Chart](https://api.star-history.com/svg?repos=vllm-project/semantic-router&type=Date)](https://www.star-history.com/#vllm-project/semantic-router&Date) diff --git a/candle-binding/README.md b/candle-binding/README.md index 5f6c2c8..7338f15 100644 --- a/candle-binding/README.md +++ b/candle-binding/README.md @@ -33,9 +33,11 @@ go test -v - The `-v` flag enables verbose output. - If you want to run a specific test, use: + ```sh go test -v -run TestName ``` + Replace `TestName` with the name of the test function. ## Troubleshooting @@ -46,4 +48,4 @@ go test -v ## Notes - The Go tests depend on the native library being present and correctly built. -- Some tests may download data from the internet (e.g., from norvig.com). \ No newline at end of file +- Some tests may download data from the internet (e.g., from norvig.com). 
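For contributors, the new Markdown tooling introduced above can be exercised locally. A minimal sketch, assuming `npm` and `pre-commit` are already installed; the commands mirror the CI step, the new Makefile targets, and the `md-fmt` hook from this change:

```bash
# Install the markdownlint CLI globally, as the CI workflow step does
npm install -g markdownlint-cli

# Report Markdown issues using the repo's markdownlint.yaml config
make markdown-lint

# Auto-fix whatever markdownlint can repair in place
make markdown-lint-fix

# Run only the new pre-commit hook against specific files
pre-commit run md-fmt --files README.md CONTRIBUTING.md
```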
diff --git a/deploy/kubernetes/README.md b/deploy/kubernetes/README.md index 0817662..2b8007f 100644 --- a/deploy/kubernetes/README.md +++ b/deploy/kubernetes/README.md @@ -22,7 +22,6 @@ The deployment consists of: ## Deployment - ```bash kubectl apply -k deploy/kubernetes/ diff --git a/docker/README.md b/docker/README.md index 37daa76..cc868f9 100644 --- a/docker/README.md +++ b/docker/README.md @@ -10,21 +10,25 @@ This Docker Compose configuration allows you to quickly run Semantic Router + En ## Install in Docker Compose 1. **Clone the repository and navigate to the project directory** + ```bash git clone cd semantic_router ``` 2. **Download required models** (if not already present): + ```bash make download-models ``` + This will download the necessary ML models for classification: - Category classifier (ModernBERT-base) - PII classifier (ModernBERT-base) - Jailbreak classifier (ModernBERT-base) 3. **Start the services using Docker Compose** + ```bash # Start core services (semantic-router + envoy) docker-compose up --build diff --git a/e2e-tests/README.md b/e2e-tests/README.md index 478503a..3e9ab1c 100644 --- a/e2e-tests/README.md +++ b/e2e-tests/README.md @@ -38,11 +38,13 @@ This test suite provides a progressive approach to testing the Semantic Router, ## Running Tests Individual tests can be run with: + ``` python tests/XX-test-name.py ``` Or run all tests sequentially with: + ``` cd tests && python -m pytest ``` @@ -51,4 +53,4 @@ cd tests && python -m pytest - Envoy must be running (make run-envoy) - Router must be running (make run-router) -- Python dependencies installed \ No newline at end of file +- Python dependencies installed diff --git a/markdownlint.yaml b/markdownlint.yaml new file mode 100644 index 0000000..0e00f8e --- /dev/null +++ b/markdownlint.yaml @@ -0,0 +1,52 @@ +# MD003 heading-style/header-style Heading style +MD003: false + +# MD025/single-title/single-h1 +MD025: false + +# End of rules relaxed for the deep learning docs + +# MD013 Line length +MD013: false + +# MD014 Dollar signs used before commands without showing output +MD014: false + +# MD024 Multiple headings with the same content +MD024: false + +# MD026/no-trailing-punctuation Trailing punctuation in heading +MD026: false + +# MD029/ol-prefix Ordered list item prefix +MD029: false + +# MD033/no-inline-html +MD033: false + +# MD034/no-bare-urls +MD034: false + +# MD040/fenced-code-language +MD040: false + +# MD041/first-line-heading/first-line-h1 First line in file should be a top level heading +MD041: false + +# MD036/no-emphasis-as-heading Emphasis used instead of a heading +MD036: false + +# MD037/no-space-in-emphasis Spaces inside emphasis markers +MD037: false + +# MD046/code-block-style Two formats of code blocks are allowed: fenced and indented +MD046: false + +# MD022/blanks-around-headings Blank lines around headings +MD022: false + +# MD009/no-trailing-spaces Trailing spaces +MD009: false + +# MD005/list-indent Inconsistent indentation for list items +MD005: false diff --git a/src/training/dual_classifier/DUAL_CLASSIFIER_SYSTEM_TEST_SUMMARY.md b/src/training/dual_classifier/DUAL_CLASSIFIER_SYSTEM_TEST_SUMMARY.md index 80675db..3aafe64 100644 --- a/src/training/dual_classifier/DUAL_CLASSIFIER_SYSTEM_TEST_SUMMARY.md +++ b/src/training/dual_classifier/DUAL_CLASSIFIER_SYSTEM_TEST_SUMMARY.md @@ -8,46 +8,54 @@ Task 2 successfully implemented and tested a complete dual-purpose DistilBERT cl ### ✅ Component Tests (14/14 Passed) #### 1.
Synthetic Data Generator Tests + - **Initialization**: Validates proper setup of 10 categories, templates, and 5 PII pattern types - **Sample Generation**: Tests both PII and non-PII sample creation with proper labeling - **Dataset Generation**: Validates batch dataset creation with configurable PII ratios - **PII Pattern Detection**: Confirms email and phone number detection in text #### 2. Dual-Task Dataset Tests + - **Dataset Creation**: Validates PyTorch Dataset implementation with correct tensor shapes - **Tokenization**: Tests DistilBERT tokenizer integration with proper padding/truncation - **Label Alignment**: Ensures category and PII labels align with tokenized sequences #### 3. Dual-Task Loss Function Tests + - **Loss Initialization**: Validates weighted loss combining category and PII objectives - **Loss Computation**: Tests gradient flow and loss calculation for both tasks - **Padding Mask Handling**: Ensures padded tokens are properly ignored in PII loss #### 4. Dual-Task Trainer Tests + - **Trainer Initialization**: Validates setup with proper data loaders and optimizers - **Training Step**: Confirms model parameters update during training - **Evaluation**: Tests validation metrics calculation (accuracy, F1-score) - **Model Persistence**: Validates save/load functionality with state preservation #### 5. Integration Tests + - **End-to-End Training**: Complete training pipeline with 2 epochs - **Memory Efficiency**: Confirms dual-head architecture has reasonable parameter count (~67M) ## Performance Results ### Training Performance + - **Dataset Size**: 50 training samples, 20 validation samples - **Training Time**: 18.6 seconds (0.372 seconds per sample) - **Performance Rating**: πŸš€ Excellent performance! - **System**: 8-core CPU, 16GB RAM (no GPU required) ### Model Architecture + - **Base Model**: DistilBERT (66M parameters) - **Total Parameters**: 67,553,292 (efficient shared backbone) - **Category Head**: 10-class classification - **PII Head**: Token-level binary classification ### Training Results (From Previous Run) + - **Final Training Metrics**: - Training Loss: 1.4948 - Category Loss: 1.3069 @@ -60,6 +68,7 @@ Task 2 successfully implemented and tested a complete dual-purpose DistilBERT cl ## Test Infrastructure ### Automated Testing + ```bash # Run full test suite python -m pytest test_dual_classifier_system.py -v @@ -69,6 +78,7 @@ python test_dual_classifier_system.py ``` ### Manual Validation + ```bash # Test existing trained model python test_existing_model.py @@ -77,22 +87,26 @@ python test_existing_model.py ## Key Technical Achievements ### 1. **Multi-Task Learning Architecture** + - Single DistilBERT backbone serving dual purposes - Separate classification heads for different tasks - Shared representations for memory efficiency ### 2. **Robust Training Pipeline** + - Combined loss function with task weighting - Proper gradient flow and parameter updates - Validation metrics for both tasks ### 3. **Synthetic Data Generation** + - 10 category templates (math, science, history, etc.) - 5 PII pattern types (email, phone, SSN, name, address) - Configurable PII injection rates - Token-level PII labeling ### 4. 
**Production-Ready Features** + - Model persistence (save/load) - Training history tracking - Progress monitoring with tqdm @@ -101,21 +115,25 @@ python test_existing_model.py ## Testing Methodology ### Unit Tests + - Individual component validation - Mock data for isolated testing - Edge case handling ### Integration Tests + - Full pipeline validation - Real data flow testing - Performance benchmarking ### Validation Tests + - Model loading/saving - Prediction consistency - Memory efficiency ## File Structure + ``` dual_classifier/ β”œβ”€β”€ test_dual_classifier_system.py # Comprehensive test suite @@ -140,12 +158,14 @@ dual_classifier/ ## Next Steps Task 2 is fully complete and validated. The implementation provides a solid foundation for: + - Task 3: Data Pipeline Implementation (real dataset integration) - Task 4: Advanced Training Pipeline (optimization and scaling) - Task 5: Rust Implementation with Candle (performance optimization) ## Performance Notes + - Training completes in under 20 seconds for 50 samples - Model achieves 45% category accuracy and 91% PII F1-score on small synthetic dataset - Memory usage is efficient for laptop deployment -- No GPU required for development and testing \ No newline at end of file +- No GPU required for development and testing diff --git a/src/training/dual_classifier/README.md b/src/training/dual_classifier/README.md index b9cd982..dc10887 100644 --- a/src/training/dual_classifier/README.md +++ b/src/training/dual_classifier/README.md @@ -10,6 +10,7 @@ A dual-purpose neural network classifier using DistilBERT for both **category cl This project implements a **proof-of-concept** dual-task learning system that demonstrates: ### βœ… **Key Accomplishments (Task 2)** + - βœ… **Dual-Purpose Architecture**: Single DistilBERT model (~67M parameters) for both category classification and PII detection - βœ… **Memory Efficiency**: Shared backbone reduces parameters vs. 
two separate models - βœ… **Synthetic Data Pipeline**: Complete data generation with 10 categories and 5 PII pattern types @@ -20,6 +21,7 @@ This project implements a **proof-of-concept** dual-task learning system that de - βœ… **Production-Ready Features**: Progress tracking, metrics, and model checkpointing ### πŸ”¬ **POC Characteristics** + - **Data Source**: **Synthetic data generation** (not real-world datasets) - **Scale**: Small-scale validation (50 training, 20 validation samples) - **Purpose**: Architecture validation and training pipeline proof @@ -27,11 +29,13 @@ This project implements a **proof-of-concept** dual-task learning system that de - **PII Patterns**: 5 predefined types (email, phone, SSN, name, address) ### πŸš€ **Next Steps Roadmap** + - **Task 3**: Real dataset integration (transition from synthetic to production data) - **Task 4**: Advanced training optimization and scaling - **Task 5**: Rust implementation with Candle framework This POC successfully demonstrates that: + - The dual-head architecture works effectively - Multi-task learning can be implemented efficiently - The training pipeline is robust and measurable @@ -45,6 +49,7 @@ This POC successfully demonstrates that: #### `dual_classifier.py` **Main Model Implementation** + - Contains the `DualClassifier` class built on DistilBERT - Implements dual-head architecture: - **Category Head**: Sequence-level classification for 10 categories @@ -55,6 +60,7 @@ This POC successfully demonstrates that: #### `trainer.py` **Training Infrastructure** + - `DualTaskDataset`: PyTorch Dataset class for handling dual-task data - `DualTaskLoss`: Combined loss function for both classification tasks - `DualTaskTrainer`: Complete training pipeline with: @@ -66,6 +72,7 @@ This POC successfully demonstrates that: #### `data_generator.py` **Synthetic Data Generation** + - `SyntheticDataGenerator`: Creates realistic training data - **Categories**: 10 predefined categories with template texts - **PII Patterns**: 5 types (email, phone, SSN, name, address) @@ -79,6 +86,7 @@ This POC successfully demonstrates that: #### `train_example.py` **Training Demonstration** + - Complete end-to-end training example - Shows system performance monitoring (CPU, memory, GPU) - Demonstrates model training with synthetic data @@ -88,6 +96,7 @@ This POC successfully demonstrates that: #### `example.py` **Basic Usage Example** + - Simple demonstration of model usage - Shows how to: - Initialize the DualClassifier @@ -98,6 +107,7 @@ This POC successfully demonstrates that: #### `test_existing_model.py` **Trained Model Validation** + - Tests loading and using a pre-trained model - Validates that saved models work correctly - Demonstrates prediction on sample texts @@ -108,6 +118,7 @@ This POC successfully demonstrates that: #### `test_dual_classifier_system.py` **Comprehensive Test Suite** + - **14 Test Cases** covering all components: - Synthetic data generator functionality - Dataset creation and tokenization @@ -120,6 +131,7 @@ This POC successfully demonstrates that: #### `test_dual_classifier.py` **Core Model Tests** + - Unit tests for the `DualClassifier` class - Tests model initialization, forward pass, and prediction methods - Validates tensor shapes and output formats @@ -130,6 +142,7 @@ This POC successfully demonstrates that: #### `requirements.txt` **Project Dependencies** + - **PyTorch**: `>=2.0.0,<=2.2.2` (Neural network backend) - **Transformers**: `>=4.36.0,<4.45.0` (DistilBERT model) - **NumPy**: `>=1.24.0,<2.0` (Numerical operations) @@ 
-141,6 +154,7 @@ This POC successfully demonstrates that: #### `DUAL_CLASSIFIER_SYSTEM_TEST_SUMMARY.md` **Test Results & Documentation** + - Comprehensive testing summary with all results - Performance benchmarks and system requirements - Technical achievements and success criteria @@ -151,6 +165,7 @@ This POC successfully demonstrates that: #### `trained_model/` Directory **Saved Model Files** + - `model.pt` (258MB): Complete trained model state - `config.json`: Model configuration and hyperparameters - `training_history.json`: Training metrics and loss curves @@ -161,26 +176,31 @@ This POC successfully demonstrates that: ## πŸš€ Quick Start ### 1. Install Dependencies + ```bash pip install -r requirements.txt ``` ### 2. Run Basic Example + ```bash python example.py ``` ### 3. Train Your Own Model + ```bash python train_example.py ``` ### 4. Test Existing Model + ```bash python test_existing_model.py ``` ### 5. Run Full Test Suite + ```bash python -m pytest test_dual_classifier_system.py -v ``` @@ -188,6 +208,7 @@ python -m pytest test_dual_classifier_system.py -v ## πŸ—οΈ Architecture ### Model Architecture + - **Base Model**: DistilBERT (66M parameters) - **Total Parameters**: 67,553,292 - **Category Head**: 10-class sequence classification @@ -195,6 +216,7 @@ python -m pytest test_dual_classifier_system.py -v - **Shared Backbone**: Memory-efficient design ### Training Pipeline + - **Multi-task Loss**: Weighted combination of category and PII losses - **Metrics**: Category accuracy and PII F1-score - **Data**: Synthetic generation with configurable PII injection @@ -203,11 +225,13 @@ python -m pytest test_dual_classifier_system.py -v ## πŸ“Š Performance ### Training Performance + - **Training Time**: ~18.6 seconds for 50 samples - **System Requirements**: 8-core CPU, 16GB RAM (no GPU required) - **Memory Efficiency**: Single model vs. two separate models ### Model Performance + - **Category Accuracy**: 45% (on small synthetic dataset) - **PII F1-Score**: 91.09% - **Training Loss**: 1.4948 (final) @@ -216,6 +240,7 @@ python -m pytest test_dual_classifier_system.py -v ## πŸ§ͺ Testing The project includes comprehensive testing with 14 test cases covering: + - βœ… Synthetic data generation - βœ… Dataset creation and tokenization - βœ… Loss function computation @@ -228,6 +253,7 @@ All tests pass with excellent performance ratings. ## πŸ“ˆ Next Steps This implementation provides a foundation for: + - **Task 3**: Real dataset integration - **Task 4**: Advanced training optimization - **Task 5**: Rust implementation with Candle framework @@ -235,6 +261,7 @@ This implementation provides a foundation for: ## 🀝 Usage Examples ### Basic Prediction + ```python from dual_classifier import DualClassifier @@ -301,12 +328,14 @@ for token, pred in zip(tokens, pii_predictions): ``` **Key Points:** + - πŸ“ **Single Input, Dual Output**: One text β†’ category + PII results simultaneously - πŸ”„ **`encode_text()`**: Just preprocessing, no predictions - 🎯 **`predict()`**: Does BOTH tasks at once using shared DistilBERT backbone - 🧠 **Memory Efficient**: Single model handles both tasks vs. 
separate models ### Training New Model + ```python from trainer import DualTaskTrainer from data_generator import create_sample_datasets @@ -317,4 +346,4 @@ train_dataset, val_dataset = create_sample_datasets() # Train model trainer = DualTaskTrainer(model, train_dataset, val_dataset) trainer.train(num_epochs=2) -``` \ No newline at end of file +``` diff --git a/src/training/dual_classifier/trained_model/README.md b/src/training/dual_classifier/trained_model/README.md index b13ef70..78738f5 100644 --- a/src/training/dual_classifier/trained_model/README.md +++ b/src/training/dual_classifier/trained_model/README.md @@ -14,6 +14,7 @@ This directory contains the trained model files for the dual classifier. Due to ## To generate these files: Run the training script to create a new model: + ```bash cd dual_classifier python train_example.py @@ -22,6 +23,7 @@ python train_example.py ## Alternative storage: For sharing large model files, consider: + - Git LFS (Large File Storage) - Cloud storage (S3, Google Drive, etc.) -- Model registries (HuggingFace Hub, MLflow, etc.) \ No newline at end of file +- Model registries (HuggingFace Hub, MLflow, etc.) diff --git a/website/README.md b/website/README.md index 9713709..d0a11c0 100644 --- a/website/README.md +++ b/website/README.md @@ -5,12 +5,14 @@ This directory contains the Docusaurus-based documentation website for the vLLM ## πŸš€ Quick Start ### Prerequisites + - Node.js 18+ - npm or yarn ### Development Start the development server with hot reload: + ```bash # From project root make docs-dev @@ -24,6 +26,7 @@ The site will be available at http://localhost:3000 ### Production Build Build the static site for production: + ```bash # From project root make docs-build @@ -35,6 +38,7 @@ cd website && npm run build ### Preview Production Build Serve the production build locally: + ```bash # From project root make docs-serve @@ -46,18 +50,21 @@ cd website && npm run serve ## 🎨 Features ### ✨ Modern Tech-Inspired Design + - **Dark theme by default** with neon blue/green accents - **Glassmorphism effects** with backdrop blur and transparency - **Gradient backgrounds** and animated hover effects - **Responsive design** optimized for all devices ### πŸ”§ Enhanced Functionality + - **Mermaid diagram support** with dark theme optimization - **Advanced code highlighting** with multiple language support - **Interactive navigation** with smooth animations - **Search functionality** (ready for Algolia integration) ### πŸ“± User Experience + - **Fast loading** with optimized builds - **Accessible design** following WCAG guidelines - **Mobile-first** responsive layout @@ -82,6 +89,7 @@ website/ ### Themes and Colors Edit `src/css/custom.css` to modify: + - Color scheme and gradients - Typography and spacing - Component styling @@ -89,12 +97,14 @@ Edit `src/css/custom.css` to modify: ### Navigation Update `sidebars.js` to modify: + - Documentation structure - Category organization - Page ordering ### Site Configuration Modify `docusaurus.config.js` for: + - Site metadata - Plugin configuration - Theme settings diff --git a/website/blog/2025-09-06-welcome.md b/website/blog/2025-09-06-welcome.md index 895f126..48d4318 100644 --- a/website/blog/2025-09-06-welcome.md +++ b/website/blog/2025-09-06-welcome.md @@ -5,7 +5,7 @@ authors: [rootfs, wangchen615, yuezhu1, Xunzhuo] tags: [welcome, announcement, vllm, semantic-router] --- -![](/img/code.png) +![code](/img/code.png) @@ -51,7 +51,7 @@ To overcome this gap, we introduce the **vLLM Semantic Router** β€” an intent-aw By 
classifying queries at the semantic level and selectively enabling reasoning, the vLLM Semantic Router delivers **higher accuracy where it matters** and **significant cost savings where it doesn’t** β€” a step toward the principle that no token should be wasted. -![](/img/architecture.png) +![architecture](/img/architecture.png) ### Architecture Design diff --git a/website/docs/api/classification.md b/website/docs/api/classification.md index cda2344..8ddbecb 100644 --- a/website/docs/api/classification.md +++ b/website/docs/api/classification.md @@ -5,6 +5,7 @@ The Classification API provides direct access to the Semantic Router's classific ## API Endpoints ### Base URL + ``` http://localhost:8080/api/v1/classify ``` @@ -12,11 +13,13 @@ http://localhost:8080/api/v1/classify ## Server Status The Classification API server runs alongside the main Semantic Router ExtProc server: + - **Classification API**: `http://localhost:8080` (HTTP REST API) - **ExtProc Server**: `http://localhost:50051` (gRPC for Envoy integration) - **Metrics Server**: `http://localhost:9190` (Prometheus metrics) Start the server with: + ```bash make run-router ``` @@ -24,6 +27,7 @@ make run-router ## Implementation Status ### βœ… Fully Implemented + - `GET /health` - Health check endpoint - `POST /api/v1/classify/intent` - Intent classification with real model inference - `POST /api/v1/classify/pii` - PII detection with real model inference @@ -33,6 +37,7 @@ make run-router - `GET /info/classifier` - Detailed classifier capabilities and configuration ### πŸ”„ Placeholder Implementation + - `POST /api/v1/classify/combined` - Returns "not implemented" response - `GET /metrics/classification` - Returns "not implemented" response - `GET /config/classification` - Returns "not implemented" response @@ -122,6 +127,7 @@ Classify user queries into routing categories. ### Available Categories The current model supports the following 14 categories: + - `business` - `law` - `psychology` @@ -369,6 +375,7 @@ api: ### Error Handling **Batch Too Large (400 Bad Request):** + ```json { "error": { @@ -380,6 +387,7 @@ api: ``` **Empty Batch (400 Bad Request):** + ```json { "error": { @@ -625,6 +633,7 @@ Get real-time classification performance metrics. ### Example Error Responses **Invalid Input (400 Bad Request):** + ```json { "error": { @@ -636,6 +645,7 @@ Get real-time classification performance metrics. ``` **Not Implemented (501 Not Implemented):** + ```json { "error": { diff --git a/website/docs/api/router.md b/website/docs/api/router.md index 9795ac5..4ec6b01 100644 --- a/website/docs/api/router.md +++ b/website/docs/api/router.md @@ -271,6 +271,7 @@ model_config: ``` Notes: + - Pricing is optional; if omitted, cost is treated as 0 and only token metrics are emitted. - Cost is computed as: (prompt_tokens * prompt_per_1m + completion_tokens * completion_per_1m) / 1_000_000 (in the configured currency). diff --git a/website/docs/architecture/system-architecture.md b/website/docs/architecture/system-architecture.md index 4eb5ced..71420ed 100644 --- a/website/docs/architecture/system-architecture.md +++ b/website/docs/architecture/system-architecture.md @@ -80,6 +80,7 @@ graph TB - **Timeout Management**: Configures appropriate timeouts for different model types **Configuration Highlights**: + ```yaml # Envoy listener configuration listeners: @@ -107,6 +108,7 @@ http_filters: **Role**: The brain of the system that makes intelligent routing decisions. 
**Architecture**: + ```go type OpenAIRouter struct { Config *config.RouterConfig @@ -122,6 +124,7 @@ type OpenAIRouter struct { ``` **Processing Pipeline**: + ```mermaid sequenceDiagram participant E as Envoy @@ -157,6 +160,7 @@ sequenceDiagram The classification system uses ModernBERT models for multiple classification tasks: #### Category Classification + ```mermaid graph LR Query[User Query] --> Tokenizer[ModernBERT Tokenizer] @@ -182,6 +186,7 @@ graph LR ``` #### Multi-Task Architecture + ```python # Conceptual model architecture class SemanticRouter: diff --git a/website/docs/getting-started/configuration.md b/website/docs/getting-started/configuration.md index eeaa79b..a5a6f6e 100644 --- a/website/docs/getting-started/configuration.md +++ b/website/docs/getting-started/configuration.md @@ -265,6 +265,7 @@ default_reasoning_effort: "medium" #### Model Reasoning Configuration Options **Configuration Structure:** + - `name`: A unique identifier for the model family - `patterns`: Array of patterns to match against model names - `reasoning_syntax.type`: How the model expects reasoning mode to be specified @@ -282,6 +283,7 @@ The system supports both simple string patterns and regular expressions for flex - **Multiple patterns**: `["deepseek", "ds-", "^phi.*"]` matches any of these patterns **Regex Pattern Examples:** + ```yaml patterns: - "^gpt-4.*" # Models starting with "gpt-4" @@ -429,11 +431,13 @@ api: The configuration includes preset examples for quick setup. Here's how to use them: **Step 1: Choose your scenario** + - `fast` - For real-time APIs (microsecond to millisecond response times) - `standard` - For typical web APIs (millisecond to second response times) - `slow` - For batch processing or heavy computation (seconds to minutes) **Step 2: Copy the preset values** + ```yaml # Example: Switch to fast API configuration # Copy from preset_examples.fast and paste to the actual config: @@ -442,6 +446,7 @@ size_buckets: [1, 2, 3, 5, 8, 10] ``` **Step 3: Restart the service** + ```bash pkill -f "router" make run-router @@ -463,6 +468,7 @@ The system provides sensible default batch size ranges that work well for most u ### Configuration Examples by Use Case **Real-time Chat API (fast preset)** + ```yaml # Copy these values to your config for sub-millisecond monitoring duration_buckets: [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1] @@ -471,6 +477,7 @@ size_buckets: [1, 2, 3, 5, 8, 10] ``` **E-commerce API (standard preset)** + ```yaml # Copy these values for typical web API response times duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] @@ -479,6 +486,7 @@ size_buckets: [1, 2, 5, 10, 20, 50, 100] ``` **Data Processing Pipeline (slow preset)** + ```yaml # Copy these values for heavy computation workloads duration_buckets: [0.1, 0.5, 1, 5, 10, 30, 60, 120] @@ -492,6 +500,7 @@ batch_size_ranges: ``` **Available Metrics:** + - `batch_classification_requests_total` - Total number of batch requests - `batch_classification_duration_seconds` - Processing duration histogram - `batch_classification_texts_total` - Total number of texts processed @@ -576,6 +585,7 @@ make run-router ### Common Configuration Patterns **Multiple Models:** + ```yaml vllm_endpoints: - name: "math_endpoint" @@ -601,6 +611,7 @@ categories: ``` **Load Balancing:** + ```yaml vllm_endpoints: - name: "endpoint1" @@ -658,6 +669,7 @@ classifier: ### Development vs Production **Development:** + ```yaml # Relaxed settings for testing classifier: @@ -670,6 +682,7 @@ semantic_cache: ``` 
**Production:** + ```yaml # Strict settings for production classifier: @@ -686,12 +699,14 @@ semantic_cache: ### Common Issues **Invalid YAML syntax:** + ```bash # Validate YAML syntax python -c "import yaml; yaml.safe_load(open('config/config.yaml'))" ``` **Missing model files:** + ```bash # Check if models are downloaded ls -la models/ @@ -699,12 +714,14 @@ ls -la models/ ``` **Endpoint connectivity:** + ```bash # Test your backend server curl -f http://your-server:8000/health ``` **Configuration not taking effect:** + ```bash # Restart the router after config changes make run-router @@ -723,17 +740,20 @@ make test-prompt-guard # Jailbreak protection ### Model Reasoning Configuration Issues **Model not getting reasoning fields:** + - Check that the model name matches a pattern in `model_reasoning_configs` - Verify the pattern syntax (exact matches vs prefixes) - Unknown models will have no reasoning fields applied (this is by design) **Wrong reasoning syntax applied:** + - Ensure the `reasoning_syntax.type` matches your model's expected format - Check the `reasoning_syntax.parameter` name is correct - DeepSeek models typically use `chat_template_kwargs` with `"thinking"` - GPT models typically use `reasoning_effort` **Adding support for new models:** + ```yaml # Add a new model configuration model_reasoning_configs: @@ -745,6 +765,7 @@ model_reasoning_configs: ``` **Testing model reasoning configuration:** + ```bash # Test reasoning with your specific model curl -X POST http://localhost:8801/v1/chat/completions \ @@ -762,6 +783,7 @@ The Semantic Router supports automated configuration generation based on model p ### Benchmarking Workflow 1. **Run MMLU-Pro Evaluation:** + ```bash # Evaluate models using MMLU-Pro benchmark python src/training/model_eval/mmlu_pro_vllm_eval.py \ @@ -774,6 +796,7 @@ The Semantic Router supports automated configuration generation based on model p ``` 2. **Generate Configuration:** + ```bash # Generate config.yaml from benchmark results python src/training/model_eval/result_to_config.py \ @@ -845,6 +868,7 @@ make test ``` This workflow ensures your configuration is: + - Based on actual model performance - Properly tested before deployment - Version controlled for tracking changes diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md index 1ddeaca..59f0ccc 100644 --- a/website/docs/getting-started/installation.md +++ b/website/docs/getting-started/installation.md @@ -25,6 +25,7 @@ cd semantic-router ### 2. 
Install Dependencies #### Install Go (if not already installed) + ```bash # Check if Go is installed go version @@ -36,6 +37,7 @@ go version ``` #### Install Rust (if not already installed) + ```bash # Check if Rust is installed rustc --version @@ -46,6 +48,7 @@ source ~/.cargo/env ``` #### Install Python (if not already installed) + ```bash # Check if Python is installed python --version @@ -56,6 +59,7 @@ python --version ``` #### Install HuggingFace CLI + ```bash pip install huggingface_hub ``` @@ -68,6 +72,7 @@ make build ``` This command will: + - Build the Rust candle-binding library - Build the Go router binary - Place the executable in `bin/router` @@ -80,6 +85,7 @@ make download-models ``` This downloads the CPU-optimized BERT models for: + - Category classification - PII detection - Jailbreak detection @@ -118,11 +124,13 @@ The default configuration includes example endpoints that you should update for Open two terminals and run: **Terminal 1: Start Envoy Proxy** + ```bash make run-envoy ``` **Terminal 2: Start Semantic Router** + ```bash make run-router ``` diff --git a/website/docs/getting-started/reasoning-routing-quickstart.md b/website/docs/getting-started/reasoning-routing-quickstart.md index 19b4207..d51057e 100644 --- a/website/docs/getting-started/reasoning-routing-quickstart.md +++ b/website/docs/getting-started/reasoning-routing-quickstart.md @@ -1,16 +1,19 @@ # Reasoning Routing Quickstart This short guide shows how to enable and verify β€œreasoning routing” in the Semantic Router: + - Minimal config.yaml fields you need - Example request/response (OpenAI-compatible) - A comprehensive evaluation command you can run Prerequisites + - A running OpenAI-compatible backend for your models (e.g., vLLM or any OpenAI-compatible server). It must be reachable at the addresses you configure under vllm_endpoints (address:port). - Envoy + the router (see Start the router section) 1) Minimal configuration Put this in config/config.yaml (or merge into your existing config). It defines: + - Categories that require reasoning (e.g., math) - Reasoning families for model syntax differences (DeepSeek/Qwen3 use chat_template_kwargs; GPT-OSS/GPT use reasoning_effort) - Which concrete models use which reasoning family @@ -84,6 +87,7 @@ default_model: qwen3-30b ``` Notes + - Reasoning is controlled by categories.use_reasoning and optionally categories.reasoning_effort. - A model only gets reasoning fields if it has a model_config.<MODEL>.reasoning_family that maps to a reasoning_families entry. - DeepSeek/Qwen3 (chat_template_kwargs): the router injects chat_template_kwargs only when reasoning is enabled. When disabled, no chat_template_kwargs are added. 
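To make the two injection syntaxes in the notes above concrete, here is a hedged sketch of what the router effectively adds for each model family when reasoning is enabled. The request shapes are illustrative rather than captured router output; the endpoint and model names are the ones used elsewhere in this guide:

```bash
# Qwen3/DeepSeek family: reasoning is toggled via chat_template_kwargs
curl -sS http://localhost:8801/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen3-30b",
    "messages": [{"role": "user", "content": "Solve 2x + 5 = 15"}],
    "chat_template_kwargs": {"thinking": true}
  }'

# GPT/GPT-OSS family: reasoning is toggled via reasoning_effort instead
curl -sS http://localhost:8801/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-oss-20b",
    "messages": [{"role": "user", "content": "Solve 2x + 5 = 15"}],
    "reasoning_effort": "high"
  }'
```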
@@ -93,6 +97,7 @@ Notes 2) Start the router Option A: Local build + Envoy + - Download classifier models and mappings (required) - make download-models - Build and run the router @@ -102,6 +107,7 @@ Option A: Local build + Envoy - func-e run --config-path config/envoy.yaml --component-log-level "ext_proc:trace,router:trace,http:trace" Option B: Docker Compose + - docker compose up -d - Exposes Envoy at http://localhost:8801 (proxying /v1/* to backends via the router) @@ -109,6 +115,7 @@ Note: Ensure your OpenAI-compatible backend is running and reachable (e.g., http 3) Send example requests Math (reasoning should be ON and effort high) + ```bash curl -sS http://localhost:8801/v1/chat/completions \ -H "Content-Type: application/json" \ @@ -122,6 +129,7 @@ curl -sS http://localhost:8801/v1/chat/completions \ ``` General (reasoning should be OFF) + ```bash curl -sS http://localhost:8801/v1/chat/completions \ -H "Content-Type: application/json" \ @@ -136,10 +144,12 @@ curl -sS http://localhost:8801/v1/chat/completions \ Verify routing via response headers The router does not inject routing metadata into the JSON body. Instead, inspect the response headers added by the router: + - X-Selected-Model - X-Semantic-Destination-Endpoint Example: + ```bash curl -i http://localhost:8801/v1/chat/completions \ -H "Content-Type: application/json" \ @@ -159,6 +169,7 @@ curl -i http://localhost:8801/v1/chat/completions \ You can benchmark the router vs a direct vLLM endpoint across categories using the included script. This runs a ReasoningBench based on MMLU-Pro and produces summaries and plots. Quick start (router + vLLM): + ```bash SAMPLES_PER_CATEGORY=25 \ CONCURRENT_REQUESTS=4 \ @@ -168,6 +179,7 @@ VLLM_MODELS="openai/gpt-oss-20b" \ ``` Router-only benchmark: + ```bash BENCHMARK_ROUTER_ONLY=true \ SAMPLES_PER_CATEGORY=25 \ @@ -177,6 +189,7 @@ ROUTER_MODELS="auto" \ ``` Direct invocation (advanced): + ```bash python bench/router_reason_bench.py \ --run-router \ @@ -191,8 +204,8 @@ python bench/router_reason_bench.py \ ``` Tips + - If your math request doesn’t enable reasoning, confirm the classifier assigns the "math" category with sufficient confidence (see classifier.category_model.threshold) and that the target model has a reasoning_family. - For models without a reasoning_family, the router will not inject reasoning fields even when the category requires reasoning (this is by design to avoid invalid requests). - You can override the effort per category via categories.reasoning_effort or set a global default via default_reasoning_effort. - Ensure your OpenAI-compatible backend is reachable at the configured vllm_endpoints (address:port). If it’s not running, routing will fail even though the router and Envoy are up. 
- diff --git a/website/docs/intro.md b/website/docs/intro.md index 908904e..5d25394 100644 --- a/website/docs/intro.md +++ b/website/docs/intro.md @@ -7,7 +7,7 @@ sidebar_position: 1 [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/vllm-project/semantic-router/blob/main/LICENSE) [![Hugging Face](https://img.shields.io/badge/πŸ€—%20Hugging%20Face-Community-yellow)](https://huggingface.co/LLM-Semantic-Router) [![Go Report Card](https://goreportcard.com/badge/github.com/vllm-project/semantic-router/src/semantic-router)](https://goreportcard.com/report/github.com/vllm-project/semantic-router/src/semantic-router) -![](https://github.com/vllm-project/semantic-router/workflows/Test%20And%20Build/badge.svg) +![Test And Build](https://github.com/vllm-project/semantic-router/workflows/Test%20And%20Build/badge.svg) An intelligent **Mixture-of-Models (MoM)** router that acts as an Envoy External Processor (ExtProc) to intelligently direct OpenAI API requests to the most suitable backend model from a defined pool. Using BERT-based semantic understanding and classification, it optimizes both performance and cost efficiency. @@ -15,21 +15,25 @@ An intelligent **Mixture-of-Models (MoM)** router that acts as an Envoy External ### 🎯 **Auto-selection of Models** Intelligently routes requests to specialized models based on semantic understanding: + - **Math queries** β†’ Math-specialized models - **Creative writing** β†’ Creative-specialized models - **Code generation** β†’ Code-specialized models - **General queries** β†’ Balanced general-purpose models ### πŸ›‘οΈ **Security & Privacy** + - **PII Detection**: Automatically detects and handles personally identifiable information - **Prompt Guard**: Identifies and blocks jailbreak attempts - **Safe Routing**: Ensures sensitive prompts are handled appropriately ### ⚑ **Performance Optimization** + - **Semantic Cache**: Caches semantic representations to reduce latency - **Tool Selection**: Auto-selects relevant tools to reduce token usage and improve tool selection accuracy ### πŸ—οΈ **Architecture** + - **Envoy ExtProc Integration**: Seamlessly integrates with Envoy proxy - **Dual Implementation**: Available in both Go (with Rust FFI) and Python - **Scalable Design**: Production-ready with comprehensive monitoring @@ -40,7 +44,6 @@ Our testing shows significant improvements in model accuracy through specialized ![Model Accuracy](/img/category_accuracies.png) - ## πŸ› οΈ Architecture Overview ```mermaid @@ -84,6 +87,7 @@ graph TB ## πŸ“ˆ Monitoring & Observability The router provides comprehensive monitoring through: + - **Grafana Dashboard**: Real-time metrics and performance tracking - **Prometheus Metrics**: Detailed routing statistics and performance data - **Request Tracing**: Full visibility into routing decisions and performance diff --git a/website/docs/overview/mixture-of-models.md b/website/docs/overview/mixture-of-models.md index 7ef52f9..ecb5d59 100644 --- a/website/docs/overview/mixture-of-models.md +++ b/website/docs/overview/mixture-of-models.md @@ -9,6 +9,7 @@ The Mixture of Models (MoM) approach represents a fundamental shift from traditi When organizations deploy a single high-performance model (like GPT-4 or Claude-3) for all use cases, they encounter several critical issues: #### 1. **Economic Inefficiency** + ``` Example: Customer Support Chatbot - Simple FAQ: "What are your hours?" @@ -20,6 +21,7 @@ Example: Customer Support Chatbot ``` #### 2. 
**Performance Suboptimality** + ``` Math Problem: "Solve 2x + 5 = 15" - General GPT-4: Good performance, but overkill @@ -33,12 +35,14 @@ Creative Writing: "Write a poem about spring" ``` #### 3. **Resource Waste** + - **Computing Power**: Using a 1.8T parameter model for simple classification - **Memory**: Loading massive models for lightweight tasks - **Latency**: Slower inference for tasks that could be handled quickly - **Throughput**: Lower requests/second due to model size #### 4. **Operational Risks** + - **Single Point of Failure**: Model downtime affects entire system - **Vendor Lock-in**: Dependent on single provider's availability and pricing - **Limited Flexibility**: Cannot optimize for specific use cases @@ -65,6 +69,7 @@ graph TB ``` **Cost Impact Analysis:** + ```python # Traditional approach traditional_cost = 100000 * 0.03 # All queries to GPT-4 @@ -128,6 +133,7 @@ graph TB **Challenge**: Balance customer satisfaction with operational costs #### Before MoM: + ``` Setup: GPT-4 for all customer service queries Daily Cost: $4,500 @@ -139,6 +145,7 @@ Issues: ``` #### After MoM Implementation: + ```python # Query distribution and routing routing_strategy = { @@ -170,6 +177,7 @@ routing_strategy = { ``` #### Results: + - **Cost Reduction**: 72% ($4,500 β†’ $1,260/day) - **Customer Satisfaction**: +12% (specialized models performed better) - **Response Time**: -35% average latency @@ -182,6 +190,7 @@ routing_strategy = { **Use Cases**: Code review, documentation generation, bug analysis #### Implementation Strategy: + ```mermaid graph TB CodeQuery[Code Query] --> Classifier[Code Intent Classifier] @@ -198,6 +207,7 @@ graph TB ``` #### Performance Metrics: + | Metric | Before MoM | After MoM | Improvement | |--------|------------|-----------|-------------| | Daily Cost | $750 | $285 | 62% reduction | @@ -212,6 +222,7 @@ graph TB **Challenge**: Provide personalized learning assistance across multiple subjects #### Specialized Model Deployment: + ```python subject_routing = { "mathematics": { @@ -253,6 +264,7 @@ subject_routing = { ``` #### Educational Impact: + - **Cost Efficiency**: $3,000/day β†’ $890/day (70% reduction) - **Learning Outcomes**: +23% improvement in problem-solving scores - **Personalization**: Better subject-specific assistance @@ -327,6 +339,7 @@ scaling_rules = { ### Challenge 1: Router Accuracy **Problem**: Incorrect routing leads to poor user experience **Solution**: + - Multi-stage classification with confidence scores - Fallback mechanisms for uncertain classifications - Continuous learning from user feedback @@ -347,6 +360,7 @@ def route_query(query): ### Challenge 2: Latency Overhead **Problem**: Classification adds latency to each request **Solution**: + - Optimized lightweight classifiers (<10ms inference) - Parallel processing of classification and request preparation - Caching of classification results for similar queries @@ -354,6 +368,7 @@ def route_query(query): ### Challenge 3: Context Preservation **Problem**: Switching models mid-conversation loses context **Solution**: + - Conversation-aware routing (same model for session) - Context summarization and transfer between models - Hybrid approaches with context bridges @@ -389,6 +404,7 @@ print(f"ROI achieved in: {roi_months:.1f} months") ``` **Output:** + ``` 12-month savings: $165,000.00 ROI achieved in: 3.6 months diff --git a/website/docs/overview/semantic-router-overview.md b/website/docs/overview/semantic-router-overview.md index 61e8369..8b30ebc 100644 --- 
a/website/docs/overview/semantic-router-overview.md +++ b/website/docs/overview/semantic-router-overview.md @@ -16,6 +16,7 @@ Semantic routing is the process of **dynamically selecting the most suitable lan ## The Evolution of LLM Routing ### Traditional Approach: One-Size-Fits-All + ```mermaid graph LR Query[User Query] --> Model[Single LLM
GPT-4, Claude, etc.] @@ -30,6 +31,7 @@ graph LR - No flexibility in model selection ### Modern Approach: Semantic Routing + ```mermaid graph TB Query[User Query] --> Router[Semantic Router
BERT Classifier] @@ -124,6 +126,7 @@ graph TB - **Benchmark Performance**: Evaluated on MMLU, GSM8K, and MT Bench **Training Approach:** + ```python # RouteLLM training conceptually preference_data = load_chatbot_arena_data() # Human comparisons @@ -147,6 +150,7 @@ GPT-5 introduces a revolutionary **router-as-coordinator** architecture: - **Efficiency**: Computation flows along optimal paths **Operational Flow:** + ```mermaid sequenceDiagram participant User @@ -173,6 +177,7 @@ sequenceDiagram ## Why Mixture of Models is Superior ### 1. **Economic Efficiency** + ``` Traditional: All queries β†’ GPT-4 ($0.03/1K tokens) MoM Routing: @@ -182,16 +187,19 @@ MoM Routing: ``` ### 2. **Performance Specialization** + - **Domain Expertise**: Code generation models excel at programming tasks - **Task Optimization**: Math models optimized for numerical reasoning - **Context Efficiency**: Smaller models for simpler tasks reduce latency ### 3. **Flexibility and Scalability** + - **Model Independence**: Each model can be updated independently - **Provider Diversity**: Mix OpenAI, Anthropic, local, and fine-tuned models - **Easy Extensions**: Add new specialized models without system redesign ### 4. **Risk Distribution** + - **Vendor Independence**: Not locked into single provider - **Failure Isolation**: One model failure doesn't affect others - **A/B Testing**: Easy to test new models in production @@ -201,6 +209,7 @@ MoM Routing: ### Case Study: Enterprise API Gateway **Before Semantic Routing:** + ``` Workload: 100K queries/day Model: GPT-4 for all queries @@ -209,6 +218,7 @@ Quality: High but inconsistent for simple tasks ``` **After Semantic Routing:** + ``` Workload: 100K queries/day distributed as: - 60% simple β†’ GPT-3.5: $120/day diff --git a/website/docs/training/training-overview.md b/website/docs/training/training-overview.md index b049afd..be4f000 100644 --- a/website/docs/training/training-overview.md +++ b/website/docs/training/training-overview.md @@ -41,18 +41,21 @@ graph TB [ModernBERT](https://arxiv.org/abs/2412.13663) represents the latest evolution in BERT architecture with several key improvements over traditional BERT models: #### 1. **Enhanced Architecture** + - **Rotary Position Embedding (RoPE)**: Better handling of positional information - **GeGLU Activation**: Improved gradient flow and representation capacity - **Attention Bias Removal**: Cleaner attention mechanisms - **Modern Layer Normalization**: Better training stability #### 2. **Training Improvements** + - **Longer Context**: Trained on sequences up to 8,192 tokens vs BERT's 512 - **Better Data**: Trained on higher-quality, more recent datasets - **Improved Tokenization**: More efficient vocabulary and tokenization - **Anti-overfitting Techniques**: Built-in regularization improvements #### 3. **Performance Benefits** + ```python # Performance comparison on classification tasks model_performance = { @@ -212,6 +215,7 @@ class UnifiedBERTFinetuning: **Purpose**: Route queries to specialized models based on academic/professional domains. #### Dataset: MMLU-Pro Academic Domains + ```python # Dataset composition mmlu_categories = { @@ -245,6 +249,7 @@ mmlu_categories = { ``` #### Training Configuration + ```yaml model_config: base_model: "modernbert-base" @@ -264,6 +269,7 @@ evaluation_metrics: ``` #### Model Performance + ```python category_performance = { "overall_accuracy": 0.942, @@ -286,6 +292,7 @@ category_performance = { **Purpose**: Identify personally identifiable information to protect user privacy. 
#### Dataset: Microsoft Presidio + Custom Synthetic Data + ```python # PII entity types and examples pii_entities = { @@ -323,6 +330,7 @@ pii_entities = { ``` #### Training Approach: Token Classification + ```python class PIITokenClassifier: def __init__(self): @@ -353,6 +361,7 @@ class PIITokenClassifier: ``` #### Performance Metrics + ```python pii_performance = { "overall_f1": 0.957, @@ -376,6 +385,7 @@ pii_performance = { **Purpose**: Identify and block attempts to circumvent AI safety measures. #### Dataset: Jailbreak Classification Dataset + ```python jailbreak_dataset = { "benign": { @@ -402,6 +412,7 @@ jailbreak_dataset = { ``` #### Training Strategy + ```python class JailbreakDetector: def __init__(self): @@ -423,6 +434,7 @@ class JailbreakDetector: ``` #### Performance Analysis + ```python jailbreak_performance = { "overall_metrics": { @@ -451,6 +463,7 @@ jailbreak_performance = { **Purpose**: Classify queries for tool selection and function calling optimization. #### Dataset: Glaive Function Calling v2 + ```python intent_categories = { "information_retrieval": { @@ -499,6 +512,7 @@ intent_categories = { ## Training Infrastructure ### Hardware Requirements + ```yaml training_infrastructure: gpu_requirements: @@ -517,6 +531,7 @@ training_infrastructure: ``` ### Training Pipeline Automation + ```python class TrainingPipeline: def __init__(self, config_path):