sio2project
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 48 additions & 9 deletions b/‎.github/workflows/test.yml‎
Lines changed: 48 additions & 9 deletions
diff --git a/‎README.md‎
Lines changed: 126 additions & 2 deletions b/‎README.md‎
Lines changed: 126 additions & 2 deletions
diff --git a/‎src/lib.rs‎
Lines changed: 2 additions & 2 deletions b/‎src/lib.rs‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/locks/mod.rs‎
Lines changed: 29 additions & 4 deletions b/‎src/locks/mod.rs‎
Lines changed: 29 additions & 4 deletions
@@ -21,6 +21,8 @@ jobs:
             locks: memory
           - kvstorage: postgres
             locks: memory
+          - kvstorage: postgres
+            locks: postgres
 
     services:
       postgres:
@@ -84,22 +86,16 @@ jobs:
         ./mc alias set local http://localhost:9000 minioadmin minioadmin
         ./mc mb local/bucket1 || true
 
-    - name: Check formatting
-      run: cargo fmt -- --check
-
-    - name: Run clippy
-      run: cargo clippy -- -D warnings
-
     - name: Run tests (SQLite)
       if: matrix.kvstorage == 'sqlite'
       run: cargo test --verbose
       env:
         RUST_LOG: debug
 
-    - name: Run tests (Postgres)
-      if: matrix.kvstorage == 'postgres'
+    - name: Run tests (Postgres + Memory Locks)
+      if: matrix.kvstorage == 'postgres' && matrix.locks == 'memory'
       run: |
-        # Create a test config for Postgres
+        # Create a test config for Postgres with memory locks
         cat > config.test.json << EOF
         {
           "logging": {
@@ -139,6 +135,49 @@ jobs:
         RUST_LOG: debug
         DATABASE_URL: postgres://postgres:postgres@localhost:5432/s3dedup_test
 
+    - name: Run tests (Postgres + Postgres Locks)
+      if: matrix.kvstorage == 'postgres' && matrix.locks == 'postgres'
+      run: |
+        # Create a test config for Postgres with PostgreSQL locks
+        cat > config.test.json << EOF
+        {
+          "logging": {
+            "level": "debug",
+            "json": false
+          },
+          "buckets": [
+            {
+              "name": "bucket1",
+              "address": "0.0.0.0",
+              "port": 3000,
+              "kvstorage_type": "postgres",
+              "postgres": {
+                "host": "localhost",
+                "port": 5432,
+                "user": "postgres",
+                "password": "postgres",
+                "dbname": "s3dedup_test",
+                "pool_size": 10
+              },
+              "locks_type": "postgres",
+              "s3storage_type": "minio",
+              "minio": {
+                "endpoint": "http://localhost:9000",
+                "access_key": "minioadmin",
+                "secret_key": "minioadmin",
+                "force_path_style": true
+              }
+            }
+          ]
+        }
+        EOF
+
+        # Run tests
+        cargo test --verbose
+      env:
+        RUST_LOG: debug
+        DATABASE_URL: postgres://postgres:postgres@localhost:5432/s3dedup_test
+
     - name: Clean up test databases
       if: always()
       run: rm -rf db/test_*.db*
@@ -11,6 +11,7 @@ S3 deduplication proxy server with Filetracker protocol compatibility.
 - **Content Deduplication**: Files are stored by SHA256 hash, identical content is stored only once
 - **Filetracker Compatible**: Drop-in replacement for legacy Filetracker servers
 - **Pluggable Storage**: Support for SQLite and PostgreSQL metadata storage
+- **Distributed Locking**: PostgreSQL advisory locks for distributed, high-availability deployments
 - **Migration Support**: Offline and live migration from old Filetracker instances
 - **Auto Cleanup**: Background cleaner removes unreferenced S3 objects
 - **Multi-bucket**: Run multiple independent buckets on different ports
@@ -65,6 +66,7 @@ docker run -d \
 | `KVSTORAGE_TYPE` | `sqlite` | KV storage backend (sqlite, postgres) |
 | `SQLITE_PATH` | `/app/data/kv.db` | SQLite database path |
 | `SQLITE_MAX_CONNECTIONS` | `10` | SQLite connection pool size |
+| `LOCKS_TYPE` | `memory` | Lock manager backend (memory, postgres) |
 | `S3_ENDPOINT` | *required* | S3/MinIO endpoint URL |
 | `S3_ACCESS_KEY` | *required* | S3 access key |
 | `S3_SECRET_KEY` | *required* | S3 secret key |
@@ -76,7 +78,9 @@ docker run -d \
 | `FILETRACKER_URL` | - | Old Filetracker URL for live migration (HTTP fallback) |
 | `FILETRACKER_V1_DIR` | - | V1 Filetracker directory for filesystem-based migration |
 
-For PostgreSQL, use:
+### PostgreSQL Configuration
+
+For PostgreSQL KV storage, use:
 ```
 KVSTORAGE_TYPE=postgres
 POSTGRES_HOST=localhost
@@ -87,6 +91,124 @@ POSTGRES_DB=s3dedup
 POSTGRES_MAX_CONNECTIONS=10
 ```
 
+### Distributed Locking (PostgreSQL Advisory Locks)
+
+For high-availability deployments with multiple s3dedup instances, enable PostgreSQL-based distributed locks:
+
+```
+LOCKS_TYPE=postgres
+POSTGRES_HOST=localhost
+POSTGRES_PORT=5432
+POSTGRES_USER=postgres
+POSTGRES_PASSWORD=password
+POSTGRES_DB=s3dedup
+POSTGRES_MAX_CONNECTIONS=10
+```
+
+**Benefits of PostgreSQL Locks**:
+- **Distributed Locking**: Multiple s3dedup instances can safely coordinate file operations
+- **High Availability**: If one instance fails, the advisory locks it held are released when its database sessions end, so the remaining instances can keep working on the same files
+- **Load Balancing**: Multiple instances can share the same database for coordinated access
+- **Atomic Operations**: Prevents race conditions in concurrent file operations
+
+**How It Works**:
+- Uses PostgreSQL's built-in advisory locks (`pg_advisory_lock`, `pg_advisory_lock_shared`)
+- Lock keys are hashed to 64-bit integers for PostgreSQL's lock API
+- Shared locks allow concurrent reads; exclusive locks ensure serialized writes
+- Automatic lock release when the guard is dropped (via background cleanup tasks)
+
+**Note**: PostgreSQL locks require the same PostgreSQL instance used for KV storage. The connection pool is shared between both uses.
+
+### Connection Pool Sizing
+
+The `POSTGRES_MAX_CONNECTIONS` setting controls the maximum number of concurrent database connections from a single s3dedup instance. This **single pool** is shared between KV storage operations and lock management.
+
+**How to Choose Pool Size:**
+
+```
+Pool Size = (Concurrent Requests × 1.5) + Lock Overhead
+```
+
+**General Guidelines:**
+
+| Deployment | Concurrency | Recommended Pool Size | Notes |
+|------------|-------------|----------------------|-------|
+| **Low** | 1-5 concurrent requests | 10 | Default, suitable for development/testing |
+| **Medium** | 5-20 concurrent requests | 20-30 | Small production deployments |
+| **High** | 20-100 concurrent requests | 50-100 | Large production deployments |
+| **Very High** | 100+ concurrent requests | 100-200 | Use multiple instances with load balancing |
+
+**Factors to Consider:**
+
+1. **Number of s3dedup Instances**
+   - If you have N instances, each needs its own pool
+   - Total connections = N instances × pool_size
+   - PostgreSQL must have enough capacity for all instances
+   - Example: 3 instances × 30 pool_size = 90 connections needed
+
+2. **Lock Contention**
+   - File operations acquire locks (1 connection per lock)
+   - Concurrent uploads/downloads increase lock pressure
+   - Add 20% overhead for lock operations
+   - Example: 20 concurrent requests → pool_size = (20 × 1.5) + 20% overhead = 30 + 6 ≈ 35
+
+3. **Database Configuration**
+   - Check PostgreSQL `max_connections` setting
+   - Reserve connections for maintenance, monitoring, backups
+   - Example: PostgreSQL with 200 max_connections:
+     - Reserve 10 for maintenance
+     - If 3 s3dedup instances: (200 - 10) / 3 ≈ 63 per instance
+
+4. **Memory Usage Per Connection**
+   - Each connection uses ~5-10 MB of memory
+   - Pool size 50 = ~250-500 MB per instance
+   - Monitor actual usage and adjust accordingly
+
+**Example Configurations:**
+
+**Development (1 instance, low throughput):**
+```json
+"postgres": {
+  "pool_size": 10
+}
+```
+
+**Production (3 instances, medium throughput):**
+```json
+"postgres": {
+  "pool_size": 30
+}
+```
+With PostgreSQL `max_connections = 100`:
+- 3 × 30 = 90 connections (10 reserved)
+
+**High-Availability (5 instances, high throughput with PostgreSQL max_connections = 200):**
+```json
+"postgres": {
+  "pool_size": 35
+}
+```
+- 5 × 35 = 175 connections (25 reserved for other operations)
+
+**Monitoring and Tuning:**
+
+Monitor these metrics to optimize pool size:
+
+1. **Connection Utilization**: Check if connections are frequently exhausted
+   ```sql
+   SELECT count(*) FROM pg_stat_activity WHERE datname = 's3dedup';
+   ```
+
+2. **Lock Wait Times**: Monitor whether operations have to wait for an available connection
+3. **Memory Usage**: Watch instance memory as pool size increases
+
+**Scaling Strategy:**
+
+- **Start Conservative**: Begin with pool_size = 10-20
+- **Monitor Usage**: Track connection utilization over 1-2 weeks
+- **Increase Gradually**: Increment by 10-20 when you see high utilization
+- **Scale Horizontally**: Instead of very large pools (>100), use more instances with moderate pools
+
 ### Config File
 
 Alternatively, use a JSON config file:
@@ -251,7 +373,9 @@ cargo run -- server --config config.json
 - **Deduplication**: SHA256-based content addressing
 - **Storage Backend**: S3-compatible object storage (MinIO, AWS S3, etc.)
 - **Metadata Store**: SQLite or PostgreSQL for file metadata and reference counts
-- **Lock Manager**: In-memory file-level locks for concurrent operations
+- **Lock Manager**: In-memory (single-instance) or PostgreSQL advisory locks (distributed, multi-instance HA)
+  - Memory locks: Fast, suitable for single-instance deployments
+  - PostgreSQL locks: Distributed coordination, suitable for multi-instance HA setups
 - **Cleaner**: Background worker that removes unreferenced S3 objects
 
 For detailed architecture documentation, see [docs/deduplication.md](docs/deduplication.md).
 
@@ -40,7 +40,7 @@ pub struct AppState {
 impl AppState {
     pub async fn new(config: &config::BucketConfig) -> Result<Arc<Self>> {
         let kvstorage = kvstorage::KVStorage::new(config).await?;
-        let locks = locks::LocksStorage::new(config.locks_type);
+        let locks = locks::LocksStorage::new_with_config(config.locks_type, config).await?;
         let s3storage = s3storage::S3Storage::new(config).await?;
         let metrics = Arc::new(metrics::Metrics::new());
         Ok(Arc::new(Self {
@@ -58,7 +58,7 @@ impl AppState {
         filetracker_url: String,
     ) -> Result<Arc<Self>> {
         let kvstorage = kvstorage::KVStorage::new(config).await?;
-        let locks = locks::LocksStorage::new(config.locks_type);
+        let locks = locks::LocksStorage::new_with_config(config.locks_type, config).await?;
         let s3storage = s3storage::S3Storage::new(config).await?;
         let filetracker_client = filetracker_client::FiletrackerClient::new(filetracker_url);
         let metrics = Arc::new(metrics::Metrics::new());
 
@@ -3,6 +3,7 @@ use serde::Deserialize;
 use tracing::info;
 
 pub mod memory;
+pub mod postgres;
 
 /**
  * Get key for lock on file
@@ -23,16 +24,18 @@ fn hash_lock(bucket: &str, hash: &str) -> String {
 pub enum LocksType {
     #[serde(rename = "memory")]
     Memory,
+    #[serde(rename = "postgres")]
+    Postgres,
 }
 
 #[must_use = "droping temporary lock makes no sense"]
-pub(crate) trait SharedLockGuard<'a> {}
+pub trait SharedLockGuard<'a> {}
 #[must_use = "droping temporary lock makes no sense"]
-pub(crate) trait ExclusiveLockGuard<'a> {}
+pub trait ExclusiveLockGuard<'a> {}
 
 #[async_trait]
 #[must_use = "preparing temporary lock makes no sense"]
-pub(crate) trait Lock {
+pub trait Lock {
     async fn acquire_shared<'a>(&'a self) -> Box<dyn SharedLockGuard<'a> + Send + 'a>;
     async fn acquire_exclusive<'a>(&'a self) -> Box<dyn ExclusiveLockGuard<'a> + Send + 'a>;
 }
@@ -48,6 +51,7 @@ pub(crate) trait LockStorage {
 #[derive(Clone)]
 pub enum LocksStorage {
     Memory(memory::MemoryLocks),
+    Postgres(Box<postgres::PostgresLocks>),
 }
 
 impl LocksStorage {
@@ -57,12 +61,33 @@ impl LocksStorage {
                 info!("Using memory as locks storage");
                 Box::new(LocksStorage::Memory(*memory::MemoryLocks::new()))
             }
+            LocksType::Postgres => {
+                panic!("PostgreSQL locks must be initialized with config via new_with_config")
+            }
+        }
+    }
+
+    pub async fn new_with_config(
+        lock_type: LocksType,
+        bucket_config: &crate::config::BucketConfig,
+    ) -> anyhow::Result<Box<Self>> {
+        match lock_type {
+            LocksType::Memory => {
+                info!("Using memory as locks storage");
+                Ok(Box::new(LocksStorage::Memory(*memory::MemoryLocks::new())))
+            }
+            LocksType::Postgres => {
+                info!("Using PostgreSQL as locks storage");
+                let pg_locks = postgres::PostgresLocks::new_with_config(bucket_config).await?;
+                Ok(Box::new(LocksStorage::Postgres(pg_locks)))
+            }
         }
     }
 
-    pub(crate) async fn prepare_lock<'a>(&'a self, key: String) -> Box<dyn Lock + 'a + Send> {
+    pub async fn prepare_lock<'a>(&'a self, key: String) -> Box<dyn Lock + 'a + Send> {
         match self {
             LocksStorage::Memory(memory_locks) => memory_locks.prepare_lock(key).await,
+            LocksStorage::Postgres(postgres_locks) => postgres_locks.prepare_lock(key).await,
         }
     }
 }