
Commit fa5bbc7

Add postgres for lock management
1 parent 7021e88 commit fa5bbc7

8 files changed, +718 -19 lines


.github/workflows/test.yml

Lines changed: 48 additions & 9 deletions
@@ -21,6 +21,8 @@ jobs:
             locks: memory
           - kvstorage: postgres
             locks: memory
+          - kvstorage: postgres
+            locks: postgres
 
     services:
       postgres:
@@ -84,22 +86,16 @@ jobs:
           ./mc alias set local http://localhost:9000 minioadmin minioadmin
           ./mc mb local/bucket1 || true
 
-      - name: Check formatting
-        run: cargo fmt -- --check
-
-      - name: Run clippy
-        run: cargo clippy -- -D warnings
-
       - name: Run tests (SQLite)
         if: matrix.kvstorage == 'sqlite'
         run: cargo test --verbose
         env:
           RUST_LOG: debug
 
-      - name: Run tests (Postgres)
-        if: matrix.kvstorage == 'postgres'
+      - name: Run tests (Postgres + Memory Locks)
+        if: matrix.kvstorage == 'postgres' && matrix.locks == 'memory'
         run: |
-          # Create a test config for Postgres
+          # Create a test config for Postgres with memory locks
           cat > config.test.json << EOF
           {
             "logging": {
@@ -139,6 +135,49 @@ jobs:
           RUST_LOG: debug
           DATABASE_URL: postgres://postgres:postgres@localhost:5432/s3dedup_test
 
+      - name: Run tests (Postgres + Postgres Locks)
+        if: matrix.kvstorage == 'postgres' && matrix.locks == 'postgres'
+        run: |
+          # Create a test config for Postgres with PostgreSQL locks
+          cat > config.test.json << EOF
+          {
+            "logging": {
+              "level": "debug",
+              "json": false
+            },
+            "buckets": [
+              {
+                "name": "bucket1",
+                "address": "0.0.0.0",
+                "port": 3000,
+                "kvstorage_type": "postgres",
+                "postgres": {
+                  "host": "localhost",
+                  "port": 5432,
+                  "user": "postgres",
+                  "password": "postgres",
+                  "dbname": "s3dedup_test",
+                  "pool_size": 10
+                },
+                "locks_type": "postgres",
+                "s3storage_type": "minio",
+                "minio": {
+                  "endpoint": "http://localhost:9000",
+                  "access_key": "minioadmin",
+                  "secret_key": "minioadmin",
+                  "force_path_style": true
+                }
+              }
+            ]
+          }
+          EOF
+
+          # Run tests
+          cargo test --verbose
+        env:
+          RUST_LOG: debug
+          DATABASE_URL: postgres://postgres:postgres@localhost:5432/s3dedup_test
+
       - name: Clean up test databases
         if: always()
         run: rm -rf db/test_*.db*

README.md

Lines changed: 126 additions & 2 deletions
@@ -11,6 +11,7 @@ S3 deduplication proxy server with Filetracker protocol compatibility.
 - **Content Deduplication**: Files are stored by SHA256 hash, identical content is stored only once
 - **Filetracker Compatible**: Drop-in replacement for legacy Filetracker servers
 - **Pluggable Storage**: Support for SQLite and PostgreSQL metadata storage
+- **Distributed Locking**: PostgreSQL advisory locks for distributed, high-availability deployments
 - **Migration Support**: Offline and live migration from old Filetracker instances
 - **Auto Cleanup**: Background cleaner removes unreferenced S3 objects
 - **Multi-bucket**: Run multiple independent buckets on different ports
@@ -65,6 +66,7 @@ docker run -d \
 | `KVSTORAGE_TYPE` | `sqlite` | KV storage backend (sqlite, postgres) |
 | `SQLITE_PATH` | `/app/data/kv.db` | SQLite database path |
 | `SQLITE_MAX_CONNECTIONS` | `10` | SQLite connection pool size |
+| `LOCKS_TYPE` | `memory` | Lock manager backend (memory, postgres) |
 | `S3_ENDPOINT` | *required* | S3/MinIO endpoint URL |
 | `S3_ACCESS_KEY` | *required* | S3 access key |
 | `S3_SECRET_KEY` | *required* | S3 secret key |
@@ -76,7 +78,9 @@ docker run -d \
 | `FILETRACKER_URL` | - | Old Filetracker URL for live migration (HTTP fallback) |
 | `FILETRACKER_V1_DIR` | - | V1 Filetracker directory for filesystem-based migration |
 
-For PostgreSQL, use:
+### PostgreSQL Configuration
+
+For PostgreSQL KV storage, use:
 ```
 KVSTORAGE_TYPE=postgres
 POSTGRES_HOST=localhost
@@ -87,6 +91,124 @@ POSTGRES_DB=s3dedup
 POSTGRES_MAX_CONNECTIONS=10
 ```
 
+### Distributed Locking (PostgreSQL Advisory Locks)
+
+For high-availability deployments with multiple s3dedup instances, enable PostgreSQL-based distributed locks:
+
+```
+LOCKS_TYPE=postgres
+POSTGRES_HOST=localhost
+POSTGRES_PORT=5432
+POSTGRES_USER=postgres
+POSTGRES_PASSWORD=password
+POSTGRES_DB=s3dedup
+POSTGRES_MAX_CONNECTIONS=10
+```
+
+**Benefits of PostgreSQL Locks**:
+- **Distributed Locking**: Multiple s3dedup instances can safely coordinate file operations
+- **High Availability**: If one instance fails, others can continue with the same locks
+- **Load Balancing**: Multiple instances can share the same database for coordinated access
+- **Atomic Operations**: Prevents race conditions in concurrent file operations
+
+**How It Works**:
+- Uses PostgreSQL's built-in advisory locks (`pg_advisory_lock`, `pg_advisory_lock_shared`)
+- Lock keys are hashed to 64-bit integers for PostgreSQL's lock API
+- Shared locks allow concurrent reads; exclusive locks ensure serialized writes
+- Automatic lock release when guard is dropped (via background cleanup tasks)
+
+**Note**: PostgreSQL locks require the same PostgreSQL instance used for KV storage. Connection pool is shared between both uses.
+
+### Connection Pool Sizing
+
+The `POSTGRES_MAX_CONNECTIONS` setting controls the maximum number of concurrent database connections from a single s3dedup instance. This **single pool** is shared between KV storage operations and lock management.
+
+**How to Choose Pool Size:**
+
+```
+Pool Size = (Concurrent Requests × 1.5) + Lock Overhead
+```
+
+**General Guidelines:**
+
+| Deployment | Concurrency | Recommended Pool Size | Notes |
+|------------|-------------|----------------------|-------|
+| **Low** | 1-5 concurrent requests | 10 | Default, suitable for development/testing |
+| **Medium** | 5-20 concurrent requests | 20-30 | Small production deployments |
+| **High** | 20-100 concurrent requests | 50-100 | Large production deployments |
+| **Very High** | 100+ concurrent requests | 100-200 | Use multiple instances with load balancing |
+
+**Factors to Consider:**
+
+1. **Number of s3dedup Instances**
+   - If you have N instances, each needs its own pool
+   - Total connections = N instances × pool_size
+   - PostgreSQL must have enough capacity for all instances
+   - Example: 3 instances × 30 pool_size = 90 connections needed
+
+2. **Lock Contention**
+   - File operations acquire locks (1 connection per lock)
+   - Concurrent uploads/downloads increase lock pressure
+   - Add 20% overhead for lock operations
+   - Example: 20 concurrent requests → pool_size = (20 × 1.5) + overhead ≈ 35
+
+3. **Database Configuration**
+   - Check PostgreSQL `max_connections` setting
+   - Reserve connections for maintenance, monitoring, backups
+   - Example: PostgreSQL with 200 max_connections:
+     - Reserve 10 for maintenance
+     - If 3 s3dedup instances: (200 - 10) / 3 ≈ 63 per instance
+
+4. **Memory Usage Per Connection**
+   - Each connection uses ~5-10 MB of memory
+   - Pool size 50 = ~250-500 MB per instance
+   - Monitor actual usage and adjust accordingly
+
+**Example Configurations:**
+
+**Development (1 instance, low throughput):**
+```json
+"postgres": {
+  "pool_size": 10
+}
+```
+
+**Production (3 instances, medium throughput):**
+```json
+"postgres": {
+  "pool_size": 30
+}
+```
+With PostgreSQL `max_connections = 100`:
+- 3 × 30 = 90 connections (10 reserved)
+
+**High-Availability (5 instances, high throughput with PostgreSQL max_connections = 200):**
+```json
+"postgres": {
+  "pool_size": 35
+}
+```
+- 5 × 35 = 175 connections (25 reserved for other operations)
+
+**Monitoring and Tuning:**
+
+Monitor these metrics to optimize pool size:
+
+1. **Connection Utilization**: Check if connections are frequently exhausted
+   ```sql
+   SELECT count(*) FROM pg_stat_activity WHERE datname = 's3dedup';
+   ```
+
+2. **Lock Wait Times**: Monitor if operations wait for available connections
+3. **Memory Usage**: Watch instance memory as pool size increases
+
+**Scaling Strategy:**
+
+- **Start Conservative**: Begin with pool_size = 10-20
+- **Monitor Usage**: Track connection utilization over 1-2 weeks
+- **Increase Gradually**: Increment by 10-20 when you see high utilization
+- **Scale Horizontally**: Instead of very large pools (>100), use more instances with moderate pools
+
 ### Config File
 
 Alternatively, use a JSON config file:
@@ -251,7 +373,9 @@ cargo run -- server --config config.json
 - **Deduplication**: SHA256-based content addressing
 - **Storage Backend**: S3-compatible object storage (MinIO, AWS S3, etc.)
 - **Metadata Store**: SQLite or PostgreSQL for file metadata and reference counts
-- **Lock Manager**: In-memory file-level locks for concurrent operations
+- **Lock Manager**: In-memory (single-instance) or PostgreSQL advisory locks (distributed, multi-instance HA)
+  - Memory locks: Fast, suitable for single-instance deployments
+  - PostgreSQL locks: Distributed coordination, suitable for multi-instance HA setups
 - **Cleaner**: Background worker that removes unreferenced S3 objects
 
 For detailed architecture documentation, see [docs/deduplication.md](docs/deduplication.md).
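
The advisory-lock behavior described in the README section above can be made concrete with a short sketch. This is an illustration only, assuming the `sqlx` crate; it is not the implementation in `src/locks/postgres.rs` (that file is part of this commit but not shown in this excerpt). It shows the two ideas the README lists: hashing a string key into PostgreSQL's 64-bit advisory-lock key space, and acquiring the lock in shared or exclusive mode on a connection that stays checked out while the lock is held.

```rust
// Illustrative sketch only (assumes sqlx; not the actual src/locks/postgres.rs).
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

use sqlx::pool::PoolConnection;
use sqlx::{PgPool, Postgres};

/// Map an arbitrary lock key (e.g. a bucket/file identifier) into the i64
/// key space that pg_advisory_lock expects.
fn advisory_key(key: &str) -> i64 {
    let mut hasher = DefaultHasher::new();
    key.hash(&mut hasher);
    hasher.finish() as i64
}

/// Acquire an exclusive advisory lock. Session-level locks are tied to the
/// database session, so the connection must stay checked out while held.
async fn acquire_exclusive(
    pool: &PgPool,
    key: &str,
) -> Result<PoolConnection<Postgres>, sqlx::Error> {
    let mut conn = pool.acquire().await?;
    sqlx::query("SELECT pg_advisory_lock($1)")
        .bind(advisory_key(key))
        .execute(&mut *conn)
        .await?;
    Ok(conn)
}

/// Acquire a shared advisory lock; multiple readers can hold it concurrently,
/// but it conflicts with an exclusive lock on the same key.
async fn acquire_shared(
    pool: &PgPool,
    key: &str,
) -> Result<PoolConnection<Postgres>, sqlx::Error> {
    let mut conn = pool.acquire().await?;
    sqlx::query("SELECT pg_advisory_lock_shared($1)")
        .bind(advisory_key(key))
        .execute(&mut *conn)
        .await?;
    Ok(conn)
}

/// Explicit release of an exclusive lock; shared locks use
/// pg_advisory_unlock_shared. Session-level locks are also released
/// automatically when the session (connection) ends.
async fn release_exclusive(
    mut conn: PoolConnection<Postgres>,
    key: &str,
) -> Result<(), sqlx::Error> {
    sqlx::query("SELECT pg_advisory_unlock($1)")
        .bind(advisory_key(key))
        .execute(&mut *conn)
        .await?;
    Ok(())
}
```

Because session-level advisory locks belong to the connection that took them, a guard type along these lines would keep the `PoolConnection` checked out and issue the matching unlock call when dropped (or let the session end), which lines up with the README's note about release on guard drop.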

src/lib.rs

Lines changed: 2 additions & 2 deletions
@@ -40,7 +40,7 @@ pub struct AppState {
 impl AppState {
     pub async fn new(config: &config::BucketConfig) -> Result<Arc<Self>> {
         let kvstorage = kvstorage::KVStorage::new(config).await?;
-        let locks = locks::LocksStorage::new(config.locks_type);
+        let locks = locks::LocksStorage::new_with_config(config.locks_type, config).await?;
         let s3storage = s3storage::S3Storage::new(config).await?;
         let metrics = Arc::new(metrics::Metrics::new());
         Ok(Arc::new(Self {
@@ -58,7 +58,7 @@ impl AppState {
         filetracker_url: String,
     ) -> Result<Arc<Self>> {
         let kvstorage = kvstorage::KVStorage::new(config).await?;
-        let locks = locks::LocksStorage::new(config.locks_type);
+        let locks = locks::LocksStorage::new_with_config(config.locks_type, config).await?;
         let s3storage = s3storage::S3Storage::new(config).await?;
         let filetracker_client = filetracker_client::FiletrackerClient::new(filetracker_url);
         let metrics = Arc::new(metrics::Metrics::new());

src/locks/mod.rs

Lines changed: 29 additions & 4 deletions
@@ -3,6 +3,7 @@ use serde::Deserialize;
 use tracing::info;
 
 pub mod memory;
+pub mod postgres;
 
 /**
  * Get key for lock on file
@@ -23,16 +24,18 @@ fn hash_lock(bucket: &str, hash: &str) -> String {
 pub enum LocksType {
     #[serde(rename = "memory")]
     Memory,
+    #[serde(rename = "postgres")]
+    Postgres,
 }
 
 #[must_use = "droping temporary lock makes no sense"]
-pub(crate) trait SharedLockGuard<'a> {}
+pub trait SharedLockGuard<'a> {}
 #[must_use = "droping temporary lock makes no sense"]
-pub(crate) trait ExclusiveLockGuard<'a> {}
+pub trait ExclusiveLockGuard<'a> {}
 
 #[async_trait]
 #[must_use = "preparing temporary lock makes no sense"]
-pub(crate) trait Lock {
+pub trait Lock {
     async fn acquire_shared<'a>(&'a self) -> Box<dyn SharedLockGuard<'a> + Send + 'a>;
     async fn acquire_exclusive<'a>(&'a self) -> Box<dyn ExclusiveLockGuard<'a> + Send + 'a>;
 }
@@ -48,6 +51,7 @@ pub(crate) trait LockStorage {
 #[derive(Clone)]
 pub enum LocksStorage {
     Memory(memory::MemoryLocks),
+    Postgres(Box<postgres::PostgresLocks>),
 }
 
 impl LocksStorage {
@@ -57,12 +61,33 @@ impl LocksStorage {
                 info!("Using memory as locks storage");
                 Box::new(LocksStorage::Memory(*memory::MemoryLocks::new()))
             }
+            LocksType::Postgres => {
+                panic!("PostgreSQL locks must be initialized with config via new_with_config")
+            }
+        }
+    }
+
+    pub async fn new_with_config(
+        lock_type: LocksType,
+        bucket_config: &crate::config::BucketConfig,
+    ) -> anyhow::Result<Box<Self>> {
+        match lock_type {
+            LocksType::Memory => {
+                info!("Using memory as locks storage");
+                Ok(Box::new(LocksStorage::Memory(*memory::MemoryLocks::new())))
+            }
+            LocksType::Postgres => {
+                info!("Using PostgreSQL as locks storage");
+                let pg_locks = postgres::PostgresLocks::new_with_config(bucket_config).await?;
+                Ok(Box::new(LocksStorage::Postgres(pg_locks)))
+            }
         }
     }
 
-    pub(crate) async fn prepare_lock<'a>(&'a self, key: String) -> Box<dyn Lock + 'a + Send> {
+    pub async fn prepare_lock<'a>(&'a self, key: String) -> Box<dyn Lock + 'a + Send> {
         match self {
             LocksStorage::Memory(memory_locks) => memory_locks.prepare_lock(key).await,
+            LocksStorage::Postgres(postgres_locks) => postgres_locks.prepare_lock(key).await,
         }
     }
 }
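
For orientation, a hedged usage sketch of the API this diff makes public. The type and method names are taken from the diff above; the `s3dedup` crate path, the assumption that these items are exported, and the literal lock key are illustrative only (within the crate, keys are built by helpers such as `hash_lock`, which are unchanged here).

```rust
// Usage sketch of the new config-aware constructor and the now-public
// prepare_lock / guard API. Crate name and key string are illustrative.
use s3dedup::config::BucketConfig;
use s3dedup::locks::{LocksStorage, LocksType};

async fn write_with_lock(config: &BucketConfig) -> anyhow::Result<()> {
    // Postgres locks need the bucket config (for the shared connection pool),
    // so they are built via the async constructor added in this commit.
    let locks = LocksStorage::new_with_config(LocksType::Postgres, config).await?;

    // Prepare a lock for a key; `prepare_lock` is now `pub`.
    let lock = locks.prepare_lock("example-lock-key".to_string()).await;

    // The exclusive guard serializes writers; shared guards allow concurrent
    // readers. Dropping the guard releases the lock.
    let _guard = lock.acquire_exclusive().await;
    // ... perform the protected operation while `_guard` is alive ...

    Ok(())
}
```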
