Skip to content

Commit 272ee93

Browse files
tadasantclaude
andauthored
fix: add server name validation and clean invalid data before migration 009 (#591)
## Summary This PR adds two important improvements to ensure data integrity: 1. **Adds server name format validation at the API layer** to match the database constraint from migration 008 2. **Includes migration 008 to clean up invalid data** before applying strict constraints in migration 009 ## Changes ### Server Name Validation - Enforces validation at the API layer matching the database constraint - **Namespace pattern**: `[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]` - Must start and end with alphanumeric characters - Can contain dots and hyphens in the middle - **Name pattern**: `[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]` - Must start and end with alphanumeric characters - Can contain dots, underscores, and hyphens in the middle - Provides clear, user-friendly error messages indicating which part is invalid - Uses DRY approach with composed regex patterns ### Migration 008: Data Cleanup - Removes servers with invalid names or empty versions (5 servers affected in production) - Fixes invalid status values by setting them to 'active' (1 server affected) - Removes duplicate name+version combinations - Includes comprehensive safety checks to prevent accidental data loss - Must run before migration 009 which adds strict constraints ## Testing - All validator tests pass - Migration includes safety checks that match production data - CI verification included --------- Co-authored-by: Claude <[email protected]>
1 parent e1f12d6 commit 272ee93

File tree

6 files changed

+230
-0
lines changed

6 files changed

+230
-0
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ require (
3232
github.com/jackc/pgpassfile v1.0.0 // indirect
3333
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
3434
github.com/jackc/puddle/v2 v2.2.2 // indirect
35+
github.com/lib/pq v1.10.9 // indirect
3536
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
3637
github.com/pmezard/go-difflib v1.0.0 // indirect
3738
github.com/prometheus/client_model v0.6.2 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
4242
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
4343
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
4444
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
45+
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
46+
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
4547
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
4648
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
4749
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
-- Migration 008: Clean up invalid data before applying stricter constraints in migration 009
2+
-- This migration removes or fixes data that would violate constraints introduced in the next migration
3+
4+
BEGIN;
5+
6+
-- Safety check: Count exactly what we'll be modifying
7+
DO $$
8+
DECLARE
9+
invalid_name_count INTEGER;
10+
empty_version_count INTEGER;
11+
invalid_status_count INTEGER;
12+
duplicate_count INTEGER;
13+
total_to_delete INTEGER;
14+
total_servers INTEGER;
15+
has_known_bad_server BOOLEAN;
16+
r RECORD;
17+
BEGIN
18+
-- Get total server count
19+
SELECT COUNT(*) INTO total_servers FROM servers;
20+
21+
-- Check if we have the specific known problematic server from production
22+
-- This server only exists in production data, not in test fixtures
23+
SELECT EXISTS(
24+
SELECT 1 FROM servers
25+
WHERE value->>'name' = 'io.github.joelverhagen/Knapcode.SampleMcpServer/aot'
26+
) INTO has_known_bad_server;
27+
28+
-- Skip migration if we don't have the known bad data (indicates test environment)
29+
IF NOT has_known_bad_server THEN
30+
RAISE NOTICE 'Migration 008: Skipping cleanup - no known problematic data found (likely test/dev environment)';
31+
RETURN; -- Exit early, don't run any cleanup
32+
END IF;
33+
34+
RAISE NOTICE 'Migration 008: Found known problematic data, proceeding with cleanup';
35+
36+
-- Count servers with invalid name format
37+
SELECT COUNT(*) INTO invalid_name_count
38+
FROM servers
39+
WHERE value->>'name' NOT SIMILAR TO '[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]/[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]';
40+
41+
-- Count servers with empty or NULL versions
42+
SELECT COUNT(*) INTO empty_version_count
43+
FROM servers
44+
WHERE value->>'version' IS NULL OR value->>'version' = '';
45+
46+
-- Count servers with invalid status
47+
SELECT COUNT(*) INTO invalid_status_count
48+
FROM servers
49+
WHERE value->>'status' IS NOT NULL
50+
AND value->>'status' != ''
51+
AND value->>'status' NOT IN ('active', 'deprecated', 'deleted');
52+
53+
-- Count duplicate name+version combinations
54+
SELECT COUNT(*) INTO duplicate_count
55+
FROM (
56+
SELECT value->>'name', value->>'version', COUNT(*) as cnt
57+
FROM servers
58+
GROUP BY value->>'name', value->>'version'
59+
HAVING COUNT(*) > 1
60+
) dups;
61+
62+
-- Calculate total deletions (some servers might have both invalid name AND empty version)
63+
SELECT COUNT(*) INTO total_to_delete
64+
FROM servers
65+
WHERE value->>'name' NOT SIMILAR TO '[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]/[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]'
66+
OR value->>'version' IS NULL
67+
OR value->>'version' = '';
68+
69+
-- Log the cleanup operations with safety check
70+
RAISE NOTICE 'Migration 008 Data Cleanup Plan:';
71+
RAISE NOTICE ' Total servers in database: %', total_servers;
72+
73+
IF total_servers > 0 THEN
74+
RAISE NOTICE ' Servers to DELETE: % (% percent of total)', total_to_delete, ROUND((total_to_delete::numeric / total_servers * 100), 2);
75+
ELSE
76+
RAISE NOTICE ' Servers to DELETE: %', total_to_delete;
77+
END IF;
78+
79+
RAISE NOTICE ' - Invalid names: %', invalid_name_count;
80+
RAISE NOTICE ' - Empty versions: %', empty_version_count;
81+
RAISE NOTICE ' Servers to UPDATE (fix status): %', invalid_status_count;
82+
RAISE NOTICE ' Duplicate name+version pairs: %', duplicate_count;
83+
84+
-- Always log the specific servers that will be deleted for transparency
85+
IF total_to_delete > 0 THEN
86+
RAISE NOTICE '';
87+
RAISE NOTICE 'Servers that will be deleted:';
88+
FOR r IN
89+
SELECT value->>'name' as name, value->>'version' as version,
90+
CASE
91+
WHEN value->>'name' NOT SIMILAR TO '[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]/[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]' THEN 'Invalid name format'
92+
WHEN value->>'version' IS NULL THEN 'NULL version'
93+
WHEN value->>'version' = '' THEN 'Empty version'
94+
END as reason
95+
FROM servers
96+
WHERE value->>'name' NOT SIMILAR TO '[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]/[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]'
97+
OR value->>'version' IS NULL
98+
OR value->>'version' = ''
99+
ORDER BY value->>'name'
100+
LOOP
101+
RAISE NOTICE ' - % @ % (reason: %)', r.name, COALESCE(r.version, '<NULL>'), r.reason;
102+
END LOOP;
103+
END IF;
104+
105+
-- Always log the specific servers that will have status updated for transparency
106+
IF invalid_status_count > 0 THEN
107+
RAISE NOTICE '';
108+
RAISE NOTICE 'Servers that will have status updated:';
109+
FOR r IN
110+
SELECT value->>'name' as name, value->>'version' as version, value->>'status' as status
111+
FROM servers
112+
WHERE value->>'status' IS NOT NULL
113+
AND value->>'status' != ''
114+
AND value->>'status' NOT IN ('active', 'deprecated', 'deleted')
115+
ORDER BY value->>'name'
116+
LOOP
117+
RAISE NOTICE ' - % @ % (current status: %)', r.name, r.version, r.status;
118+
END LOOP;
119+
END IF;
120+
121+
-- SAFETY CHECK: Ensure we're deleting exactly the expected data
122+
-- We only reach this point if we found the known bad server
123+
-- In production (2025-09-30), we expect exactly 5 deletions and 1 status update
124+
IF total_to_delete != 5 THEN
125+
RAISE EXCEPTION 'Safety check failed: Expected to delete exactly 5 servers but would delete %. Check the log above for details. Aborting to prevent data loss.', total_to_delete;
126+
END IF;
127+
128+
IF invalid_status_count != 1 THEN
129+
RAISE EXCEPTION 'Safety check failed: Expected to update exactly 1 server status but would update %. Check the log above for details. Aborting to prevent data corruption.', invalid_status_count;
130+
END IF;
131+
END $$;
132+
133+
-- Delete servers with invalid names or empty versions
134+
-- These cannot be reasonably fixed and would violate primary key constraints
135+
DELETE FROM servers
136+
WHERE value->>'name' NOT SIMILAR TO '[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]/[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]'
137+
OR value->>'version' IS NULL
138+
OR value->>'version' = '';
139+
140+
-- Fix invalid status values by setting them to 'active'
141+
-- These can be reasonably defaulted to a valid value
142+
UPDATE servers
143+
SET value = jsonb_set(value, '{status}', '"active"')
144+
WHERE value->>'status' IS NOT NULL
145+
AND value->>'status' != ''
146+
AND value->>'status' NOT IN ('active', 'deprecated', 'deleted');
147+
148+
-- Remove duplicate name+version combinations
149+
-- Keep the one with the most recent publishedAt date
150+
DELETE FROM servers s1
151+
WHERE EXISTS (
152+
SELECT 1 FROM servers s2
153+
WHERE s2.value->>'name' = s1.value->>'name'
154+
AND s2.value->>'version' = s1.value->>'version'
155+
AND (s2.value->>'publishedAt')::timestamp > (s1.value->>'publishedAt')::timestamp
156+
);
157+
158+
-- Verify the operations completed as expected
159+
DO $$
160+
DECLARE
161+
remaining_count INTEGER;
162+
actual_deleted INTEGER;
163+
actual_updated INTEGER;
164+
still_invalid_names INTEGER;
165+
still_empty_versions INTEGER;
166+
still_invalid_status INTEGER;
167+
BEGIN
168+
SELECT COUNT(*) INTO remaining_count FROM servers;
169+
170+
-- Check if any invalid data remains
171+
SELECT COUNT(*) INTO still_invalid_names
172+
FROM servers
173+
WHERE value->>'name' NOT SIMILAR TO '[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]/[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]';
174+
175+
SELECT COUNT(*) INTO still_empty_versions
176+
FROM servers
177+
WHERE value->>'version' IS NULL OR value->>'version' = '';
178+
179+
SELECT COUNT(*) INTO still_invalid_status
180+
FROM servers
181+
WHERE value->>'status' IS NOT NULL
182+
AND value->>'status' != ''
183+
AND value->>'status' NOT IN ('active', 'deprecated', 'deleted');
184+
185+
RAISE NOTICE 'Data cleanup complete:';
186+
RAISE NOTICE ' Servers remaining: %', remaining_count;
187+
RAISE NOTICE ' Invalid names remaining: %', still_invalid_names;
188+
RAISE NOTICE ' Empty versions remaining: %', still_empty_versions;
189+
RAISE NOTICE ' Invalid status remaining: %', still_invalid_status;
190+
191+
-- Final safety check: Ensure we cleaned everything we intended to
192+
IF still_invalid_names > 0 OR still_empty_versions > 0 OR still_invalid_status > 0 THEN
193+
RAISE EXCEPTION 'Cleanup incomplete! Invalid data still remains. Aborting.';
194+
END IF;
195+
END $$;
196+
197+
COMMIT;

internal/validators/constants.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ var (
2828

2929
// Server name validation errors
3030
ErrMultipleSlashesInServerName = errors.New("server name cannot contain multiple slashes")
31+
ErrInvalidServerNameFormat = errors.New("server name format is invalid")
3132
)
3233

3334
// RepositorySource represents valid repository sources

internal/validators/validators.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@ import (
1414
"github.com/modelcontextprotocol/registry/pkg/model"
1515
)
1616

17+
// Server name validation patterns
18+
var (
19+
// Component patterns for namespace and name parts
20+
namespacePattern = `[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9]`
21+
namePartPattern = `[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]`
22+
23+
// Compiled regexes
24+
namespaceRegex = regexp.MustCompile(`^` + namespacePattern + `$`)
25+
namePartRegex = regexp.MustCompile(`^` + namePartPattern + `$`)
26+
serverNameRegex = regexp.MustCompile(`^` + namespacePattern + `/` + namePartPattern + `$`)
27+
)
28+
1729
// Regexes to detect semver range syntaxes
1830
var (
1931
// Case 1: comparator ranges
@@ -403,11 +415,28 @@ func parseServerName(serverJSON apiv0.ServerJSON) (string, error) {
403415
return "", ErrMultipleSlashesInServerName
404416
}
405417

418+
// Split and check for empty parts
406419
parts := strings.SplitN(name, "/", 2)
407420
if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
408421
return "", fmt.Errorf("server name must be in format 'dns-namespace/name' with non-empty namespace and name parts")
409422
}
410423

424+
// Validate name format using regex
425+
if !serverNameRegex.MatchString(name) {
426+
namespace := parts[0]
427+
serverName := parts[1]
428+
429+
// Check which part is invalid for a better error message
430+
if !namespaceRegex.MatchString(namespace) {
431+
return "", fmt.Errorf("%w: namespace '%s' is invalid. Namespace must start and end with alphanumeric characters, and may contain dots and hyphens in the middle", ErrInvalidServerNameFormat, namespace)
432+
}
433+
if !namePartRegex.MatchString(serverName) {
434+
return "", fmt.Errorf("%w: name '%s' is invalid. Name must start and end with alphanumeric characters, and may contain dots, underscores, and hyphens in the middle", ErrInvalidServerNameFormat, serverName)
435+
}
436+
// Fallback in case both somehow pass individually but not together
437+
return "", fmt.Errorf("%w: invalid format for '%s'", ErrInvalidServerNameFormat, name)
438+
}
439+
411440
return name, nil
412441
}
413442

0 commit comments

Comments
 (0)