Skip to content

Commit 909611d

Browse files
authored
Merge pull request #63 from mcp-agents-ai/feat/leixu/mcp_server_info_enrich
Feat/leixu/mcp server info enrich - improve the GitHub info collection process to avoid rate limits
2 parents a7365b5 + 7ee4ec3 commit 909611d

File tree

1,049 files changed

+4210
-2410
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,049 files changed

+4210
-2410
lines changed

server/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212
"test:integration": "vitest run tests/integration/*.test.ts",
1313
"test:mock": "vitest run tests/mock/*.test.ts",
1414
"crawl-servers": "tsx src/data/mcp_servers_crawler.ts --url https://raw.githubusercontent.com/modelcontextprotocol/servers/refs/heads/main/README.md",
15-
"update-server-types": "tsx src/data/mcp_server_crawler_result_updater.ts",
15+
"crawl-servers-postprocess": "tsx src/data/mcp_server_crawler_result_updater.ts",
1616
"clean-duplicates": "tsx src/data/clean_duplicate.ts",
1717
"process_categories": "tsx src/data/process_categories.ts",
1818
"process_locales": "tsx src/data/process_locales.ts",
19-
"process_githubinfo": "tsx src/data/process_githubinfo.ts"
19+
"process_githubinfo": "tsx src/data/process_githubinfo.ts --batch_size 200"
2020
},
2121
"dependencies": {
2222
"axios": "^1.6.0",

server/src/data/mcp_servers_official_list.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"metadata": {
33
"totalServers": 444,
4-
"extractedAt": "2025-04-27T15:31:45.593Z",
4+
"extractedAt": "2025-04-29T04:06:45.722Z",
55
"sourceUrl": "https://raw.githubusercontent.com/modelcontextprotocol/servers/refs/heads/main/README.md",
66
"baseRepoUrl": "https://github.com/modelcontextprotocol/servers",
77
"defaultBranch": "main",

server/src/data/process_githubinfo.log.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"lastProcessed": "2025-04-28T08:45:46.926Z",
2+
"lastProcessed": "2025-04-29T07:41:12.835Z",
33
"processedFiles": [
44
"0006b282-ac88-4c32-b76c-02476e972a04_githubprojects",
55
"003f3571-5f97-4f84-b126-f09b89e4247e_amadeus",

server/src/data/process_githubinfo.ts

100644100755
Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,23 @@ import path from 'path';
88
import { fileURLToPath } from 'url';
99
import { fetchGithubInfo, extractGithubRepoInfo } from '../lib/githubEnrichment.js';
1010

11+
// Parse command line arguments
12+
const args = process.argv.slice(2);
13+
let BATCH_SIZE: number | null = null; // Null means process all records
14+
15+
// Process command line arguments
16+
for (let i = 0; i < args.length; i++) {
17+
if (args[i] === '--batch_size' && i + 1 < args.length) {
18+
const batchSize = parseInt(args[i + 1], 10);
19+
if (!isNaN(batchSize) && batchSize > 0) {
20+
BATCH_SIZE = batchSize;
21+
i++; // Skip the next argument as it's the value
22+
} else {
23+
console.error(`Invalid batch size: ${args[i + 1]}. Will process all records.`);
24+
}
25+
}
26+
}
27+
1128
// Get the directory name in ESM
1229
const __filename = fileURLToPath(import.meta.url);
1330
const __dirname = path.dirname(__filename);
@@ -34,11 +51,23 @@ function ensureDirectoryExists(dirPath: string): void {
3451
}
3552

3653
// Function to load the log file or create it if it doesn't exist
37-
function loadProcessedLog(): ProcessedLog {
54+
function loadProcessedLog(allFilesCount: number): ProcessedLog {
3855
if (fs.existsSync(LOG_FILE)) {
3956
try {
4057
const logContent = fs.readFileSync(LOG_FILE, 'utf8');
41-
return JSON.parse(logContent) as ProcessedLog;
58+
const logData = JSON.parse(logContent) as ProcessedLog;
59+
60+
// Check if we've already processed all files and should start fresh
61+
if (logData.processedFiles.length >= allFilesCount) {
62+
console.log(`Log file shows all ${logData.processedFiles.length} files already processed. Starting fresh.`);
63+
return {
64+
lastProcessed: new Date().toISOString(),
65+
processedFiles: [],
66+
errors: {}
67+
};
68+
}
69+
70+
return logData;
4271
} catch (error) {
4372
console.warn(`Error reading log file, creating a new one: ${error}`);
4473
}
@@ -153,21 +182,22 @@ async function updateGithubInfoInFile(filePath: string): Promise<boolean> {
153182
async function processAllFiles(): Promise<void> {
154183
console.log('Starting GitHub info update process...');
155184
console.log(`Looking for JSON files in: ${SPLIT_DIR}`);
185+
console.log(BATCH_SIZE ? `Batch size set to: ${BATCH_SIZE}` : `Processing all remaining records`);
186+
187+
// Get all JSON files from split directory (only in the root, not in language subdirectories)
188+
const allFiles = fs.readdirSync(SPLIT_DIR)
189+
.filter(file => file.endsWith('.json') && fs.statSync(path.join(SPLIT_DIR, file)).isFile());
190+
191+
console.log(`Found ${allFiles.length} total JSON files in root directory`);
156192

157193
// Load processed log
158-
const processedLog = loadProcessedLog();
194+
const processedLog = loadProcessedLog(allFiles.length);
159195
console.log(`Loaded processing log. Last run: ${processedLog.lastProcessed}`);
160196
console.log(`Previously processed ${processedLog.processedFiles.length} files`);
161197

162198
// Setup handlers to save progress on interruption
163199
setupShutdownHandlers(processedLog);
164200

165-
// Get all JSON files from split directory (only in the root, not in language subdirectories)
166-
const allFiles = fs.readdirSync(SPLIT_DIR)
167-
.filter(file => file.endsWith('.json') && fs.statSync(path.join(SPLIT_DIR, file)).isFile());
168-
169-
console.log(`Found ${allFiles.length} total JSON files in root directory`);
170-
171201
// Filter out already processed files
172202
const filesToProcess = allFiles.filter(file => {
173203
const hubId = getHubIdFromFilename(file);
@@ -181,11 +211,17 @@ async function processAllFiles(): Promise<void> {
181211
return;
182212
}
183213

184-
// Process each file
185-
for (const [index, file] of filesToProcess.entries()) {
214+
// Limit the number of files to process based on batch size if provided
215+
const filesToProcessInThisBatch = BATCH_SIZE ? filesToProcess.slice(0, BATCH_SIZE) : filesToProcess;
216+
console.log(BATCH_SIZE
217+
? `Processing batch of ${filesToProcessInThisBatch.length} files (limited by batch size ${BATCH_SIZE})`
218+
: `Processing all ${filesToProcessInThisBatch.length} remaining files`);
219+
220+
// Process each file in the batch
221+
for (const [index, file] of filesToProcessInThisBatch.entries()) {
186222
try {
187223
const hubId = getHubIdFromFilename(file);
188-
console.log(`Processing file ${index + 1}/${filesToProcess.length}: ${file} (hubId: ${hubId})`);
224+
console.log(`Processing file ${index + 1}/${filesToProcess.length} ${BATCH_SIZE ? `(batch_size: ${BATCH_SIZE})` : ''}: ${file} (hubId: ${hubId})`);
189225
const filePath = path.join(SPLIT_DIR, file);
190226

191227
// Update GitHub info in the main file
@@ -227,6 +263,18 @@ async function processAllFiles(): Promise<void> {
227263
} else {
228264
console.log('GitHub info update process completed successfully!');
229265
}
266+
267+
// Report on overall progress
268+
console.log(`Processed ${filesToProcessInThisBatch.length} files ${BATCH_SIZE ? 'in this batch' : ''}.`);
269+
console.log(`Total progress: ${processedLog.processedFiles.length}/${allFiles.length} files processed.`);
270+
271+
if (processedLog.processedFiles.length < allFiles.length) {
272+
console.log(BATCH_SIZE
273+
? `Run the script again to process the next batch.`
274+
: `Some files may have been skipped due to errors. Check the log file for details.`);
275+
} else {
276+
console.log(`All files have been processed. The log file will be reset on next run.`);
277+
}
230278
}
231279

232280
// Execute the main function

server/src/data/split/00739d70-842d-4d36-bb98-04bde51a2220_kubernetes.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
],
1717
"requiresApiKey": false,
1818
"isRecommended": false,
19-
"githubStars": 493,
19+
"githubStars": 502,
2020
"downloadCount": 414,
2121
"createdAt": "2025-02-17T22:30:26.383193Z",
2222
"updatedAt": "2025-04-22T03:59:36Z",

server/src/data/split/0092c98a-a48a-41fd-bdc8-e69c66e207a8_youtubesubtitles.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
],
1717
"requiresApiKey": false,
1818
"isRecommended": true,
19-
"githubStars": 309,
19+
"githubStars": 310,
2020
"downloadCount": 912,
2121
"createdAt": "2025-02-17T22:27:37.384353Z",
2222
"updatedAt": "2025-03-20T17:17:38Z",

server/src/data/split/00cca75b-f426-4279-ae1c-7e6b80636b50_youtubetranscripts.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
],
1717
"requiresApiKey": false,
1818
"isRecommended": false,
19-
"githubStars": 169,
19+
"githubStars": 170,
2020
"downloadCount": 0,
2121
"createdAt": "2025-03-17T08:29:20.399027+00:00",
2222
"updatedAt": "2025-03-10T05:10:04Z",
@@ -25,6 +25,6 @@
2525
"isReferenceServer": false,
2626
"isCommunityServer": true,
2727
"githubLatestCommit": "fe3b6a8808116a5d08adfe9197f8391a6e359e77",
28-
"githubForks": 24,
28+
"githubForks": 25,
2929
"licenseType": "MIT"
3030
}

server/src/data/split/00f1ad7c-0109-47d9-893d-004fe0b555f7_agentcarefhiremr.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
],
1717
"requiresApiKey": false,
1818
"isRecommended": false,
19-
"githubStars": 39,
19+
"githubStars": 40,
2020
"downloadCount": 0,
2121
"createdAt": "2025-03-17T08:29:23.366219+00:00",
2222
"updatedAt": "2025-03-13T05:56:46Z",

server/src/data/split/022d2a79-e4da-47b2-a138-2560f3822400_elevenlabstexttospeech.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,6 @@
2525
"isReferenceServer": false,
2626
"isCommunityServer": true,
2727
"githubLatestCommit": "ba2d6d946f0334305f8a888d47338f55222f4af1",
28-
"githubForks": 1,
28+
"githubForks": 2,
2929
"licenseType": null
3030
}

server/src/data/split/0272fd9a-3249-4990-bed5-d66aaa4ebdb4_browseruse.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
],
1717
"requiresApiKey": false,
1818
"isRecommended": false,
19-
"githubStars": 18,
19+
"githubStars": 17,
2020
"downloadCount": 0,
2121
"createdAt": "2025-03-17T08:29:23.366219+00:00",
2222
"updatedAt": "2025-04-27T21:48:21Z",

0 commit comments

Comments
 (0)