Skip to content

Commit 2c4ff3f

Browse files
authored
Merge pull request #61 from mcp-agents-ai/feat/leixu/mcp_server_info_enrich
Feat/leixu/mcp server info enrich
2 parents ddbbb28 + 145203b commit 2c4ff3f

File tree

1,192 files changed

+9505
-4939
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,192 files changed

+9505
-4939
lines changed

server/.env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,7 @@ OPENAI_BASE_URL={{your api_base_url_here}}
77
MODEL_NAME={{your model_name_here}}
88

99
# Cache configuration
10-
CACHE_TTL=60000 # 1 minute
10+
CACHE_TTL=60000 # 1 minute
11+
12+
# GitHub configuration
13+
GITHUB_API_TOKEN={{your github_api_token_here}} # Used for GitHub API requests

server/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
"update-server-types": "tsx src/data/mcp_server_crawler_result_updater.ts",
1616
"clean-duplicates": "tsx src/data/clean_duplicate.ts",
1717
"process_categories": "tsx src/data/process_categories.ts",
18-
"process_locales": "tsx src/data/process_locales.ts"
18+
"process_locales": "tsx src/data/process_locales.ts",
19+
"process_githubinfo": "tsx src/data/process_githubinfo.ts"
1920
},
2021
"dependencies": {
2122
"axios": "^1.6.0",

server/src/data/process_githubinfo.log.json

Lines changed: 1843 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
#!/usr/bin/env tsx
2+
3+
// A script to update GitHub information in JSON files in the split directory
4+
// Uses fetchGithubInfo to collect GitHub repository information
5+
6+
import fs from 'fs';
7+
import path from 'path';
8+
import { fileURLToPath } from 'url';
9+
import { fetchGithubInfo, extractGithubRepoInfo } from '../lib/githubEnrichment.js';
10+
11+
// ESM modules have no built-in __filename/__dirname, so derive both from import.meta.url
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Folder holding the JSON files to update, and the progress log kept next to this script
const SPLIT_DIR = path.join(__dirname, 'split');
const LOG_FILE = path.join(__dirname, 'process_githubinfo.log.json');

// Language subdirectories of SPLIT_DIR that are checked for a same-named copy of each file
const LANGUAGES = ['zh-hans', 'zh-hant', 'ja', 'es', 'de'];
21+
22+
/**
 * Shape of the JSON progress log that is written to disk so that files
 * handled in an earlier run can be skipped.
 */
interface ProcessedLog {
  /** ISO-8601 timestamp of the last run (refreshed on every save). */
  lastProcessed: string;
  /** hubIds (file names without the .json suffix) that have been processed. */
  processedFiles: string[];
  /** Error text recorded per file. */
  errors: Record<string, string>;
}
28+
29+
/**
 * Creates `dirPath`, including any missing parent directories, unless
 * something already exists at that path.
 *
 * @param dirPath - Directory path to create when absent.
 */
function ensureDirectoryExists(dirPath: string): void {
  const alreadyPresent = fs.existsSync(dirPath);
  if (alreadyPresent) {
    return;
  }

  fs.mkdirSync(dirPath, { recursive: true });
}
35+
36+
/**
 * Loads the progress log from LOG_FILE.
 *
 * The parsed JSON is normalised field by field instead of being trusted as-is:
 * a log that is valid JSON but has a missing or mistyped field (for example
 * `{}` or `null`) would otherwise be returned unchanged and crash callers that
 * read `processedFiles.length` or write into `errors`.
 *
 * @returns The stored log, or a fresh empty log when the file is missing,
 *          unreadable, not valid JSON, or not a JSON object.
 */
function loadProcessedLog(): ProcessedLog {
  const emptyLog: ProcessedLog = {
    lastProcessed: new Date().toISOString(),
    processedFiles: [],
    errors: {}
  };

  if (!fs.existsSync(LOG_FILE)) {
    return emptyLog;
  }

  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(LOG_FILE, 'utf8'));

    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      console.warn('Log file does not contain a JSON object, creating a new one');
      return emptyLog;
    }

    const raw = parsed as Record<string, unknown>;

    // Keep only string hubIds; anything else cannot match a file name anyway.
    const processedFiles = Array.isArray(raw.processedFiles)
      ? raw.processedFiles.filter((id): id is string => typeof id === 'string')
      : [];

    // Keep only string-valued error entries.
    const errors: Record<string, string> = {};
    const rawErrors = raw.errors;
    if (typeof rawErrors === 'object' && rawErrors !== null && !Array.isArray(rawErrors)) {
      for (const [file, message] of Object.entries(rawErrors)) {
        if (typeof message === 'string') {
          errors[file] = message;
        }
      }
    }

    return {
      lastProcessed: typeof raw.lastProcessed === 'string' ? raw.lastProcessed : emptyLog.lastProcessed,
      processedFiles,
      errors
    };
  } catch (error) {
    console.warn(`Error reading log file, creating a new one: ${error}`);
    return emptyLog;
  }
}
54+
55+
/**
 * Writes the progress log to LOG_FILE as pretty-printed JSON.
 *
 * Note that this mutates the passed-in object: `lastProcessed` is set to the
 * current time before the log is serialised.
 *
 * @param log - The log to stamp and persist.
 */
function saveProcessedLog(log: ProcessedLog): void {
  log.lastProcessed = new Date().toISOString();

  const serialized = JSON.stringify(log, null, 2);
  fs.writeFileSync(LOG_FILE, serialized, 'utf8');

  console.log(`Updated process log at ${LOG_FILE}`);
}
61+
62+
/**
 * Registers signal handlers that persist the progress log before the process
 * exits, so an interrupted run does not lose its progress.
 *
 * @param log - The live log object; it is saved in whatever state it is in
 *              when a signal arrives.
 */
function setupShutdownHandlers(log: ProcessedLog): void {
  function saveAndExit(): void {
    console.log('\nProcess interrupted. Saving current progress...');
    saveProcessedLog(log);
    console.log('Progress saved. Exiting.');
    process.exit(0);
  }

  // SIGINT: Ctrl+C, SIGTERM: kill command, SIGHUP: terminal closed
  const terminationSignals = ['SIGINT', 'SIGTERM', 'SIGHUP'] as const;
  for (const signal of terminationSignals) {
    process.on(signal, saveAndExit);
  }
}
76+
77+
/**
 * Derives the hubId from a file name by removing a trailing `.json` extension.
 *
 * Only the suffix is removed. The previous `replace('.json', '')` removed the
 * first occurrence anywhere in the name, so `my.jsonl-tool.json` produced
 * `myl-tool.json` instead of `my.jsonl-tool`.
 *
 * @param filename - File name, normally ending in `.json`.
 * @returns The name without its trailing `.json`; names that do not end in
 *          `.json` are returned unchanged.
 */
function getHubIdFromFilename(filename: string): string {
  const extension = '.json';
  return filename.endsWith(extension)
    ? filename.slice(0, -extension.length)
    : filename;
}
81+
82+
/**
 * Refreshes the GitHub-derived fields of one JSON file in place.
 *
 * Reads the file, looks up its `githubUrl` through `fetchGithubInfo`, copies
 * the returned values into the matching fields and rewrites the file only
 * when at least one field actually changed.
 *
 * Changes compared with the previous version:
 * - `updatedAt` is now compared before being written. It used to be written
 *   unconditionally, which marked every file as updated and forced a rewrite
 *   even when nothing had changed.
 * - A file whose JSON is not an object, or whose `githubUrl` is not a string,
 *   is skipped cleanly instead of going through the error path.
 *
 * @param filePath - Path of the JSON file to update.
 * @returns `true` when the file was rewritten; `false` when it was skipped,
 *          the lookup returned nothing, nothing changed, or an error occurred
 *          (errors are logged here and not rethrown).
 */
async function updateGithubInfoInFile(filePath: string): Promise<boolean> {
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    const data =
      typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
        ? (parsed as Record<string, unknown>)
        : undefined;
    const githubUrl = data?.githubUrl;

    // Skip if no GitHub URL
    if (data === undefined || typeof githubUrl !== 'string' || !githubUrl.startsWith('https://github.com')) {
      console.log(` Skipping ${path.basename(filePath)}: No valid GitHub URL`);
      return false;
    }

    // Fetch GitHub repository information
    const githubInfo = await fetchGithubInfo(githubUrl);
    if (!githubInfo) {
      console.warn(` Failed to fetch GitHub info for ${githubUrl}`);
      return false;
    }

    // Target field, new value, and whether the lookup supplied a usable value.
    // Numeric and license values count as present when defined (0 is valid);
    // the string-like values must be non-empty.
    const candidates: Array<{ field: string; value: unknown; available: boolean }> = [
      { field: 'githubStars', value: githubInfo.stars_count, available: githubInfo.stars_count !== undefined },
      { field: 'author', value: githubInfo.owner_name, available: Boolean(githubInfo.owner_name) },
      { field: 'updatedAt', value: githubInfo.latest_update_time, available: Boolean(githubInfo.latest_update_time) },
      { field: 'githubLatestCommit', value: githubInfo.latest_commit_id, available: Boolean(githubInfo.latest_commit_id) },
      { field: 'githubForks', value: githubInfo.fork_count, available: githubInfo.fork_count !== undefined },
      { field: 'licenseType', value: githubInfo.license_type, available: githubInfo.license_type !== undefined }
    ];

    let updated = false;
    for (const { field, value, available } of candidates) {
      if (available && data[field] !== value) {
        data[field] = value;
        updated = true;
      }
    }

    // Save the updated file if changes were made
    if (updated) {
      fs.writeFileSync(filePath, JSON.stringify(data, null, 2), 'utf8');
      console.log(` Updated GitHub info in ${path.basename(filePath)}`);
    } else {
      console.log(` No changes needed for ${path.basename(filePath)}`);
    }

    return updated;
  } catch (error) {
    console.error(`Error updating GitHub info in ${filePath}:`, error);
    return false;
  }
}
151+
152+
/**
 * Updates GitHub information for every not-yet-processed JSON file directly
 * inside SPLIT_DIR and, when the main file changed, for the same-named file
 * in each language subdirectory.
 *
 * Progress is written to the log after every file so an interrupted run can
 * resume where it stopped.
 *
 * Changes compared with the previous version:
 * - Already-processed hubIds are looked up in a Set instead of calling
 *   `Array.includes` once per file.
 * - The per-language "Updated" line is printed only when that language file
 *   was actually rewritten; it used to be printed even when the update was
 *   skipped or failed.
 * - A stale entry in `errors` is removed once its file is processed.
 *
 * NOTE(review): `updateGithubInfoInFile` returns `false` both for "nothing to
 * change" and for failures, so a file whose lookup failed is still recorded as
 * processed and is not retried on later runs. Telling the two apart needs a
 * richer return value from that function.
 */
async function processAllFiles(): Promise<void> {
  console.log('Starting GitHub info update process...');
  console.log(`Looking for JSON files in: ${SPLIT_DIR}`);

  // Load processed log
  const processedLog = loadProcessedLog();
  console.log(`Loaded processing log. Last run: ${processedLog.lastProcessed}`);
  console.log(`Previously processed ${processedLog.processedFiles.length} files`);

  // Setup handlers to save progress on interruption
  setupShutdownHandlers(processedLog);

  // Get all JSON files from split directory (only in the root, not in language subdirectories)
  const allFiles = fs.readdirSync(SPLIT_DIR)
    .filter(file => file.endsWith('.json') && fs.statSync(path.join(SPLIT_DIR, file)).isFile());

  console.log(`Found ${allFiles.length} total JSON files in root directory`);

  // Filter out already processed files
  const alreadyProcessed = new Set(processedLog.processedFiles);
  const filesToProcess = allFiles.filter(file => !alreadyProcessed.has(getHubIdFromFilename(file)));

  console.log(`${filesToProcess.length} new files to process (${allFiles.length - filesToProcess.length} skipped)`);

  if (filesToProcess.length === 0) {
    console.log('No new files to process. Exiting.');
    return;
  }

  // Process each file
  for (const [index, file] of filesToProcess.entries()) {
    try {
      const hubId = getHubIdFromFilename(file);
      console.log(`Processing file ${index + 1}/${filesToProcess.length}: ${file} (hubId: ${hubId})`);
      const filePath = path.join(SPLIT_DIR, file);

      // Update GitHub info in the main file
      const updated = await updateGithubInfoInFile(filePath);

      if (updated) {
        // If the main file was updated, also update in each language subdirectory if the file exists
        for (const lang of LANGUAGES) {
          const langFilePath = path.join(SPLIT_DIR, lang, file);
          if (!fs.existsSync(langFilePath)) {
            continue;
          }

          const langUpdated = await updateGithubInfoInFile(langFilePath);
          if (langUpdated) {
            console.log(` Updated GitHub info in ${lang}/${file}`);
          }
        }
      }

      // Record the file as processed and drop any error left over from an earlier run
      processedLog.processedFiles.push(hubId);
      delete processedLog.errors[file];

      // Save the log after each file to ensure we don't lose progress on interruption
      saveProcessedLog(processedLog);

      console.log(` Completed GitHub info updates for ${file}`);
    } catch (error) {
      console.error(`Error processing file ${file}:`, error);
      processedLog.errors[file] = String(error);
    }
  }

  // Final save of the processing log
  saveProcessedLog(processedLog);

  // Report on any errors
  const errorCount = Object.keys(processedLog.errors).length;
  if (errorCount > 0) {
    console.log(`Process completed with ${errorCount} errors. Check the log file for details.`);
  } else {
    console.log('GitHub info update process completed successfully!');
  }
}
231+
232+
// Script entry point: run the update and turn any error that escapes it into exit code 1
void (async () => {
  try {
    await processAllFiles();
  } catch (error) {
    console.error('An error occurred during processing:', error);
    process.exit(1);
  }
})();

0 commit comments

Comments
 (0)