diff --git a/lib/routes/wikipedia/current-events.ts b/lib/routes/wikipedia/current-events.ts new file mode 100644 index 00000000000000..42c85b5828be3b --- /dev/null +++ b/lib/routes/wikipedia/current-events.ts @@ -0,0 +1,411 @@ +import { Route } from '@/types'; +import cache from '@/utils/cache'; +import got from '@/utils/got'; +import { parseDate } from '@/utils/parse-date'; +import { config } from '@/config'; + +/* The different ways to query Wikipedia's Current Events + +User-facing pages. They can be called with ?action=render to get only the html of the content + 1. portal: `https://en.wikipedia.org/wiki/Portal:Current_events` + 2. single day: `https://en.wikipedia.org/w/index.php?title=Portal:Current_events/2025_September_20` + 3. section that is included in the portal, 7 most recent days: `https://en.wikipedia.org/wiki/Portal:Current_events/Inclusion` + +API at `https://en.wikipedia.org/w/api.php`. Can target: + 4. multiple pages, result in wikitext, may have continuation with: + `action=query&format=json&prop=revisions&rvprop=content&rvslots=main&titles=${page1|page2|...}`, + 5. a single page, result in html with: + `action=parse&format=json&page=${page}` (with page being a page link) or + `https://en.wikipedia.org/w/api.php?action=parse&format=json&contentmodel=wikitext&text={{wikitext}}` (wikitext is a wikitext expression) + 6. multiple pages, result in html, will have continuation for more than 1 page with (note rvparse is obsolete): + `action=query&format=json&rvprop=content&rvparse=true&titles=${page1|page2|...}` + +Notes: + 7. combining 5. and 3. as `https://en.wikipedia.org/w/api.php?action=parse&format=json&page=Portal:Current_events/Inclusion` + seems good but does not let us exclude the most recent day if it is too early + 8. combining 5. and 4. 
as `https://en.wikipedia.org/w/api.php?action=parse&format=json&contentmodel=wikitext&text={{Portal:Current events/Inclusion|2025|09|20}}` + variant that fix the above + + - the rendered html still need some processing: + - for inclusion pages, split in separate days + - extract the significant div of each day + - strip css and possibly class/id + - if the result is in wikitext, it needs to be converted to html + +4. is the fastest and current implementation. */ + +function getCurrentEventsDatePath(date: Date): string { + const months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']; + + const year = date.getFullYear(); + const month = months[date.getMonth()]; + const day = date.getDate(); // Not zero-padded + + return `Portal:Current_events/${year}_${month}_${day}`; +} + +// Simple MediaWiki template parser for {{Current events}} template +function parseCurrentEventsTemplate(wikitext: string): string | null { + if (!wikitext || typeof wikitext !== 'string') { + return null; + } + + // Look for {{Current events|content=...}} template + const templateMatch = wikitext.match(/\{\{Current events\s*\|[\s\S]*?content\s*=\s*([\s\S]*?)\}\}/); + if (!templateMatch) { + return null; + } + + return templateMatch[1].trim(); +} + +function convertWikiLinks(html: string): string { + // Convert wiki links [[Link|Text]] or [[Link]] + html = html.replaceAll(/\[\[([^|\]]+)\|([^\]]+)\]\]/g, '$2'); + html = html.replaceAll(/\[\[([^\]]+)\]\]/g, '$1'); + return html; +} + +function convertExternalLinks(html: string): string { + // Convert external links [URL Text] or [URL] + html = html.replaceAll(/\[([^\s\]]+)\s+([^\]]+)\]/g, '$2'); + html = html.replaceAll(/\[([^\s\]]+)\]/g, '$1'); + return html; +} + +function convertTextFormatting(html: string): string { + // Convert bold '''text''' + html = html.replaceAll(/'''([^']+)'''/g, '$1'); + // Convert italic ''text'' + html = html.replaceAll(/''([^']+)''/g, '$1'); 
+ return html; +} + +function processListsAndLines(html: string): string { + const lines = html.split('\n'); + const processedLines: string[] = []; + + for (const line of lines) { + const trimmedLine = line.trim(); + + if (!trimmedLine) { + // Empty line - add paragraph break + processedLines.push('
'); + continue; + } + + // Check for bullet points and convert to proper nesting + const bulletMatch = trimmedLine.match(/^(\*+)\s*(.+)$/); + if (bulletMatch) { + const depth = bulletMatch[1].length; + const content = bulletMatch[2]; + + // Create proper nested list structure + const indent = ' '.repeat(depth - 1); + processedLines.push(`${indent}
') { + result.push('
\n'); + } else if (trimmed) { + result.push(trimmed); + } + } + } + + // Close any remaining open lists + while (currentDepth > 0) { + result.push(' '.repeat(currentDepth - 1) + ''); + currentDepth--; + } + + return result.join(''); +} + +function stripComments(html: string): string { + // Remove HTML comments + return html.replaceAll(//g, ''); +} + +function wrapInParagraphsAndCleanup(html: string): string { + // Clean up multiple paragraph tags and empty paragraphs + html = html.replaceAll(/<\/p>\s*
/g, '
\n'); + html = html.replaceAll(/
\s*
[\s\n\r]*<\/p>/g, ''); + html = html.replaceAll(/
\s*<\/p>/g, ''); + html = html.replaceAll('
', ''); + + // Final cleanup - remove trailing empty paragraphs that might have been added + html = html.replaceAll(/\s*$/g, '').replaceAll(/\s*<\/p>$/g, '');
+
+ return html;
+}
+
+// Wiki markup to HTML converter with proper list handling. Takes the raw wikitext string and returns the string obtained by running it through the helper passes below, in this fixed order.
+function wikiToHtml(wikitext: string): string {
+ let html = wikitext; // working copy; every pass below reassigns it with that pass's output
+
+ // Apply transformations in order: each pass receives the previous pass's result
+ html = convertWikiLinks(html); // handles [[Link|Text]] and [[Link]] wiki-link markup
+ html = convertExternalLinks(html); // handles [URL Text] and [URL] external-link markup
+ html = convertTextFormatting(html); // handles '''bold''' and ''italic'' quote markup
+ html = processListsAndLines(html); // line-by-line pass over blank lines and `*` bullet nesting
+ html = wrapInParagraphsAndCleanup(html); // cleans up repeated and empty paragraph markup
+ html = stripComments(html); // NOTE(review): comment removal runs last, so comment text goes through every pass above first - confirm this ordering is intended
+
+ return html;
+}
+
+async function fetchMultipleWikiContent(pageNames: string[]): Promise