import {MessageLevel} from "./DOM";
import {couldNotConnectMessage, mergeMaps, mergeSets} from "./Shared";


/**
 * A data class for combining a language and page title to identify a page.
 *
 * This is only an _identifier_ of a page, not the page itself. For information on the page such as the links it
 * contains, whether it's a redirect, etc., see the `Page` class.
 */
export class InterlangLink {
    /**
     * The language of the wiki this page is of.
     */
    readonly lang: string;
    /**
     * The title of the page.
     */
    readonly title: string;


    /**
     * Constructs a new interlanguage link.
     *
     * @param lang the language of the wiki this page is of
     * @param title the title of the page
     */
    constructor(lang: string, title: string) {
        this.lang = lang;
        this.title = title;
    }


    /**
     * Returns `true` if and only if the given object equals this `InterlangLink`.
     *
     * @param other the object to compare to this `InterlangLink`
     * @return `true` if and only if the given object equals this `InterlangLink`
     */
    equals(other: unknown): boolean {
        return other instanceof InterlangLink && this.lang === other.lang && this.title === other.title;
    }

    /**
     * Returns `true` if and only if the given object equals this `InterlangLink`, ignoring the case of the titles.
     *
     * The language codes are still compared case-sensitively.
     *
     * @param other the object to compare to this `InterlangLink`
     * @return `true` if and only if the given object equals this `InterlangLink`, ignoring the case of the titles
     */
    equalsIgnoringCase(other: unknown): boolean {
        return other instanceof InterlangLink
            && this.lang === other.lang
            && this.title.toLowerCase() === other.title.toLowerCase();
    }

    /**
     * Converts this `InterlangLink` to a string of the form `lang:title`.
     *
     * @return the string representation of this `InterlangLink`
     */
    toString(): string {
        return `${this.lang}:${this.title}`;
    }

    /**
     * Returns a deep copy of this `InterlangLink`.
     *
     * @return the deep copy
     */
    copy(): InterlangLink {
        return new InterlangLink(this.lang, this.title);
    }
}

/**
 * Redirects one `InterlangLink` to another.
 */
export class Redirect {
    /**
     * The page that redirects.
     */
    readonly from: InterlangLink;
    /**
     * The page that is redirected to.
     */
    readonly to: InterlangLink;


    /**
     * Constructs a new `Redirect`.
     *
     * Both links are copied, so later changes to the arguments do not affect this `Redirect`.
     *
     * @param from the page that redirects
     * @param to the page that is redirected to
     */
    constructor(from: InterlangLink, to: InterlangLink) {
        this.from = from.copy();
        this.to = to.copy();
    }


    /**
     * Returns `true` if and only if the given object equals this `Redirect`.
     *
     * @param other the object to compare to this `Redirect`
     * @return `true` if and only if the given object equals this `Redirect`
     */
    equals(other: unknown): boolean {
        return other instanceof Redirect && this.from.equals(other.from) && this.to.equals(other.to);
    }

    /**
     * Returns a deep copy of this `Redirect`.
     *
     * This is a deep copy because the constructor performs copies of the received variables.
     *
     * @return the deep copy
     */
    copy(): Redirect {
        return new Redirect(this.from, this.to);
    }
}

/**
 * A map of interwiki links, from interwiki prefix to the URL pattern of that wiki's articles.
 *
 * The URL pattern contains `$1` where the article title should be inserted.
 */
export type InterwikiMap = Map<string, string>;

/**
 * Describes a page, i.e. what you get if you follow an `InterlangLink`.
 */
export class Page {
    /**
     * The full URL at which this page is located.
     */
    readonly url: URL;
    /**
     * The interlanguage link describing the location of the page.
     */
    readonly link: InterlangLink;
    /**
     * The interlanguage links contained in this page.
     */
    readonly langLinks: InterlangLink[];
    /**
     * `true` if and only if this page exists.
     */
    readonly exists: boolean;


    /**
     * Constructs a new page.
     *
     * All received objects are copied.
     *
     * @param url the full URL at which this page is located
     * @param link the interlanguage link describing the location of the page
     * @param langLinks the interlanguage links contained in this page
     * @param exists `true` if and only if this page exists
     */
    constructor(url: URL, link: InterlangLink, langLinks: InterlangLink[], exists: boolean) {
        this.url = new URL(url.toString());
        this.link = link.copy();
        this.langLinks = langLinks.map(it => it.copy());
        this.exists = exists;
    }


    /**
     * Returns `true` if and only if this page's language links are sorted alphabetically.
     *
     * Links are compared by their `lang:title` string representation using `localeCompare`. A page with zero or one
     * links is considered sorted.
     *
     * @return `true` if and only if this page's language links are sorted alphabetically
     */
    langLinksAreOrdered(): boolean {
        return this.langLinks.every((langLink, i, self) =>
            i === 0 || self[i - 1].toString().localeCompare(langLink.toString()) <= 0
        );
    }

    /**
     * Returns `true` if and only if this page has multiple links to the same language.
     *
     * @return `true` if and only if this page has multiple links to the same language
     */
    hasDoubleLinks(): boolean {
        // If some language occurs twice, the set of languages is smaller than the list of links
        const langs = new Set(this.langLinks.map(it => it.lang));
        return langs.size !== this.langLinks.length;
    }

    /**
     * Returns a deep copy of this `Page`.
     *
     * This is a deep copy because the constructor performs copies of the received variables.
     *
     * @return the deep copy
     */
    copy(): Page {
        return new Page(this.url, this.link, this.langLinks, this.exists);
    }
}

/**
 * A network of pages linking to each other.
 */
export class InterlangNetwork {
    /**
     * The alphabetically-sorted pages that have been discovered in the network.
     */
    readonly pages: Page[];
    /**
     * The redirects that have been discovered in the network.
     */
    readonly redirects: Redirect[];


    /**
     * Constructs a new `InterlangNetwork`.
     *
     * The pages and redirects are copied, and the copied pages are sorted by the string representation of their link.
     *
     * @param pages the pages linking to each other
     * @param redirects the redirects in the network
     */
    constructor(pages: Page[], redirects: Redirect[]) {
        this.pages = pages
            .map(it => it.copy())
            .sort((a, b) => a.link.toString().localeCompare(b.link.toString()));
        this.redirects = redirects.map(it => it.copy());
    }


    /**
     * Determines whether the given source links to the given destination, potentially through a redirect.
     *
     * Checks are performed from most to least exact: an exact link, a link that differs only in title case, and a
     * link that redirects to the destination. If source and destination are in the same language, any kind of match
     * results in `"self-linked"` and the absence of a match results in `"self-unlinked"`.
     *
     * @param source the source page of which to check the links
     * @param destination the destination that could be linked to
     * @return the checker's verdict of the link
     */
    getLinkVerdict(source: Page, destination: Page): LinkVerdict {
        const target = destination.link;
        const isSelfLangLink = source.link.lang === target.lang;

        if (source.langLinks.some(it => it.equals(target)))
            return isSelfLangLink ? "self-linked" : "linked";

        if (source.langLinks.some(it => it.equalsIgnoringCase(target)))
            return isSelfLangLink ? "self-linked" : "wrongly-cased";

        const linksThroughRedirect = source.langLinks.some(link =>
            this.redirects.some(it => it.from.equals(link) && it.to.equals(target))
        );
        if (linksThroughRedirect)
            return isSelfLangLink ? "self-linked" : "redirected";

        return isSelfLangLink ? "self-unlinked" : "unlinked";
    }

    /**
     * Analyzes the given source page and returns a verdict of its own state and of the state of its link to all other
     * pages in this network.
     *
     * If no problem is found at all, the page's own verdicts consist of only `"perfect"`.
     *
     * @param srcPage the page to give a verdict of
     * @return the checker's verdicts of the page and its outgoing links
     */
    getPageVerdict(srcPage: Page): { self: PageVerdict[], links: Map<InterlangLink, LinkVerdict> } {
        const linkVerdicts = new Map<InterlangLink, LinkVerdict>(
            this.pages.map((dstPage): [InterlangLink, LinkVerdict] =>
                [dstPage.link, this.getLinkVerdict(srcPage, dstPage)]
            )
        );
        const foundVerdicts = new Set<LinkVerdict>(linkVerdicts.values());

        const selfVerdicts: PageVerdict[] = [];
        if (!srcPage.exists) selfVerdicts.push("not-found");
        if (!srcPage.langLinksAreOrdered()) selfVerdicts.push("wrongly-ordered");
        if (srcPage.hasDoubleLinks()) selfVerdicts.push("doubly-linked");
        if (foundVerdicts.has("self-linked")) selfVerdicts.push("self-linked");
        if (foundVerdicts.has("unlinked")) selfVerdicts.push("unlinked");
        if (foundVerdicts.has("redirected")) selfVerdicts.push("redirected");
        if (foundVerdicts.has("wrongly-cased")) selfVerdicts.push("wrongly-cased");
        if (selfVerdicts.length === 0) selfVerdicts.push("perfect");

        return {self: selfVerdicts, links: linkVerdicts};
    }

    /**
     * Returns a verdict on the network.
     *
     * The network is `"broken"` if any page has a verdict listed in `NetworkVerdict.brokenVerdicts`, otherwise
     * `"flawed"` if any page has a verdict listed in `NetworkVerdict.flawedVerdicts`, and `"perfect"` otherwise.
     *
     * @return a verdict on the network
     */
    getNetworkVerdict(): NetworkVerdict {
        const verdicts = [...mergeSets(this.pages.map(page => new Set(this.getPageVerdict(page).self)))];

        if (verdicts.some(verdict => NetworkVerdict.brokenVerdicts.includes(verdict)))
            return "broken";
        if (verdicts.some(verdict => NetworkVerdict.flawedVerdicts.includes(verdict)))
            return "flawed";
        return "perfect";
    }

    /**
     * Returns a deep copy of this `InterlangNetwork`.
     *
     * This is a deep copy because the constructor performs copies of the received variables.
     *
     * @return the deep copy
     */
    copy(): InterlangNetwork {
        return new InterlangNetwork(this.pages, this.redirects);
    }
}

/**
 * Interacts with the API in an asynchronous manner.
 */
export class MediaWiki {
    /**
     * The origin of the wiki's API URL.
     */
    readonly origin: string;
    /**
     * The path relative to the wiki's API; starts with a `/`.
     */
    readonly apiPath: string;
    /**
     * The general information, retrieved from the API.
     */
    general!: { articlepath: string, lang: string };
    /**
     * The interwiki map of this wiki.
     */
    interwikiMap!: InterwikiMap;
    /**
     * The namespaces on this wiki.
     *
     * NOTE(review): the key and value types were reconstructed from how this field is filled in `#init` and read in
     * `#normalize`; confirm against other users of this field.
     */
    namespaces!: Map<string, { id: string, canonical: string, "*": string }>;


    /**
     * Constructs a new MediaWiki object.
     *
     * The first occurrence of `http://` in the given URL is replaced with `https://`.
     *
     * The `#init` method **must** be called before invoking any other function. Behavior is undefined otherwise.
     *
     * @param apiUrl the url to the `api.php` file
     */
    constructor(apiUrl: string) {
        const urlObj = new URL(apiUrl.replace("http://", "https://"));
        this.origin = urlObj.origin;
        this.apiPath = urlObj.pathname;
    }

    /**
     * Initializes this `MediaWiki` object with the necessary information from the API.
     *
     * @return this `MediaWiki` object
     */
    async init(): Promise<MediaWiki> {
        const query = await this.getSiteInfo("general", "interwikimap", "namespaces");

        // Add self to map
        query.interwikimap.push({prefix: query.general.lang, url: query.general.server + query.general.articlepath});

        // Set fields
        this.general = query.general;
        this.interwikiMap = new Map<string, string>(
            query.interwikimap.map((it: { prefix: string, url: string }): [string, string] => [it.prefix, it.url])
        );
        // The JSON response describes namespaces as a plain object keyed by namespace id, which has no `values()`
        // method; convert it so that `#normalize` can iterate over it.
        this.namespaces = query.namespaces instanceof Map
            ? query.namespaces
            : new Map(Object.entries(query.namespaces));

        return this;
    }


    /**
     * Sends a request to the MediaWiki API and returns the parsed JSON response.
     *
     * @param params the parameters to send to the API
     * @return the API's response
     * @throws Error with `couldNotConnectMessage` (as a rejection) if the request fails, the response status is not
     * OK, or the response body cannot be parsed as JSON
     */
    request(params: { [key: string]: string }): Promise<any> {
        const url = this.origin + this.apiPath + "?format=json&origin=*&" + new URLSearchParams(params).toString();
        console.debug(`Requesting from ${this.origin}${this.apiPath} with params`, params, "at", url);

        return fetch(url)
            .then(response => {
                if (!response.ok) throw new Error(couldNotConnectMessage);

                return response.json();
            })
            .catch(() => {
                throw new Error(couldNotConnectMessage);
            });
    }

    /**
     * Requests all language links on the given article.
     *
     * Redirects are followed by the API. A redirect whose target is itself the source of another redirect in the
     * response is collapsed into a single redirect to that final target.
     *
     * @param title the title of the article to return links of
     * @return result the query result, or `undefined` if the article could not be found
     */
    getLangLinks(title: string):
        Promise<{ link: InterlangLink, langLinks: InterlangLink[], redirects: Redirect[] } | undefined> {
        return this
            .request({action: "parse", page: title, prop: "langlinks", redirects: ""})
            .then(response => {
                if (response.error !== undefined) return undefined;

                const langLinks: InterlangLink[] = response.parse.langlinks
                    .map((it: { lang: string, "*": string }) => new InterlangLink(it.lang, it["*"]));

                const redirects: Redirect[] = response.parse.redirects
                    .map((it: { from: string; to: string; }) => new Redirect(this.toLink(it.from), this.toLink(it.to)))
                    .map((redirect: Redirect, _: number, self: Redirect[]) => {
                        // TODO Support triple redirects (#30)
                        // Collapse double redirects: if the target redirects onwards, point at the final target
                        const next = self.find(it => it.from.equals(redirect.to));
                        return next !== undefined ? new Redirect(redirect.from, next.to) : redirect;
                    });

                return {link: this.toLink(response.parse.title), langLinks: langLinks, redirects: redirects};
            });
    }

    /**
     * Returns this wiki's site information.
     *
     * @param props the site information properties to retrieve, such as "general" or "interwikimap"
     * @return the wiki's site information, with each property corresponding to an argument to this method
     */
    getSiteInfo(...props: string[]): any {
        return this.request({action: "query", meta: "siteinfo", siprop: props.join("|")})
            .then(response => response.query);
    }

    /**
     * Normalizes the given link, adjusting its language to this wiki's language and replacing the link's namespace with
     * the canonical namespace.
     *
     * Only the part of the title before the first `:` is considered a namespace. If that part equals the `canonical`
     * name of one of this wiki's namespaces, it is replaced with that namespace's `*` name. Titles without a `:` are
     * kept as they are.
     *
     * @param link the link to normalize
     * @return the normalized link
     */
    normalize(link: InterlangLink): InterlangLink {
        const normalLang = this.general.lang;

        const titleParts = link.title.split(":");
        if (titleParts.length < 2) return new InterlangLink(normalLang, link.title);

        titleParts[0] = [...this.namespaces.values()].reduce(
            (titlePart: string, namespace: { id: string, canonical: string, "*": string }) => {
                return titlePart === namespace["canonical"] ? namespace["*"] : titlePart;
            },
            titleParts[0]
        );

        return new InterlangLink(normalLang, titleParts.join(":"));
    }

    /**
     * Shorthand for converting a title to an `InterlangLink` of this wiki's language.
     *
     * @param title the title of the article to generate a link for
     * @return the link to the article on this wiki
     * @private
     */
    private toLink(title: string): InterlangLink {
        return new InterlangLink(this.general.lang, title);
    }
}

/**
 * Manages a `MediaWiki` instance for different languages, caching retrieved information for re-use.
 */
export class MediaWikiManager {
    /**
     * The combined interwiki map of all `MediaWiki` instances under management of this manager.
     *
     * @private
     */
    private iwMap: InterwikiMap;
    /**
     * The cached `MediaWiki` instances
     */
    mws: Map<string, MediaWiki>;
    /**
     * The language of the base `MediaWiki`, where the exploration starts.
     */
    baseLang!: string;
    /**
     * The path to articles, where `$1` indicates the article name.
     */
    articlePath!: string;
    /**
     * The path to `api.php`.
     */
    apiPath!: string;


    /**
     * Constructs a new MediaWiki manager.
     *
     * The `#init` method **must** be called before invoking any other function. Behavior is undefined otherwise.
     */
    constructor() {
        this.mws = new Map();
        this.iwMap = new Map();
    }

    /**
     * Initializes this `MediaWikiManager`.
     *
     * @param baseMw the `MediaWiki` that is used as a starting point
     * @return this `MediaWikiManager`
     */
    async init(baseMw: MediaWiki): Promise<MediaWikiManager> {
        // Keep the characters of the API path that equal the character at the same index in the article path, and
        // drop the last one; presumably this approximates the path prefix shared by both paths.
        const basePath = [...(baseMw.apiPath)]
            .map((it, i) => it === baseMw.general.articlepath[i] ? it : "")
            .join("")
            .slice(0, -1);
        this.articlePath = baseMw.general.articlepath.slice(basePath.length);
        this.apiPath = baseMw.apiPath.slice(basePath.length);

        this.baseLang = baseMw.general.lang;
        this.mws.set(baseMw.general.lang, baseMw);
        this.updateIwMap();

        return this;
    }


    /**
     * Returns the `MediaWiki` for the given language, creating and initializing it if necessary, or `undefined` if it
     * could not be created.
     *
     * @param lang the language of the `MediaWiki` to return
     * @return the `MediaWiki` for the given language, or `undefined` if it could not be created
     */
    async getMwOrWait(lang: string): Promise<MediaWiki | undefined> {
        if (this.hasMw(lang)) return this.mws.get(lang);
        if (!this.iwMap.has(lang)) return undefined;

        const url = this.iwMap.get(lang);
        if (url === undefined) return undefined;

        let newMw: MediaWiki;
        try {
            // Swap the article path at the end of the interwiki URL for the API path
            newMw = await new MediaWiki(url.slice(0, -this.articlePath.length) + this.apiPath).init();
        } catch (error) {
            // Deliberately best-effort: an unreachable wiki is reported to the caller as `undefined`
            return undefined;
        }

        if (this.hasMw(newMw.general.lang)) {
            // Duplicate MW with different but equivalent language code; destroy new MW instance
            this.mws.set(lang, this.mws.get(newMw.general.lang)!);
        } else {
            this.mws.set(newMw.general.lang, newMw);
            this.mws.set(lang, newMw);
        }
        this.updateIwMap();

        return this.mws.get(lang);
    }

    /**
     * Returns the `MediaWiki` for the given language or `undefined` if it has not created that object.
     *
     * @param lang the language of the `MediaWiki` to return
     * @return the `MediaWiki` for the given language or `undefined` if it has not created that object
     */
    getMw(lang: string): MediaWiki | undefined {
        return this.mws.get(lang);
    }

    /**
     * Returns `true` if and only if this manager has a `MediaWiki` for the given language.
     *
     * @param lang the language of the `MediaWiki` to check presence of
     * @return `true` if and only if this manager has a `MediaWiki` for the given language
     */
    hasMw(lang: string): boolean {
        return this.mws.has(lang);
    }

    /**
     * Returns the URL to the given article.
     *
     * @param link the link to return the URL of
     * @return the URL to the given article
     * @throws Error if the combined interwiki map has no entry for the link's language
     */
    getArticlePath(link: InterlangLink): URL {
        const articlePath = this.iwMap.get(link.lang);
        if (articlePath === undefined) throw new Error(`Could not find article path for '${link}'.`);

        // Use a replacer function so that `$`-sequences in the title (such as `$&` or `$$`) are inserted literally
        // instead of being interpreted as replacement patterns.
        return new URL(articlePath.replace("$1", () => link.title));
    }

    /**
     * Updates the `iwMap` property with the entries in `MediaWiki` instances in this manager.
     *
     * @private
     */
    private updateIwMap(): void {
        this.iwMap = mergeMaps([...this.mws.values()].map(mw => mw.interwikiMap));
    }
}


/**
 * Discovers the interlanguage network, starting from the given link.
 *
 * Pages are visited depth-first: the most recently discovered link is checked next. Each link is visited at most once,
 * both before and after normalization and redirect resolution.
 *
 * @param mwm the manager to use for caching and resolving pages
 * @param title the title of the page to start traversing at
 * @param errorCb a function handling errors and warnings
 * @param progressCb a function handling progress updates
 * @return the discovered network, including pages and redirects
 * @throws Error with `couldNotConnectMessage` (as a rejection) if the wiki of the very first page cannot be reached
 */
export const discoverNetwork = async function(
    mwm: MediaWikiManager,
    title: string,
    errorCb: (level: "error" | "warning" | null, message: string) => void,
    progressCb: (message: string) => void
): Promise<{ pages: Page[], redirects: Redirect[] }> {
    const pages: Page[] = [];
    const redirects: Redirect[] = [];

    const history: InterlangLink[] = [];
    const queue: InterlangLink[] = [new InterlangLink(mwm.baseLang, title)];
    const isVisited = (link: InterlangLink): boolean => history.some(it => it.equals(link));

    while (queue.length > 0) {
        progressCb("Checking " + queue[queue.length - 1] + "");

        let next = queue.pop()!;
        if (isVisited(next)) continue;

        // Normalize
        const nextMw = await mwm.getMwOrWait(next.lang);
        if (nextMw === undefined) {
            history.push(next);
            pages.push(new Page(mwm.getArticlePath(next), next, [], false));

            // Failing on the very first page means the base wiki itself is unreachable
            if (history.length === 1) throw new Error(couldNotConnectMessage);

            errorCb(
                "warning",
                `Could not connect to the wiki for language '${next.lang}'. Maybe the wiki no longer exists?`
            );
            continue;
        }

        next = nextMw.normalize(next);
        if (isVisited(next)) continue;
        history.push(next);

        // Fetch interlang links
        const result = await nextMw.getLangLinks(next.title);
        if (result === undefined) {
            pages.push(new Page(mwm.getArticlePath(next), next, [], false));
            continue;
        }

        // Follow redirects
        if (!result.link.equals(next)) {
            redirects.push(...(result.redirects));

            next = result.link;
            if (isVisited(next)) continue;
            history.push(next);
        }

        // Create `Page` object
        pages.push(new Page(mwm.getArticlePath(next), next, result.langLinks, true));
        queue.push(...(result.langLinks));
    }

    // Normalize links
    pages.forEach(page => {
        page.langLinks.forEach((langLink, idx, self) => {
            const mw = mwm.getMw(langLink.lang);
            // Update link in place using `self[idx] = `
            self[idx] = mw !== undefined ? mw.normalize(langLink) : langLink;
        });
    });

    return {pages: pages, redirects: redirects};
};


/**
 * The verdict that the checker has of a link between two pages.
 *
 * The possible values are listed in decreasing order of importance, so that if a single link has multiple verdicts but
 * only one can be displayed, the one with the highest importance will be displayed.
 */
type LinkVerdict =
    | "linked"
    | "self-linked"
    | "unlinked"
    | "self-unlinked"
    | "redirected"
    | "wrongly-cased";

export namespace LinkVerdict {
    /**
     * Returns UI properties for each link verdict.
     */
    export const props = {
        "linked": {icon: "check", message: "Linked 🙂", style: ["success"]},
        "self-linked": {icon: "rotate-left", message: "Links to its own wiki 😕", style: ["warning"]},
        "unlinked": {icon: "times", message: "Link is missing 😕", style: ["error"]},
        "self-unlinked": {icon: null, message: "", style: []},
        "redirected": {icon: "mail-forward", message: "Links to a redirect 😕", style: ["warning"]},
        "wrongly-cased": {icon: "text-height", message: "Links with incorrect capitalisation 😕", style: ["warning"]},
    };
}

/**
 * The verdict that the checker has of a page.
 *
 * The possible values are listed in decreasing order of importance, so that if a single page has multiple verdicts but
 * only one can be displayed, the one with the highest importance will be displayed.
 */
type PageVerdict =
    | "perfect"
    | "not-found"
    | "wrongly-ordered"
    | "doubly-linked"
    | "self-linked"
    | "unlinked"
    | "redirected"
    | "wrongly-cased";

export namespace PageVerdict {
    /**
     * Returns UI properties for each page verdict.
     */
    export const props = {
        "perfect": {icon: "check", message: "Perfect 🙂", style: ["success"]},
        "not-found": {icon: "search", message: "Article does not exist 😕", style: ["error"]},
        "wrongly-ordered": {icon: "sort-alpha-asc", message: "Links are in the wrong order 😕", style: ["warning"]},
        "doubly-linked": {icon: "clone", message: "Links to the same wiki multiple times 😕", style: ["warning"]},
        "self-linked": {icon: "rotate-left", message: "Links to its own wiki 😕", style: ["warning"]},
        "unlinked": {icon: "chain-broken", message: "Misses one or more links 😕", style: ["error"]},
        "redirected": {icon: "mail-forward", message: "Links to a redirect 😕", style: ["warning"]},
        "wrongly-cased": {icon: "text-height", message: "Links with incorrect capitalisation 😕", style: ["warning"]},
    };
}

/**
 * The verdict that the checker has of a network.
 */
type NetworkVerdict =
    | "perfect"
    | "flawed"
    | "broken";

export namespace NetworkVerdict {
    /**
     * Returns UI properties for each network verdict.
     *
     * NOTE(review): the two-line messages are separated by a newline character. If these messages are rendered as
     * HTML, a `<br />` may be what is needed instead; confirm against the code that displays them.
     */
    export const props = {
        "perfect": {
            message: "A perfect network! 🙂",
            style: "complete" as MessageLevel
        },
        "flawed": {
            message: "The network is complete but flawed 😕\n" +
                "Hover over an icon in the left column for more information.",
            style: "warning" as MessageLevel
        },
        "broken": {
            message: "The network is broken 😞\n" +
                "Hover over an icon in the left column for more information.",
            style: "warning" as MessageLevel
        },
    };

    /**
     * Page verdicts that cause a network to become broken.
     */
    export const brokenVerdicts: PageVerdict[] = ["not-found", "unlinked"];

    /**
     * Page verdicts that cause a network to become flawed.
     */
    export const flawedVerdicts: PageVerdict[] =
        ["wrongly-ordered", "doubly-linked", "self-linked", "redirected", "wrongly-cased"];
}