import {couldNotConnectMessage, mergeStates} from "./Shared";


/**
 * A data class for combining a language and page title to identify a page.
 *
 * This is only an _identifier_ of a page, not the page itself. For information on the page such as the links it
 * contains, whether it exists, etc., see the `Page` class.
 *
 * @property lang {string} the language of the wiki this page is of
 * @property title {string} the title of the page
 */
export class InterlangLink {
    /**
     * Constructs a new `InterlangLink`.
     *
     * @param lang {string} the language of the wiki this page is of
     * @param title {string} the title of the page
     */
    constructor(lang, title) {
        this.lang = lang;
        this.title = title;
    }


    /**
     * Returns `true` if and only if the given object equals this `InterlangLink`.
     *
     * @param other {*} the object to compare to this `InterlangLink`
     * @returns {boolean} `true` if and only if the given object equals this `InterlangLink`
     */
    equals(other) {
        return other instanceof InterlangLink
            && this.lang === other.lang
            && this.title === other.title;
    }

    /**
     * Returns `true` if and only if the given object equals this `InterlangLink`, ignoring the case of the titles.
     *
     * The language is still compared case-sensitively.
     *
     * @param other {*} the object to compare to this `InterlangLink`
     * @returns {boolean} `true` if and only if the given object equals this `InterlangLink`, ignoring the case of the
     * titles
     */
    equalsIgnoringCase(other) {
        return other instanceof InterlangLink
            && this.lang === other.lang
            && this.title.toLowerCase() === other.title.toLowerCase();
    }

    /**
     * Converts this `InterlangLink` to a string of the form `lang:title`.
     *
     * @returns {string} the string representation of this `InterlangLink`
     */
    toString() {
        return `${this.lang}:${this.title}`;
    }

    /**
     * Returns a deep copy of this `InterlangLink`.
     *
     * @returns {InterlangLink} the deep copy
     */
    copy() {
        return new InterlangLink(this.lang, this.title);
    }
}

/**
 * Redirects one `InterlangLink` to another.
 *
 * @property from {InterlangLink} the page that redirects
 * @property to {InterlangLink} the page that is redirected to
 */
export class Redirect {
    /**
     * Constructs a new `Redirect`. Both links are copied, so later changes to the arguments do not affect this
     * `Redirect`.
     *
     * @param from {InterlangLink} the page that redirects
     * @param to {InterlangLink} the page that is redirected to
     */
    constructor(from, to) {
        this.from = from.copy();
        this.to = to.copy();
    }


    /**
     * Returns `true` if and only if the given object equals this `Redirect`.
     *
     * @param other {*} the object to compare to this `Redirect`
     * @returns {boolean} `true` if and only if the given object equals this `Redirect`
     */
    equals(other) {
        return other instanceof Redirect
            && this.from.equals(other.from)
            && this.to.equals(other.to);
    }

    /**
     * Returns a deep copy of this `Redirect`.
     *
     * @returns {Redirect} the deep copy
     */
    copy() {
        return new Redirect(this.from, this.to);
    }
}

/**
 * A map of interwiki links.
 *
 * Not implemented as a map but as a list of objects. Lookups return the first matching entry, so when there are
 * duplicate keys, the original value is always retained.
 *
 * @property map {Array<{prefix: string, url: string}>} maps interwiki prefixes to URLs
 */
export class InterwikiMap {
    /**
     * Constructs a new interwiki map.
     *
     * The entries are copied, and the first occurrence of `http://` in each URL is replaced with `https://`.
     *
     * @param map {Array<{prefix: string, url: string}>} the mapping from interwiki abbreviations to URLs to store in
     * this map
     */
    constructor(map) {
        this.map = map.map(it => ({prefix: it.prefix, url: it.url.replace("http://", "https://")}));
    }


    /**
     * Returns the URL for the given prefix, or `undefined` if the prefix could not be found.
     *
     * @param prefix {string} the prefix to return the URL of
     * @returns {string|undefined} the URL for the given prefix, or `undefined` if the prefix could not be found
     */
    getUrl(prefix) {
        const entry = this.map.find(it => it.prefix === prefix);
        // Guard against a missing entry so that the documented `undefined` is returned instead of a `TypeError`
        return entry === undefined ? undefined : entry.url;
    }

    /**
     * Returns `true` if and only if this map has a URL for the given prefix.
     *
     * @param prefix {string} the prefix to check for
     * @returns {boolean} `true` if and only if this map has a URL for the given prefix
     */
    hasUrl(prefix) {
        return this.map.some(it => it.prefix === prefix);
    }

    /**
     * Returns a deep copy of this `InterwikiMap`.
     *
     * @returns {InterwikiMap} the deep copy
     */
    copy() {
        return new InterwikiMap(this.map);
    }
}

/**
 * Describes a page, i.e. what you get if you follow an `InterlangLink`.
 *
 * @property url {URL} the full URL at which this page is located
 * @property link {InterlangLink} the interlanguage link describing the location of the page
 * @property langLinks {InterlangLink[]} the interlanguage links contained in this page
 * @property exists {boolean} `true` if and only if this page exists
 */
export class Page {
    /**
     * Constructs a new `Page`. The URL, the link, and the language links are all copied.
     *
     * @param url {URL} the full URL at which this page is located
     * @param link {InterlangLink} the interlanguage link describing the location of the page
     * @param langLinks {InterlangLink[]} the interlanguage links contained in this page
     * @param exists {boolean} `true` if and only if this page exists
     */
    constructor(url, link, langLinks, exists) {
        this.url = new URL(url.toString());
        this.link = link.copy();
        this.langLinks = langLinks.map(it => it.copy());
        this.exists = exists;
    }


    /**
     * Returns `true` if and only if this page's language links are sorted alphabetically.
     *
     * Links are compared by their string representation using `localeCompare`. A page with fewer than two links is
     * considered sorted.
     *
     * @returns {boolean} `true` if and only if this page's language links are sorted alphabetically
     */
    langLinksAreOrdered() {
        return this.langLinks.every((langLink, i, self) =>
            i === 0 || self[i - 1].toString().localeCompare(langLink.toString()) <= 0
        );
    }

    /**
     * Returns `true` if and only if this page has multiple links to the same language.
     *
     * @return {boolean} `true` if and only if this page has multiple links to the same language
     */
    hasDoubleLinks() {
        const langs = this.langLinks.map(it => it.lang);
        // A set drops duplicates, so a smaller set means at least one language occurs more than once
        return new Set(langs).size !== langs.length;
    }

    /**
     * Returns a deep copy of this `Page`.
     *
     * @returns {Page} the deep copy
     */
    copy() {
        return new Page(this.url, this.link, this.langLinks, this.exists);
    }
}

/**
 * A network of pages linking to each other.
 *
 * @property pages {Page[]} the pages linking to each other, sorted alphabetically
 * @property redirects {Redirect[]} the redirects in the network
 */
export class InterlangNetwork {
    /**
     * Constructs a new `InterlangNetwork`. The pages and redirects are copied, and the pages are sorted by the string
     * representation of their link.
     *
     * @param pages {Page[]} the pages linking to each other
     * @param redirects {Redirect[]} the redirects in the network
     */
    constructor(pages, redirects) {
        this.pages = pages
            .map(it => it.copy())
            .sort((a, b) => a.link.toString().localeCompare(b.link.toString()));
        this.redirects = redirects.map(it => it.copy());
    }


    /**
     * Determines whether the given source links to the given destination, potentially through a redirect.
     *
     * The checks are tried in order: an exact link, a link that differs only in the case of the title, and a link
     * that redirects to the destination. If the source and destination have the same language, every kind of link is
     * reported as `"self-linked"`, and the absence of a link as `"self-unlinked"`.
     *
     * @param source {Page} the source page of which to check the links
     * @param destination {Page} the destination that could be linked to
     * @returns {"linked"|"self-linked"|"unlinked"|"self-unlinked"|"redirected"|"wrongly-cased"} the status of the
     * link
     */
    getLinkVerdict(source, destination) {
        const target = destination.link;
        const isSelfLangLink = source.link.lang === target.lang;

        if (source.langLinks.some(it => it.equals(target)))
            return isSelfLangLink ? "self-linked" : "linked";

        if (source.langLinks.some(it => it.equalsIgnoringCase(target)))
            return isSelfLangLink ? "self-linked" : "wrongly-cased";

        const linksThroughRedirect = source.langLinks.some(link =>
            this.redirects.some(it => it.from.equals(link) && it.to.equals(target))
        );
        if (linksThroughRedirect)
            return isSelfLangLink ? "self-linked" : "redirected";

        return isSelfLangLink ? "self-unlinked" : "unlinked";
    }

    /**
     * Analyzes the given source page and returns a verdict of its own state and of the state of its link to all other
     * pages in this network.
     *
     * @param srcPage {Page} the page to give a verdict of
     * @return verdict {Object} the verdict
     * @return verdict.self {("perfect"|"not-found"|"wrongly-ordered"|"doubly-linked"|"self-linked"|"unlinked"|
     * "redirected"|"wrongly-cased")[]} the verdict of the page in relation to the entire network; contains only
     * `"perfect"` if none of the other states apply
     * @return verdict.pages {Object[]} the verdicts of the page in relation to each other article in the network
     * @return verdict.pages[].page {Page} the page that the verdict is in relation to
     * @return verdict.pages[].verdict {"linked"|"self-linked"|"unlinked"|"self-unlinked"|"redirected"|
     * "wrongly-cased"} the verdict of the relation of the given page to this page
     */
    getPageVerdict(srcPage) {
        const pageStates = this.pages.map(dstPage => ({page: dstPage, verdict: this.getLinkVerdict(srcPage, dstPage)}));
        const hasVerdict = verdict => pageStates.some(it => it.verdict === verdict);

        const selfStates = [];
        if (!srcPage.exists) selfStates.push("not-found");
        if (!srcPage.langLinksAreOrdered()) selfStates.push("wrongly-ordered");
        if (srcPage.hasDoubleLinks()) selfStates.push("doubly-linked");
        // Link verdicts that carry over to the page itself; "linked" and "self-unlinked" are not problems
        for (const verdict of ["self-linked", "unlinked", "redirected", "wrongly-cased"]) {
            if (hasVerdict(verdict)) selfStates.push(verdict);
        }
        if (selfStates.length === 0) selfStates.push("perfect");

        return {self: selfStates, pages: pageStates};
    }

    /**
     * Returns a verdict on the network.
     *
     * Each page is classified as `"broken"` (not found, or missing a link), `"flawed"` (any other problem), or
     * `"perfect"`, and the classifications are combined using `mergeStates`.
     * NOTE(review): `mergeStates` is defined in `./Shared`; presumably it retains the worst of the two states given
     * the ordering in `states` -- confirm there.
     *
     * @return {"perfect"|"flawed"|"broken"} a verdict on the network
     */
    getVerdict() {
        const states = ["broken", "flawed", "perfect"];
        const brokenVerdicts = ["not-found", "unlinked"];
        const flawedVerdicts = ["wrongly-ordered", "doubly-linked", "self-linked", "redirected", "wrongly-cased"];

        return this.pages.reduce((state, page) => {
            const verdict = this.getPageVerdict(page).self;

            if (verdict.some(it => brokenVerdicts.includes(it)))
                return mergeStates(states, state, "broken");
            if (verdict.some(it => flawedVerdicts.includes(it)))
                return mergeStates(states, state, "flawed");
            return mergeStates(states, state, "perfect");
        }, "perfect");
    }

    /**
     * Returns a deep copy of this `InterlangNetwork`.
     *
     * @returns {InterlangNetwork} the deep copy
     */
    copy() {
        return new InterlangNetwork(this.pages, this.redirects);
    }
}


/**
 * Interacts with the API in an asynchronous manner.
 *
 * The `general`, `interwikiMap`, and `namespaces` properties are only available after `#init` has completed.
 *
 * @property origin {string} the origin of the wiki's API
 * @property apiPath {string} the path to the wiki's API relative to the origin; starts with a `/`
 * @property general {Object} the general information, retrieved from the API
 * @property interwikiMap {InterwikiMap} the interwiki map of this wiki
 * @property namespaces {Object.<number, Object>} the namespaces on this wiki
 */
export class MediaWiki {
    /**
     * Constructs a new MediaWiki object.
     *
     * @param apiUrl {string} the url to the `api.php` file
     * @throws {TypeError} if `apiUrl` is not a valid URL
     */
    constructor(apiUrl) {
        const urlObj = new URL(apiUrl);
        this.origin = urlObj.origin;
        this.apiPath = urlObj.pathname;
    }

    /**
     * Initializes this `MediaWiki` object with the necessary information from the API.
     *
     * @returns {Promise<MediaWiki>} this `MediaWiki` object
     */
    async init() {
        const query = await this.getSiteInfo("general", "interwikimap", "namespaces");

        // Add self to map
        query.interwikimap.push({prefix: query.general.lang, url: query.general.server + query.general.articlepath});

        // Set fields
        this.general = query.general;
        this.interwikiMap = new InterwikiMap(query.interwikimap);
        this.namespaces = query.namespaces;

        return this;
    }


    /**
     * Sends a request to the MediaWiki API and returns the parsed JSON response.
     *
     * @param params {Object} the parameters to send to the API
     * @return {Promise} the API's response; rejects with an `Error` with message `couldNotConnectMessage` if the
     * request fails, the response status is not OK, or the response is not valid JSON
     */
    request(params) {
        const url = this.origin + this.apiPath + "?format=json&origin=*&" + new URLSearchParams(params).toString();
        console.debug(`Requesting from ${this.origin}${this.apiPath} with params`, params, "at", url);

        return fetch(url)
            .then(response => {
                if (!response.ok) throw new Error(couldNotConnectMessage);

                return response.json();
            })
            .catch(() => {
                // Every failure is reported to the caller as the same connection error
                throw new Error(couldNotConnectMessage);
            });
    }

    /**
     * Requests all language links on the given article.
     *
     * @param title {string} the title of the article to return links of
     * @return result {Object|undefined} the query result, or `undefined` if the API responded with an error, e.g.
     * because the article could not be found
     * @return result.link {InterlangLink} the normalized, redirect-resolved link to the article
     * @return result.langLinks {InterlangLink[]} the language links on the article
     * @return result.redirects {Redirect[]} all redirects that were encountered, with double redirects removed
     */
    getLangLinks(title) {
        return this
            .request({action: "parse", page: title, prop: "langlinks", redirects: ""})
            .then(response => {
                if (response.error !== undefined) return undefined;

                const langLinks = response.parse.langlinks.map(it => new InterlangLink(it.lang, it["*"]));

                const rawRedirects = response.parse.redirects
                    .map(it => new Redirect(this._toLink(it.from), this._toLink(it.to)));
                const redirects = rawRedirects.map(redirect => {
                    // TODO Support triple redirects (#30)
                    // If the target of this redirect is itself redirected, point this redirect at the final target
                    const followUp = rawRedirects.find(it => it.from.equals(redirect.to));
                    return followUp === undefined ? redirect : new Redirect(redirect.from, followUp.to);
                });

                return {link: this._toLink(response.parse.title), langLinks: langLinks, redirects: redirects};
            });
    }

    /**
     * Returns this wiki's site information.
     *
     * @param props {...string} the site information properties to retrieve, such as "general" or "interwikimap"
     * @returns {Promise<Object>} the wiki's site information, with each property corresponding to an argument to
     * this method
     */
    getSiteInfo(...props) {
        return this
            .request({action: "query", meta: "siteinfo", siprop: props.join("|")})
            .then(response => response.query);
    }

    /**
     * Normalizes the given link, adjusting its language to this wiki's language and replacing the link's namespace with
     * the local name of that namespace if the link uses the canonical name.
     *
     * The given link is not modified. Titles without a `:` are considered to have no namespace.
     *
     * @param link {InterlangLink} the link to normalize
     * @returns {InterlangLink} the normalized link
     */
    normalize(link) {
        const normalLink = link.copy();
        normalLink.lang = this.general.lang;

        const titleParts = normalLink.title.split(":");
        if (titleParts.length < 2) return normalLink;

        let namespacePart = titleParts[0];
        for (const namespace of Object.values(this.namespaces)) {
            if (namespacePart === namespace["canonical"]) namespacePart = namespace["*"];
        }
        titleParts[0] = namespacePart;

        normalLink.title = titleParts.join(":");
        return normalLink;
    }


    /**
     * Shorthand for converting a title to an `InterlangLink` of this wiki's language.
     *
     * @param title {string} the title of the article to generate a link for
     * @returns {InterlangLink} the link to the article on this wiki
     * @private
     */
    _toLink(title) {
        return new InterlangLink(this.general.lang, title);
    }
}

/**
 * Manages a `MediaWiki` instance for different languages, caching retrieved information for re-use.
 *
 * @property mws {Object.<string, MediaWiki>} the cached `MediaWiki` instances, by language
 * @property basePath {string} the part of the path that the article path and the API path have in common
 * @property articlePath {string} the path to articles relative to `basePath`, where `$1` indicates the article name
 * @property apiPath {string} the path to `api.php` relative to `basePath`
 * @property baseLang {string} the language of the base `MediaWiki`, where the exploration starts
 */
export class MediaWikiManager {
    /**
     * Constructs a new `MediaWikiManager`.
     *
     * The `#init` method **must** be called before invoking any other function. Behavior is undefined otherwise.
     */
    constructor() {
        this.mws = {};
        this._iwMap = new InterwikiMap([]);
    }

    /**
     * Initializes this `MediaWikiManager`.
     *
     * @param baseMw {MediaWiki} the initialized `MediaWiki` that is used as a starting point
     * @return {Promise<MediaWikiManager>} this `MediaWikiManager`
     */
    async init(baseMw) {
        const fullApiPath = baseMw.apiPath;
        const fullArticlePath = baseMw.general.articlepath;

        // Only the leading run of equal characters is a common prefix; equal characters further on are coincidental
        let prefixLength = 0;
        while (prefixLength < fullApiPath.length && fullApiPath[prefixLength] === fullArticlePath[prefixLength])
            prefixLength++;

        // The last character of the common prefix is kept out of the base path, as it was before
        this.basePath = fullApiPath.slice(0, Math.max(prefixLength - 1, 0));
        this.articlePath = fullArticlePath.slice(this.basePath.length);
        this.apiPath = fullApiPath.slice(this.basePath.length);

        this.baseLang = baseMw.general.lang;
        this.mws[baseMw.general.lang] = baseMw;
        this._updateIwMap();

        return this;
    }


    /**
     * Returns the `MediaWiki` for the given language, creating and initializing it if necessary, or `undefined` if it
     * could not be created.
     *
     * The API URL of a new wiki is derived from its interwiki URL by replacing this manager's `articlePath` with this
     * manager's `apiPath`.
     *
     * @param lang {string} the language of the `MediaWiki` to return
     * @returns {Promise<MediaWiki|undefined>} the `MediaWiki` for the given language, or `undefined` if it could not
     * be created
     */
    async getMwOrWait(lang) {
        if (this.hasMw(lang)) return this.mws[lang];
        if (!this._iwMap.hasUrl(lang)) return undefined;

        const url = this._iwMap.getUrl(lang);
        let newMw;
        try {
            newMw = await new MediaWiki(url.slice(0, -this.articlePath.length) + this.apiPath).init();
        } catch (error) {
            // Deliberately best-effort: a wiki that cannot be reached is reported as `undefined`
            return undefined;
        }

        if (this.hasMw(newMw.general.lang)) {
            // Duplicate MW with different but equivalent language code; destroy new MW instance
            this.mws[lang] = this.mws[newMw.general.lang];
        } else {
            this.mws[newMw.general.lang] = newMw;
            this.mws[lang] = newMw;
        }
        this._updateIwMap();

        return this.mws[lang];
    }

    /**
     * Returns the `MediaWiki` for the given language or `undefined` if it has not created that object.
     *
     * @param lang {string} the language of the `MediaWiki` to return
     * @returns {MediaWiki|undefined} the `MediaWiki` for the given language or `undefined` if it has not created that
     * object
     */
    getMw(lang) {
        return this.mws[lang];
    }

    /**
     * Returns `true` if and only if this manager has a `MediaWiki` for the given language.
     *
     * @param lang {string} the language of the `MediaWiki` to check presence of
     * @returns {boolean} `true` if and only if this manager has a `MediaWiki` for the given language
     */
    hasMw(lang) {
        return this.mws[lang] !== undefined;
    }

    /**
     * Returns the URL to the given article.
     *
     * @param link {InterlangLink} the link to return the URL of
     * @returns {URL} the URL to the given article
     * @throws {TypeError} if the language of the link is not in the interwiki map, or if the resulting URL is invalid
     */
    getArticlePath(link) {
        return new URL(this._iwMap.getUrl(link.lang).replace("$1", link.title));
    }


    /**
     * Updates the `_iwMap` property with the entries in `MediaWiki` instances in this manager.
     */
    _updateIwMap() {
        const maps = Object.values(this.mws).map(mw => mw.interwikiMap.map);
        this._iwMap = new InterwikiMap([].concat(...maps));
    }
}


/**
 * Discovers the interlanguage network, starting from the given link.
 *
 * @param mwm {MediaWikiManager} the manager to use for caching and resolving pages
 * @param title {string} the title of the page to start traversing at
 * @param [errorCb] {function("error"|"warning"|null, *): void} a function handling errors and warnings
 * @param [progressCb] {function(*): void} a function handling progress updates
 * @returns network {Object} the discovered network
 * @returns network.pages {Page[]} the pages in the network
 * @returns network.redirects {Redirect[]} the redirects in the network
 * @throws {Error} with message `couldNotConnectMessage` if no wiki could be obtained for the very first page
 */
export const discoverNetwork = async function(mwm, title, errorCb, progressCb) {
    // Both callbacks are optional, so fall back to no-ops when they are not given
    const reportError = typeof errorCb === "function" ? errorCb : () => {};
    const reportProgress = typeof progressCb === "function" ? progressCb : () => {};

    const pages = [];
    const redirects = [];

    const history = [];
    const isVisited = link => history.some(it => it.equals(link));

    const queue = [new InterlangLink(mwm.baseLang, title)];
    while (queue.length > 0) {
        let next = queue.pop();
        reportProgress("Checking " + next + "");

        if (isVisited(next)) continue;

        // Normalize
        const nextMw = await mwm.getMwOrWait(next.lang);
        if (nextMw === undefined) {
            history.push(next);
            pages.push(new Page(mwm.getArticlePath(next), next, [], false));

            // Without the wiki of the very first page there is nothing to explore
            if (history.length === 1) throw new Error(couldNotConnectMessage);

            reportError(
                "warning",
                `Could not connect to the wiki for language '${next.lang}'. Maybe the wiki no longer exists?`
            );
            continue;
        }

        next = nextMw.normalize(next);
        if (isVisited(next)) continue;
        history.push(next);

        // Fetch interlang links
        const result = await nextMw.getLangLinks(next.title);
        if (result === undefined) {
            pages.push(new Page(mwm.getArticlePath(next), next, [], false));
            continue;
        }

        // Follow redirects
        if (!result.link.equals(next)) {
            redirects.push(...(result.redirects));

            next = result.link;
            if (isVisited(next)) continue;
            history.push(next);
        }

        // Create `Page` object
        pages.push(new Page(mwm.getArticlePath(next), next, result.langLinks, true));
        queue.push(...(result.langLinks));
    }

    // Normalize links
    pages.forEach(page => {
        page.langLinks = page.langLinks.map(langLink => {
            const mw = mwm.getMw(langLink.lang);
            return mw !== undefined ? mw.normalize(langLink) : langLink;
        });
    });

    return {pages: pages, redirects: redirects};
};