interlanguage-checker/src/main/js/MediaWiki.js

653 lines
23 KiB
JavaScript

import {couldNotConnectMessage, mergeStates} from "./Shared";
/**
* A data class for combining a language and page title to identify a page.
*
* This is only an _identifier_ of a page, not the page itself. For information on the page such as the links it
* contains, whether it's a redirect, etc., see the `Page` class.
*
* @property lang {string} the language of the wiki this page is of
* @property title {string} the title of the page
*/
export class InterlangLink {
    /**
     * Creates an identifier for a page from a wiki language and a page title.
     *
     * @param lang {string} the language of the wiki this page is of
     * @param title {string} the title of the page
     */
    constructor(lang, title) {
        this.lang = lang;
        this.title = title;
    }

    /**
     * Checks for exact equality: the other object must be an `InterlangLink` with the same language and the same
     * (case-sensitive) title.
     *
     * @param other {*} the object to compare to this `InterlangLink`
     * @returns {boolean} `true` if and only if the given object equals this `InterlangLink`
     */
    equals(other) {
        if (!(other instanceof InterlangLink))
            return false;

        return this.lang === other.lang && this.title === other.title;
    }

    /**
     * Checks for equality like `equals`, except that the titles are compared after lower-casing both of them. The
     * languages must still match exactly.
     *
     * @param other {*} the object to compare to this `InterlangLink`
     * @returns {boolean} `true` if and only if the given object equals this `InterlangLink`, ignoring the case of
     * the titles
     */
    equalsIgnoringCase(other) {
        if (!(other instanceof InterlangLink))
            return false;
        if (this.lang !== other.lang)
            return false;

        return this.title.toLowerCase() === other.title.toLowerCase();
    }

    /**
     * Formats this link as `lang:title`.
     *
     * @returns {string} the string representation of this `InterlangLink`
     */
    toString() {
        return `${this.lang}:${this.title}`;
    }

    /**
     * Creates a new `InterlangLink` with the same language and title.
     *
     * @returns {InterlangLink} the copy
     */
    copy() {
        const {lang, title} = this;
        return new InterlangLink(lang, title);
    }
}
/**
* Redirects one `InterlangLink` to another.
*
* @property from {InterlangLink} the page that redirects
* @property to {InterlangLink} the page that is redirected to
*/
export class Redirect {
    /**
     * Creates a redirect between two links. Both links are copied, so later changes to the arguments do not affect
     * this `Redirect`.
     *
     * @param from {InterlangLink} the page that redirects
     * @param to {InterlangLink} the page that is redirected to
     */
    constructor(from, to) {
        const source = from.copy();
        const target = to.copy();

        this.from = source;
        this.to = target;
    }

    /**
     * Compares this redirect to another object; two redirects are equal when both their sources and their targets
     * are equal.
     *
     * @param other {*} the object to compare to this `Redirect`
     * @returns {boolean} `true` if and only if the given object equals this `Redirect`
     */
    equals(other) {
        if (!(other instanceof Redirect))
            return false;

        return this.from.equals(other.from) && this.to.equals(other.to);
    }

    /**
     * Creates a new `Redirect` with copies of this redirect's source and target.
     *
     * @returns {Redirect} the deep copy
     */
    copy() {
        const {from, to} = this;
        return new Redirect(from, to);
    }
}
/**
* A map of interwiki links.
*
* Not implemented as a map but as a list of objects. Therefore, when there are duplicate keys, the original value is
* always retained.
*
* @property map {Array<{prefix: string, url: string}>} maps interwiki prefixes to URLs
*/
export class InterwikiMap {
    /**
     * Constructs a new interwiki map.
     *
     * Every entry is copied into a fresh object, and a leading `http://` scheme is upgraded to `https://`. Only the
     * scheme at the very start of the URL is rewritten; an `http://` that occurs later in the URL (for example as
     * part of an archive link) is left alone.
     *
     * @param map {Array<{prefix: string, url: string}>} the mapping from interwiki abbreviations to URLs to store in
     * this map
     */
    constructor(map) {
        this.map = map.map(({prefix, url}) => ({prefix, url: url.replace(/^http:\/\//, "https://")}));
    }

    /**
     * Returns the URL for the given prefix, or `undefined` if the prefix could not be found.
     *
     * If the prefix occurs more than once, the URL of the first entry is returned.
     *
     * @param prefix {string} the prefix to return the URL of
     * @returns {string|undefined} the URL for the given prefix, or `undefined` if the prefix could not be found
     */
    getUrl(prefix) {
        const entry = this.map.find(it => it.prefix === prefix);
        // Guard against a missing entry instead of dereferencing `undefined`
        return entry === undefined ? undefined : entry.url;
    }

    /**
     * Returns `true` if and only if this map has a URL for the given prefix.
     *
     * @param prefix {string} the prefix to check for
     * @returns {boolean} `true` if and only if this map has a URL for the given prefix
     */
    hasUrl(prefix) {
        return this.map.some(it => it.prefix === prefix);
    }

    /**
     * Returns a deep copy of this `InterwikiMap`.
     *
     * @returns {InterwikiMap} the deep copy
     */
    copy() {
        return new InterwikiMap(this.map);
    }
}
/**
* Describes a page, i.e. what you get if you follow an `InterlangLink`.
*
* @property url {URL} the full URL at which this page is located
* @property link {InterlangLink} the interlanguage link describing the location of the page
* @property langLinks {InterlangLink[]} the interlanguage links contained in this page
* @property exists {boolean} `true` if and only if this page exists
*/
export class Page {
    /**
     * Creates a description of a page. The URL, the link, and every language link are copied, so the new `Page`
     * does not share them with the caller.
     *
     * @param url {URL} the full URL at which this page is located
     * @param link {InterlangLink} the interlanguage link describing the location of the page
     * @param langLinks {InterlangLink[]} the interlanguage links contained in this page
     * @param exists {boolean} `true` if and only if this page exists
     */
    constructor(url, link, langLinks, exists) {
        this.url = new URL(url.toString());
        this.link = link.copy();
        this.langLinks = langLinks.map(langLink => langLink.copy());
        this.exists = exists;
    }

    /**
     * Checks whether the string forms of this page's language links are in non-descending `localeCompare` order.
     * A page with zero or one language links is always considered ordered.
     *
     * @returns {boolean} `true` if and only if this page's language links are sorted alphabetically
     */
    langLinksAreOrdered() {
        const keys = this.langLinks.map(langLink => langLink.toString());

        // Each entry only has to be compared with its direct predecessor
        return keys.every((key, index) => index === 0 || keys[index - 1].localeCompare(key) <= 0);
    }

    /**
     * Checks whether at least two of this page's language links point to the same language.
     *
     * @return {boolean} `true` if and only if this page has multiple links to the same language
     */
    hasDoubleLinks() {
        const seenLangs = new Set();

        for (const {lang} of this.langLinks) {
            if (seenLangs.has(lang))
                return true;
            seenLangs.add(lang);
        }

        return false;
    }

    /**
     * Creates a new `Page` from copies of this page's URL, link, and language links.
     *
     * @returns {Page} the deep copy
     */
    copy() {
        const {url, link, langLinks, exists} = this;
        return new Page(url, link, langLinks, exists);
    }
}
/**
* A network of pages linking to each other.
*
* @property pages {Page[]} the pages linking to each other, sorted alphabetically
* @property redirects {Redirect[]} the redirects in the network
*/
export class InterlangNetwork {
    /**
     * Creates a network from the given pages and redirects. All pages and redirects are copied, and the copied pages
     * are sorted by the string form of their link using `localeCompare`.
     *
     * @param pages {Page[]} the pages linking to each other
     * @param redirects {Redirect[]} the redirects in the network
     */
    constructor(pages, redirects) {
        const byLink = (a, b) => a.link.toString().localeCompare(b.link.toString());

        const pageCopies = pages.map(page => page.copy());
        pageCopies.sort(byLink);

        this.pages = pageCopies;
        this.redirects = redirects.map(redirect => redirect.copy());
    }

    /**
     * Determines how the given source relates to the given destination.
     *
     * The checks are tried in this order: an exact link, a link that only differs in the case of the title, and a
     * link that reaches the destination through one of this network's redirects. If source and destination are in
     * the same language, any of these results is reported as `"self-linked"`, and the absence of a link is reported
     * as `"self-unlinked"`.
     *
     * @param source {Page} the source page of which to check the links
     * @param destination {Page} the destination that could be linked to
     * @returns {"linked"|"self-linked"|"unlinked"|"self-unlinked"|"redirected"|"wrongly-cased"} the status of the
     * link
     */
    getLinkVerdict(source, destination) {
        const target = destination.link;
        const sameLang = source.link.lang === target.lang;
        const links = source.langLinks;

        let verdict;
        if (links.some(link => link.equals(target)))
            verdict = "linked";
        else if (links.some(link => link.equalsIgnoringCase(target)))
            verdict = "wrongly-cased";
        else if (links.some(link => this.redirects.some(redirect => redirect.equals(new Redirect(link, target)))))
            verdict = "redirected";
        else
            return sameLang ? "self-unlinked" : "unlinked";

        return sameLang ? "self-linked" : verdict;
    }

    /**
     * Analyzes the given source page and returns a verdict of its own state and of the state of its link to all
     * pages in this network.
     *
     * The `self` verdicts are always listed in the fixed order `"not-found"`, `"wrongly-ordered"`,
     * `"doubly-linked"`, `"self-linked"`, `"unlinked"`, `"redirected"`, `"wrongly-cased"`; if none applies, the
     * list is `["perfect"]`.
     *
     * @param srcPage {Page} the page to give a verdict of
     * @return verdict {Object} the verdict
     * @return verdict.self {("perfect"|"not-found"|"wrongly-ordered"|"doubly-linked"|"self-linked"|"unlinked"|
     * "redirected"|"wrongly-cased")[]} the verdict of the page in relation to the entire network
     * @return verdict.pages {Object[]} the verdicts of the page in relation to each page in the network
     * @return verdict.pages[].page {Page} the page that the verdict is in relation to
     * @return verdict.pages[].verdict {"linked"|"self-linked"|"unlinked"|"self-unlinked"|"redirected"|
     * "wrongly-cased"} the verdict of the relation of the given page to this page
     */
    getPageVerdict(srcPage) {
        const pages = this.pages.map(page => ({page, verdict: this.getLinkVerdict(srcPage, page)}));
        const linkVerdicts = new Set(pages.map(({verdict}) => verdict));

        // Pairs of [state, whether the state applies], in the order in which states are reported
        const checks = [
            ["not-found", !srcPage.exists],
            ["wrongly-ordered", !srcPage.langLinksAreOrdered()],
            ["doubly-linked", srcPage.hasDoubleLinks()],
            ["self-linked", linkVerdicts.has("self-linked")],
            ["unlinked", linkVerdicts.has("unlinked")],
            ["redirected", linkVerdicts.has("redirected")],
            ["wrongly-cased", linkVerdicts.has("wrongly-cased")],
        ];

        const self = checks.filter(([, applies]) => applies).map(([state]) => state);
        if (self.length === 0)
            self.push("perfect");

        return {self, pages};
    }

    /**
     * Returns a verdict on the network as a whole.
     *
     * Each page is classified as `"broken"` (it was not found or misses a link), `"flawed"` (any other problem), or
     * `"perfect"`, and the per-page results are combined using `mergeStates`.
     *
     * @return {"perfect"|"flawed"|"broken"} a verdict on the network
     */
    getVerdict() {
        const ranking = ["broken", "flawed", "perfect"];
        const breakingStates = ["not-found", "unlinked"];
        const flawStates = ["wrongly-ordered", "doubly-linked", "self-linked", "redirected", "wrongly-cased"];

        let overall = "perfect";
        for (const page of this.pages) {
            const selfStates = this.getPageVerdict(page).self;

            let pageState = "perfect";
            if (selfStates.some(state => breakingStates.includes(state)))
                pageState = "broken";
            else if (selfStates.some(state => flawStates.includes(state)))
                pageState = "flawed";

            overall = mergeStates(ranking, overall, pageState);
        }
        return overall;
    }

    /**
     * Creates a new `InterlangNetwork` from copies of this network's pages and redirects.
     *
     * @returns {InterlangNetwork} the deep copy
     */
    copy() {
        return new InterlangNetwork(this.pages, this.redirects);
    }
}
/**
* Interacts with the API in an asynchronous manner.
*
* @property origin {string} the origin of the wiki's API
* @property apiPath {string} the path of the wiki's API relative to the origin; starts with a `/`
* @property general {Object} the general information, retrieved from the API
* @property interwikiMap {InterwikiMap} the interwiki map of this wiki
* @property namespaces {Object.<number, Object>} the namespaces on this wiki
*/
export class MediaWiki {
    /**
     * Constructs a new MediaWiki object.
     *
     * Only the origin and the path of the given URL are stored; `init` must be called before any method that relies
     * on the `general`, `interwikiMap`, or `namespaces` fields.
     *
     * @param apiUrl {string} the url to the `api.php` file
     */
    constructor(apiUrl) {
        const urlObj = new URL(apiUrl);
        this.origin = urlObj.origin;
        this.apiPath = urlObj.pathname;
    }

    /**
     * Initializes this `MediaWiki` object with the necessary information from the API.
     *
     * @returns {Promise<MediaWiki>} this `MediaWiki` object
     */
    async init() {
        const query = await this.getSiteInfo("general", "interwikimap", "namespaces");

        // Add self to map; appended last, so an existing entry with the same prefix takes precedence on lookup
        query.interwikimap.push({prefix: query.general.lang, url: query.general.server + query.general.articlepath});

        // Set fields
        this.general = query.general;
        this.interwikiMap = new InterwikiMap(query.interwikimap);
        this.namespaces = query.namespaces;

        return this;
    }

    /**
     * Sends a request to the MediaWiki API and returns the parsed JSON response.
     *
     * Any failure (network error, non-OK HTTP status, or a response that is not valid JSON) results in a rejection
     * with an `Error` whose message is `couldNotConnectMessage`.
     *
     * @param params {Object} the parameters to send to the API
     * @return {Promise<Object>} the API's response
     */
    request(params) {
        const url = this.origin + this.apiPath + "?format=json&origin=*&" + new URLSearchParams(params).toString();
        console.debug(`Requesting from ${this.origin}${this.apiPath} with params`, params, "at", url);

        return fetch(url)
            .then(response => {
                if (!response.ok) throw new Error(couldNotConnectMessage);

                return response.json();
            })
            .catch(() => {
                throw new Error(couldNotConnectMessage);
            });
    }

    /**
     * Requests all language links on the given article.
     *
     * Double redirects are collapsed: if the response contains both `A -> B` and `B -> C`, then `A -> B` is
     * reported as `A -> C`. Only a single extra hop is followed.
     *
     * @param title {string} the title of the article to return links of
     * @return result {Object|undefined} the query result, or `undefined` if the article could not be found
     * @return result.link {InterlangLink} the normalized, redirect-resolved link to the article
     * @return result.langLinks {InterlangLink[]} the language links on the article
     * @return result.redirects {Redirect[]} all redirects that were encountered, with double redirects removed
     */
    getLangLinks(title) {
        return this
            .request({action: "parse", page: title, prop: "langlinks", redirects: ""})
            .then(response => {
                if (response.error !== undefined)
                    return undefined;

                const langLinks = response.parse.langlinks.map(it => new InterlangLink(it.lang, it["*"]));

                const rawRedirects = response.parse.redirects
                    .map(it => new Redirect(this._toLink(it.from), this._toLink(it.to)));
                const redirects = rawRedirects.map(redirect => {
                    // TODO Support triple redirects (#30)
                    // A redirect whose target is itself the source of another redirect is a double redirect
                    const next = rawRedirects.find(it => it.from.equals(redirect.to));
                    return next === undefined ? redirect : new Redirect(redirect.from, next.to);
                });

                return {link: this._toLink(response.parse.title), langLinks: langLinks, redirects: redirects};
            });
    }

    /**
     * Returns this wiki's site information.
     *
     * @param props {...string} the site information properties to retrieve, such as "general" or "interwikimap"
     * @returns {Promise<Object>} the wiki's site information, with each property corresponding to an argument to
     * this method
     */
    getSiteInfo(...props) {
        return this.request({action: "query", meta: "siteinfo", siprop: props.join("|")})
            .then(response => response.query);
    }

    /**
     * Normalizes the given link, adjusting its language to this wiki's language and replacing the link's namespace
     * with the wiki's own name for that namespace.
     *
     * The given link is not modified. Titles without a `:` are returned with only the language adjusted.
     *
     * @param link {InterlangLink} the link to normalize
     * @returns {InterlangLink} the normalized link
     */
    normalize(link) {
        const normalLink = link.copy();
        normalLink.lang = this.general.lang;

        const titleParts = normalLink.title.split(':');
        if (titleParts.length < 2) return normalLink;

        // Replace a canonical namespace name in front of the first `:` with the name stored under `*`
        titleParts[0] = Object.keys(this.namespaces).reduce((titlePart, namespaceId) => {
            const namespace = this.namespaces[namespaceId];
            return titlePart === namespace["canonical"] ? namespace["*"] : titlePart;
        }, titleParts[0]);
        normalLink.title = titleParts.join(':');

        return normalLink;
    }

    /**
     * Shorthand for converting a title to an `InterlangLink` of this wiki's language.
     *
     * @param title {string} the title of the article to generate a link for
     * @returns {InterlangLink} the link to the article on this wiki
     * @private
     */
    _toLink(title) {
        return new InterlangLink(this.general.lang, title);
    }
}
/**
* Manages a `MediaWiki` instance for different languages, caching retrieved information for re-use.
*
* @property mws {Object.<string, MediaWiki>} the cached `MediaWiki` instances
* @property articlePath {string} the path to articles, where `$1` indicates the article name
* @property apiPath {string} the path to `api.php`
* @property baseLang {string} the language of the base `MediaWiki`, where the exploration starts
*/
export class MediaWikiManager {
    /**
     * Constructs a new `MediaWikiManager`.
     *
     * The `#init` method **must** be called before invoking any other function. Behavior is undefined otherwise.
     */
    constructor() {
        this.mws = {};
        this._iwMap = new InterwikiMap([]);
    }

    /**
     * Initializes this `MediaWikiManager`.
     *
     * Splits the base wiki's API path and article path into a shared base path and the remainders that follow it.
     * The remainders are later used to derive the API URL of another wiki from that wiki's article URL.
     *
     * @param baseMw {MediaWiki} the `MediaWiki` that is used as a starting point
     * @return {Promise<MediaWikiManager>} this `MediaWikiManager`
     */
    async init(baseMw) {
        const apiPath = baseMw.apiPath;
        const articlePath = baseMw.general.articlepath;

        // Length of the true common prefix: stop at the first mismatch, so that characters that merely happen to
        // coincide further along (such as the `.php` in `/w/api.php` and `/index.php?title=$1`) are not counted
        let sharedLength = 0;
        while (sharedLength < apiPath.length && apiPath[sharedLength] === articlePath[sharedLength])
            sharedLength++;

        // The last shared character is not part of the base path, so it stays in front of both remainders
        this.basePath = apiPath.slice(0, Math.max(sharedLength - 1, 0));
        this.articlePath = articlePath.slice(this.basePath.length);
        this.apiPath = apiPath.slice(this.basePath.length);
        this.baseLang = baseMw.general.lang;

        this.mws[baseMw.general.lang] = baseMw;
        this._updateIwMap();

        return this;
    }

    /**
     * Returns the `MediaWiki` for the given language, creating and initializing it if necessary, or `undefined` if
     * it could not be created.
     *
     * @param lang {string} the language of the `MediaWiki` to return
     * @returns {Promise<MediaWiki|undefined>} the `MediaWiki` for the given language, or `undefined` if it could
     * not be created
     */
    async getMwOrWait(lang) {
        if (this.hasMw(lang))
            return this.mws[lang];
        if (!this._iwMap.hasUrl(lang))
            return undefined;

        // Derive the API URL by swapping the article path at the end of the article URL for the API path
        const url = this._iwMap.getUrl(lang);
        let newMw;
        try {
            newMw = await new MediaWiki(url.slice(0, -this.articlePath.length) + this.apiPath).init();
        } catch (error) {
            // Best effort: a wiki that cannot be initialized is reported as unavailable to the caller
            return undefined;
        }

        if (this.hasMw(newMw.general.lang)) {
            // Duplicate MW with different but equivalent language code; destroy new MW instance
            this.mws[lang] = this.mws[newMw.general.lang];
        } else {
            this.mws[newMw.general.lang] = newMw;
            this.mws[lang] = newMw;
        }
        this._updateIwMap();

        return this.mws[lang];
    }

    /**
     * Returns the `MediaWiki` for the given language or `undefined` if it has not created that object.
     *
     * @param lang {string} the language of the `MediaWiki` to return
     * @returns {MediaWiki|undefined} the `MediaWiki` for the given language or `undefined` if it has not created
     * that object
     */
    getMw(lang) {
        return this.mws[lang];
    }

    /**
     * Returns `true` if and only if this manager has a `MediaWiki` for the given language.
     *
     * @param lang {string} the language of the `MediaWiki` to check presence of
     * @returns {boolean} `true` if and only if this manager has a `MediaWiki` for the given language
     */
    hasMw(lang) {
        return this.mws[lang] !== undefined;
    }

    /**
     * Returns the URL to the given article.
     *
     * @param link {InterlangLink} the link to return the URL of
     * @returns {URL} the URL to the given article
     */
    getArticlePath(link) {
        // A replacer function inserts the title literally; a replacement string would have `$$`, `$&`, etc. in the
        // title interpreted as special replacement patterns
        return new URL(this._iwMap.getUrl(link.lang).replace("$1", () => link.title));
    }

    /**
     * Updates the `_iwMap` property with the entries in `MediaWiki` instances in this manager.
     */
    _updateIwMap() {
        const maps = Object.keys(this.mws).map(key => this.mws[key].interwikiMap.map);
        this._iwMap = new InterwikiMap([].concat(...maps));
    }
}
/**
* Discovers the interlanguage network, starting from the given link.
*
* @param mwm {MediaWikiManager} the manager to use for caching and resolving pages
* @param title {string} the title of the page to start traversing at
* @param [errorCb] {function("error"|"warning"|null, *): void} a function handling errors and warnings
* @param [progressCb] {function(*): void} a function handling progress updates
* @returns network {Object} the discovered network
* @returns network.pages {Page[]} the pages in the network
* @returns network.redirects {Redirect[]} the redirects in the network
*/
export const discoverNetwork = async function(mwm, title, errorCb, progressCb) {
    // Both callbacks are documented as optional; fall back to no-ops so that they can be invoked unconditionally
    const reportError = typeof errorCb === "function" ? errorCb : () => {};
    const reportProgress = typeof progressCb === "function" ? progressCb : () => {};

    const pages = [];
    const redirects = [];

    // Links that have already been handled, both as found in the queue and after normalization/redirection
    const history = [];
    // Links that still have to be visited; used as a stack
    const queue = [new InterlangLink(mwm.baseLang, title)];
    while (queue.length > 0) {
        reportProgress("Checking <code>" + queue[queue.length - 1] + "</code>");

        let next = queue.pop();
        if (history.some(it => it.equals(next)))
            continue;

        // Normalize
        const nextMw = await mwm.getMwOrWait(next.lang);
        if (nextMw === undefined) {
            history.push(next);
            pages.push(new Page(mwm.getArticlePath(next), next, [], false));

            // Failing on the very first link means the starting wiki itself is unreachable, which is fatal
            if (history.length === 1)
                throw new Error(couldNotConnectMessage);
            else {
                reportError("warning", `Could not connect to the wiki for language '${next.lang}'. Maybe the wiki no longer exists?`);
                continue;
            }
        }

        next = nextMw.normalize(next);
        if (history.some(it => it.equals(next)))
            continue;
        else
            history.push(next);

        // Fetch interlang links
        const result = await nextMw.getLangLinks(next.title);
        if (result === undefined) {
            // The article could not be found; record it as a non-existing page
            pages.push(new Page(mwm.getArticlePath(next), next, [], false));
            continue;
        }

        // Follow redirects
        if (!result.link.equals(next)) {
            redirects.push(...(result.redirects));

            next = result.link;
            if (history.some(it => it.equals(next)))
                continue;
            else
                history.push(next);
        }

        // Create `Page` object
        pages.push(new Page(mwm.getArticlePath(next), next, result.langLinks, true));
        queue.push(...(result.langLinks));
    }

    // Normalize links; links to wikis for which no `MediaWiki` is available are kept as they are
    pages.forEach(page => {
        page.langLinks = page.langLinks.map(langLink => {
            const mw = mwm.getMw(langLink.lang);
            return mw !== undefined ? mw.normalize(langLink) : langLink;
        });
    });

    return {pages: pages, redirects: redirects};
};