interlanguage-checker/src/main/js/MediaWiki.ts

798 lines
26 KiB
TypeScript

import {couldNotConnectMessage, mergeMaps, mergeSets} from "./Shared";
/**
 * Identifies a page by the language of its wiki and its title.
 *
 * An `InterlangLink` merely names a page. Information about the page itself, such as the links it contains or whether
 * it exists, lives in the `Page` class.
 */
export class InterlangLink {
    /**
     * Creates an identifier for the page with the given title on the wiki with the given language.
     *
     * @param lang the language of the wiki this page is of
     * @param title the title of the page
     */
    constructor(readonly lang: string, readonly title: string) {
    }

    /**
     * Checks whether the given object is an `InterlangLink` with exactly the same language and title.
     *
     * @param other the object to compare to this `InterlangLink`
     * @return `true` if and only if the given object equals this `InterlangLink`
     */
    equals(other: any): boolean {
        if (!(other instanceof InterlangLink))
            return false;

        return other.lang === this.lang && other.title === this.title;
    }

    /**
     * Checks whether the given object is an `InterlangLink` with the same language and with a title that differs from
     * this link's title in letter case at most.
     *
     * @param other the object to compare to this `InterlangLink`
     * @return `true` if and only if the given object equals this `InterlangLink`, ignoring the case of the titles
     */
    equalsIgnoringCase(other: any): boolean {
        if (!(other instanceof InterlangLink))
            return false;

        return other.lang === this.lang && other.title.toLowerCase() === this.title.toLowerCase();
    }

    /**
     * Formats this `InterlangLink` as the language and the title, separated by a colon.
     *
     * @return the string representation of this `InterlangLink`
     */
    toString(): string {
        return this.lang + ":" + this.title;
    }

    /**
     * Creates a new `InterlangLink` with the same language and title as this one.
     *
     * @return the deep copy
     */
    copy(): InterlangLink {
        const {lang, title} = this;
        return new InterlangLink(lang, title);
    }
}
/**
 * Records that one `InterlangLink` redirects to another.
 */
export class Redirect {
    /**
     * The page that redirects.
     */
    readonly from: InterlangLink;
    /**
     * The page that is redirected to.
     */
    readonly to: InterlangLink;

    /**
     * Creates a `Redirect` between copies of the two given links.
     *
     * @param from the page that redirects
     * @param to the page that is redirected to
     */
    constructor(from: InterlangLink, to: InterlangLink) {
        // Both ends are copied, so this object does not share link instances with its creator
        const source = from.copy();
        const target = to.copy();

        this.from = source;
        this.to = target;
    }

    /**
     * Checks whether the given object is a `Redirect` with equal source and target links.
     *
     * @param other the object to compare to this `Redirect`
     * @return `true` if and only if the given object equals this `Redirect`
     */
    equals(other: any): boolean {
        if (!(other instanceof Redirect))
            return false;
        if (!this.from.equals(other.from))
            return false;

        return this.to.equals(other.to);
    }

    /**
     * Creates a new `Redirect` between the same pages.
     *
     * The copy is deep because the constructor copies both links it receives.
     *
     * @return the deep copy
     */
    copy(): Redirect {
        const {from, to} = this;
        return new Redirect(from, to);
    }
}
/**
* A map of interwiki links, from interwiki prefix to the URL that the prefix points to.
*
* Within this file, such maps are filled from the `prefix` and `url` fields of a wiki's `interwikimap` site
* information, plus one entry for the wiki's own language.
*/
export type InterwikiMap = Map<string, string>;
/**
 * Describes a page, i.e. what is found at the location an `InterlangLink` points to.
 */
export class Page {
    /**
     * The full URL at which this page is located.
     */
    readonly url: URL;
    /**
     * The interlanguage link describing the location of the page.
     */
    readonly link: InterlangLink;
    /**
     * The interlanguage links contained in this page.
     */
    readonly langLinks: InterlangLink[];
    /**
     * `true` if and only if this page exists.
     */
    readonly exists: boolean;

    /**
     * Creates a page from copies of the given URL and links.
     *
     * @param url the full URL at which this page is located
     * @param link the interlanguage link describing the location of the page
     * @param langLinks the interlanguage links contained in this page
     * @param exists `true` if and only if this page exists
     */
    constructor(url: URL, link: InterlangLink, langLinks: InterlangLink[], exists: boolean) {
        // Every object argument is copied, so this page does not share state with its creator
        this.url = new URL(String(url));
        this.link = link.copy();
        this.langLinks = langLinks.map(langLink => langLink.copy());
        this.exists = exists;
    }

    /**
     * Checks whether the string forms of this page's language links are in non-descending `localeCompare` order.
     *
     * @return `true` if and only if this page's language links are sorted alphabetically
     */
    langLinksAreOrdered(): boolean {
        const links = this.langLinks;

        // The list is sorted exactly when no link is followed by a link that should come before it
        for (let i = 1; i < links.length; i++) {
            const previous = links[i - 1].toString();
            const current = links[i].toString();
            if (previous.localeCompare(current) > 0)
                return false;
        }

        return true;
    }

    /**
     * Checks whether two or more of this page's language links point to the same language.
     *
     * @return `true` if and only if this page has multiple links to the same language
     */
    hasDoubleLinks(): boolean {
        const langs = this.langLinks.map(langLink => langLink.lang);

        // A set drops duplicates, so it is smaller than the list exactly when a language occurs more than once
        return new Set(langs).size < langs.length;
    }

    /**
     * Creates a new `Page` with the same contents as this one.
     *
     * The copy is deep because the constructor copies the URL and links it receives.
     *
     * @return the deep copy
     */
    copy(): Page {
        const {url, link, langLinks, exists} = this;
        return new Page(url, link, langLinks, exists);
    }
}
/**
 * A network of pages linking to each other.
 */
export class InterlangNetwork {
    /**
     * The alphabetically-sorted pages that have been discovered in the network.
     */
    readonly pages: Page[];
    /**
     * The redirects that have been discovered in the network.
     */
    readonly redirects: Redirect[];

    /**
     * Creates a network from copies of the given pages and redirects.
     *
     * The copied pages are sorted by the string form of their link; the given array itself is left untouched.
     *
     * @param pages the pages linking to each other
     * @param redirects the redirects in the network
     */
    constructor(pages: Page[], redirects: Redirect[]) {
        const byLink = (a: Page, b: Page): number => a.link.toString().localeCompare(b.link.toString());

        this.pages = pages.map(page => page.copy()).sort(byLink);
        this.redirects = redirects.map(redirect => redirect.copy());
    }

    /**
     * Determines how the given source page links to the given destination page.
     *
     * The first check that matches wins: an exact link, then a link that differs in letter case only, then a link
     * that leads to the destination through a known redirect. If source and destination share a language, any kind of
     * link is reported as "self-linked" and the absence of a link as "self-unlinked".
     *
     * @param source the source page of which to check the links
     * @param destination the destination that could be linked to
     * @return the checker's verdict of the link
     */
    getLinkVerdict(source: Page, destination: Page): LinkVerdict {
        const target = destination.link;
        const sameLang = source.link.lang === target.lang;
        const redirectsToTarget = (link: InterlangLink): boolean => {
            const candidate = new Redirect(link, target);
            return this.redirects.some(redirect => redirect.equals(candidate));
        };

        let verdict: LinkVerdict;
        if (source.langLinks.some(link => link.equals(target)))
            verdict = "linked";
        else if (source.langLinks.some(link => link.equalsIgnoringCase(target)))
            verdict = "wrongly-cased";
        else if (source.langLinks.some(redirectsToTarget))
            verdict = "redirected";
        else
            return sameLang ? "self-unlinked" : "unlinked";

        return sameLang ? "self-linked" : verdict;
    }

    /**
     * Judges the given source page itself as well as its link to every page in this network.
     *
     * The page's own verdicts are "perfect" alone if nothing is wrong, and otherwise all problems that were found, in
     * a fixed order.
     *
     * @param srcPage the page to give a verdict of
     * @return the checker's verdicts of the page and its outgoing links
     */
    getPageVerdict(srcPage: Page): { self: PageVerdict[], links: Map<InterlangLink, LinkVerdict> } {
        const links = new Map<InterlangLink, LinkVerdict>();
        for (const dstPage of this.pages)
            links.set(dstPage.link, this.getLinkVerdict(srcPage, dstPage));
        const found = new Set<LinkVerdict>(links.values());

        const self: PageVerdict[] = [];
        // Problems with the page itself
        if (!srcPage.exists) self.push("not-found");
        if (!srcPage.langLinksAreOrdered()) self.push("wrongly-ordered");
        if (srcPage.hasDoubleLinks()) self.push("doubly-linked");
        // Problems with at least one outgoing link
        const linkProblems: (LinkVerdict & PageVerdict)[] = ["self-linked", "unlinked", "redirected", "wrongly-cased"];
        for (const problem of linkProblems)
            if (found.has(problem)) self.push(problem);

        if (self.length === 0) self.push("perfect");
        return {self: self, links: links};
    }

    /**
     * Judges this network as a whole, based on the verdicts of all its pages.
     *
     * Broken verdicts take precedence over flawed verdicts.
     *
     * @return a verdict on the network
     */
    getNetworkVerdict(): NetworkVerdict {
        const verdicts = [...mergeSets(this.pages.map(page => new Set(this.getPageVerdict(page).self)))];
        const containsAnyOf = (candidates: PageVerdict[]): boolean =>
            verdicts.some(verdict => candidates.includes(verdict));

        if (containsAnyOf(NetworkVerdict.brokenVerdicts))
            return "broken";
        if (containsAnyOf(NetworkVerdict.flawedVerdicts))
            return "flawed";
        return "perfect";
    }

    /**
     * Creates a new `InterlangNetwork` with the same pages and redirects.
     *
     * The copy is deep because the constructor copies the pages and redirects it receives.
     *
     * @return the deep copy
     */
    copy(): InterlangNetwork {
        const {pages, redirects} = this;
        return new InterlangNetwork(pages, redirects);
    }
}
/**
 * Interacts with the API of a single wiki in an asynchronous manner.
 */
export class MediaWiki {
    /**
     * The origin of the wiki's API URL.
     */
    readonly origin: string;
    /**
     * The path relative to the wiki's API; starts with a `/`.
     */
    readonly apiPath: string;
    /**
     * The general information, retrieved from the API.
     */
    general!: { articlepath: string, lang: string };
    /**
     * The interwiki map of this wiki.
     */
    interwikiMap!: InterwikiMap;
    /**
     * The namespaces on this wiki.
     */
    namespaces!: Map<number, { canonical: string, "*": string }>;

    /**
     * Constructs a new MediaWiki object.
     *
     * The `#init` method **must** be called before invoking any other function. Behavior is undefined otherwise.
     *
     * @param apiUrl the url to the `api.php` file
     */
    constructor(apiUrl: string) {
        // Plain HTTP URLs are rewritten so that the API is always contacted over HTTPS
        const urlObj = new URL(apiUrl.replace("http://", "https://"));

        this.origin = urlObj.origin;
        this.apiPath = urlObj.pathname;
    }

    /**
     * Initializes this `MediaWiki` object with the general information, interwiki map, and namespaces from the API.
     *
     * @return this `MediaWiki` object
     */
    async init(): Promise<MediaWiki> {
        const query = await this.getSiteInfo("general", "interwikimap", "namespaces");

        // Add self to map, so that this wiki's own language can be looked up like any other prefix
        query.interwikimap.push({prefix: query.general.lang, url: query.general.server + query.general.articlepath});

        // Set fields
        this.general = query.general;
        this.interwikiMap = new Map(
            query.interwikimap.map((it: { prefix: string, url: string }): [string, string] => [it.prefix, it.url])
        );
        this.namespaces = new Map(
            Object.keys(query.namespaces).map((id): [number, { canonical: string, "*": string }] => {
                const props = query.namespaces[id];
                // The API keys namespaces by their numeric id in string form
                return [+id, {canonical: props.canonical, "*": props["*"]}];
            })
        );

        return this;
    }

    /**
     * Sends a request to the MediaWiki API and returns the parsed JSON response.
     *
     * Any failure, be it a network error, a non-OK HTTP status, or an unparseable body, is reported as the same
     * generic connection error.
     *
     * @param params the parameters to send to the API
     * @return the API's response
     */
    request(params: { [key: string]: string }): Promise<any> {
        const url = this.origin + this.apiPath + "?format=json&origin=*&" + new URLSearchParams(params).toString();
        console.debug(`Requesting from ${this.origin}${this.apiPath} with params`, params, "at", url);

        return fetch(url)
            .then(response => {
                if (!response.ok) throw new Error(couldNotConnectMessage);

                return response.json();
            })
            .catch(() => {
                throw new Error(couldNotConnectMessage);
            });
    }

    /**
     * Requests all language links on the given article, following redirects.
     *
     * The returned redirects all point at the page at the end of the redirect chain, so a double redirect A → B → C
     * is reported as A → C and B → C. A `langlinks` or `redirects` list that is absent from the response is treated as
     * empty.
     *
     * @param title the title of the article to return links of
     * @return result the query result, or `undefined` if the article could not be found
     */
    getLangLinks(title: string): Promise<{ link: InterlangLink, langLinks: InterlangLink[], redirects: Redirect[] } | undefined> {
        return this
            .request({action: "parse", page: title, prop: "langlinks", redirects: ""})
            .then(response => {
                if (response.error !== undefined)
                    return undefined;

                const langLinks: InterlangLink[] = (response.parse.langlinks || [])
                    .map((it: { lang: string, "*": string }) => new InterlangLink(it.lang, it["*"]));
                const redirects = this.collapseDoubleRedirects(
                    (response.parse.redirects || [])
                        .map((it: { from: string, to: string }) => new Redirect(this.toLink(it.from), this.toLink(it.to)))
                );

                return {link: this.toLink(response.parse.title), langLinks: langLinks, redirects: redirects};
            });
    }

    /**
     * Returns this wiki's site information.
     *
     * @param props the site information properties to retrieve, such as "general" or "interwikimap"
     * @return the wiki's site information, with each property corresponding to an argument to this method
     */
    getSiteInfo(...props: string[]): Promise<any> {
        return this.request({action: "query", meta: "siteinfo", siprop: props.join("|")})
            .then(response => response.query);
    }

    /**
     * Normalizes the given link, adjusting its language to this wiki's language and replacing the link's namespace with
     * the canonical namespace.
     *
     * Only the part of the title before the first colon is considered to be a namespace. Titles without a colon are
     * kept as they are.
     *
     * @param link the link to normalize
     * @return the normalized link
     */
    normalize(link: InterlangLink): InterlangLink {
        const normalLang = this.general.lang;

        const titleParts = link.title.split(":");
        if (titleParts.length < 2)
            return new InterlangLink(normalLang, link.title);

        // Replace a canonical namespace name with the name that this wiki uses for that namespace
        let namespaceName = titleParts[0];
        for (const namespace of this.namespaces.values()) {
            if (namespaceName === namespace["canonical"])
                namespaceName = namespace["*"];
        }
        titleParts[0] = namespaceName;

        return new InterlangLink(normalLang, titleParts.join(":"));
    }

    /**
     * Replaces each redirect that points at the source of another redirect with a redirect straight to that other
     * redirect's target.
     *
     * @param redirects the redirects as reported by the API
     * @return the redirects, with double redirects collapsed
     * @private
     */
    private collapseDoubleRedirects(redirects: Redirect[]): Redirect[] {
        // TODO Support triple redirects (#30)
        return redirects.map(redirect => {
            const next = redirects.find(it => it.from.equals(redirect.to));
            return next !== undefined ? new Redirect(redirect.from, next.to) : redirect;
        });
    }

    /**
     * Shorthand for converting a title to an `InterlangLink` of this wiki's language.
     *
     * @param title the title of the article to generate a link for
     * @return the link to the article on this wiki
     * @private
     */
    private toLink(title: string): InterlangLink {
        return new InterlangLink(this.general.lang, title);
    }
}
/**
 * Manages a `MediaWiki` instance for different languages, caching retrieved information for re-use.
 */
export class MediaWikiManager {
    /**
     * The combined interwiki map of all `MediaWiki` instances under management of this manager.
     *
     * @private
     */
    private iwMap: InterwikiMap;
    /**
     * The cached `MediaWiki` instances
     */
    mws: Map<string, MediaWiki>;
    /**
     * The language of the base `MediaWiki`, where the exploration starts.
     */
    baseLang!: string;
    /**
     * The path to articles, where `$1` indicates the article name.
     */
    articlePath!: string;
    /**
     * The path to `api.php`.
     */
    apiPath!: string;

    /**
     * Constructs a new MediaWiki manager.
     *
     * The `#init` method **must** be called before invoking any other function. Behavior is undefined otherwise.
     */
    constructor() {
        this.mws = new Map();
        this.iwMap = new Map();
    }

    /**
     * Initializes this `MediaWikiManager`.
     *
     * The article path and API path of the base wiki are stored without the leading part that they have in common,
     * so that the same relative paths can be applied to the other wikis in the interwiki map.
     *
     * @param baseMw the `MediaWiki` that is used as a starting point
     * @return this `MediaWikiManager`
     */
    async init(baseMw: MediaWiki): Promise<MediaWikiManager> {
        const apiPath = baseMw.apiPath;
        const articlePath = baseMw.general.articlepath;

        // Find the common prefix; comparison stops at the first difference so that characters that happen to be equal
        // further on in the paths are not counted
        let prefixLength = 0;
        while (
            prefixLength < apiPath.length
            && prefixLength < articlePath.length
            && apiPath[prefixLength] === articlePath[prefixLength]
        ) {
            prefixLength++;
        }
        // The last shared character is kept in the relative paths; for a prefix such as `/nl/` this is the slash
        const basePath = apiPath.slice(0, prefixLength).slice(0, -1);

        this.articlePath = articlePath.slice(basePath.length);
        this.apiPath = apiPath.slice(basePath.length);
        this.baseLang = baseMw.general.lang;
        this.mws.set(baseMw.general.lang, baseMw);
        this.updateIwMap();

        return this;
    }

    /**
     * Returns the `MediaWiki` for the given language, creating and initializing it if necessary, or `undefined` if it
     * could not be created.
     *
     * @param lang the language of the `MediaWiki` to return
     * @return the `MediaWiki` for the given language, or `undefined` if it could not be created
     */
    async getMwOrWait(lang: string): Promise<MediaWiki | undefined> {
        if (this.hasMw(lang))
            return this.mws.get(lang);

        const url = this.iwMap.get(lang);
        if (url === undefined)
            return undefined;

        let newMw: MediaWiki;
        try {
            // Swap the article path at the end of the interwiki URL for the API path
            newMw = await new MediaWiki(url.slice(0, -this.articlePath.length) + this.apiPath).init();
        } catch (error) {
            // A wiki that cannot be initialized is reported to the caller as missing
            return undefined;
        }

        const knownMw = this.mws.get(newMw.general.lang);
        if (knownMw !== undefined) {
            // Duplicate MW with different but equivalent language code; destroy new MW instance
            this.mws.set(lang, knownMw);
        } else {
            this.mws.set(newMw.general.lang, newMw);
            this.mws.set(lang, newMw);
        }
        this.updateIwMap();

        return this.mws.get(lang);
    }

    /**
     * Returns the `MediaWiki` for the given language or `undefined` if it has not created that object.
     *
     * @param lang the language of the `MediaWiki` to return
     * @return the `MediaWiki` for the given language or `undefined` if it has not created that object
     */
    getMw(lang: string): MediaWiki | undefined {
        return this.mws.get(lang);
    }

    /**
     * Returns `true` if and only if this manager has a `MediaWiki` for the given language.
     *
     * @param lang the language of the `MediaWiki` to check presence of
     * @return `true` if and only if this manager has a `MediaWiki` for the given language
     */
    hasMw(lang: string): boolean {
        return this.mws.has(lang);
    }

    /**
     * Returns the URL to the given article.
     *
     * @param link the link to return the URL of
     * @return the URL to the given article
     * @throws Error if the interwiki map has no entry for the link's language
     */
    getArticlePath(link: InterlangLink): URL {
        const articlePath = this.iwMap.get(link.lang);
        if (articlePath === undefined) throw Error(`Could not find article path for '${link}'.`);

        // A replacer function inserts the title literally; a replacement string would have sequences such as `$$` and
        // `$&` in the title interpreted as special patterns
        return new URL(articlePath.replace("$1", () => link.title));
    }

    /**
     * Updates the `iwMap` property with the entries in `MediaWiki` instances in this manager.
     *
     * If multiple wikis define the same prefix, the later URL replaces the earlier one only if it uses HTTPS.
     *
     * @private
     */
    private updateIwMap(): void {
        this.iwMap = mergeMaps(
            [...this.mws.values()].map(mw => mw.interwikiMap),
            (k, v1, v2) => (v2.startsWith("https://")) ? v2 : v1
        );
    }
}
/**
 * Discovers the interlanguage network by following language links, starting from the page with the given title on
 * the manager's base wiki.
 *
 * Links are visited one at a time. A link whose wiki cannot be obtained is recorded as a non-existent page and reported
 * as a warning, unless it is the very first link, in which case discovery is aborted with an error.
 *
 * @param mwm the manager to use for caching and resolving pages
 * @param title the title of the page to start traversing at
 * @param errorCb a function handling errors and warnings
 * @param progressCb a function handling progress updates
 * @return the discovered network, including pages and redirects
 */
export const discoverNetwork = async function(
    mwm: MediaWikiManager,
    title: string,
    errorCb: (type: "error" | "warning" | null, message: string) => void,
    progressCb: (message: string) => void
): Promise<{ pages: Page[], redirects: Redirect[] }> {
    const pages: Page[] = [];
    const redirects: Redirect[] = [];
    const visited: InterlangLink[] = [];
    const pending: InterlangLink[] = [new InterlangLink(mwm.baseLang, title)];

    const isVisited = (link: InterlangLink): boolean => visited.some(it => it.equals(link));
    const missingPage = (link: InterlangLink): Page => new Page(mwm.getArticlePath(link), link, [], false);

    while (pending.length > 0) {
        let current = pending.pop()!;
        // NOTE(review): the link is placed in an HTML snippet without escaping; confirm that this is safe for the
        // consumers of `progressCb`
        progressCb("Checking <code>" + current + "</code>.");
        if (isVisited(current))
            continue;

        // Normalize
        const wiki = await mwm.getMwOrWait(current.lang);
        if (wiki === undefined) {
            visited.push(current);
            pages.push(missingPage(current));

            // Without the wiki of the very first link there is nothing to explore at all
            if (visited.length === 1)
                throw new Error(couldNotConnectMessage);

            errorCb(
                "warning",
                `Could not connect to the wiki for language '${current.lang}'. Maybe the wiki no longer exists?`
            );
            continue;
        }

        current = wiki.normalize(current);
        if (isVisited(current))
            continue;
        visited.push(current);

        // Fetch interlang links
        const result = await wiki.getLangLinks(current.title);
        if (result === undefined) {
            pages.push(missingPage(current));
            continue;
        }

        // Follow redirects
        if (!result.link.equals(current)) {
            redirects.push(...(result.redirects));

            current = result.link;
            if (isVisited(current))
                continue;
            visited.push(current);
        }

        // Create `Page` object
        pages.push(new Page(mwm.getArticlePath(current), current, result.langLinks, true));
        pending.push(...(result.langLinks));
    }

    // Normalize links, in place, for every language of which the wiki is known
    for (const page of pages) {
        const langLinks = page.langLinks;
        for (let idx = 0; idx < langLinks.length; idx++) {
            const mw = mwm.getMw(langLinks[idx].lang);
            if (mw !== undefined)
                langLinks[idx] = mw.normalize(langLinks[idx]);
        }
    }

    return {pages: pages, redirects: redirects};
};
/**
* The verdict that the checker has of a link between two pages.
*
* The possible values are listed in decreasing order of importance, so that if a single link has multiple verdicts but
* only one can be displayed, the one with the highest importance will be displayed.
*
* The `self-` variants are the verdicts for a source and destination page that have the same language.
*/
type LinkVerdict =
| "linked"
| "self-linked"
| "unlinked"
| "self-unlinked"
| "redirected"
| "wrongly-cased";
export namespace LinkVerdict {
/**
* The UI properties of each link verdict: an icon name, a message, and a list of styles.
*
* The "self-unlinked" verdict has no icon, an empty message, and no styles.
*/
export const props = {
"linked": {icon: "check", message: "Linked 🙂", style: ["success"]},
"self-linked": {icon: "rotate-left", message: "Links to its own wiki 😕", style: ["warning"]},
"unlinked": {icon: "times", message: "Link is missing 😕", style: ["error"]},
"self-unlinked": {icon: null, message: "", style: []},
"redirected": {icon: "mail-forward", message: "Links to a redirect 😕", style: ["warning"]},
"wrongly-cased": {icon: "text-height", message: "Links with incorrect capitalisation 😕", style: ["warning"]},
};
}
/**
* The verdict that the checker has of a page.
*
* The possible values are listed in decreasing order of importance, so that if a single page has multiple verdicts but
* only one can be displayed, the one with the highest importance will be displayed.
*
* A page can have several verdicts at once; "perfect" is only given to a page that has no other verdict.
*/
type PageVerdict =
| "perfect"
| "not-found"
| "wrongly-ordered"
| "doubly-linked"
| "self-linked"
| "unlinked"
| "redirected"
| "wrongly-cased";
export namespace PageVerdict {
/**
* The UI properties of each page verdict: an icon name, a message, and a list of styles.
*/
export const props = {
"perfect": {icon: "check", message: "Perfect 🙂", style: ["success"]},
"not-found": {icon: "search", message: "Article does not exist 😕", style: ["error"]},
"wrongly-ordered": {icon: "sort-alpha-asc", message: "Links are in the wrong order 😕", style: ["warning"]},
"doubly-linked": {icon: "clone", message: "Links to the same wiki multiple times 😕", style: ["warning"]},
"self-linked": {icon: "rotate-left", message: "Links to its own wiki 😕", style: ["warning"]},
"unlinked": {icon: "chain-broken", message: "Misses one or more links 😕", style: ["error"]},
"redirected": {icon: "mail-forward", message: "Links to a redirect 😕", style: ["warning"]},
"wrongly-cased": {icon: "text-height", message: "Links with incorrect capitalisation 😕", style: ["warning"]},
};
}
/**
* The verdict that the checker has of a network.
*
* A network is "broken" if any of its pages has one of the `brokenVerdicts`, otherwise "flawed" if any of its pages has
* one of the `flawedVerdicts`, and otherwise "perfect".
*/
type NetworkVerdict =
| "perfect"
| "flawed"
| "broken";
export namespace NetworkVerdict {
/**
* The UI properties of each network verdict: a message, which may contain HTML line breaks, and a message type.
*/
export const props = {
"perfect": {
message: "A perfect network! 🙂",
type: "success"
},
"flawed": {
message:
"The network is complete but flawed 😕<br />" +
"Hover over an icon in the left column for more information.",
type: "warning"
},
// NOTE(review): "broken" has the same "warning" type as "flawed"; confirm that this is intended
"broken": {
message:
"The network is broken 😞<br />" +
"Hover over an icon in the left column for more information.",
type: "warning"
},
};
/**
* Page verdicts that cause a network to become broken.
*/
export const brokenVerdicts: PageVerdict[]
= ["not-found", "unlinked"];
/**
* Page verdicts that cause a network to become flawed.
*/
export const flawedVerdicts: PageVerdict[]
= ["wrongly-ordered", "doubly-linked", "self-linked", "redirected", "wrongly-cased"];
}