// File-viewer metadata left over from extraction (not part of the program): 797 lines, 25 KiB, TypeScript.
import {couldNotConnectMessage, mergeStates} from "./Shared";
/**
 * A data class for combining a language and page title to identify a page.
 *
 * This is only an _identifier_ of a page, not the page itself. For information on the page such as the links it
 * contains, whether it's a redirect, etc., see the `Page` class.
 */
export class InterlangLink {
    /**
     * The language of the wiki this page is of.
     */
    readonly lang: string;
    /**
     * The title of the page.
     */
    readonly title: string;


    /**
     * Constructs a new interlanguage link.
     *
     * @param lang the language of the wiki this page is of
     * @param title the title of the page
     */
    constructor(lang: string, title: string) {
        this.lang = lang;
        this.title = title;
    }


    /**
     * Returns `true` if and only if the given object is an `InterlangLink` with exactly the same language and title.
     *
     * @param other the object to compare to this `InterlangLink`
     * @return `true` if and only if the given object equals this `InterlangLink`
     */
    equals(other: unknown): boolean {
        // `instanceof` narrows `other`, so no cast is needed and non-link values simply compare unequal
        return other instanceof InterlangLink && this.lang === other.lang && this.title === other.title;
    }

    /**
     * Returns `true` if and only if the given object equals this `InterlangLink`, ignoring the case of the titles.
     *
     * The languages are still compared exactly; only the titles are lower-cased before comparison.
     *
     * @param other the object to compare to this `InterlangLink`
     * @return `true` if and only if the given object equals this `InterlangLink`, ignoring the case of the titles
     */
    equalsIgnoringCase(other: unknown): boolean {
        return other instanceof InterlangLink
            && this.lang === other.lang
            && this.title.toLowerCase() === other.title.toLowerCase();
    }

    /**
     * Converts this `InterlangLink` to a string of the form `lang:title`.
     *
     * @return the string representation of this `InterlangLink`
     */
    toString(): string {
        return `${this.lang}:${this.title}`;
    }

    /**
     * Returns a new `InterlangLink` with the same language and title.
     *
     * @return the copy
     */
    copy(): InterlangLink {
        return new InterlangLink(this.lang, this.title);
    }
}
/**
 * Redirects one `InterlangLink` to another.
 */
export class Redirect {
    /**
     * The page that redirects.
     */
    readonly from: InterlangLink;
    /**
     * The page that is redirected to.
     */
    readonly to: InterlangLink;


    /**
     * Constructs a new `Redirect`.
     *
     * Both links are copied, so the new `Redirect` does not share its links with the caller.
     *
     * @param from the page that redirects
     * @param to the page that is redirected to
     */
    constructor(from: InterlangLink, to: InterlangLink) {
        this.from = from.copy();
        this.to = to.copy();
    }


    /**
     * Returns `true` if and only if the given object is a `Redirect` with equal `from` and `to` links.
     *
     * @param other the object to compare to this `Redirect`
     * @return `true` if and only if the given object equals this `Redirect`
     */
    equals(other: unknown): boolean {
        return other instanceof Redirect && this.from.equals(other.from) && this.to.equals(other.to);
    }

    /**
     * Returns a deep copy of this `Redirect`.
     *
     * This is a deep copy because the constructor performs copies of the received variables.
     *
     * @return the deep copy
     */
    copy(): Redirect {
        return new Redirect(this.from, this.to);
    }
}
// TODO: Replace entire class with a `Map`
/**
 * A map of interwiki links, from interwiki abbreviations/prefixes to URLs.
 *
 * The entries are stored in a `Map`. When the constructor receives the same prefix more than once, the entry that
 * comes last overwrites the earlier ones.
 */
export class InterwikiMap {
    /**
     * The mapping from interwiki abbreviations/prefixes to URLs.
     */
    readonly map: Map<string, string>;


    /**
     * Constructs a new interwiki map.
     *
     * URLs that use the `http://` scheme are rewritten to `https://`.
     *
     * @param map the mapping from interwiki abbreviations/prefixes to URLs
     */
    constructor(map: { prefix: string, url: string }[]) {
        this.map = new Map();
        for (const {prefix, url} of map) {
            // Anchored to the start of the URL so that an `http://` occurring later in the URL (e.g. inside a query
            // string) is left untouched
            this.map.set(prefix, url.replace(/^http:\/\//, "https://"));
        }
    }

    /**
     * Constructs a new interwiki map from the given map.
     *
     * @param map the map to construct an interwiki map from
     * @return the interwiki map containing the entries of `map`
     */
    static fromMap(map: Map<string, string>): InterwikiMap {
        return new InterwikiMap(Array.from(map, ([prefix, url]) => ({prefix, url})));
    }


    /**
     * Returns the URL for the given prefix, or `undefined` if the prefix could not be found.
     *
     * @param prefix the prefix to return the URL of
     * @return the URL for the given prefix, or `undefined` if the prefix could not be found
     */
    getUrl(prefix: string): string | undefined {
        return this.map.get(prefix);
    }

    /**
     * Returns `true` if and only if this map has a URL for the given prefix.
     *
     * @param prefix the prefix to check for
     * @return `true` if and only if this map has a URL for the given prefix
     */
    hasUrl(prefix: string): boolean {
        return this.map.has(prefix);
    }

    /**
     * Returns a deep copy of this `InterwikiMap`.
     *
     * This is a deep copy because the constructor builds a fresh `Map` from the received entries.
     *
     * @return the deep copy
     */
    copy(): InterwikiMap {
        return InterwikiMap.fromMap(this.map);
    }
}
/**
 * Describes a page, i.e. what you get if you follow an `InterlangLink`.
 */
export class Page {
    /**
     * The full URL at which this page is located.
     */
    readonly url: URL;
    /**
     * The interlanguage link describing the location of the page.
     */
    readonly link: InterlangLink;
    /**
     * The interlanguage links contained in this page.
     */
    readonly langLinks: InterlangLink[];
    /**
     * `true` if and only if this page exists.
     */
    readonly exists: boolean;


    /**
     * Constructs a new page.
     *
     * The URL, the link, and every language link are copied, so the new page shares no objects with the caller.
     *
     * @param url the full URL at which this page is located
     * @param link the interlanguage link describing the location of the page
     * @param langLinks the interlanguage links contained in this page
     * @param exists `true` if and only if this page exists
     */
    constructor(url: URL, link: InterlangLink, langLinks: InterlangLink[], exists: boolean) {
        this.url = new URL(url.toString());
        this.link = link.copy();
        this.langLinks = langLinks.map(it => it.copy());
        this.exists = exists;
    }


    /**
     * Returns `true` if and only if this page's language links are sorted alphabetically.
     *
     * Links are compared by their string representation using `localeCompare`. A page with fewer than two links is
     * always considered sorted.
     *
     * @return `true` if and only if this page's language links are sorted alphabetically
     */
    langLinksAreOrdered(): boolean {
        return this.langLinks.every((langLink, i, self) =>
            // The first link has no predecessor to compare against
            i === 0 || self[i - 1].toString().localeCompare(langLink.toString()) <= 0
        );
    }

    /**
     * Returns `true` if and only if this page has multiple links to the same language.
     *
     * @return `true` if and only if this page has multiple links to the same language
     */
    hasDoubleLinks(): boolean {
        // A set drops duplicate languages, so it is smaller than the list exactly when a language repeats
        return new Set(this.langLinks.map(it => it.lang)).size < this.langLinks.length;
    }


    /**
     * Returns a deep copy of this `Page`.
     *
     * This is a deep copy because the constructor performs copies of the received variables.
     *
     * @return the deep copy
     */
    copy(): Page {
        return new Page(this.url, this.link, this.langLinks, this.exists);
    }
}
/**
 * A network of pages linking to each other.
 */
export class InterlangNetwork {
    /**
     * The alphabetically-sorted pages that have been discovered in the network.
     */
    readonly pages: Page[];
    /**
     * The redirects that have been discovered in the network.
     */
    readonly redirects: Redirect[];


    /**
     * Constructs a new `InterlangNetwork`.
     *
     * The pages and redirects are copied; the copied pages are sorted by the string representation of their link.
     *
     * @param pages the pages linking to each other
     * @param redirects the redirects in the network
     */
    constructor(pages: Page[], redirects: Redirect[]) {
        this.pages = pages
            .map(it => it.copy())
            .sort((a, b) => a.link.toString().localeCompare(b.link.toString()));
        this.redirects = redirects.map(it => it.copy());
    }


    /**
     * Determines whether the given source links to the given destination, potentially through a redirect.
     *
     * The checks are tried in order: an exact link, a link that differs only in the case of the title, and a link
     * to a page that redirects to the destination. If the source and destination have the same language, any match
     * is reported as `"self-linked"` and no match is reported as `"self-unlinked"`.
     *
     * @param source the source page of which to check the links
     * @param destination the destination that could be linked to
     * @return the checker's verdict of the link
     */
    getLinkVerdict(source: Page, destination: Page): LinkVerdict {
        const target = destination.link;
        const links = source.langLinks;
        const isSelfLangLink = source.link.lang === target.lang;

        let verdict: LinkVerdict;
        if (links.some(it => it.equals(target)))
            verdict = "linked";
        else if (links.some(it => it.equalsIgnoringCase(target)))
            verdict = "wrongly-cased";
        // Compare the redirect's endpoints directly rather than allocating a `Redirect` for every comparison
        else if (links.some(link => this.redirects.some(it => it.from.equals(link) && it.to.equals(target))))
            verdict = "redirected";
        else
            return isSelfLangLink ? "self-unlinked" : "unlinked";

        return isSelfLangLink ? "self-linked" : verdict;
    }

    /**
     * Analyzes the given source page and returns a verdict of its own state and of the state of its link to all other
     * pages in this network.
     *
     * The page's own verdicts are listed in a fixed order; if none apply, the only verdict is `"perfect"`.
     *
     * @param srcPage the page to give a verdict of
     * @return the checker's verdicts of the page and its outgoing links
     */
    getPageVerdict(srcPage: Page): { self: PageVerdict[], pages: { page: Page, verdict: LinkVerdict }[] } {
        const pageStates = this.pages.map(dstPage => ({page: dstPage, verdict: this.getLinkVerdict(srcPage, dstPage)}));
        const hasLinkVerdict = (wanted: LinkVerdict): boolean => pageStates.some(({verdict}) => verdict === wanted);

        const selfStates: PageVerdict[] = [];
        if (!srcPage.exists)
            selfStates.push("not-found");
        if (!srcPage.langLinksAreOrdered())
            selfStates.push("wrongly-ordered");
        if (srcPage.hasDoubleLinks())
            selfStates.push("doubly-linked");
        if (hasLinkVerdict("self-linked"))
            selfStates.push("self-linked");
        if (hasLinkVerdict("unlinked"))
            selfStates.push("unlinked");
        if (hasLinkVerdict("redirected"))
            selfStates.push("redirected");
        if (hasLinkVerdict("wrongly-cased"))
            selfStates.push("wrongly-cased");

        if (selfStates.length === 0)
            selfStates.push("perfect");

        return {self: selfStates, pages: pageStates};
    }

    /**
     * Returns a verdict on the network.
     *
     * Each page is classified as `"broken"` (not found, or missing a link), `"flawed"` (any other imperfection), or
     * `"perfect"`, and the classifications are folded together with `mergeStates`, starting from `"perfect"`.
     *
     * @return a verdict on the network
     */
    getNetworkVerdict(): NetworkVerdict {
        // NOTE(review): `mergeStates` is imported from `./Shared`; presumably it returns whichever of the two given
        // states comes first in `states` - confirm against its definition
        const states: NetworkVerdict[] = ["broken", "flawed", "perfect"];
        const breakingVerdicts: PageVerdict[] = ["not-found", "unlinked"];
        const flawVerdicts: PageVerdict[] =
            ["wrongly-ordered", "doubly-linked", "self-linked", "redirected", "wrongly-cased"];

        return this.pages.reduce(
            (state: NetworkVerdict, page: Page) => {
                const verdicts = this.getPageVerdict(page).self;

                let pageState: NetworkVerdict = "perfect";
                if (verdicts.some(it => breakingVerdicts.includes(it)))
                    pageState = "broken";
                else if (verdicts.some(it => flawVerdicts.includes(it)))
                    pageState = "flawed";

                return mergeStates<NetworkVerdict>(states, state, pageState);
            },
            "perfect"
        );
    }

    /**
     * Returns a deep copy of this `InterlangNetwork`.
     *
     * This is a deep copy because the constructor performs copies of the received variables.
     *
     * @return the deep copy
     */
    copy(): InterlangNetwork {
        return new InterlangNetwork(this.pages, this.redirects);
    }
}
/**
 * Interacts with the API in an asynchronous manner.
 */
export class MediaWiki {
    /**
     * The origin of the wiki's API URL.
     */
    readonly origin: string;
    /**
     * The path of the wiki's API URL, as reported by `URL.pathname`; starts with a `/`.
     */
    readonly apiPath: string;

    /**
     * The general information, retrieved from the API.
     */
    general!: { articlepath: string, lang: string };
    /**
     * The interwiki map of this wiki.
     */
    interwikiMap!: InterwikiMap;
    /**
     * The namespaces on this wiki, keyed by namespace ID.
     */
    namespaces!: Map<number, { id: string, canonical: string, "*": string }>;


    /**
     * Constructs a new MediaWiki object.
     *
     * The `#init` method **must** be called before invoking any other function. Behavior is undefined otherwise.
     *
     * @param apiUrl the url to the `api.php` file
     */
    constructor(apiUrl: string) {
        const urlObj = new URL(apiUrl);
        this.origin = urlObj.origin;
        this.apiPath = urlObj.pathname;
    }

    /**
     * Initializes this `MediaWiki` object with the necessary information from the API.
     *
     * @return this `MediaWiki` object
     */
    async init(): Promise<MediaWiki> {
        const query = await this.getSiteInfo("general", "interwikimap", "namespaces");

        // Add self to map
        query.interwikimap.push({prefix: query.general.lang, url: query.general.server + query.general.articlepath});

        // The response is parsed JSON, so `namespaces` arrives as a plain object keyed by the stringified namespace
        // ID. Convert it to the declared `Map`, because `#normalize` iterates it with `Map#values`.
        let namespaces: Map<number, { id: string, canonical: string, "*": string }>;
        if (query.namespaces instanceof Map) {
            namespaces = query.namespaces;
        } else {
            namespaces = new Map();
            const rawNamespaces = query.namespaces || {};
            Object.keys(rawNamespaces).forEach(id => namespaces.set(Number(id), rawNamespaces[id]));
        }

        // Set fields
        this.general = query.general;
        this.interwikiMap = new InterwikiMap(query.interwikimap);
        this.namespaces = namespaces;

        return this;
    }


    /**
     * Sends a request to the MediaWiki API and returns the parsed JSON response.
     *
     * The parameters `format=json` and `origin=*` are always sent. If the request fails, the response status is not
     * OK, or the body cannot be parsed, the returned promise rejects with an `Error` carrying
     * `couldNotConnectMessage`.
     *
     * @param params the parameters to send to the API
     * @return the API's response
     */
    request(params: { [key: string]: string }): Promise<any> {
        const url = this.origin + this.apiPath + "?format=json&origin=*&" + new URLSearchParams(params).toString();
        console.debug(`Requesting from ${this.origin}${this.apiPath} with params`, params, "at", url);
        return fetch(url)
            .then(response => {
                if (!response.ok) throw new Error(couldNotConnectMessage);
                return response.json();
            })
            .catch(() => {
                throw new Error(couldNotConnectMessage);
            });
    }

    /**
     * Requests all language links on the given article.
     *
     * The API is asked to resolve redirects. The returned `link` is the title reported by the API, which may differ
     * from the requested title. In the returned `redirects`, a redirect whose target is itself the source of another
     * reported redirect is collapsed so that it points at that other redirect's target.
     *
     * @param title the title of the article to return links of
     * @return result the query result, or `undefined` if the article could not be found
     */
    async getLangLinks(title: string): Promise<{ link: InterlangLink, langLinks: InterlangLink[], redirects: Redirect[] } | undefined> {
        const response = await this.request({action: "parse", page: title, prop: "langlinks", redirects: ""});
        if (response.error !== undefined)
            return undefined;

        const langLinks: InterlangLink[] = response.parse.langlinks
            .map((it: { lang: string, "*": string }) => new InterlangLink(it.lang, it["*"]));

        const reported: Redirect[] = response.parse.redirects
            .map((it: { from: string; to: string; }) => new Redirect(this.toLink(it.from), this.toLink(it.to)));
        const redirects = reported.map(redirect => {
            // TODO Support triple redirects (#30)
            // A single follow-up redirect is enough to make this a double redirect
            const followUp = reported.find(it => it.from.equals(redirect.to));
            return followUp !== undefined ? new Redirect(redirect.from, followUp.to) : redirect;
        });

        return {link: this.toLink(response.parse.title), langLinks: langLinks, redirects: redirects};
    }

    /**
     * Returns this wiki's site information.
     *
     * @param props the site information properties to retrieve, such as "general" or "interwikimap"
     * @return a promise of the wiki's site information, with each property corresponding to an argument to this
     * method
     */
    getSiteInfo(...props: string[]): Promise<any> {
        return this.request({action: "query", meta: "siteinfo", siprop: props.join("|")})
            .then(response => response.query);
    }

    /**
     * Normalizes the given link, adjusting its language to this wiki's language and replacing the link's namespace with
     * the wiki's own name for that namespace.
     *
     * Only the part of the title before the first `:` is treated as a namespace. It is replaced if it equals a
     * namespace's `canonical` name; titles without a `:` are returned with only the language adjusted.
     *
     * @param link the link to normalize
     * @return the normalized link
     */
    normalize(link: InterlangLink): InterlangLink {
        const normalLang = this.general.lang;

        const titleParts = link.title.split(":");
        if (titleParts.length < 2) return new InterlangLink(normalLang, link.title);

        let namespacePart = titleParts[0];
        for (const namespace of this.namespaces.values()) {
            if (namespacePart === namespace["canonical"])
                namespacePart = namespace["*"];
        }
        titleParts[0] = namespacePart;

        return new InterlangLink(normalLang, titleParts.join(":"));
    }


    /**
     * Shorthand for converting a title to an `InterlangLink` of this wiki's language.
     *
     * @param title the title of the article to generate a link for
     * @return the link to the article on this wiki
     * @private
     */
    private toLink(title: string): InterlangLink {
        return new InterlangLink(this.general.lang, title);
    }
}
/**
 * Manages a `MediaWiki` instance for different languages, caching retrieved information for re-use.
 */
export class MediaWikiManager {
    /**
     * The combined interwiki map of all `MediaWiki` instances under management of this manager.
     *
     * @private
     */
    private iwMap: InterwikiMap;

    /**
     * The cached `MediaWiki` instances
     */
    mws: Map<string, MediaWiki>;
    /**
     * The language of the base `MediaWiki`, where the exploration starts.
     */
    baseLang!: string;
    /**
     * The path to articles, where `$1` indicates the article name.
     */
    articlePath!: string;
    /**
     * The path to `api.php`.
     */
    apiPath!: string;


    /**
     * Constructs a new MediaWiki manager.
     *
     * The `#init` method **must** be called before invoking any other function. Behavior is undefined otherwise.
     */
    constructor() {
        this.mws = new Map();
        this.iwMap = new InterwikiMap([]);
    }

    /**
     * Initializes this `MediaWikiManager`.
     *
     * The base wiki's API path and article path are stripped of the part they have in common, so that the API URL
     * of another wiki can be derived from its article URL by swapping the remaining suffixes.
     *
     * @param baseMw the `MediaWiki` that is used as a starting point
     * @return this `MediaWikiManager`
     */
    async init(baseMw: MediaWiki): Promise<MediaWikiManager> {
        const fullApiPath = baseMw.apiPath;
        const fullArticlePath = baseMw.general.articlepath;

        // Only a true common prefix may be stripped. Characters that merely coincide at the same index further along
        // the two paths must not count, or the suffixes are cut at the wrong place.
        let sharedLength = 0;
        while (
            sharedLength < fullApiPath.length
            && sharedLength < fullArticlePath.length
            && fullApiPath[sharedLength] === fullArticlePath[sharedLength]
        ) {
            sharedLength++;
        }
        // The stripped part is one character shorter than the common prefix
        const baseLength = Math.max(0, sharedLength - 1);

        this.articlePath = fullArticlePath.slice(baseLength);
        this.apiPath = fullApiPath.slice(baseLength);
        this.baseLang = baseMw.general.lang;

        this.mws.set(baseMw.general.lang, baseMw);
        this.updateIwMap();

        return this;
    }


    /**
     * Returns the `MediaWiki` for the given language, creating and initializing it if necessary, or `undefined` if it
     * could not be created.
     *
     * A new `MediaWiki` is created from the interwiki URL registered for `lang`. If the new wiki reports a language
     * that is already managed, the existing instance is re-used and also registered under `lang`.
     *
     * @param lang the language of the `MediaWiki` to return
     * @return the `MediaWiki` for the given language, or `undefined` if it could not be created
     */
    async getMwOrWait(lang: string): Promise<MediaWiki | undefined> {
        const cached = this.mws.get(lang);
        if (cached !== undefined)
            return cached;

        const url = this.iwMap.getUrl(lang);
        if (url === undefined)
            return undefined;

        let newMw: MediaWiki;
        try {
            // Swap the article path suffix of the interwiki URL for the API path suffix
            newMw = await new MediaWiki(url.slice(0, -this.articlePath.length) + this.apiPath).init();
        } catch {
            // Deliberate best-effort: an unreachable wiki is reported to the caller as `undefined`
            return undefined;
        }

        const existing = this.mws.get(newMw.general.lang);
        if (existing !== undefined) {
            // Duplicate MW with different but equivalent language code; destroy new MW instance
            this.mws.set(lang, existing);
        } else {
            this.mws.set(newMw.general.lang, newMw);
            this.mws.set(lang, newMw);
        }
        this.updateIwMap();

        return this.mws.get(lang);
    }

    /**
     * Returns the `MediaWiki` for the given language or `undefined` if it has not created that object.
     *
     * @param lang the language of the `MediaWiki` to return
     * @return the `MediaWiki` for the given language or `undefined` if it has not created that object
     */
    getMw(lang: string): MediaWiki | undefined {
        return this.mws.get(lang);
    }

    /**
     * Returns `true` if and only if this manager has a `MediaWiki` for the given language.
     *
     * @param lang the language of the `MediaWiki` to check presence of
     * @return `true` if and only if this manager has a `MediaWiki` for the given language
     */
    hasMw(lang: string): boolean {
        return this.mws.has(lang);
    }

    /**
     * Returns the URL to the given article.
     *
     * @param link the link to return the URL of
     * @return the URL to the given article
     * @throws Error if no article path is known for the link's language
     */
    getArticlePath(link: InterlangLink): URL {
        const articlePath = this.iwMap.getUrl(link.lang);
        if (articlePath === undefined) throw new Error(`Could not find article path for '${link}'.`);

        // A replacer function inserts the title verbatim. A replacement string would interpret sequences such as
        // `$$` or `$&` inside the title.
        return new URL(articlePath.replace("$1", () => link.title));
    }


    /**
     * Updates the `iwMap` property with the entries in `MediaWiki` instances in this manager.
     *
     * Instances are visited in the iteration order of `mws`; for a prefix known to several instances, the entry of
     * the instance visited last is used.
     *
     * @private
     */
    private updateIwMap(): void {
        const combined = new Map<string, string>();
        for (const mw of this.mws.values()) {
            for (const [prefix, url] of mw.interwikiMap.map)
                combined.set(prefix, url);
        }

        this.iwMap = InterwikiMap.fromMap(combined);
    }
}
/**
 * Discovers the interlanguage network, starting from the given link.
 *
 * Links are explored from a stack, so the most recently discovered link is visited first. Every visited link is
 * remembered, both as it was found and after normalization and redirect resolution, so that no page is visited
 * twice. After exploration, the language links of all discovered pages are normalized in place.
 *
 * @param mwm the manager to use for caching and resolving pages
 * @param title the title of the page to start traversing at
 * @param errorCb a function handling errors and warnings
 * @param progressCb a function handling progress updates; receives an HTML fragment
 * @return the discovered network, including pages and redirects
 * @throws Error with `couldNotConnectMessage` if the wiki of the very first link cannot be reached
 */
export const discoverNetwork = async function(
    mwm: MediaWikiManager,
    title: string,
    errorCb: (level: "error" | "warning" | null, message: string) => void,
    progressCb: (message: string) => void
): Promise<{ pages: Page[], redirects: Redirect[] }> {
    // Titles and language codes originate from remote wikis, and the progress message is an HTML fragment, so they
    // are escaped before being embedded in it
    const escapeHtml = (text: string): string => text
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");

    const pages: Page[] = [];
    const redirects: Redirect[] = [];

    const history: InterlangLink[] = [];
    const isVisited = (link: InterlangLink): boolean => history.some(it => it.equals(link));

    const queue: InterlangLink[] = [new InterlangLink(mwm.baseLang, title)];
    while (queue.length > 0) {
        let next = queue.pop()!;
        progressCb("Checking <code>" + escapeHtml(next.toString()) + "</code>");

        if (isVisited(next))
            continue;

        // Normalize
        const nextMw = await mwm.getMwOrWait(next.lang);
        if (nextMw === undefined) {
            history.push(next);
            pages.push(new Page(mwm.getArticlePath(next), next, [], false));

            // If even the starting wiki is unreachable, there is nothing to explore
            if (history.length === 1)
                throw new Error(couldNotConnectMessage);

            errorCb("warning", `Could not connect to the wiki for language '${next.lang}'. Maybe the wiki no longer exists?`);
            continue;
        }

        next = nextMw.normalize(next);
        if (isVisited(next))
            continue;
        history.push(next);

        // Fetch interlang links
        const result = await nextMw.getLangLinks(next.title);
        if (result === undefined) {
            pages.push(new Page(mwm.getArticlePath(next), next, [], false));
            continue;
        }

        // Follow redirects
        if (!result.link.equals(next)) {
            redirects.push(...(result.redirects));

            next = result.link;
            if (isVisited(next))
                continue;
            history.push(next);
        }

        // Create `Page` object
        pages.push(new Page(mwm.getArticlePath(next), next, result.langLinks, true));
        queue.push(...(result.langLinks));
    }

    // Normalize links
    for (const page of pages) {
        page.langLinks.forEach((langLink, idx, self) => {
            const mw = mwm.getMw(langLink.lang);
            // Update link in place; links of wikis that were never loaded are left as they are
            if (mw !== undefined)
                self[idx] = mw.normalize(langLink);
        });
    }

    return {pages: pages, redirects: redirects};
};
/**
 * The verdict that the checker has of a link between two pages.
 *
 * The possible values are listed in decreasing order of importance, so that if a single link has multiple verdicts but
 * only one can be displayed, the one with the highest importance will be displayed.
 */
type LinkVerdict =
    | "linked"
    | "self-linked"
    | "unlinked"
    | "self-unlinked"
    | "redirected"
    | "wrongly-cased";
/**
 * The verdict that the checker has of a page.
 *
 * The possible values are listed in decreasing order of importance, so that if a single page has multiple verdicts but
 * only one can be displayed, the one with the highest importance will be displayed.
 */
type PageVerdict =
    | "perfect"
    | "not-found"
    | "wrongly-ordered"
    | "doubly-linked"
    | "self-linked"
    | "unlinked"
    | "redirected"
    | "wrongly-cased";
/**
 * The verdict that the checker has of a network.
 */
type NetworkVerdict =
    | "perfect"
    | "flawed"
    | "broken";