From 2778553d0e55f6124fe8909e3ff4163f929d063e Mon Sep 17 00:00:00 2001 From: "Felix W. Dekker" Date: Wed, 15 Apr 2020 15:48:01 +0200 Subject: [PATCH] Normalize article titles in table Fixes #21. --- src/main/js/Main.js | 2 +- src/main/js/MediaWiki.js | 78 +++++++++++++++++++++++++++++----------- 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/src/main/js/Main.js b/src/main/js/Main.js index 3b3b4ef..299d26b 100644 --- a/src/main/js/Main.js +++ b/src/main/js/Main.js @@ -64,7 +64,7 @@ doAfterLoad(async () => { // Discover discoverNetwork(mwm, articleInput.getValue(), it => messageHandler.handle("progress", it)) - .then(({pages, redirects}) => new InterlangNetwork(pages, redirects)) + .then(it => new InterlangNetwork(it.pages, it.redirects, it.normalizations)) .then(network => { messageHandler.handle("progress", "Creating table"); diff --git a/src/main/js/MediaWiki.js b/src/main/js/MediaWiki.js index 2f7b4ea..6da3626 100644 --- a/src/main/js/MediaWiki.js +++ b/src/main/js/MediaWiki.js @@ -167,7 +167,8 @@ export class Page { * A network of pages linking to each other. * * @property pages {Page[]} the pages linking to each other, sorted alphabetically - * @properties redirects {Redirect[]} the redirects in the network + * @property redirects {Redirect[]} the redirects in the network + * @property normalizations {Redirect[]} the normalizations in the network */ export class InterlangNetwork { /** @@ -175,14 +176,37 @@ export class InterlangNetwork { * * @param pages {Page[]} the pages linking to each other * @param redirects {Redirect[]} the redirects in the network + * @param normalizations {Redirect[]} the normalizations in the network */ - constructor(pages, redirects) { + // TODO Make all properties immutable + constructor(pages, redirects, normalizations) { this.pages = pages.sort((a, b) => a.link.toString().localeCompare(b.link.toString())); this.redirects = redirects; + this.normalizations = normalizations; + this._normalizeTitles(); this._coalesceWikis(); } + /** + * Normalizes all article titles. + * + * @private + */ + _normalizeTitles() { + this.pages.forEach(page => { + const normalization = this.normalizations.find(it => it.from.equals(page.link)); + if (normalization !== undefined) + page.url = normalization.to; + + page.langLinks.forEach(langLink => { + const normalization = this.normalizations.find(it => it.from.equals(langLink)); + if (normalization !== undefined) + langLink.title = normalization.to.title; + }); + }); + } + /** * Changes the state of this network such that there are no duplicate wikis. * @@ -342,7 +366,8 @@ export class MediaWiki { * @return result {Object} the query result * @return result.langLinks {Object.} the language links per (redirected-to) * page, which are `undefined` if the page could not be found - * @return result.redirects {Object.[]} an array mapping a `from` to a `to` + * @return result.redirects {Redirect[]} all redirects that were encountered, with double redirects removed + * @return result.normalizations {Redirect[]} all article name normalizations that were encountered */ getLangLinks(pages, limit) { if (limit === undefined) limit = "max"; @@ -361,17 +386,23 @@ export class MediaWiki { return links; }, {}); - const redirects = (query.redirects || []).reduce((redirects, redirect) => { - const matches = query.redirects.filter(it => it.from === redirect.to); - if (matches.length === 1) - redirects.push(new Redirect(redirect.from, matches[0].to)); - else - redirects.push(redirect); + const redirects = (query.redirects || []) + .map(it => new Redirect(this._toLink(it.from), this._toLink(it.to))) + .reduce((redirects, redirect, _, self) => { + // TODO Support triple redirects + const matches = self.filter(it => it.from.equals(redirect.to)); + if (matches.length > 1) + redirects.push(new Redirect(redirect.from, matches[0].to)); + else + redirects.push(redirect); - return redirects; - }, []); + return redirects; + }, []); - return {langLinks: links, redirects: redirects}; + const normalizations = (query.normalized || []) + .map(it => new Redirect(this._toLink(it.from), this._toLink(it.to))); + + return {langLinks: links, redirects: redirects, normalizations: normalizations}; }); } @@ -385,6 +416,18 @@ export class MediaWiki { return this.request({action: "query", meta: "siteinfo", siprop: props.join("|")}) .then(response => response.query); } + + + /** + * Shorthand for converting a title to an `InterlangLink` of this wiki's language. + * + * @param title {string} the title of the article to generate a link for + * @returns {InterlangLink} the link to the article on this wiki + * @private + */ + _toLink(title) { + return new InterlangLink(this.general.lang, title); + } } /** @@ -492,6 +535,7 @@ export class MediaWikiManager { export const discoverNetwork = async function (mwm, title, progressCb) { const pages = []; const redirects = []; + const normalizations = []; const history = []; const queue = [new InterlangLink(mwm.baseLang, title)]; @@ -505,15 +549,9 @@ export const discoverNetwork = async function (mwm, title, progressCb) { const nextMw = await mwm.getMw(next.lang); await nextMw .getLangLinks([next.title]) - .then(({langLinks, redirects}) => ({ - langLinks: langLinks, - redirects: redirects.map(it => new Redirect( - new InterlangLink(next.lang, it.from), - new InterlangLink(next.lang, it.to) - )) - })) .then(result => { redirects.push(...result.redirects); + normalizations.push(...result.normalizations); const actualLink = new InterlangLink(next.lang, Object.keys(result.langLinks)[0]); if (!actualLink.equals(next)) { @@ -539,5 +577,5 @@ export const discoverNetwork = async function (mwm, title, progressCb) { }); } - return {pages: pages, redirects: redirects}; + return {pages: pages, redirects: redirects, normalizations: normalizations}; }