Normalize article titles in table

Fixes #21.
This commit is contained in:
Florine W. Dekker 2020-04-15 15:48:01 +02:00
parent 57a0913f6f
commit 2778553d0e
Signed by: FWDekker
GPG Key ID: B1B567AF58D6EE0F
2 changed files with 59 additions and 21 deletions

View File

@ -64,7 +64,7 @@ doAfterLoad(async () => {
// Discover
discoverNetwork(mwm, articleInput.getValue(), it => messageHandler.handle("progress", it))
.then(({pages, redirects}) => new InterlangNetwork(pages, redirects))
.then(it => new InterlangNetwork(it.pages, it.redirects, it.normalizations))
.then(network => {
messageHandler.handle("progress", "Creating table");

View File

@ -167,7 +167,8 @@ export class Page {
* A network of pages linking to each other.
*
* @property pages {Page[]} the pages linking to each other, sorted alphabetically
* @properties redirects {Redirect[]} the redirects in the network
* @property redirects {Redirect[]} the redirects in the network
* @property normalizations {Redirect[]} the normalizations in the network
*/
export class InterlangNetwork {
/**
@ -175,14 +176,37 @@ export class InterlangNetwork {
*
* @param pages {Page[]} the pages linking to each other
* @param redirects {Redirect[]} the redirects in the network
* @param normalizations {Redirect[]} the normalizations in the network
*/
constructor(pages, redirects) {
// TODO Make all properties immutable
constructor(pages, redirects, normalizations) {
this.pages = pages.sort((a, b) => a.link.toString().localeCompare(b.link.toString()));
this.redirects = redirects;
this.normalizations = normalizations;
this._normalizeTitles();
this._coalesceWikis();
}
/**
* Normalizes all article titles.
*
* @private
*/
_normalizeTitles() {
this.pages.forEach(page => {
const normalization = this.normalizations.find(it => it.from.equals(page.link));
if (normalization !== undefined)
page.url = normalization.to;
page.langLinks.forEach(langLink => {
const normalization = this.normalizations.find(it => it.from.equals(langLink));
if (normalization !== undefined)
langLink.title = normalization.to.title;
});
});
}
/**
* Changes the state of this network such that there are no duplicate wikis.
*
@ -342,7 +366,8 @@ export class MediaWiki {
* @return result {Object} the query result
* @return result.langLinks {Object.<string, InterlangLink[]|undefined>} the language links per (redirected-to)
* page, which are `undefined` if the page could not be found
* @return result.redirects {Object.<string, string>[]} an array mapping a `from` to a `to`
* @return result.redirects {Redirect[]} all redirects that were encountered, with double redirects removed
* @return result.normalizations {Redirect[]} all article name normalizations that were encountered
*/
getLangLinks(pages, limit) {
if (limit === undefined) limit = "max";
@ -361,17 +386,23 @@ export class MediaWiki {
return links;
}, {});
const redirects = (query.redirects || []).reduce((redirects, redirect) => {
const matches = query.redirects.filter(it => it.from === redirect.to);
if (matches.length === 1)
redirects.push(new Redirect(redirect.from, matches[0].to));
else
redirects.push(redirect);
// Turn the raw `{from, to}` entries reported by the API into `Redirect`s between links on this wiki, and collapse
// double redirects so that every redirect points at its final target.
const redirects = (query.redirects || [])
    .map(it => new Redirect(this._toLink(it.from), this._toLink(it.to)))
    .map((redirect, _, self) => {
        // TODO Support triple redirects
        // A double redirect A -> B -> C shows up as two entries; the entry that continues from this redirect's
        // target is the (single) one whose `from` equals this redirect's `to`. The previous `length > 1` check
        // could never match that single entry, which left double redirects in the result.
        const followUp = self.find(it => it.from.equals(redirect.to));
        return followUp !== undefined
            ? new Redirect(redirect.from, followUp.to)
            : redirect;
    });
return redirects;
}, []);
return {langLinks: links, redirects: redirects};
const normalizations = (query.normalized || [])
.map(it => new Redirect(this._toLink(it.from), this._toLink(it.to)));
return {langLinks: links, redirects: redirects, normalizations: normalizations};
});
}
@ -385,6 +416,18 @@ export class MediaWiki {
return this.request({action: "query", meta: "siteinfo", siprop: props.join("|")})
.then(response => response.query);
}
/**
* Shorthand for converting a title to an `InterlangLink` of this wiki's language.
*
* @param title {string} the title of the article to generate a link for
* @returns {InterlangLink} the link to the article on this wiki
* @private
*/
_toLink(title) {
return new InterlangLink(this.general.lang, title);
}
}
/**
@ -492,6 +535,7 @@ export class MediaWikiManager {
export const discoverNetwork = async function (mwm, title, progressCb) {
const pages = [];
const redirects = [];
const normalizations = [];
const history = [];
const queue = [new InterlangLink(mwm.baseLang, title)];
@ -505,15 +549,9 @@ export const discoverNetwork = async function (mwm, title, progressCb) {
const nextMw = await mwm.getMw(next.lang);
await nextMw
.getLangLinks([next.title])
.then(({langLinks, redirects}) => ({
langLinks: langLinks,
redirects: redirects.map(it => new Redirect(
new InterlangLink(next.lang, it.from),
new InterlangLink(next.lang, it.to)
))
}))
.then(result => {
redirects.push(...result.redirects);
normalizations.push(...result.normalizations);
const actualLink = new InterlangLink(next.lang, Object.keys(result.langLinks)[0]);
if (!actualLink.equals(next)) {
@ -539,5 +577,5 @@ export const discoverNetwork = async function (mwm, title, progressCb) {
});
}
return {pages: pages, redirects: redirects};
return {pages: pages, redirects: redirects, normalizations: normalizations};
}