death-notifier/src/main/php/Mediawiki.php

189 lines
6.5 KiB
PHP

<?php
namespace php;
use Exception;
use Monolog\Logger;
/**
 * Helper class for interacting with Wikipedia's API.
 *
 * Titles are looked up in batches through the `query` action. Both title normalisation and redirects reported by the
 * API are followed, so results can be looked up under the titles exactly as they were requested.
 */
class Mediawiki
{
    /**
     * The URL of Wikipedia's API endpoint.
     */
    private const API_URL = "https://en.wikipedia.org/w/api.php?";
    /**
     * The user agent used to represent the death notifier to Wikipedia.
     */
    private const USER_AGENT =
        "death-notifier/%%VERSION_NUMBER%% " .
        "(https://git.fwdekker.com/tools/death-notifier; florine@fwdekker.com)";
    /**
     * The maximum number of titles that is sent to the API in a single request.
     */
    private const TITLES_PER_REQUEST = 50;

    /**
     * @var Logger The logger to use for logging.
     */
    private Logger $logger;


    /**
     * Creates a new Mediawiki instance.
     *
     * @param Logger $logger the logger to use for logging
     */
    public function __construct(Logger $logger)
    {
        $this->logger = $logger;
    }


    /**
     * Sends a request to Wikipedia's API and returns its response as a JSON object.
     *
     * @param array<string, mixed> $url_param the query parameters to send to the API
     * @return mixed an associative array containing the API's decoded response
     * @throws Exception if the API could not be reached, if the response is not a valid JSON object, or if the API
     * reports an error
     */
    private function api_fetch(array $url_param): mixed
    {
        $ch = curl_init();
        if ($ch === false)
            throw new Exception("Failed to initialise cURL.");

        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_URL, self::API_URL . http_build_query($url_param));
        curl_setopt($ch, CURLOPT_USERAGENT, self::USER_AGENT);

        // The handle is released once `$ch` goes out of scope; `curl_close` has had no effect since PHP 8.0, and the
        // error must in any case be read while the handle is still alive.
        $output = curl_exec($ch);
        $error = curl_error($ch);
        if (!is_string($output) || $error !== "")
            throw new Exception($error !== "" ? $error : "The API request failed.");

        // `JsonException` extends `Exception`, so malformed responses surface through the declared `@throws`
        $response = json_decode($output, associative: true, flags: JSON_THROW_ON_ERROR);
        if (!is_array($response))
            throw new Exception("The API returned a response that is not a JSON object.");
        if (isset($response["error"])) {
            $info = is_array($response["error"]) ? ($response["error"]["info"] ?? null) : null;
            throw new Exception("The API reported an error: " . (is_string($info) ? $info : "unknown error"));
        }

        return $response;
    }

    /**
     * Runs a single `query` for a batch of titles, following the API's continuation protocol until all data has been
     * received.
     *
     * @param string[] $titles the titles to query; should contain at most `TITLES_PER_REQUEST` elements
     * @param array<string, mixed> $params the additional query parameters, such as `prop`
     * @return array{0: array<array<string, mixed>>, 1: array<string>} a pair consisting of the pages indexed by their
     * canonical title, and a map from each normalised or redirected title to the title the API replaced it with
     * @throws Exception if the API could not be reached or returned an unusable response
     */
    private function query_pages(array $titles, array $params): array
    {
        $request = array_merge(
            ["action" => "query", "format" => "json", "redirects" => true, "titles" => implode("|", $titles)],
            $params
        );

        $pages = [];
        $aliases = [];
        $continue = [];
        do {
            $response = $this->api_fetch(array_merge($request, $continue));
            $query = $response["query"] ?? null;
            if (!is_array($query))
                throw new Exception("The API response does not contain query results.");

            foreach ($query["pages"] ?? [] as $page) {
                if (!is_array($page) || !isset($page["title"]))
                    continue;

                // A continued response repeats the page but holds only the next part of its list-valued properties,
                // so lists are appended whereas all other values are simply overwritten
                $title = $page["title"];
                foreach ($page as $key => $value) {
                    $known = $pages[$title][$key] ?? null;
                    $pages[$title][$key] = is_array($known) && is_array($value) ? array_merge($known, $value) : $value;
                }
            }

            foreach (["normalized", "redirects"] as $kind)
                foreach ($query[$kind] ?? [] as $alias)
                    if (isset($alias["from"], $alias["to"]))
                        $aliases[$alias["from"]] = $alias["to"];

            $continue = $response["continue"] ?? [];
        } while (is_array($continue) && $continue !== []);

        return [$pages, $aliases];
    }

    /**
     * Follows `$title` through `$aliases` until it arrives at a title that is a key of `$results`.
     *
     * A requested title may first be normalised and then redirected, possibly several times, so multiple steps may be
     * necessary.
     *
     * @param string $title the title to resolve
     * @param array<string> $aliases maps each replaced title to the title it was replaced with
     * @param array<mixed> $results the results, indexed by title
     * @return string|null the key of `$results` that `$title` resolves to, or `null` if it does not resolve to any key
     */
    private function resolve_title(string $title, array $aliases, array $results): ?string
    {
        $visited = [];
        while (!array_key_exists($title, $results)) {
            // Give up on dead ends, and on circular aliases that would otherwise loop forever
            if (!isset($aliases[$title]) || isset($visited[$title]))
                return null;

            $visited[$title] = true;
            $title = (string) $aliases[$title];
        }

        return $title;
    }

    /**
     * Queries the API for all given titles in batches, and maps each title to the outcome of `$describe`.
     *
     * @param string[] $titles the titles of the pages to query
     * @param array<string, mixed> $params the additional query parameters, such as `prop`
     * @param callable $describe converts a page as returned by the API into the value to store for that page
     * @param mixed $fallback the value to store for requested titles that the API does not report on
     * @return array<string, mixed> maps each requested title, and each canonical title it resolves to, to its value
     * @throws Exception if the API could not be reached or returned an unusable response
     */
    private function map_titles(array $titles, array $params, callable $describe, mixed $fallback): array
    {
        $results = [];
        foreach (array_chunk($titles, self::TITLES_PER_REQUEST) as $batch) {
            [$pages, $aliases] = $this->query_pages($batch, $params);

            // Assign by key instead of using `array_merge`, which would renumber numeric titles such as "1984"
            foreach ($pages as $title => $page)
                $results[$title] = $describe($page);

            foreach (array_keys($aliases) as $from) {
                $target = $this->resolve_title((string) $from, $aliases, $results);
                $results[$from] = $target === null ? $fallback : $results[$target];
            }

            // Ensure that every requested title is present, even if the API silently dropped it
            foreach ($batch as $title)
                if ((is_string($title) || is_int($title)) && !array_key_exists($title, $results))
                    $results[$title] = $fallback;
        }

        return $results;
    }


    /**
     * Determines for each title whether the page at Wikipedia exists.
     *
     * If the API cannot be queried, the error is logged, the HTTP response code is set to 500, and the script is
     * terminated.
     *
     * @param string[] $titles the titles of the pages to check
     * @return array<string, bool> maps each requested title to a boolean indicating whether the page exists
     */
    public function pages_exist(array $titles): array
    {
        try {
            return $this->map_titles(
                $titles,
                ["prop" => "info"],
                fn($page): bool => !array_key_exists("missing", $page) && !array_key_exists("invalid", $page),
                false
            );
        } catch (Exception $exception) {
            $this->logger->error(
                "Failed to fetch basic page info from API.",
                ["cause" => $exception, "titles" => $titles]
            );
            http_response_code(500);
            exit();
        }
    }

    /**
     * Returns a string describing `person`'s status ("deceased", "alive", "possibly alive", "missing"), or `null` if
     * the title does not refer to a page about a person on Wikipedia.
     *
     * @param mixed $person_page the page as returned by the Wikipedia API
     * @return string|null a string describing `person`'s status ("deceased", "alive", "possibly alive", "missing"), or
     * `null` if the title does not refer to a page about a person on Wikipedia
     */
    private function person_status(mixed $person_page): ?string
    {
        if (!is_array($person_page))
            return null;
        if (array_key_exists("missing", $person_page) || array_key_exists("invalid", $person_page))
            return null;

        // The API omits `categories` altogether for pages without categories
        $categories = $person_page["categories"] ?? [];
        $category_titles = is_array($categories) ? array_column($categories, "title") : [];

        $deceased_regex = "/^Category:([0-9]{1,4}s? (BC |AD )?deaths|Year of death (missing|unknown))$/";
        foreach ($category_titles as $category_title)
            if (is_string($category_title) && preg_match($deceased_regex, $category_title) === 1)
                return "deceased";

        // Checked in order of precedence
        if (in_array("Category:Possibly living people", $category_titles, true))
            return "possibly alive";
        if (in_array("Category:Missing people", $category_titles, true))
            return "missing";
        if (in_array("Category:Living people", $category_titles, true))
            return "alive";

        return null;
    }

    /**
     * Checks for each person what their status (dead, alive, missing) is according to Wikipedia's categorization.
     *
     * If the API cannot be queried, the error is logged, the HTTP response code is set to 500, and the script is
     * terminated.
     *
     * @param array<string> $people_names the names of the people to check aliveness of
     * @return array<string, ?string> maps each requested person's name to a string describing their status ("deceased",
     * "alive", "possibly alive", "missing"). If a page does not exist, it is mapped to `null`
     */
    public function people_statuses(array $people_names): array
    {
        try {
            return $this->map_titles(
                $people_names,
                // `cllimit` caps the number of categories in the whole response rather than per page; whatever does
                // not fit is retrieved by continuing the query
                ["prop" => "categories", "cllimit" => 500],
                fn($page): ?string => $this->person_status($page),
                null
            );
        } catch (Exception $exception) {
            $this->logger->error(
                "Failed to fetch categories from API.",
                ["cause" => $exception, "people_names" => $people_names]
            );
            http_response_code(500);
            exit();
        }
    }
}