189 lines
6.5 KiB
PHP
189 lines
6.5 KiB
PHP
<?php
|
|
|
|
namespace php;
|
|
|
|
use Exception;
|
|
use Monolog\Logger;
|
|
|
|
|
|
/**
 * Helper class for interacting with Wikipedia's API.
 *
 * Titles are looked up in batches through the API's `query` action. Results are keyed by the title reported by the
 * API and, additionally, by every requested title that the API normalized or redirected to such a title.
 */
class Mediawiki
{
    /**
     * The URL of Wikipedia's API endpoint.
     */
    private const API_URL = "https://en.wikipedia.org/w/api.php?";
    /**
     * The user agent used to represent the death notifier to Wikipedia.
     */
    private const USER_AGENT =
        "death-notifier/%%VERSION_NUMBER%% " .
        "(https://git.fwdekker.com/tools/death-notifier; florine@fwdekker.com)";
    /**
     * The number of titles that are sent to the API per request.
     */
    private const TITLES_PER_REQUEST = 50;
    /**
     * The maximum number of follow-up requests to send for a single batch of titles before giving up.
     */
    private const MAX_CONTINUATIONS = 100;

    /**
     * @var Logger The logger to use for logging.
     */
    private Logger $logger;


    /**
     * Creates a new Mediawiki instance.
     *
     * @param Logger $logger the logger to use for logging
     */
    public function __construct(Logger $logger)
    {
        $this->logger = $logger;
    }


    /**
     * Sends a request to Wikipedia's API and returns its response as a JSON object.
     *
     * @param array<string, mixed> $url_param the query parameters to send to the API
     * @return mixed a JSON object containing the API's response
     * @throws Exception if the API could not be reached, or if its response is not valid JSON
     */
    private function api_fetch(array $url_param): mixed
    {
        $ch = curl_init();
        if ($ch === false)
            throw new Exception("Failed to initialise cURL.");

        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_URL, self::API_URL . http_build_query($url_param));
        curl_setopt($ch, CURLOPT_USERAGENT, self::USER_AGENT);

        $output = curl_exec($ch);
        // Read the error while the handle is certainly still usable. The handle is released once `$ch` goes out of
        // scope; `curl_close` has had no effect since PHP 8.0, which this file requires anyway.
        $error = curl_error($ch);

        if (!is_string($output) || $error !== "")
            throw new Exception($error !== "" ? $error : "Wikipedia's API did not return a response.");

        // Throws a `JsonException` (a subclass of `Exception`) instead of silently returning `null` on malformed input
        return json_decode($output, associative: true, flags: JSON_THROW_ON_ERROR);
    }

    /**
     * Runs a `query` request to completion by following the API's continuation markers, and combines all responses.
     *
     * @param array<string, mixed> $params the query parameters to send to the API
     * @return array{0: array<int|string, array<string, mixed>>, 1: array<int|string, string>} the returned pages
     * indexed by title, and a map from each normalized or redirected title to the title it was replaced by
     * @throws Exception if the API could not be reached or returned an unexpected response
     */
    private function api_query(array $params): array
    {
        $pages = [];
        $aliases = [];
        $continuation = [];

        for ($request = 0; $request <= self::MAX_CONTINUATIONS; $request++) {
            $response = $this->api_fetch(array_merge($params, $continuation));

            $query = is_array($response) ? ($response["query"] ?? null) : null;
            if (!is_array($query) || !is_array($query["pages"] ?? null))
                throw new Exception("Wikipedia's API returned an unexpected response.");

            foreach ($query["pages"] as $page) {
                if (!is_array($page) || !isset($page["title"]))
                    throw new Exception("Wikipedia's API returned a page without a title.");

                $title = $page["title"];
                $pages[$title] = self::merge_page($pages[$title] ?? null, $page);
            }

            foreach (["redirects", "normalized"] as $kind) {
                foreach ($query[$kind] ?? [] as $alias) {
                    $aliases[$alias["from"]] = $alias["to"];
                }
            }

            // The API signals that more data is available by including parameters to add to the next request
            $continuation = $response["continue"] ?? [];
            if (empty($continuation))
                return [$pages, $aliases];
        }

        throw new Exception("Wikipedia's API kept returning continuation markers.");
    }

    /**
     * Combines the data of a page that was (partially) returned in multiple responses.
     *
     * @param array<string, mixed>|null $known the data collected for the page so far, or `null` if there is none
     * @param array<string, mixed> $page the data for the page from the latest response
     * @return array<string, mixed> the combined data, in which the category lists of both inputs are concatenated
     */
    private static function merge_page(?array $known, array $page): array
    {
        if ($known === null)
            return $page;

        $merged = $known + $page;
        $categories = array_merge($known["categories"] ?? [], $page["categories"] ?? []);
        if (!empty($categories))
            $merged["categories"] = $categories;

        return $merged;
    }

    /**
     * Copies the result of each page to the titles that the API normalized or redirected to that page.
     *
     * @param array<int|string, mixed> $results the results so far, indexed by page title
     * @param array<int|string, string> $aliases maps each replaced title to the title it was replaced by
     * @return array<int|string, mixed> the results, extended with an entry for each alias that leads to a known title
     */
    private static function resolve_aliases(array $results, array $aliases): array
    {
        foreach ($aliases as $from => $to) {
            // Follow chains such as "normalized title -> redirect -> target". The number of hops is bounded so that
            // a circular chain cannot cause an endless loop.
            for ($hops = 0; $hops < count($aliases); $hops++) {
                if (array_key_exists($to, $results) || !isset($aliases[$to]))
                    break;

                $to = $aliases[$to];
            }

            if (array_key_exists($to, $results))
                $results[$from] = $results[$to];
        }

        return $results;
    }

    /**
     * Queries the API for the given titles in batches, and maps each title to a description of its page.
     *
     * @param array<string> $titles the titles of the pages to query
     * @param array<string, mixed> $params the query parameters to send to the API, excluding the titles
     * @param callable $describe turns a page as returned by the API into the value to return for that page
     * @return array<int|string, mixed> maps each title to the value of `$describe` for the page it refers to
     * @throws Exception if the API could not be reached or returned an unexpected response
     */
    private function query_pages(array $titles, array $params, callable $describe): array
    {
        $results = [];

        foreach (array_chunk($titles, self::TITLES_PER_REQUEST) as $chunk) {
            [$pages, $aliases] = $this->api_query(array_merge($params, ["titles" => implode("|", $chunk)]));

            // Assign per key instead of using `array_merge`, which renumbers purely numeric titles such as "1984"
            foreach ($pages as $title => $page)
                $results[$title] = $describe($page);

            $results = self::resolve_aliases($results, $aliases);
        }

        return $results;
    }

    /**
     * Determines for each title whether the page at Wikipedia exists.
     *
     * If the API cannot be queried, the failure is logged, the response code is set to 500, and the script exits.
     *
     * @param string[] $titles the titles of the pages to check
     * @return array<string, bool> maps each requested title to a boolean indicating whether the page exists
     */
    public function pages_exist(array $titles): array
    {
        try {
            return $this->query_pages(
                $titles,
                ["action" => "query", "format" => "json", "prop" => "info", "redirects" => true],
                static fn(array $page): bool =>
                    !array_key_exists("missing", $page) && !array_key_exists("invalid", $page)
            );
        } catch (Exception $exception) {
            $this->logger->error(
                "Failed to fetch basic page info from API.",
                ["cause" => $exception, "titles" => $titles]
            );

            http_response_code(500);
            exit();
        }
    }

    /**
     * Returns a string describing `person`'s status ("deceased", "alive", "possibly alive", "missing"), or `null` if
     * the title does not refer to a page about a person on Wikipedia.
     *
     * @param mixed $person_page the page as returned by the Wikipedia API
     * @return string|null a string describing `person`'s status ("deceased", "alive", "possibly alive", "missing"), or
     * `null` if the title does not refer to a page about a person on Wikipedia
     */
    private function person_status(mixed $person_page): ?string
    {
        if (array_key_exists("missing", $person_page) || array_key_exists("invalid", $person_page))
            return null;

        // A page is not guaranteed to have a "categories" entry; treat its absence as "no categories"
        $category_titles = array_column($person_page["categories"] ?? [], "title");
        $deceased_regex = "/^Category:([0-9]{1,4}s? (BC |AD )?deaths|Year of death (missing|unknown))$/";

        // Evidence of death takes precedence over all other categories
        foreach ($category_titles as $category_title) {
            if (preg_match($deceased_regex, $category_title) === 1)
                return "deceased";
        }

        if (in_array("Category:Possibly living people", $category_titles, true))
            return "possibly alive";
        if (in_array("Category:Missing people", $category_titles, true))
            return "missing";
        if (in_array("Category:Living people", $category_titles, true))
            return "alive";

        return null;
    }

    /**
     * Checks for each person what their status (dead, alive, missing) is according to Wikipedia's categorization.
     *
     * If the API cannot be queried, the failure is logged, the response code is set to 500, and the script exits.
     *
     * @param array<string> $people_names the names of the people to check aliveness of
     * @return array<string, ?string> maps each requested person's name to a string describing their status ("deceased",
     * "alive", "possibly alive", "missing"). If a page does not exist, it is mapped to `null`
     */
    public function people_statuses(array $people_names): array
    {
        try {
            return $this->query_pages(
                $people_names,
                [
                    "action" => "query",
                    "format" => "json",
                    "prop" => "categories",
                    // The limit applies to the request as a whole rather than to each page, so categories that do
                    // not fit are retrieved with follow-up requests
                    "cllimit" => 500,
                    "redirects" => true
                ],
                fn(array $page): ?string => $this->person_status($page)
            );
        } catch (Exception $exception) {
            $this->logger->error(
                "Failed to fetch categories from API.",
                ["cause" => $exception, "people_names" => $people_names]
            );

            http_response_code(500);
            exit();
        }
    }
}
|