$params the request parameters to send to the API
     * @return mixed a JSON object containing the API's response
     * @throws WikipediaException if the request fails
     */
    private function api_fetch(array $params): mixed {
        $config = Config::get("wikipedia");
        $user_agent = sprintf(
            self::USER_AGENT_FORMAT,
            $config["user_agent_contact"],
            "curl/" . curl_version()["version"],
        );

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_URL, self::API_URL . http_build_query($params));
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);

        $output = curl_exec($ch);
        // Capture the error before closing the handle, so the check below never reads from a closed handle.
        $error = curl_error($ch);
        curl_close($ch);

        if (is_bool($output) || $error)
            throw new WikipediaException($error);

        try {
            return json_decode($output, associative: true, flags: JSON_THROW_ON_ERROR);
        } catch (JsonException $exception) {
            throw new WikipediaException($exception->getMessage(), previous: $exception);
        }
    }

    /**
     * Sends a query request to Wikipedia's API with continuation and returns its response as a JSON object.
     *
     * Each reply is merged into the accumulated response. The decision whether to send a follow-up request is based
     * on the latest reply only, so that a stale `continue` entry left over in the merged response can never cause the
     * same request to be repeated indefinitely.
     *
     * @param array $params the query request parameters to send to the API
     * @param string|null $continue_name the name of the continue parameter to follow, or `null` if no continuation
     * should be done
     * @return mixed[] a JSON array containing the API's responses merged into one array
     * @throws WikipediaException if the query fails, or if the API replies with an `error` object
     */
    private function api_query_continued(array $params, ?string $continue_name = null): array {
        $query_params = array_merge(["action" => "query", "format" => "json"], $params);

        $response = [];
        $continue = null;
        $continue_value = null;
        do {
            $continue_params = $continue === null
                ? $query_params
                : array_merge($query_params, ["continue" => $continue, $continue_name => $continue_value]);

            $new_response = $this->api_fetch($continue_params);
            if (isset($new_response["error"])) {
                throw new WikipediaException(
                    "Wikipedia API returned an error: " . json_encode($new_response["error"])
                );
            }

            $response = Util::array_merge_recursive_distinct($response, $new_response);

            // Only the latest reply says whether more data is pending; the merged response may still hold an older
            // `continue` entry.
            $next = $new_response["continue"] ?? [];
            $is_done = $continue_name === null
                || isset($new_response["batchcomplete"])
                || !isset($next["continue"], $next[$continue_name]);

            if ($is_done) {
                $continue = null;
                $continue_value = null;
            } else {
                $continue = $next["continue"];
                $continue_value = $next[$continue_name];
            }
        } while ($continue !== null);

        return $response;
    }

    /**
     * Sends a query request to the Wikipedia API in batches of {@see Wikipedia::ARTICLES_PER_QUERY} titles at a time.
     *
     * Pages reported under a negative id are collected as missing; all other pages are stored under their title.
     * Title normalizations and redirects reported by the API are both recorded in the returned redirects.
     *
     * @param array $params the parameters to include in each query
     * @param string[] $titles the titles of the pages to query
     * @param string|null $continue_name the name of the continue parameter to follow for this request
     * @return QueryOutput the API's responses merged into a single `QueryOutput`
     * @throws WikipediaException if the query fails
     */
    private function api_query_batched(array $params, array $titles, ?string $continue_name = null): QueryOutput {
        // Turns a list of `["from" => ..., "to" => ...]` entries into a `from => to` map of strings.
        $pairs_to_map = static fn(array $pairs): array => array_combine(
            array_map(fn($it) => strval($it), array_column($pairs, "from")),
            array_map(fn($it) => strval($it), array_column($pairs, "to"))
        );

        $articles = [];
        $redirects = new Redirects();
        $missing = [];

        foreach (array_chunk($titles, self::ARTICLES_PER_QUERY) as $title_chunk) {
            $chunk_params = array_merge($params, ["titles" => implode("|", $title_chunk), "redirects" => true]);
            $response = $this->api_query_continued($chunk_params, $continue_name)["query"] ?? [];

            foreach ($response["pages"] ?? [] as $article_id => $article) {
                if ($article_id < 0)
                    $missing[] = strval($article["title"]);
                else
                    $articles[strval($article["title"])] = $article;
            }

            $redirects = $redirects->add($pairs_to_map($response["normalized"] ?? []));
            $redirects = $redirects->add($pairs_to_map($response["redirects"] ?? []));
        }

        return new QueryOutput($articles, $redirects, $missing);
    }

    /**
     * Figures out the page that a deleted page was moved to, if any.
     *
     * @param string $title the title to figure out the new title of after moving
     * @param int $max_depth the maximum number of recursive steps to take, in case the article that {@see $title} was
     * moved to was also deleted, and the article that was moved to was also deleted, etc.
     * @return string|null the new title of the article, or `null` if the article has actually been deleted
     * @throws WikipediaException if the query fails
     */
    private function api_query_title_after_move(string $title, int $max_depth = Wikipedia::MAX_MOVE_DEPTH): ?string {
        if ($max_depth <= 0)
            return null;

        $log_events = $this->api_fetch([
            "action" => "query",
            "format" => "json",
            "list" => "logevents",
            "letype" => "move",
            "letitle" => $title
        ])["query"]["logevents"] ?? [];
        if (empty($log_events))
            return null;

        // Uses the first reported log entry; presumably the most recent move under the API's default ordering.
        $title_after_move = $log_events[0]["params"]["target_title"] ?? null;
        if ($title_after_move === null)
            return null; // The log entry does not expose a target, so the move cannot be followed.

        $after_move_page_info = $this->api_fetch([
            "action" => "query",
            "format" => "json",
            "prop" => "info",
            "titles" => $title_after_move,
            "redirects" => true,
        ])["query"] ?? [];

        // A page listed under id -1 means the move target itself does not exist, so follow its own move log.
        if (array_key_exists(-1, $after_move_page_info["pages"] ?? []))
            return $this->api_query_title_after_move($title_after_move, $max_depth - 1);

        if (!empty($after_move_page_info["redirects"]))
            return $after_move_page_info["redirects"][0]["to"];

        return $title_after_move;
    }

    /**
     * Sends a query request to the Wikipedia API in batches of {@see Wikipedia::ARTICLES_PER_QUERY} titles at a time,
     * and optionally resolves articles that were deleted because of a move.
*
     * @param array $params the parameters sent along with every query
     * @param string[] $titles the page titles to look up
     * @param string|null $continue_name the name of the continue parameter to follow for this request
     * @param bool $resolve_moves `true` if and only if missing pages should be traced through their move log
     * @return QueryOutput the API's responses merged into a single `QueryOutput`
     * @throws WikipediaException if the query fails, or if a move target turns out to be redirected or missing
     */
    private function api_query_batched_resolve_moves(array $params, array $titles, ?string $continue_name = null,
                                                     bool $resolve_moves = true): QueryOutput {
        $initial = $this->api_query_batched($params, $titles, $continue_name);
        if (!$resolve_moves) {
            return $initial;
        }

        // Split the missing titles into those that were moved somewhere and those that are really gone.
        $moved_to = [];
        $still_missing = [];
        foreach ($initial->missing as $old_title) {
            $new_title = $this->api_query_title_after_move($old_title);
            if ($new_title !== null) {
                $moved_to[$old_title] = $new_title;
                continue;
            }

            $still_missing[] = $old_title;
        }

        // Query the move targets; they are expected to exist and to need no further redirection.
        $followup = $this->api_query_batched($params, array_values($moved_to), $continue_name);
        if ($followup->redirects->has_non_reflexive_redirects()) {
            throw new WikipediaException("Article was moved unexpectedly: " . json_encode($followup->redirects));
        }
        if (!empty($followup->missing)) {
            throw new WikipediaException("Article missing unexpectedly: " . json_encode($followup->missing));
        }

        $combined_results = array_replace($initial->results, $followup->results);
        $combined_redirects = $initial->redirects->add($moved_to);

        return new QueryOutput($combined_results, $combined_redirects, $still_missing);
    }

    /**
     * Returns the current {@see ArticleType} of the article.
*
     * An article for which {@see person_status} reports a status is a `Person`. Otherwise, the article is a
     * `Disambiguation` if it is in one of the disambiguation or set index categories, and `Other` if it is not.
     *
     * @param mixed $article the article object as returned by the Wikipedia API
     * @return ArticleType the current `ArticleType` of article
     */
    private function article_type(mixed $article): ArticleType {
        if ($this->person_status($article) !== null)
            return ArticleType::Person;

        // `categories` may be absent (e.g. a page without any categories), so fall back to an empty list.
        $category_titles = array_column($article["categories"] ?? [], "title");

        $is_disambiguation = in_array("Category:All set index articles", $category_titles, true)
            || in_array("Category:All disambiguation pages", $category_titles, true);

        return $is_disambiguation ? ArticleType::Disambiguation : ArticleType::Other;
    }

    /**
     * Returns the current {@see PersonStatus}, or `null` if the title does not refer to an article about a person on
     * Wikipedia.
     *
     * The checks are ordered: a death-related category wins over "possibly living", which wins over "missing", which
     * wins over "living".
     *
     * @param mixed $article the article as returned by the Wikipedia API
     * @return PersonStatus|null the current `PersonStatus`, or `null` if the title does not refer to an article about a
     * person on Wikipedia
     */
    private function person_status(mixed $article): ?PersonStatus {
        if (array_key_exists("missing", $article) || array_key_exists("invalid", $article))
            return null;

        // `categories` may be absent (e.g. a page without any categories), so fall back to an empty list.
        $category_titles = array_column($article["categories"] ?? [], "title");

        // Matches year/decade death categories, as well as the categories for an unknown or missing year of death.
        $dead_regex = "/^Category:([0-9]{1,4}s? (BC |AD )?(deaths|suicides)|Year of death (missing|unknown))$/";
        foreach ($category_titles as $category_title) {
            if (preg_match($dead_regex, $category_title))
                return PersonStatus::Dead;
        }

        if (in_array("Category:Possibly living people", $category_titles, true))
            return PersonStatus::PossiblyAlive;
        if (in_array("Category:Missing people", $category_titles, true))
            return PersonStatus::Missing;
        if (in_array("Category:Living people", $category_titles, true))
            return PersonStatus::Alive;

        return null;
    }

    /**
     * Checks for all {@see $names} what their current {@see ArticleType} and {@see PersonStatus} is according to
     * Wikipedia.
*
     * @param string[] $names the names of the people to look up
     * @param bool $resolve_moves `true` if and only if deleted pages that were moved should be resolved
     * @return QueryOutput a `QueryOutput` whose {@see QueryOutput::$results} maps each article title to an array with
     * the article's `ArticleType` under `type` and its `PersonStatus` (or `null`) under `status`
     * @throws WikipediaException if the query fails
     */
    public function query_people_info(array $names, bool $resolve_moves): QueryOutput {
        $query_output = $this->api_query_batched_resolve_moves(
            ["prop" => "categories", "cllimit" => strval(self::CATS_PER_QUERY)],
            $names,
            "clcontinue",
            $resolve_moves
        );

        // Build the keys and the values separately, then zip them into a `title => info` map.
        $article_titles = array_column($query_output->results, "title");
        $describe_article = fn($article) => [
            "type" => $this->article_type($article),
            "status" => $this->person_status($article),
        ];
        $article_infos = array_map($describe_article, $query_output->results);

        return new QueryOutput(
            array_combine($article_titles, $article_infos),
            $query_output->redirects,
            $query_output->missing
        );
    }
}