pokerogue/scripts/scrape-trainer-names/fetch-names.js
Sirz Benjie 6766940fa1
[Misc] Make the repo REUSE compliant (#6474)
* Add license information

* Add reuse lint workflow

* Add snippets for spdx

* fix: minor wording adjustments and typo fixes

Co-authored-by: NightKev <34855794+DayKev@users.noreply.github.com>

* chore: add FileContributor attributions for Bertie

Co-authored-by: Bertie690 <136088738+Bertie690@users.noreply.github.com>

* begin licensing some audio assets

* Add pokemon reborn sound affect attribution

* Annotate Leavannite's section

* Add more licensing info

* Add license info to license files ._.

* Move ps1 files out of public

* Add license for animation jsons

* Add license for bat scripts in public

* Update licensing in scripts

* Fix typo in license ref

* Fix AGPL-3.0-or-later

* Add license info to typedoc.config.js

* Add MIT license for snippets

* chore: update license info for files in scripts

* chore: update license info

* chore: update license info

* chore: update license info

* Remove licenses used only by public before linting with reuse

* Add license info to new files added by docker PR

* chore: apply biome

* fix: add back linting workflow lost during merge

* Add attribution based on Hanniel's information

* Add attribution based on Officer Porkchop and Green Ninja's information

* add attribution to unicorn_power for reshiram/zekrom/kyurem epic variant

* Fixup minor typo

* Adjust sprite test to not think REUSE.toml is a sprite json

* Add missing continue-on-error to workflow

* fix: address kev's comments from code review

* docs: minor touchups

---------

Co-authored-by: NightKev <34855794+DayKev@users.noreply.github.com>
Co-authored-by: Bertie690 <136088738+Bertie690@users.noreply.github.com>
2025-09-23 08:49:03 -05:00

84 lines
3.2 KiB
JavaScript

/*
* SPDX-FileCopyrightText: 2024-2025 Pagefault Games
* SPDX-FileContributor: Bertie690
*
* SPDX-License-Identifier: AGPL-3.0-only
*/
/**
* @import { parsedNames } from "./types.js";
*/
/**
* An error code for a bad URL.
*/
export const INVALID_URL = /** @type {const} */ ("bad_url_code");
/**
* Fetch a given trainer's names from the given HTML document.
* @param {HTMLElement | null | undefined} trainerListHeader - The header containing the trainer lists
* @param {boolean} [knownFemale=false] - Whether the class is known to be female; default `false`
* @returns {parsedNames | INVALID_URL}
* An object containing the parsed names. \
* Will instead return with {@linkcode INVALID_URL} if the data is invalid.
*/
export function fetchNames(trainerListHeader, knownFemale = false) {
const trainerNames = /** @type {Set<string>} */ (new Set());
const femaleTrainerNames = /** @type {Set<string>} */ (new Set());
if (!trainerListHeader?.parentElement?.childNodes) {
// Return early if no child nodes (ie tables) can be found
return INVALID_URL;
}
const elements = [...trainerListHeader.parentElement.childNodes];
// Find all elements within the "Trainer Names" header and selectively filter to find the name tables.
const startChildIndex = elements.indexOf(trainerListHeader);
const endChildIndex = elements.findIndex(h => h.nodeName === "H2" && elements.indexOf(h) > startChildIndex);
// Grab all the trainer name tables sorted by generation
const tables = elements.slice(startChildIndex, endChildIndex).filter(
/** @type {(t: ChildNode) => t is HTMLTableElement} */
(
t =>
// Only grab expandable tables within the header block
t.nodeName === "TABLE" && /** @type {HTMLTableElement} */ (t)["className"] === "expandable"
),
);
parseTable(tables, knownFemale, trainerNames, femaleTrainerNames);
return {
male: Array.from(trainerNames),
female: Array.from(femaleTrainerNames),
};
}
/**
* Parse the table in question.
* @param {HTMLTableElement[]} tables - The array of Elements forming the current table
* @param {boolean} isFemale - Whether the trainer is known to be female or not
* @param {Set<string>} trainerNames A Set containing the male trainer names
* @param {Set<string>} femaleTrainerNames - A Set containing the female trainer names
*/
function parseTable(tables, isFemale, trainerNames, femaleTrainerNames) {
for (const table of tables) {
// Grab all rows past the first header with exactly 9 children in them (Name, Battle, Winnings, 6 party slots)
const trainerRows = [...table.rows].slice(1).filter(r => r.children.length === 9);
for (const row of trainerRows) {
const content = row.firstElementChild?.innerHTML;
// Skip empty elements & ones without anchors
if (!content || content?.indexOf(" <a ") === -1) {
continue;
}
/** Whether the name is female */
const female = isFemale || content.includes("♀");
// Grab the plaintext name part with an optional ampersand
const nameMatch = />([a-z]+(?: &amp; [a-z]+)?)<\/a>/i.exec(content);
if (!nameMatch) {
continue;
}
(female ? femaleTrainerNames : trainerNames).add(nameMatch[1].replace("&amp;", "&"));
}
}
}