From 85002ac8be9e8f37683c1163c0a6370aa94058c1 Mon Sep 17 00:00:00 2001 From: Bertie690 Date: Wed, 13 Aug 2025 00:13:01 -0400 Subject: [PATCH 1/7] Removed scrape trainer names --- src/data/trainer-names.ts | 165 -------------------------------------- 1 file changed, 165 deletions(-) delete mode 100644 src/data/trainer-names.ts diff --git a/src/data/trainer-names.ts b/src/data/trainer-names.ts deleted file mode 100644 index 8eafd9f6404..00000000000 --- a/src/data/trainer-names.ts +++ /dev/null @@ -1,165 +0,0 @@ -import { TrainerType } from "#enums/trainer-type"; -import { toPascalSnakeCase } from "#utils/strings"; - -class TrainerNameConfig { - public urls: string[]; - public femaleUrls: string[] | null; - - constructor(type: TrainerType, ...urls: string[]) { - this.urls = urls.length ? urls : [toPascalSnakeCase(TrainerType[type])]; - } - - hasGenderVariant(...femaleUrls: string[]): TrainerNameConfig { - this.femaleUrls = femaleUrls.length ? femaleUrls : null; - return this; - } -} - -interface TrainerNameConfigs { - [key: number]: TrainerNameConfig; -} - -// used in a commented code -// biome-ignore lint/correctness/noUnusedVariables: Used by commented code -const trainerNameConfigs: TrainerNameConfigs = { - [TrainerType.ACE_TRAINER]: new TrainerNameConfig(TrainerType.ACE_TRAINER), - [TrainerType.ARTIST]: new TrainerNameConfig(TrainerType.ARTIST), - [TrainerType.BACKERS]: new TrainerNameConfig(TrainerType.BACKERS), - [TrainerType.BACKPACKER]: new TrainerNameConfig(TrainerType.BACKPACKER), - [TrainerType.BAKER]: new TrainerNameConfig(TrainerType.BAKER), - [TrainerType.BEAUTY]: new TrainerNameConfig(TrainerType.BEAUTY), - [TrainerType.BIKER]: new TrainerNameConfig(TrainerType.BIKER), - [TrainerType.BLACK_BELT]: new TrainerNameConfig(TrainerType.BLACK_BELT).hasGenderVariant("Battle_Girl"), - [TrainerType.BREEDER]: new TrainerNameConfig(TrainerType.BREEDER, "Pokémon_Breeder"), - [TrainerType.CLERK]: new TrainerNameConfig(TrainerType.CLERK), - [TrainerType.CYCLIST]: new 
TrainerNameConfig(TrainerType.CYCLIST), - [TrainerType.DANCER]: new TrainerNameConfig(TrainerType.DANCER), - [TrainerType.DEPOT_AGENT]: new TrainerNameConfig(TrainerType.DEPOT_AGENT), - [TrainerType.DOCTOR]: new TrainerNameConfig(TrainerType.DOCTOR).hasGenderVariant("Nurse"), - [TrainerType.FIREBREATHER]: new TrainerNameConfig(TrainerType.FIREBREATHER), - [TrainerType.FISHERMAN]: new TrainerNameConfig(TrainerType.FISHERMAN), - [TrainerType.GUITARIST]: new TrainerNameConfig(TrainerType.GUITARIST), - [TrainerType.HARLEQUIN]: new TrainerNameConfig(TrainerType.HARLEQUIN), - [TrainerType.HIKER]: new TrainerNameConfig(TrainerType.HIKER), - [TrainerType.HOOLIGANS]: new TrainerNameConfig(TrainerType.HOOLIGANS), - [TrainerType.HOOPSTER]: new TrainerNameConfig(TrainerType.HOOPSTER), - [TrainerType.INFIELDER]: new TrainerNameConfig(TrainerType.INFIELDER), - [TrainerType.JANITOR]: new TrainerNameConfig(TrainerType.JANITOR), - [TrainerType.LINEBACKER]: new TrainerNameConfig(TrainerType.LINEBACKER), - [TrainerType.MAID]: new TrainerNameConfig(TrainerType.MAID), - [TrainerType.MUSICIAN]: new TrainerNameConfig(TrainerType.MUSICIAN), - [TrainerType.HEX_MANIAC]: new TrainerNameConfig(TrainerType.HEX_MANIAC), - [TrainerType.NURSERY_AIDE]: new TrainerNameConfig(TrainerType.NURSERY_AIDE), - [TrainerType.OFFICER]: new TrainerNameConfig(TrainerType.OFFICER), - [TrainerType.PARASOL_LADY]: new TrainerNameConfig(TrainerType.PARASOL_LADY), - [TrainerType.PILOT]: new TrainerNameConfig(TrainerType.PILOT), - [TrainerType.POKEFAN]: new TrainerNameConfig(TrainerType.POKEFAN, "Poké_Fan"), - [TrainerType.PRESCHOOLER]: new TrainerNameConfig(TrainerType.PRESCHOOLER), - [TrainerType.PSYCHIC]: new TrainerNameConfig(TrainerType.PSYCHIC), - [TrainerType.RANGER]: new TrainerNameConfig(TrainerType.RANGER), - [TrainerType.RICH]: new TrainerNameConfig(TrainerType.RICH, "Gentleman").hasGenderVariant("Madame"), - [TrainerType.RICH_KID]: new TrainerNameConfig(TrainerType.RICH_KID, 
"Rich_Boy").hasGenderVariant("Lady"), - [TrainerType.ROUGHNECK]: new TrainerNameConfig(TrainerType.ROUGHNECK), - [TrainerType.SAILOR]: new TrainerNameConfig(TrainerType.SAILOR), - [TrainerType.SCIENTIST]: new TrainerNameConfig(TrainerType.SCIENTIST), - [TrainerType.SMASHER]: new TrainerNameConfig(TrainerType.SMASHER), - [TrainerType.SNOW_WORKER]: new TrainerNameConfig(TrainerType.SNOW_WORKER, "Worker"), - [TrainerType.STRIKER]: new TrainerNameConfig(TrainerType.STRIKER), - [TrainerType.SCHOOL_KID]: new TrainerNameConfig(TrainerType.SCHOOL_KID, "School_Kid"), - [TrainerType.SWIMMER]: new TrainerNameConfig(TrainerType.SWIMMER), - [TrainerType.TWINS]: new TrainerNameConfig(TrainerType.TWINS), - [TrainerType.VETERAN]: new TrainerNameConfig(TrainerType.VETERAN), - [TrainerType.WAITER]: new TrainerNameConfig(TrainerType.WAITER).hasGenderVariant("Waitress"), - [TrainerType.WORKER]: new TrainerNameConfig(TrainerType.WORKER), - [TrainerType.YOUNGSTER]: new TrainerNameConfig(TrainerType.YOUNGSTER).hasGenderVariant("Lass"), -}; - -// function used in a commented code -// biome-ignore lint/correctness/noUnusedVariables: TODO make this into a script instead of having it be in src/data... -function fetchAndPopulateTrainerNames( - url: string, - parser: DOMParser, - trainerNames: Set, - femaleTrainerNames: Set, - forceFemale = false, -) { - return new Promise(resolve => { - fetch(`https://bulbapedia.bulbagarden.net/wiki/${url}_(Trainer_class)`) - .then(response => response.text()) - .then(html => { - console.log(url); - const htmlDoc = parser.parseFromString(html, "text/html"); - const trainerListHeader = htmlDoc.querySelector("#Trainer_list")?.parentElement; - if (!trainerListHeader) { - return []; - } - const elements = [...(trainerListHeader?.parentElement?.childNodes ?? 
[])]; - const startChildIndex = elements.indexOf(trainerListHeader); - const endChildIndex = elements.findIndex(h => h.nodeName === "H2" && elements.indexOf(h) > startChildIndex); - const tables = elements - .filter(t => { - if (t.nodeName !== "TABLE" || t["className"] !== "expandable") { - return false; - } - const childIndex = elements.indexOf(t); - return childIndex > startChildIndex && childIndex < endChildIndex; - }) - .map(t => t as Element); - console.log(url, tables); - for (const table of tables) { - const trainerRows = [...table.querySelectorAll("tr:not(:first-child)")].filter(r => r.children.length === 9); - for (const row of trainerRows) { - const nameCell = row.firstElementChild; - if (!nameCell) { - continue; - } - const content = nameCell.innerHTML; - if (content.indexOf(" -1) { - const female = /♀/.test(content); - if (url === "Twins") { - console.log(content); - } - const nameMatch = />([a-z]+(?: & [a-z]+)?)<\/a>/i.exec(content); - if (nameMatch) { - (female || forceFemale ? 
femaleTrainerNames : trainerNames).add(nameMatch[1].replace("&", "&")); - } - } - } - } - resolve(); - }); - }); -} - -/*export function scrapeTrainerNames() { - const parser = new DOMParser(); - const trainerTypeNames = {}; - const populateTrainerNamePromises: Promise[] = []; - for (let t of Object.keys(trainerNameConfigs)) { - populateTrainerNamePromises.push(new Promise(resolve => { - const trainerType = t; - trainerTypeNames[trainerType] = []; - - const config = trainerNameConfigs[t] as TrainerNameConfig; - const trainerNames = new Set(); - const femaleTrainerNames = new Set(); - console.log(config.urls, config.femaleUrls) - const trainerClassRequests = config.urls.map(u => fetchAndPopulateTrainerNames(u, parser, trainerNames, femaleTrainerNames)); - if (config.femaleUrls) - trainerClassRequests.push(...config.femaleUrls.map(u => fetchAndPopulateTrainerNames(u, parser, null, femaleTrainerNames, true))); - Promise.all(trainerClassRequests).then(() => { - console.log(trainerNames, femaleTrainerNames) - trainerTypeNames[trainerType] = !femaleTrainerNames.size ? 
Array.from(trainerNames) : [ Array.from(trainerNames), Array.from(femaleTrainerNames) ]; - resolve(); - }); - })); - } - Promise.all(populateTrainerNamePromises).then(() => { - let output = 'export const trainerNamePools = {'; - Object.keys(trainerTypeNames).forEach(t => { - output += `\n\t[TrainerType.${TrainerType[t]}]: ${JSON.stringify(trainerTypeNames[t])},`; - }); - output += `\n};`; - console.log(output); - }); -}*/ From fb8562d47c2a696f5feaf52bdb9b400bdcc9d457 Mon Sep 17 00:00:00 2001 From: Bertie690 Date: Wed, 13 Aug 2025 00:13:26 -0400 Subject: [PATCH 2/7] Removed `selfStatLowerMoves` --- src/data/moves/move.ts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/data/moves/move.ts b/src/data/moves/move.ts index 067bd05c2ae..b7951e03c00 100644 --- a/src/data/moves/move.ts +++ b/src/data/moves/move.ts @@ -8500,8 +8500,6 @@ const MoveAttrs = Object.freeze({ /** Map of of move attribute names to their constructors */ export type MoveAttrConstructorMap = typeof MoveAttrs; -export const selfStatLowerMoves: MoveId[] = []; - export function initMoves() { allMoves.push( new SelfStatusMove(MoveId.NONE, PokemonType.NORMAL, MoveCategory.STATUS, -1, -1, 0, 1), @@ -11542,10 +11540,5 @@ export function initMoves() { .condition(new UpperHandCondition()), new AttackMove(MoveId.MALIGNANT_CHAIN, PokemonType.POISON, MoveCategory.SPECIAL, 100, 100, 5, 50, 0, 9) .attr(StatusEffectAttr, StatusEffect.TOXIC) - ); - allMoves.map(m => { - if (m.getAttrs("StatStageChangeAttr").some(a => a.selfTarget && a.stages < 0)) { - selfStatLowerMoves.push(m.id); - } - }); + ) } From 4939a9f6f466dff0d7944855602773b4af128dee Mon Sep 17 00:00:00 2001 From: Bertie690 <136088738+Bertie690@users.noreply.github.com> Date: Wed, 13 Aug 2025 08:29:04 -0400 Subject: [PATCH 3/7] Update move.ts Co-authored-by: NightKev <34855794+DayKev@users.noreply.github.com> --- src/data/moves/move.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/moves/move.ts 
b/src/data/moves/move.ts index b7951e03c00..a44a033b137 100644 --- a/src/data/moves/move.ts +++ b/src/data/moves/move.ts @@ -11540,5 +11540,5 @@ export function initMoves() { .condition(new UpperHandCondition()), new AttackMove(MoveId.MALIGNANT_CHAIN, PokemonType.POISON, MoveCategory.SPECIAL, 100, 100, 5, 50, 0, 9) .attr(StatusEffectAttr, StatusEffect.TOXIC) - ) + ); } From 43e9d82b26bd11313a3e17193d75fb482d70deaa Mon Sep 17 00:00:00 2001 From: Bertie690 Date: Sat, 16 Aug 2025 18:15:17 -0400 Subject: [PATCH 4/7] Started work on the script again --- package.json | 1 + scripts/helpers/strings.js | 179 +++++++++++++++++++ scripts/scrape-trainer-names/check-gender.js | 45 +++++ scripts/scrape-trainer-names/fetch-names.js | 92 ++++++++++ scripts/scrape-trainer-names/help.js | 0 scripts/scrape-trainer-names/main.js | 36 ++++ scripts/scrape-trainer-names/types.js | 7 + src/data/abilities/ability.ts | 7 +- src/data/moves/move.ts | 4 +- typedoc.json | 2 +- 10 files changed, 367 insertions(+), 6 deletions(-) create mode 100644 scripts/helpers/strings.js create mode 100644 scripts/scrape-trainer-names/check-gender.js create mode 100644 scripts/scrape-trainer-names/fetch-names.js create mode 100644 scripts/scrape-trainer-names/help.js create mode 100644 scripts/scrape-trainer-names/main.js create mode 100644 scripts/scrape-trainer-names/types.js diff --git a/package.json b/package.json index d3494da677c..3bf4b971e21 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "test:watch": "vitest watch --coverage --no-isolate", "test:silent": "vitest run --silent='passed-only' --no-isolate", "test:create": "node scripts/create-test/create-test.js", + "scrape-trainers": "node scripts/scrape-trainer-names/main.js", "typecheck": "tsc --noEmit", "eslint": "eslint --fix .", "eslint-ci": "eslint .", diff --git a/scripts/helpers/strings.js b/scripts/helpers/strings.js new file mode 100644 index 00000000000..3692bcfdd01 --- /dev/null +++ b/scripts/helpers/strings.js @@ -0,0 +1,179 
@@ +// #region Split string code +// Regexps involved with splitting words in various case formats. +// Sourced from https://www.npmjs.com/package/change-case (with slight tweaking here and there) + +/** + * Regex to split at word boundaries. + * @type {RegExp} + */ +const SPLIT_LOWER_UPPER_RE = /([\p{Ll}\d])(\p{Lu})/gu; +/** + * Regex to split around single-letter uppercase words. + * @type {RegExp} + */ +const SPLIT_UPPER_UPPER_RE = /(\p{Lu})([\p{Lu}][\p{Ll}])/gu; +/** + * Regexp involved with stripping non-word delimiters from the result. + * @type {RegExp} + */ +const DELIM_STRIP_REGEXP = /[-_ ]+/giu; +// The replacement value for splits. +const SPLIT_REPLACE_VALUE = "$1\0$2"; + +/** + * Split any cased string into an array of its constituent words. + * @param {string} value + * @returns {string[]} The new string, delimited at each instance of one or more spaces, underscores, hyphens + * or lower-to-upper boundaries. + */ +function splitWords(value) { + let result = value.trim(); + result = result.replace(SPLIT_LOWER_UPPER_RE, SPLIT_REPLACE_VALUE).replace(SPLIT_UPPER_UPPER_RE, SPLIT_REPLACE_VALUE); + result = result.replace(DELIM_STRIP_REGEXP, "\0"); + // Trim the delimiter from around the output string + return trimFromStartAndEnd(result, "\0").split(/\0/g); +} + +/** + * Helper function to remove one or more sequences of characters from either end of a string. 
+ * @param {string} str - The string to replace + * @param {string} charToTrim - The string to remove + * @returns {string} The string having been trimmed + */ +function trimFromStartAndEnd(str, charToTrim) { + let start = 0; + let end = str.length; + const blockLength = charToTrim.length; + while (str.startsWith(charToTrim, start)) { + start += blockLength; + } + if (start - end === blockLength) { + // Occurs if the ENTIRE string is made up of charToTrim (at which point we return nothing) + return ""; + } + while (str.endsWith(charToTrim, end)) { + end -= blockLength; + } + return str.slice(start, end); +} +// #endregion Split String code + +/** + * Capitalize the first letter of a string. + * @example + * ```ts + * console.log(capitalizeFirstLetter("consectetur adipiscing elit")); // returns "Consectetur adipiscing elit" + * ``` + * @param {string} str - The string whose first letter is to be capitalized + * @return {string} The original string with its first letter capitalized. + */ +export function capitalizeFirstLetter(str) { + return str.charAt(0).toUpperCase() + str.slice(1); +} + +/** + * Helper method to convert a string into `Title Case` (such as one used for console logs). + * @example + * ```ts + * console.log(toTitleCase("lorem ipsum dolor sit amet")); // returns "Lorem Ipsum Dolor Sit Amet" + * ``` + * @param {string} str - The string being converted + * @returns {string} The result of converting `str` into title case. + */ +export function toTitleCase(str) { + return splitWords(str) + .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) + .join(" "); +} + +/** + * Helper method to convert a string into `camelCase` (such as one used for i18n keys). + * @example + * ```ts + * console.log(toCamelCase("BIG_ANGRY_TRAINER")); // returns "bigAngryTrainer" + * ``` + * @param {string} str - The string being converted + * @returns {string} The result of converting `str` into camel case. 
+ */ +export function toCamelCase(str) { + return splitWords(str) + .map((word, index) => + index === 0 ? word.toLowerCase() : word.charAt(0).toUpperCase() + word.slice(1).toLowerCase(), + ) + .join(""); +} + +/** + * Helper method to convert a string into `PascalCase`. + * @example + * ```ts + * console.log(toPascalCase("hi how was your day")); // returns "HiHowWasYourDay" + * ``` + * @param {string} str - The string being converted + * @returns {string} The result of converting `str` into pascal case. + */ +export function toPascalCase(str) { + return splitWords(str) + .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) + .join(""); +} + +/** + * Helper method to convert a string into `kebab-case` (such as one used for filenames). + * @example + * ```ts + * console.log(toKebabCase("not_kebab-caSe String")); // returns "not-kebab-case-string" + * ``` + * @param {string} str - The string being converted + * @returns {string} The result of converting `str` into kebab case. + */ +export function toKebabCase(str) { + return splitWords(str) + .map(word => word.toLowerCase()) + .join("-"); +} + +/** + * Helper method to convert a string into `snake_case` (such as one used for filenames). + * @example + * ```ts + * console.log(toSnakeCase("not-in snake_CaSe")); // returns "not_in_snake_case" + * ``` + * @param {string} str - The string being converted + * @returns {string} The result of converting `str` into snake case. + */ +export function toSnakeCase(str) { + return splitWords(str) + .map(word => word.toLowerCase()) + .join("_"); +} + +/** + * Helper method to convert a string into `UPPER_SNAKE_CASE`. + * @example + * ```ts + * console.log(toUpperSnakeCase("apples bananas_oranGes-PearS")); // returns "APPLES_BANANAS_ORANGES_PEARS" + * ``` + * @param {string} str - The string being converted + * @returns {string} The result of converting `str` into upper snake case. 
+ */ +export function toUpperSnakeCase(str) { + return splitWords(str) + .map(word => word.toUpperCase()) + .join("_"); +} + +/** + * Helper method to convert a string into `Pascal_Snake_Case`. + * @example + * ```ts + * console.log(toPascalSnakeCase("apples-bananas_oranGes Pears")); // returns "Apples_Bananas_Oranges_Pears" + * ``` + * @param {string} str - The string being converted + * @returns {string} The result of converting `str` into pascal snake case. + */ +export function toPascalSnakeCase(str) { + return splitWords(str) + .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) + .join("_"); +} diff --git a/scripts/scrape-trainer-names/check-gender.js b/scripts/scrape-trainer-names/check-gender.js new file mode 100644 index 00000000000..641305708bf --- /dev/null +++ b/scripts/scrape-trainer-names/check-gender.js @@ -0,0 +1,45 @@ +/** + * Check if the given trainer class is female. + * @param {Document} document - The HTML document to scrape + * @returns {[gender: boolean, counterpartURL?: string]} A 2-length tuple containing: + * 1. The trainer class' normal gender + * 2. A URL to the gender counterpart of the current class (if the trainer has one). + */ +export function checkGenderAndType(document) { + const infoBox = document.getElementById("infobox"); + if (!infoBox) { + return [false]; + } + // Find the row of the table containing the specified gender + const children = [...infoBox.childNodes]; + const genderCell = children.find( + node => node.nodeName === "tr" && [...node.childNodes].some(c => c.textContent?.includes("Gender")), + )?.parentElement; + if (!genderCell) { + return [false]; + } + + const gender = getGender(genderCell.querySelector("tr")); + const hrefExtractRegex = /href="\/wiki\/(.*)_(Trainer_class)"/g; + const counterpartURL = genderCell.querySelector("td")?.getHTML().match(hrefExtractRegex)?.[1]; + + return [gender, counterpartURL]; +} + +/** + * Retrieve the gender from the given node text. 
+ * @param {HTMLTableRowElement?} genderCell - The cell to check + * @returns {boolean} The gender type + * @todo Handle trainers whose gender type has changed across different gens (Artists, etc.) + */ +function getGender(genderCell) { + switch (genderCell?.textContent) { + case "Female Only": + return false; + case "Male Only": + case "Both": + case undefined: + default: + return true; + } +} diff --git a/scripts/scrape-trainer-names/fetch-names.js b/scripts/scrape-trainer-names/fetch-names.js new file mode 100644 index 00000000000..03e7c5b66f6 --- /dev/null +++ b/scripts/scrape-trainer-names/fetch-names.js @@ -0,0 +1,92 @@ +import chalk from "chalk"; +import { JSDOM } from "jsdom"; +import { checkGenderAndType } from "./check-gender.js"; + +/** + * @import { nameRecord, parsedNames } from "./types.js"; + */ + +/** + * Fetch a given trainer's names from the given URL. + * @param {string} url - The URL to parse + * @param {boolean} [currGender] - The current class' known gender. + * If provided, will override the natural gender detection with the given gender and avoid + * checking any gender counterparts. + * @returns {Promise} A Promise that resolves with the parsed names once the parsing concludes. + * Will resolve with an empty array if the name could not be parsed. + */ +export async function fetchNames(url, currGender) { + const { document } = (await JSDOM.fromURL(`https://bulbapedia.bulbagarden.net/wiki/${url}_(Trainer_class)`)).window; + const trainerListHeader = document.querySelector("#Trainer_list")?.parentElement; + if (!trainerListHeader?.parentElement?.childNodes) { + console.warn(chalk.hex("#ffa500")(`URL ${url} did not correspond to a valid trainer class!`)); + return { male: [], female: [] }; + } + + let trainerNames = /** @type {Set} */ (new Set()); + let femaleTrainerNames = /** @type {Set} */ (new Set()); + + // If we don't know whether this class is female, check, optionally recursing into the counterpart's webpage as well. 
+ if (currGender === undefined) { + /** @type {string | undefined} */ + let counterpartURL; + [currGender, counterpartURL] = checkGenderAndType(document); + if (counterpartURL) { + console.log(chalk.green(`Accessing gender counterpart URL: ${counterpartURL}`)); + const names = await fetchNames(counterpartURL, !currGender); + trainerNames = new Set(names.male); + femaleTrainerNames = new Set(names.female); + } + } + + const elements = [...trainerListHeader.parentElement.childNodes]; + + // Find all elements within the "Trainer Names" header and selectively filter to find the name tables. + const startChildIndex = elements.indexOf(trainerListHeader); + const endChildIndex = elements.findIndex(h => h.nodeName === "H2" && elements.indexOf(h) > startChildIndex); + + // Grab all the trainer name tables sorted by generation + const tables = elements.slice(startChildIndex, endChildIndex).filter( + /** @type {(t: ChildNode) => t is Element} */ + ( + t => + // Only grab expandable tables within the header block + t.nodeName === "TABLE" && t["className"] === "expandable" + ), + ); + + parseTable(tables, currGender, trainerNames, femaleTrainerNames); + return { + male: Array.from(trainerNames), + female: Array.from(femaleTrainerNames), + }; +} + +/** + * Parse the table in question. 
+ * @param {Element[]} tables - The array of Elements forming the current table + * @param {boolean} isFemale - Whether the trainer is known to be female or not + * @param {Set} trainerNames A Set containing the male trainer names + * @param {Set} femaleTrainerNames - A Set containing the female trainer names + */ +function parseTable(tables, isFemale, trainerNames, femaleTrainerNames) { + for (const table of tables) { + // Grab all rows past the first header with exactly 9 children in them (Name, Battle, Winnings, 6 party slots) + const trainerRows = [...table.querySelectorAll("tr:not(:first-child)")].filter(r => r.children.length === 9); + for (const row of trainerRows) { + const content = row.firstElementChild?.innerHTML; + // Skip empty elements & ones without anchors + if (!content || content?.indexOf(" ([a-z]+(?: & [a-z]+)?)<\/a>/i.exec(content); + if (!nameMatch) { + continue; + } + (female ? femaleTrainerNames : trainerNames).add(nameMatch[1].replace("&", "&")); + } + } +} diff --git a/scripts/scrape-trainer-names/help.js b/scripts/scrape-trainer-names/help.js new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/scrape-trainer-names/main.js b/scripts/scrape-trainer-names/main.js new file mode 100644 index 00000000000..98f7860442f --- /dev/null +++ b/scripts/scrape-trainer-names/main.js @@ -0,0 +1,36 @@ +import { toCamelCase, toPascalSnakeCase } from "../helpers/strings.js"; +import { fetchNames } from "./fetch-names.js"; + +/** + * @packageDocumentation + * This script will scrape Bulbapedia for the English names of a given trainer class, + * outputting them as JSON. + * Usage: + */ + +/** + * Scrape the requested trainer names and format the resultant output. + * @param {...string} classes The names of the trainer classes to retrieve + * @returns {Promise} A Promise that resolves with the finished text. 
+ */ +async function scrapeTrainerNames(...classes) { + /** + * A large object mapping each class to their corresponding list of trainer names. \ + * Trainer classes with only 1 gender will only contain the single array for that gender. + * @type {Record} + */ + const nameTuples = Object.fromEntries( + await Promise.all( + classes.map(async trainerClass => { + // Bulba URLs use Pascal_Snake_Case (Bug_Catcher) + const classURL = toPascalSnakeCase(trainerClass); + const names = await fetchNames(classURL); + const namesObj = names.female.length === 0 ? names.male : names; + return [toCamelCase(trainerClass), namesObj]; + }), + ), + ); + return JSON.stringify(nameTuples, null, 2); +} + +console.log(await scrapeTrainerNames("doctor")); diff --git a/scripts/scrape-trainer-names/types.js b/scripts/scrape-trainer-names/types.js new file mode 100644 index 00000000000..f2de7bc487e --- /dev/null +++ b/scripts/scrape-trainer-names/types.js @@ -0,0 +1,7 @@ +/** + * @typedef {Object} + * parsedNames + * A parsed object containing the desired names. + * @property {string[]} male + * @property {string[]} female + */ diff --git a/src/data/abilities/ability.ts b/src/data/abilities/ability.ts index f5fd9b19f72..1f06aae5546 100644 --- a/src/data/abilities/ability.ts +++ b/src/data/abilities/ability.ts @@ -1760,7 +1760,7 @@ export class PokemonTypeChangeAbAttr extends PreAttackAbAttr { * Parameters for abilities that modify the hit count and damage of a move */ export interface AddSecondStrikeAbAttrParams extends Omit { - /** Holder for the number of hits. May be modified by ability application */ + /** Holder for the number of hits. 
May be modified by ability application */ hitCount?: NumberHolder; /** Holder for the damage multiplier _of the current hit_ */ multiplier?: NumberHolder; @@ -5816,7 +5816,7 @@ export class NoFusionAbilityAbAttr extends AbAttr { export interface IgnoreTypeImmunityAbAttrParams extends AbAttrBaseParams { /** The type of the move being used */ readonly moveType: PokemonType; - /** The type being checked for */ + /** The type being checked for */ readonly defenderType: PokemonType; /** Holds whether the type immunity should be bypassed */ cancelled: BooleanHolder; @@ -6755,7 +6755,7 @@ function getPokemonWithWeatherBasedForms() { ); } -// biome-ignore format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`) +// biome-ignore-start format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`) export function initAbilities() { allAbilities.push( new Ability(AbilityId.NONE, 3), @@ -7867,3 +7867,4 @@ export function initAbilities() { .attr(ConfusionOnStatusEffectAbAttr, StatusEffect.POISON, StatusEffect.TOXIC) ); } +// biome-ignore-end format: prevent biome from removing the newlines (e.g. 
prevent `new Ability(...).attr(...)`) diff --git a/src/data/moves/move.ts b/src/data/moves/move.ts index a44a033b137..520bf778172 100644 --- a/src/data/moves/move.ts +++ b/src/data/moves/move.ts @@ -5916,8 +5916,8 @@ export class ProtectAttr extends AddBattlerTagAttr { for (const turnMove of user.getLastXMoves(-1).slice()) { if ( // Quick & Wide guard increment the Protect counter without using it for fail chance - !(allMoves[turnMove.move].hasAttr("ProtectAttr") || - [MoveId.QUICK_GUARD, MoveId.WIDE_GUARD].includes(turnMove.move)) || + !(allMoves[turnMove.move].hasAttr("ProtectAttr") || + [MoveId.QUICK_GUARD, MoveId.WIDE_GUARD].includes(turnMove.move)) || turnMove.result !== MoveResult.SUCCESS ) { break; diff --git a/typedoc.json b/typedoc.json index c34e6190c1a..e4ab2d8dcc4 100644 --- a/typedoc.json +++ b/typedoc.json @@ -1,7 +1,7 @@ { "entryPoints": ["./src"], "entryPointStrategy": "expand", - "exclude": ["**/*+.test.ts"], + "exclude": ["**/*+.test.ts", "**/src/data/trainer-names.ts"], "out": "typedoc", "highlightLanguages": ["javascript", "json", "jsonc", "json5", "tsx", "typescript", "markdown"] } From 78efc5d1302118d76d4d23271e4ae11008ed6d4b Mon Sep 17 00:00:00 2001 From: Bertie690 Date: Mon, 18 Aug 2025 00:36:44 -0400 Subject: [PATCH 5/7] Made script actually work --- scripts/scrape-trainer-names/check-gender.js | 52 ++-- scripts/scrape-trainer-names/fetch-names.js | 54 ++-- scripts/scrape-trainer-names/help-message.js | 16 ++ scripts/scrape-trainer-names/main.js | 278 +++++++++++++++++-- scripts/scrape-trainer-names/types.js | 2 + 5 files changed, 321 insertions(+), 81 deletions(-) create mode 100644 scripts/scrape-trainer-names/help-message.js diff --git a/scripts/scrape-trainer-names/check-gender.js b/scripts/scrape-trainer-names/check-gender.js index 641305708bf..0a8d2aecdaa 100644 --- a/scripts/scrape-trainer-names/check-gender.js +++ b/scripts/scrape-trainer-names/check-gender.js @@ -1,45 +1,53 @@ /** * Check if the given trainer class is female. 
* @param {Document} document - The HTML document to scrape - * @returns {[gender: boolean, counterpartURL?: string]} A 2-length tuple containing: - * 1. The trainer class' normal gender - * 2. A URL to the gender counterpart of the current class (if the trainer has one). + * @returns {[gender: boolean, counterpartURLs: string[]]} A 2-length tuple containing: + * 1. The trainer class' gender (female or not) + * 2. A list of all the current class' opposite-gender counterparts (if the trainer has any). */ export function checkGenderAndType(document) { - const infoBox = document.getElementById("infobox"); + const infoBox = document.getElementsByClassName("infobox")[0]; if (!infoBox) { - return [false]; + return [false, []]; } // Find the row of the table containing the specified gender - const children = [...infoBox.childNodes]; - const genderCell = children.find( - node => node.nodeName === "tr" && [...node.childNodes].some(c => c.textContent?.includes("Gender")), - )?.parentElement; - if (!genderCell) { - return [false]; + const children = [...infoBox.getElementsByTagName("tr")]; + const genderCell = children.find(node => [...node.childNodes].some(c => c.textContent?.includes("Gender"))); + const tableBox = genderCell?.querySelector("td"); + if (!tableBox) { + return [false, []]; } - const gender = getGender(genderCell.querySelector("tr")); - const hrefExtractRegex = /href="\/wiki\/(.*)_(Trainer_class)"/g; - const counterpartURL = genderCell.querySelector("td")?.getHTML().match(hrefExtractRegex)?.[1]; + const gender = getGender(tableBox); - return [gender, counterpartURL]; + // CHeck the cell's inner HTML for any `href`s to gender counterparts and scrape them too + const hrefExtractRegex = /href="\/wiki\/(.*?)_\(Trainer_class\)"/g; + const counterpartCell = children.find(node => [...node.childNodes].some(c => c.textContent?.includes("Counterpart"))); + + const counterpartURLs = []; + for (const url of counterpartCell?.innerHTML?.matchAll(hrefExtractRegex) ?? 
[]) { + counterpartURLs.push(url[1]); + } + + return [gender, counterpartURLs]; } /** * Retrieve the gender from the given node text. - * @param {HTMLTableRowElement?} genderCell - The cell to check + * @param {HTMLTableCellElement} genderCell - The cell to check * @returns {boolean} The gender type * @todo Handle trainers whose gender type has changed across different gens (Artists, etc.) */ function getGender(genderCell) { - switch (genderCell?.textContent) { - case "Female Only": - return false; - case "Male Only": - case "Both": + const gender = genderCell.textContent?.trim().toLowerCase() ?? ""; + + switch (gender) { + case "female only": + return true; + case "male only": + case "both": case undefined: default: - return true; + return false; } } diff --git a/scripts/scrape-trainer-names/fetch-names.js b/scripts/scrape-trainer-names/fetch-names.js index 03e7c5b66f6..e2c9bd3093a 100644 --- a/scripts/scrape-trainer-names/fetch-names.js +++ b/scripts/scrape-trainer-names/fetch-names.js @@ -1,42 +1,28 @@ -import chalk from "chalk"; -import { JSDOM } from "jsdom"; -import { checkGenderAndType } from "./check-gender.js"; - /** - * @import { nameRecord, parsedNames } from "./types.js"; + * @import { parsedNames } from "./types.js"; */ /** - * Fetch a given trainer's names from the given URL. - * @param {string} url - The URL to parse - * @param {boolean} [currGender] - The current class' known gender. - * If provided, will override the natural gender detection with the given gender and avoid - * checking any gender counterparts. - * @returns {Promise} A Promise that resolves with the parsed names once the parsing concludes. - * Will resolve with an empty array if the name could not be parsed. + * An error code for a bad URL. 
*/ -export async function fetchNames(url, currGender) { - const { document } = (await JSDOM.fromURL(`https://bulbapedia.bulbagarden.net/wiki/${url}_(Trainer_class)`)).window; - const trainerListHeader = document.querySelector("#Trainer_list")?.parentElement; +export const INVALID_URL = "bad_url_code"; + +/** @type {const} */ + +/** + * Fetch a given trainer's names from the given HTML document. + * @param {HTMLElement | null | undefined} trainerListHeader - The header containing the trainer lists + * @param {boolean} [knownFemale=false] - Whether the class is known to be female; default `false` + * @returns {parsedNames | INVALID_URL} + * An object containing the parsed names. \ + * Will instead return with {@linkcode INVALID_URL} if the data is invalid. + */ +export function fetchNames(trainerListHeader, knownFemale = false) { + const trainerNames = /** @type {Set} */ (new Set()); + const femaleTrainerNames = /** @type {Set} */ (new Set()); if (!trainerListHeader?.parentElement?.childNodes) { - console.warn(chalk.hex("#ffa500")(`URL ${url} did not correspond to a valid trainer class!`)); - return { male: [], female: [] }; - } - - let trainerNames = /** @type {Set} */ (new Set()); - let femaleTrainerNames = /** @type {Set} */ (new Set()); - - // If we don't know whether this class is female, check, optionally recursing into the counterpart's webpage as well. 
- if (currGender === undefined) { - /** @type {string | undefined} */ - let counterpartURL; - [currGender, counterpartURL] = checkGenderAndType(document); - if (counterpartURL) { - console.log(chalk.green(`Accessing gender counterpart URL: ${counterpartURL}`)); - const names = await fetchNames(counterpartURL, !currGender); - trainerNames = new Set(names.male); - femaleTrainerNames = new Set(names.female); - } + // Return early if no child nodes (ie tables) can be found + return INVALID_URL; } const elements = [...trainerListHeader.parentElement.childNodes]; @@ -55,7 +41,7 @@ export async function fetchNames(url, currGender) { ), ); - parseTable(tables, currGender, trainerNames, femaleTrainerNames); + parseTable(tables, knownFemale, trainerNames, femaleTrainerNames); return { male: Array.from(trainerNames), female: Array.from(femaleTrainerNames), diff --git a/scripts/scrape-trainer-names/help-message.js b/scripts/scrape-trainer-names/help-message.js new file mode 100644 index 00000000000..fe922a5e988 --- /dev/null +++ b/scripts/scrape-trainer-names/help-message.js @@ -0,0 +1,16 @@ +import chalk from "chalk"; + +/** Show help/usage text for the `scrape-trainers` CLI. */ +export function showHelpText() { + console.log(` +Usage: ${chalk.cyan("pnpm scrape-trainers [options] ")} +Note that all option names are ${chalk.bold("case insensitive")}. + +${chalk.hex("#8a2be2")("Arguments:")} + ${chalk.hex("#7fff00")("names")} The name of one or more trainer classes to parse. + +${chalk.hex("#ffa500")("Options:")} + ${chalk.blue("-h, --help")} Show this help message. + ${chalk.blue("-o, --out, --outfile")} The path to a file to save the output. If not provided, will send directly to stdout. 
+`); +} diff --git a/scripts/scrape-trainer-names/main.js b/scripts/scrape-trainer-names/main.js index 98f7860442f..5d86f30d18c 100644 --- a/scripts/scrape-trainer-names/main.js +++ b/scripts/scrape-trainer-names/main.js @@ -1,36 +1,264 @@ -import { toCamelCase, toPascalSnakeCase } from "../helpers/strings.js"; -import { fetchNames } from "./fetch-names.js"; +import { existsSync, writeFileSync } from "node:fs"; +import { format } from "node:util"; +import chalk from "chalk"; +import inquirer from "inquirer"; +import { JSDOM } from "jsdom"; +import { toCamelCase, toPascalSnakeCase, toTitleCase } from "../helpers/strings.js"; +import { checkGenderAndType } from "./check-gender.js"; +import { fetchNames, INVALID_URL } from "./fetch-names.js"; +import { showHelpText } from "./help-message.js"; /** * @packageDocumentation * This script will scrape Bulbapedia for the English names of a given trainer class, * outputting them as JSON. - * Usage: + * Usage: `pnpm scrape-trainers` */ /** - * Scrape the requested trainer names and format the resultant output. - * @param {...string} classes The names of the trainer classes to retrieve - * @returns {Promise} A Promise that resolves with the finished text. + * @import { parsedNames } from "./types.js" */ -async function scrapeTrainerNames(...classes) { - /** - * A large object mapping each class to their corresponding list of trainer names. \ - * Trainer classes with only 1 gender will only contain the single array for that gender. - * @type {Record} - */ - const nameTuples = Object.fromEntries( - await Promise.all( - classes.map(async trainerClass => { - // Bulba URLs use Pascal_Snake_Case (Bug_Catcher) - const classURL = toPascalSnakeCase(trainerClass); - const names = await fetchNames(classURL); - const namesObj = names.female.length === 0 ? 
names.male : names; - return [toCamelCase(trainerClass), namesObj]; - }), - ), - ); - return JSON.stringify(nameTuples, null, 2); + +const version = "1.0.0"; +const SUPPORTED_ARGS = /** @type {const} */ (["-o", "--outfile", "--outFile"]); + +/** + * A large object mapping each "base" trainer name to a list of replacements. + * Used to allow for trainer classes with different `TrainerType`s than in mainline. + * @type {Record} + */ +const trainerNamesMap = { + pokemonBreeder: ["breeder"], + worker: ["worker", "snowWorker"], + richBoy: ["richKid"], + gentleman: ["rich"], +}; + +async function main() { + console.log(chalk.hex("#FF7F50")(`🍳 Trainer Name Scraper v${version}`)); + + const args = process.argv.slice(2); + const out = getOutfile(args); + // Break out if no args remain + if (args.length === 0) { + console.error( + chalk.red.bold( + `✗ Error: No trainer classes provided!\nArgs: ${chalk.hex("#7310fdff")(process.argv.slice(2).join(", "))}`, + ), + ); + showHelpText(); + process.exitCode = 1; + return; + } + + const output = await scrapeTrainerNames(args); + await tryWriteFile(out, output); } -console.log(await scrapeTrainerNames("doctor")); +/** + * Get the outfile location from the args array. + * @param {string[]} args - The command line arguments + * @returns {string | undefined} The outfile location, or `undefined` if none is provided + * @remarks + * This will mutate the `args` array by removing the outfile from the list of arguments. + */ +function getOutfile(args) { + let /** @type {string} */ outFile; + // Extract the argument as either the form "x=y" or "x y". 
+ const hasEquals = args[0]?.match(/^(.*)=(.*)$/g); + if (hasEquals) { + outFile = hasEquals[2]; + args.splice(0, 1); + } else if (/** @type {readonly string[]} */ (SUPPORTED_ARGS).includes(args[0])) { + outFile = args[1]; + args.splice(0, 2); + } else { + console.log(chalk.hex("#ffa500")("No outfile detected, logging to stdout...")); + return; + } + + console.log(chalk.hex("#ffa500")(`Using outfile: ${chalk.blue(outFile)}`)); + return outFile; +} + +/** + * Scrape the requested trainer names and format the resultant output. + * @param {string[]} classes The names of the trainer classes to retrieve + * @returns {Promise} A Promise that resolves with the finished text. + */ +async function scrapeTrainerNames(classes) { + classes = [...new Set(classes)]; + + /** + * A Set containing all trainer URLs that have been seen. + * @type {Set} + */ + const seenClasses = new Set(); + + /** + * A large array of tuples matching each class to their corresponding list of trainer names. \ + * Trainer classes with only 1 gender will only contain the single array for that gender. + * @type {[keyName: string, names: string[] | parsedNames][]} + */ + const namesTuples = await Promise.all( + classes.map(async trainerClass => { + const [trainerName, names] = await doFetch(trainerClass, seenClasses); + const namesObj = names.female.length === 0 ? names.male : names; + return /** @type {const} */ ([trainerName, namesObj]); + }), + ); + + // Grab all keys inside the name replacement map and change them accordingly. 
+ const mappedNames = namesTuples.filter(tuple => tuple[0] in trainerNamesMap); + for (const nameTuple of mappedNames) { + const namesMapping = trainerNamesMap[nameTuple[0]]; + namesTuples.splice( + namesTuples.indexOf(nameTuple), + 1, + ...namesMapping.map( + name => /** @type {[keyName: string, names: parsedNames | string[]]} */ ([name, nameTuple[1]]), + ), + ); + } + + namesTuples.sort((a, b) => a[0].localeCompare(b[0])); + + /** @type {Record} */ + const namesRecord = Object.fromEntries(namesTuples); + + // Convert all arrays into objects indexed by the number + return JSON.stringify( + namesRecord, + (_, v) => { + if (Array.isArray(v)) { + return v.reduce((ret, curr, i) => { + ret[i + 1] = curr; // 1 indexed + return ret; + }, {}); + } + return v; + }, + 2, + ); +} + +/** + * Recursively scrape names from a given Trainer class and its gender counterparts. + * @param {string} trainerClass - The URL to parse + * @param {Set} seenClasses - A Set containing all seen class URLs, used for record keeping. + * @returns {Promise<[string, parsedNames]>} + * A Promise that resolves with: + * 1. The name to use for the key. + * 2. All fetched names for this trainer class and its gender variants. 
+ */ +async function doFetch(trainerClass, seenClasses) { + let keyName = toCamelCase(trainerClass); + const classURL = toPascalSnakeCase(trainerClass); + seenClasses.add(classURL); + + const { document } = (await JSDOM.fromURL(`https://bulbapedia.bulbagarden.net/wiki/${classURL}_(Trainer_class)`)) + .window; + const trainerListHeader = document.querySelector("#Trainer_list")?.parentElement; + const [female, counterpartURLs] = checkGenderAndType(document); + const names = fetchNames(trainerListHeader, female); + if (names === INVALID_URL) { + return Promise.reject(chalk.red.bold(`URL ${classURL} did not correspond to a valid trainer class!`)); + } + + // Recurse into all unseen gender counterparts' URLs, using the first male name we find + const counterpartNames = await Promise.all( + counterpartURLs + .filter(url => !seenClasses.has(url)) + .map(counterpartURL => { + console.log(chalk.green(`Accessing gender counterpart URL: ${toTitleCase(counterpartURL)}`)); + return doFetch(counterpartURL, seenClasses); + }), + ); + let overrodeName = false; + for (const [cKeyName, cNameObj] of counterpartNames) { + if (!overrodeName && female) { + overrodeName = true; + console.log(chalk.green(`Using "${cKeyName}" as the name of the JSON key object...`)); + keyName = cKeyName; + } + names.male = [...new Set(names.male.concat(cNameObj.male))]; + names.female = [...new Set(names.female.concat(cNameObj.female))]; + } + return [normalizeDiacritics(keyName), names]; +} + +/** + * Convert all diacritical marks within a string into their normalized variants. 
+ * @param {string} str - The string to parse + * @returns {string} The string with normalized diacritics + */ +function normalizeDiacritics(str) { + // Normalizing to NFKD splits all diacritics into the base letter + grapheme (à -> a + `), + // which are conveniently all in their own little Unicode block for easy removal + return str.normalize("NFKD").replace(/[\u0300-\u036f]/g, ""); +} + +/** + * Try to write the output to a file (or log it to stdout, as the case may be). + * @param {string | undefined} outFile - The outfile + * @param {string} output - The scraped output to produce + */ +async function tryWriteFile(outFile, output) { + if (!outFile) { + console.log(output); + return; + } + + if (existsSync(outFile) && !(await promptExisting(outFile))) { + process.exitCode = 1; + return; + } + + try { + writeFileSync(outFile, output); + console.log(chalk.green.bold(`✔ Output written to ${chalk.blue(outFile)} successfully!`)); + } catch (e) { + let /** @type {string} */ errStr; + if (!(e instanceof Error)) { + errStr = format("Unknown error occurred: ", e); + } else { + // @ts-expect-error - Node.JS file errors always have codes + switch (e.code) { + case "ENOENT": + errStr = `File not found: ${outFile}`; + break; + case "EACCES": + errStr = `Could not write ${outFile}: Permission denied`; + break; + case "EISDIR": + errStr = `Unable to write to ${outFile} as it is a directory`; + break; + default: + errStr = `Error writing file: ${e.message}`; + } + } + console.error(chalk.red.bold(errStr)); + process.exitCode = 1; + return; + } +} + +/** + * Confirm overwriting an already-existing file. + * @param {string} outFile - The outfile + * @returns {Promise} Whether "Yes" or "No" was selected. 
+ */ +async function promptExisting(outFile) { + return ( + await inquirer.prompt([ + { + type: "confirm", + name: "continue", + message: `File ${chalk.blue(outFile)} already exists!` + "\nDo you want to replace it?", + default: false, + }, + ]) + ).continue; +} + +main(); diff --git a/scripts/scrape-trainer-names/types.js b/scripts/scrape-trainer-names/types.js index f2de7bc487e..0ea07db5164 100644 --- a/scripts/scrape-trainer-names/types.js +++ b/scripts/scrape-trainer-names/types.js @@ -5,3 +5,5 @@ * @property {string[]} male * @property {string[]} female */ + +export {}; From d2b0a3147b7acdfaf3e47a3be71e3d108bae9e52 Mon Sep 17 00:00:00 2001 From: Bertie690 Date: Mon, 18 Aug 2025 20:59:33 -0400 Subject: [PATCH 6/7] Made finishing touches on script --- scripts/scrape-trainer-names/fetch-names.js | 4 +- scripts/scrape-trainer-names/help.js | 0 scripts/scrape-trainer-names/main.js | 65 +++++++++++++++------ 3 files changed, 49 insertions(+), 20 deletions(-) delete mode 100644 scripts/scrape-trainer-names/help.js diff --git a/scripts/scrape-trainer-names/fetch-names.js b/scripts/scrape-trainer-names/fetch-names.js index e2c9bd3093a..9e6bd0c4d2b 100644 --- a/scripts/scrape-trainer-names/fetch-names.js +++ b/scripts/scrape-trainer-names/fetch-names.js @@ -5,9 +5,7 @@ /** * An error code for a bad URL. */ -export const INVALID_URL = "bad_url_code"; - -/** @type {const} */ +export const INVALID_URL = /** @type {const} */ ("bad_url_code"); /** * Fetch a given trainer's names from the given HTML document. 
diff --git a/scripts/scrape-trainer-names/help.js b/scripts/scrape-trainer-names/help.js deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/scripts/scrape-trainer-names/main.js b/scripts/scrape-trainer-names/main.js index 5d86f30d18c..43989eebab9 100644 --- a/scripts/scrape-trainer-names/main.js +++ b/scripts/scrape-trainer-names/main.js @@ -1,5 +1,5 @@ import { existsSync, writeFileSync } from "node:fs"; -import { format } from "node:util"; +import { format, inspect } from "node:util"; import chalk from "chalk"; import inquirer from "inquirer"; import { JSDOM } from "jsdom"; @@ -20,7 +20,7 @@ import { showHelpText } from "./help-message.js"; */ const version = "1.0.0"; -const SUPPORTED_ARGS = /** @type {const} */ (["-o", "--outfile", "--outFile"]); +const OUTFILE_ALIASES = /** @type {const} */ (["-o", "--outfile", "--outFile"]); /** * A large object mapping each "base" trainer name to a list of replacements. @@ -64,12 +64,12 @@ async function main() { */ function getOutfile(args) { let /** @type {string} */ outFile; - // Extract the argument as either the form "x=y" or "x y". - const hasEquals = args[0]?.match(/^(.*)=(.*)$/g); + // Extract the outfile as either the form "-o=y" or "-o y". + const hasEquals = /^.*=(.+)$/g.exec(args[0]); if (hasEquals) { - outFile = hasEquals[2]; + outFile = hasEquals[1]; args.splice(0, 1); - } else if (/** @type {readonly string[]} */ (SUPPORTED_ARGS).includes(args[0])) { + } else if (/** @type {readonly string[]} */ (OUTFILE_ALIASES).includes(args[0])) { outFile = args[1]; args.splice(0, 2); } else { @@ -102,21 +102,48 @@ async function scrapeTrainerNames(classes) { */ const namesTuples = await Promise.all( classes.map(async trainerClass => { - const [trainerName, names] = await doFetch(trainerClass, seenClasses); - const namesObj = names.female.length === 0 ? 
names.male : names; - return /** @type {const} */ ([trainerName, namesObj]); + try { + const [trainerName, names] = await doFetch(trainerClass, seenClasses); + const namesObj = names.female.length === 0 ? names.male : names; + return /** @type {const} */ ([trainerName, namesObj]); + } catch (e) { + if (!(e instanceof Error)) { + throw new Error(chalk.red.bold("Unrecognized error detected:", inspect(e))); + } + // If the error contains an HTTP status, attempt to parse the code to give a more friendly + // response than JSDOM's "Resource was not loaded" + const errCode = /Status: (\d*)/g.exec(e.message)?.[1]; + if (!errCode) { + throw e; + } + /** @type {string} */ + let reason; + switch (+errCode) { + case 404: + reason = "Page not found"; + break; + case 403: + reason = "Access is forbidden"; + break; + default: + reason = `Server produced error code of ${+errCode}`; + } + throw new Error( + chalk.red.bold(`Failed to parse URL for ${chalk.hex("#7fff00")(`\"${trainerClass}\"`)}!\nReason: ${reason}`), + ); + } }), ); // Grab all keys inside the name replacement map and change them accordingly. 
const mappedNames = namesTuples.filter(tuple => tuple[0] in trainerNamesMap); - for (const nameTuple of mappedNames) { - const namesMapping = trainerNamesMap[nameTuple[0]]; + for (const mappedName of mappedNames) { + const namesMapping = trainerNamesMap[mappedName[0]]; namesTuples.splice( - namesTuples.indexOf(nameTuple), + namesTuples.indexOf(mappedName), 1, ...namesMapping.map( - name => /** @type {[keyName: string, names: parsedNames | string[]]} */ ([name, nameTuple[1]]), + name => /** @type {[keyName: string, names: parsedNames | string[]]} */ ([name, mappedName[1]]), ), ); } @@ -126,7 +153,7 @@ async function scrapeTrainerNames(classes) { /** @type {Record} */ const namesRecord = Object.fromEntries(namesTuples); - // Convert all arrays into objects indexed by the number + // Convert all arrays into objects indexed by numbers return JSON.stringify( namesRecord, (_, v) => { @@ -153,16 +180,20 @@ async function scrapeTrainerNames(classes) { */ async function doFetch(trainerClass, seenClasses) { let keyName = toCamelCase(trainerClass); + // Bulba URLs are in Pascal_Snake_Case (Pokemon_Breeder) const classURL = toPascalSnakeCase(trainerClass); seenClasses.add(classURL); - const { document } = (await JSDOM.fromURL(`https://bulbapedia.bulbagarden.net/wiki/${classURL}_(Trainer_class)`)) - .window; + // Bulbapedia has redirects mapping basically all variant spellings of each trainer name to the corresponding main page. 
+ // We thus rely on it + const { document } = (await JSDOM.fromURL(`https://bulbapedia.bulbagarden.net/wiki/${classURL}`)).window; const trainerListHeader = document.querySelector("#Trainer_list")?.parentElement; const [female, counterpartURLs] = checkGenderAndType(document); const names = fetchNames(trainerListHeader, female); if (names === INVALID_URL) { - return Promise.reject(chalk.red.bold(`URL ${classURL} did not correspond to a valid trainer class!`)); + return Promise.reject( + new Error(chalk.red.bold(`URL \"${classURL}\" did not correspond to a valid trainer class!`)), + ); } // Recurse into all unseen gender counterparts' URLs, using the first male name we find From 6b98afa34fa9ac15da94b73d47b69de9bd704197 Mon Sep 17 00:00:00 2001 From: Bertie690 Date: Mon, 18 Aug 2025 21:00:03 -0400 Subject: [PATCH 7/7] Fixed main repo code to not expect snake cased locale strings --- .../global-trade-system-encounter.ts | 26 ++++--- src/field/trainer.ts | 70 +++++++++---------- src/utils/i18n.ts | 17 +++++ 3 files changed, 66 insertions(+), 47 deletions(-) create mode 100644 src/utils/i18n.ts diff --git a/src/data/mystery-encounters/encounters/global-trade-system-encounter.ts b/src/data/mystery-encounters/encounters/global-trade-system-encounter.ts index ed49fccf190..28cbf190e67 100644 --- a/src/data/mystery-encounters/encounters/global-trade-system-encounter.ts +++ b/src/data/mystery-encounters/encounters/global-trade-system-encounter.ts @@ -44,7 +44,10 @@ import { PokemonData } from "#system/pokemon-data"; import { MusicPreference } from "#system/settings"; import type { OptionSelectItem } from "#ui/abstract-option-select-ui-handler"; import { isNullOrUndefined, NumberHolder, randInt, randSeedInt, randSeedItem, randSeedShuffle } from "#utils/common"; +import { getEnumKeys } from "#utils/enums"; +import { getRandomLocaleKey } from "#utils/i18n"; import { getPokemonSpecies } from "#utils/pokemon-utils"; +import { toCamelCase } from "#utils/strings"; import i18next 
from "i18next"; /** the i18n namespace for the encounter */ @@ -984,14 +987,17 @@ function doTradeReceivedSequence( } function generateRandomTraderName() { - const length = TrainerType.YOUNGSTER - TrainerType.ACE_TRAINER + 1; - // +1 avoids TrainerType.UNKNOWN - const classKey = `trainersCommon:${TrainerType[randInt(length) + 1]}`; - // Some trainers have 2 gendered pools, some do not - const genderKey = i18next.exists(`${classKey}.MALE`) ? (randInt(2) === 0 ? ".MALE" : ".FEMALE") : ""; - const trainerNameKey = randSeedItem(Object.keys(i18next.t(`${classKey}${genderKey}`, { returnObjects: true }))); - const trainerNameString = i18next.t(`${classKey}${genderKey}.${trainerNameKey}`); - // Some names have an '&' symbol and need to be trimmed to a single name instead of a double name - const trainerNames = trainerNameString.split(" & "); - return trainerNames[randInt(trainerNames.length)]; + const allTrainerNames = getEnumKeys(TrainerType); + // Exclude TrainerType.UNKNOWN and everything after Ace Trainers (grunts and unique trainers) + const eligibleNames = allTrainerNames.slice( + 1, + allTrainerNames.indexOf(TrainerType[TrainerType.YOUNGSTER] as keyof typeof TrainerType), + ); + const randomTrainer = toCamelCase(randSeedItem(eligibleNames)); + const classKey = `trainersCommon:${randomTrainer}`; + // Pick a random gender for ones with gendered pools, or access the raw object for ones without. + const genderKey = i18next.exists(`${classKey}.male`) ? 
randSeedItem([".male", ".female"]) : ""; + const trainerNameString = getRandomLocaleKey(`${classKey}${genderKey}`)[1]; + // Split the string by &s (for duo trainers) + return randSeedItem(trainerNameString.split(" & ")); } diff --git a/src/field/trainer.ts b/src/field/trainer.ts index 584c9310932..71660be524f 100644 --- a/src/field/trainer.ts +++ b/src/field/trainer.ts @@ -16,14 +16,11 @@ import type { PersistentModifier } from "#modifiers/modifier"; import { getIsInitialized, initI18n } from "#plugins/i18n"; import type { TrainerConfig } from "#trainers/trainer-config"; import { trainerConfigs } from "#trainers/trainer-config"; -import { - TrainerPartyCompoundTemplate, - type TrainerPartyTemplate, - trainerPartyTemplates, -} from "#trainers/trainer-party-template"; +import { TrainerPartyCompoundTemplate, type TrainerPartyTemplate } from "#trainers/trainer-party-template"; import { randSeedInt, randSeedItem, randSeedWeightedItem } from "#utils/common"; +import { getRandomLocaleKey } from "#utils/i18n"; import { getPokemonSpecies } from "#utils/pokemon-utils"; -import { toSnakeCase } from "#utils/strings"; +import { toCamelCase, toSnakeCase } from "#utils/strings"; import i18next from "i18next"; export class Trainer extends Phaser.GameObjects.Container { @@ -35,6 +32,18 @@ export class Trainer extends Phaser.GameObjects.Container { public partnerNameKey: string | undefined; public originalIndexes: { [key: number]: number } = {}; + /** + * Create a new Trainer. + * @param trainerType - The {@linkcode TrainerType} for this trainer, used to determine + * name, sprite, party contents and other details. + * @param variant - The {@linkcode TrainerVariant} for this trainer (if any are available) + * @param partyTemplateIndex - If provided, will override the trainer's party template with the given + * version. 
+ * @param nameKey - If provided, will override the name key of the trainer + * @param partnerNameKey - If provided, will override the name key of the trainer's partner + * @param trainerConfigOverride - If provided, will override the trainer config for the given trainer type + * @todo Review how many of these parameters we actually need + */ constructor( trainerType: TrainerType, variant: TrainerVariant, @@ -44,13 +53,11 @@ export class Trainer extends Phaser.GameObjects.Container { trainerConfigOverride?: TrainerConfig, ) { super(globalScene, -72, 80); - this.config = trainerConfigs.hasOwnProperty(trainerType) - ? trainerConfigs[trainerType] - : trainerConfigs[TrainerType.ACE_TRAINER]; - - if (trainerConfigOverride) { - this.config = trainerConfigOverride; - } + this.config = + trainerConfigOverride ?? + (trainerConfigs.hasOwnProperty(trainerType) + ? trainerConfigs[trainerType] + : trainerConfigs[TrainerType.ACE_TRAINER]); this.variant = variant; this.partyTemplateIndex = Math.min( @@ -59,20 +66,21 @@ export class Trainer extends Phaser.GameObjects.Container { : randSeedWeightedItem(this.config.partyTemplates.map((_, i) => i)), this.config.partyTemplates.length - 1, ); - const classKey = `trainersCommon:${TrainerType[trainerType]}`; + // TODO: Rework this and add actual error handling for missing names + const classKey = `trainersCommon:${toCamelCase(TrainerType[trainerType])}`; if (i18next.exists(classKey, { returnObjects: true })) { if (nameKey) { this.nameKey = nameKey; + this.name = i18next.t(nameKey); } else { - const genderKey = i18next.exists(`${classKey}.MALE`) + const genderKey = i18next.exists(`${classKey}.male`) ? variant === TrainerVariant.FEMALE - ? ".FEMALE" - : ".MALE" + ? 
".female" + : ".male" : ""; - const trainerKey = randSeedItem(Object.keys(i18next.t(`${classKey}${genderKey}`, { returnObjects: true }))); - this.nameKey = `${classKey}${genderKey}.${trainerKey}`; + [this.nameKey, this.name] = getRandomLocaleKey(`${classKey}${genderKey}`); } - this.name = i18next.t(this.nameKey); + if (variant === TrainerVariant.DOUBLE) { if (this.config.doubleOnly) { if (partnerNameKey) { @@ -82,16 +90,8 @@ export class Trainer extends Phaser.GameObjects.Container { [this.name, this.partnerName] = this.name.split(" & "); } } else { - const partnerGenderKey = i18next.exists(`${classKey}.FEMALE`) ? ".FEMALE" : ""; - const partnerTrainerKey = randSeedItem( - Object.keys( - i18next.t(`${classKey}${partnerGenderKey}`, { - returnObjects: true, - }), - ), - ); - this.partnerNameKey = `${classKey}${partnerGenderKey}.${partnerTrainerKey}`; - this.partnerName = i18next.t(this.partnerNameKey); + const partnerGenderKey = i18next.exists(`${classKey}.female`) ? ".female" : ""; + [this.partnerNameKey, this.partnerName] = getRandomLocaleKey(`${classKey}${partnerGenderKey}`); } } } @@ -109,10 +109,6 @@ export class Trainer extends Phaser.GameObjects.Container { break; } - console.log( - Object.keys(trainerPartyTemplates)[Object.values(trainerPartyTemplates).indexOf(this.getPartyTemplate())], - ); - const getSprite = (hasShadow?: boolean, forceFemale?: boolean) => { const ret = globalScene.addFieldSprite( 0, @@ -157,9 +153,9 @@ export class Trainer extends Phaser.GameObjects.Container { /** * Returns the name of the trainer based on the provided trainer slot and the option to include a title. - * @param {TrainerSlot} trainerSlot - The slot to determine which name to use. Defaults to TrainerSlot.NONE. - * @param {boolean} includeTitle - Whether to include the title in the returned name. Defaults to false. - * @returns {string} - The formatted name of the trainer. 
+ * @param trainerSlot - The slot to determine which name to use; default `TrainerSlot.NONE` + * @param includeTitle - Whether to include the title in the returned name; default `false` + * @returns - The formatted name of the trainer */ getName(trainerSlot: TrainerSlot = TrainerSlot.NONE, includeTitle = false): string { // Get the base title based on the trainer slot and variant. diff --git a/src/utils/i18n.ts b/src/utils/i18n.ts new file mode 100644 index 00000000000..cd5f8d1ee4f --- /dev/null +++ b/src/utils/i18n.ts @@ -0,0 +1,17 @@ +import { randSeedItem } from "#utils/common"; +import i18next from "i18next"; + +/** + * Select a random i18n key from all nested keys in the given object. + * @param key - The i18n key to retrieve a random value of. + * The key's value should be an object containing numerical keys (starting from 1). + * @returns A tuple containing the key and value pair. + * @privateRemarks + * The reason such "array-like" keys are not stored as actual arrays is due to the + * translation software used by the Translation Team (Mozilla Pontoon) + * not supporting arrays in any capacity. + */ +export function getRandomLocaleKey(key: string): [key: string, value: string] { + const keyName = `${key}.${randSeedItem(Object.keys(i18next.t(key, { returnObjects: true })))}`; + return [keyName, i18next.t(keyName)]; +}