From 43e9d82b26bd11313a3e17193d75fb482d70deaa Mon Sep 17 00:00:00 2001 From: Bertie690 Date: Sat, 16 Aug 2025 18:15:17 -0400 Subject: [PATCH] Started work on the script again --- package.json | 1 + scripts/helpers/strings.js | 179 +++++++++++++++++++ scripts/scrape-trainer-names/check-gender.js | 45 +++++ scripts/scrape-trainer-names/fetch-names.js | 92 ++++++++++ scripts/scrape-trainer-names/help.js | 0 scripts/scrape-trainer-names/main.js | 36 ++++ scripts/scrape-trainer-names/types.js | 7 + src/data/abilities/ability.ts | 7 +- src/data/moves/move.ts | 4 +- typedoc.json | 2 +- 10 files changed, 367 insertions(+), 6 deletions(-) create mode 100644 scripts/helpers/strings.js create mode 100644 scripts/scrape-trainer-names/check-gender.js create mode 100644 scripts/scrape-trainer-names/fetch-names.js create mode 100644 scripts/scrape-trainer-names/help.js create mode 100644 scripts/scrape-trainer-names/main.js create mode 100644 scripts/scrape-trainer-names/types.js diff --git a/package.json b/package.json index d3494da677c..3bf4b971e21 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "test:watch": "vitest watch --coverage --no-isolate", "test:silent": "vitest run --silent='passed-only' --no-isolate", "test:create": "node scripts/create-test/create-test.js", + "scrape-trainers": "node scripts/scrape-trainer-names/main.js", "typecheck": "tsc --noEmit", "eslint": "eslint --fix .", "eslint-ci": "eslint .", diff --git a/scripts/helpers/strings.js b/scripts/helpers/strings.js new file mode 100644 index 00000000000..3692bcfdd01 --- /dev/null +++ b/scripts/helpers/strings.js @@ -0,0 +1,179 @@ +// #region Split string code +// Regexps involved with splitting words in various case formats. +// Sourced from https://www.npmjs.com/package/change-case (with slight tweaking here and there) + +/** + * Regex to split at word boundaries. 
/**
 * Matches a lowercase letter or digit that is directly followed by an uppercase letter,
 * e.g. the `o`/`B` boundary in `fooBar` or the `2`/`B` boundary in `v2Beta`.
 * @type {RegExp}
 */
const SPLIT_LOWER_UPPER_RE = /([\p{Ll}\d])(\p{Lu})/gu;
/**
 * Matches an uppercase letter followed by an uppercase + lowercase pair, i.e. the point where
 * a run of capitals meets a capitalized word (the `P`/`Se` boundary in `HTTPServer`).
 * @type {RegExp}
 */
const SPLIT_UPPER_UPPER_RE = /(\p{Lu})([\p{Lu}][\p{Ll}])/gu;
/**
 * Matches one or more consecutive delimiter characters (hyphen, underscore or space).
 * @type {RegExp}
 */
const DELIM_STRIP_REGEXP = /[-_ ]+/gu;
/**
 * Internal marker placed between words before the final split.
 * NOTE(review): presumably NUL was picked because it should not occur in ordinary input - confirm.
 */
const WORD_SEPARATOR = "\0";
/** Replacement pattern that re-emits both captured sides of a case boundary with the marker between them. */
const SPLIT_REPLACE_VALUE = `$1${WORD_SEPARATOR}$2`;

/**
 * Split any cased string into an array of its constituent words.
 * @param {string} value - The string to split
 * @returns {string[]} The words of `value`, split at each run of spaces, underscores or hyphens
 * and at each lower-to-upper (or acronym-to-word) boundary.
 * Returns an empty array when `value` contains no words at all (empty or delimiter-only input).
 */
function splitWords(value) {
  // Case boundaries are marked first, then every delimiter run is collapsed into a single marker.
  const separated = value
    .trim()
    .replace(SPLIT_LOWER_UPPER_RE, SPLIT_REPLACE_VALUE)
    .replace(SPLIT_UPPER_UPPER_RE, SPLIT_REPLACE_VALUE)
    .replace(DELIM_STRIP_REGEXP, WORD_SEPARATOR);

  // Leading/trailing delimiters leave a marker at either end; strip those before splitting.
  const trimmed = trimFromStartAndEnd(separated, WORD_SEPARATOR);
  // `"".split(...)` would yield `[""]`, which is not a list of words.
  return trimmed === "" ? [] : trimmed.split(WORD_SEPARATOR);
}

/**
 * Helper function to remove every repetition of a character sequence from both ends of a string.
 * @param {string} str - The string to trim
 * @param {string} charToTrim - The sequence to remove; an empty sequence leaves `str` untouched
 * @returns {string} `str` without any leading or trailing repetitions of `charToTrim`
 */
function trimFromStartAndEnd(str, charToTrim) {
  const blockLength = charToTrim.length;
  if (blockLength === 0) {
    // An empty needle matches at every position, so the loops below would never advance.
    return str;
  }

  let start = 0;
  while (str.startsWith(charToTrim, start)) {
    start += blockLength;
  }

  let end = str.length;
  // Stop as soon as the two cursors meet: at that point the whole string has been consumed.
  while (end > start && str.endsWith(charToTrim, end)) {
    end -= blockLength;
  }

  // `slice` yields "" whenever `end <= start`, which covers the "nothing left" case.
  return str.slice(start, end);
}
// #endregion Split String code
/**
 * Capitalize the first letter of a string, leaving the rest untouched.
 * The first *code point* is used (not the first UTF-16 unit), so letters outside the
 * Basic Multilingual Plane are capitalized correctly.
 * @example
 * ```ts
 * console.log(capitalizeFirstLetter("consectetur adipiscing elit")); // returns "Consectetur adipiscing elit"
 * ```
 * @param {string} str - The string whose first letter is to be capitalized
 * @returns {string} The original string with its first letter capitalized.
 */
export function capitalizeFirstLetter(str) {
  // Destructuring iterates by code point; the default covers the empty string.
  const [first = ""] = str;
  return first.toUpperCase() + str.slice(first.length);
}

/**
 * Upper-case the first code point of a word and lower-case everything after it.
 * Shared by the Title/camel/Pascal converters below.
 * @param {string} word - A single word
 * @returns {string} The word in `Capitalized` form
 */
function capitalizeWord(word) {
  const [first = ""] = word;
  return first.toUpperCase() + word.slice(first.length).toLowerCase();
}

/**
 * Helper method to convert a string into `Title Case` (such as one used for console logs).
 * @example
 * ```ts
 * console.log(toTitleCase("lorem ipsum dolor sit amet")); // returns "Lorem Ipsum Dolor Sit Amet"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into title case.
 */
export function toTitleCase(str) {
  return splitWords(str).map(capitalizeWord).join(" ");
}

/**
 * Helper method to convert a string into `camelCase` (such as one used for i18n keys).
 * @example
 * ```ts
 * console.log(toCamelCase("BIG_ANGRY_TRAINER")); // returns "bigAngryTrainer"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into camel case.
 */
export function toCamelCase(str) {
  return splitWords(str)
    .map((word, index) => (index === 0 ? word.toLowerCase() : capitalizeWord(word)))
    .join("");
}

/**
 * Helper method to convert a string into `PascalCase`.
 * @example
 * ```ts
 * console.log(toPascalCase("hi how was your day")); // returns "HiHowWasYourDay"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into pascal case.
 */
export function toPascalCase(str) {
  return splitWords(str).map(capitalizeWord).join("");
}

/**
 * Helper method to convert a string into `kebab-case` (such as one used for filenames).
 * Lower-to-upper boundaries inside a word count as word breaks.
 * @example
 * ```ts
 * console.log(toKebabCase("not_kebab-case String")); // returns "not-kebab-case-string"
 * console.log(toKebabCase("someMixedCase")); // returns "some-mixed-case"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into kebab case.
 */
export function toKebabCase(str) {
  return splitWords(str)
    .map(word => word.toLowerCase())
    .join("-");
}

/**
 * Helper method to convert a string into `snake_case`.
 * @example
 * ```ts
 * console.log(toSnakeCase("not-in snakeCase")); // returns "not_in_snake_case"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into snake case.
 */
export function toSnakeCase(str) {
  return splitWords(str)
    .map(word => word.toLowerCase())
    .join("_");
}

/**
 * Helper method to convert a string into `UPPER_SNAKE_CASE`.
 * @example
 * ```ts
 * console.log(toUpperSnakeCase("apples bananas_oranges-Pears")); // returns "APPLES_BANANAS_ORANGES_PEARS"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into upper snake case.
 */
export function toUpperSnakeCase(str) {
  return splitWords(str)
    .map(word => word.toUpperCase())
    .join("_");
}
/**
 * Helper method to convert a string into `Pascal_Snake_Case`.
 * @example
 * ```ts
 * console.log(toPascalSnakeCase("apples-bananas_oranges Pears")); // returns "Apples_Bananas_Oranges_Pears"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into pascal snake case.
 */
export function toPascalSnakeCase(str) {
  return splitWords(str)
    .map(word => {
      // Work on the first code point so surrogate pairs are not cut in half.
      const [initial = ""] = word;
      return initial.toUpperCase() + word.slice(initial.length).toLowerCase();
    })
    .join("_");
}

// ---------------------------------------------------------------------------
// scripts/scrape-trainer-names/check-gender.js (new file)
// ---------------------------------------------------------------------------

/**
 * Extracts the class name from a link of the form `href="/wiki/<Class>_(Trainer_class)"`.
 * Deliberately not global: `String.prototype.match` only exposes capture groups for non-global regexes.
 * `[^"]*` keeps the capture inside a single attribute value.
 * @type {RegExp}
 */
const COUNTERPART_HREF_RE = /href="\/wiki\/([^"]*)_\(Trainer_class\)"/u;

/**
 * Check if the given trainer class is female.
 * @param {Document} document - The HTML document to scrape
 * @returns {[gender: boolean, counterpartURL?: string]} A 2-length tuple containing:
 * 1. The trainer class' normal gender (`false` when no infobox or no "Gender" row is found)
 * 2. The class name taken from the link to the gender counterpart of the current class
 *    (if the trainer has one), in the same form `fetchNames` accepts.
 */
export function checkGenderAndType(document) {
  const infoBox = document.getElementById("infobox");
  if (!infoBox) {
    return [false];
  }

  // Find the row of the table containing the specified gender.
  // A selector is used instead of comparing `nodeName` by hand: element node names are upper-case
  // in HTML documents, and rows usually sit inside a <tbody> rather than directly under the table.
  const genderRow = [...infoBox.querySelectorAll("tr")].find(row =>
    [...row.childNodes].some(cell => cell.textContent?.includes("Gender")),
  );
  const genderSection = genderRow?.parentElement;
  if (!genderSection) {
    return [false];
  }

  // NOTE(review): both lookups below read the FIRST <tr>/<td> of the section that holds the
  // matched row, not necessarily the matched row itself - confirm against the real page markup.
  const gender = getGender(genderSection.querySelector("tr"));
  const counterpartURL = genderSection.querySelector("td")?.innerHTML.match(COUNTERPART_HREF_RE)?.[1];

  return [gender, counterpartURL];
}
/**
 * Retrieve the gender from the given node text.
 * @param {HTMLTableRowElement?} genderCell - The cell to check
 * @returns {boolean} `true` if the text marks the class as "Female Only", `false` otherwise
 * (including "Male Only", "Both" and a missing cell).
 * NOTE(review): the previous version returned `false` for "Female Only" and `true` for everything
 * else, while every consumer in this script treats the flag as "is female" - confirm the polarity.
 * @todo Handle trainers whose gender type has changed across different gens (Artists, etc.)
 */
function getGender(genderCell) {
  // Surrounding whitespace in the markup must not defeat the comparison.
  return genderCell?.textContent?.trim() === "Female Only";
}

// ---------------------------------------------------------------------------
// scripts/scrape-trainer-names/fetch-names.js (new file)
// ---------------------------------------------------------------------------
import chalk from "chalk";
import { JSDOM } from "jsdom";
import { checkGenderAndType } from "./check-gender.js";

/**
 * @import { nameRecord, parsedNames } from "./types.js";
 */

/**
 * Fetch a given trainer's names from the given URL.
 * @param {string} url - The class part of the page URL (e.g. `Bug_Catcher`); it is inserted into
 * `https://bulbapedia.bulbagarden.net/wiki/<url>_(Trainer_class)`
 * @param {boolean} [currGender] - The current class' known gender.
 * If provided, will override the natural gender detection with the given gender and avoid
 * checking any gender counterparts.
 * @returns {Promise<parsedNames>} A Promise that resolves with the parsed names once the parsing concludes.
 * Will resolve with `{ male: [], female: [] }` if the page has no "Trainer list" section.
 */
export async function fetchNames(url, currGender) {
  const { document } = (await JSDOM.fromURL(`https://bulbapedia.bulbagarden.net/wiki/${url}_(Trainer_class)`)).window;

  // `#Trainer_list` is the anchor inside the section heading; its parent is the heading element
  // and the heading's parent holds the section content as siblings of the heading.
  const trainerListHeader = document.querySelector("#Trainer_list")?.parentElement;
  const container = trainerListHeader?.parentElement;
  if (!trainerListHeader || !container) {
    console.warn(chalk.hex("#ffa500")(`URL ${url} did not correspond to a valid trainer class!`));
    return { male: [], female: [] };
  }

  let trainerNames = /** @type {Set<string>} */ (new Set());
  let femaleTrainerNames = /** @type {Set<string>} */ (new Set());

  // Work on a local copy instead of reassigning the parameter.
  let isFemale = currGender;

  // If we don't know whether this class is female, check, optionally recursing into the
  // counterpart's webpage as well. The recursive call passes an explicit gender, so it never
  // follows the counterpart link back to this page.
  if (isFemale === undefined) {
    const [detectedGender, counterpartURL] = checkGenderAndType(document);
    isFemale = detectedGender;
    if (counterpartURL) {
      console.log(chalk.green(`Accessing gender counterpart URL: ${counterpartURL}`));
      const names = await fetchNames(counterpartURL, !isFemale);
      trainerNames = new Set(names.male);
      femaleTrainerNames = new Set(names.female);
    }
  }

  const siblings = [...container.childNodes];

  // Find all elements within the "Trainer Names" header and selectively filter to find the name tables.
  const startChildIndex = siblings.indexOf(trainerListHeader);
  const nextHeadingIndex = siblings.findIndex((node, index) => index > startChildIndex && node.nodeName === "H2");
  // Without a following H2 the section runs to the end of the container; passing -1 to `slice`
  // would silently drop the last node instead.
  const sectionEnd = nextHeadingIndex === -1 ? siblings.length : nextHeadingIndex;

  // Grab all the trainer name tables sorted by generation
  const tables = siblings.slice(startChildIndex, sectionEnd).filter(
    /** @type {(t: ChildNode) => t is Element} */
    (
      t =>
        // Only grab expandable tables within the header block
        t.nodeName === "TABLE" && t["className"] === "expandable"
    ),
  );

  parseTable(tables, isFemale, trainerNames, femaleTrainerNames);
  return {
    male: Array.from(trainerNames),
    female: Array.from(femaleTrainerNames),
  };
}
+ * @param {Element[]} tables - The array of Elements forming the current table + * @param {boolean} isFemale - Whether the trainer is known to be female or not + * @param {Set} trainerNames A Set containing the male trainer names + * @param {Set} femaleTrainerNames - A Set containing the female trainer names + */ +function parseTable(tables, isFemale, trainerNames, femaleTrainerNames) { + for (const table of tables) { + // Grab all rows past the first header with exactly 9 children in them (Name, Battle, Winnings, 6 party slots) + const trainerRows = [...table.querySelectorAll("tr:not(:first-child)")].filter(r => r.children.length === 9); + for (const row of trainerRows) { + const content = row.firstElementChild?.innerHTML; + // Skip empty elements & ones without anchors + if (!content || content?.indexOf(" ([a-z]+(?: & [a-z]+)?)<\/a>/i.exec(content); + if (!nameMatch) { + continue; + } + (female ? femaleTrainerNames : trainerNames).add(nameMatch[1].replace("&", "&")); + } + } +} diff --git a/scripts/scrape-trainer-names/help.js b/scripts/scrape-trainer-names/help.js new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/scrape-trainer-names/main.js b/scripts/scrape-trainer-names/main.js new file mode 100644 index 00000000000..98f7860442f --- /dev/null +++ b/scripts/scrape-trainer-names/main.js @@ -0,0 +1,36 @@ +import { toCamelCase, toPascalSnakeCase } from "../helpers/strings.js"; +import { fetchNames } from "./fetch-names.js"; + +/** + * @packageDocumentation + * This script will scrape Bulbapedia for the English names of a given trainer class, + * outputting them as JSON. + * Usage: + */ + +/** + * Scrape the requested trainer names and format the resultant output. + * @param {...string} classes The names of the trainer classes to retrieve + * @returns {Promise} A Promise that resolves with the finished text. 
+ */ +async function scrapeTrainerNames(...classes) { + /** + * A large object mapping each class to their corresponding list of trainer names. \ + * Trainer classes with only 1 gender will only contain the single array for that gender. + * @type {Record} + */ + const nameTuples = Object.fromEntries( + await Promise.all( + classes.map(async trainerClass => { + // Bulba URLs use Pascal_Snake_Case (Bug_Catcher) + const classURL = toPascalSnakeCase(trainerClass); + const names = await fetchNames(classURL); + const namesObj = names.female.length === 0 ? names.male : names; + return [toCamelCase(trainerClass), namesObj]; + }), + ), + ); + return JSON.stringify(nameTuples, null, 2); +} + +console.log(await scrapeTrainerNames("doctor")); diff --git a/scripts/scrape-trainer-names/types.js b/scripts/scrape-trainer-names/types.js new file mode 100644 index 00000000000..f2de7bc487e --- /dev/null +++ b/scripts/scrape-trainer-names/types.js @@ -0,0 +1,7 @@ +/** + * @typedef {Object} + * parsedNames + * A parsed object containing the desired names. + * @property {string[]} male + * @property {string[]} female + */ diff --git a/src/data/abilities/ability.ts b/src/data/abilities/ability.ts index f5fd9b19f72..1f06aae5546 100644 --- a/src/data/abilities/ability.ts +++ b/src/data/abilities/ability.ts @@ -1760,7 +1760,7 @@ export class PokemonTypeChangeAbAttr extends PreAttackAbAttr { * Parameters for abilities that modify the hit count and damage of a move */ export interface AddSecondStrikeAbAttrParams extends Omit { - /** Holder for the number of hits. May be modified by ability application */ + /** Holder for the number of hits. 
May be modified by ability application */ hitCount?: NumberHolder; /** Holder for the damage multiplier _of the current hit_ */ multiplier?: NumberHolder; @@ -5816,7 +5816,7 @@ export class NoFusionAbilityAbAttr extends AbAttr { export interface IgnoreTypeImmunityAbAttrParams extends AbAttrBaseParams { /** The type of the move being used */ readonly moveType: PokemonType; - /** The type being checked for */ + /** The type being checked for */ readonly defenderType: PokemonType; /** Holds whether the type immunity should be bypassed */ cancelled: BooleanHolder; @@ -6755,7 +6755,7 @@ function getPokemonWithWeatherBasedForms() { ); } -// biome-ignore format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`) +// biome-ignore-start format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`) export function initAbilities() { allAbilities.push( new Ability(AbilityId.NONE, 3), @@ -7867,3 +7867,4 @@ export function initAbilities() { .attr(ConfusionOnStatusEffectAbAttr, StatusEffect.POISON, StatusEffect.TOXIC) ); } +// biome-ignore-end format: prevent biome from removing the newlines (e.g. 
prevent `new Ability(...).attr(...)`) diff --git a/src/data/moves/move.ts b/src/data/moves/move.ts index a44a033b137..520bf778172 100644 --- a/src/data/moves/move.ts +++ b/src/data/moves/move.ts @@ -5916,8 +5916,8 @@ export class ProtectAttr extends AddBattlerTagAttr { for (const turnMove of user.getLastXMoves(-1).slice()) { if ( // Quick & Wide guard increment the Protect counter without using it for fail chance - !(allMoves[turnMove.move].hasAttr("ProtectAttr") || - [MoveId.QUICK_GUARD, MoveId.WIDE_GUARD].includes(turnMove.move)) || + !(allMoves[turnMove.move].hasAttr("ProtectAttr") || + [MoveId.QUICK_GUARD, MoveId.WIDE_GUARD].includes(turnMove.move)) || turnMove.result !== MoveResult.SUCCESS ) { break; diff --git a/typedoc.json b/typedoc.json index c34e6190c1a..e4ab2d8dcc4 100644 --- a/typedoc.json +++ b/typedoc.json @@ -1,7 +1,7 @@ { "entryPoints": ["./src"], "entryPointStrategy": "expand", - "exclude": ["**/*+.test.ts"], + "exclude": ["**/*+.test.ts", "**/src/data/trainer-names.ts"], "out": "typedoc", "highlightLanguages": ["javascript", "json", "jsonc", "json5", "tsx", "typescript", "markdown"] }