Started work on the script again

2025-08-19 22:09:27 +02:00 · 2025-08-16 18:15:17 -04:00 · 2025-08-16 18:15:17 -04:00 · 43e9d82b26
commit 43e9d82b26
parent 4939a9f6f4
10 changed files with 367 additions and 6 deletions
--- a/package.json
+++ b/package.json
@ -14,6 +14,7 @@
    "test:watch": "vitest watch --coverage --no-isolate",
    "test:silent": "vitest run --silent='passed-only' --no-isolate",
    "test:create": "node scripts/create-test/create-test.js",
+    "scrape-trainers": "node scripts/scrape-trainer-names/main.js",
    "typecheck": "tsc --noEmit",
    "eslint": "eslint --fix .",
    "eslint-ci": "eslint .",
--- a/scripts/helpers/strings.js
+++ b/scripts/helpers/strings.js
@ -0,0 +1,179 @@
+// #region Split string code
+// Regexps involved with splitting words in various case formats.
+// Sourced from https://www.npmjs.com/package/change-case (with slight tweaking here and there)
+
+/**
+ * Regex matching a lowercase letter or digit directly followed by an uppercase letter (e.g. the `oB` in `fooBar`).
+ * @type {RegExp}
+ */
+const SPLIT_LOWER_UPPER_RE = /([\p{Ll}\d])(\p{Lu})/gu;
+/**
+ * Regex matching an uppercase letter directly followed by an uppercase + lowercase pair (e.g. the `LPa` in `XMLParser`).
+ * @type {RegExp}
+ */
+const SPLIT_UPPER_UPPER_RE = /(\p{Lu})([\p{Lu}][\p{Ll}])/gu;
+/**
+ * Regex matching runs of hyphens, underscores or spaces (the `i` flag has no effect, as the class holds no letters).
+ * @type {RegExp}
+ */
+const DELIM_STRIP_REGEXP = /[-_ ]+/giu;
+// The replacement value for splits: re-emits both capture groups with a NUL character inserted between them.
+const SPLIT_REPLACE_VALUE = "$1\0$2";
+
+/**
+ * Break a string written in any common case format into its individual words.
+ * @param {string} value - The string to split
+ * @returns {string[]} The words of `value`, split at runs of spaces/underscores/hyphens and lower-to-upper boundaries.
+ */
+function splitWords(value) {
+  const marked = value
+    .trim()
+    .replace(SPLIT_LOWER_UPPER_RE, SPLIT_REPLACE_VALUE)
+    .replace(SPLIT_UPPER_UPPER_RE, SPLIT_REPLACE_VALUE)
+    .replace(DELIM_STRIP_REGEXP, "\0");
+  return trimFromStartAndEnd(marked, "\0").split("\0"); // strip dangling delimiters, then split on them
+}
+
+/**
+ * Helper function to strip every leading and trailing repetition of a sequence of characters from a string.
+ * @param {string} str - The string to trim
+ * @param {string} charToTrim - The sequence to remove; an empty sequence leaves `str` untouched
+ * @returns {string} `str` without the leading/trailing repetitions (empty if it consisted solely of them)
+ */
+function trimFromStartAndEnd(str, charToTrim) {
+  const blockLength = charToTrim.length;
+  if (blockLength === 0) {
+    return str; // an empty sequence matches everywhere without advancing, so the loops below would never end
+  }
+  let start = 0;
+  let end = str.length;
+  while (str.startsWith(charToTrim, start)) {
+    start += blockLength;
+  }
+  // Stop at `start` so a trailing match never overlaps characters already consumed from the front
+  while (end - blockLength >= start && str.endsWith(charToTrim, end)) {
+    end -= blockLength;
+  }
+  return str.slice(start, end);
+}
+// #endregion Split String code
+
+/**
+ * Uppercase the first character of a string, leaving every other character as it was.
+ * @example
+ * ```ts
+ * console.log(capitalizeFirstLetter("consectetur adipiscing elit")); // logs "Consectetur adipiscing elit"
+ * ```
+ * @param {string} str - The string whose first letter is to be capitalized
+ * @returns {string} A copy of `str` with its first character uppercased.
+ */
+export function capitalizeFirstLetter(str) {
+  return `${str.charAt(0).toUpperCase()}${str.slice(1)}`;
+}
+
+/**
+ * Convert a string into `Title Case` (e.g. for text shown in console logs).
+ * @example
+ * ```ts
+ * console.log(toTitleCase("lorem ipsum dolor sit amet")); // logs "Lorem Ipsum Dolor Sit Amet"
+ * ```
+ * @param {string} str - The string to convert
+ * @returns {string} The words of `str`, each capitalized, joined by single spaces.
+ */
+export function toTitleCase(str) {
+  const words = splitWords(str);
+  const titled = words.map(w => `${w.charAt(0).toUpperCase()}${w.slice(1).toLowerCase()}`);
+  return titled.join(" ");
+}
+
+/**
+ * Convert a string into `camelCase` (e.g. for i18n keys).
+ * @example
+ * ```ts
+ * console.log(toCamelCase("BIG_ANGRY_TRAINER")); // logs "bigAngryTrainer"
+ * ```
+ * @param {string} str - The string to convert
+ * @returns {string} The words of `str` joined without separators:
+ * the first word fully lowercased, every later word capitalized.
+ */
+export function toCamelCase(str) {
+  const [first, ...rest] = splitWords(str);
+  // Only the words after the first one get their initial letter capitalized
+  const capitalizedRest = rest.map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase());
+  return first.toLowerCase() + capitalizedRest.join("");
+}
+
+/**
+ * Convert a string into `PascalCase`.
+ * @example
+ * ```ts
+ * console.log(toPascalCase("hi how was your day")); // logs "HiHowWasYourDay"
+ * ```
+ * @param {string} str - The string to convert
+ * @returns {string} The words of `str`, each capitalized, joined without separators.
+ */
+export function toPascalCase(str) {
+  const capitalize = word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
+  const words = splitWords(str).map(capitalize);
+  return words.join("");
+}
+
+/**
+ * Convert a string into `kebab-case` (e.g. for filenames).
+ * @example
+ * ```ts
+ * console.log(toKebabCase("not_kebab-caSe String")); // logs "not-kebab-case-string"
+ * ```
+ * @param {string} str - The string to convert
+ * @returns {string} The words of `str`, each lowercased, joined by hyphens.
+ */
+export function toKebabCase(str) {
+  const words = splitWords(str);
+  const lowered = words.map(w => w.toLowerCase());
+  return lowered.join("-");
+}
+
+/**
+ * Convert a string into `snake_case` (e.g. for filenames).
+ * @example
+ * ```ts
+ * console.log(toSnakeCase("not-in snake_CaSe")); // logs "not_in_snake_case"
+ * ```
+ * @param {string} str - The string to convert
+ * @returns {string} The words of `str`, each lowercased,
+ * joined by underscores.
+ */
+export function toSnakeCase(str) {
+  const lowered = Array.from(splitWords(str), word => word.toLowerCase());
+  return lowered.join("_");
+}
+
+/**
+ * Convert a string into `UPPER_SNAKE_CASE`.
+ * @example
+ * ```ts
+ * console.log(toUpperSnakeCase("apples bananas_oranGes-PearS")); // logs "APPLES_BANANAS_ORANGES_PEARS"
+ * ```
+ * @param {string} str - The string to convert
+ * @returns {string} The words of `str`, each uppercased, joined by underscores.
+ */
+export function toUpperSnakeCase(str) {
+  const words = splitWords(str);
+  const shouted = words.map(w => w.toUpperCase());
+  return shouted.join("_");
+}
+
+/**
+ * Convert a string into `Pascal_Snake_Case`.
+ * @example
+ * ```ts
+ * console.log(toPascalSnakeCase("apples-bananas_oranGes Pears")); // logs "Apples_Bananas_Oranges_Pears"
+ * ```
+ * @param {string} str - The string to convert
+ * @returns {string} The words of `str`, each capitalized, joined by underscores.
+ */
+export function toPascalSnakeCase(str) {
+  const capitalize = word => `${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`;
+  const words = splitWords(str).map(capitalize);
+  return words.join("_");
+}
--- a/scripts/scrape-trainer-names/check-gender.js
+++ b/scripts/scrape-trainer-names/check-gender.js
@ -0,0 +1,45 @@
+/**
+ * Check if the given trainer class is female.
+ * @param {Document} document - The HTML document to scrape
+ * @returns {[gender: boolean, counterpartURL?: string]} A 2-length tuple containing:
+ * 1. The trainer class' normal gender (`false` when no infobox or gender row is found)
+ * 2. The page name of the class' gender counterpart, without the `_(Trainer_class)` suffix (if it has one).
+ */
+export function checkGenderAndType(document) {
+  const infoBox = document.getElementById("infobox");
+  if (!infoBox) {
+    return [false];
+  }
+  // Find the row of the table containing the specified gender.
+  // HTML documents report element names in uppercase ("TR"), so the comparison must ignore case.
+  const genderCell = [...infoBox.childNodes].find(
+    node => node.nodeName.toLowerCase() === "tr" && [...node.childNodes].some(c => c.textContent?.includes("Gender")),
+  )?.parentElement;
+  if (!genderCell) {
+    return [false];
+  }
+
+  const gender = getGender(genderCell.querySelector("tr"));
+  // Non-global so `match` exposes the capture group; the parentheses are escaped to match the literal suffix.
+  const hrefExtractRegex = /href="\/wiki\/([^"]*)_\(Trainer_class\)"/;
+  const counterpartURL = genderCell.querySelector("td")?.innerHTML.match(hrefExtractRegex)?.[1];
+  return [gender, counterpartURL];
+}
+
+/**
+ * Determine from a gender row's text whether the trainer class is female.
+ * @param {HTMLTableRowElement?} genderCell - The row to check
+ * @returns {boolean} `true` only for "Female Only"; male-only, mixed and unknown classes all yield `false`
+ * @todo Handle trainers whose gender type has changed across different gens (Artists, etc.)
+ */
+function getGender(genderCell) {
+  switch (genderCell?.textContent?.trim()) {
+    case "Female Only":
+      // The result is consumed as an "is female" flag, so this is the only case that may report `true`
+      return true;
+    case "Male Only":
+    case "Both":
+    default:
+      return false;
+  }
+}
--- a/scripts/scrape-trainer-names/fetch-names.js
+++ b/scripts/scrape-trainer-names/fetch-names.js
@ -0,0 +1,92 @@
+import chalk from "chalk";
+import { JSDOM } from "jsdom";
+import { checkGenderAndType } from "./check-gender.js";
+
+/**
+ * @import { nameRecord, parsedNames } from "./types.js";
+ */
+
+/**
+ * Fetch a given trainer class' names from its Bulbapedia page.
+ * @param {string} url - The page name of the trainer class, without the `_(Trainer_class)` suffix
+ * @param {boolean} [currGender] - The current class' known gender.
+ * If provided, will override the natural gender detection with the given gender and avoid
+ * checking any gender counterparts.
+ * @returns {Promise<parsedNames>} A Promise that resolves with the parsed names once the parsing concludes.
+ * Will resolve with empty `male` and `female` arrays if the page has no trainer list.
+ */
+export async function fetchNames(url, currGender) {
+  const { document } = (await JSDOM.fromURL(`https://bulbapedia.bulbagarden.net/wiki/${url}_(Trainer_class)`)).window;
+  const trainerListHeader = document.querySelector("#Trainer_list")?.parentElement;
+  if (!trainerListHeader?.parentElement?.childNodes) {
+    console.warn(chalk.hex("#ffa500")(`URL ${url} did not correspond to a valid trainer class!`));
+    return { male: [], female: [] };
+  }
+
+  let trainerNames = /** @type {Set<string>} */ (new Set());
+  let femaleTrainerNames = /** @type {Set<string>} */ (new Set());
+
+  // If we don't know whether this class is female, check, optionally recursing into the counterpart's webpage as well.
+  if (currGender === undefined) {
+    /** @type {string | undefined} */
+    let counterpartURL;
+    [currGender, counterpartURL] = checkGenderAndType(document);
+    if (counterpartURL) {
+      console.log(chalk.green(`Accessing gender counterpart URL: ${counterpartURL}`));
+      const names = await fetchNames(counterpartURL, !currGender);
+      trainerNames = new Set(names.male);
+      femaleTrainerNames = new Set(names.female);
+    }
+  }
+
+  const elements = [...trainerListHeader.parentElement.childNodes];
+
+  // Find all elements within the "Trainer Names" header and selectively filter to find the name tables.
+  const startChildIndex = elements.indexOf(trainerListHeader);
+  const endChildIndex = elements.findIndex((h, i) => i > startChildIndex && h.nodeName === "H2");
+
+  // Grab all the name tables sorted by generation; with no later H2 (-1) the section runs to the last node.
+  const tables = elements.slice(startChildIndex, endChildIndex === -1 ? undefined : endChildIndex).filter(
+    /** @type {(t: ChildNode) => t is Element} */
+    (
+      t =>
+        // Only grab expandable tables within the header block
+        t.nodeName === "TABLE" && t["className"] === "expandable"
+    ),
+  );
+
+  parseTable(tables, currGender, trainerNames, femaleTrainerNames);
+  return {
+    male: Array.from(trainerNames),
+    female: Array.from(femaleTrainerNames),
+  };
+}
+
+/**
+ * Collect the trainer names found in the given name tables into the matching gender's Set.
+ * @param {Element[]} tables - The name tables to read
+ * @param {boolean} isFemale - Whether the trainer is known to be female or not
+ * @param {Set<string>} trainerNames - A Set receiving the male trainer names
+ * @param {Set<string>} femaleTrainerNames - A Set receiving the female trainer names
+ */
+function parseTable(tables, isFemale, trainerNames, femaleTrainerNames) {
+  // Matches the plaintext name inside an anchor, with an optional ampersand-joined second name
+  const namePattern = />([a-z]+(?: &amp; [a-z]+)?)<\/a>/i;
+  const rowsPastHeaders = tables.flatMap(table => [...table.querySelectorAll("tr:not(:first-child)")]);
+  for (const row of rowsPastHeaders) {
+    // Trainer rows have exactly 9 children (Name, Battle, Winnings, 6 party slots)
+    if (row.children.length !== 9) {
+      continue;
+    }
+    const cellHTML = row.firstElementChild?.innerHTML;
+    // Skip empty elements & ones without anchors
+    if (!cellHTML || !cellHTML.includes(" <a ")) {
+      continue;
+    }
+    const nameMatch = namePattern.exec(cellHTML);
+    if (nameMatch) {
+      const target = isFemale || cellHTML.includes("♀") ? femaleTrainerNames : trainerNames;
+      target.add(nameMatch[1].replace("&amp;", "&"));
+    }
+  }
+}
--- a/scripts/scrape-trainer-names/help.js
+++ b/scripts/scrape-trainer-names/help.js
--- a/scripts/scrape-trainer-names/main.js
+++ b/scripts/scrape-trainer-names/main.js
@ -0,0 +1,36 @@
+import { toCamelCase, toPascalSnakeCase } from "../helpers/strings.js";
+import { fetchNames } from "./fetch-names.js";
+
+/**
+ * @packageDocumentation
+ * This script will scrape Bulbapedia for the English names of a given trainer class,
+ * outputting them as JSON.
+ * Usage: `node scripts/scrape-trainer-names/main.js` (also exposed as the `scrape-trainers` package script)
+ */
+
+/**
+ * Scrape the requested trainer names and format the resultant output.
+ * @param {...string} classes - The names of the trainer classes to retrieve
+ * @returns {Promise<string>} A Promise that resolves with the names as pretty-printed JSON.
+ */
+async function scrapeTrainerNames(...classes) {
+  /**
+   * Build the `[key, names]` entry for a single trainer class.
+   * Classes without any female names collapse to just their array of male names.
+   * @param {string} trainerClass - The name of the class to fetch
+   * @returns {Promise<[string, string[] | parsedNames]>}
+   */
+  const toEntry = async trainerClass => {
+    // Bulba URLs use Pascal_Snake_Case (Bug_Catcher)
+    const classURL = toPascalSnakeCase(trainerClass);
+    const names = await fetchNames(classURL);
+    const hasFemaleNames = names.female.length !== 0;
+    return [toCamelCase(trainerClass), hasFemaleNames ? names : names.male];
+  };
+  // Fetch every class in parallel, then gather the entries into a single object
+  const entries = await Promise.all(classes.map(toEntry));
+  const namesByClass = Object.fromEntries(entries);
+  return JSON.stringify(namesByClass, null, 2);
+}
+
+console.log(await scrapeTrainerNames("doctor")); // currently hard-coded to scrape only the "doctor" class
--- a/scripts/scrape-trainer-names/types.js
+++ b/scripts/scrape-trainer-names/types.js
@ -0,0 +1,7 @@
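+/**
+ * @typedef {Object} parsedNames
+ * A parsed object containing the desired names,
+ * grouped by the gender of the trainers they belong to.
+ * @property {string[]} male - The names of the male trainers
+ * @property {string[]} female - The names of the female trainers
+ */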
--- a/src/data/abilities/ability.ts
+++ b/src/data/abilities/ability.ts
@ -1760,7 +1760,7 @@ export class PokemonTypeChangeAbAttr extends PreAttackAbAttr {
 * Parameters for abilities that modify the hit count and damage of a move
 */
 export interface AddSecondStrikeAbAttrParams extends Omit<AugmentMoveInteractionAbAttrParams, "opponent"> {
-  /** Holder for the number of hits. May be modified by ability application  */
+  /** Holder for the number of hits. May be modified by ability application */
  hitCount?: NumberHolder;
  /** Holder for the damage multiplier _of the current hit_ */
  multiplier?: NumberHolder;
@ -5816,7 +5816,7 @@ export class NoFusionAbilityAbAttr extends AbAttr {
 export interface IgnoreTypeImmunityAbAttrParams extends AbAttrBaseParams {
  /** The type of the move being used */
  readonly moveType: PokemonType;
-  /** The type being checked for  */
+  /** The type being checked for */
  readonly defenderType: PokemonType;
  /** Holds whether the type immunity should be bypassed */
  cancelled: BooleanHolder;
@ -6755,7 +6755,7 @@ function getPokemonWithWeatherBasedForms() {
    );
 }

-// biome-ignore format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`)
+// biome-ignore-start format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`)
 export function initAbilities() {
  allAbilities.push(
    new Ability(AbilityId.NONE, 3),
@ -7867,3 +7867,4 @@ export function initAbilities() {
      .attr(ConfusionOnStatusEffectAbAttr, StatusEffect.POISON, StatusEffect.TOXIC)
  );
 }
+// biome-ignore-end format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`)
--- a/src/data/moves/move.ts
+++ b/src/data/moves/move.ts
@ -5916,8 +5916,8 @@ export class ProtectAttr extends AddBattlerTagAttr {
      for (const turnMove of user.getLastXMoves(-1).slice()) {
        if (
          // Quick & Wide guard increment the Protect counter without using it for fail chance
-          !(allMoves[turnMove.move].hasAttr("ProtectAttr") || 
-          [MoveId.QUICK_GUARD, MoveId.WIDE_GUARD].includes(turnMove.move)) || 
+          !(allMoves[turnMove.move].hasAttr("ProtectAttr") ||
+          [MoveId.QUICK_GUARD, MoveId.WIDE_GUARD].includes(turnMove.move)) ||
          turnMove.result !== MoveResult.SUCCESS
        ) {
          break;
--- a/typedoc.json
+++ b/typedoc.json
@ -1,7 +1,7 @@
 {
  "entryPoints": ["./src"],
  "entryPointStrategy": "expand",
-  "exclude": ["**/*+.test.ts"],
+  "exclude": ["**/*+.test.ts", "**/src/data/trainer-names.ts"],
  "out": "typedoc",
  "highlightLanguages": ["javascript", "json", "jsonc", "json5", "tsx", "typescript", "markdown"]
 }