Started work on the script again

This commit is contained in:
Bertie690 2025-08-16 18:15:17 -04:00
parent 4939a9f6f4
commit 43e9d82b26
10 changed files with 367 additions and 6 deletions

View File

@ -14,6 +14,7 @@
"test:watch": "vitest watch --coverage --no-isolate",
"test:silent": "vitest run --silent='passed-only' --no-isolate",
"test:create": "node scripts/create-test/create-test.js",
"scrape-trainers": "node scripts/scrape-trainer-names/main.js",
"typecheck": "tsc --noEmit",
"eslint": "eslint --fix .",
"eslint-ci": "eslint .",

179
scripts/helpers/strings.js Normal file
View File

@ -0,0 +1,179 @@
// #region Split string code
// Regexps involved with splitting words in various case formats.
// Sourced from https://www.npmjs.com/package/change-case (with slight tweaking here and there)
/**
 * Matches a lowercase letter or digit that is immediately followed by an uppercase letter
 * (e.g. the `oB` in `fooBar`), capturing each side separately so that a delimiter
 * can be inserted between the two.
 * @type {RegExp}
 */
const SPLIT_LOWER_UPPER_RE = /([\p{Ll}\d])(\p{Lu})/gu;
/**
 * Matches an uppercase letter that is followed by an uppercase letter and then a lowercase letter
 * (e.g. the `LHt` in `XMLHttp`). The first capture is the end of an uppercase run and the
 * second capture is the start of the next capitalized word.
 * @type {RegExp}
 */
const SPLIT_UPPER_UPPER_RE = /(\p{Lu})([\p{Lu}][\p{Ll}])/gu;
/**
 * Matches one or more consecutive hyphens, underscores or spaces
 * (the explicit delimiters that are replaced when splitting words).
 * @type {RegExp}
 */
const DELIM_STRIP_REGEXP = /[-_ ]+/giu;
// Replacement pattern for the two split regexes above: re-emits both captured groups
// with a NUL character (`\0`) between them, which acts as the temporary word delimiter.
const SPLIT_REPLACE_VALUE = "$1\0$2";
/**
 * Split any cased string into an array of its constituent words.
 *
 * Words are delimited by runs of spaces, underscores or hyphens, by lowercase/digit-to-uppercase
 * boundaries (`fooBar` -> `foo`, `Bar`) and by the end of an uppercase run (`XMLHttp` -> `XML`, `Http`).
 * @param {string} value - The string to split
 * @returns {string[]} The words making up `value`, in order.
 * Returns an empty array if `value` contains no words at all (empty, or made up solely of delimiters).
 */
function splitWords(value) {
  // Mark every word boundary with a NUL character, which is then used as the sole delimiter
  const delimited = value
    .trim()
    .replace(SPLIT_LOWER_UPPER_RE, SPLIT_REPLACE_VALUE)
    .replace(SPLIT_UPPER_UPPER_RE, SPLIT_REPLACE_VALUE)
    .replace(DELIM_STRIP_REGEXP, "\0");
  // Trim the delimiter from around the output string
  const trimmed = trimFromStartAndEnd(delimited, "\0");
  // Splitting an empty string would yield `[""]`; report "no words" as an empty array instead
  return trimmed === "" ? [] : trimmed.split("\0");
}
/**
 * Helper function to remove every leading and trailing repetition of a character sequence from a string.
 * @param {string} str - The string to trim
 * @param {string} charToTrim - The sequence to remove from both ends; may be longer than one character
 * @returns {string} `str` without any leading/trailing repetitions of `charToTrim`.
 * Returns `str` unchanged if `charToTrim` is empty, and an empty string if `str` consists
 * entirely of repetitions of `charToTrim`.
 */
function trimFromStartAndEnd(str, charToTrim) {
  const blockLength = charToTrim.length;
  // An empty sequence "matches" at every position, which would otherwise loop forever
  if (blockLength === 0) {
    return str;
  }

  let start = 0;
  while (str.startsWith(charToTrim, start)) {
    start += blockLength;
  }

  let end = str.length;
  // Only trim a trailing block while it fits entirely inside the part that is still left,
  // so that it can never overlap text already consumed from the start.
  while (end - start >= blockLength && str.endsWith(charToTrim, end)) {
    end -= blockLength;
  }

  return str.slice(start, end);
}
// #endregion Split String code
/**
 * Upper-case the first character of a string, leaving every other character untouched.
 * @example
 * ```ts
 * console.log(capitalizeFirstLetter("consectetur adipiscing elit")); // returns "Consectetur adipiscing elit"
 * ```
 * @param {string} str - The string whose first letter is to be capitalized
 * @returns {string} A copy of `str` with its first character converted to upper case.
 */
export function capitalizeFirstLetter(str) {
  const initial = str.charAt(0).toUpperCase();
  const remainder = str.slice(1);
  return initial.concat(remainder);
}
/**
 * Convert a string into `Title Case` (such as one used for console logs).
 *
 * The input is broken into words with `splitWords`; each word gets an upper-case first
 * character and a lower-case remainder, and the words are joined with single spaces.
 * @example
 * ```ts
 * console.log(toTitleCase("lorem ipsum dolor sit amet")); // returns "Lorem Ipsum Dolor Sit Amet"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into title case.
 */
export function toTitleCase(str) {
  const titledWords = [];
  for (const word of splitWords(str)) {
    const initial = word.charAt(0).toUpperCase();
    const remainder = word.slice(1).toLowerCase();
    titledWords.push(initial + remainder);
  }
  return titledWords.join(" ");
}
/**
 * Convert a string into `camelCase` (such as one used for i18n keys).
 *
 * The input is broken into words with `splitWords`; the first word is fully lower-cased, every
 * later word gets an upper-case first character and a lower-case remainder, and the words are
 * concatenated without a separator.
 * @example
 * ```ts
 * console.log(toCamelCase("BIG_ANGRY_TRAINER")); // returns "bigAngryTrainer"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into camel case.
 */
export function toCamelCase(str) {
  return splitWords(str).reduce((camel, word, position) => {
    if (position === 0) {
      return camel + word.toLowerCase();
    }
    return camel + word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
  }, "");
}
/**
 * Convert a string into `PascalCase`.
 *
 * The input is broken into words with `splitWords`; each word gets an upper-case first
 * character and a lower-case remainder, and the words are concatenated without a separator.
 * @example
 * ```ts
 * console.log(toPascalCase("hi how was your day")); // returns "HiHowWasYourDay"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into pascal case.
 */
export function toPascalCase(str) {
  let pascal = "";
  for (const word of splitWords(str)) {
    pascal += `${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`;
  }
  return pascal;
}
/**
 * Convert a string into `kebab-case` (such as one used for filenames).
 *
 * The input is broken into words with `splitWords`; each word is lower-cased and the words
 * are joined with hyphens.
 * @example
 * ```ts
 * console.log(toKebabCase("not_kebab-CASE String")); // returns "not-kebab-case-string"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into kebab case.
 */
export function toKebabCase(str) {
  const loweredWords = Array.from(splitWords(str), word => word.toLowerCase());
  return loweredWords.join("-");
}
/**
 * Convert a string into `snake_case` (such as one used for filenames).
 *
 * The input is broken into words with `splitWords`; each word is lower-cased and the words
 * are joined with underscores.
 * @example
 * ```ts
 * console.log(toSnakeCase("not-in snake_CASE")); // returns "not_in_snake_case"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into snake case.
 */
export function toSnakeCase(str) {
  const parts = [];
  for (const word of splitWords(str)) {
    parts.push(word.toLowerCase());
  }
  return parts.join("_");
}
/**
 * Convert a string into `UPPER_SNAKE_CASE`.
 *
 * The input is broken into words with `splitWords`; each word is upper-cased and the words
 * are joined with underscores.
 * @example
 * ```ts
 * console.log(toUpperSnakeCase("apples bananas_oranges-Pears")); // returns "APPLES_BANANAS_ORANGES_PEARS"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into upper snake case.
 */
export function toUpperSnakeCase(str) {
  return splitWords(str).reduce((joined, word, position) => {
    // Every word except the first is preceded by an underscore
    const prefix = position === 0 ? "" : `${joined}_`;
    return prefix + word.toUpperCase();
  }, "");
}
/**
 * Convert a string into `Pascal_Snake_Case`.
 *
 * The input is broken into words with `splitWords`; each word gets an upper-case first
 * character and a lower-case remainder, and the words are joined with underscores.
 * @example
 * ```ts
 * console.log(toPascalSnakeCase("apples-bananas_ORANGES Pears")); // returns "Apples_Bananas_Oranges_Pears"
 * ```
 * @param {string} str - The string being converted
 * @returns {string} The result of converting `str` into pascal snake case.
 */
export function toPascalSnakeCase(str) {
  const capitalizeWord = word => {
    const initial = word.charAt(0).toUpperCase();
    const remainder = word.slice(1).toLowerCase();
    return initial + remainder;
  };
  const words = splitWords(str);
  return words.map(word => capitalizeWord(word)).join("_");
}

View File

@ -0,0 +1,45 @@
/**
 * Check whether the given trainer class is female, and look for a link to its gender counterpart.
 * @param {Document} document - The HTML document to scrape
 * @returns {[gender: boolean, counterpartURL?: string]} A 2-length tuple containing:
 * 1. The trainer class' normal gender, as reported by `getGender` for the infobox's "Gender" row
 *    (`false` if the page has no `#infobox` element or no such row).
 * 2. The page name of the gender counterpart of the current class, i.e. the part of the link
 *    between `/wiki/` and `_(Trainer_class)` (`undefined` if the row contains no such link).
 */
export function checkGenderAndType(document) {
  const infoBox = document.getElementById("infobox");
  if (!infoBox) {
    return [false];
  }

  // Search every descendant row rather than the infobox's direct children: element `nodeName`s
  // are upper-case in HTML documents (so comparing against "tr" can never match), and rows are
  // usually nested below the table itself (e.g. inside a `<tbody>`).
  const genderRow = [...infoBox.querySelectorAll("tr")].find(row => row.textContent?.includes("Gender"));
  if (!genderRow) {
    return [false];
  }

  const gender = getGender(genderRow);

  // - No `g` flag: with it, `String.prototype.match` drops the capture groups.
  // - The parentheses around `Trainer_class` are escaped because they are literal URL characters.
  // - `[^"]+?` keeps the match inside a single `href` attribute.
  // NOTE(review): assumes the counterpart link lives in the same row as the gender text - confirm against the page markup.
  const hrefExtractRegex = /href="\/wiki\/([^"]+?)_\(Trainer_class\)"/;
  const counterpartURL = hrefExtractRegex.exec(genderRow.innerHTML)?.[1];

  return [gender, counterpartURL];
}
/**
 * Retrieve the gender from the given node text.
 * @param {HTMLTableRowElement?} genderCell - The row (or cell) whose text states the class' gender
 * @returns {boolean} `true` if the text marks the class as "Female Only";
 * `false` otherwise ("Male Only", "Both", unrecognised text, or a missing node).
 * @todo Handle trainers whose gender type has changed across different gens (Artists, etc.)
 */
function getGender(genderCell) {
  const text = genderCell?.textContent ?? "";
  // Callers treat the result as "is female", so only an explicit "Female Only" may yield `true`.
  // A substring check is used because the surrounding row also carries its "Gender" label.
  return text.includes("Female Only");
}

View File

@ -0,0 +1,92 @@
import chalk from "chalk";
import { JSDOM } from "jsdom";
import { checkGenderAndType } from "./check-gender.js";
/**
* @import { nameRecord, parsedNames } from "./types.js";
*/
/**
 * Fetch a given trainer class' names from its Bulbapedia page.
 * @param {string} url - The page name of the trainer class (the part of the URL before `_(Trainer_class)`)
 * @param {boolean} [currGender] - The current class' known gender (`true` if female).
 * If provided, will override the natural gender detection with the given gender and avoid
 * checking any gender counterparts.
 * @returns {Promise<import("./types.js").parsedNames>} A Promise that resolves with the parsed names once the parsing concludes.
 * Will resolve with empty `male` and `female` arrays if the page has no "Trainer list" section.
 */
export async function fetchNames(url, currGender) {
  const { document } = (await JSDOM.fromURL(`https://bulbapedia.bulbagarden.net/wiki/${url}_(Trainer_class)`)).window;

  const trainerListHeader = document.querySelector("#Trainer_list")?.parentElement;
  const siblingNodes = trainerListHeader?.parentElement?.childNodes;
  if (!trainerListHeader || !siblingNodes) {
    console.warn(chalk.hex("#ffa500")(`URL ${url} did not correspond to a valid trainer class!`));
    return { male: [], female: [] };
  }

  let trainerNames = /** @type {Set<string>} */ (new Set());
  let femaleTrainerNames = /** @type {Set<string>} */ (new Set());

  // If we don't know whether this class is female, check, optionally recursing into the counterpart's webpage as well.
  let isFemale = currGender;
  if (isFemale === undefined) {
    const [detectedGender, counterpartURL] = checkGenderAndType(document);
    isFemale = detectedGender;
    if (counterpartURL) {
      console.log(chalk.green(`Accessing gender counterpart URL: ${counterpartURL}`));
      // Passing an explicit gender stops the counterpart from recursing back into this page
      const names = await fetchNames(counterpartURL, !isFemale);
      trainerNames = new Set(names.male);
      femaleTrainerNames = new Set(names.female);
    }
  }

  const elements = [...siblingNodes];

  // The trainer list spans from its header up to (but excluding) the next H2 header.
  const startChildIndex = elements.indexOf(trainerListHeader);
  const nextHeaderIndex = elements.findIndex((node, index) => index > startChildIndex && node.nodeName === "H2");
  // If the trainer list is the last section, run to the end of the page
  // (a raw `-1` would make `slice` drop the final node).
  const endChildIndex = nextHeaderIndex === -1 ? elements.length : nextHeaderIndex;

  // Grab all the trainer name tables sorted by generation
  const tables = elements.slice(startChildIndex, endChildIndex).filter(
    /** @type {(t: ChildNode) => t is Element} */
    (
      t =>
        // Only grab expandable tables within the header block
        t.nodeName === "TABLE" && t["className"] === "expandable"
    ),
  );

  parseTable(tables, isFemale, trainerNames, femaleTrainerNames);

  return {
    male: Array.from(trainerNames),
    female: Array.from(femaleTrainerNames),
  };
}
/**
 * Collect the trainer names found in the given tables into the matching name set.
 *
 * Only rows other than a table's first row that have exactly 9 child elements
 * (Name, Battle, Winnings, 6 party slots) are considered, and only when their first cell
 * contains an anchor holding a plain-text name (optionally two names joined by an ampersand).
 * @param {Element[]} tables - The array of Elements forming the current table
 * @param {boolean} isFemale - Whether the trainer is known to be female or not
 * @param {Set<string>} trainerNames - A Set containing the male trainer names; added to in place
 * @param {Set<string>} femaleTrainerNames - A Set containing the female trainer names; added to in place
 */
function parseTable(tables, isFemale, trainerNames, femaleTrainerNames) {
  // Plaintext name part with an optional ampersand
  const namePattern = />([a-z]+(?: &amp; [a-z]+)?)<\/a>/i;

  for (const table of tables) {
    for (const row of table.querySelectorAll("tr:not(:first-child)")) {
      if (row.children.length !== 9) {
        continue;
      }

      const cellHtml = row.firstElementChild?.innerHTML;
      // Skip empty elements & ones without anchors
      if (!cellHtml || !cellHtml.includes(" <a ")) {
        continue;
      }

      const match = namePattern.exec(cellHtml);
      if (match === null) {
        continue;
      }

      // A name counts as female if the whole class is, or if the cell carries the female sign
      const targetSet = isFemale || cellHtml.includes("♀") ? femaleTrainerNames : trainerNames;
      targetSet.add(match[1].replace("&amp;", "&"));
    }
  }
}

View File

View File

@ -0,0 +1,36 @@
import { toCamelCase, toPascalSnakeCase } from "../helpers/strings.js";
import { fetchNames } from "./fetch-names.js";
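/**
 * @packageDocumentation
 * This script scrapes Bulbapedia for the English names of a given trainer class,
 * outputting them as JSON.
 *
 * Usage: `node scripts/scrape-trainer-names/main.js`
 * (also registered as the `scrape-trainers` script in `package.json`).
 */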
/**
 * Scrape the requested trainer names and format the resultant output.
 *
 * All classes are fetched concurrently. Each class is looked up under its `Pascal_Snake_Case`
 * page name and stored in the output under its `camelCase` key.
 * @param {...string} classes - The names of the trainer classes to retrieve
 * @returns {Promise<string>} A Promise that resolves with the finished text: a JSON object
 * (indented by 2 spaces) mapping each class key to its names.
 */
async function scrapeTrainerNames(...classes) {
  /**
   * Build the `[key, names]` entry for a single trainer class.
   * @param {string} trainerClass
   */
  const buildEntry = async trainerClass => {
    // Bulba URLs use Pascal_Snake_Case (Bug_Catcher)
    const names = await fetchNames(toPascalSnakeCase(trainerClass));
    // Trainer classes without any female names are flattened to just their array of male names
    const hasFemaleNames = names.female.length > 0;
    return [toCamelCase(trainerClass), hasFemaleNames ? names : names.male];
  };

  const entries = await Promise.all(classes.map(trainerClass => buildEntry(trainerClass)));

  /**
   * A large object mapping each class to their corresponding list of trainer names.
   * @type {Record<string, string[] | import("./types.js").parsedNames>}
   */
  const nameTuples = Object.fromEntries(entries);
  return JSON.stringify(nameTuples, null, 2);
}
// Script entry point: scrape the (currently hard-coded) "doctor" trainer class
// and print the resulting JSON text to stdout.
console.log(await scrapeTrainerNames("doctor"));

View File

@ -0,0 +1,7 @@
/**
 * @typedef {Object} parsedNames
 * A parsed object containing the desired names.
 * @property {string[]} male - The names collected for male trainers
 * @property {string[]} female - The names collected for female trainers
 */

View File

@ -1760,7 +1760,7 @@ export class PokemonTypeChangeAbAttr extends PreAttackAbAttr {
* Parameters for abilities that modify the hit count and damage of a move
*/
export interface AddSecondStrikeAbAttrParams extends Omit<AugmentMoveInteractionAbAttrParams, "opponent"> {
/** Holder for the number of hits. May be modified by ability application */
/** Holder for the number of hits. May be modified by ability application */
hitCount?: NumberHolder;
/** Holder for the damage multiplier _of the current hit_ */
multiplier?: NumberHolder;
@ -5816,7 +5816,7 @@ export class NoFusionAbilityAbAttr extends AbAttr {
export interface IgnoreTypeImmunityAbAttrParams extends AbAttrBaseParams {
/** The type of the move being used */
readonly moveType: PokemonType;
/** The type being checked for */
/** The type being checked for */
readonly defenderType: PokemonType;
/** Holds whether the type immunity should be bypassed */
cancelled: BooleanHolder;
@ -6755,7 +6755,7 @@ function getPokemonWithWeatherBasedForms() {
);
}
// biome-ignore format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`)
// biome-ignore-start format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`)
export function initAbilities() {
allAbilities.push(
new Ability(AbilityId.NONE, 3),
@ -7867,3 +7867,4 @@ export function initAbilities() {
.attr(ConfusionOnStatusEffectAbAttr, StatusEffect.POISON, StatusEffect.TOXIC)
);
}
// biome-ignore-end format: prevent biome from removing the newlines (e.g. prevent `new Ability(...).attr(...)`)

View File

@ -5916,8 +5916,8 @@ export class ProtectAttr extends AddBattlerTagAttr {
for (const turnMove of user.getLastXMoves(-1).slice()) {
if (
// Quick & Wide guard increment the Protect counter without using it for fail chance
!(allMoves[turnMove.move].hasAttr("ProtectAttr") ||
[MoveId.QUICK_GUARD, MoveId.WIDE_GUARD].includes(turnMove.move)) ||
!(allMoves[turnMove.move].hasAttr("ProtectAttr") ||
[MoveId.QUICK_GUARD, MoveId.WIDE_GUARD].includes(turnMove.move)) ||
turnMove.result !== MoveResult.SUCCESS
) {
break;

View File

@ -1,7 +1,7 @@
{
"entryPoints": ["./src"],
"entryPointStrategy": "expand",
"exclude": ["**/*+.test.ts"],
"exclude": ["**/*+.test.ts", "**/src/data/trainer-names.ts"],
"out": "typedoc",
"highlightLanguages": ["javascript", "json", "jsonc", "json5", "tsx", "typescript", "markdown"]
}