build-a-squad/scripts/parse-assets.ts
michael f4e4412bf2 Initial commit: Build-A-Squad staffing calculator
React 19 + TypeScript client-side app for creative agency staffing projections,
powered by Google Gemini for AI scope analysis. Includes asset catalog, staffing
routes, three-tab workflow (scoping, configurator, squad projection), scenario
management, and CSV/PDF export.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 09:49:16 -06:00

317 lines
10 KiB
TypeScript

/**
* Asset Catalog Parser
*
* Parses `documents/Latest Asset List.md` (pipe-delimited markdown table)
* and outputs three JSON files into `data/`:
* - assets.json — full Asset[] catalog
* - staffingRoutes.json — Record<uniques, Record<role, hours>>
* - roleDisciplineMap.json — Record<role, discipline>
*
* Run: npm run parse-catalog
* When: After updating `documents/Latest Asset List.md` or the source CSV
* Check: Review console output for asset count (~334), staffing route count,
* and any roles flagged as UNKNOWN discipline.
*/
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';
// ES modules do not provide __filename/__dirname, so derive both from this
// module's URL; __dirname anchors the input/output paths resolved below.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/** One catalog row as written to `assets.json`. */
interface Asset {
  /** Composite key: `<catalogId> - <assetName> - <complexityLevel>`. */
  uniques: string;
  /** Catalog identifier from the first table column, markdown-unescaped. */
  catalogId: string;
  /** Category cell, trimmed. */
  category: string;
  /** Sub-category cell, trimmed. */
  subCategory: string;
  /** Asset name, markdown-unescaped. */
  assetName: string;
  /** Complexity label looked up in `COMPLEXITY_MAP`; the raw cell text when no label is mapped. */
  complexityLevel: string;
  /** Asset description with `<br/>` converted to newlines. */
  description: string;
  /** Description of what the complexity level entails, cleaned like `description`. */
  complexityDescription: string;
  /** Studio caveats, cleaned like `description`; empty string when the cell reads `None`. */
  studioCaveats: string;
  /** Result of `determineIsMaster` for this row's sub-category and name. */
  isMaster: boolean;
}
// ---------------------------------------------------------------------------
// Base role → discipline map (from existing app data)
// ---------------------------------------------------------------------------
/**
 * Known roles grouped by discipline, in the order they appear in the output map.
 *
 * A role name ending in a single trailing space is a separate key: the parser
 * appends that space to roles whose location column is `Oliver+`, so both
 * spellings must be listed when both locations occur.
 */
const ROLES_BY_DISCIPLINE: ReadonlyArray<readonly [string, readonly string[]]> = [
  [
    "Account Management",
    [
      "Business Director",
      "Account Director / Content Team Leader",
      "Account Director",
      "Account Manager",
      "Senior Account Manager",
      "Group Account Director",
    ],
  ],
  [
    "Delivery",
    [
      "Programme Director",
      "Senior Project Manager",
      "Project Manager",
      "Project Manager ",
    ],
  ],
  [
    "Strategy",
    [
      "Strategy Director",
      "Planning Director / Strategy Director",
      "Senior Planner / Strategist",
      "Planner / Strategist",
      "Strategist",
      "Strategist ",
    ],
  ],
  [
    "Creative",
    [
      "Executive Creative Director",
      "Senior Creative Director",
      "Creative Director",
      "Creative Director ",
      "Associate Creative Director",
      "Senior Designer / Lead Creator",
      "Senior Designer / Lead Creator ",
      "Designer / Creator",
      "Designer / Creator ",
      "Senior Art Director",
      "Art Director",
    ],
  ],
  [
    "Editorial",
    [
      "Senior Copywriter",
      "Conceptual Copywriter",
      "Copywriter / Brand Journalist",
      "Copywriter / Brand Journalist ",
    ],
  ],
  [
    "Production",
    [
      "Executive Producer",
      "Producer",
      "Producer ",
      "Senior Producer",
      "Senior Motion Design / Editor",
      "Motion Designer / Editor",
      "Motion Designer / Editor ",
    ],
  ],
  [
    "Tech",
    [
      "Web (Front-End) Developer",
      "Web (Front-End) Developer ",
      "Senior Web (Front-End) Developer",
      "Content Manager",
      "Content Manager ",
      "Web Designer",
      "Web Designer ",
      "CGI Operator (Medium)",
    ],
  ],
  ["QA", ["QA Manager", "QA Manager "]],
];

/** Flat role → discipline lookup used as the starting point for `roleDisciplineMap.json`. */
const BASE_ROLE_DISCIPLINE_MAP: Record<string, string> = Object.fromEntries(
  ROLES_BY_DISCIPLINE.flatMap(([discipline, roles]) =>
    roles.map((role): [string, string] => [role, discipline]),
  ),
);
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/**
 * Complexity code → label. Lookups use the trimmed text of the complexity
 * column, so the numeric keys below are matched as the strings '1', '2', '3'.
 * Codes not listed here are passed through unchanged by the caller.
 */
const COMPLEXITY_MAP: Record<string, string> = {
  1: 'Simple',
  2: 'Medium',
  3: 'Complex',
};
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** Backslash escapes that are stripped, paired with the plain character each one stands for. */
const MARKDOWN_ESCAPES: ReadonlyArray<readonly [RegExp, string]> = [
  [/\\_/g, '_'],
  [/\\&/g, '&'],
  [/\\\[/g, '['],
  [/\\\]/g, ']'],
];

/**
 * Strip the backslash from the markdown escapes `\_`, `\&`, `\[` and `\]`.
 * Any other backslash sequence is left exactly as it was.
 *
 * @param text Raw cell text from the markdown table.
 * @returns The text with those four escapes replaced by the bare character.
 */
function unescapeMarkdown(text: string): string {
  return MARKDOWN_ESCAPES.reduce(
    (current, [escaped, plain]) => current.replace(escaped, plain),
    text,
  );
}
/**
 * Normalise a free-text table cell: turn every `<br/>` into a newline, trim
 * the surrounding whitespace, then strip markdown escapes.
 *
 * @param text Raw cell text.
 * @returns The cleaned text.
 */
function cleanText(text: string): string {
  const withLineBreaks = text.replace(/<br\/>/g, '\n');
  const trimmed = withLineBreaks.trim();
  return unescapeMarkdown(trimmed);
}
/**
 * Decide whether an asset is a "master" (origination) rather than a derivative.
 *
 * An asset is treated as a derivative when either:
 * - its sub-category contains "adaptation" or "reedits" (any case), or
 * - its name contains one of the whole words Adapt, Edit, Re-Edit/ReEdit or
 *   Re-purpose/Repurpose (any case). The match is whole-word, so a name that
 *   only contains e.g. "Adaptation" or "Editorial" does not count.
 *
 * @param subCategory Sub-category cell of the catalog row.
 * @param assetName Asset name of the catalog row.
 * @returns `false` for derivatives, `true` otherwise.
 */
function determineIsMaster(subCategory: string, assetName: string): boolean {
  const derivativeSubCategory = /adaptation|reedits/i.test(subCategory);
  const derivativeName = /\b(?:Adapt|Re-?Edit|Re-?purpose|Edit)\b/i.test(assetName);
  return !(derivativeSubCategory || derivativeName);
}
/**
 * Keyword rules for roles missing from the base map, checked top to bottom;
 * the first matching pattern wins, so order encodes precedence:
 * - Tech comes before Creative so "web designer" is not caught by "designer".
 * - Production comes before Creative so "motion designer" is not caught by "designer".
 */
const ROLE_KEYWORD_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  // Tech
  [/web designer/i, 'Tech'],
  [/content manager/i, 'Tech'],
  [/developer/i, 'Tech'],
  [/\bcgi\b/i, 'Tech'],
  // QA
  [/\bqa\b|quality assurance/i, 'QA'],
  // Production
  [/producer/i, 'Production'],
  [/motion/i, 'Production'],
  // Editorial
  [/copywriter|brand journalist/i, 'Editorial'],
  // Creative
  [/creative director|designer|art director|creator/i, 'Creative'],
  // Strategy
  [/strateg|planner/i, 'Strategy'],
  // Delivery
  [/project manager|programme director/i, 'Delivery'],
  // Account Management
  [/business director|account director|account manager|group account/i, 'Account Management'],
];

/**
 * Guess a discipline for a role name by keyword matching.
 * Intended only as a fallback for roles that are not in the base map.
 *
 * @param roleName Role name as it appears in the catalog (case and surrounding whitespace are ignored).
 * @returns The discipline of the first matching rule, or `'UNKNOWN'` when none matches.
 */
function classifyRole(roleName: string): string {
  const normalised = roleName.trim().toLowerCase();
  const matchedRule = ROLE_KEYWORD_RULES.find(([pattern]) => pattern.test(normalised));
  return matchedRule ? matchedRule[1] : 'UNKNOWN';
}
// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------
// Read the pipe-delimited markdown catalog that sits next to this script's folder.
const mdPath = path.resolve(__dirname, '../documents/Latest Asset List.md');
const content = fs.readFileSync(mdPath, 'utf-8');
const lines = content.split('\n');

// Accumulators filled by the row loop below.
const assets: Asset[] = [];
const staffingRoutes: Record<string, Record<string, number>> = {};
const allRoles = new Set<string>();
const seenUniques = new Set<string>();

// The first four lines are title, blank, header row and separator row;
// data rows start at line 5 (1-indexed).
for (const row of lines.slice(4)) {
  // Skip blank lines and anything that is not a table row.
  if (!row.startsWith('|')) continue;

  // Splitting on '|' leaves an empty element at index 0 (before the first pipe),
  // so the first real column is index 1.
  const cells = row.split('|');
  const rawCell = (index: number): string => cells[index] ?? '';
  const cell = (index: number): string => cells[index]?.trim() ?? '';

  const rawCatalogId = cell(1);
  if (!rawCatalogId) continue;

  const catalogId = unescapeMarkdown(rawCatalogId);
  const category = cell(2);
  const subCategory = cell(3);
  // Column 4 holds the sub-category description, which is not part of Asset.
  const assetName = unescapeMarkdown(cell(5));
  const complexityRaw = cell(6);
  const complexityLevel = COMPLEXITY_MAP[complexityRaw] ?? complexityRaw;
  const description = cleanText(rawCell(7));
  const complexityDescription = cleanText(rawCell(8));
  const caveatsText = cleanText(rawCell(9));
  // A literal "None" means there are no caveats.
  const studioCaveats = caveatsText === 'None' ? '' : caveatsText;

  // Composite key; only the first row with a given key is kept.
  const uniques = `${catalogId} - ${assetName} - ${complexityLevel}`;
  if (seenUniques.has(uniques)) {
    console.warn(` DUPLICATE skipped: ${uniques}`);
    continue;
  }
  seenUniques.add(uniques);

  const isMaster = determineIsMaster(subCategory, assetName);
  assets.push({
    uniques,
    catalogId,
    category,
    subCategory,
    assetName,
    complexityLevel,
    description,
    complexityDescription,
    studioCaveats,
    isMaster,
  });

  // --- Staffing routes ---
  // Role groups begin at column index 13 and repeat every 4 columns:
  //   [Role, Location, Hours, <empty separator>]
  const roleHours: Record<string, number> = {};
  for (let groupStart = 13; groupStart + 2 < cells.length; groupStart += 4) {
    const roleName = cell(groupStart);
    const location = cell(groupStart + 1);
    const hoursText = cell(groupStart + 2);
    if (roleName === '' || hoursText === '') continue;

    const hours = parseFloat(hoursText);
    // Ignore groups whose hours are not numeric or are zero.
    if (Number.isNaN(hours) || hours === 0) continue;

    // Oliver+ roles get a trailing space so they stay distinct from the Local role.
    const roleKey = location === 'Oliver+' ? `${roleName} ` : roleName;
    roleHours[roleKey] = (roleHours[roleKey] ?? 0) + hours;
    allRoles.add(roleKey);
  }

  // Only assets with at least one staffed role get a route entry.
  if (Object.keys(roleHours).length > 0) {
    staffingRoutes[uniques] = roleHours;
  }
}
// ---------------------------------------------------------------------------
// Build merged role → discipline map
// ---------------------------------------------------------------------------
// Start from the hand-maintained base map, then classify any role seen in the
// catalog that the base map does not cover.
const roleDisciplineMap: Record<string, string> = { ...BASE_ROLE_DISCIPLINE_MAP };
const unknownRoles: string[] = [];

allRoles.forEach((role) => {
  // Already mapped: nothing to do.
  if (roleDisciplineMap[role]) return;

  const discipline = classifyRole(role);
  roleDisciplineMap[role] = discipline;

  if (discipline !== 'UNKNOWN') {
    console.log(` New role classified: "${role}" → ${discipline}`);
    return;
  }
  // Keep unclassifiable roles so the summary can flag them for manual review.
  unknownRoles.push(role);
});
// ---------------------------------------------------------------------------
// Write output
// ---------------------------------------------------------------------------
// Make sure the output folder exists, then write each result as pretty-printed JSON.
const dataDir = path.resolve(__dirname, '../data');
fs.mkdirSync(dataDir, { recursive: true });

const outputFiles: ReadonlyArray<readonly [string, unknown]> = [
  ['assets.json', assets],
  ['staffingRoutes.json', staffingRoutes],
  ['roleDisciplineMap.json', roleDisciplineMap],
];

for (const [fileName, payload] of outputFiles) {
  fs.writeFileSync(path.join(dataDir, fileName), JSON.stringify(payload, null, 2));
}
// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------
// Headline counts for a quick sanity check after each run.
console.log('\n--- Parse Complete ---');
console.log(` Assets: ${assets.length}`);
console.log(` Staffing routes: ${Object.keys(staffingRoutes).length}`);
console.log(` Unique roles: ${allRoles.size}`);

const disciplines = Array.from(new Set(Object.values(roleDisciplineMap)));
console.log(` Disciplines: ${disciplines.join(', ')}`);

// Roles the keyword classifier could not place need a manual entry in the base map.
if (unknownRoles.length > 0) {
  console.warn(`\n ⚠ UNKNOWN discipline roles (need manual classification):`);
  unknownRoles.forEach((role) => {
    console.warn(` - "${role}"`);
  });
}

// Count assets per category in one pass; Map keeps first-seen order.
const assetsPerCategory = new Map<string, number>();
for (const { category } of assets) {
  assetsPerCategory.set(category, (assetsPerCategory.get(category) ?? 0) + 1);
}
const categories = [...assetsPerCategory.keys()];

console.log(`\n Categories (${categories.length}):`);
for (const [category, count] of assetsPerCategory) {
  console.log(` ${category}: ${count} assets`);
}