/**
 * Asset Catalog Parser
 *
 * Parses `documents/Latest Asset List.md` (pipe-delimited markdown table)
 * and outputs three JSON files into `data/`:
 *   - assets.json            — full Asset[] catalog
 *   - staffingRoutes.json    — Record<string, Record<string, number>>
 *                              (asset `uniques` key → role key → hours)
 *   - roleDisciplineMap.json — Record<string, string> (role key → discipline)
 *
 * Run:   npm run parse-catalog
 * When:  After updating `documents/Latest Asset List.md` or the source CSV
 * Check: Review console output for asset count (~334), staffing route count,
 *        and any roles flagged as UNKNOWN discipline.
 */

import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** One catalog row as written to `assets.json`. */
interface Asset {
  /** Dedup key built as `${catalogId} - ${assetName} - ${complexityLevel}`. */
  uniques: string;
  catalogId: string;
  category: string;
  subCategory: string;
  assetName: string;
  /** Mapped through COMPLEXITY_MAP; unmapped raw values pass through unchanged. */
  complexityLevel: string;
  description: string;
  complexityDescription: string;
  /** Empty string when the source cell is exactly `None`. */
  studioCaveats: string;
  /** Result of `determineIsMaster(subCategory, assetName)`. */
  isMaster: boolean;
}

// ---------------------------------------------------------------------------
// Base role → discipline map (from existing app data)
// ---------------------------------------------------------------------------

// Keys with a trailing space are the "Oliver+" location variant of the same
// role (see the role-key construction in the staffing loop below).
const BASE_ROLE_DISCIPLINE_MAP: Record<string, string> = {
  "Business Director": "Account Management",
  "Account Director / Content Team Leader": "Account Management",
  "Account Director": "Account Management",
  "Account Manager": "Account Management",
  "Senior Account Manager": "Account Management",
  "Group Account Director": "Account Management",
  "Programme Director": "Delivery",
  "Senior Project Manager": "Delivery",
  "Project Manager": "Delivery",
  "Project Manager ": "Delivery",
  "Strategy Director": "Strategy",
  "Planning Director / Strategy Director": "Strategy",
  "Senior Planner / Strategist": "Strategy",
  "Planner / Strategist": "Strategy",
  "Strategist": "Strategy",
  "Strategist ": "Strategy",
  "Executive Creative Director": "Creative",
  "Senior Creative Director": "Creative",
  "Creative Director": "Creative",
  "Creative Director ": "Creative",
  "Associate Creative Director": "Creative",
  "Senior Designer / Lead Creator": "Creative",
  "Senior Designer / Lead Creator ": "Creative",
  "Designer / Creator": "Creative",
  "Designer / Creator ": "Creative",
  "Senior Art Director": "Creative",
  "Art Director": "Creative",
  "Senior Copywriter": "Editorial",
  "Conceptual Copywriter": "Editorial",
  "Copywriter / Brand Journalist": "Editorial",
  "Copywriter / Brand Journalist ": "Editorial",
  "Executive Producer": "Production",
  "Producer": "Production",
  "Producer ": "Production",
  "Senior Producer": "Production",
  "Senior Motion Design / Editor": "Production",
  "Motion Designer / Editor": "Production",
  "Motion Designer / Editor ": "Production",
  "Web (Front-End) Developer": "Tech",
  "Web (Front-End) Developer ": "Tech",
  "Senior Web (Front-End) Developer": "Tech",
  "Content Manager": "Tech",
  "Content Manager ": "Tech",
  "Web Designer": "Tech",
  "Web Designer ": "Tech",
  "CGI Operator (Medium)": "Tech",
  "QA Manager": "QA",
  "QA Manager ": "QA",
};

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** Raw complexity cell value → label stored on the asset. */
const COMPLEXITY_MAP: Record<string, string> = {
  '1': 'Simple',
  '2': 'Medium',
  '3': 'Complex',
};

/** Index of the first data row: lines 1-4 are title, blank, header row, separator row. */
const DATA_START_INDEX = 4;

/** Index (after splitting on pipes) of the first staffing role column. */
const FIRST_ROLE_COL = 13;

/** Width of one repeating staffing group; only the first three columns are read. */
const ROLE_GROUP_WIDTH = 4;

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Remove markdown escapes: `\_` → `_`, `\&` → `&`, `\[` → `[`, `\]` → `]`,
 * and `\|` → `|` (an escaped pipe is a literal pipe inside a table cell).
 */
function unescapeMarkdown(text: string): string {
  return text
    .replace(/\\_/g, '_')
    .replace(/\\&/g, '&')
    .replace(/\\\[/g, '[')
    .replace(/\\\]/g, ']')
    .replace(/\\\|/g, '|');
}

/**
 * Replace HTML line breaks (`<br>`, `<br/>`, `<br />`, any case) with
 * newlines, trim, and unescape markdown.
 */
function cleanText(text: string): string {
  return unescapeMarkdown(text.replace(/<br\s*\/?>/gi, '\n').trim());
}

/**
 * Split one table row into raw cells on *unescaped* pipes.
 *
 * A backslash escapes the character after it, so `\|` stays inside its cell
 * instead of shifting every later column. For rows without backslashes the
 * result is identical to `line.split('|')`: index 0 is the (empty) text
 * before the leading pipe, so table column N is at index N.
 */
function splitRow(line: string): string[] {
  const cells: string[] = [];
  let cellStart = 0;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (ch === '\\') {
      i++; // skip the escaped character, whatever it is
    } else if (ch === '|') {
      cells.push(line.slice(cellStart, i));
      cellStart = i + 1;
    }
  }
  cells.push(line.slice(cellStart));
  return cells;
}

/**
 * Determine whether an asset is a "master" (origination) or not.
 *
 * @returns `false` when the sub-category contains "adaptation" or "reedits",
 *   or when the asset name contains one of the whole words Adapt, Edit,
 *   Re-Edit/ReEdit or Re-purpose/Repurpose (case-insensitive); `true` otherwise.
 */
function determineIsMaster(subCategory: string, assetName: string): boolean {
  if (/adaptation|reedits/i.test(subCategory)) return false;
  if (/\b(?:Adapt|Re-?Edit|Re-?purpose|Edit)\b/i.test(assetName)) return false;
  return true;
}

/**
 * Auto-classify a role name into a discipline by keyword matching.
 * Used only as a fallback for roles not in the base map.
 *
 * Checks run in a fixed order and the first match wins, so the order below
 * is part of the behaviour.
 *
 * @returns The discipline name, or `'UNKNOWN'` when no keyword matches.
 */
function classifyRole(roleName: string): string {
  const name = roleName.trim().toLowerCase();

  // Tech (check before Creative to avoid "web designer" → Creative)
  if (/web designer/i.test(name)) return 'Tech';
  if (/content manager/i.test(name)) return 'Tech';
  if (/developer/i.test(name)) return 'Tech';
  if (/\bcgi\b/i.test(name)) return 'Tech';

  // QA
  if (/\bqa\b|quality assurance/i.test(name)) return 'QA';

  // Production (checked before Creative so "motion designer" is not
  // captured by the "designer" keyword)
  if (/producer/i.test(name)) return 'Production';
  if (/motion/i.test(name)) return 'Production';

  // Editorial
  if (/copywriter|brand journalist/i.test(name)) return 'Editorial';

  // Creative
  if (/creative director|designer|art director|creator/i.test(name)) return 'Creative';

  // Strategy
  if (/strateg|planner/i.test(name)) return 'Strategy';

  // Delivery
  if (/project manager|programme director/i.test(name)) return 'Delivery';

  // Account Management
  if (/business director|account director|account manager|group account/i.test(name)) {
    return 'Account Management';
  }

  return 'UNKNOWN';
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

const mdPath = path.resolve(__dirname, '../documents/Latest Asset List.md');
const content = fs.readFileSync(mdPath, 'utf-8');
// Accept both LF and CRLF line endings.
const lines = content.split(/\r?\n/);

const assets: Asset[] = [];
const staffingRoutes: Record<string, Record<string, number>> = {};
const allRoles = new Set<string>();
const seenUniques = new Set<string>();

// Data rows start at line 5 (1-indexed) → index 4 (0-indexed).
// Lines 1-4 are: title, blank, header row, separator row.
for (const line of lines.slice(DATA_START_INDEX)) {
  if (!line || !line.startsWith('|')) continue;

  const cols = splitRow(line);

  // Column mapping (after splitting on |, index 0 is empty before first pipe)
  const rawCatalogId = cols[1]?.trim() ?? '';
  if (!rawCatalogId) continue;

  const catalogId = unescapeMarkdown(rawCatalogId);
  const category = cols[2]?.trim() ?? '';
  const subCategory = cols[3]?.trim() ?? '';
  // cols[4] = sub-category description (not in Asset interface)
  const assetName = unescapeMarkdown(cols[5]?.trim() ?? '');
  const complexityRaw = cols[6]?.trim() ?? '';
  const complexityLevel = COMPLEXITY_MAP[complexityRaw] ?? complexityRaw;
  const description = cleanText(cols[7] ?? '');
  const complexityDescription = cleanText(cols[8] ?? '');
  let studioCaveats = cleanText(cols[9] ?? '');
  if (studioCaveats === 'None') studioCaveats = '';
  // cols[10]..cols[12] are not read by this parser.

  const uniques = `${catalogId} - ${assetName} - ${complexityLevel}`;

  // First occurrence wins; later rows with the same key are dropped entirely
  // (including their staffing data).
  if (seenUniques.has(uniques)) {
    console.warn(` DUPLICATE skipped: ${uniques}`);
    continue;
  }
  seenUniques.add(uniques);

  const isMaster = determineIsMaster(subCategory, assetName);

  assets.push({
    uniques,
    catalogId,
    category,
    subCategory,
    assetName,
    complexityLevel,
    description,
    complexityDescription,
    studioCaveats,
    isMaster,
  });

  // --- Staffing routes ---
  // Role groups start at col index 13, repeating in blocks of 4:
  // [Role, Location, Hours, (fourth column — not read here)]
  const roles: Record<string, number> = {};
  for (let j = FIRST_ROLE_COL; j + 2 < cols.length; j += ROLE_GROUP_WIDTH) {
    const roleName = cols[j]?.trim() ?? '';
    const location = cols[j + 1]?.trim() ?? '';
    const hoursStr = cols[j + 2]?.trim() ?? '';

    if (!roleName || !hoursStr) continue;

    const hours = parseFloat(hoursStr);
    // Skip unparseable and zero-hour entries so they never create a role key.
    if (isNaN(hours) || hours === 0) continue;

    // Append trailing space for Oliver+ location to distinguish from Local
    const roleKey = location === 'Oliver+' ? `${roleName} ` : roleName;

    // The same role key can appear in several groups on one row; sum them.
    roles[roleKey] = (roles[roleKey] ?? 0) + hours;
    allRoles.add(roleKey);
  }

  if (Object.keys(roles).length > 0) {
    staffingRoutes[uniques] = roles;
  }
}

// ---------------------------------------------------------------------------
// Build merged role → discipline map
// ---------------------------------------------------------------------------

// Start from the base map (all base entries are emitted even if unused) and
// add a keyword-classified entry for every parsed role that is missing.
const roleDisciplineMap: Record<string, string> = { ...BASE_ROLE_DISCIPLINE_MAP };
const unknownRoles: string[] = [];

for (const role of allRoles) {
  if (!roleDisciplineMap[role]) {
    const discipline = classifyRole(role);
    roleDisciplineMap[role] = discipline;
    if (discipline === 'UNKNOWN') {
      unknownRoles.push(role);
    } else {
      console.log(` New role classified: "${role}" → ${discipline}`);
    }
  }
}

// ---------------------------------------------------------------------------
// Write output
// ---------------------------------------------------------------------------

const dataDir = path.resolve(__dirname, '../data');
fs.mkdirSync(dataDir, { recursive: true });

fs.writeFileSync(
  path.join(dataDir, 'assets.json'),
  JSON.stringify(assets, null, 2),
);
fs.writeFileSync(
  path.join(dataDir, 'staffingRoutes.json'),
  JSON.stringify(staffingRoutes, null, 2),
);
fs.writeFileSync(
  path.join(dataDir, 'roleDisciplineMap.json'),
  JSON.stringify(roleDisciplineMap, null, 2),
);

// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------

console.log('\n--- Parse Complete ---');
console.log(` Assets: ${assets.length}`);
console.log(` Staffing routes: ${Object.keys(staffingRoutes).length}`);
console.log(` Unique roles: ${allRoles.size}`);
console.log(` Disciplines: ${[...new Set(Object.values(roleDisciplineMap))].join(', ')}`);

if (unknownRoles.length > 0) {
  console.warn(`\n ⚠ UNKNOWN discipline roles (need manual classification):`);
  for (const r of unknownRoles) {
    console.warn(` - "${r}"`);
  }
}

// Count assets per category in one pass; Map keeps first-seen order, which is
// the order the categories are printed in.
const categoryCounts = new Map<string, number>();
for (const asset of assets) {
  categoryCounts.set(asset.category, (categoryCounts.get(asset.category) ?? 0) + 1);
}

console.log(`\n Categories (${categoryCounts.size}):`);
for (const [category, count] of categoryCounts) {
  console.log(` ${category}: ${count} assets`);
}