Aimpress-site/scripts/sync-blog.mjs
Vadym Samoilenko 5d0aaab339 Migrate 5 server blog posts to TinaCMS-managed content/blog/
Converts existing server-side JSON blog posts to Markdown format with
YAML frontmatter so they appear in TinaCloud admin and are managed via git.
Also fixes sync-blog.mjs parseFrontmatter to support multi-line YAML lists
(TinaCMS writes hashtags as multi-line list items).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 22:28:11 +00:00

252 lines
8.1 KiB
JavaScript

import { readdir, readFile, copyFile, mkdir, rm, writeFile, stat } from 'node:fs/promises';
import { join, basename } from 'node:path';
// Absolute, machine-specific directory holding the LinkedIn autopost output.
// main() treats this source as optional and skips it when the directory is absent.
const LINKEDIN_INPUT_DIR = '/Volumes/SSD/Projects/Aimpress/LinkedIn-autopost/output';
// Markdown posts with frontmatter, resolved as ../content/blog relative to this script's directory.
const TINA_INPUT_DIR = join(import.meta.dirname, '..', 'content', 'blog');
// Generated output root (../public/blog); main() writes posts/, images/ and posts.json here.
const OUTPUT_DIR = join(import.meta.dirname, '..', 'public', 'blog');
/**
 * Converts a post title into a slug usable as a URL segment and file name.
 *
 * The title is lower-cased, every run of characters outside [a-z0-9] is
 * collapsed into a single '-', leading/trailing dashes are removed and the
 * result is capped at 80 characters.
 *
 * @param {string} title - Human-readable post title.
 * @returns {string} Slug of at most 80 characters; empty when the title
 *   contains no ASCII letters or digits.
 */
function toSlug(title) {
  return title
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .slice(0, 80)
    // Truncation can cut right after a separator; strip it so a slug never ends in '-'.
    .replace(/-$/, '');
}
/**
 * Produces a short preview of a post body.
 *
 * Bodies that already fit within `len` characters are returned untouched.
 * Longer bodies are cut at `len` characters, backed up to the last space
 * inside that window (when there is one past index 0) and suffixed with '...'.
 *
 * @param {string} body - Full post text.
 * @param {number} [len=150] - Maximum number of body characters to keep.
 * @returns {string} The body itself or a truncated preview ending in '...'.
 */
function makeExcerpt(body, len = 150) {
  if (body.length > len) {
    const head = body.slice(0, len);
    const cutAt = head.lastIndexOf(' ');
    // Only back up to a word boundary when one exists after the first character.
    const kept = cutAt > 0 ? head.slice(0, cutAt) : head;
    return `${kept}...`;
  }
  return body;
}
/**
 * Parses one plain-text article into a post object.
 *
 * Layout read from the text:
 *   line 1            title
 *   (optional blank)  skipped when line 2 is empty
 *   ...               body, up to the first line that is exactly '---'
 *   after '---'       footer: a 'Source:' line, a line starting with 'http',
 *                     and a line starting with '#' holding space-separated hashtags
 *
 * @param {string} text - Raw article file contents (CRLF or LF line endings).
 * @param {string} date - Date to attach to the post; passed through unchanged.
 * @returns {{slug: string, title: string, date: string, body: string,
 *   excerpt: string, sourceTitle: string, sourceUrl: string, hashtags: string[]}}
 */
function parseArticle(text, date) {
  const rows = text.replace(/\r\n/g, '\n').split('\n');
  const title = rows[0].trim();

  // The footer starts after the first line that is exactly '---'.
  const separatorAt = rows.indexOf('---');
  const hasFooter = separatorAt > -1;

  // Skip the blank line that may follow the title.
  const firstBodyRow = rows[1]?.trim() === '' ? 2 : 1;
  const lastBodyRow = hasFooter ? separatorAt : rows.length;
  const body = rows.slice(firstBodyRow, lastBodyRow).join('\n').trim();

  const footer = { sourceTitle: '', sourceUrl: '', hashtags: [] };
  const footerRows = hasFooter ? rows.slice(separatorAt + 1) : [];
  footerRows.forEach((row) => {
    const entry = row.trim();
    if (entry.startsWith('Source:')) {
      footer.sourceTitle = entry.replace('Source:', '').trim();
      return;
    }
    if (entry.startsWith('http')) {
      footer.sourceUrl = entry;
      return;
    }
    if (entry.startsWith('#')) {
      // A later '#' line replaces the tags collected from an earlier one.
      footer.hashtags = entry
        .split(/\s+/)
        .map((tag) => tag.replace(/^#/, ''))
        .filter(Boolean);
    }
  });

  return {
    slug: toSlug(title),
    title,
    date,
    body,
    excerpt: makeExcerpt(body),
    ...footer,
  };
}
/**
 * Parses YAML-style frontmatter from a markdown file.
 *
 * This is a deliberately small parser, not a full YAML implementation. It
 * understands flat `key: value` pairs with:
 *   - optional surrounding single or double quotes,
 *   - single-line lists: `key: [a, b, c]`,
 *   - multi-line lists: `key:` followed by `- item` lines (indented or not),
 *   - block scalars: `key: >-` (folded) or `key: |` (literal) followed by
 *     indented continuation lines. Chomping indicators are accepted, but the
 *     result is always trimmed and relative indentation is not preserved.
 *
 * The frontmatter must start on the first line with `---` and end with a line
 * that is exactly `---` (a trailing newline after it is optional).
 *
 * @param {string} text - Raw file contents (CRLF or LF line endings).
 * @returns {{meta: Object<string, string|string[]>, body: string}} Parsed
 *   metadata and the trimmed markdown body. When no frontmatter is found,
 *   `meta` is empty and `body` is the newline-normalized input.
 */
function parseFrontmatter(text) {
  const normalized = text.replace(/\r\n/g, '\n');
  if (!normalized.startsWith('---\n')) return { meta: {}, body: normalized };

  // Search from the newline that ends the opening delimiter so an empty block
  // (`---\n---`) is found, and accept a closing delimiter at end of file.
  const afterOpen = normalized.slice(3);
  const closing = /\n---(?:\n|$)/.exec(afterOpen);
  if (closing === null) return { meta: {}, body: normalized };

  const yamlBlock = afterOpen.slice(1, closing.index);
  const body = afterOpen.slice(closing.index + closing[0].length).trim();

  const listItemRe = /^\s*-\s+/;
  const edgeQuoteRe = /^['"]|['"]$/g;
  const lines = yamlBlock.split('\n');
  const meta = {};
  // Key whose value was empty and may therefore be followed by `- item` lines.
  let currentKey = null;

  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i];

    // Multi-line list item belonging to the most recent empty-valued key.
    if (currentKey && listItemRe.test(line)) {
      const item = line.replace(listItemRe, '').replace(edgeQuoteRe, '');
      if (!Array.isArray(meta[currentKey])) meta[currentKey] = [];
      meta[currentKey].push(item);
      continue;
    }

    const colonIdx = line.indexOf(':');
    if (colonIdx === -1) {
      currentKey = null;
      continue;
    }

    const key = line.slice(0, colonIdx).trim();
    let val = line.slice(colonIdx + 1).trim();
    currentKey = null;

    // Block scalar header; checked on the raw value so a quoted ">" stays a plain string.
    const blockHeader = /^([|>])[+-]?$/.exec(val);
    if (blockHeader !== null) {
      const chunk = [];
      // Continuation lines are the following blank or indented lines.
      while (i + 1 < lines.length && /^(?:\s|$)/.test(lines[i + 1])) {
        i += 1;
        chunk.push(lines[i].trim());
      }
      const literal = chunk.join('\n').trim();
      meta[key] = blockHeader[1] === '>'
        // Folding: a single line break becomes a space, n breaks become n-1 breaks.
        ? literal.replace(/\n+/g, (run) => (run.length === 1 ? ' ' : '\n'.repeat(run.length - 1)))
        : literal;
      continue;
    }

    // Strip surrounding quotes.
    if ((val.startsWith('"') && val.endsWith('"')) || (val.startsWith("'") && val.endsWith("'"))) {
      val = val.slice(1, -1);
    }

    if (val.startsWith('[') && val.endsWith(']')) {
      // Single-line list: [a, b, c]
      meta[key] = val
        .slice(1, -1)
        .split(',')
        .map((s) => s.trim().replace(edgeQuoteRe, ''))
        .filter(Boolean);
    } else if (val === '') {
      // Empty value: may be followed by multi-line list items.
      meta[key] = '';
      currentKey = key;
    } else {
      meta[key] = val;
    }
  }

  return { meta, body };
}
/**
 * Reports whether a path exists and is a directory.
 *
 * Any failure to stat the path (missing, unreadable, ...) is reported as
 * `false` rather than thrown, so callers can treat the directory as optional.
 *
 * @param {string} dirPath - Path to check.
 * @returns {Promise<boolean>} `true` only when the path is a directory.
 */
async function dirExists(dirPath) {
  let info;
  try {
    info = await stat(dirPath);
  } catch {
    return false;
  }
  return info.isDirectory();
}
/**
 * Regenerates the static blog data under OUTPUT_DIR from both content sources.
 *
 * Steps:
 *  1. Wipes and recreates OUTPUT_DIR/posts and OUTPUT_DIR/images.
 *  2. If LINKEDIN_INPUT_DIR exists, converts every `article_<ts>.txt` found in
 *     its `YYYY-MM-DD` sub-directories into `posts/<slug>.json` and copies the
 *     matching `cover_<ts>.png` (when present) to `images/<slug>.png`. The
 *     first article to claim a slug wins; later duplicates are skipped.
 *  3. If TINA_INPUT_DIR exists, converts every `*.md` file that has a string
 *     `title` and `date` in its frontmatter. A markdown post replaces a
 *     LinkedIn post with the same slug.
 *  4. Writes the `posts.json` index, sorted newest first.
 *
 * Posts whose title produces an empty slug are skipped with a warning instead
 * of being written to a file named `.json`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const postsDir = join(OUTPUT_DIR, 'posts');
  const imagesDir = join(OUTPUT_DIR, 'images');

  // Clear output so posts removed at the source disappear from the site too.
  await rm(postsDir, { recursive: true, force: true });
  await rm(imagesDir, { recursive: true, force: true });
  await mkdir(postsDir, { recursive: true });
  await mkdir(imagesDir, { recursive: true });

  const allPosts = [];
  const usedSlugs = new Set();

  // --- Source 1: LinkedIn autopost output (optional — skipped if path doesn't exist) ---
  if (await dirExists(LINKEDIN_INPUT_DIR)) {
    const entries = await readdir(LINKEDIN_INPUT_DIR, { withFileTypes: true });
    const dateDirs = entries
      .filter((e) => e.isDirectory() && /^\d{4}-\d{2}-\d{2}$/.test(e.name))
      .map((e) => e.name)
      .sort();

    for (const dateDir of dateDirs) {
      const dirPath = join(LINKEDIN_INPUT_DIR, dateDir);
      const files = await readdir(dirPath);
      // readdir() order is not guaranteed; sort so the article that wins a
      // duplicate slug is the same on every run and every machine.
      const articles = files
        .filter((f) => f.startsWith('article_') && f.endsWith('.txt'))
        .sort();

      for (const articleFile of articles) {
        const timestamp = articleFile.match(/article_(\d+)\.txt/)?.[1];
        if (!timestamp) continue;

        const coverFile = `cover_${timestamp}.png`;
        const hasCover = files.includes(coverFile);
        const text = await readFile(join(dirPath, articleFile), 'utf-8');
        const post = parseArticle(text, dateDir);

        if (!post.slug) {
          console.warn(`Skipping ${join(dirPath, articleFile)}: title produces an empty slug`);
          continue;
        }
        // Skip duplicate slugs
        if (usedSlugs.has(post.slug)) continue;
        usedSlugs.add(post.slug);

        const fullPost = {
          ...post,
          coverImage: hasCover ? `/blog/images/${post.slug}.png` : '',
        };
        await writeFile(
          join(postsDir, `${post.slug}.json`),
          JSON.stringify(fullPost, null, 2)
        );
        if (hasCover) {
          await copyFile(
            join(dirPath, coverFile),
            join(imagesDir, `${post.slug}.png`)
          );
        }

        allPosts.push({
          slug: post.slug,
          title: post.title,
          date: post.date,
          excerpt: post.excerpt,
          coverImage: fullPost.coverImage,
          hashtags: post.hashtags,
        });
      }
    }
    console.log(`Synced ${allPosts.length} LinkedIn posts`);
  } else {
    console.log(`LinkedIn source not found (${LINKEDIN_INPUT_DIR}), skipping`);
  }

  // --- Source 2: TinaCMS-authored blog posts from content/blog/*.md ---
  if (await dirExists(TINA_INPUT_DIR)) {
    const mdFiles = (await readdir(TINA_INPUT_DIR)).filter((f) => f.endsWith('.md'));
    // Count only the files that actually become posts, not every .md file seen.
    let mergedCount = 0;

    for (const mdFile of mdFiles) {
      const text = await readFile(join(TINA_INPUT_DIR, mdFile), 'utf-8');
      const { meta, body } = parseFrontmatter(text);

      // Frontmatter values can be lists; title and date must be non-empty strings.
      const hasTitle = typeof meta.title === 'string' && meta.title !== '';
      const hasDate = typeof meta.date === 'string' && meta.date !== '';
      if (!hasTitle || !hasDate) {
        console.warn(`Skipping ${mdFile}: frontmatter needs a title and a date`);
        continue;
      }

      const date = meta.date.slice(0, 10); // ISO date → YYYY-MM-DD
      const slug = toSlug(meta.title);
      if (!slug) {
        console.warn(`Skipping ${mdFile}: title produces an empty slug`);
        continue;
      }

      // TinaCMS posts take priority over LinkedIn (override if same slug).
      // Any cover image already copied for that slug is left in place.
      if (usedSlugs.has(slug)) {
        const idx = allPosts.findIndex((p) => p.slug === slug);
        if (idx !== -1) allPosts.splice(idx, 1);
      }
      usedSlugs.add(slug);

      // Fall back to a generated excerpt with common markdown markers removed.
      const excerpt = meta.excerpt || makeExcerpt(body.replace(/[#*`>\[\]]/g, ''));
      const coverImage = meta.coverImage || '';
      const hashtags = Array.isArray(meta.hashtags) ? meta.hashtags : [];

      const fullPost = {
        slug,
        title: meta.title,
        date,
        body,
        excerpt,
        coverImage,
        sourceTitle: meta.sourceTitle || '',
        sourceUrl: meta.sourceUrl || '',
        hashtags,
      };
      await writeFile(
        join(postsDir, `${slug}.json`),
        JSON.stringify(fullPost, null, 2)
      );

      allPosts.push({
        slug,
        title: meta.title,
        date,
        excerpt,
        coverImage,
        hashtags,
      });
      mergedCount += 1;
    }
    console.log(`Merged ${mergedCount} TinaCMS posts from content/blog/`);
  }

  // Sort newest first (dates are YYYY-MM-DD strings, so text order is date order).
  allPosts.sort((a, b) => b.date.localeCompare(a.date));
  await writeFile(
    join(OUTPUT_DIR, 'posts.json'),
    JSON.stringify(allPosts, null, 2)
  );
  console.log(`Total: ${allPosts.length} blog posts written to ${OUTPUT_DIR}`);
}
// Script entry point: run the sync and, on any failure, print the error and
// exit with status 1.
main().catch(err => {
console.error(err);
process.exit(1);
});