Aimpress-site/scripts/sync-blog.mjs

import { readdir, readFile, copyFile, mkdir, rm, writeFile, stat } from 'node:fs/promises';
import { join, basename } from 'node:path';

// Output folder of the LinkedIn autopost tool. This is an absolute,
// machine-specific path; main() skips this source when it does not exist.
const LINKEDIN_INPUT_DIR = '/Volumes/SSD/Projects/Aimpress/LinkedIn-autopost/output';
// TinaCMS-authored markdown posts: <directory of this script>/../content/blog
const TINA_INPUT_DIR = join(import.meta.dirname, '..', 'content', 'blog');
// Destination for the generated blog data: <directory of this script>/../public/blog
const OUTPUT_DIR = join(import.meta.dirname, '..', 'public', 'blog');

/**
 * Converts a post title into a lowercase, hyphen-separated slug that is safe
 * to use in URLs and file names.
 *
 * Every run of characters outside `a-z0-9` becomes a single `-`, leading and
 * trailing hyphens are removed, and the result is capped at 80 characters.
 * A title with no ASCII letters or digits yields an empty string.
 *
 * @param {string} title - Human-readable post title.
 * @returns {string} The slug (at most 80 characters, never ending in `-`).
 */
function toSlug(title) {
  return title
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .slice(0, 80)
    // Truncation can cut right after a separator; trim again so the slug
    // never ends with a dangling hyphen.
    .replace(/-$/, '');
}

/**
 * Produces a short preview of `body`.
 *
 * Text that already fits within `len` characters is returned untouched.
 * Longer text is cut at `len` characters, backed up to the last space inside
 * that window (when there is one past position 0), and suffixed with '...'.
 *
 * @param {string} body - Full text to summarise.
 * @param {number} [len=150] - Maximum number of characters taken from `body`.
 * @returns {string} The excerpt.
 */
function makeExcerpt(body, len = 150) {
  const alreadyShortEnough = body.length <= len;
  if (alreadyShortEnough) {
    return body;
  }

  const head = body.slice(0, len);
  const breakAt = head.lastIndexOf(' ');

  // Prefer ending on a word boundary; fall back to the hard cut otherwise.
  let kept = head;
  if (breakAt > 0) {
    kept = head.slice(0, breakAt);
  }
  return `${kept}...`;
}

/**
 * Parses one LinkedIn autopost text file into a post record.
 *
 * Layout the parser looks for:
 *   - first line: the title (also the source of the slug);
 *   - an optional blank line, then the body;
 *   - an optional `---` separator line, after which lines starting with
 *     `Source:`, `http` or `#` supply the source title, source URL and
 *     hashtags respectively. Other lines after the separator are ignored.
 *
 * @param {string} text - Raw file contents.
 * @param {string} date - Date to attach to the post (passed through unchanged).
 * @returns {{slug: string, title: string, date: string, body: string,
 *   excerpt: string, sourceTitle: string, sourceUrl: string,
 *   hashtags: string[]}} The parsed post.
 */
function parseArticle(text, date) {
  const lines = text.replace(/\r\n/g, '\n').split('\n');
  const title = lines[0].trim();
  const slug = toSlug(title);

  // Find the separator line. Surrounding whitespace is ignored so that a
  // stray trailing space does not make the metadata leak into the body.
  const sepIdx = lines.findIndex(line => line.trim() === '---');

  // Body is everything between the title (plus its optional blank line) and
  // the separator, or the end of the file when there is no separator.
  const bodyStart = lines[1]?.trim() === '' ? 2 : 1;
  const bodyEnd = sepIdx > -1 ? sepIdx : lines.length;
  const body = lines.slice(bodyStart, bodyEnd).join('\n').trim();

  // Parse source and hashtags after the separator
  let sourceTitle = '';
  let sourceUrl = '';
  const hashtags = [];

  if (sepIdx > -1) {
    for (const line of lines.slice(sepIdx + 1)) {
      const trimmed = line.trim();
      if (trimmed.startsWith('Source:')) {
        sourceTitle = trimmed.replace('Source:', '').trim();
      } else if (trimmed.startsWith('http')) {
        sourceUrl = trimmed;
      } else if (trimmed.startsWith('#')) {
        // Accumulate: hashtags may be spread over several lines.
        hashtags.push(
          ...trimmed.split(/\s+/).map(t => t.replace(/^#/, '')).filter(Boolean)
        );
      }
    }
  }

  const excerpt = makeExcerpt(body);

  return { slug, title, date, body, excerpt, sourceTitle, sourceUrl, hashtags };
}

/**
 * Parses a minimal YAML-style frontmatter block from a TinaCMS markdown file.
 *
 * Supported syntax:
 *   - `key: value` scalars, optionally wrapped in single or double quotes;
 *   - single-line arrays: `key: [a, b, c]`;
 *   - indented dash lists (`  - item`) following an empty `key:` line.
 * Nothing else is interpreted (no nested mappings, no block scalars).
 *
 * @param {string} text - Raw file contents.
 * @returns {{meta: Object, body: string}} Parsed keys plus the trimmed text
 *   after the closing fence. When the text does not start with a complete
 *   frontmatter block, `meta` is empty and `body` is the whole text with
 *   CRLF line endings normalised to LF.
 */
function parseFrontmatter(text) {
  const normalized = text.replace(/\r\n/g, '\n');
  if (!normalized.startsWith('---\n')) return { meta: {}, body: normalized };

  // Closing fence: a line that is exactly '---', terminated by a newline or
  // by the end of the file. Searching from index 3 (the newline that ends the
  // opening fence) lets an empty block ("---\n---") match as well.
  const closingFence = /\n---(?:\n|$)/g;
  closingFence.lastIndex = 3;
  const fence = closingFence.exec(normalized);
  if (fence === null) return { meta: {}, body: normalized };

  const yamlBlock = normalized.slice(4, fence.index);
  const body = normalized.slice(fence.index + fence[0].length).trim();

  const meta = {};
  // Key that may still receive "  - item" lines; null when no list is open.
  let currentKey = null;
  for (const line of yamlBlock.split('\n')) {
    // Multi-line list item (e.g. "  - value")
    if (currentKey && /^\s+-\s+/.test(line)) {
      const item = line.replace(/^\s+-\s+/, '').replace(/^['"]|['"]$/g, '');
      if (!Array.isArray(meta[currentKey])) meta[currentKey] = [];
      meta[currentKey].push(item);
      continue;
    }

    const colonIdx = line.indexOf(':');
    if (colonIdx === -1) { currentKey = null; continue; }
    const key = line.slice(0, colonIdx).trim();
    const val = line.slice(colonIdx + 1).trim();

    const isQuoted = val.length >= 2 && (
      (val.startsWith('"') && val.endsWith('"')) ||
      (val.startsWith("'") && val.endsWith("'"))
    );

    if (isQuoted) {
      // A quoted value is always a plain string, even if it looks like "[...]".
      meta[key] = val.slice(1, -1);
      currentKey = null;
    } else if (val.startsWith('[') && val.endsWith(']')) {
      // Single-line array: [a, b, c]
      meta[key] = val
        .slice(1, -1)
        .split(',')
        .map(s => s.trim().replace(/^['"]|['"]$/g, ''))
        .filter(Boolean);
      currentKey = null;
    } else if (val === '') {
      // Empty value — may be followed by multi-line list items
      meta[key] = '';
      currentKey = key;
    } else {
      meta[key] = val;
      currentKey = null;
    }
  }

  return { meta, body };
}

/**
 * Reports whether `dirPath` refers to an existing directory.
 *
 * Any failure while stat-ing the path is treated as "not a directory" rather
 * than propagated.
 *
 * @param {string} dirPath - Path to probe.
 * @returns {Promise<boolean>} True only when the path exists and is a directory.
 */
async function dirExists(dirPath) {
  let isDir = false;
  try {
    const info = await stat(dirPath);
    isDir = info.isDirectory();
  } catch {
    isDir = false;
  }
  return isDir;
}

/**
 * Rebuilds the generated blog data in OUTPUT_DIR from both content sources.
 *
 * Steps:
 *   1. Wipe and recreate OUTPUT_DIR/posts and OUTPUT_DIR/images.
 *   2. Import LinkedIn autopost articles (`article_<n>.txt` with an optional
 *      `cover_<n>.png`) from `YYYY-MM-DD` folders, if that directory exists.
 *   3. Import TinaCMS markdown posts; on a slug clash they replace the
 *      previously collected entry.
 *   4. Write one JSON file per post plus a `posts.json` index sorted newest
 *      first.
 *
 * Posts whose title yields an empty slug, and markdown files without a
 * string `title` and `date`, are skipped with a warning.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Clear output
  await rm(join(OUTPUT_DIR, 'posts'), { recursive: true, force: true });
  await rm(join(OUTPUT_DIR, 'images'), { recursive: true, force: true });
  await mkdir(join(OUTPUT_DIR, 'posts'), { recursive: true });
  await mkdir(join(OUTPUT_DIR, 'images'), { recursive: true });

  const allPosts = [];
  const usedSlugs = new Set();

  // --- Source 1: LinkedIn autopost output (optional — skipped if path doesn't exist) ---
  if (await dirExists(LINKEDIN_INPUT_DIR)) {
    const entries = await readdir(LINKEDIN_INPUT_DIR, { withFileTypes: true });
    const dateDirs = entries
      .filter(e => e.isDirectory() && /^\d{4}-\d{2}-\d{2}$/.test(e.name))
      .map(e => e.name)
      .sort();

    for (const dateDir of dateDirs) {
      const dirPath = join(LINKEDIN_INPUT_DIR, dateDir);
      const files = await readdir(dirPath);

      // Sorted so that the winner among duplicate slugs does not depend on
      // the order in which the file system lists the directory.
      const articles = files
        .filter(f => f.startsWith('article_') && f.endsWith('.txt'))
        .sort();

      for (const articleFile of articles) {
        const timestamp = articleFile.match(/article_(\d+)\.txt/)?.[1];
        if (!timestamp) continue;

        const coverFile = `cover_${timestamp}.png`;
        const hasCover = files.includes(coverFile);

        const text = await readFile(join(dirPath, articleFile), 'utf-8');
        const post = parseArticle(text, dateDir);

        // An empty slug would produce "posts/.json" and "images/.png".
        if (!post.slug) {
          console.warn(`Skipping ${join(dirPath, articleFile)}: title produces an empty slug`);
          continue;
        }

        // Skip duplicate slugs
        if (usedSlugs.has(post.slug)) continue;
        usedSlugs.add(post.slug);

        const fullPost = {
          ...post,
          coverImage: hasCover ? `/blog/images/${post.slug}.png` : '',
        };
        await writeFile(
          join(OUTPUT_DIR, 'posts', `${post.slug}.json`),
          JSON.stringify(fullPost, null, 2)
        );

        if (hasCover) {
          await copyFile(
            join(dirPath, coverFile),
            join(OUTPUT_DIR, 'images', `${post.slug}.png`)
          );
        }

        allPosts.push({
          slug: post.slug,
          title: post.title,
          date: post.date,
          excerpt: post.excerpt,
          coverImage: fullPost.coverImage,
          hashtags: post.hashtags,
        });
      }
    }
    console.log(`Synced ${allPosts.length} LinkedIn posts`);
  } else {
    console.log(`LinkedIn source not found (${LINKEDIN_INPUT_DIR}), skipping`);
  }

  // --- Source 2: TinaCMS-authored blog posts from content/blog/*.md ---
  if (await dirExists(TINA_INPUT_DIR)) {
    const mdFiles = (await readdir(TINA_INPUT_DIR))
      .filter(f => f.endsWith('.md'))
      .sort();
    // Slugs actually contributed by this source, for an accurate log line.
    const tinaSlugs = new Set();

    for (const mdFile of mdFiles) {
      const text = await readFile(join(TINA_INPUT_DIR, mdFile), 'utf-8');
      const { meta, body } = parseFrontmatter(text);

      // parseFrontmatter returns arrays for list-valued keys; title and date
      // must be non-empty strings for the slug and the date sort below.
      const hasTitle = typeof meta.title === 'string' && meta.title !== '';
      const hasDate = typeof meta.date === 'string' && meta.date !== '';
      if (!hasTitle || !hasDate) {
        console.warn(`Skipping ${mdFile}: frontmatter needs a title and a date`);
        continue;
      }

      const date = meta.date.slice(0, 10); // ISO date → YYYY-MM-DD
      const slug = toSlug(meta.title);
      if (!slug) {
        console.warn(`Skipping ${mdFile}: title produces an empty slug`);
        continue;
      }

      // TinaCMS posts take priority over LinkedIn (override if same slug)
      if (usedSlugs.has(slug)) {
        // Remove the existing post entry (will be replaced)
        const idx = allPosts.findIndex(p => p.slug === slug);
        if (idx !== -1) allPosts.splice(idx, 1);
      }
      usedSlugs.add(slug);
      tinaSlugs.add(slug);

      // Fallback excerpt is built from the body with common markdown
      // punctuation stripped.
      const excerpt = meta.excerpt || makeExcerpt(body.replace(/[#*`>\[\]]/g, ''));
      const coverImage = meta.coverImage || '';
      const hashtags = Array.isArray(meta.hashtags) ? meta.hashtags : [];

      const fullPost = {
        slug,
        title: meta.title,
        date,
        body,
        excerpt,
        coverImage,
        sourceTitle: meta.sourceTitle || '',
        sourceUrl: meta.sourceUrl || '',
        hashtags,
      };

      await writeFile(
        join(OUTPUT_DIR, 'posts', `${slug}.json`),
        JSON.stringify(fullPost, null, 2)
      );

      allPosts.push({
        slug,
        title: meta.title,
        date,
        excerpt,
        coverImage,
        hashtags,
      });
    }
    console.log(`Merged ${tinaSlugs.size} TinaCMS posts from content/blog/`);
  }

  // Sort newest first
  allPosts.sort((a, b) => b.date.localeCompare(a.date));

  await writeFile(
    join(OUTPUT_DIR, 'posts.json'),
    JSON.stringify(allPosts, null, 2)
  );

  console.log(`Total: ${allPosts.length} blog posts written to ${OUTPUT_DIR}`);
}

// Entry point: run the sync. A rejection from main() is printed to stderr and
// the process exits with status 1.
function reportFatal(error) {
  console.error(error);
  process.exit(1);
}

main().catch(reportFatal);