diff --git a/backend/app/services/knowledge_base_service.py b/backend/app/services/knowledge_base_service.py
index b8a8770..c58d90a 100644
--- a/backend/app/services/knowledge_base_service.py
+++ b/backend/app/services/knowledge_base_service.py
@@ -16,58 +16,118 @@ MAX_CONCURRENT_PARSES = 10
 DISTILLATION_PROMPTS = {
     "legal": """You are a compliance documentation specialist. Below is raw reference material about legal compliance, advertising standards, financial promotions, and disclaimers relevant to Barclays marketing materials.
 
-Your task is to distil this into a clear, structured specification document that an AI compliance agent can use to review marketing proofs. Organise the content into logical sections with clear headings. Focus on actionable rules, required disclaimers, prohibited content, and compliance requirements.
+Your task is to restructure and organise ALL of this content into a clear, well-structured specification document that an AI compliance agent can use to review marketing proofs. You are imposing structure on the content — NOT reducing it.
 
-Remove any redundancy, marketing fluff, or content not relevant to compliance checking. Preserve all specific rules, thresholds, and requirements verbatim.
+CRITICAL — ZERO TOLERANCE FOR DETAIL LOSS:
+Every specific rule, instruction, condition, threshold, measurement, example, exception, and edge case from the source material MUST appear in the output. If in doubt about whether something is relevant, INCLUDE IT. Source documents may contain unconventional rules or conditional instructions that seem tangential (e.g. "disclaimers must use a specific font size only when appearing below the fold on mobile" or "this rule does not apply to materials distributed in Scotland"). These MUST be preserved verbatim — do not silently drop them.
+
+What TO do:
+- Merge exact duplicates (identical content repeated across sources)
+- Impose logical structure with clear headings: required disclaimers, prohibited content, compliance thresholds, financial promotion rules, advertising standards, conditional rules, exceptions
+- Use clear formatting (headings, bullet points, tables where helpful)
+- Preserve the exact wording of specific rules, legal requirements, and thresholds
+
+What NOT to do:
+- Do NOT omit, summarise away, or paraphrase specific rules, values, or conditions
+- Do NOT drop content because it seems minor, unusual, or edge-case
+- Do NOT filter out content you consider "not relevant" — the downstream agent needs ALL of it
 
 RAW REFERENCE MATERIAL:
 {combined_markdown}
 
-OUTPUT: A well-structured markdown specification document.""",
+OUTPUT: A comprehensive, well-structured markdown specification document containing ALL content from the source material.""",
 
     "brand_barclays": """You are a brand guidelines specialist. Below is raw reference material about Barclays brand guidelines including logo usage, colour palettes, typography, design principles, and visual identity standards.
 
-Your task is to distil this into a clear, structured specification document that an AI brand compliance agent can use to review marketing proofs. Organise the content into logical sections: logo rules, colour specifications (with exact hex/RGB values), typography rules, spacing/layout requirements, do's and don'ts.
+Your task is to restructure and organise ALL of this content into a clear, well-structured specification document that an AI brand compliance agent can use to review marketing proofs. You are imposing structure on the content — NOT reducing it.
 
-Remove any redundancy or content not directly relevant to visual brand compliance checking. Preserve all specific measurements, colour values, and rules verbatim.
+CRITICAL — ZERO TOLERANCE FOR DETAIL LOSS:
+Every specific rule, instruction, condition, threshold, measurement, example, exception, and edge case from the source material MUST appear in the output. If in doubt about whether something is relevant, INCLUDE IT. Source documents may contain unconventional rules or conditional instructions that seem tangential (e.g. "the eagle icon must be rotated 5° clockwise when used on dark backgrounds below 200x200px" or "gradient usage is prohibited except in Q4 seasonal campaigns"). These MUST be preserved verbatim — do not silently drop them.
+
+What TO do:
+- Merge exact duplicates (identical content repeated across sources)
+- Impose logical structure with clear headings: logo rules, colour specifications (with exact hex/RGB values), typography rules, spacing/layout requirements, imagery guidelines, do's and don'ts, conditional usage rules
+- Use clear formatting (headings, bullet points, tables where helpful)
+- Preserve all exact measurements, colour values, ratios, and conditional rules
+
+What NOT to do:
+- Do NOT omit, summarise away, or paraphrase specific rules, values, or conditions
+- Do NOT drop content because it seems minor, unusual, or edge-case
+- Do NOT filter out content you consider "not relevant" — the downstream agent needs ALL of it
 
 RAW REFERENCE MATERIAL:
 {combined_markdown}
 
-OUTPUT: A well-structured markdown specification document.""",
+OUTPUT: A comprehensive, well-structured markdown specification document containing ALL content from the source material.""",
 
     "brand_barclaycard": """You are a brand guidelines specialist. Below is raw reference material about Barclaycard brand guidelines including logo usage, colour palettes, typography, design principles, and visual identity standards.
 
-Your task is to distil this into a clear, structured specification document that an AI brand compliance agent can use to review marketing proofs. Organise the content into logical sections: logo rules, colour specifications (with exact hex/RGB values), typography rules, spacing/layout requirements, do's and don'ts.
+Your task is to restructure and organise ALL of this content into a clear, well-structured specification document that an AI brand compliance agent can use to review marketing proofs. You are imposing structure on the content — NOT reducing it.
 
-Remove any redundancy or content not directly relevant to visual brand compliance checking. Preserve all specific measurements, colour values, and rules verbatim.
+CRITICAL — ZERO TOLERANCE FOR DETAIL LOSS:
+Every specific rule, instruction, condition, threshold, measurement, example, exception, and edge case from the source material MUST appear in the output. If in doubt about whether something is relevant, INCLUDE IT. Source documents may contain unconventional rules or conditional instructions that seem tangential (e.g. "the Barclaycard wordmark must use the condensed variant when co-branded with partner logos" or "minimum clear space increases to 2x on print materials below A5 size"). These MUST be preserved verbatim — do not silently drop them.
+
+What TO do:
+- Merge exact duplicates (identical content repeated across sources)
+- Impose logical structure with clear headings: logo rules, colour specifications (with exact hex/RGB values), typography rules, spacing/layout requirements, imagery guidelines, do's and don'ts, conditional usage rules
+- Use clear formatting (headings, bullet points, tables where helpful)
+- Preserve all exact measurements, colour values, ratios, and conditional rules
+
+What NOT to do:
+- Do NOT omit, summarise away, or paraphrase specific rules, values, or conditions
+- Do NOT drop content because it seems minor, unusual, or edge-case
+- Do NOT filter out content you consider "not relevant" — the downstream agent needs ALL of it
 
 RAW REFERENCE MATERIAL:
 {combined_markdown}
 
-OUTPUT: A well-structured markdown specification document.""",
+OUTPUT: A comprehensive, well-structured markdown specification document containing ALL content from the source material.""",
 
     "channel_best_practices": """You are a marketing channel specialist. Below is raw reference material about best practices for various marketing channels (social media, display, email, print, OOH) relevant to Barclays marketing.
 
-Your task is to distil this into a clear, structured specification document that an AI channel compliance agent can use to review marketing proofs. Organise by channel type, then by platform/format. Focus on content guidelines, accessibility requirements, and engagement best practices.
+Your task is to restructure and organise ALL of this content into a clear, well-structured specification document that an AI channel compliance agent can use to review marketing proofs. You are imposing structure on the content — NOT reducing it.
 
-Remove any redundancy or content not directly relevant to proof review. Preserve all specific recommendations and requirements.
+CRITICAL — ZERO TOLERANCE FOR DETAIL LOSS:
+Every specific rule, instruction, condition, threshold, measurement, example, exception, and edge case from the source material MUST appear in the output. If in doubt about whether something is relevant, INCLUDE IT. Source documents may contain unconventional rules or conditional instructions that seem tangential (e.g. "carousel posts must not exceed 5 cards when promoting credit products" or "reply-to-comment tone shifts to informal only on Instagram and TikTok"). These MUST be preserved verbatim — do not silently drop them.
+
+What TO do:
+- Merge exact duplicates (identical content repeated across sources)
+- Impose logical structure with clear headings organised by channel type, then platform/format: content guidelines, accessibility requirements, engagement rules, tone/voice guidance, conditional instructions
+- Use clear formatting (headings, bullet points, tables where helpful)
+- Preserve all specific recommendations, requirements, and platform-specific rules
+
+What NOT to do:
+- Do NOT omit, summarise away, or paraphrase specific rules, values, or conditions
+- Do NOT drop content because it seems minor, unusual, or edge-case
+- Do NOT filter out content you consider "not relevant" — the downstream agent needs ALL of it
 
 RAW REFERENCE MATERIAL:
 {combined_markdown}
 
-OUTPUT: A well-structured markdown specification document.""",
+OUTPUT: A comprehensive, well-structured markdown specification document containing ALL content from the source material.""",
 
     "channel_tech_specs": """You are a marketing production specialist. Below is raw reference material about technical specifications for various marketing channels (dimensions, file formats, file sizes, resolution requirements, platform constraints).
 
-Your task is to distil this into a clear, structured specification document that an AI technical compliance agent can use to review marketing proofs. Organise by channel, then platform, then format. Use tables where appropriate for dimensions and specs.
+Your task is to restructure and organise ALL of this content into a clear, well-structured specification document that an AI technical compliance agent can use to review marketing proofs. You are imposing structure on the content — NOT reducing it.
 
-Remove any redundancy or content not directly relevant to technical spec checking. Preserve all specific dimensions, file size limits, format requirements, and platform constraints verbatim.
+CRITICAL — ZERO TOLERANCE FOR DETAIL LOSS:
+Every specific rule, instruction, condition, threshold, measurement, example, exception, and edge case from the source material MUST appear in the output. If in doubt about whether something is relevant, INCLUDE IT. Source documents may contain unconventional rules or conditional instructions that seem tangential (e.g. "safe zone insets increase to 20px for YouTube bumper ads on connected TV" or "max file size is 100KB for email hero images but 200KB when animated"). These MUST be preserved verbatim — do not silently drop them.
+
+What TO do:
+- Merge exact duplicates (identical content repeated across sources)
+- Impose logical structure with clear headings organised by channel → platform → format. Use tables where appropriate for dimensions, file sizes, and format specs
+- Use clear formatting (headings, bullet points, tables where helpful)
+- Preserve all specific dimensions, file size limits, format requirements, resolution values, and platform constraints
+
+What NOT to do:
+- Do NOT omit, summarise away, or paraphrase specific rules, values, or conditions
+- Do NOT drop content because it seems minor, unusual, or edge-case
+- Do NOT filter out content you consider "not relevant" — the downstream agent needs ALL of it
 
 RAW REFERENCE MATERIAL:
 {combined_markdown}
 
-OUTPUT: A well-structured markdown specification document.""",
+OUTPUT: A comprehensive, well-structured markdown specification document containing ALL content from the source material.""",
 }
 
 