volt-newsroom-scraper-report/summarizer.py
DJP d814ff8209 Add 'So What' implications section for Molson Coors
- Reduced bullets from 2-3 to 1-2 (more concise)
- Added 'So What' section with Molson Coors business implications
- Prompt now asks Claude to analyze relevance to beverage industry
- So What section styled with yellow border and highlighted background
- Provides actionable context for every article/post
- Helps team understand why each item matters to the business
2026-01-07 09:13:35 -05:00

201 lines
6.7 KiB
Python

"""
AI summarization using Claude API.
Generates a concise title, 1-2 bullet points, and a "So What" implications statement for articles/posts.
"""
from anthropic import Anthropic
from config import Config
class ContentSummarizer:
    """Summarizer using Claude API for content summarization.

    Every content item is condensed into a title, at most ``MAX_BULLETS``
    bullet points and a "So What" statement. All code paths of
    ``summarize_content`` return a dict with the same set of keys.
    """

    # Upper bound on the tokens Claude may generate for one summary.
    MAX_TOKENS = 500
    # Number of leading characters of the source content sent in the prompt.
    MAX_CONTENT_CHARS = 3000
    # Maximum number of bullets kept from a response. The prompt text asks
    # for "1-2" bullets; keep the two in sync if this is changed.
    MAX_BULLETS = 2
    # Characters accepted as the leading marker of a bullet line.
    _BULLET_MARKERS = ('-', '•')
    # "So What" text used whenever no implication could be produced.
    _SO_WHAT_UNAVAILABLE = 'Unable to determine implications.'

    def __init__(self):
        """Initialize Claude API client."""
        self.client = Anthropic(api_key=Config.ANTHROPIC_API_KEY)
        # Claude Sonnet 4.5
        self.model = "claude-sonnet-4-5-20250929"

    def summarize_content(self, content_item):
        """
        Summarize a single content item into title + 1-2 bullets + "So What".

        Never raises for API or parsing problems: any exception is printed
        and a placeholder summary is returned instead.

        Args:
            content_item (dict): Content to summarize with keys:
                - url (str)
                - title (str, optional)
                - content (str)
                - platform (str, optional for social media)
                - screenshot_path (optional, passed through unchanged)

        Returns:
            dict: Summarized content
                {
                    'title': str,
                    'bullets': list of str,
                    'so_what': str,
                    'url': str,
                    'screenshot_path': value from content_item or None
                }
        """
        content = content_item.get('content', '')
        existing_title = content_item.get('title', '')
        url = content_item.get('url', '')
        platform = content_item.get('platform', '')
        screenshot_path = content_item.get('screenshot_path')

        # Nothing to summarize: skip the API call but still return the full
        # set of keys so consumers can treat every result uniformly.
        if not content or not content.strip():
            return {
                'title': existing_title or 'Content unavailable',
                'bullets': ['Unable to extract content from this source.'],
                'so_what': self._SO_WHAT_UNAVAILABLE,
                'url': url,
                'screenshot_path': screenshot_path,
            }

        try:
            prompt = self._create_summarization_prompt(
                content, existing_title, platform
            )
            message = self.client.messages.create(
                model=self.model,
                max_tokens=self.MAX_TOKENS,
                messages=[
                    {"role": "user", "content": prompt}
                ],
            )
            # The summary text is read from the first content block.
            response_text = message.content[0].text
            summary = self._parse_summary_response(response_text, existing_title)
            return {
                'title': summary['title'],
                'bullets': summary['bullets'],
                'so_what': summary['so_what'],
                'url': url,
                'screenshot_path': screenshot_path,
            }
        except Exception as e:
            # Deliberate best-effort boundary: one failing item must not
            # abort a whole batch, so report it and return a placeholder.
            print(f"Error summarizing content for {url}: {e}")
            return {
                'title': existing_title or 'Summary unavailable',
                'bullets': [
                    'Error generating summary.'
                ],
                'so_what': self._SO_WHAT_UNAVAILABLE,
                'url': url,
                'screenshot_path': screenshot_path,
            }

    def summarize_batch(self, content_items):
        """
        Summarize multiple content items, one API call per item, in order.

        Args:
            content_items (list): List of content dictionaries

        Returns:
            list: List of summarized content dictionaries
        """
        total = len(content_items)
        print(f"Summarizing {total} items with Claude API...")
        summaries = []
        for i, item in enumerate(content_items, 1):
            print(f"  Summarizing item {i}/{total}...")
            summaries.append(self.summarize_content(item))
        print(f"Completed {len(summaries)} summaries")
        return summaries

    def _create_summarization_prompt(self, content, existing_title, platform):
        """
        Create prompt for Claude API to summarize content.

        Only the first ``MAX_CONTENT_CHARS`` characters of ``content`` are
        included in the prompt.

        Args:
            content (str): Content to summarize
            existing_title (str): Existing title if available
            platform (str): Platform name for social media

        Returns:
            str: Formatted prompt
        """
        platform_context = f" from {platform}" if platform else ""
        # An empty line is emitted when there is no existing title.
        title_line = 'Existing title: ' + existing_title if existing_title else ''
        excerpt = content[:self.MAX_CONTENT_CHARS]
        prompt_lines = [
            f"You are summarizing content{platform_context} for a daily newsroom report for Molson Coors beverage company.",
            "Please provide:",
            "1. A clear, engaging title (if the existing title isn't good, create a better one)",
            "2. 1-2 concise bullet points highlighting the key information",
            '3. A brief "So What" statement explaining implications for Molson Coors and their beverage brands',
            "Focus on actionable insights relevant to the beverage/alcohol industry. Be concise and professional.",
            title_line,
            "Content to summarize:",
            excerpt,
            "Please respond in this exact format:",
            "TITLE: [your title here]",
            "BULLETS:",
            "- [bullet point 1]",
            "- [bullet point 2 if needed]",
            "SO WHAT: [1-2 sentence implication for Molson Coors - how this impacts their business, brands, or strategy]",
        ]
        return "\n".join(prompt_lines)

    def _parse_summary_response(self, response_text, fallback_title):
        """
        Parse Claude's response into structured format.

        Recognizes the ``TITLE:``, ``BULLETS:`` and ``SO WHAT:`` markers at
        the start of a line. Within the bullets section only lines starting
        with a bullet marker are kept; other lines are ignored.

        Args:
            response_text (str): Response from Claude
            fallback_title (str): Fallback title if parsing fails

        Returns:
            dict: Parsed summary with 'title', 'bullets', and 'so_what' keys
                (all three keys are present even when parsing fails)
        """
        title_tag, bullets_tag, so_what_tag = 'TITLE:', 'BULLETS:', 'SO WHAT:'
        try:
            title = fallback_title
            bullets = []
            so_what = ''
            parsing_bullets = False
            for raw_line in response_text.strip().split('\n'):
                line = raw_line.strip()
                if line.startswith(title_tag):
                    # Drop only the leading marker; the same text appearing
                    # later in the line is part of the title.
                    title = line[len(title_tag):].strip()
                    parsing_bullets = False
                elif line.startswith(bullets_tag):
                    parsing_bullets = True
                elif line.startswith(so_what_tag):
                    so_what = line[len(so_what_tag):].strip()
                    parsing_bullets = False
                elif parsing_bullets and line.startswith(self._BULLET_MARKERS):
                    # Remove exactly one marker character so bullets that
                    # begin with a dash (e.g. "-5%") keep it.
                    bullet = line[1:].strip()
                    if bullet:
                        bullets.append(bullet)

            # Ensure we have at least one bullet
            if not bullets:
                bullets = ['Summary could not be generated.']

            return {
                'title': title or fallback_title or 'Untitled',
                'bullets': bullets[:self.MAX_BULLETS],
                'so_what': so_what or 'No specific implications identified.',
            }
        except Exception as e:
            print(f"Error parsing summary response: {e}")
            return {
                'title': fallback_title or 'Untitled',
                'bullets': ['Summary could not be generated.'],
                'so_what': self._SO_WHAT_UNAVAILABLE,
            }