Detect identical file uploads via MD5 hashing

- Add file_hash and is_identical_file columns to proof_versions table
- Compute MD5 hash on file upload and compare with previous version
- Display warning banner when uploading identical file as revision
- Return is_identical_file in WebSocket response and API endpoints

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2026-01-25 10:15:48 -06:00
parent 3a5c3bcde3
commit 2f547dc494
10 changed files with 104 additions and 5 deletions

View file

@ -0,0 +1,35 @@
"""Add file_hash and is_identical_file columns to proof_versions
Revision ID: 005_add_file_hash
Revises: 004_cleanup_duplicate_dropdown_options
Create Date: 2026-01-25
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` names this migration; `down_revision` names the migration it
# revises (004_cleanup_duplicate_dropdown_options).
revision: str = '005_add_file_hash'
down_revision: Union[str, None] = '004_cleanup_duplicate_dropdown_options'
# No branch labels or extra dependencies are declared for this migration.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the duplicate-detection columns to the proof_versions table."""
    target_table = 'proof_versions'
    new_columns = (
        # An MD5 hex digest is always 32 characters long.
        sa.Column('file_hash', sa.String(32), nullable=True),
        # Records whether this version's file is identical to the previous one.
        sa.Column(
            'is_identical_file',
            sa.Boolean(),
            nullable=True,
            server_default='false',
        ),
    )
    # Columns are added in declaration order: file_hash, then is_identical_file.
    for column in new_columns:
        op.add_column(target_table, column)
def downgrade() -> None:
    """Drop the columns added by upgrade(), in the reverse order."""
    for column_name in ('is_identical_file', 'file_hash'):
        op.drop_column('proof_versions', column_name)

View file

@ -245,6 +245,7 @@ async def list_proofs(
agent_review=v.agent_review,
overall_status=v.overall_status,
workfront_id=v.workfront_id,
is_identical_file=v.is_identical_file,
created_at=v.created_at,
)
for v in proof.versions
@ -283,6 +284,7 @@ async def get_proof(
agent_review=v.agent_review,
overall_status=v.overall_status,
workfront_id=v.workfront_id,
is_identical_file=v.is_identical_file,
created_at=v.created_at,
)
for v in proof.versions

View file

@ -57,6 +57,7 @@ class ProofVersionResponse(BaseModel):
agent_review: Optional[dict]
overall_status: Optional[str]
workfront_id: Optional[str]
is_identical_file: Optional[bool] = False
created_at: datetime
class Config:

View file

@ -2,7 +2,7 @@ import uuid
from datetime import datetime
from typing import Optional
from sqlalchemy import DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, func
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, func
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
@ -100,6 +100,8 @@ class ProofVersion(Base):
agent_review: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True)
overall_status: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
workfront_id: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
file_hash: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
is_identical_file: Mapped[Optional[bool]] = mapped_column(Boolean, nullable=True, default=False)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
# Relationships

View file

@ -95,6 +95,8 @@ class ProofRepository:
agent_review: Optional[dict] = None,
overall_status: Optional[str] = None,
workfront_id: Optional[str] = None,
file_hash: Optional[str] = None,
is_identical_file: Optional[bool] = None,
) -> ProofVersion:
"""Create a new version of a proof."""
proof_version = ProofVersion(
@ -105,6 +107,8 @@ class ProofRepository:
agent_review=agent_review,
overall_status=overall_status,
workfront_id=workfront_id,
file_hash=file_hash,
is_identical_file=is_identical_file,
)
self.session.add(proof_version)
await self.session.flush()
@ -148,6 +152,16 @@ class ProofRepository:
version = result.scalar_one_or_none()
return version if version else 0
async def get_latest_version_hash(self, proof_id: uuid.UUID) -> Optional[str]:
    """Return the file_hash stored on the highest-numbered version of a proof.

    Args:
        proof_id: Identifier of the proof whose versions are searched.

    Returns:
        The file_hash of the version with the largest ``version`` value, or
        None when no row matches or that row has no hash stored.
    """
    # Sort newest-first and keep a single row so only the latest hash is read.
    newest_hash_stmt = (
        select(ProofVersion.file_hash)
        .where(ProofVersion.proof_id == proof_id)
        .order_by(ProofVersion.version.desc())
        .limit(1)
    )
    rows = await self.session.execute(newest_hash_stmt)
    return rows.scalar_one_or_none()
async def get_previous_version_review(
self,
proof_id: uuid.UUID,
@ -243,6 +257,8 @@ class ProofRepository:
agent_review: dict,
overall_status: str,
created_by: Optional[uuid.UUID] = None,
file_hash: Optional[str] = None,
is_identical_file: Optional[bool] = None,
) -> tuple[Proof, ProofVersion]:
"""Create or get proof and add a new version with review results."""
proof, is_new = await self.get_or_create_proof(
@ -281,6 +297,8 @@ class ProofRepository:
agent_review=agent_review,
overall_status=overall_status,
workfront_id=version_workfront_id,
file_hash=file_hash,
is_identical_file=is_identical_file,
)
return proof, version

View file

@ -56,6 +56,10 @@ async def handle_analyze_message(
})
return
# Compute file hash for duplicate detection
file_hash = storage_service.get_checksum(file_data)
logger.info(f"[WEBSOCKET] Computed file hash: {file_hash}")
# Create callback for real-time updates
async def on_agent_update(agent_name: str, review: SubReview | None) -> None:
if not manager.is_connected(client_id):
@ -93,6 +97,8 @@ async def handle_analyze_message(
# Fetch previous analysis if this is a revision
previous_analysis = None
previous_file_hash = None
is_identical_file = False
campaign_id = data.get("campaign_id")
proof_name = data.get("proof_name")
@ -106,10 +112,15 @@ async def handle_analyze_message(
)
if existing_proof:
previous_analysis = await proof_repo.get_latest_version_review(existing_proof.id)
previous_file_hash = await proof_repo.get_latest_version_hash(existing_proof.id)
if previous_analysis:
logger.info(f"[WEBSOCKET] Found previous analysis version {previous_analysis.get('version')}")
else:
logger.info("[WEBSOCKET] No previous analysis found (new proof)")
# Check if file is identical to previous version
if previous_file_hash and previous_file_hash == file_hash:
is_identical_file = True
logger.info(f"[WEBSOCKET] Identical file detected - hash matches previous version: {file_hash}")
else:
logger.info("[WEBSOCKET] No existing proof found (new proof)")
except Exception as e:
@ -208,6 +219,8 @@ async def handle_analyze_message(
thumbnail_url=thumbnail_url,
agent_review=result_dict,
overall_status=result.overallStatus,
file_hash=file_hash,
is_identical_file=is_identical_file,
)
await session.commit()
@ -225,6 +238,7 @@ async def handle_analyze_message(
response = {
"type": "complete",
"result": result_dict,
"is_identical_file": is_identical_file,
}
# Include proof/version IDs if persisted
if proof_id:

View file

@ -6,7 +6,7 @@ import { ArrowLeftIcon } from './icons/ArrowLeftIcon';
import type { AgentReview, FlaggedItem, ResolvedItem, OverallStatus } from '../types';
import { FeedbackReport } from './FeedbackReport';
import { CreateCampaignModal } from './CreateCampaignModal';
import { CheckCircleIcon, ArrowPathIcon } from './icons/StatusIcons';
import { CheckCircleIcon, ArrowPathIcon, ExclamationTriangleIcon } from './icons/StatusIcons';
import { ProofPreview } from './ProofPreview';
import { HistoryIcon } from './icons/HistoryIcon';
import { DropdownOptions } from '../App';
@ -1579,9 +1579,22 @@ const ProofDetailView: React.FC<{
</div>
</div>
<div className="mt-12 lg:mt-0 lg:col-span-2">
<FeedbackReport
feedback={selectedVersion.feedback}
onFlagSubmit={handleFlagSubmitWrapper}
{selectedVersion.isIdenticalFile && (
<div className="mb-6 bg-amber-50 border border-amber-200 rounded-lg p-4 flex items-start gap-3">
<ExclamationTriangleIcon className="w-5 h-5 text-amber-600 flex-shrink-0 mt-0.5" />
<div>
<p className="font-semibold text-amber-800">Identical File Detected</p>
<p className="text-sm text-amber-700 mt-1">
This file is exactly the same as the previous version.
The analysis results shown are from the new analysis,
but no changes were made to the creative.
</p>
</div>
</div>
)}
<FeedbackReport
feedback={selectedVersion.feedback}
onFlagSubmit={handleFlagSubmitWrapper}
onResolveSubmit={handleResolveSubmitWrapper}
/>
</div>

View file

@ -27,6 +27,7 @@ export interface ProofVersionResponse {
agent_review: AgentReview | null;
overall_status: string | null;
workfront_id: string | null;
is_identical_file: boolean | null;
created_at: string;
}
@ -290,6 +291,7 @@ class ApiService {
feedback: v.agent_review || {} as AgentReview,
overallStatus: v.overall_status as any,
fileStorageKey: v.file_storage_key || '',
isIdenticalFile: v.is_identical_file || false,
})),
_id: proof.id,
fileStorageKey: latestVersion?.file_storage_key || '',

View file

@ -27,6 +27,7 @@ export interface AnalyzeProofResult {
proofId?: string;
versionId?: string;
pdfPages?: PDFPage[];
isIdenticalFile?: boolean;
}
/**
@ -121,6 +122,7 @@ export const analyzeProof = async (
proofId: message.proof_id,
versionId: message.version_id,
pdfPages: message.pdf_pages as PDFPage[] | undefined,
isIdenticalFile: message.is_identical_file as boolean | undefined,
});
break;

View file

@ -73,3 +73,13 @@ export interface PDFPage {
width: number;
height: number;
}
/**
 * One version of a proof as represented in the frontend.
 *
 * NOTE(review): field meanings below are inferred from names and from the
 * API mapping elsewhere in this change — confirm against the callers.
 */
export interface ProofVersion {
// Version number of this entry.
version: number;
// Timestamp string for this version; exact format is not visible here.
timestamp: string;
workfrontId: string;
proofPreviewUrl?: string;
// Presumably the storage key of the uploaded file — verify against the API mapping.
fileStorageKey?: string;
// Agent review results attached to this version.
feedback: AgentReview;
// Optional flag; per the commit description it marks a file identical to the previous version.
isIdenticalFile?: boolean;
}