Detect identical file uploads via MD5 hashing
- Add file_hash and is_identical_file columns to proof_versions table
- Compute MD5 hash on file upload and compare with previous version
- Display warning banner when uploading identical file as revision
- Return is_identical_file in WebSocket response and API endpoints

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
3a5c3bcde3
commit
2f547dc494
10 changed files with 104 additions and 5 deletions
35
backend/alembic/versions/005_add_file_hash.py
Normal file
35
backend/alembic/versions/005_add_file_hash.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
"""Add file_hash and is_identical_file columns to proof_versions
|
||||
|
||||
Revision ID: 005_add_file_hash
|
||||
Revises: 004_cleanup_duplicate_dropdown_options
|
||||
Create Date: 2025-01-25
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '005_add_file_hash'
|
||||
down_revision: Union[str, None] = '004_cleanup_duplicate_dropdown_options'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the ``file_hash`` and ``is_identical_file`` columns to ``proof_versions``."""
    table_name = 'proof_versions'

    # An MD5 hex digest is always 32 characters long, hence String(32).
    hash_column = sa.Column('file_hash', sa.String(32), nullable=True)

    # Tracks whether a version's file is identical to the previous version's;
    # the column carries a server-side default of 'false'.
    identical_column = sa.Column(
        'is_identical_file',
        sa.Boolean(),
        nullable=True,
        server_default='false',
    )

    # file_hash is added first, then is_identical_file.
    for new_column in (hash_column, identical_column):
        op.add_column(table_name, new_column)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop ``is_identical_file`` and then ``file_hash`` from ``proof_versions``."""
    for column_name in ('is_identical_file', 'file_hash'):
        op.drop_column('proof_versions', column_name)
|
||||
|
|
@ -245,6 +245,7 @@ async def list_proofs(
|
|||
agent_review=v.agent_review,
|
||||
overall_status=v.overall_status,
|
||||
workfront_id=v.workfront_id,
|
||||
is_identical_file=v.is_identical_file,
|
||||
created_at=v.created_at,
|
||||
)
|
||||
for v in proof.versions
|
||||
|
|
@ -283,6 +284,7 @@ async def get_proof(
|
|||
agent_review=v.agent_review,
|
||||
overall_status=v.overall_status,
|
||||
workfront_id=v.workfront_id,
|
||||
is_identical_file=v.is_identical_file,
|
||||
created_at=v.created_at,
|
||||
)
|
||||
for v in proof.versions
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ class ProofVersionResponse(BaseModel):
|
|||
agent_review: Optional[dict]
|
||||
overall_status: Optional[str]
|
||||
workfront_id: Optional[str]
|
||||
is_identical_file: Optional[bool] = False
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import uuid
|
|||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, func
|
||||
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint, func
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
|
|
@ -100,6 +100,8 @@ class ProofVersion(Base):
|
|||
agent_review: Mapped[Optional[dict]] = mapped_column(JSONB, nullable=True)
|
||||
overall_status: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
|
||||
workfront_id: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
|
||||
file_hash: Mapped[Optional[str]] = mapped_column(String(32), nullable=True)
|
||||
is_identical_file: Mapped[Optional[bool]] = mapped_column(Boolean, nullable=True, default=False)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
|
||||
|
||||
# Relationships
|
||||
|
|
|
|||
|
|
@ -95,6 +95,8 @@ class ProofRepository:
|
|||
agent_review: Optional[dict] = None,
|
||||
overall_status: Optional[str] = None,
|
||||
workfront_id: Optional[str] = None,
|
||||
file_hash: Optional[str] = None,
|
||||
is_identical_file: Optional[bool] = None,
|
||||
) -> ProofVersion:
|
||||
"""Create a new version of a proof."""
|
||||
proof_version = ProofVersion(
|
||||
|
|
@ -105,6 +107,8 @@ class ProofRepository:
|
|||
agent_review=agent_review,
|
||||
overall_status=overall_status,
|
||||
workfront_id=workfront_id,
|
||||
file_hash=file_hash,
|
||||
is_identical_file=is_identical_file,
|
||||
)
|
||||
self.session.add(proof_version)
|
||||
await self.session.flush()
|
||||
|
|
@ -148,6 +152,16 @@ class ProofRepository:
|
|||
version = result.scalar_one_or_none()
|
||||
return version if version else 0
|
||||
|
||||
async def get_latest_version_hash(self, proof_id: uuid.UUID) -> Optional[str]:
    """Return the ``file_hash`` of the highest-numbered version of a proof.

    Args:
        proof_id: Identifier of the proof whose versions are searched.

    Returns:
        The stored hash of the newest version, or ``None`` when the query
        yields no row or the stored hash is NULL.
    """
    # Newest version first; only the single top row is needed.
    newest_hash_query = (
        select(ProofVersion.file_hash)
        .where(ProofVersion.proof_id == proof_id)
        .order_by(ProofVersion.version.desc())
        .limit(1)
    )
    rows = await self.session.execute(newest_hash_query)
    return rows.scalar_one_or_none()
|
||||
|
||||
async def get_previous_version_review(
|
||||
self,
|
||||
proof_id: uuid.UUID,
|
||||
|
|
@ -243,6 +257,8 @@ class ProofRepository:
|
|||
agent_review: dict,
|
||||
overall_status: str,
|
||||
created_by: Optional[uuid.UUID] = None,
|
||||
file_hash: Optional[str] = None,
|
||||
is_identical_file: Optional[bool] = None,
|
||||
) -> tuple[Proof, ProofVersion]:
|
||||
"""Create or get proof and add a new version with review results."""
|
||||
proof, is_new = await self.get_or_create_proof(
|
||||
|
|
@ -281,6 +297,8 @@ class ProofRepository:
|
|||
agent_review=agent_review,
|
||||
overall_status=overall_status,
|
||||
workfront_id=version_workfront_id,
|
||||
file_hash=file_hash,
|
||||
is_identical_file=is_identical_file,
|
||||
)
|
||||
|
||||
return proof, version
|
||||
|
|
|
|||
|
|
@ -56,6 +56,10 @@ async def handle_analyze_message(
|
|||
})
|
||||
return
|
||||
|
||||
# Compute file hash for duplicate detection
|
||||
file_hash = storage_service.get_checksum(file_data)
|
||||
logger.info(f"[WEBSOCKET] Computed file hash: {file_hash}")
|
||||
|
||||
# Create callback for real-time updates
|
||||
async def on_agent_update(agent_name: str, review: SubReview | None) -> None:
|
||||
if not manager.is_connected(client_id):
|
||||
|
|
@ -93,6 +97,8 @@ async def handle_analyze_message(
|
|||
|
||||
# Fetch previous analysis if this is a revision
|
||||
previous_analysis = None
|
||||
previous_file_hash = None
|
||||
is_identical_file = False
|
||||
campaign_id = data.get("campaign_id")
|
||||
proof_name = data.get("proof_name")
|
||||
|
||||
|
|
@ -106,10 +112,15 @@ async def handle_analyze_message(
|
|||
)
|
||||
if existing_proof:
|
||||
previous_analysis = await proof_repo.get_latest_version_review(existing_proof.id)
|
||||
previous_file_hash = await proof_repo.get_latest_version_hash(existing_proof.id)
|
||||
if previous_analysis:
|
||||
logger.info(f"[WEBSOCKET] Found previous analysis version {previous_analysis.get('version')}")
|
||||
else:
|
||||
logger.info("[WEBSOCKET] No previous analysis found (new proof)")
|
||||
# Check if file is identical to previous version
|
||||
if previous_file_hash and previous_file_hash == file_hash:
|
||||
is_identical_file = True
|
||||
logger.info(f"[WEBSOCKET] Identical file detected - hash matches previous version: {file_hash}")
|
||||
else:
|
||||
logger.info("[WEBSOCKET] No existing proof found (new proof)")
|
||||
except Exception as e:
|
||||
|
|
@ -208,6 +219,8 @@ async def handle_analyze_message(
|
|||
thumbnail_url=thumbnail_url,
|
||||
agent_review=result_dict,
|
||||
overall_status=result.overallStatus,
|
||||
file_hash=file_hash,
|
||||
is_identical_file=is_identical_file,
|
||||
)
|
||||
|
||||
await session.commit()
|
||||
|
|
@ -225,6 +238,7 @@ async def handle_analyze_message(
|
|||
response = {
|
||||
"type": "complete",
|
||||
"result": result_dict,
|
||||
"is_identical_file": is_identical_file,
|
||||
}
|
||||
# Include proof/version IDs if persisted
|
||||
if proof_id:
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import { ArrowLeftIcon } from './icons/ArrowLeftIcon';
|
|||
import type { AgentReview, FlaggedItem, ResolvedItem, OverallStatus } from '../types';
|
||||
import { FeedbackReport } from './FeedbackReport';
|
||||
import { CreateCampaignModal } from './CreateCampaignModal';
|
||||
import { CheckCircleIcon, ArrowPathIcon } from './icons/StatusIcons';
|
||||
import { CheckCircleIcon, ArrowPathIcon, ExclamationTriangleIcon } from './icons/StatusIcons';
|
||||
import { ProofPreview } from './ProofPreview';
|
||||
import { HistoryIcon } from './icons/HistoryIcon';
|
||||
import { DropdownOptions } from '../App';
|
||||
|
|
@ -1579,9 +1579,22 @@ const ProofDetailView: React.FC<{
|
|||
</div>
|
||||
</div>
|
||||
<div className="mt-12 lg:mt-0 lg:col-span-2">
|
||||
<FeedbackReport
|
||||
feedback={selectedVersion.feedback}
|
||||
onFlagSubmit={handleFlagSubmitWrapper}
|
||||
{selectedVersion.isIdenticalFile && (
|
||||
<div className="mb-6 bg-amber-50 border border-amber-200 rounded-lg p-4 flex items-start gap-3">
|
||||
<ExclamationTriangleIcon className="w-5 h-5 text-amber-600 flex-shrink-0 mt-0.5" />
|
||||
<div>
|
||||
<p className="font-semibold text-amber-800">Identical File Detected</p>
|
||||
<p className="text-sm text-amber-700 mt-1">
|
||||
This file is exactly the same as the previous version.
|
||||
The analysis results shown are from the new analysis,
|
||||
but no changes were made to the creative.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
<FeedbackReport
|
||||
feedback={selectedVersion.feedback}
|
||||
onFlagSubmit={handleFlagSubmitWrapper}
|
||||
onResolveSubmit={handleResolveSubmitWrapper}
|
||||
/>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ export interface ProofVersionResponse {
|
|||
agent_review: AgentReview | null;
|
||||
overall_status: string | null;
|
||||
workfront_id: string | null;
|
||||
is_identical_file: boolean | null;
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
|
|
@ -290,6 +291,7 @@ class ApiService {
|
|||
feedback: v.agent_review || {} as AgentReview,
|
||||
overallStatus: v.overall_status as any,
|
||||
fileStorageKey: v.file_storage_key || '',
|
||||
isIdenticalFile: v.is_identical_file || false,
|
||||
})),
|
||||
_id: proof.id,
|
||||
fileStorageKey: latestVersion?.file_storage_key || '',
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ export interface AnalyzeProofResult {
|
|||
proofId?: string;
|
||||
versionId?: string;
|
||||
pdfPages?: PDFPage[];
|
||||
isIdenticalFile?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -121,6 +122,7 @@ export const analyzeProof = async (
|
|||
proofId: message.proof_id,
|
||||
versionId: message.version_id,
|
||||
pdfPages: message.pdf_pages as PDFPage[] | undefined,
|
||||
isIdenticalFile: message.is_identical_file as boolean | undefined,
|
||||
});
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -73,3 +73,13 @@ export interface PDFPage {
|
|||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
/** Frontend shape of a single version of a proof. */
export interface ProofVersion {
  version: number;
  timestamp: string;
  workfrontId: string;
  proofPreviewUrl?: string;
  fileStorageKey?: string;
  /** Agent review attached to this version. */
  feedback: AgentReview;
  /**
   * Whether this version's file was detected as identical to the previous
   * version's file. Optional; presumably populated from the API's
   * `is_identical_file` field (confirm against the mapping code).
   */
  isIdenticalFile?: boolean;
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue