Add migration to clean up duplicate dropdown options

The staging database has duplicate sub-channels (5 "Meta", 2 "Magazine").
As a result, the last duplicate (with 0 proof types) overwrites the
correct one in the API response.

This migration:
1. Identifies duplicate sub-channels and channels
2. Keeps the one with the most children (proof types)
3. Deletes the duplicates
4. Adds unique partial indexes to prevent future duplicates

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2026-01-23 13:22:44 -06:00
parent 874c1fceee
commit 94a37f3ed8

View file

@ -0,0 +1,168 @@
"""Cleanup duplicate dropdown options and add unique constraint
Revision ID: 004_cleanup_duplicates
Revises: 003_add_meta_proof_types
Create Date: 2026-01-23
This migration fixes an issue where duplicate sub-channels (e.g., multiple "Meta" entries)
were created, causing proof types to not display correctly because the last duplicate
(with no children) would overwrite the correct one in the API response.
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration this one revises (its direct predecessor in the chain).
revision: str = '004_cleanup_duplicates'
down_revision: Union[str, None] = '003_add_meta_proof_types'
# No branch labels and no extra dependencies are declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def _ranked_duplicates(conn: "sa.engine.Connection", option_type, parent_id, value) -> list:
    """Return the rows of one duplicate group, best keeper first.

    A group is every ``dropdown_options`` row sharing the same
    ``(option_type, parent_id, value)``.  Each returned row is
    ``(id, child_count)`` where ``child_count`` is the number of rows whose
    ``parent_id`` points at it.  Rows are ordered by most children first,
    then oldest ``created_at``, then ``id`` so the choice is deterministic
    even when the first two keys tie.
    """
    # `parent_id = NULL` never matches in SQL, so a parentless group needs an
    # explicit IS NULL predicate rather than a bound parameter.
    if parent_id is None:
        parent_filter = "d.parent_id IS NULL"
        params = {"option_type": option_type, "value": value}
    else:
        parent_filter = "d.parent_id = :parent_id"
        params = {"option_type": option_type, "parent_id": parent_id, "value": value}

    # Only the two constant fragments above are interpolated; every value is
    # still passed as a bound parameter.
    return conn.execute(
        sa.text(f"""
            SELECT d.id,
                   (SELECT COUNT(*) FROM dropdown_options WHERE parent_id = d.id) as child_count
            FROM dropdown_options d
            WHERE d.option_type = :option_type
              AND {parent_filter}
              AND d.value = :value
            ORDER BY child_count DESC, d.created_at ASC, d.id ASC
        """),
        params,
    ).fetchall()


def _delete_subtree(conn: "sa.engine.Connection", root_id) -> None:
    """Delete ``root_id`` and every row below it, deepest rows first.

    Descendants are discovered level by level through ``parent_id`` and then
    removed in reverse discovery order, so a child is always deleted before
    its parent regardless of how deep the hierarchy is.
    """
    ordered = [root_id]
    seen = {root_id}  # guards against a parent_id cycle looping forever
    position = 0
    while position < len(ordered):
        rows = conn.execute(
            sa.text("SELECT id FROM dropdown_options WHERE parent_id = :id"),
            {"id": ordered[position]},
        ).fetchall()
        for row in rows:
            if row[0] not in seen:
                seen.add(row[0])
                ordered.append(row[0])
        position += 1

    for node_id in reversed(ordered):
        conn.execute(
            sa.text("DELETE FROM dropdown_options WHERE id = :id"),
            {"id": node_id},
        )


def upgrade() -> None:
    """Remove duplicate dropdown options, keeping the ones with children (proof types).

    Within each group of rows sharing ``(option_type, parent_id, value)`` the
    row with the most children is kept (oldest ``created_at`` on a tie) and
    every other row is deleted together with its descendants.  The deletions
    are destructive: children attached to a losing duplicate are removed too.

    Order of work:
      1. duplicate sub-channels,
      2. duplicate parentless channels (after step 1, so their child counts
         reflect de-duplicated sub-channels),
      3. any duplicates left in other option types, because the unique
         indexes below cover every option type and would otherwise fail to
         build,
      4. the two partial unique indexes that prevent new duplicates.
    """
    conn = op.get_bind()

    # Step 1: duplicate sub-channels (same parent and value).
    duplicates = conn.execute(
        sa.text("""
            SELECT option_type, parent_id, value, COUNT(*) as cnt
            FROM dropdown_options
            WHERE option_type = 'sub_channel'
            GROUP BY option_type, parent_id, value
            HAVING COUNT(*) > 1
        """)
    ).fetchall()
    print(f"[Migration 004] Found {len(duplicates)} duplicate sub-channel groups")
    for dup in duplicates:
        option_type, parent_id, value = dup[0], dup[1], dup[2]
        print(f"[Migration 004] Processing duplicate: {value} (parent_id={parent_id})")
        entries = _ranked_duplicates(conn, option_type, parent_id, value)
        if len(entries) > 1:
            # First row has the most children (or is the oldest); drop the rest.
            keep_id = entries[0][0]
            print(f"[Migration 004] Keeping {keep_id} with {entries[0][1]} children")
            for entry in entries[1:]:
                delete_id = entry[0]
                print(f"[Migration 004] Deleting duplicate {delete_id} with {entry[1]} children")
                # Children of the losing duplicate go with it.
                _delete_subtree(conn, delete_id)

    # Step 2: duplicate top-level channels (just in case).
    channel_duplicates = conn.execute(
        sa.text("""
            SELECT option_type, value, COUNT(*) as cnt
            FROM dropdown_options
            WHERE option_type = 'channel' AND parent_id IS NULL
            GROUP BY option_type, value
            HAVING COUNT(*) > 1
        """)
    ).fetchall()
    print(f"[Migration 004] Found {len(channel_duplicates)} duplicate channel groups")
    for dup in channel_duplicates:
        option_type, value = dup[0], dup[1]
        print(f"[Migration 004] Processing duplicate channel: {value}")
        entries = _ranked_duplicates(conn, option_type, None, value)
        if len(entries) > 1:
            keep_id = entries[0][0]
            print(f"[Migration 004] Keeping channel {keep_id} with {entries[0][1]} children")
            for entry in entries[1:]:
                delete_id = entry[0]
                print(f"[Migration 004] Deleting duplicate channel {delete_id}")
                # Removes the channel's sub-channels and their proof types first.
                _delete_subtree(conn, delete_id)

    # Step 3: whatever is still duplicated in any option type.  Groups are
    # re-queried one by one because an earlier deletion in this pass may have
    # already removed some of their rows.
    remaining = conn.execute(
        sa.text("""
            SELECT option_type, parent_id, value, COUNT(*) as cnt
            FROM dropdown_options
            GROUP BY option_type, parent_id, value
            HAVING COUNT(*) > 1
        """)
    ).fetchall()
    print(f"[Migration 004] Found {len(remaining)} remaining duplicate groups")
    for dup in remaining:
        option_type, parent_id, value = dup[0], dup[1], dup[2]
        print(
            f"[Migration 004] Processing remaining duplicate: "
            f"{option_type}/{value} (parent_id={parent_id})"
        )
        entries = _ranked_duplicates(conn, option_type, parent_id, value)
        if len(entries) > 1:
            keep_id = entries[0][0]
            print(f"[Migration 004] Keeping {keep_id} with {entries[0][1]} children")
            for entry in entries[1:]:
                delete_id = entry[0]
                print(f"[Migration 004] Deleting duplicate {delete_id} with {entry[1]} children")
                _delete_subtree(conn, delete_id)

    # Step 4: unique indexes to prevent future duplicates.
    # Rows with a parent: unique on (option_type, parent_id, value).
    op.execute("""
        CREATE UNIQUE INDEX IF NOT EXISTS idx_dropdown_options_unique_with_parent
        ON dropdown_options (option_type, parent_id, value)
        WHERE parent_id IS NOT NULL
    """)
    # Rows without a parent: unique on (option_type, value).  A separate
    # partial index is needed because NULL parent_ids never compare equal.
    op.execute("""
        CREATE UNIQUE INDEX IF NOT EXISTS idx_dropdown_options_unique_without_parent
        ON dropdown_options (option_type, value)
        WHERE parent_id IS NULL
    """)
    print("[Migration 004] Unique indexes created successfully")
def downgrade() -> None:
    """Drop the two unique indexes; rows deleted by the upgrade are not restored."""
    drop_statements = (
        "DROP INDEX IF EXISTS idx_dropdown_options_unique_with_parent",
        "DROP INDEX IF EXISTS idx_dropdown_options_unique_without_parent",
    )
    # Same order as the original: the with-parent index first.
    for statement in drop_statements:
        op.execute(statement)