FastAPI + React + PostgreSQL salary benchmarking tool with AI research pipeline. - Seed data for 25+ New York roles (junior/mid/senior levels) - Single + bulk lookup with location alias mapping (NYC -> New York, etc.) - Research pipeline: Serper -> Firecrawl -> Cohere Rerank -> Claude analysis - Editable validation UI for AI-proposed benchmarks - CSV export, Montserrat font, black/white/#FFC407 design - Fully Dockerized (app + db + frontend) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
96 lines
2.6 KiB
Python
96 lines
2.6 KiB
Python
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.models import Benchmark, Location, Role
|
|
from app.schemas import BenchmarkOut
|
|
|
|
LOCATION_ALIASES = {
|
|
"nyc": "new york",
|
|
"new york city": "new york",
|
|
"manhattan": "new york",
|
|
"sf": "san francisco",
|
|
"san fran": "san francisco",
|
|
"la": "los angeles",
|
|
"chi": "chicago",
|
|
"dc": "washington dc",
|
|
"washington": "washington dc",
|
|
"philly": "philadelphia",
|
|
"atl": "atlanta",
|
|
"bos": "boston",
|
|
"dal": "dallas",
|
|
"hou": "houston",
|
|
"sea": "seattle",
|
|
"pdx": "portland",
|
|
"den": "denver",
|
|
"mia": "miami",
|
|
"det": "detroit",
|
|
"mpls": "minneapolis",
|
|
"nola": "new orleans",
|
|
"london": "london",
|
|
"ldn": "london",
|
|
}
|
|
|
|
|
|
def normalize(text: str) -> str:
|
|
cleaned = text.strip().lower()
|
|
return LOCATION_ALIASES.get(cleaned, cleaned)
|
|
|
|
|
|
def normalize_title(text: str) -> str:
|
|
return text.strip().lower()
|
|
|
|
|
|
async def get_or_create_role(db: AsyncSession, title: str) -> Role:
|
|
norm = normalize_title(title)
|
|
result = await db.execute(select(Role).where(Role.normalized_title == norm))
|
|
role = result.scalar_one_or_none()
|
|
if not role:
|
|
role = Role(title=title.strip(), normalized_title=norm)
|
|
db.add(role)
|
|
await db.flush()
|
|
return role
|
|
|
|
|
|
async def get_or_create_location(db: AsyncSession, location: str) -> Location:
|
|
norm = normalize(location)
|
|
result = await db.execute(
|
|
select(Location).where(Location.normalized_name == norm)
|
|
)
|
|
loc = result.scalar_one_or_none()
|
|
if not loc:
|
|
# Use the canonical name as the display city name
|
|
loc = Location(city=norm.title(), normalized_name=norm)
|
|
db.add(loc)
|
|
await db.flush()
|
|
return loc
|
|
|
|
|
|
async def lookup_benchmarks(
|
|
db: AsyncSession, title: str, location: str
|
|
) -> list[BenchmarkOut]:
|
|
norm_title = normalize_title(title)
|
|
norm_location = normalize(location)
|
|
|
|
result = await db.execute(
|
|
select(Benchmark, Role, Location)
|
|
.join(Role, Benchmark.role_id == Role.id)
|
|
.join(Location, Benchmark.location_id == Location.id)
|
|
.where(Role.normalized_title == norm_title)
|
|
.where(Location.normalized_name == norm_location)
|
|
.order_by(
|
|
Benchmark.level.desc()
|
|
)
|
|
)
|
|
rows = result.all()
|
|
return [
|
|
BenchmarkOut(
|
|
role=role.title,
|
|
location=loc.city,
|
|
level=bench.level,
|
|
salary=bench.salary,
|
|
source=bench.source,
|
|
validated=bench.validated,
|
|
confidence_score=bench.confidence_score,
|
|
)
|
|
for bench, role, loc in rows
|
|
]
|