salary-benchmark/app/services/benchmark_service.py
DJP da3f5faa91 Initial commit: Salary Benchmark Tool
FastAPI + React + PostgreSQL salary benchmarking tool with AI research pipeline.
- Seed data for 25+ New York roles (junior/mid/senior levels)
- Single + bulk lookup with location alias mapping (NYC -> New York, etc.)
- Research pipeline: Serper -> Firecrawl -> Cohere Rerank -> Claude analysis
- Editable validation UI for AI-proposed benchmarks
- CSV export, Montserrat font, black/white/#FFC407 design
- Fully Dockerized (app + db + frontend)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-02 22:47:32 -04:00

96 lines
2.6 KiB
Python

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models import Benchmark, Location, Role
from app.schemas import BenchmarkOut
LOCATION_ALIASES = {
"nyc": "new york",
"new york city": "new york",
"manhattan": "new york",
"sf": "san francisco",
"san fran": "san francisco",
"la": "los angeles",
"chi": "chicago",
"dc": "washington dc",
"washington": "washington dc",
"philly": "philadelphia",
"atl": "atlanta",
"bos": "boston",
"dal": "dallas",
"hou": "houston",
"sea": "seattle",
"pdx": "portland",
"den": "denver",
"mia": "miami",
"det": "detroit",
"mpls": "minneapolis",
"nola": "new orleans",
"london": "london",
"ldn": "london",
}
def normalize(text: str) -> str:
cleaned = text.strip().lower()
return LOCATION_ALIASES.get(cleaned, cleaned)
def normalize_title(text: str) -> str:
return text.strip().lower()
async def get_or_create_role(db: AsyncSession, title: str) -> Role:
norm = normalize_title(title)
result = await db.execute(select(Role).where(Role.normalized_title == norm))
role = result.scalar_one_or_none()
if not role:
role = Role(title=title.strip(), normalized_title=norm)
db.add(role)
await db.flush()
return role
async def get_or_create_location(db: AsyncSession, location: str) -> Location:
norm = normalize(location)
result = await db.execute(
select(Location).where(Location.normalized_name == norm)
)
loc = result.scalar_one_or_none()
if not loc:
# Use the canonical name as the display city name
loc = Location(city=norm.title(), normalized_name=norm)
db.add(loc)
await db.flush()
return loc
async def lookup_benchmarks(
db: AsyncSession, title: str, location: str
) -> list[BenchmarkOut]:
norm_title = normalize_title(title)
norm_location = normalize(location)
result = await db.execute(
select(Benchmark, Role, Location)
.join(Role, Benchmark.role_id == Role.id)
.join(Location, Benchmark.location_id == Location.id)
.where(Role.normalized_title == norm_title)
.where(Location.normalized_name == norm_location)
.order_by(
Benchmark.level.desc()
)
)
rows = result.all()
return [
BenchmarkOut(
role=role.title,
location=loc.city,
level=bench.level,
salary=bench.salary,
source=bench.source,
validated=bench.validated,
confidence_score=bench.confidence_score,
)
for bench, role, loc in rows
]