Gunicorn: raise max_requests and graceful_timeout
The previous max_requests=200 caused workers to recycle every ~5 minutes under normal progress polling (~40 req/min), killing any in-flight background matching/QC thread on the worker. Bumping to 5000 means a worker only recycles after roughly two hours of continuous polling at that rate (5000 / 40 req/min ≈ 125 min), well past any single job. Also raise graceful_timeout to 600s so in-flight threads can finish on legitimate shutdowns instead of being SIGKILL'd after 30s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3dd0420145
commit
b140ab3860
1 changed file with 11 additions and 3 deletions
|
|
@@ -13,10 +13,18 @@ worker_class = "gthread"
|
|||
worker_connections = 1000
|
||||
timeout = 300
|
||||
keepalive = 5
|
||||
# graceful_timeout: how long gunicorn waits for in-flight work to finish on a shutdown
|
||||
# (deploy, recycle). Default is 30s, which is too short for long-running matching/QC jobs
|
||||
# that run as background threads inside the worker. 600s gives those jobs a chance to
|
||||
# complete on graceful shutdowns; SIGKILL only fires after that.
|
||||
graceful_timeout = 600
|
||||
|
||||
# Recycle workers after N requests to release accumulated memory
|
||||
max_requests = 200
|
||||
max_requests_jitter = 30
|
||||
# Recycle workers after N requests to release accumulated memory.
|
||||
# Set high (was 200) because each in-progress polling job fires ~40 req/min — with the
|
||||
# old setting the worker recycled mid-job and killed any running matching/QC thread.
|
||||
# Monitor memory in `docker stats hm-qc-app`; if it climbs over time, lower this.
|
||||
max_requests = 5000
|
||||
max_requests_jitter = 500
|
||||
|
||||
# Logging — stdout/stderr for docker logs
|
||||
accesslog = "-"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue