From b140ab386033882e08bd042f9045ee634bec7ecf Mon Sep 17 00:00:00 2001
From: nickviljoen
Date: Tue, 28 Apr 2026 13:17:43 +0200
Subject: [PATCH] Gunicorn: raise max_requests and graceful_timeout

The previous max_requests=200 caused workers to recycle every ~5 minutes
under normal progress polling (~40 req/min), killing any in-flight
background matching/QC thread on the worker. Bumping to 5000 means a
worker only recycles after several hours, well past any single job.

Also raise graceful_timeout to 600s so in-flight threads finish on
legitimate shutdowns instead of being SIGKILL'd after 30s.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 gunicorn_config.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/gunicorn_config.py b/gunicorn_config.py
index 4cace88..6f8c6ba 100644
--- a/gunicorn_config.py
+++ b/gunicorn_config.py
@@ -13,10 +13,18 @@ worker_class = "gthread"
 worker_connections = 1000
 timeout = 300
 keepalive = 5
+# graceful_timeout: how long gunicorn waits for in-flight work to finish on a shutdown
+# (deploy, recycle). Default is 30s, which is too short for long-running matching/QC jobs
+# that run as background threads inside the worker. 600s gives those jobs a chance to
+# complete on graceful shutdowns; SIGKILL only fires after that.
+graceful_timeout = 600
 
-# Recycle workers after N requests to release accumulated memory
-max_requests = 200
-max_requests_jitter = 30
+# Recycle workers after N requests to release accumulated memory.
+# Set high (was 200) because each in-progress polling job fires ~40 req/min — with the
+# old setting the worker recycled mid-job and killed any running matching/QC thread.
+# Monitor memory in `docker stats hm-qc-app`; if it climbs over time, lower this.
+max_requests = 5000
+max_requests_jitter = 500
 
 # Logging — stdout/stderr for docker logs
 accesslog = "-"