diff --git a/gunicorn_config.py b/gunicorn_config.py
index 4cace88..6f8c6ba 100644
--- a/gunicorn_config.py
+++ b/gunicorn_config.py
@@ -13,10 +13,18 @@ worker_class = "gthread"
 worker_connections = 1000
 timeout = 300
 keepalive = 5
+# graceful_timeout: seconds a worker gets to finish in-flight requests after a shutdown or
+# restart signal (deploy, recycle) before it is force-killed. The 30s default is too short for
+# long-running matching/QC jobs that run as background threads inside the worker; 600s gives
+# them a chance to finish. NOTE(review): confirm the threads and Docker's stop timeout allow it.
+graceful_timeout = 600
 
-# Recycle workers after N requests to release accumulated memory
-max_requests = 200
-max_requests_jitter = 30
+# Recycle workers after N requests (plus random jitter) to release accumulated memory.
+# Set high (was 200): each in-progress job polls at ~40 req/min, so 200 requests is only ~5 min
+# of polling for one job; the worker recycled mid-job and killed any running matching/QC thread.
+# Watch memory with `docker stats hm-qc-app`; if it climbs over time, lower this value.
+max_requests = 5000
+max_requests_jitter = 500
 
 # Logging — stdout/stderr for docker logs
 accesslog = "-"