diff options
author | Maxim Dounin <mdounin@mdounin.ru> | 2011-10-12 14:22:48 +0000 |
---|---|---|
committer | Maxim Dounin <mdounin@mdounin.ru> | 2011-10-12 14:22:48 +0000 |
commit | b713e480ca9aeccaf63bccf8629eec13041c6ab6 (patch) | |
tree | 8e2715dced608b399c3a4d7157bee7519e4380f9 /src | |
parent | 72df0f400a4f6144f9c69add91002a0ff02cfe41 (diff) | |
download | nginx-b713e480ca9aeccaf63bccf8629eec13041c6ab6.tar.gz nginx-b713e480ca9aeccaf63bccf8629eec13041c6ab6.zip |
Better recheck of dead upstream servers.
Previously nginx marked a backend as live again as soon as fail_timeout
(10s by default) had passed since the last failure. On the other hand,
detecting a dead backend takes up to 60s (proxy_connect_timeout) in the typical
situation where the "backend is down and doesn't respond to any packets". This
resulted in suboptimal behaviour in that situation (up to 23% of requests
were directed to the dead backend with default settings).
A more detailed description of the problem can be found here (in Russian):
http://mailman.nginx.org/pipermail/nginx-ru/2011-August/042172.html
The fix is to allow only one request after fail_timeout passes, and to
mark the backend as "live" only if this request succeeds.
Note that with the new code a backend will not be marked "live" until the "check"
request has completed, and this may take a while in some specific workloads
(e.g. streaming). This is believed to be acceptable.
Diffstat (limited to 'src')
-rw-r--r-- | src/http/modules/ngx_http_upstream_ip_hash_module.c | 4 | ||||
-rw-r--r-- | src/http/ngx_http_upstream_round_robin.c | 21 | ||||
-rw-r--r-- | src/http/ngx_http_upstream_round_robin.h | 1 |
3 files changed, 18 insertions, 8 deletions
diff --git a/src/http/modules/ngx_http_upstream_ip_hash_module.c b/src/http/modules/ngx_http_upstream_ip_hash_module.c index dffbf22b2..4c031eb47 100644 --- a/src/http/modules/ngx_http_upstream_ip_hash_module.c +++ b/src/http/modules/ngx_http_upstream_ip_hash_module.c @@ -185,8 +185,8 @@ ngx_http_upstream_get_ip_hash_peer(ngx_peer_connection_t *pc, void *data) break; } - if (now - peer->accessed > peer->fail_timeout) { - peer->fails = 0; + if (now - peer->checked > peer->fail_timeout) { + peer->checked = now; break; } } diff --git a/src/http/ngx_http_upstream_round_robin.c b/src/http/ngx_http_upstream_round_robin.c index bb9a704b8..138872c5f 100644 --- a/src/http/ngx_http_upstream_round_robin.c +++ b/src/http/ngx_http_upstream_round_robin.c @@ -443,8 +443,8 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data) break; } - if (now - peer->accessed > peer->fail_timeout) { - peer->fails = 0; + if (now - peer->checked > peer->fail_timeout) { + peer->checked = now; break; } @@ -491,8 +491,8 @@ ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data) break; } - if (now - peer->accessed > peer->fail_timeout) { - peer->fails = 0; + if (now - peer->checked > peer->fail_timeout) { + peer->checked = now; break; } @@ -663,15 +663,16 @@ ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data, return; } + peer = &rrp->peers->peer[rrp->current]; + if (state & NGX_PEER_FAILED) { now = ngx_time(); - peer = &rrp->peers->peer[rrp->current]; - /* ngx_lock_mutex(rrp->peers->mutex); */ peer->fails++; peer->accessed = now; + peer->checked = now; if (peer->max_fails) { peer->current_weight -= peer->weight / peer->max_fails; @@ -686,6 +687,14 @@ ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data, } /* ngx_unlock_mutex(rrp->peers->mutex); */ + + } else { + + /* mark peer live if check passed */ + + if (peer->accessed < peer->checked) { + peer->fails = 0; + } } rrp->current++; diff --git 
a/src/http/ngx_http_upstream_round_robin.h b/src/http/ngx_http_upstream_round_robin.h index a9cb257c7..195f4d8ca 100644 --- a/src/http/ngx_http_upstream_round_robin.h +++ b/src/http/ngx_http_upstream_round_robin.h @@ -23,6 +23,7 @@ typedef struct { ngx_uint_t fails; time_t accessed; + time_t checked; ngx_uint_t max_fails; time_t fail_timeout; |