From 5760d6148eb0be156df58bba0054bbc34a6c7942 Mon Sep 17 00:00:00 2001 From: Vladimir Homutov Date: Fri, 28 Oct 2016 16:01:53 +0300 Subject: [PATCH] Upstream: least_time balancer module The module implements load-balancing algorithm based on least average response header/last_byte time and least number of active connections. The optional "inflight" mode enables accounting of incomplete requests/sessions. This allows to mitigate cases when an upstream server hangs and does not close connections. Example configuration: upstream u { least_time header | last_byte [inflight]; server a; server b; } Co-authored-by: Roman Arutyunyan --- auto/modules | 13 + auto/options | 5 + .../ngx_http_upstream_least_time_module.c | 666 ++++++++++++++++++ src/http/ngx_http_upstream_round_robin.h | 17 +- 4 files changed, 700 insertions(+), 1 deletion(-) create mode 100644 src/http/modules/ngx_http_upstream_least_time_module.c diff --git a/auto/modules b/auto/modules index f02691e16..ea4365f18 100644 --- a/auto/modules +++ b/auto/modules @@ -915,6 +915,19 @@ if [ $HTTP = YES ]; then . auto/module fi + if [ $HTTP_UPSTREAM_LEAST_TIME = YES ]; then + have=NGX_HTTP_UPSTREAM_LEAST_TIME . auto/have + + ngx_module_name=ngx_http_upstream_least_time_module + ngx_module_incs= + ngx_module_deps= + ngx_module_srcs=src/http/modules/ngx_http_upstream_least_time_module.c + ngx_module_libs= + ngx_module_link=$HTTP_UPSTREAM_LEAST_TIME + + . auto/module + fi + if [ $HTTP_UPSTREAM_RANDOM = YES ]; then ngx_module_name=ngx_http_upstream_random_module ngx_module_incs= diff --git a/auto/options b/auto/options index e619c8fa5..fde854f9d 100644 --- a/auto/options +++ b/auto/options @@ -104,6 +104,7 @@ HTTP_GZIP_STATIC=NO HTTP_UPSTREAM_HASH=YES HTTP_UPSTREAM_IP_HASH=YES HTTP_UPSTREAM_LEAST_CONN=YES +HTTP_UPSTREAM_LEAST_TIME=YES HTTP_UPSTREAM_RANDOM=YES HTTP_UPSTREAM_KEEPALIVE=YES HTTP_UPSTREAM_ZONE=YES @@ -289,6 +290,8 @@ $0: warning: the \"--with-ipv6\" option is deprecated" --without-http_upstream_ip_hash_module) HTTP_UPSTREAM_IP_HASH=NO ;; --without-http_upstream_least_conn_module) HTTP_UPSTREAM_LEAST_CONN=NO ;; + --without-http_upstream_least_time_module) + HTTP_UPSTREAM_LEAST_TIME=NO ;; --without-http_upstream_random_module) HTTP_UPSTREAM_RANDOM=NO ;; --without-http_upstream_keepalive_module) HTTP_UPSTREAM_KEEPALIVE=NO ;; @@ -518,6 +521,8 @@ cat << END disable ngx_http_upstream_ip_hash_module --without-http_upstream_least_conn_module disable ngx_http_upstream_least_conn_module + --without-http_upstream_least_time_module + disable ngx_http_upstream_least_time_module --without-http_upstream_random_module disable ngx_http_upstream_random_module --without-http_upstream_keepalive_module diff --git a/src/http/modules/ngx_http_upstream_least_time_module.c b/src/http/modules/ngx_http_upstream_least_time_module.c new file mode 100644 index 000000000..645d01b0d --- /dev/null +++ b/src/http/modules/ngx_http_upstream_least_time_module.c @@ -0,0 +1,666 @@ + +/* + * Copyright (C) Nginx, Inc. + */ + + +#include +#include +#include + + +#define NGX_HTTP_UPSTREAM_LT_HEADER 0 +#define NGX_HTTP_UPSTREAM_LT_LAST_BYTE 1 + + +typedef struct { + ngx_uint_t mode; + ngx_uint_t use_inflight; + /* unsigned use_inflight:1; */ +} ngx_http_upstream_lt_conf_t; + + +typedef struct { + /* the round robin data must be first */ + ngx_http_upstream_rr_peer_data_t rrp; + + ngx_http_upstream_lt_conf_t *conf; + ngx_uint_t inflight; /* unsigned inflight:1; */ + ngx_http_upstream_t *upstream; +} ngx_http_upstream_lt_peer_data_t; + + +static ngx_int_t ngx_http_upstream_init_least_time_peer(ngx_http_request_t *r, + ngx_http_upstream_srv_conf_t *us); +static ngx_int_t ngx_http_upstream_get_least_time_peer( + ngx_peer_connection_t *pc, void *data); +static ngx_uint_t ngx_http_upstream_least_time_eta( + ngx_http_upstream_lt_peer_data_t *ltp, ngx_http_upstream_rr_peer_t *peer); +static void ngx_http_upstream_least_time_notify(ngx_peer_connection_t *pc, + void *data, ngx_uint_t type); +static void ngx_http_upstream_least_time_inflight_done( + ngx_http_upstream_lt_peer_data_t *ltp, ngx_http_upstream_rr_peers_t *peers, + ngx_http_upstream_rr_peer_t *peer, ngx_msec_t last); +static void ngx_http_upstream_free_least_time_peer(ngx_peer_connection_t *pc, + void *data, ngx_uint_t state); + +static void *ngx_http_upstream_least_time_create_conf(ngx_conf_t *cf); +static char *ngx_http_upstream_least_time(ngx_conf_t *cf, ngx_command_t *cmd, + void *conf); + + +static ngx_conf_enum_t ngx_http_upstream_least_time_mode[] = { + { ngx_string("header"), NGX_HTTP_UPSTREAM_LT_HEADER }, + { ngx_string("last_byte"), NGX_HTTP_UPSTREAM_LT_LAST_BYTE }, + { ngx_null_string, 0 } +}; + + +static ngx_command_t ngx_http_upstream_least_time_commands[] = { + + { ngx_string("least_time"), + NGX_HTTP_UPS_CONF|NGX_CONF_TAKE12, + ngx_http_upstream_least_time, + NGX_HTTP_SRV_CONF_OFFSET, + offsetof(ngx_http_upstream_lt_conf_t, mode), + &ngx_http_upstream_least_time_mode }, + + ngx_null_command +}; + + +static ngx_http_module_t ngx_http_upstream_least_time_module_ctx = { + NULL, /* preconfiguration */ + NULL, /* postconfiguration */ + + NULL, /* create main configuration */ + NULL, /* init main configuration */ + + ngx_http_upstream_least_time_create_conf, + /* create server configuration */ + NULL, /* merge server configuration */ + + NULL, /* create location configuration */ + NULL /* merge location configuration */ +}; + + +ngx_module_t ngx_http_upstream_least_time_module = { + NGX_MODULE_V1, + &ngx_http_upstream_least_time_module_ctx, /* module context */ + ngx_http_upstream_least_time_commands, /* module directives */ + NGX_HTTP_MODULE, /* module type */ + NULL, /* init master */ + NULL, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + NGX_MODULE_V1_PADDING +}; + + +static ngx_int_t +ngx_http_upstream_init_least_time(ngx_conf_t *cf, + ngx_http_upstream_srv_conf_t *us) +{ + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, cf->log, 0, + "init least time"); + + if (ngx_http_upstream_init_round_robin(cf, us) != NGX_OK) { + return NGX_ERROR; + } + + us->peer.init = ngx_http_upstream_init_least_time_peer; + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_upstream_init_least_time_peer(ngx_http_request_t *r, + ngx_http_upstream_srv_conf_t *us) +{ + ngx_http_upstream_lt_conf_t *ltcf; + ngx_http_upstream_lt_peer_data_t *ltp; + + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "init least time peer"); + + ltp = ngx_pcalloc(r->pool, sizeof(ngx_http_upstream_lt_peer_data_t)); + if (ltp == NULL) { + return NGX_ERROR; + } + + r->upstream->peer.data = <p->rrp; + + if (ngx_http_upstream_init_round_robin_peer(r, us) != NGX_OK) { + return NGX_ERROR; + } + + r->upstream->peer.get = ngx_http_upstream_get_least_time_peer; + r->upstream->peer.free = ngx_http_upstream_free_least_time_peer; + + ltp->upstream = r->upstream; + + ltcf = ngx_http_conf_upstream_srv_conf(us, + ngx_http_upstream_least_time_module); + + if (ltcf->use_inflight) { + r->upstream->peer.notify = ngx_http_upstream_least_time_notify; + } + + ltp->conf = ltcf; + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_upstream_get_least_time_peer(ngx_peer_connection_t *pc, void *data) +{ + ngx_http_upstream_lt_peer_data_t *ltp = data; + + time_t now; + uintptr_t m; + ngx_int_t rc, total; + ngx_uint_t i, n, p, many, eta, best_eta; + ngx_msec_t ift; + ngx_http_upstream_rr_peer_t *peer, *best; + ngx_http_upstream_rr_peers_t *peers; + ngx_http_upstream_rr_peer_data_t *rrp; + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, + "get least time peer, try: %ui", pc->tries); + + rrp = <p->rrp; + + if (rrp->peers->single) { + return ngx_http_upstream_get_round_robin_peer(pc, rrp); + } + + pc->cached = 0; + pc->connection = NULL; + + now = ngx_time(); + + peers = rrp->peers; + + ngx_http_upstream_rr_peers_wlock(peers); + +#if (NGX_HTTP_UPSTREAM_ZONE) + if (peers->config && rrp->config != *peers->config) { + goto busy; + } +#endif + + best = NULL; + total = 0; + +#if (NGX_SUPPRESS_WARN) + many = 0; + p = 0; + best_eta = 0; +#endif + +#if (NGX_HTTP_UPSTREAM_SID) + best = ngx_http_upstream_get_rr_peer_by_sid(rrp, pc->hint, &p, 0); + + if (best) { + goto best_chosen; + } +#endif + + for (peer = peers->peer, i = 0; + peer; + peer = peer->next, i++) + { + n = i / (8 * sizeof(uintptr_t)); + m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t)); + + if (rrp->tried[n] & m) { + continue; + } + + if (peer->down) { + continue; + } + + if (peer->max_fails + && peer->fails >= peer->max_fails + && now - peer->checked <= peer->fail_timeout) + { + continue; + } + + if (peer->max_conns && peer->conns >= peer->max_conns) { + continue; + } + + if (peer->inflight_reqs > 0) { + + ift = peer->inflight_last / peer->inflight_reqs + + (ngx_current_msec - peer->inflight_reqs_changed); + + ngx_http_upstream_response_time_avg(&peer->inflight_time, ift); + } + + /* + * select peer with least estimated time of processing; if there are + * multiple peers with the same time, select based on round-robin + */ + + eta = ngx_http_upstream_least_time_eta(ltp, peer); + + if (best == NULL + || eta * best->weight < best_eta * peer->weight) + { + best = peer; + best_eta = eta; + many = 0; + p = i; + + } else if (eta * best->weight == best_eta * peer->weight) { + many = 1; + } + } + + if (best == NULL) { + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pc->log, 0, + "get least time peer, no peer found"); + + goto failed; + } + + if (many) { + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pc->log, 0, + "get least time peer, many"); + + for (peer = best, i = p; + peer; + peer = peer->next, i++) + { + n = i / (8 * sizeof(uintptr_t)); + m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t)); + + if (rrp->tried[n] & m) { + continue; + } + + if (peer->down) { + continue; + } + + eta = ngx_http_upstream_least_time_eta(ltp, peer); + + if (eta * best->weight != best_eta * peer->weight) { + continue; + } + + if (peer->max_fails + && peer->fails >= peer->max_fails + && now - peer->checked <= peer->fail_timeout) + { + continue; + } + + if (peer->max_conns && peer->conns >= peer->max_conns) { + continue; + } + + peer->current_weight += peer->effective_weight; + total += peer->effective_weight; + + if (peer->effective_weight < peer->weight) { + peer->effective_weight++; + } + + if (peer->current_weight > best->current_weight) { + best = peer; + p = i; + } + } + } + + best->current_weight -= total; + +#if (NGX_HTTP_UPSTREAM_SID) +best_chosen: +#endif + + if (ltp->conf->use_inflight) { + if (best->inflight_reqs > 0) { + /* account time spent by inflight requests */ + best->inflight_last += + (ngx_current_msec - best->inflight_reqs_changed) + * best->inflight_reqs; + } + + best->inflight_reqs_changed = ngx_current_msec; + best->inflight_reqs++; + + ltp->inflight = 1; + } + + if (now - best->checked > best->fail_timeout) { + best->checked = now; + } + + pc->sockaddr = best->sockaddr; + pc->socklen = best->socklen; + pc->name = &best->name; + +#if (NGX_HTTP_UPSTREAM_SID) + pc->sid = &best->sid; +#endif + + best->conns++; + + rrp->current = best; + ngx_http_upstream_rr_peer_ref(peers, best); + + n = p / (8 * sizeof(uintptr_t)); + m = (uintptr_t) 1 << p % (8 * sizeof(uintptr_t)); + + rrp->tried[n] |= m; + + ngx_http_upstream_rr_peers_unlock(peers); + + return NGX_OK; + +failed: + + if (peers->next) { + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pc->log, 0, + "get least time peer, backup servers"); + + rrp->peers = peers->next; + + n = (rrp->peers->number + (8 * sizeof(uintptr_t) - 1)) + / (8 * sizeof(uintptr_t)); + + for (i = 0; i < n; i++) { + rrp->tried[i] = 0; + } + + ngx_http_upstream_rr_peers_unlock(peers); + + rc = ngx_http_upstream_get_least_time_peer(pc, rrp); + + if (rc != NGX_BUSY) { + return rc; + } + + ngx_http_upstream_rr_peers_wlock(peers); + } + +#if (NGX_HTTP_UPSTREAM_ZONE) +busy: +#endif + + ngx_http_upstream_rr_peers_unlock(peers); + + pc->name = peers->name; + + return NGX_BUSY; +} + + +static ngx_uint_t +ngx_http_upstream_least_time_eta(ngx_http_upstream_lt_peer_data_t *ltp, + ngx_http_upstream_rr_peer_t *peer) +{ + time_t now; + ngx_msec_t rt; + + switch (ltp->conf->mode) { + + case NGX_HTTP_UPSTREAM_LT_HEADER: + rt = peer->header_time; + break; + + default: /* NGX_HTTP_UPSTREAM_LT_LAST_BYTE */ + rt = peer->response_time; + } + + now = ngx_time(); + + if (now - peer->checked > peer->fail_timeout) { + /* + * once in fail_timeout make response time of a peer 2 times + * lower to give chances to slow peers + */ + rt >>= (now - peer->checked) / (peer->fail_timeout + 1); + } + + if (peer->inflight_reqs > 0) { + /* + * average inflight time exceeding average response time indicates + * bad (low priority) peer + */ + rt = ngx_max(rt, peer->inflight_time); + } + + if (rt > 5000) { + /* + * consider peers with response time greater than max equally bad + * and thus fallback to least_conns + */ + rt = 5000; + + } else { + /* + * divide response times into clusters to allow round-robin for peers + * with close response times + */ + rt += 20 - rt % 20; + } + + /* + * estimated time peer has to spend to finish processing current requests + */ + return rt * (1 + peer->conns); +} + + +static void +ngx_http_upstream_least_time_notify(ngx_peer_connection_t *pc, void *data, + ngx_uint_t type) +{ + ngx_http_upstream_lt_peer_data_t *ltp = data; + + ngx_msec_t last, *metric; + ngx_http_upstream_t *u; + ngx_http_upstream_rr_peer_t *peer; + ngx_http_upstream_rr_peers_t *peers; + ngx_http_upstream_rr_peer_data_t *rrp; + + rrp = <p->rrp; + + peers = rrp->peers; + peer = rrp->current; + + u = ltp->upstream; + + /* + * Only update average time here if needed for balancing. + * Otherwise, it will be updated in peer.free(). + */ + + switch (type) { + + case NGX_HTTP_UPSTREAM_NOTIFY_HEADER: + + if (ltp->conf->mode != NGX_HTTP_UPSTREAM_LT_HEADER) { + return; + } + + last = u->state->header_time; + metric = &peer->header_time; + break; + + default: + return; + } + + ngx_http_upstream_rr_peers_rlock(peers); + ngx_http_upstream_rr_peer_lock(peers, peer); + + ngx_http_upstream_response_time_avg(metric, last); + + if (ltp->inflight) { + ngx_http_upstream_least_time_inflight_done(ltp, peers, peer, last); + } + + ngx_http_upstream_rr_peer_unlock(peers, peer); + ngx_http_upstream_rr_peers_unlock(peers); +} + + +static void +ngx_http_upstream_least_time_inflight_done( + ngx_http_upstream_lt_peer_data_t *ltp, ngx_http_upstream_rr_peers_t *peers, + ngx_http_upstream_rr_peer_t *peer, ngx_msec_t last) +{ + if (peer->inflight_reqs == 1) { + /* no more inflight requests */ + peer->inflight_last = 0; + + } else { + + /* + * account time spent by inflight requests and forget about + * request "completed" right now + */ + peer->inflight_last += (ngx_current_msec - peer->inflight_reqs_changed) + * peer->inflight_reqs - last; + peer->inflight_reqs_changed = ngx_current_msec; + } + + peer->inflight_reqs--; + ltp->inflight = 0; +} + + +static void +ngx_http_upstream_free_least_time_peer(ngx_peer_connection_t *pc, + void *data, ngx_uint_t state) +{ + ngx_http_upstream_lt_peer_data_t *ltp = data; + + ngx_http_upstream_t *u; + ngx_http_upstream_rr_peer_t *peer; + ngx_http_upstream_rr_peers_t *peers; + ngx_http_upstream_rr_peer_data_t *rrp; + + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pc->log, 0, "free least time peer"); + + rrp = <p->rrp; + peers = rrp->peers; + peer = rrp->current; + + u = ltp->upstream; + + ngx_http_upstream_rr_peers_rlock(peers); + ngx_http_upstream_rr_peer_lock(peers, peer); + + if (ltp->inflight) { + ngx_http_upstream_least_time_inflight_done(ltp, peers, peer, + u->state->response_time); + } + + /* + * only successful attempts are accounted to mitigate preferring + * of failing peers + */ + if (state & (NGX_PEER_FAILED|NGX_PEER_NEXT)) { + goto done; + } + + if (u->state->header_time == (ngx_msec_t) -1) { + goto done; + } + + ngx_http_upstream_response_time_avg(&peer->response_time, + u->state->response_time); + + if (ltp->conf->use_inflight + && ltp->conf->mode == NGX_HTTP_UPSTREAM_LT_HEADER) + { + goto done; + } + + ngx_http_upstream_response_time_avg(&peer->header_time, + u->state->header_time); + +done: + + ngx_http_upstream_free_round_robin_peer_locked(pc, rrp, state); +} + + +static void * +ngx_http_upstream_least_time_create_conf(ngx_conf_t *cf) +{ + ngx_http_upstream_lt_conf_t *conf; + + conf = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_lt_conf_t)); + if (conf == NULL) { + return NULL; + } + + /* + * set by ngx_pcalloc(): + * + * conf->use_inflight = 0; + */ + + conf->mode = NGX_CONF_UNSET_UINT; + + return conf; +} + + +static char * +ngx_http_upstream_least_time(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) +{ + ngx_str_t *value; + ngx_http_upstream_lt_conf_t *ltcf; + ngx_http_upstream_srv_conf_t *uscf; + + uscf = ngx_http_conf_get_module_srv_conf(cf, ngx_http_upstream_module); + + if (uscf->peer.init_upstream) { + ngx_conf_log_error(NGX_LOG_WARN, cf, 0, + "load balancing method redefined"); + } + + uscf->peer.init_upstream = ngx_http_upstream_init_least_time; + + uscf->flags = NGX_HTTP_UPSTREAM_CREATE + |NGX_HTTP_UPSTREAM_MODIFY + |NGX_HTTP_UPSTREAM_WEIGHT + |NGX_HTTP_UPSTREAM_MAX_CONNS + |NGX_HTTP_UPSTREAM_MAX_FAILS + |NGX_HTTP_UPSTREAM_FAIL_TIMEOUT + |NGX_HTTP_UPSTREAM_DOWN + |NGX_HTTP_UPSTREAM_BACKUP; + + if (cf->args->nelts == 3) { + value = cf->args->elts; + ltcf = ngx_http_conf_upstream_srv_conf(uscf, + ngx_http_upstream_least_time_module); + if (ngx_strcmp(value[2].data, "inflight") == 0) { + ltcf->use_inflight = 1; + + } else { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "invalid parameter \"%V\"", &value[2]); + return NGX_CONF_ERROR; + } + } + + return ngx_conf_set_enum_slot(cf, cmd, conf); +} + diff --git a/src/http/ngx_http_upstream_round_robin.h b/src/http/ngx_http_upstream_round_robin.h index 76a888945..ec39cb6e6 100644 --- a/src/http/ngx_http_upstream_round_robin.h +++ b/src/http/ngx_http_upstream_round_robin.h @@ -91,7 +91,16 @@ struct ngx_http_upstream_rr_peer_s { ngx_http_upstream_rr_peer_t *next; - NGX_COMPAT_BEGIN(13) +#if (NGX_HTTP_UPSTREAM_LEAST_TIME || NGX_COMPAT) + ngx_msec_t header_time; + ngx_msec_t response_time; + ngx_msec_t inflight_time; + ngx_msec_t inflight_last; + ngx_msec_t inflight_reqs_changed; + ngx_uint_t inflight_reqs; +#endif + + NGX_COMPAT_BEGIN(7) NGX_COMPAT_END }; @@ -238,6 +247,12 @@ typedef struct { } ngx_http_upstream_rr_peer_data_t; +/* exponential moving average + rounding */ +#define ngx_http_upstream_response_time_avg(avg, v) \ + *(avg) = (*(avg) ? (0.5 + ((double) (v) * 0.05 + (double) (*(avg)) * 0.95))\ + : (v)) + + ngx_int_t ngx_http_upstream_init_round_robin(ngx_conf_t *cf, ngx_http_upstream_srv_conf_t *us); ngx_int_t ngx_http_upstream_init_round_robin_peer(ngx_http_request_t *r, -- 2.47.3