From 01dfe17acf5b16471482f4f80849d3cddee9b613 Mon Sep 17 00:00:00 2001 From: Aurelien DARRAGON Date: Mon, 30 Jun 2025 10:57:29 +0200 Subject: [PATCH] MEDIUM: server: add and use a separate last_change variable for internal use last_change server metric is used for 2 separate purposes. First it is used to report last server state change date for stats and other related metrics. But it is also used internally, including in sensitive paths, such as lb related stuff to take decision or perform computations (ie: in srv_dynamic_maxconn()). Due to last_change counter now being split over thread groups since 16eb0fa ("MAJOR: counters: dispatch counters over thread groups"), reading the aggregated value has a cost, and we cannot afford to consult last_change value from srv_dynamic_maxconn() anymore. Moreover, since the value is used to take decision for the current process we don't wan't the variable to be updated by another process in our back. To prevent performance regression and sharing issues, let's instead add a separate srv->last_change value, which is not updated atomically (given how rare the updates are), and only serves for places where the use of the aggregated last_change counter/stats (split over thread groups) is too costly. --- include/haproxy/server-t.h | 1 + src/check.c | 2 +- src/proxy.c | 2 +- src/queue.c | 9 +++------ src/server.c | 23 ++++++++++------------- src/server_state.c | 1 + 6 files changed, 17 insertions(+), 21 deletions(-) diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h index c1e261130..57642a031 100644 --- a/include/haproxy/server-t.h +++ b/include/haproxy/server-t.h @@ -355,6 +355,7 @@ struct server { short onmarkedup; /* what to do when marked up: one of HANA_ONMARKEDUP_* */ int slowstart; /* slowstart time in seconds (ms in the conf) */ int idle_ping; /* MUX idle-ping interval in ms */ + unsigned long last_change; /* internal use only (not for stats purpose): last time the server state was changed, doesn't change often, not updated atomically on purpose */ char *id; /* just for identification */ uint32_t rid; /* revision: if id has been reused for a new server, rid won't match */ diff --git a/src/check.c b/src/check.c index 2689eddc0..f717d5228 100644 --- a/src/check.c +++ b/src/check.c @@ -994,7 +994,7 @@ int httpchk_build_status_header(struct server *s, struct buffer *buf) "UP %d/%d", "UP", "NOLB %d/%d", "NOLB", "no check" }; - unsigned long last_change = COUNTERS_SHARED_LAST(s->counters.shared->tg, last_change); + unsigned long last_change = s->last_change; if (!(s->check.state & CHK_ST_ENABLED)) sv_state = 6; diff --git a/src/proxy.c b/src/proxy.c index 6294005bd..5e09641a3 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -2972,7 +2972,7 @@ static int dump_servers_state(struct appctx *appctx) dump_server_addr(&srv->check.addr, srv_check_addr); dump_server_addr(&srv->agent.addr, srv_agent_addr); - srv_time_since_last_change = ns_to_sec(now_ns) - COUNTERS_SHARED_LAST(srv->counters.shared->tg, last_change); + srv_time_since_last_change = ns_to_sec(now_ns) - srv->last_change; bk_f_forced_id = px->options & PR_O_FORCED_ID ? 1 : 0; srv_f_forced_id = srv->flags & SRV_F_FORCED_ID ? 1 : 0; diff --git a/src/queue.c b/src/queue.c index c57bbe690..7f06ac905 100644 --- a/src/queue.c +++ b/src/queue.c @@ -104,7 +104,6 @@ DECLARE_POOL(pool_head_pendconn, "pendconn", sizeof(struct pendconn)); unsigned int srv_dynamic_maxconn(const struct server *s) { unsigned int max; - unsigned long last_change; if (s->proxy->beconn >= s->proxy->fullconn) /* no fullconn or proxy is full */ @@ -115,13 +114,11 @@ unsigned int srv_dynamic_maxconn(const struct server *s) else max = MAX(s->minconn, s->proxy->beconn * s->maxconn / s->proxy->fullconn); - last_change = COUNTERS_SHARED_LAST(s->counters.shared->tg, last_change); - if ((s->cur_state == SRV_ST_STARTING) && - ns_to_sec(now_ns) < last_change + s->slowstart && - ns_to_sec(now_ns) >= last_change) { + ns_to_sec(now_ns) < s->last_change + s->slowstart && + ns_to_sec(now_ns) >= s->last_change) { unsigned int ratio; - ratio = 100 * (ns_to_sec(now_ns) - last_change) / s->slowstart; + ratio = 100 * (ns_to_sec(now_ns) - s->last_change) / s->slowstart; max = MAX(1, max * ratio / 100); } return max; diff --git a/src/server.c b/src/server.c index 697b9dcc5..3fc1cd997 100644 --- a/src/server.c +++ b/src/server.c @@ -144,12 +144,10 @@ const char *srv_op_st_chg_cause(enum srv_op_st_chg_cause cause) int srv_downtime(const struct server *s) { - unsigned long last_change = COUNTERS_SHARED_LAST(s->counters.shared->tg, last_change); - - if ((s->cur_state != SRV_ST_STOPPED) || last_change >= ns_to_sec(now_ns)) // ignore negative time + if ((s->cur_state != SRV_ST_STOPPED) || s->last_change >= ns_to_sec(now_ns)) // ignore negative time return s->down_time; - return ns_to_sec(now_ns) - last_change + s->down_time; + return ns_to_sec(now_ns) - s->last_change + s->down_time; } int srv_getinter(const struct check *check) @@ -2460,11 +2458,10 @@ INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws); */ void server_recalc_eweight(struct server *sv, int must_update) { - unsigned long last_change = COUNTERS_SHARED_LAST(sv->counters.shared->tg, last_change); struct proxy *px = sv->proxy; unsigned w; - if (ns_to_sec(now_ns) < last_change || ns_to_sec(now_ns) >= last_change + sv->slowstart) { + if (ns_to_sec(now_ns) < sv->last_change || ns_to_sec(now_ns) >= sv->last_change + sv->slowstart) { /* go to full throttle if the slowstart interval is reached unless server is currently down */ if ((sv->cur_state != SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING)) sv->next_state = SRV_ST_RUNNING; @@ -2476,7 +2473,7 @@ void server_recalc_eweight(struct server *sv, int must_update) if ((sv->cur_state == SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN)) w = 1; else if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN)) - w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - last_change) + sv->slowstart) / sv->slowstart; + w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - sv->last_change) + sv->slowstart) / sv->slowstart; else w = px->lbprm.wdiv; @@ -3047,6 +3044,7 @@ struct server *new_server(struct proxy *proxy) srv->rid = 0; /* rid defaults to 0 */ srv->next_state = SRV_ST_RUNNING; /* early server setup */ + srv->last_change = ns_to_sec(now_ns); srv->check.obj_type = OBJ_TYPE_CHECK; srv->check.status = HCHK_STATUS_INI; @@ -5782,7 +5780,7 @@ static int srv_alloc_lb(struct server *sv, struct proxy *be) /* updates the server's weight during a warmup stage. Once the final weight is * reached, the task automatically stops. Note that any server status change - * must have updated s->counters.last_change accordingly. + * must have updated server last_change accordingly. */ static struct task *server_warmup(struct task *t, void *context, unsigned int state) { @@ -5838,7 +5836,7 @@ static int init_srv_slowstart(struct server *srv) if (srv->next_state == SRV_ST_STARTING) { task_schedule(srv->warmup, tick_add(now_ms, - MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - COUNTERS_SHARED_LAST(srv->counters.shared->tg, last_change))) / 20))); + MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - srv->last_change)) / 20))); } } @@ -7035,11 +7033,9 @@ static void srv_update_status(struct server *s, int type, int cause) /* check if server stats must be updated due the the server state change */ if (srv_prev_state != s->cur_state) { if (srv_prev_state == SRV_ST_STOPPED) { - unsigned long last_change = COUNTERS_SHARED_LAST(s->counters.shared->tg, last_change); - /* server was down and no longer is */ - if (last_change < ns_to_sec(now_ns)) // ignore negative times - s->down_time += ns_to_sec(now_ns) - last_change; + if (s->last_change < ns_to_sec(now_ns)) // ignore negative times + s->down_time += ns_to_sec(now_ns) - s->last_change; _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_UP, cb_data.common, s); } else if (s->cur_state == SRV_ST_STOPPED) { @@ -7056,6 +7052,7 @@ static void srv_update_status(struct server *s, int type, int cause) HA_ATOMIC_STORE(&s->proxy->ready_srv, NULL); HA_ATOMIC_STORE(&s->counters.shared->tg[tgid - 1]->last_change, ns_to_sec(now_ns)); + s->last_change = ns_to_sec(now_ns); /* publish the state change */ _srv_event_hdl_prepare_state(&cb_data.state, diff --git a/src/server_state.c b/src/server_state.c index 6c9b19a08..ee1f51fcb 100644 --- a/src/server_state.c +++ b/src/server_state.c @@ -322,6 +322,7 @@ static void srv_state_srv_update(struct server *srv, int version, char **params) } HA_ATOMIC_STORE(&srv->counters.shared->tg[0]->last_change, ns_to_sec(now_ns) - srv_last_time_change); + srv->last_change = ns_to_sec(now_ns) - srv_last_time_change; srv->check.status = srv_check_status; srv->check.result = srv_check_result; -- 2.47.3