From 5d0f5f816881e46cde269d0785234758aca1af76 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 21 Mar 2026 15:19:49 +0000 Subject: [PATCH] MINOR: mux-h2: assign a limited frames processing budget This introduces 3 new settings: tune.h2.be.max-frames-at-once and tune.h2.fe.max-frames-at-once, which limit the number of frames that will be processed at once on the backend and frontend sides respectively, and tune.h2.fe.max-rst-at-once which limits the number of RST_STREAM frames processed at once on the frontend. We can now yield when reading too many frames at once, which makes it possible to limit the latency caused by processing too many frames in large buffers. However, if we stop due to the RST budget being depleted, it's most likely a sign of protocol abuse, so we make the tasklet go to BULK since the goal is to punish it. By limiting the number of RST per loop to 1, the SSL response time drops from 95ms to 1.6ms during an H2 RST flood attack, and the maximum SSL connection rate drops from 35.5k to 28.0k instead of 11.8k. A moderate SSL load that shows 1ms response time and 23kcps increases to 2ms with 15kcps versus 95ms and 800cps before. The average loop time goes down from 270-280us to 160us, while still doubling the attack absorption rate with the same CPU capacity. This patch may usefully be backported to 3.3 and 3.2. 
Note that to be effective, this relies on the following patches: MEDIUM: sched: do not run a same task multiple times in series MINOR: sched: do not requeue a tasklet into the current queue MINOR: sched: do not punish self-waking tasklets anymore MEDIUM: sched: do not punish self-waking tasklets if TASK_WOKEN_ANY MEDIUM: sched: change scheduler budgets to lower TL_BULK --- doc/configuration.txt | 29 ++++++++++++++++++++ src/mux_h2.c | 63 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/doc/configuration.txt b/doc/configuration.txt index a1257973b..5f8963745 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -1886,10 +1886,13 @@ The following keywords are supported in the "global" section : - tune.h2.be.glitches-threshold - tune.h2.be.initial-window-size - tune.h2.be.max-concurrent-streams + - tune.h2.be.max-frames-at-once - tune.h2.be.rxbuf - tune.h2.fe.glitches-threshold - tune.h2.fe.initial-window-size - tune.h2.fe.max-concurrent-streams + - tune.h2.fe.max-frames-at-once + - tune.h2.fe.max-rst-at-once - tune.h2.fe.max-total-streams - tune.h2.fe.rxbuf - tune.h2.header-table-size @@ -4368,6 +4371,13 @@ tune.h2.be.max-concurrent-streams case). It is highly recommended not to increase this value; some might find it optimal to run at low values (1..5 typically). +tune.h2.be.max-frames-at-once + Sets the maximum number of HTTP/2 incoming frames that will be processed at + once on a backend connection. It can be useful to set this to a low value + (a few tens to a few hundreds) when dealing with very large buffers in order + to maintain a low latency and a better fairness between multiple connections. + The default value is zero, which means that no limitation is enforced. + tune.h2.be.rxbuf Sets the HTTP/2 receive buffer size for outgoing connections, in bytes. This size will be rounded up to the next multiple of tune.bufsize and will be @@ -4458,6 +4468,25 @@ tune.h2.fe.max-concurrent-streams [args...] 
tune.h2.fe.max-concurrent-streams 100 rq-load auto min 15 +tune.h2.fe.max-frames-at-once + Sets the maximum number of HTTP/2 incoming frames that will be processed at + once on a frontend connection. It can be useful to set this to a low value + (a few tens to a few hundreds) when dealing with very large buffers in order + to maintain a low latency and a better fairness between multiple connections. + The default value is zero, which means that no limitation is enforced. + +tune.h2.fe.max-rst-at-once + Sets the maximum number of HTTP/2 incoming RST_STREAM frames that will be + processed at once on a frontend connection. Once the specified number of + RST_STREAM frames are received, the connection handler will be placed in a + low priority queue and be processed after all other tasks. It can be useful + to set this to a very low value (1 or a few units) to significantly reduce + the impact of RST_STREAM floods. RST_STREAM frames do legitimately happen + when a user clicks on the Stop button in their browser, but the few extra + milliseconds caused by this requeuing are generally unnoticeable, while the + requeuing is generally effective at significantly lowering the load caused + by such floods. The default value is zero, which means that no limitation + is enforced. + tune.h2.fe.max-total-streams Sets the HTTP/2 maximum number of total streams processed per incoming connection. 
Once this limit is reached, HAProxy will send a graceful GOAWAY diff --git a/src/mux_h2.c b/src/mux_h2.c index 99c832ceb..edd4ec507 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -489,6 +489,9 @@ static int h2_be_glitches_threshold = 0; /* backend's max glitches static int h2_fe_glitches_threshold = 0; /* frontend's max glitches: unlimited */ static uint h2_be_rxbuf = 0; /* backend's default total rxbuf (bytes) */ static uint h2_fe_rxbuf = 0; /* frontend's default total rxbuf (bytes) */ +static unsigned int h2_be_max_frames_at_once = 0; /* backend value: 0=no limit */ +static unsigned int h2_fe_max_frames_at_once = 0; /* frontend value: 0=no limit */ +static unsigned int h2_fe_max_rst_at_once = 0; /* frontend value: 0=no limit */ static unsigned int h2_settings_max_concurrent_streams = 100; /* default value */ static unsigned int h2_be_settings_max_concurrent_streams = 0; /* backend value */ static unsigned int h2_fe_settings_max_concurrent_streams = 0; /* frontend value */ @@ -4239,6 +4242,8 @@ static void h2_process_demux(struct h2c *h2c) struct h2_fh hdr; unsigned int padlen = 0; int32_t old_iw = h2c->miw; + uint frames_budget = 0; + uint rst_budget = 0; TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn); @@ -4327,6 +4332,14 @@ static void h2_process_demux(struct h2c *h2c) } } + if (h2c->flags & H2_CF_IS_BACK) { + frames_budget = h2_be_max_frames_at_once; + } + else { + frames_budget = h2_fe_max_frames_at_once; + rst_budget = h2_fe_max_rst_at_once; + } + /* process as many incoming frames as possible below */ while (1) { int ret = 0; @@ -4629,6 +4642,29 @@ static void h2_process_demux(struct h2c *h2c) h2c->st0 = H2_CS_FRAME_H; } } + + /* If more frames remain in the buffer, let's first check if we've + * depleted the frames processing budget. Consuming the RST budget + * makes the tasklet go to TL_BULK to give it a lower priority than + * other processing since it's often used by attacks, while other + * frame types just yield normally. 
+ */ + if (b_data(&h2c->dbuf)) { + if (h2c->dft == H2_FT_RST_STREAM && (rst_budget && !--rst_budget)) { + /* RST budget exhausted, we have to yield now. The extra 0 passed + * to the wakeup is presumably what sends us to TL_BULK (TODO confirm). + */ + tasklet_wakeup(h2c->wait_event.tasklet, 0); + break; + } + else if ((frames_budget && !--frames_budget)) { + /* we've consumed all frames permitted by the + * budget (RST_STREAM frames count here too), we have to yield now. + */ + tasklet_wakeup(h2c->wait_event.tasklet); + break; + } + } } if (h2c_update_strm_rx_win(h2c) && @@ -8800,6 +8836,30 @@ static int h2_parse_max_total_streams(char **args, int section_type, struct prox return 0; } +/* config parser for global "tune.h2.{be,fe}.max-frames-at-once" and "tune.h2.fe.max-rst-at-once"; returns 0 on success, -1 if too_many_args() fails. NOTE(review): atoi() output is stored unchecked, negative/non-numeric input is not rejected */ +static int h2_parse_max_frames_at_once(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + uint *vptr; + + /* pick the setting matching the keyword (backend or frontend only) */ + if (strcmp(args[0], "tune.h2.be.max-frames-at-once") == 0) + vptr = &h2_be_max_frames_at_once; + else if (strcmp(args[0], "tune.h2.fe.max-frames-at-once") == 0) + vptr = &h2_fe_max_frames_at_once; + else if (strcmp(args[0], "tune.h2.fe.max-rst-at-once") == 0) + vptr = &h2_fe_max_rst_at_once; + else + BUG_ON(1, "unhandled keyword"); + + if (too_many_args(1, args, err, NULL)) + return -1; + + *vptr = atoi(args[1]); + return 0; +} + /* config parser for global "tune.h2.max-frame-size" */ static int h2_parse_max_frame_size(char **args, int section_type, struct proxy *curpx, const struct proxy *defpx, const char *file, int line, @@ -8898,10 +8958,13 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "tune.h2.be.glitches-threshold", h2_parse_glitches_threshold }, { CFG_GLOBAL, "tune.h2.be.initial-window-size", h2_parse_initial_window_size }, { CFG_GLOBAL, "tune.h2.be.max-concurrent-streams", h2_parse_max_concurrent_streams }, + { CFG_GLOBAL, "tune.h2.be.max-frames-at-once", h2_parse_max_frames_at_once }, { CFG_GLOBAL, "tune.h2.be.rxbuf", h2_parse_rxbuf }, { CFG_GLOBAL, 
"tune.h2.fe.glitches-threshold", h2_parse_glitches_threshold }, { CFG_GLOBAL, "tune.h2.fe.initial-window-size", h2_parse_initial_window_size }, { CFG_GLOBAL, "tune.h2.fe.max-concurrent-streams", h2_parse_max_concurrent_streams }, + { CFG_GLOBAL, "tune.h2.fe.max-frames-at-once", h2_parse_max_frames_at_once }, + { CFG_GLOBAL, "tune.h2.fe.max-rst-at-once", h2_parse_max_frames_at_once }, { CFG_GLOBAL, "tune.h2.fe.max-total-streams", h2_parse_max_total_streams }, { CFG_GLOBAL, "tune.h2.fe.rxbuf", h2_parse_rxbuf }, { CFG_GLOBAL, "tune.h2.header-table-size", h2_parse_header_table_size }, -- 2.47.3