[PATCH 4 of 4] QUIC: path MTU discovery

Roman Arutyunyan arut at nginx.com
Mon Jul 31 17:34:20 UTC 2023


Hi,

On Fri, Jul 28, 2023 at 07:51:22PM +0400, Sergey Kandaurov wrote:
> 
> > On 6 Jul 2023, at 17:57, Roman Arutyunyan <arut at nginx.com> wrote:
> > 
> > # HG changeset patch
> > # User Roman Arutyunyan <arut at nginx.com>
> > # Date 1688481216 -14400
> > #      Tue Jul 04 18:33:36 2023 +0400
> > # Node ID 1a49fd5d2013b6c8d50263499e6ced440927e949
> > # Parent  a1ea543d009311765144351b2557c00c8e6445bf
> > QUIC: path MTU discovery.
> > 
> > MTU selection starts by stepping up until the first failure.  Then binary
> > search is used to find the path MTU.
> > 
> > diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
> > --- a/src/core/ngx_connection.c
> > +++ b/src/core/ngx_connection.c
> > @@ -1583,6 +1583,10 @@ ngx_connection_error(ngx_connection_t *c
> >     }
> > #endif
> > 
> > +    if (err == NGX_EMSGSIZE && c->log_error == NGX_ERROR_IGNORE_EMSGSIZE) {
> > +        return 0;
> > +    }
> > +
> >     if (err == 0
> >         || err == NGX_ECONNRESET
> > #if (NGX_WIN32)
> > @@ -1600,6 +1604,7 @@ ngx_connection_error(ngx_connection_t *c
> >     {
> >         switch (c->log_error) {
> > 
> > +        case NGX_ERROR_IGNORE_EMSGSIZE:
> >         case NGX_ERROR_IGNORE_EINVAL:
> >         case NGX_ERROR_IGNORE_ECONNRESET:
> >         case NGX_ERROR_INFO:
> > diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
> > --- a/src/core/ngx_connection.h
> > +++ b/src/core/ngx_connection.h
> > @@ -97,7 +97,8 @@ typedef enum {
> >     NGX_ERROR_ERR,
> >     NGX_ERROR_INFO,
> >     NGX_ERROR_IGNORE_ECONNRESET,
> > -    NGX_ERROR_IGNORE_EINVAL
> > +    NGX_ERROR_IGNORE_EINVAL,
> > +    NGX_ERROR_IGNORE_EMSGSIZE
> > } ngx_connection_log_error_e;
> > 
> > 
> > diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
> > --- a/src/event/quic/ngx_event_quic.c
> > +++ b/src/event/quic/ngx_event_quic.c
> > @@ -149,11 +149,6 @@ ngx_quic_apply_transport_params(ngx_conn
> >         ngx_log_error(NGX_LOG_INFO, c->log, 0,
> >                       "quic maximum packet size is invalid");
> >         return NGX_ERROR;
> > -
> > -    } else if (ctp->max_udp_payload_size > ngx_quic_max_udp_payload(c)) {
> > -        ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
> > -        ngx_log_debug0(NGX_LOG_DEBUG_EVENT, c->log, 0,
> > -                       "quic client maximum packet size truncated");
> >     }
> > 
> >     if (ctp->active_connection_id_limit < 2) {
> > @@ -286,7 +281,7 @@ ngx_quic_new_connection(ngx_connection_t
> > 
> >     qc->path_validation.log = c->log;
> >     qc->path_validation.data = c;
> > -    qc->path_validation.handler = ngx_quic_path_validation_handler;
> > +    qc->path_validation.handler = ngx_quic_path_handler;
> > 
> >     qc->conf = conf;
> > 
> > @@ -297,7 +292,7 @@ ngx_quic_new_connection(ngx_connection_t
> >     ctp = &qc->ctp;
> > 
> >     /* defaults to be used before actual client parameters are received */
> > -    ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
> > +    ctp->max_udp_payload_size = NGX_QUIC_MAX_UDP_PAYLOAD_SIZE;
> >     ctp->ack_delay_exponent = NGX_QUIC_DEFAULT_ACK_DELAY_EXPONENT;
> >     ctp->max_ack_delay = NGX_QUIC_DEFAULT_MAX_ACK_DELAY;
> >     ctp->active_connection_id_limit = 2;
> > diff --git a/src/event/quic/ngx_event_quic_ack.c b/src/event/quic/ngx_event_quic_ack.c
> > --- a/src/event/quic/ngx_event_quic_ack.c
> > +++ b/src/event/quic/ngx_event_quic_ack.c
> > @@ -229,6 +229,12 @@ ngx_quic_handle_ack_frame_range(ngx_conn
> > 
> >     qc = ngx_quic_get_connection(c);
> > 
> > +    if (ctx->level == ssl_encryption_application) {
> > +        if (ngx_quic_handle_path_mtu_ack(c, qc->path, min, max) != NGX_OK) {
> > +            return NGX_ERROR;
> > +        }
> > +    }
> > +
> >     st->max_pn = NGX_TIMER_INFINITE;
> >     found = 0;
> > 
> > diff --git a/src/event/quic/ngx_event_quic_connection.h b/src/event/quic/ngx_event_quic_connection.h
> > --- a/src/event/quic/ngx_event_quic_connection.h
> > +++ b/src/event/quic/ngx_event_quic_connection.h
> > @@ -66,6 +66,14 @@ typedef struct ngx_quic_keys_s        ng
> > #define ngx_quic_get_socket(c)               ((ngx_quic_socket_t *)((c)->udp))
> > 
> > 
> > +typedef enum {
> > +    NGX_QUIC_PATH_IDLE = 0,
> > +    NGX_QUIC_PATH_VALIDATING,
> > +    NGX_QUIC_PATH_WAITING,
> > +    NGX_QUIC_PATH_DISCOVERING_MTU
> 
> Nitpicking on too long name.
> What about NGX_QUIC_PATH_MTUD or something.

OK, renamed.

> > +} ngx_quic_path_state_e;
> > +
> > +
> > struct ngx_quic_client_id_s {
> >     ngx_queue_t                       queue;
> >     uint64_t                          seqnum;
> > @@ -89,18 +97,22 @@ struct ngx_quic_path_s {
> >     ngx_sockaddr_t                    sa;
> >     socklen_t                         socklen;
> >     ngx_quic_client_id_t             *cid;
> > +    ngx_quic_path_state_e             state;
> >     ngx_msec_t                        expires;
> >     ngx_uint_t                        tries;
> >     ngx_uint_t                        tag;
> > +    size_t                            mtu;
> > +    size_t                            mtud;
> > +    size_t                            max_mtu;
> >     off_t                             sent;
> >     off_t                             received;
> >     u_char                            challenge1[8];
> >     u_char                            challenge2[8];
> >     uint64_t                          seqnum;
> > +    uint64_t                          mtu_pnum[NGX_QUIC_PATH_RETRIES];
> >     ngx_str_t                         addr_text;
> >     u_char                            text[NGX_SOCKADDR_STRLEN];
> > -    unsigned                          validated:1;
> > -    unsigned                          validating:1;
> > +    ngx_uint_t                        validated; /* unsigned validated:1; */
> > };
> > 
> > 
> > diff --git a/src/event/quic/ngx_event_quic_migration.c b/src/event/quic/ngx_event_quic_migration.c
> > --- a/src/event/quic/ngx_event_quic_migration.c
> > +++ b/src/event/quic/ngx_event_quic_migration.c
> > @@ -10,6 +10,11 @@
> > #include <ngx_event_quic_connection.h>
> > 
> > 
> > +#define NGX_QUIC_PATH_MTU_DELAY       1000
> 
> The value looks too much,
> it would delay each subsequent probe (which is not retry).

Probably the right value should be determined in real-life scenarios.
Let's decrease the value by a factor of 10 for now.

> > +#define NGX_QUIC_PATH_MTU_STEP        256
> > +#define NGX_QUIC_PATH_MTU_PRECISION   16
> > +
> > +
> > static void ngx_quic_set_connection_path(ngx_connection_t *c,
> >     ngx_quic_path_t *path);
> > static ngx_int_t ngx_quic_validate_path(ngx_connection_t *c,
> > @@ -17,7 +22,15 @@ static ngx_int_t ngx_quic_validate_path(
> > static ngx_int_t ngx_quic_send_path_challenge(ngx_connection_t *c,
> >     ngx_quic_path_t *path);
> > static void ngx_quic_set_path_timer(ngx_connection_t *c);
> > +static ngx_int_t ngx_quic_expire_path_validation(ngx_connection_t *c,
> > +    ngx_quic_path_t *path);
> > +static ngx_int_t ngx_quic_expire_path_mtu_delay(ngx_connection_t *c,
> > +    ngx_quic_path_t *path);
> > +static ngx_int_t ngx_quic_expire_path_mtu_discovery(ngx_connection_t *c,
> > +    ngx_quic_path_t *path);
> > static ngx_quic_path_t *ngx_quic_get_path(ngx_connection_t *c, ngx_uint_t tag);
> > +static ngx_int_t ngx_quic_send_path_mtu_probe(ngx_connection_t *c,
> > +    ngx_quic_path_t *path);
> > 
> > 
> > ngx_int_t
> > @@ -97,7 +110,7 @@ ngx_quic_handle_path_response_frame(ngx_
> >     {
> >         path = ngx_queue_data(q, ngx_quic_path_t, queue);
> > 
> > -        if (!path->validating) {
> > +        if (path->state != NGX_QUIC_PATH_VALIDATING) {
> >             continue;
> >         }
> > 
> > @@ -136,6 +149,9 @@ valid:
> >         {
> >             /* address did not change */
> >             rst = 0;
> > +
> > +            path->mtu = prev->mtu;
> > +            path->max_mtu = prev->max_mtu;
> >         }
> >     }
> > 
> > @@ -167,9 +183,8 @@ valid:
> >     ngx_quic_path_dbg(c, "is validated", path);
> > 
> >     path->validated = 1;
> > -    path->validating = 0;
> > 
> > -    ngx_quic_set_path_timer(c);
> > +    ngx_quic_discover_path_mtu(c, path);
> > 
> >     return NGX_OK;
> > }
> > @@ -217,6 +232,8 @@ ngx_quic_new_path(ngx_connection_t *c,
> >     path->addr_text.len = ngx_sock_ntop(sockaddr, socklen, path->text,
> >                                         NGX_SOCKADDR_STRLEN, 1);
> > 
> > +    path->mtu = NGX_QUIC_MIN_INITIAL_SIZE;
> > +
> >     ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >                    "quic path seq:%uL created addr:%V",
> >                    path->seqnum, &path->addr_text);
> > @@ -464,7 +481,7 @@ ngx_quic_handle_migration(ngx_connection
> > 
> >     ngx_quic_set_connection_path(c, next);
> > 
> > -    if (!next->validated && !next->validating) {
> > +    if (!next->validated && next->state != NGX_QUIC_PATH_VALIDATING) {
> >         if (ngx_quic_validate_path(c, next) != NGX_OK) {
> >             return NGX_ERROR;
> >         }
> > @@ -492,7 +509,6 @@ ngx_quic_validate_path(ngx_connection_t 
> >     ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >                    "quic initiated validation of path seq:%uL", path->seqnum);
> > 
> > -    path->validating = 1;
> >     path->tries = 0;
> > 
> >     if (RAND_bytes(path->challenge1, 8) != 1) {
> > @@ -511,6 +527,7 @@ ngx_quic_validate_path(ngx_connection_t 
> >     pto = ngx_max(ngx_quic_pto(c, ctx), 1000);
> > 
> >     path->expires = ngx_current_msec + pto;
> > +    path->state = NGX_QUIC_PATH_VALIDATING;
> > 
> >     ngx_quic_set_path_timer(c);
> > 
> > @@ -558,6 +575,42 @@ ngx_quic_send_path_challenge(ngx_connect
> > }
> > 
> > 
> > +void
> > +ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path)
> > +{
> > +    ngx_quic_connection_t  *qc;
> > +
> > +    qc = ngx_quic_get_connection(c);
> > +
> > +    if (path->max_mtu) {
> > +        if (path->max_mtu - path->mtu <= NGX_QUIC_PATH_MTU_PRECISION) {
> > +            path->state = NGX_QUIC_PATH_IDLE;
> > +            ngx_quic_set_path_timer(c);
> > +            return;
> > +        }
> > +
> > +        path->mtud = (path->mtu + path->max_mtu) / 2;
> > +
> > +    } else {
> > +        path->mtud = path->mtu + NGX_QUIC_PATH_MTU_STEP;
> 
> I like this approach, compared to multiplying probe steps.
> This better fits to the pattern to keep around a well-known
> initial value, which is typically 1500 bytes.
> Another one could be to let user specify its own initial
> value for cases when he knows well its network environment.
> 
> > +
> > +        if (path->mtud >= qc->ctp.max_udp_payload_size) {
> > +            path->mtud = qc->ctp.max_udp_payload_size;
> > +            path->max_mtu = qc->ctp.max_udp_payload_size;
> > +        }
> > +    }
> > +
> > +    path->state = NGX_QUIC_PATH_WAITING;
> > +    path->expires = ngx_current_msec + NGX_QUIC_PATH_MTU_DELAY;
> > +
> > +    ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> > +                   "quic path seq:%uL schedule mtu:%uz",
> > +                   path->seqnum, path->mtud);
> > +
> > +    ngx_quic_set_path_timer(c);
> > +}
> > +
> > +
> > static void
> > ngx_quic_set_path_timer(ngx_connection_t *c)
> > {
> > @@ -578,7 +631,7 @@ ngx_quic_set_path_timer(ngx_connection_t
> >     {
> >         path = ngx_queue_data(q, ngx_quic_path_t, queue);
> > 
> > -        if (!path->validating) {
> > +        if (path->state == NGX_QUIC_PATH_IDLE) {
> >             continue;
> >         }
> > 
> > @@ -600,22 +653,18 @@ ngx_quic_set_path_timer(ngx_connection_t
> > 
> > 
> > void
> > -ngx_quic_path_validation_handler(ngx_event_t *ev)
> > +ngx_quic_path_handler(ngx_event_t *ev)
> > {
> >     ngx_msec_t              now;
> >     ngx_queue_t            *q;
> > -    ngx_msec_int_t          left, next, pto;
> > -    ngx_quic_path_t        *path, *bkp;
> > +    ngx_msec_int_t          left;
> > +    ngx_quic_path_t        *path;
> >     ngx_connection_t       *c;
> > -    ngx_quic_send_ctx_t    *ctx;
> >     ngx_quic_connection_t  *qc;
> > 
> >     c = ev->data;
> >     qc = ngx_quic_get_connection(c);
> > 
> > -    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> > -
> > -    next = -1;
> >     now = ngx_current_msec;
> > 
> >     q = ngx_queue_head(&qc->paths);
> > @@ -625,83 +674,274 @@ ngx_quic_path_validation_handler(ngx_eve
> >         path = ngx_queue_data(q, ngx_quic_path_t, queue);
> >         q = ngx_queue_next(q);
> > 
> > -        if (!path->validating) {
> > +        if (path->state == NGX_QUIC_PATH_IDLE) {
> >             continue;
> >         }
> > 
> >         left = path->expires - now;
> > 
> >         if (left > 0) {
> > -
> > -            if (next == -1 || left < next) {
> > -                next = left;
> > -            }
> > -
> > -            continue;
> > -        }
> > -
> > -        if (++path->tries < NGX_QUIC_PATH_RETRIES) {
> > -            pto = ngx_max(ngx_quic_pto(c, ctx), 1000) << path->tries;
> > -
> > -            path->expires = ngx_current_msec + pto;
> > -
> > -            if (next == -1 || pto < next) {
> > -                next = pto;
> > -            }
> > -
> > -            /* retransmit */
> > -            (void) ngx_quic_send_path_challenge(c, path);
> > -
> >             continue;
> >         }
> > 
> > -        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
> > -                       "quic path seq:%uL validation failed", path->seqnum);
> > +        switch (path->state) {
> > +        case NGX_QUIC_PATH_VALIDATING:
> > +             if (ngx_quic_expire_path_validation(c, path) != NGX_OK) {
> > +                goto failed;
> > +            }
> > +
> > +            break;
> > +
> > +        case NGX_QUIC_PATH_WAITING:
> > +            if (ngx_quic_expire_path_mtu_delay(c, path) != NGX_OK) {
> > +                goto failed;
> > +            }
> > +
> > +            break;
> > +
> > +        case NGX_QUIC_PATH_DISCOVERING_MTU:
> > +            if (ngx_quic_expire_path_mtu_discovery(c, path) != NGX_OK) {
> > +                goto failed;
> > +             }
> > +
> > +            break;
> > +
> 
> indentation in several places here

Fixed, thanks.

> > +        default:
> > +            break;
> > +         }
> > +     }
> > +
> > +    ngx_quic_set_path_timer(c);
> > +
> > +    return;
> > 
> > -        /* found expired path */
> > +failed:
> > +
> > +    ngx_quic_close_connection(c, NGX_ERROR);
> > +}
> > +
> > +
> > +static ngx_int_t
> > +ngx_quic_expire_path_validation(ngx_connection_t *c, ngx_quic_path_t *path)
> > +{
> > +    ngx_msec_int_t          pto;
> > +    ngx_quic_path_t        *bkp;
> > +    ngx_quic_send_ctx_t    *ctx;
> > +    ngx_quic_connection_t  *qc;
> > 
> > -        path->validated = 0;
> > -        path->validating = 0;
> > +    qc = ngx_quic_get_connection(c);
> > +    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> > +
> > +    if (++path->tries < NGX_QUIC_PATH_RETRIES) {
> > +        pto = ngx_max(ngx_quic_pto(c, ctx), 1000) << path->tries;
> > +        path->expires = ngx_current_msec + pto;
> > +
> > +        (void) ngx_quic_send_path_challenge(c, path);
> > +
> > +        return NGX_OK;
> > +    }
> > +
> > +    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
> > +                   "quic path seq:%uL validation failed", path->seqnum);
> > +
> > +    /* found expired path */
> > +
> > +    path->validated = 0;
> > 
> > 
> > -        /* RFC 9000, 9.3.2.  On-Path Address Spoofing
> > -         *
> > -         * To protect the connection from failing due to such a spurious
> > -         * migration, an endpoint MUST revert to using the last validated
> > -         * peer address when validation of a new peer address fails.
> > -         */
> > -
> > -        if (qc->path == path) {
> > -            /* active path validation failed */
> > -
> > -            bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
> > +    /* RFC 9000, 9.3.2.  On-Path Address Spoofing
> > +     *
> > +     * To protect the connection from failing due to such a spurious
> > +     * migration, an endpoint MUST revert to using the last validated
> > +     * peer address when validation of a new peer address fails.
> > +     */
> > 
> > -            if (bkp == NULL) {
> > -                qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
> > -                qc->error_reason = "no viable path";
> > -                ngx_quic_close_connection(c, NGX_ERROR);
> > -                return;
> > -            }
> > +    if (qc->path == path) {
> > +        /* active path validation failed */
> > +
> > +        bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
> > 
> > -            qc->path = bkp;
> > -            qc->path->tag = NGX_QUIC_PATH_ACTIVE;
> > -
> > -            ngx_quic_set_connection_path(c, qc->path);
> > -
> > -            ngx_log_error(NGX_LOG_INFO, c->log, 0,
> > -                          "quic path seq:%uL addr:%V is restored from backup",
> > -                          qc->path->seqnum, &qc->path->addr_text);
> > -
> > -            ngx_quic_path_dbg(c, "is active", qc->path);
> > +        if (bkp == NULL) {
> > +            qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
> > +            qc->error_reason = "no viable path";
> > +            return NGX_ERROR;
> >         }
> > 
> > -        if (ngx_quic_free_path(c, path) != NGX_OK) {
> > -            ngx_quic_close_connection(c, NGX_ERROR);
> > -            return;
> > +        qc->path = bkp;
> > +        qc->path->tag = NGX_QUIC_PATH_ACTIVE;
> > +
> > +        ngx_quic_set_connection_path(c, qc->path);
> > +
> > +        ngx_log_error(NGX_LOG_INFO, c->log, 0,
> > +                      "quic path seq:%uL addr:%V is restored from backup",
> > +                      qc->path->seqnum, &qc->path->addr_text);
> > +
> > +        ngx_quic_path_dbg(c, "is active", qc->path);
> > +    }
> > +
> > +    return ngx_quic_free_path(c, path);
> > +}
> > +
> > +
> > +static ngx_int_t
> > +ngx_quic_expire_path_mtu_delay(ngx_connection_t *c, ngx_quic_path_t *path)
> > +{
> > +    ngx_int_t               rc;
> > +    ngx_uint_t              i;
> > +    ngx_msec_t              pto;
> > +    ngx_quic_send_ctx_t    *ctx;
> > +    ngx_quic_connection_t  *qc;
> > +
> > +    qc = ngx_quic_get_connection(c);
> > +    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> > +
> > +    path->tries = 0;
> > +
> > +    for ( ;; ) {
> > +
> > +        for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
> > +            path->mtu_pnum[i] = NGX_QUIC_UNSET_PN;
> > +        }
> > +
> > +        rc = ngx_quic_send_path_mtu_probe(c, path);
> > +
> > +        if (rc == NGX_ERROR) {
> > +            return NGX_ERROR;
> > +        }
> > +
> > +        if (rc == NGX_OK) {
> > +            pto = ngx_quic_pto(c, ctx);
> > +            path->expires = ngx_current_msec + pto;
> > +            path->state = NGX_QUIC_PATH_DISCOVERING_MTU;
> > +            return NGX_OK;
> > +        }
> > +
> > +        /* rc == NGX_DECLINED */
> > +
> > +        path->max_mtu = path->mtud;
> > +
> > +        if (path->max_mtu - path->mtu <= NGX_QUIC_PATH_MTU_PRECISION) {
> > +            path->state = NGX_QUIC_PATH_IDLE;
> > +            return NGX_OK;
> > +        }
> > +
> > +        path->mtud = (path->mtu + path->max_mtu) / 2;
> > +    }
> > +}
> > +
> > +
> > +static ngx_int_t
> > +ngx_quic_expire_path_mtu_discovery(ngx_connection_t *c, ngx_quic_path_t *path)
> > +{
> > +    ngx_int_t               rc;
> > +    ngx_msec_int_t          pto;
> > +    ngx_quic_send_ctx_t    *ctx;
> > +    ngx_quic_connection_t  *qc;
> > +
> > +    qc = ngx_quic_get_connection(c);
> > +    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> > +
> > +    if (++path->tries < NGX_QUIC_PATH_RETRIES) {
> > +        pto = ngx_quic_pto(c, ctx) << path->tries;
> > +        path->expires = ngx_current_msec + pto;
> > +
> 
> Setting path->expires there seems useless.
> It don't seem to be used in ngx_quic_send_path_mtu_probe(),
> and ngx_quic_discover_path_mtu() used to reset path->expires.

If ngx_quic_send_path_mtu_probe() below returns NGX_OK, the path should stay
in the same state, but expiration time should be advanced.  To make it
more obvious, I changed this code.

> > +        rc = ngx_quic_send_path_mtu_probe(c, path);
> > +        if (rc != NGX_DECLINED) {
> > +            return rc;
> >         }
> >     }
> > 
> > -    if (next != -1) {
> > -        ngx_add_timer(&qc->path_validation, next);
> > +    ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> > +                   "quic path seq:%uL expired mtu:%uz",
> > +                   path->seqnum, path->mtud);
> > +
> > +    path->max_mtu = path->mtud;
> > +
> > +    ngx_quic_discover_path_mtu(c, path);
> > +
> > +    return NGX_OK;
> > +}
> > +
> > +
> > +static ngx_int_t
> > +ngx_quic_send_path_mtu_probe(ngx_connection_t *c, ngx_quic_path_t *path)
> > +{
> > +    ngx_int_t               rc;
> > +    ngx_uint_t              log_error;
> > +    ngx_quic_frame_t        frame;
> > +    ngx_quic_send_ctx_t    *ctx;
> > +    ngx_quic_connection_t  *qc;
> > +
> > +    ngx_memzero(&frame, sizeof(ngx_quic_frame_t));
> > +
> > +    frame.level = ssl_encryption_application;
> > +    frame.type = NGX_QUIC_FT_PING;
> > +
> > +    qc = ngx_quic_get_connection(c);
> > +    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> > +    path->mtu_pnum[path->tries] = ctx->pnum;
> > +
> > +    ngx_log_debug4(NGX_LOG_DEBUG_EVENT, c->log, 0,
> > +                   "quic path seq:%uL send probe "
> > +                   "mtu:%uz pnum:%uL tries:%ui",
> > +                   path->seqnum, path->mtud, ctx->pnum, path->tries);
> > +
> > +    log_error = c->log_error;
> > +    c->log_error = NGX_ERROR_IGNORE_EMSGSIZE;
> > +
> > +    rc = ngx_quic_frame_sendto(c, &frame, path->mtud, path);
> > +    c->log_error = log_error;
> > +
> > +    if (rc == NGX_ERROR) {
> > +        if (c->write->error) {
> > +            c->write->error = 0;
> > +
> > +            ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> > +                           "quic path seq:%uL rejected mtu:%uz",
> > +                           path->seqnum, path->mtud);
> > +
> > +            return NGX_DECLINED;
> 
> The error handling looks awkward to decline on write error.
> ngx_sendmsg() sets write error on fatal sendmsg errors,
> and keeps it unset NGX_EAGAIN and NGX_EINTR, which
> are typically non-fatal transient errors.
> I understand this is done mostly to ignore EMSGSIZE,
> maybe it makes sense to ignore the rest sendmsg() errors.

The problem is, the error doesn't make it to this point.  All be have here is
NGX_ERROR and c->write->error.  I think, for PMTUD purposes we can safely
ignore the errors.  It's hinghly likely, the error will repeat while doing
connection I/O and we'll shortly close the connection.

> > +        }
> > +
> > +        return NGX_ERROR;
> >     }
> > +
> > +    return NGX_OK;
> > }
> > +
> > +
> > +ngx_int_t
> > +ngx_quic_handle_path_mtu_ack(ngx_connection_t *c, ngx_quic_path_t *path,
> > +    uint64_t min, uint64_t max)
> > +{
> > +    uint64_t    pnum;
> > +    ngx_uint_t  i;
> > +
> > +    if (path->state != NGX_QUIC_PATH_DISCOVERING_MTU) {
> > +        return NGX_OK;
> > +    }
> > +
> > +    for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
> > +        pnum = path->mtu_pnum[i];
> > +
> > +        if (pnum == NGX_QUIC_UNSET_PN) {
> > +            break;
> > +        }
> > +
> > +        if (pnum < min || pnum > max) {
> > +            continue;
> > +        }
> > +
> > +        path->mtu = path->mtud;
> > +
> > +        ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> > +                       "quic path seq:%uL ack mtu:%uz",
> > +                       path->seqnum, path->mtu);
> > +
> > +        ngx_quic_discover_path_mtu(c, path);
> > +
> > +        break;
> 
> What if two probes appeared ACKed in one range.
> Though that seems to be safe to break there, as two
> probes barely have different probe sizes, rather retries.

No matter how many probes are acked, the next step is still the same.
So there's no reason to find all of them.

> > +    }
> > +
> > +    return NGX_OK;
> > +}
> > diff --git a/src/event/quic/ngx_event_quic_migration.h b/src/event/quic/ngx_event_quic_migration.h
> > --- a/src/event/quic/ngx_event_quic_migration.h
> > +++ b/src/event/quic/ngx_event_quic_migration.h
> > @@ -18,10 +18,10 @@
> > #define NGX_QUIC_PATH_BACKUP    2
> > 
> > #define ngx_quic_path_dbg(c, msg, path)                                       \
> > -    ngx_log_debug6(NGX_LOG_DEBUG_EVENT, c->log, 0,                            \
> > -                   "quic path seq:%uL %s sent:%O recvd:%O state:%s%s",        \
> > +    ngx_log_debug7(NGX_LOG_DEBUG_EVENT, c->log, 0,                            \
> > +                   "quic path seq:%uL %s tx:%O rx:%O valid:%d st:%d mtu:%uz", \
> >                    path->seqnum, msg, path->sent, path->received,             \
> > -                   path->validated ? "V": "N", path->validating ? "R": "");
> > +                   path->validated, path->state, path->mtu);
> > 
> > ngx_int_t ngx_quic_handle_path_challenge_frame(ngx_connection_t *c,
> >     ngx_quic_header_t *pkt, ngx_quic_path_challenge_frame_t *f);
> > @@ -36,6 +36,10 @@ ngx_int_t ngx_quic_set_path(ngx_connecti
> > ngx_int_t ngx_quic_handle_migration(ngx_connection_t *c,
> >     ngx_quic_header_t *pkt);
> > 
> > -void ngx_quic_path_validation_handler(ngx_event_t *ev);
> > +void ngx_quic_path_handler(ngx_event_t *ev);
> > +
> > +void ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path);
> > +ngx_int_t ngx_quic_handle_path_mtu_ack(ngx_connection_t *c,
> > +    ngx_quic_path_t *path, uint64_t min, uint64_t max);
> 
> Nitpicking:
> maybe just ngx_quic_handle_path_mtu() ?
> This would match ngx_quic_discover_path_mtu() naming.

OK, renamed.

> > 
> > #endif /* _NGX_EVENT_QUIC_MIGRATION_H_INCLUDED_ */
> > diff --git a/src/event/quic/ngx_event_quic_output.c b/src/event/quic/ngx_event_quic_output.c
> > --- a/src/event/quic/ngx_event_quic_output.c
> > +++ b/src/event/quic/ngx_event_quic_output.c
> > @@ -10,9 +10,6 @@
> > #include <ngx_event_quic_connection.h>
> > 
> > 
> > -#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT   1252
> > -#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT6  1232
> > -
> > #define NGX_QUIC_MAX_UDP_SEGMENT_BUF  65487 /* 65K - IPv6 header */
> > #define NGX_QUIC_MAX_SEGMENTS            64 /* UDP_MAX_SEGMENTS */
> > 
> > @@ -61,21 +58,6 @@ static size_t ngx_quic_path_limit(ngx_co
> >     size_t size);
> > 
> > 
> > -size_t
> > -ngx_quic_max_udp_payload(ngx_connection_t *c)
> > -{
> > -    /* TODO: path MTU discovery */
> > -
> > -#if (NGX_HAVE_INET6)
> > -    if (c->sockaddr->sa_family == AF_INET6) {
> > -        return NGX_QUIC_MAX_UDP_PAYLOAD_OUT6;
> > -    }
> > -#endif
> > -
> > -    return NGX_QUIC_MAX_UDP_PAYLOAD_OUT;
> > -}
> > -
> > -
> > ngx_int_t
> > ngx_quic_output(ngx_connection_t *c)
> > {
> > @@ -142,10 +124,7 @@ ngx_quic_create_datagrams(ngx_connection
> > 
> >         p = dst;
> > 
> > -        len = ngx_min(qc->ctp.max_udp_payload_size,
> > -                      NGX_QUIC_MAX_UDP_PAYLOAD_SIZE);
> > -
> > -        len = ngx_quic_path_limit(c, path, len);
> > +        len = ngx_quic_path_limit(c, path, path->mtu);
> > 
> >         pad = ngx_quic_get_padding_level(c);
> > 
> > @@ -299,9 +278,7 @@ ngx_quic_allow_segmentation(ngx_connecti
> >     ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> > 
> >     bytes = 0;
> > -
> > -    len = ngx_min(qc->ctp.max_udp_payload_size,
> > -                  NGX_QUIC_MAX_UDP_SEGMENT_BUF);
> > +    len = ngx_min(qc->path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
> > 
> >     for (q = ngx_queue_head(&ctx->frames);
> >          q != ngx_queue_sentinel(&ctx->frames);
> > @@ -345,8 +322,7 @@ ngx_quic_create_segments(ngx_connection_
> >         return NGX_ERROR;
> >     }
> > 
> > -    segsize = ngx_min(qc->ctp.max_udp_payload_size,
> > -                      NGX_QUIC_MAX_UDP_SEGMENT_BUF);
> > +    segsize = ngx_min(path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
> >     p = dst;
> >     end = dst + sizeof(dst);
> > 
> > diff --git a/src/event/quic/ngx_event_quic_output.h b/src/event/quic/ngx_event_quic_output.h
> > --- a/src/event/quic/ngx_event_quic_output.h
> > +++ b/src/event/quic/ngx_event_quic_output.h
> > @@ -12,8 +12,6 @@
> > #include <ngx_core.h>
> > 
> > 
> > -size_t ngx_quic_max_udp_payload(ngx_connection_t *c);
> > -
> > ngx_int_t ngx_quic_output(ngx_connection_t *c);
> > 
> > ngx_int_t ngx_quic_negotiate_version(ngx_connection_t *c,
> > diff --git a/src/event/quic/ngx_event_quic_ssl.c b/src/event/quic/ngx_event_quic_ssl.c
> > --- a/src/event/quic/ngx_event_quic_ssl.c
> > +++ b/src/event/quic/ngx_event_quic_ssl.c
> > @@ -494,6 +494,8 @@ ngx_quic_crypto_input(ngx_connection_t *
> >      */
> >     ngx_quic_discard_ctx(c, ssl_encryption_handshake);
> > 
> > +    ngx_quic_discover_path_mtu(c, qc->path);
> > +
> >     /* start accepting clients on negotiated number of server ids */
> >     if (ngx_quic_create_sockets(c) != NGX_OK) {
> >         return NGX_ERROR;
> > diff --git a/src/os/unix/ngx_errno.h b/src/os/unix/ngx_errno.h
> > --- a/src/os/unix/ngx_errno.h
> > +++ b/src/os/unix/ngx_errno.h
> > @@ -54,6 +54,7 @@ typedef int               ngx_err_t;
> > #define NGX_ENOMOREFILES  0
> > #define NGX_ELOOP         ELOOP
> > #define NGX_EBADF         EBADF
> > +#define NGX_EMSGSIZE      EMSGSIZE
> > 
> > #if (NGX_HAVE_OPENAT)
> > #define NGX_EMLINK        EMLINK
> > _______________________________________________
> > nginx-devel mailing list
> > nginx-devel at nginx.org
> > https://mailman.nginx.org/mailman/listinfo/nginx-devel
> 
> -- 
> Sergey Kandaurov
> _______________________________________________
> nginx-devel mailing list
> nginx-devel at nginx.org
> https://mailman.nginx.org/mailman/listinfo/nginx-devel

The diff is attached.

--
Roman Arutyunyan
-------------- next part --------------
# HG changeset patch
# User Roman Arutyunyan <arut at nginx.com>
# Date 1690824524 -14400
#      Mon Jul 31 21:28:44 2023 +0400
# Node ID a9f81e4b150735f01ac78a3ab5363f727b0f3b26
# Parent  ff84b813841c44f5ff326031732fab16a437c364
[mq]: quic-pmtud-fix

diff --git a/src/event/quic/ngx_event_quic_ack.c b/src/event/quic/ngx_event_quic_ack.c
--- a/src/event/quic/ngx_event_quic_ack.c
+++ b/src/event/quic/ngx_event_quic_ack.c
@@ -230,7 +230,7 @@ ngx_quic_handle_ack_frame_range(ngx_conn
     qc = ngx_quic_get_connection(c);
 
     if (ctx->level == ssl_encryption_application) {
-        if (ngx_quic_handle_path_mtu_ack(c, qc->path, min, max) != NGX_OK) {
+        if (ngx_quic_handle_path_mtu(c, qc->path, min, max) != NGX_OK) {
             return NGX_ERROR;
         }
     }
diff --git a/src/event/quic/ngx_event_quic_connection.h b/src/event/quic/ngx_event_quic_connection.h
--- a/src/event/quic/ngx_event_quic_connection.h
+++ b/src/event/quic/ngx_event_quic_connection.h
@@ -70,7 +70,7 @@ typedef enum {
     NGX_QUIC_PATH_IDLE = 0,
     NGX_QUIC_PATH_VALIDATING,
     NGX_QUIC_PATH_WAITING,
-    NGX_QUIC_PATH_DISCOVERING_MTU
+    NGX_QUIC_PATH_MTUD
 } ngx_quic_path_state_e;
 
 
diff --git a/src/event/quic/ngx_event_quic_migration.c b/src/event/quic/ngx_event_quic_migration.c
--- a/src/event/quic/ngx_event_quic_migration.c
+++ b/src/event/quic/ngx_event_quic_migration.c
@@ -10,7 +10,7 @@
 #include <ngx_event_quic_connection.h>
 
 
-#define NGX_QUIC_PATH_MTU_DELAY       1000
+#define NGX_QUIC_PATH_MTU_DELAY       100
 #define NGX_QUIC_PATH_MTU_STEP        256
 #define NGX_QUIC_PATH_MTU_PRECISION   16
 
@@ -686,7 +686,7 @@ ngx_quic_path_handler(ngx_event_t *ev)
 
         switch (path->state) {
         case NGX_QUIC_PATH_VALIDATING:
-             if (ngx_quic_expire_path_validation(c, path) != NGX_OK) {
+            if (ngx_quic_expire_path_validation(c, path) != NGX_OK) {
                 goto failed;
             }
 
@@ -699,17 +699,17 @@ ngx_quic_path_handler(ngx_event_t *ev)
 
             break;
 
-        case NGX_QUIC_PATH_DISCOVERING_MTU:
+        case NGX_QUIC_PATH_MTUD:
             if (ngx_quic_expire_path_mtu_discovery(c, path) != NGX_OK) {
                 goto failed;
-             }
+            }
 
             break;
 
         default:
             break;
-         }
-     }
+        }
+    }
 
     ngx_quic_set_path_timer(c);
 
@@ -812,7 +812,7 @@ ngx_quic_expire_path_mtu_delay(ngx_conne
         if (rc == NGX_OK) {
             pto = ngx_quic_pto(c, ctx);
             path->expires = ngx_current_msec + pto;
-            path->state = NGX_QUIC_PATH_DISCOVERING_MTU;
+            path->state = NGX_QUIC_PATH_MTUD;
             return NGX_OK;
         }
 
@@ -842,13 +842,19 @@ ngx_quic_expire_path_mtu_discovery(ngx_c
     ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
 
     if (++path->tries < NGX_QUIC_PATH_RETRIES) {
-        pto = ngx_quic_pto(c, ctx) << path->tries;
-        path->expires = ngx_current_msec + pto;
+        rc = ngx_quic_send_path_mtu_probe(c, path);
+
+        if (rc == NGX_ERROR) {
+            return NGX_ERROR;
+        }
 
-        rc = ngx_quic_send_path_mtu_probe(c, path);
-        if (rc != NGX_DECLINED) {
-            return rc;
+        if (rc == NGX_OK) {
+            pto = ngx_quic_pto(c, ctx) << path->tries;
+            path->expires = ngx_current_msec + pto;
+            return NGX_OK;
         }
+
+        /* rc == NGX_DECLINED */
     }
 
     ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
@@ -911,13 +917,13 @@ ngx_quic_send_path_mtu_probe(ngx_connect
 
 
 ngx_int_t
-ngx_quic_handle_path_mtu_ack(ngx_connection_t *c, ngx_quic_path_t *path,
+ngx_quic_handle_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path,
     uint64_t min, uint64_t max)
 {
     uint64_t    pnum;
     ngx_uint_t  i;
 
-    if (path->state != NGX_QUIC_PATH_DISCOVERING_MTU) {
+    if (path->state != NGX_QUIC_PATH_MTUD) {
         return NGX_OK;
     }
 
diff --git a/src/event/quic/ngx_event_quic_migration.h b/src/event/quic/ngx_event_quic_migration.h
--- a/src/event/quic/ngx_event_quic_migration.h
+++ b/src/event/quic/ngx_event_quic_migration.h
@@ -39,7 +39,7 @@ ngx_int_t ngx_quic_handle_migration(ngx_
 void ngx_quic_path_handler(ngx_event_t *ev);
 
 void ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path);
-ngx_int_t ngx_quic_handle_path_mtu_ack(ngx_connection_t *c,
+ngx_int_t ngx_quic_handle_path_mtu(ngx_connection_t *c,
     ngx_quic_path_t *path, uint64_t min, uint64_t max);
 
 #endif /* _NGX_EVENT_QUIC_MIGRATION_H_INCLUDED_ */


More information about the nginx-devel mailing list