tcp: sndbuf autotuning improvements
tcp_fixup_sndbuf() is underestimating initial send buffer requirements.

It was not noticed because big GSO packets were escaping the limitation, but with smaller TSO packets (or TSO/GSO/SG off), the application hits sk_sndbuf before having a chance to fill enough packets in the socket write queue.

- initial cwnd can be bigger than 10 for specific routes
- SKB_TRUESIZE() is a bit under real needs in some cases, because of power-of-two rounding in kmalloc()
- Fast Recovery (RFC 5681 3.2): Cubic needs a 70% factor
- Extra cushion (application might react slowly to POLLOUT)

tcp_v4_conn_req_fastopen() needs to call tcp_init_metrics() before calling tcp_init_buffer_space().

Then we realize tcp_new_space() should call tcp_fixup_sndbuf() instead of duplicating this logic.

Rename tcp_fixup_sndbuf() to tcp_sndbuf_expand() to be more descriptive.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 6ae705323b (parent bbe34cf8a1), committed by David S. Miller
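The gap between the old and new estimates is easiest to see with concrete numbers. The stand-alone sketch below mirrors the arithmetic of the old tcp_fixup_sndbuf() and the new tcp_sndbuf_expand(); the constants (MAX_TCP_HEADER, struct sizes, cache-line size) are plausible stand-ins, not authoritative values, since they vary by kernel version, configuration, and architecture.

```c
/* Hypothetical userspace sketch of the two sndbuf estimates.
 * All constants below are illustrative assumptions.
 */
#include <stdio.h>

#define CACHE_LINE 64u                    /* assumed SMP_CACHE_BYTES */
#define ALIGN_UP(x) (((x) + CACHE_LINE - 1) & ~(CACHE_LINE - 1))

static unsigned roundup_pow_of_two(unsigned x)
{
	unsigned r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	const unsigned mss = 1460;           /* typical Ethernet MSS */
	const unsigned max_tcp_header = 160; /* assumed MAX_TCP_HEADER */
	const unsigned skb = 232;            /* assumed sizeof(struct sk_buff) */
	const unsigned shinfo = 320;         /* assumed sizeof(struct skb_shared_info) */
	const unsigned cwnd = 10;            /* TCP_INIT_CWND */
	unsigned old_sndmem, per_mss, new_sndmem;

	/* Old: SKB_TRUESIZE(mss_clamp + MAX_TCP_HEADER) * TCP_INIT_CWND,
	 * charging skb->head at face value.
	 */
	old_sndmem = (mss + max_tcp_header +
		      ALIGN_UP(skb) + ALIGN_UP(shinfo)) * cwnd;

	/* New: model kmalloc's power-of-two rounding of skb->head,
	 * then double for fast recovery plus the POLLOUT cushion.
	 */
	per_mss = mss + max_tcp_header + ALIGN_UP(shinfo);
	per_mss = roundup_pow_of_two(per_mss) + ALIGN_UP(skb);
	new_sndmem = 2 * cwnd * per_mss;

	printf("old estimate: %u bytes, new estimate: %u bytes\n",
	       old_sndmem, new_sndmem);
	return 0;
}
```

Under these assumptions the program prints "old estimate: 21960 bytes, new estimate: 46080 bytes", i.e. the old code asked for roughly half of what ten full-size, power-of-two-rounded skbs actually cost.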
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
  * 1. Tuning sk->sk_sndbuf, when connection enters established state.
  */
 
-static void tcp_fixup_sndbuf(struct sock *sk)
+static void tcp_sndbuf_expand(struct sock *sk)
 {
-	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	int sndmem, per_mss;
+	u32 nr_segs;
+
+	/* Worst case is non GSO/TSO : each frame consumes one skb
+	 * and skb->head is kmalloced using power of two area of memory
+	 */
+	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
+		  MAX_TCP_HEADER +
+		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	per_mss = roundup_pow_of_two(per_mss) +
+		  SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
+
+	/* Fast Recovery (RFC 5681 3.2) :
+	 * Cubic needs 1.7 factor, rounded to 2 to include
+	 * extra cushion (application might react slowly to POLLOUT)
+	 */
+	sndmem = 2 * nr_segs * per_mss;
 
-	sndmem *= TCP_INIT_CWND;
 	if (sk->sk_sndbuf < sndmem)
 		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
 }
@@ -376,7 +396,7 @@ void tcp_init_buffer_space(struct sock *sk)
 	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
 		tcp_fixup_rcvbuf(sk);
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
-		tcp_fixup_sndbuf(sk);
+		tcp_sndbuf_expand(sk);
 
 	tp->rcvq_space.space = tp->rcv_wnd;
 	tp->rcvq_space.time = tcp_time_stamp;
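Note that both tuning calls above are guarded by the user locks: an application that sets SO_SNDBUF explicitly gets SOCK_SNDBUF_LOCK set on the socket and opts out of this autotuning entirely. A minimal userspace illustration (error handling trimmed):

```c
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int val = 1 << 20;	/* request a 1 MB send buffer */
	socklen_t len = sizeof(val);

	/* Sets SOCK_SNDBUF_LOCK: the tcp_sndbuf_expand() path is
	 * skipped for this socket from now on.
	 */
	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)) < 0)
		perror("setsockopt");

	/* The kernel stores roughly twice the requested value (capped
	 * by net.core.wmem_max) to account for skb overhead.
	 */
	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, &len);
	printf("effective sk_sndbuf: %d\n", val);
	return 0;
}
```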
@@ -4723,15 +4743,7 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk)) {
-		int sndmem = SKB_TRUESIZE(max_t(u32,
-						tp->rx_opt.mss_clamp,
-						tp->mss_cache) +
-					  MAX_TCP_HEADER);
-		int demanded = max_t(unsigned int, tp->snd_cwnd,
-				     tp->reordering + 1);
-		sndmem *= 2 * demanded;
-		if (sndmem > sk->sk_sndbuf)
-			sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+		tcp_sndbuf_expand(sk);
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 
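For context, tcp_new_space() runs from the write-space callback once ACKs free room in the write queue, and the wakeup reaches the application as POLLOUT. The "application might react slowly to POLLOUT" cushion targets loops like the following sketch (fd is assumed to be a connected TCP socket):

```c
#include <poll.h>
#include <unistd.h>

/* Write len bytes, sleeping until the socket reports POLLOUT. If the
 * process is scheduled late, the factor-of-two cushion in
 * tcp_sndbuf_expand() keeps TCP transmitting in the meantime.
 */
static ssize_t send_all(int fd, const char *buf, size_t len)
{
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
	size_t off = 0;

	while (off < len) {
		if (poll(&pfd, 1, -1) < 0)	/* wait for send space */
			return -1;
		ssize_t n = write(fd, buf + off, len - off);
		if (n < 0)
			return -1;
		off += (size_t)n;
	}
	return (ssize_t)off;
}
```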
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1410,8 +1410,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
 	inet_csk(child)->icsk_af_ops->rebuild_header(child);
 	tcp_init_congestion_control(child);
 	tcp_mtup_init(child);
-	tcp_init_buffer_space(child);
 	tcp_init_metrics(child);
+	tcp_init_buffer_space(child);
 
 	/* Queue the data carried in the SYN packet. We need to first
 	 * bump skb's refcnt because the caller will attempt to free it.
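The tcp_ipv4.c reordering matters because tcp_init_metrics() seeds per-destination state such as tp->reordering (and, presumably, congestion-window state derived from cached route metrics) before tcp_init_buffer_space() reaches tcp_sndbuf_expand(), which reads exactly those fields. A hypothetical stand-alone demo of the general pitfall, with invented names and numbers:

```c
/* Hypothetical demo of the ordering bug pattern: sizing a buffer from
 * a cwnd that has not been initialized from metrics yet.
 */
#include <stdio.h>

struct conn { unsigned cwnd; unsigned sndbuf; };

static void init_metrics(struct conn *c)
{
	c->cwnd = 20;		/* e.g. route-supplied initcwnd > 10 */
}

static void init_buffer_space(struct conn *c)
{
	unsigned per_mss = 2304;	/* per-segment cost from the first sketch */

	c->sndbuf = 2 * c->cwnd * per_mss;
}

int main(void)
{
	struct conn a = { .cwnd = 10 };	/* default TCP_INIT_CWND */
	struct conn b = { .cwnd = 10 };

	/* Old order: buffer sized before metrics raise cwnd */
	init_buffer_space(&a);
	init_metrics(&a);

	/* New order: metrics first, then buffer sizing sees cwnd = 20 */
	init_metrics(&b);
	init_buffer_space(&b);

	printf("old order sndbuf: %u, new order sndbuf: %u\n",
	       a.sndbuf, b.sndbuf);
	return 0;
}
```

With the old order the buffer is sized from the default cwnd of 10 and prints 46080; with the new order it sees the route-supplied cwnd of 20 and prints 92160.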