[TCP]: Appropriate Byte Count support

This is an updated version of the RFC3465 ABC patch originally
for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting
bytes ack'd rather than packets when updating congestion control.

The original ABC described in the RFC applied to a Reno style
algorithm. For advanced congestion control there is little
change after leaving slow start.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Stephen Hemminger
2005-11-10 17:09:53 -08:00
committed by David S. Miller
parent 7faffa1c7f
commit 9772efb970
9 changed files with 63 additions and 11 deletions

View File

@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
TCP variables: TCP variables:
tcp_abc - INTEGER
Controls Appropriate Byte Count defined in RFC3465. If set to
0 then does congestion avoidance once per ack. 1 is the conservative
value, and 2 is more aggressive.
tcp_syn_retries - INTEGER tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value will be retransmitted. Should not be higher than 255. Default value

View File

@@ -390,6 +390,7 @@ enum
NET_TCP_BIC_BETA=108, NET_TCP_BIC_BETA=108,
NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
NET_TCP_CONG_CONTROL=110, NET_TCP_CONG_CONTROL=110,
NET_TCP_ABC=111,
}; };
enum { enum {

View File

@@ -326,6 +326,7 @@ struct tcp_sock {
__u32 snd_up; /* Urgent pointer */ __u32 snd_up; /* Urgent pointer */
__u32 total_retrans; /* Total retransmits for entire connection */ __u32 total_retrans; /* Total retransmits for entire connection */
__u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */
unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_time; /* time before keep alive takes place */
unsigned int keepalive_intvl; /* time interval between keep alive probes */ unsigned int keepalive_intvl; /* time interval between keep alive probes */

View File

@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_abc;
extern atomic_t tcp_memory_allocated; extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated; extern atomic_t tcp_sockets_allocated;
@@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
*/ */
static inline void tcp_slow_start(struct tcp_sock *tp) static inline void tcp_slow_start(struct tcp_sock *tp)
{ {
if (sysctl_tcp_abc) {
/* RFC3465: Slow Start
* TCP sender SHOULD increase cwnd by the number of
* previously unacknowledged bytes ACKed by each incoming
* acknowledgment, provided the increase is not more than L
*/
if (tp->bytes_acked < tp->mss_cache)
return;
/* We MAY increase by 2 if discovered delayed ack */
if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
}
}
tp->bytes_acked = 0;
if (tp->snd_cwnd < tp->snd_cwnd_clamp) if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++; tp->snd_cwnd++;
} }
@@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0; tp->prior_ssthresh = 0;
tp->bytes_acked = 0;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(sk); __tcp_enter_cwr(sk);
tcp_set_ca_state(sk, TCP_CA_CWR); tcp_set_ca_state(sk, TCP_CA_CWR);

View File

@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_tcp_congestion_control, .proc_handler = &proc_tcp_congestion_control,
.strategy = &sysctl_tcp_congestion_control, .strategy = &sysctl_tcp_congestion_control,
}, },
{
.ctl_name = NET_TCP_ABC,
.procname = "tcp_abc",
.data = &sysctl_tcp_abc,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 } { .ctl_name = 0 }
}; };

View File

@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->packets_out = 0; tp->packets_out = 0;
tp->snd_ssthresh = 0x7fffffff; tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tp->bytes_acked = 0;
tcp_set_ca_state(sk, TCP_CA_Open); tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp); tcp_clear_retrans(tp);
inet_csk_delack_init(sk); inet_csk_delack_init(sk);

View File

@@ -192,17 +192,26 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
/* In "safe" area, increase. */ /* In "safe" area, increase. */
if (tp->snd_cwnd <= tp->snd_ssthresh) if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp); tcp_slow_start(tp);
else {
/* In dangerous area, increase slowly. /* In dangerous area, increase slowly. */
* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd else if (sysctl_tcp_abc) {
*/ /* RFC3465: Appropriate Byte Count
if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { * increase once for each full cwnd acked
if (tp->snd_cwnd < tp->snd_cwnd_clamp) */
tp->snd_cwnd++; if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
tp->snd_cwnd_cnt = 0; tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
} else if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd_cnt++; tp->snd_cwnd++;
} }
} else {
/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
tp->snd_cwnd_cnt = 0;
} else
tp->snd_cwnd_cnt++;
}
} }
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);

View File

@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
int sysctl_tcp_nometrics_save; int sysctl_tcp_nometrics_save;
int sysctl_tcp_moderate_rcvbuf = 1; int sysctl_tcp_moderate_rcvbuf = 1;
int sysctl_tcp_abc = 1;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp; tp->snd_cwnd_stamp = tcp_time_stamp;
tp->bytes_acked = 0;
tcp_clear_retrans(tp); tcp_clear_retrans(tp);
/* Push undo marker, if it was plain RTO and nothing /* Push undo marker, if it was plain RTO and nothing
@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
TCP_ECN_queue_cwr(tp); TCP_ECN_queue_cwr(tp);
} }
tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery); tcp_set_ca_state(sk, TCP_CA_Recovery);
} }
@@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una)) if (before(ack, prior_snd_una))
goto old_ack; goto old_ack;
if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
tp->bytes_acked += ack - prior_snd_una;
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance. /* Window is constant, pure forward advance.
* No more checks are required. * No more checks are required.
@@ -4370,6 +4376,7 @@ discard:
EXPORT_SYMBOL(sysctl_tcp_ecn); EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering); EXPORT_SYMBOL(sysctl_tcp_reordering);
EXPORT_SYMBOL(sysctl_tcp_abc);
EXPORT_SYMBOL(tcp_parse_options); EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process); EXPORT_SYMBOL(tcp_rcv_state_process);

View File

@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
*/ */
newtp->snd_cwnd = 2; newtp->snd_cwnd = 2;
newtp->snd_cwnd_cnt = 0; newtp->snd_cwnd_cnt = 0;
newtp->bytes_acked = 0;
newtp->frto_counter = 0; newtp->frto_counter = 0;
newtp->frto_highmark = 0; newtp->frto_highmark = 0;