[TCP] cubic: use Newton-Raphson
Replace cube root algorithim with a faster version using Newton-Raphson. Surprisingly, doing the scaled div64_64 is faster than a true 64 bit division on 64 bit CPU's. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
89b3d9aaf4
commit
9eb2d62719
@@ -52,6 +52,7 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
|
|||||||
module_param(tcp_friendliness, int, 0644);
|
module_param(tcp_friendliness, int, 0644);
|
||||||
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
|
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
|
||||||
|
|
||||||
|
#include <asm/div64.h>
|
||||||
|
|
||||||
/* BIC TCP Parameters */
|
/* BIC TCP Parameters */
|
||||||
struct bictcp {
|
struct bictcp {
|
||||||
@@ -93,67 +94,51 @@ static void bictcp_init(struct sock *sk)
|
|||||||
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
|
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 65536 times the cubic root */
|
/* 64bit divisor, dividend and result. dynamic precision */
|
||||||
static const u64 cubic_table[8]
|
static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
|
||||||
= {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367};
|
{
|
||||||
|
u_int32_t d = divisor;
|
||||||
|
|
||||||
|
if (divisor > 0xffffffffULL) {
|
||||||
|
unsigned int shift = fls(divisor >> 32);
|
||||||
|
|
||||||
|
d = divisor >> shift;
|
||||||
|
dividend >>= shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* avoid 64 bit division if possible */
|
||||||
|
if (dividend >> 32)
|
||||||
|
do_div(dividend, d);
|
||||||
|
else
|
||||||
|
dividend = (uint32_t) dividend / d;
|
||||||
|
|
||||||
|
return dividend;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* calculate the cubic root of x
|
* calculate the cubic root of x using Newton-Raphson
|
||||||
* the basic idea is that x can be expressed as i*8^j
|
|
||||||
* so cubic_root(x) = cubic_root(i)*2^j
|
|
||||||
* in the following code, x is i, and y is 2^j
|
|
||||||
* because of integer calculation, there are errors in calculation
|
|
||||||
* so finally use binary search to find out the exact solution
|
|
||||||
*/
|
*/
|
||||||
static u32 cubic_root(u64 x)
|
static u32 cubic_root(u64 a)
|
||||||
{
|
{
|
||||||
u64 y, app, target, start, end, mid, start_diff, end_diff;
|
u32 x, x1;
|
||||||
|
|
||||||
if (x == 0)
|
/* Initial estimate is based on:
|
||||||
return 0;
|
* cbrt(x) = exp(log(x) / 3)
|
||||||
|
*/
|
||||||
|
x = 1u << (fls64(a)/3);
|
||||||
|
|
||||||
target = x;
|
/*
|
||||||
|
* Iteration based on:
|
||||||
|
* 2
|
||||||
|
* x = ( 2 * x + a / x ) / 3
|
||||||
|
* k+1 k k
|
||||||
|
*/
|
||||||
|
do {
|
||||||
|
x1 = x;
|
||||||
|
x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
|
||||||
|
} while (abs(x1 - x) > 1);
|
||||||
|
|
||||||
/* first estimate lower and upper bound */
|
return x;
|
||||||
y = 1;
|
|
||||||
while (x >= 8){
|
|
||||||
x = (x >> 3);
|
|
||||||
y = (y << 1);
|
|
||||||
}
|
|
||||||
start = (y*cubic_table[x])>>16;
|
|
||||||
if (x==7)
|
|
||||||
end = (y<<1);
|
|
||||||
else
|
|
||||||
end = (y*cubic_table[x+1]+65535)>>16;
|
|
||||||
|
|
||||||
/* binary search for more accurate one */
|
|
||||||
while (start < end-1) {
|
|
||||||
mid = (start+end) >> 1;
|
|
||||||
app = mid*mid*mid;
|
|
||||||
if (app < target)
|
|
||||||
start = mid;
|
|
||||||
else if (app > target)
|
|
||||||
end = mid;
|
|
||||||
else
|
|
||||||
return mid;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* find the most accurate one from start and end */
|
|
||||||
app = start*start*start;
|
|
||||||
if (app < target)
|
|
||||||
start_diff = target - app;
|
|
||||||
else
|
|
||||||
start_diff = app - target;
|
|
||||||
app = end*end*end;
|
|
||||||
if (app < target)
|
|
||||||
end_diff = target - app;
|
|
||||||
else
|
|
||||||
end_diff = app - target;
|
|
||||||
|
|
||||||
if (start_diff < end_diff)
|
|
||||||
return (u32)start;
|
|
||||||
else
|
|
||||||
return (u32)end;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user