i386: move math-emu
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
30
arch/x86/math-emu/Makefile
Normal file
30
arch/x86/math-emu/Makefile
Normal file
@@ -0,0 +1,30 @@
|
||||
#
|
||||
# Makefile for wm-FPU-emu
|
||||
#
|
||||
|
||||
#DEBUG = -DDEBUGGING
|
||||
DEBUG =
|
||||
PARANOID = -DPARANOID
|
||||
CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION)
|
||||
|
||||
EXTRA_AFLAGS := $(PARANOID)
|
||||
|
||||
# From 'C' language sources:
|
||||
C_OBJS =fpu_entry.o errors.o \
|
||||
fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \
|
||||
load_store.o get_address.o \
|
||||
poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \
|
||||
reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \
|
||||
reg_ld_str.o reg_divide.o reg_mul.o
|
||||
|
||||
# From 80x86 assembler sources:
|
||||
A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \
|
||||
div_small.o reg_norm.o reg_round.o \
|
||||
wm_shrx.o wm_sqrt.o \
|
||||
div_Xsig.o polynom_Xsig.o round_Xsig.o \
|
||||
shr_Xsig.o mul_Xsig.o
|
||||
|
||||
obj-y =$(C_OBJS) $(A_OBJS)
|
||||
|
||||
proto:
|
||||
cproto -e -DMAKING_PROTO *.c >fpu_proto.h
|
427
arch/x86/math-emu/README
Normal file
427
arch/x86/math-emu/README
Normal file
@@ -0,0 +1,427 @@
|
||||
+---------------------------------------------------------------------------+
|
||||
| wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1995,1996,1997,1999 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@melbpc.org.au |
|
||||
| |
|
||||
| This program is free software; you can redistribute it and/or modify |
|
||||
| it under the terms of the GNU General Public License version 2 as |
|
||||
| published by the Free Software Foundation. |
|
||||
| |
|
||||
| This program is distributed in the hope that it will be useful, |
|
||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
||||
| GNU General Public License for more details. |
|
||||
| |
|
||||
| You should have received a copy of the GNU General Public License |
|
||||
| along with this program; if not, write to the Free Software |
|
||||
| Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------+
|
||||
|
||||
|
||||
|
||||
wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387
|
||||
which was my 80387 emulator for early versions of djgpp (gcc under
|
||||
msdos); wm-emu387 was in turn based upon emu387 which was written by
|
||||
DJ Delorie for djgpp. The interface to the Linux kernel is based upon
|
||||
the original Linux math emulator by Linus Torvalds.
|
||||
|
||||
My target FPU for wm-FPU-emu is that described in the Intel486
|
||||
Programmer's Reference Manual (1992 edition). Unfortunately, numerous
|
||||
facets of the functioning of the FPU are not well covered in the
|
||||
Reference Manual. The information in the manual has been supplemented
|
||||
with measurements on real 80486's. Unfortunately, it is simply not
|
||||
possible to be sure that all of the peculiarities of the 80486 have
|
||||
been discovered, so there is always likely to be obscure differences
|
||||
in the detailed behaviour of the emulator and a real 80486.
|
||||
|
||||
wm-FPU-emu does not implement all of the behaviour of the 80486 FPU,
|
||||
but is very close. See "Limitations" later in this file for a list of
|
||||
some differences.
|
||||
|
||||
Please report bugs, etc to me at:
|
||||
billm@melbpc.org.au
|
||||
or b.metzenthen@medoto.unimelb.edu.au
|
||||
|
||||
For more information on the emulator and on floating point topics, see
|
||||
my web pages, currently at http://www.suburbia.net/~billm/
|
||||
|
||||
|
||||
--Bill Metzenthen
|
||||
December 1999
|
||||
|
||||
|
||||
----------------------- Internals of wm-FPU-emu -----------------------
|
||||
|
||||
Numeric algorithms:
|
||||
(1) Add, subtract, and multiply. Nothing remarkable in these.
|
||||
(2) Divide has been tuned to get reasonable performance. The algorithm
|
||||
is not the obvious one which most people seem to use, but is designed
|
||||
to take advantage of the characteristics of the 80386. I expect that
|
||||
it has been invented many times before I discovered it, but I have not
|
||||
seen it. It is based upon one of those ideas which one carries around
|
||||
for years without ever bothering to check it out.
|
||||
(3) The sqrt function has been tuned to get good performance. It is based
|
||||
upon Newton's classic method. Performance was improved by capitalizing
|
||||
upon the properties of Newton's method, and the code is once again
|
||||
structured taking account of the 80386 characteristics.
|
||||
(4) The trig, log, and exp functions are based in each case upon quasi-
|
||||
"optimal" polynomial approximations. My definition of "optimal" was
|
||||
based upon getting good accuracy with reasonable speed.
|
||||
(5) The argument reducing code for the trig function effectively uses
|
||||
a value of pi which is accurate to more than 128 bits. As a consequence,
|
||||
the reduced argument is accurate to more than 64 bits for arguments up
|
||||
to a few pi, and accurate to more than 64 bits for most arguments,
|
||||
even for arguments approaching 2^63. This is far superior to an
|
||||
80486, which uses a value of pi which is accurate to 66 bits.
|
||||
|
||||
The code of the emulator is complicated slightly by the need to
|
||||
account for a limited form of re-entrancy. Normally, the emulator will
|
||||
emulate each FPU instruction to completion without interruption.
|
||||
However, it may happen that when the emulator is accessing the user
|
||||
memory space, swapping may be needed. In this case the emulator may be
|
||||
temporarily suspended while disk i/o takes place. During this time
|
||||
another process may use the emulator, thereby perhaps changing static
|
||||
variables. The code which accesses user memory is confined to five
|
||||
files:
|
||||
fpu_entry.c
|
||||
reg_ld_str.c
|
||||
load_store.c
|
||||
get_address.c
|
||||
errors.c
|
||||
As from version 1.12 of the emulator, no static variables are used
|
||||
(apart from those in the kernel's per-process tables). The emulator is
|
||||
therefore now fully re-entrant, rather than having just the restricted
|
||||
form of re-entrancy which is required by the Linux kernel.
|
||||
|
||||
----------------------- Limitations of wm-FPU-emu -----------------------
|
||||
|
||||
There are a number of differences between the current wm-FPU-emu
|
||||
(version 2.01) and the 80486 FPU (apart from bugs). The differences
|
||||
are fewer than those which applied to the 1.xx series of the emulator.
|
||||
Some of the more important differences are listed below:
|
||||
|
||||
The Roundup flag does not have much meaning for the transcendental
|
||||
functions and its 80486 value with these functions is likely to differ
|
||||
from its emulator value.
|
||||
|
||||
In a few rare cases the Underflow flag obtained with the emulator will
|
||||
be different from that obtained with an 80486. This occurs when the
|
||||
following conditions apply simultaneously:
|
||||
(a) the operands have a higher precision than the current setting of the
|
||||
precision control (PC) flags.
|
||||
(b) the underflow exception is masked.
|
||||
(c) the magnitude of the exact result (before rounding) is less than 2^-16382.
|
||||
(d) the magnitude of the final result (after rounding) is exactly 2^-16382.
|
||||
(e) the magnitude of the exact result would be exactly 2^-16382 if the
|
||||
operands were rounded to the current precision before the arithmetic
|
||||
operation was performed.
|
||||
If all of these apply, the emulator will set the Underflow flag but a real
|
||||
80486 will not.
|
||||
|
||||
NOTE: Certain formats of Extended Real are UNSUPPORTED. They are
|
||||
unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities,
|
||||
and Unnormals. None of these will be generated by an 80486 or by the
|
||||
emulator. Do not use them. The emulator treats them differently in
|
||||
detail from the way an 80486 does.
|
||||
|
||||
Self modifying code can cause the emulator to fail. An example of such
|
||||
code is:
|
||||
movl %esp,[%ebx]
|
||||
fld1
|
||||
The FPU instruction may be (usually will be) loaded into the pre-fetch
|
||||
queue of the CPU before the mov instruction is executed. If the
|
||||
destination of the 'movl' overlaps the FPU instruction then the bytes
|
||||
in the prefetch queue and memory will be inconsistent when the FPU
|
||||
instruction is executed. The emulator will be invoked but will not be
|
||||
able to find the instruction which caused the device-not-present
|
||||
exception. For this case, the emulator cannot emulate the behaviour of
|
||||
an 80486DX.
|
||||
|
||||
Handling of the address size override prefix byte (0x67) has not been
|
||||
extensively tested yet. A major problem exists because using it in
|
||||
vm86 mode can cause a general protection fault. Address offsets
|
||||
greater than 0xffff appear to be illegal in vm86 mode but are quite
|
||||
acceptable (and work) in real mode. A small test program developed to
|
||||
check the addressing, and which runs successfully in real mode,
|
||||
crashes dosemu under Linux and also brings Windows down with a general
|
||||
protection fault message when run under the MS-DOS prompt of Windows
|
||||
3.1. (The program simply reads data from a valid address).
|
||||
|
||||
The emulator supports 16-bit protected mode, with one difference from
|
||||
an 80486DX. A 80486DX will allow some floating point instructions to
|
||||
write a few bytes below the lowest address of the stack. The emulator
|
||||
will not allow this in 16-bit protected mode: no instructions are
|
||||
allowed to write outside the bounds set by the protection.
|
||||
|
||||
----------------------- Performance of wm-FPU-emu -----------------------
|
||||
|
||||
Speed.
|
||||
-----
|
||||
|
||||
The speed of floating point computation with the emulator will depend
|
||||
upon instruction mix. Relative performance is best for the instructions
|
||||
which require most computation. The simple instructions are adversely
|
||||
affected by the FPU instruction trap overhead.
|
||||
|
||||
|
||||
Timing: Some simple timing tests have been made on the emulator functions.
|
||||
The times include load/store instructions. All times are in microseconds
|
||||
measured on a 33MHz 386 with 64k cache. The Turbo C tests were under
|
||||
ms-dos, the next two columns are for emulators running with the djgpp
|
||||
ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97,
|
||||
using libm4.0 (hard).
|
||||
|
||||
function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu
|
||||
|
||||
+ 60.5 154.8 76.5 139.4
|
||||
- 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7
|
||||
* 71.0 190.8 79.6 146.6
|
||||
/ 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1
|
||||
|
||||
sin() 310.8 4692.0 319.0 398.5
|
||||
cos() 284.4 4855.2 308.0 388.7
|
||||
tan() 495.0 8807.1 394.9 504.7
|
||||
atan() 328.9 4866.4 601.1 419.5-491.9
|
||||
|
||||
sqrt() 128.7 crashed 145.2 227.0
|
||||
log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1
|
||||
exp() 479.1 6619.2 469.1 850.8
|
||||
|
||||
|
||||
The performance under Linux is improved by the use of look-ahead code.
|
||||
The following results show the improvement which is obtained under
|
||||
Linux due to the look-ahead code. Also given are the times for the
|
||||
original Linux emulator with the 4.1 'soft' lib.
|
||||
|
||||
[ Linus' note: I changed look-ahead to be the default under linux, as
|
||||
there was no reason not to use it after I had edited it to be
|
||||
disabled during tracing ]
|
||||
|
||||
wm-FPU-emu w original w
|
||||
look-ahead 'soft' lib
|
||||
+ 106.4 190.2
|
||||
- 108.6-111.6 192.4-216.2
|
||||
* 113.4 193.1
|
||||
/ 108.8-124.4 700.1-706.2
|
||||
|
||||
sin() 390.5 2642.0
|
||||
cos() 381.5 2767.4
|
||||
tan() 496.5 3153.3
|
||||
atan() 367.2-435.5 2439.4-3396.8
|
||||
|
||||
sqrt() 195.1 4732.5
|
||||
log() 358.0-387.5 3359.2-3390.3
|
||||
exp() 619.3 4046.4
|
||||
|
||||
|
||||
These figures are now somewhat out-of-date. The emulator has become
|
||||
progressively slower for most functions as more of the 80486 features
|
||||
have been implemented.
|
||||
|
||||
|
||||
----------------------- Accuracy of wm-FPU-emu -----------------------
|
||||
|
||||
|
||||
The accuracy of the emulator is in almost all cases equal to or better
|
||||
than that of an Intel 80486 FPU.
|
||||
|
||||
The results of the basic arithmetic functions (+,-,*,/), and fsqrt
|
||||
match those of an 80486 FPU. They are the best possible; the error for
|
||||
these never exceeds 1/2 an lsb. The fprem and fprem1 instructions
|
||||
return exact results; they have no error.
|
||||
|
||||
|
||||
The following table compares the emulator accuracy for the sqrt(),
|
||||
trig and log functions against the Turbo C "emulator". For this table,
|
||||
each function was tested at about 400 points. Ideal worst-case results
|
||||
would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for
|
||||
arguments greater than pi/4 can be thought of as being related to the
|
||||
precision of the argument x; e.g. an argument of pi/2-(1e-10) which is
|
||||
accurate to 64 bits can result in a relative accuracy in cos() of
|
||||
about 64 + log2(cos(x)) = 31 bits.
|
||||
|
||||
|
||||
Function Tested x range Worst result Turbo C
|
||||
(relative bits)
|
||||
|
||||
sqrt(x) 1 .. 2 64.1 63.2
|
||||
atan(x) 1e-10 .. 200 64.2 62.8
|
||||
cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4
|
||||
64.1 (x = pi/2-(1e-10)) 31.9
|
||||
sin(x) 1e-10 .. pi/2 64.0 62.8
|
||||
tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1
|
||||
64.1 (x = pi/2-(1e-10)) 31.9
|
||||
exp(x) 0 .. 1 63.1 ** 62.9
|
||||
log(x) 1+1e-6 .. 2 63.8 ** 62.1
|
||||
|
||||
** The accuracy for exp() and log() is low because the FPU (emulator)
|
||||
does not compute them directly; two operations are required.
|
||||
|
||||
|
||||
The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or
|
||||
later) for 'float' variables (24 bit precision numbers) when precision
|
||||
control is set to 24, 53 or 64 bits, and for 'double' variables (53
|
||||
bit precision numbers) when precision control is set to 53 bits (a
|
||||
properly performing FPU cannot pass the 'paranoia' tests for 'double'
|
||||
variables when precision control is set to 64 bits).
|
||||
|
||||
The code for reducing the argument for the trig functions (fsin, fcos,
|
||||
fptan and fsincos) has been improved and now effectively uses a value
|
||||
for pi which is accurate to more than 128 bits precision. As a
|
||||
consequence, the accuracy of these functions for large arguments has
|
||||
been dramatically improved (and is now very much better than an 80486
|
||||
FPU). There is also now no degradation of accuracy for fcos and fptan
|
||||
for operands close to pi/2. Measured results are (note that the
|
||||
definition of accuracy has changed slightly from that used for the
|
||||
above table):
|
||||
|
||||
Function Tested x range Worst result
|
||||
(absolute bits)
|
||||
|
||||
cos(x) 0 .. 9.22e+18 62.0
|
||||
sin(x) 1e-16 .. 9.22e+18 62.1
|
||||
tan(x) 1e-16 .. 9.22e+18 61.8
|
||||
|
||||
It is possible with some effort to find very large arguments which
|
||||
give much degraded precision. For example, the integer number
|
||||
8227740058411162616.0
|
||||
is within about 10e-7 of a multiple of pi. To find the tan (for
|
||||
example) of this number to 64 bits precision it would be necessary to
|
||||
have a value of pi which had about 150 bits precision. The FPU
|
||||
emulator computes the result to about 42.6 bits precision (the correct
|
||||
result is about -9.739715e-8). On the other hand, an 80486 FPU returns
|
||||
0.01059, which in relative terms is hopelessly inaccurate.
|
||||
|
||||
For arguments close to critical angles (which occur at multiples of
|
||||
pi/2) the emulator is more accurate than an 80486 FPU. For very large
|
||||
arguments, the emulator is far more accurate.
|
||||
|
||||
|
||||
Prior to version 1.20 of the emulator, the accuracy of the results for
|
||||
the transcendental functions (in their principal range) was not as
|
||||
good as the results from an 80486 FPU. From version 1.20, the accuracy
|
||||
has been considerably improved and these functions now give measured
|
||||
worst-case results which are better than the worst-case results given
|
||||
by an 80486 FPU.
|
||||
|
||||
The following table gives the measured results for the emulator. The
|
||||
number of randomly selected arguments in each case is about half a
|
||||
million. The group of three columns gives the frequency of the given
|
||||
accuracy in number of times per million, thus the second of these
|
||||
columns shows that an accuracy of between 63.80 and 63.89 bits was
|
||||
found at a rate of 133 times per one million measurements for fsin.
|
||||
The results show that the fsin, fcos and fptan instructions return
|
||||
results which are in error (i.e. less accurate than the best possible
|
||||
result (which is 64 bits)) for about one per cent of all arguments
|
||||
between -pi/2 and +pi/2. The other instructions have a lower
|
||||
frequency of results which are in error. The last two columns give
|
||||
the worst accuracy which was found (in bits) and the approximate value
|
||||
of the argument which produced it.
|
||||
|
||||
frequency (per M)
|
||||
------------------- ---------------
|
||||
instr arg range # tests 63.7 63.8 63.9 worst at arg
|
||||
bits bits bits bits
|
||||
----- ------------ ------- ---- ---- ----- ----- --------
|
||||
fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317
|
||||
fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801
|
||||
fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876
|
||||
fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q)
|
||||
fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x)
|
||||
fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x)
|
||||
f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709
|
||||
|
||||
|
||||
Tests performed on an 80486 FPU showed results of lower accuracy. The
|
||||
following table gives the results which were obtained with an AMD
|
||||
486DX2/66 (other tests indicate that an Intel 486DX produces
|
||||
identical results). The tests were basically the same as those used
|
||||
to measure the emulator (the values, being random, were in general not
|
||||
the same). The total number of tests for each instruction are given
|
||||
at the end of the table, in case each about 100k tests were performed.
|
||||
Another line of figures at the end of the table shows that most of the
|
||||
instructions return results which are in error for more than 10
|
||||
percent of the arguments tested.
|
||||
|
||||
The numbers in the body of the table give the approx number of times a
|
||||
result of the given accuracy in bits (given in the left-most column)
|
||||
was obtained per one million arguments. For three of the instructions,
|
||||
two columns of results are given: * The second column for f2xm1 gives
|
||||
the number cases where the results of the first column were for a
|
||||
positive argument, this shows that this instruction gives better
|
||||
results for positive arguments than it does for negative. * In the
|
||||
cases of fcos and fptan, the first column gives the results when all
|
||||
cases where arguments greater than 1.5 were removed from the results
|
||||
given in the second column. Unlike the emulator, an 80486 FPU returns
|
||||
results of relatively poor accuracy for these instructions when the
|
||||
argument approaches pi/2. The table does not show those cases when the
|
||||
accuracy of the results were less than 62 bits, which occurs quite
|
||||
often for fsin and fptan when the argument approaches pi/2. This poor
|
||||
accuracy is discussed above in relation to the Turbo C "emulator", and
|
||||
the accuracy of the value of pi.
|
||||
|
||||
|
||||
bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan
|
||||
62.0 0 0 0 0 437 0 0 0 0 925
|
||||
62.1 0 0 10 0 894 0 0 0 0 1023
|
||||
62.2 14 0 0 0 1033 0 0 0 0 945
|
||||
62.3 57 0 0 0 1202 0 0 0 0 1023
|
||||
62.4 385 0 0 10 1292 0 23 0 0 1178
|
||||
62.5 1140 0 0 119 1649 0 39 0 0 1149
|
||||
62.6 2037 0 0 189 1620 0 16 0 0 1169
|
||||
62.7 5086 14 0 646 2315 10 101 35 39 1402
|
||||
62.8 8818 86 0 984 3050 59 287 131 224 2036
|
||||
62.9 11340 1355 0 2126 4153 79 605 357 321 1948
|
||||
63.0 15557 4750 0 3319 5376 246 1281 862 808 2688
|
||||
63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302
|
||||
63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724
|
||||
63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236
|
||||
63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587
|
||||
63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262
|
||||
63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346
|
||||
63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209
|
||||
63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329
|
||||
63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601
|
||||
|
||||
Per cent with error:
|
||||
30.9 3.2 18.5 9.8 13.1 11.6 17.4
|
||||
Total arguments tested:
|
||||
70194 70099 101784 100641 100641 101799 128853 114893 102675 102675
|
||||
|
||||
|
||||
------------------------- Contributors -------------------------------
|
||||
|
||||
A number of people have contributed to the development of the
|
||||
emulator, often by just reporting bugs, sometimes with suggested
|
||||
fixes, and a few kind people have provided me with access in one way
|
||||
or another to an 80486 machine. Contributors include (to those people
|
||||
who I may have forgotten, please forgive me):
|
||||
|
||||
Linus Torvalds
|
||||
Tommy.Thorn@daimi.aau.dk
|
||||
Andrew.Tridgell@anu.edu.au
|
||||
Nick Holloway, alfie@dcs.warwick.ac.uk
|
||||
Hermano Moura, moura@dcs.gla.ac.uk
|
||||
Jon Jagger, J.Jagger@scp.ac.uk
|
||||
Lennart Benschop
|
||||
Brian Gallew, geek+@CMU.EDU
|
||||
Thomas Staniszewski, ts3v+@andrew.cmu.edu
|
||||
Martin Howell, mph@plasma.apana.org.au
|
||||
M Saggaf, alsaggaf@athena.mit.edu
|
||||
Peter Barker, PETER@socpsy.sci.fau.edu
|
||||
tom@vlsivie.tuwien.ac.at
|
||||
Dan Russel, russed@rpi.edu
|
||||
Daniel Carosone, danielce@ee.mu.oz.au
|
||||
cae@jpmorgan.com
|
||||
Hamish Coleman, t933093@minyos.xx.rmit.oz.au
|
||||
Bruce Evans, bde@kralizec.zeta.org.au
|
||||
Timo Korvola, Timo.Korvola@hut.fi
|
||||
Rick Lyons, rick@razorback.brisnet.org.au
|
||||
Rick, jrs@world.std.com
|
||||
|
||||
...and numerous others who responded to my request for help with
|
||||
a real 80486.
|
||||
|
45
arch/x86/math-emu/control_w.h
Normal file
45
arch/x86/math-emu/control_w.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| control_w.h |
|
||||
| |
|
||||
| Copyright (C) 1992,1993 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef _CONTROLW_H_
|
||||
#define _CONTROLW_H_
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
#define _Const_(x) $##x
|
||||
#else
|
||||
#define _Const_(x) x
|
||||
#endif
|
||||
|
||||
#define CW_RC _Const_(0x0C00) /* rounding control */
|
||||
#define CW_PC _Const_(0x0300) /* precision control */
|
||||
|
||||
#define CW_Precision Const_(0x0020) /* loss of precision mask */
|
||||
#define CW_Underflow Const_(0x0010) /* underflow mask */
|
||||
#define CW_Overflow Const_(0x0008) /* overflow mask */
|
||||
#define CW_ZeroDiv Const_(0x0004) /* divide by zero mask */
|
||||
#define CW_Denormal Const_(0x0002) /* denormalized operand mask */
|
||||
#define CW_Invalid Const_(0x0001) /* invalid operation mask */
|
||||
|
||||
#define CW_Exceptions _Const_(0x003f) /* all masks */
|
||||
|
||||
#define RC_RND _Const_(0x0000)
|
||||
#define RC_DOWN _Const_(0x0400)
|
||||
#define RC_UP _Const_(0x0800)
|
||||
#define RC_CHOP _Const_(0x0C00)
|
||||
|
||||
/* p 15-5: Precision control bits affect only the following:
|
||||
ADD, SUB(R), MUL, DIV(R), and SQRT */
|
||||
#define PR_24_BITS _Const_(0x000)
|
||||
#define PR_53_BITS _Const_(0x200)
|
||||
#define PR_64_BITS _Const_(0x300)
|
||||
#define PR_RESERVED_BITS _Const_(0x100)
|
||||
/* FULL_PRECISION simulates all exceptions masked */
|
||||
#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f)
|
||||
|
||||
#endif /* _CONTROLW_H_ */
|
365
arch/x86/math-emu/div_Xsig.S
Normal file
365
arch/x86/math-emu/div_Xsig.S
Normal file
@@ -0,0 +1,365 @@
|
||||
.file "div_Xsig.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| div_Xsig.S |
|
||||
| |
|
||||
| Division subroutine for 96 bit quantities |
|
||||
| |
|
||||
| Copyright (C) 1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
|
||||
| put the 96 bit result at the location d. |
|
||||
| |
|
||||
| The result may not be accurate to 96 bits. It is intended for use where |
|
||||
| a result better than 64 bits is required. The result should usually be |
|
||||
| good to at least 94 bits. |
|
||||
| The returned result is actually divided by one half. This is done to |
|
||||
| prevent overflow. |
|
||||
| |
|
||||
| .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd |
|
||||
| |
|
||||
| void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
#define XsigLL(x) (x)
|
||||
#define XsigL(x) 4(x)
|
||||
#define XsigH(x) 8(x)
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/*
|
||||
Local storage on the stack:
|
||||
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
|
||||
*/
|
||||
#define FPU_accum_3 -4(%ebp)
|
||||
#define FPU_accum_2 -8(%ebp)
|
||||
#define FPU_accum_1 -12(%ebp)
|
||||
#define FPU_accum_0 -16(%ebp)
|
||||
#define FPU_result_3 -20(%ebp)
|
||||
#define FPU_result_2 -24(%ebp)
|
||||
#define FPU_result_1 -28(%ebp)
|
||||
|
||||
#else
|
||||
.data
|
||||
/*
|
||||
Local storage in a static area:
|
||||
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
|
||||
*/
|
||||
.align 4,0
|
||||
FPU_accum_3:
|
||||
.long 0
|
||||
FPU_accum_2:
|
||||
.long 0
|
||||
FPU_accum_1:
|
||||
.long 0
|
||||
FPU_accum_0:
|
||||
.long 0
|
||||
FPU_result_3:
|
||||
.long 0
|
||||
FPU_result_2:
|
||||
.long 0
|
||||
FPU_result_1:
|
||||
.long 0
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(div_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
subl $28,%esp
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi /* pointer to num */
|
||||
movl PARAM2,%ebx /* pointer to denom */
|
||||
|
||||
#ifdef PARANOID
|
||||
testl $0x80000000, XsigH(%ebx) /* Divisor */
|
||||
je L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Divide: Return arg1/arg2 to arg3. |
|
||||
| |
|
||||
| The maximum returned value is (ignoring exponents) |
|
||||
| .ffffffff ffffffff |
|
||||
| ------------------ = 1.ffffffff fffffffe |
|
||||
| .80000000 00000000 |
|
||||
| and the minimum is |
|
||||
| .80000000 00000000 |
|
||||
| ------------------ = .80000000 00000001 (rounded) |
|
||||
| .ffffffff ffffffff |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/* Save extended dividend in local register */
|
||||
|
||||
/* Divide by 2 to prevent overflow */
|
||||
clc
|
||||
movl XsigH(%esi),%eax
|
||||
rcrl %eax
|
||||
movl %eax,FPU_accum_3
|
||||
movl XsigL(%esi),%eax
|
||||
rcrl %eax
|
||||
movl %eax,FPU_accum_2
|
||||
movl XsigLL(%esi),%eax
|
||||
rcrl %eax
|
||||
movl %eax,FPU_accum_1
|
||||
movl $0,%eax
|
||||
rcrl %eax
|
||||
movl %eax,FPU_accum_0
|
||||
|
||||
movl FPU_accum_2,%eax /* Get the current num */
|
||||
movl FPU_accum_3,%edx
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Initialization done.
|
||||
Do the first 32 bits. */
|
||||
|
||||
/* We will divide by a number which is too large */
|
||||
movl XsigH(%ebx),%ecx
|
||||
addl $1,%ecx
|
||||
jnc LFirst_div_not_1
|
||||
|
||||
/* here we need to divide by 100000000h,
|
||||
i.e., no division at all.. */
|
||||
mov %edx,%eax
|
||||
jmp LFirst_div_done
|
||||
|
||||
LFirst_div_not_1:
|
||||
divl %ecx /* Divide the numerator by the augmented
|
||||
denom ms dw */
|
||||
|
||||
LFirst_div_done:
|
||||
movl %eax,FPU_result_3 /* Put the result in the answer */
|
||||
|
||||
mull XsigH(%ebx) /* mul by the ms dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_2 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_3
|
||||
|
||||
movl FPU_result_3,%eax /* Get the result back */
|
||||
mull XsigL(%ebx) /* now mul the ls dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
sbbl $0,FPU_accum_3
|
||||
je LDo_2nd_32_bits /* Must check for non-zero result here */
|
||||
|
||||
#ifdef PARANOID
|
||||
jb L_bugged_1
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* need to subtract another once of the denom */
|
||||
incl FPU_result_3 /* Correct the answer */
|
||||
|
||||
movl XsigL(%ebx),%eax
|
||||
movl XsigH(%ebx),%edx
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
sbbl $0,FPU_accum_3
|
||||
jne L_bugged_1 /* Must check for non-zero result here */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Half of the main problem is done, there is just a reduced numerator
|
||||
to handle now.
|
||||
Work with the second 32 bits, FPU_accum_0 not used from now on */
|
||||
LDo_2nd_32_bits:
|
||||
movl FPU_accum_2,%edx /* get the reduced num */
|
||||
movl FPU_accum_1,%eax
|
||||
|
||||
/* need to check for possible subsequent overflow */
|
||||
cmpl XsigH(%ebx),%edx
|
||||
jb LDo_2nd_div
|
||||
ja LPrevent_2nd_overflow
|
||||
|
||||
cmpl XsigL(%ebx),%eax
|
||||
jb LDo_2nd_div
|
||||
|
||||
LPrevent_2nd_overflow:
|
||||
/* The numerator is greater or equal, would cause overflow */
|
||||
/* prevent overflow */
|
||||
subl XsigL(%ebx),%eax
|
||||
sbbl XsigH(%ebx),%edx
|
||||
movl %edx,FPU_accum_2
|
||||
movl %eax,FPU_accum_1
|
||||
|
||||
incl FPU_result_3 /* Reflect the subtraction in the answer */
|
||||
|
||||
#ifdef PARANOID
|
||||
je L_bugged_2 /* Can't bump the result to 1.0 */
|
||||
#endif /* PARANOID */
|
||||
|
||||
LDo_2nd_div:
|
||||
cmpl $0,%ecx /* augmented denom msw */
|
||||
jnz LSecond_div_not_1
|
||||
|
||||
/* %ecx == 0, we are dividing by 1.0 */
|
||||
mov %edx,%eax
|
||||
jmp LSecond_div_done
|
||||
|
||||
LSecond_div_not_1:
|
||||
divl %ecx /* Divide the numerator by the denom ms dw */
|
||||
|
||||
LSecond_div_done:
|
||||
movl %eax,FPU_result_2 /* Put the result in the answer */
|
||||
|
||||
mull XsigH(%ebx) /* mul by the ms dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
movl FPU_result_2,%eax /* Get the result back */
|
||||
mull XsigL(%ebx) /* now mul the ls dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
jz LDo_3rd_32_bits
|
||||
|
||||
#ifdef PARANOID
|
||||
cmpl $1,FPU_accum_2
|
||||
jne L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* need to subtract another once of the denom */
|
||||
movl XsigL(%ebx),%eax
|
||||
movl XsigH(%ebx),%edx
|
||||
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_1
|
||||
sbbl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
jne L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
addl $1,FPU_result_2 /* Correct the answer */
|
||||
adcl $0,FPU_result_3
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2 /* Must check for non-zero result here */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* The division is essentially finished here, we just need to perform
|
||||
tidying operations.
|
||||
Deal with the 3rd 32 bits */
|
||||
LDo_3rd_32_bits:
|
||||
/* We use an approximation for the third 32 bits.
|
||||
To take account of the 3rd 32 bits of the divisor
|
||||
(call them del), we subtract del * (a/b) */
|
||||
|
||||
movl FPU_result_3,%eax /* a/b */
|
||||
mull XsigLL(%ebx) /* del */
|
||||
|
||||
subl %edx,FPU_accum_1
|
||||
|
||||
/* A borrow indicates that the result is negative */
|
||||
jnb LTest_over
|
||||
|
||||
movl XsigH(%ebx),%edx
|
||||
addl %edx,FPU_accum_1
|
||||
|
||||
subl $1,FPU_result_2 /* Adjust the answer */
|
||||
sbbl $0,FPU_result_3
|
||||
|
||||
/* The above addition might not have been enough, check again. */
|
||||
movl FPU_accum_1,%edx /* get the reduced num */
|
||||
cmpl XsigH(%ebx),%edx /* denom */
|
||||
jb LDo_3rd_div
|
||||
|
||||
movl XsigH(%ebx),%edx
|
||||
addl %edx,FPU_accum_1
|
||||
|
||||
subl $1,FPU_result_2 /* Adjust the answer */
|
||||
sbbl $0,FPU_result_3
|
||||
jmp LDo_3rd_div
|
||||
|
||||
LTest_over:
|
||||
movl FPU_accum_1,%edx /* get the reduced num */
|
||||
|
||||
/* need to check for possible subsequent overflow */
|
||||
cmpl XsigH(%ebx),%edx /* denom */
|
||||
jb LDo_3rd_div
|
||||
|
||||
/* prevent overflow */
|
||||
subl XsigH(%ebx),%edx
|
||||
movl %edx,FPU_accum_1
|
||||
|
||||
addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
|
||||
adcl $0,FPU_result_3
|
||||
|
||||
LDo_3rd_div:
|
||||
movl FPU_accum_0,%eax
|
||||
movl FPU_accum_1,%edx
|
||||
divl XsigH(%ebx)
|
||||
|
||||
movl %eax,FPU_result_1 /* Rough estimate of third word */
|
||||
|
||||
movl PARAM3,%esi /* pointer to answer */
|
||||
|
||||
movl FPU_result_1,%eax
|
||||
movl %eax,XsigLL(%esi)
|
||||
movl FPU_result_2,%eax
|
||||
movl %eax,XsigL(%esi)
|
||||
movl FPU_result_3,%eax
|
||||
movl %eax,XsigH(%esi)
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
/* The logic is wrong if we got here */
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x240
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_bugged_1:
|
||||
pushl EX_INTERNAL|0x241
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_bugged_2:
|
||||
pushl EX_INTERNAL|0x242
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
#endif /* PARANOID */
|
47
arch/x86/math-emu/div_small.S
Normal file
47
arch/x86/math-emu/div_small.S
Normal file
@@ -0,0 +1,47 @@
|
||||
.file "div_small.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| div_small.S |
|
||||
| |
|
||||
| Divide a 64 bit integer by a 32 bit integer & return remainder. |
|
||||
| |
|
||||
| Copyright (C) 1992,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| unsigned long FPU_div_small(unsigned long long *x, unsigned long y) |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
.text
|
||||
ENTRY(FPU_div_small)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi /* pointer to num */
|
||||
movl PARAM2,%ecx /* The denominator */
|
||||
|
||||
movl 4(%esi),%eax /* Get the current num msw */
|
||||
xorl %edx,%edx
|
||||
divl %ecx
|
||||
|
||||
movl %eax,4(%esi)
|
||||
|
||||
movl (%esi),%eax /* Get the num lsw */
|
||||
divl %ecx
|
||||
|
||||
movl %eax,(%esi)
|
||||
|
||||
movl %edx,%eax /* Return the remainder in eax */
|
||||
|
||||
popl %esi
|
||||
|
||||
leave
|
||||
ret
|
||||
|
739
arch/x86/math-emu/errors.c
Normal file
739
arch/x86/math-emu/errors.c
Normal file
@@ -0,0 +1,739 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| errors.c |
|
||||
| |
|
||||
| The error handling functions for wm-FPU-emu |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1996 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Note: |
|
||||
| The file contains code which accesses user memory. |
|
||||
| Emulator static data may change when user memory is accessed, due to |
|
||||
| other processes using the emulator while swapping is in progress. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include <linux/signal.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
#include "fpu_emu.h"
|
||||
#include "fpu_system.h"
|
||||
#include "exception.h"
|
||||
#include "status_w.h"
|
||||
#include "control_w.h"
|
||||
#include "reg_constant.h"
|
||||
#include "version.h"
|
||||
|
||||
/* */
|
||||
#undef PRINT_MESSAGES
|
||||
/* */
|
||||
|
||||
|
||||
#if 0
|
||||
void Un_impl(void)
|
||||
{
|
||||
u_char byte1, FPU_modrm;
|
||||
unsigned long address = FPU_ORIG_EIP;
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
/* No need to check access_ok(), we have previously fetched these bytes. */
|
||||
printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address);
|
||||
if ( FPU_CS == __USER_CS )
|
||||
{
|
||||
while ( 1 )
|
||||
{
|
||||
FPU_get_user(byte1, (u_char __user *) address);
|
||||
if ( (byte1 & 0xf8) == 0xd8 ) break;
|
||||
printk("[%02x]", byte1);
|
||||
address++;
|
||||
}
|
||||
printk("%02x ", byte1);
|
||||
FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
|
||||
|
||||
if (FPU_modrm >= 0300)
|
||||
printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
|
||||
else
|
||||
printk("/%d\n", (FPU_modrm >> 3) & 7);
|
||||
}
|
||||
else
|
||||
{
|
||||
printk("cs selector = %04x\n", FPU_CS);
|
||||
}
|
||||
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
|
||||
EXCEPTION(EX_Invalid);
|
||||
|
||||
}
|
||||
#endif /* 0 */
|
||||
|
||||
|
||||
/*
|
||||
Called for opcodes which are illegal and which are known to result in a
|
||||
SIGILL with a real 80486.
|
||||
*/
|
||||
void FPU_illegal(void)
|
||||
{
|
||||
math_abort(FPU_info,SIGILL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void FPU_printall(void)
|
||||
{
|
||||
int i;
|
||||
static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty",
|
||||
"DeNorm", "Inf", "NaN" };
|
||||
u_char byte1, FPU_modrm;
|
||||
unsigned long address = FPU_ORIG_EIP;
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
/* No need to check access_ok(), we have previously fetched these bytes. */
|
||||
printk("At %p:", (void *) address);
|
||||
if ( FPU_CS == __USER_CS )
|
||||
{
|
||||
#define MAX_PRINTED_BYTES 20
|
||||
for ( i = 0; i < MAX_PRINTED_BYTES; i++ )
|
||||
{
|
||||
FPU_get_user(byte1, (u_char __user *) address);
|
||||
if ( (byte1 & 0xf8) == 0xd8 )
|
||||
{
|
||||
printk(" %02x", byte1);
|
||||
break;
|
||||
}
|
||||
printk(" [%02x]", byte1);
|
||||
address++;
|
||||
}
|
||||
if ( i == MAX_PRINTED_BYTES )
|
||||
printk(" [more..]\n");
|
||||
else
|
||||
{
|
||||
FPU_get_user(FPU_modrm, 1 + (u_char __user *) address);
|
||||
|
||||
if (FPU_modrm >= 0300)
|
||||
printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
|
||||
else
|
||||
printk(" /%d, mod=%d rm=%d\n",
|
||||
(FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printk("%04x\n", FPU_CS);
|
||||
}
|
||||
|
||||
partial_status = status_word();
|
||||
|
||||
#ifdef DEBUGGING
|
||||
if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n");
|
||||
if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n");
|
||||
if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n");
|
||||
if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n");
|
||||
if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n");
|
||||
if ( partial_status & SW_Summary ) printk("SW: exception summary\n");
|
||||
if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n");
|
||||
if ( partial_status & SW_Precision ) printk("SW: loss of precision\n");
|
||||
if ( partial_status & SW_Underflow ) printk("SW: underflow\n");
|
||||
if ( partial_status & SW_Overflow ) printk("SW: overflow\n");
|
||||
if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n");
|
||||
if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n");
|
||||
if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n");
|
||||
#endif /* DEBUGGING */
|
||||
|
||||
printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n",
|
||||
partial_status & 0x8000 ? 1 : 0, /* busy */
|
||||
(partial_status & 0x3800) >> 11, /* stack top pointer */
|
||||
partial_status & 0x80 ? 1 : 0, /* Error summary status */
|
||||
partial_status & 0x40 ? 1 : 0, /* Stack flag */
|
||||
partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */
|
||||
partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */
|
||||
partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0,
|
||||
partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0,
|
||||
partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0);
|
||||
|
||||
printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n",
|
||||
control_word & 0x1000 ? 1 : 0,
|
||||
(control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
|
||||
(control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
|
||||
control_word & 0x80 ? 1 : 0,
|
||||
control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0,
|
||||
control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0,
|
||||
control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0);
|
||||
|
||||
for ( i = 0; i < 8; i++ )
|
||||
{
|
||||
FPU_REG *r = &st(i);
|
||||
u_char tagi = FPU_gettagi(i);
|
||||
switch (tagi)
|
||||
{
|
||||
case TAG_Empty:
|
||||
continue;
|
||||
break;
|
||||
case TAG_Zero:
|
||||
case TAG_Special:
|
||||
tagi = FPU_Special(r);
|
||||
case TAG_Valid:
|
||||
printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
|
||||
getsign(r) ? '-' : '+',
|
||||
(long)(r->sigh >> 16),
|
||||
(long)(r->sigh & 0xFFFF),
|
||||
(long)(r->sigl >> 16),
|
||||
(long)(r->sigl & 0xFFFF),
|
||||
exponent(r) - EXP_BIAS + 1);
|
||||
break;
|
||||
default:
|
||||
printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi);
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
printk("%s\n", tag_desc[(int) (unsigned) tagi]);
|
||||
}
|
||||
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
|
||||
}
|
||||
|
||||
static struct {
|
||||
int type;
|
||||
const char *name;
|
||||
} exception_names[] = {
|
||||
{ EX_StackOver, "stack overflow" },
|
||||
{ EX_StackUnder, "stack underflow" },
|
||||
{ EX_Precision, "loss of precision" },
|
||||
{ EX_Underflow, "underflow" },
|
||||
{ EX_Overflow, "overflow" },
|
||||
{ EX_ZeroDiv, "divide by zero" },
|
||||
{ EX_Denormal, "denormalized operand" },
|
||||
{ EX_Invalid, "invalid operation" },
|
||||
{ EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION },
|
||||
{ 0, NULL }
|
||||
};
|
||||
|
||||
/*
|
||||
EX_INTERNAL is always given with a code which indicates where the
|
||||
error was detected.
|
||||
|
||||
Internal error types:
|
||||
0x14 in fpu_etc.c
|
||||
0x1nn in a *.c file:
|
||||
0x101 in reg_add_sub.c
|
||||
0x102 in reg_mul.c
|
||||
0x104 in poly_atan.c
|
||||
0x105 in reg_mul.c
|
||||
0x107 in fpu_trig.c
|
||||
0x108 in reg_compare.c
|
||||
0x109 in reg_compare.c
|
||||
0x110 in reg_add_sub.c
|
||||
0x111 in fpe_entry.c
|
||||
0x112 in fpu_trig.c
|
||||
0x113 in errors.c
|
||||
0x115 in fpu_trig.c
|
||||
0x116 in fpu_trig.c
|
||||
0x117 in fpu_trig.c
|
||||
0x118 in fpu_trig.c
|
||||
0x119 in fpu_trig.c
|
||||
0x120 in poly_atan.c
|
||||
0x121 in reg_compare.c
|
||||
0x122 in reg_compare.c
|
||||
0x123 in reg_compare.c
|
||||
0x125 in fpu_trig.c
|
||||
0x126 in fpu_entry.c
|
||||
0x127 in poly_2xm1.c
|
||||
0x128 in fpu_entry.c
|
||||
0x129 in fpu_entry.c
|
||||
0x130 in get_address.c
|
||||
0x131 in get_address.c
|
||||
0x132 in get_address.c
|
||||
0x133 in get_address.c
|
||||
0x140 in load_store.c
|
||||
0x141 in load_store.c
|
||||
0x150 in poly_sin.c
|
||||
0x151 in poly_sin.c
|
||||
0x160 in reg_ld_str.c
|
||||
0x161 in reg_ld_str.c
|
||||
0x162 in reg_ld_str.c
|
||||
0x163 in reg_ld_str.c
|
||||
0x164 in reg_ld_str.c
|
||||
0x170 in fpu_tags.c
|
||||
0x171 in fpu_tags.c
|
||||
0x172 in fpu_tags.c
|
||||
0x180 in reg_convert.c
|
||||
0x2nn in an *.S file:
|
||||
0x201 in reg_u_add.S
|
||||
0x202 in reg_u_div.S
|
||||
0x203 in reg_u_div.S
|
||||
0x204 in reg_u_div.S
|
||||
0x205 in reg_u_mul.S
|
||||
0x206 in reg_u_sub.S
|
||||
0x207 in wm_sqrt.S
|
||||
0x208 in reg_div.S
|
||||
0x209 in reg_u_sub.S
|
||||
0x210 in reg_u_sub.S
|
||||
0x211 in reg_u_sub.S
|
||||
0x212 in reg_u_sub.S
|
||||
0x213 in wm_sqrt.S
|
||||
0x214 in wm_sqrt.S
|
||||
0x215 in wm_sqrt.S
|
||||
0x220 in reg_norm.S
|
||||
0x221 in reg_norm.S
|
||||
0x230 in reg_round.S
|
||||
0x231 in reg_round.S
|
||||
0x232 in reg_round.S
|
||||
0x233 in reg_round.S
|
||||
0x234 in reg_round.S
|
||||
0x235 in reg_round.S
|
||||
0x236 in reg_round.S
|
||||
0x240 in div_Xsig.S
|
||||
0x241 in div_Xsig.S
|
||||
0x242 in div_Xsig.S
|
||||
*/
|
||||
|
||||
asmlinkage void FPU_exception(int n)
|
||||
{
|
||||
int i, int_type;
|
||||
|
||||
int_type = 0; /* Needed only to stop compiler warnings */
|
||||
if ( n & EX_INTERNAL )
|
||||
{
|
||||
int_type = n - EX_INTERNAL;
|
||||
n = EX_INTERNAL;
|
||||
/* Set lots of exception bits! */
|
||||
partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Extract only the bits which we use to set the status word */
|
||||
n &= (SW_Exc_Mask);
|
||||
/* Set the corresponding exception bit */
|
||||
partial_status |= n;
|
||||
/* Set summary bits iff exception isn't masked */
|
||||
if ( partial_status & ~control_word & CW_Exceptions )
|
||||
partial_status |= (SW_Summary | SW_Backward);
|
||||
if ( n & (SW_Stack_Fault | EX_Precision) )
|
||||
{
|
||||
if ( !(n & SW_C1) )
|
||||
/* This bit distinguishes over- from underflow for a stack fault,
|
||||
and roundup from round-down for precision loss. */
|
||||
partial_status &= ~SW_C1;
|
||||
}
|
||||
}
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) )
|
||||
{
|
||||
#ifdef PRINT_MESSAGES
|
||||
/* My message from the sponsor */
|
||||
printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n");
|
||||
#endif /* PRINT_MESSAGES */
|
||||
|
||||
/* Get a name string for error reporting */
|
||||
for (i=0; exception_names[i].type; i++)
|
||||
if ( (exception_names[i].type & n) == exception_names[i].type )
|
||||
break;
|
||||
|
||||
if (exception_names[i].type)
|
||||
{
|
||||
#ifdef PRINT_MESSAGES
|
||||
printk("FP Exception: %s!\n", exception_names[i].name);
|
||||
#endif /* PRINT_MESSAGES */
|
||||
}
|
||||
else
|
||||
printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
|
||||
|
||||
if ( n == EX_INTERNAL )
|
||||
{
|
||||
printk("FPU emulator: Internal error type 0x%04x\n", int_type);
|
||||
FPU_printall();
|
||||
}
|
||||
#ifdef PRINT_MESSAGES
|
||||
else
|
||||
FPU_printall();
|
||||
#endif /* PRINT_MESSAGES */
|
||||
|
||||
/*
|
||||
* The 80486 generates an interrupt on the next non-control FPU
|
||||
* instruction. So we need some means of flagging it.
|
||||
* We use the ES (Error Summary) bit for this.
|
||||
*/
|
||||
}
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
|
||||
#ifdef __DEBUG__
|
||||
math_abort(FPU_info,SIGFPE);
|
||||
#endif /* __DEBUG__ */
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Real operation attempted on a NaN. */
|
||||
/* Returns < 0 if the exception is unmasked */
|
||||
int real_1op_NaN(FPU_REG *a)
|
||||
{
|
||||
int signalling, isNaN;
|
||||
|
||||
isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
|
||||
|
||||
/* The default result for the case of two "equal" NaNs (signs may
|
||||
differ) is chosen to reproduce 80486 behaviour */
|
||||
signalling = isNaN && !(a->sigh & 0x40000000);
|
||||
|
||||
if ( !signalling )
|
||||
{
|
||||
if ( !isNaN ) /* pseudo-NaN, or other unsupported? */
|
||||
{
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* Masked response */
|
||||
reg_copy(&CONST_QNaN, a);
|
||||
}
|
||||
EXCEPTION(EX_Invalid);
|
||||
return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
|
||||
}
|
||||
return TAG_Special;
|
||||
}
|
||||
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* The masked response */
|
||||
if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? */
|
||||
{
|
||||
reg_copy(&CONST_QNaN, a);
|
||||
}
|
||||
/* ensure a Quiet NaN */
|
||||
a->sigh |= 0x40000000;
|
||||
}
|
||||
|
||||
EXCEPTION(EX_Invalid);
|
||||
|
||||
return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
|
||||
}
|
||||
|
||||
|
||||
/* Real operation attempted on two operands, one a NaN. */
|
||||
/* Returns < 0 if the exception is unmasked */
|
||||
int real_2op_NaN(FPU_REG const *b, u_char tagb,
|
||||
int deststnr,
|
||||
FPU_REG const *defaultNaN)
|
||||
{
|
||||
FPU_REG *dest = &st(deststnr);
|
||||
FPU_REG const *a = dest;
|
||||
u_char taga = FPU_gettagi(deststnr);
|
||||
FPU_REG const *x;
|
||||
int signalling, unsupported;
|
||||
|
||||
if ( taga == TAG_Special )
|
||||
taga = FPU_Special(a);
|
||||
if ( tagb == TAG_Special )
|
||||
tagb = FPU_Special(b);
|
||||
|
||||
/* TW_NaN is also used for unsupported data types. */
|
||||
unsupported = ((taga == TW_NaN)
|
||||
&& !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000)))
|
||||
|| ((tagb == TW_NaN)
|
||||
&& !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
|
||||
if ( unsupported )
|
||||
{
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* Masked response */
|
||||
FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
|
||||
}
|
||||
EXCEPTION(EX_Invalid);
|
||||
return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
|
||||
}
|
||||
|
||||
if (taga == TW_NaN)
|
||||
{
|
||||
x = a;
|
||||
if (tagb == TW_NaN)
|
||||
{
|
||||
signalling = !(a->sigh & b->sigh & 0x40000000);
|
||||
if ( significand(b) > significand(a) )
|
||||
x = b;
|
||||
else if ( significand(b) == significand(a) )
|
||||
{
|
||||
/* The default result for the case of two "equal" NaNs (signs may
|
||||
differ) is chosen to reproduce 80486 behaviour */
|
||||
x = defaultNaN;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* return the quiet version of the NaN in a */
|
||||
signalling = !(a->sigh & 0x40000000);
|
||||
}
|
||||
}
|
||||
else
|
||||
#ifdef PARANOID
|
||||
if (tagb == TW_NaN)
|
||||
#endif /* PARANOID */
|
||||
{
|
||||
signalling = !(b->sigh & 0x40000000);
|
||||
x = b;
|
||||
}
|
||||
#ifdef PARANOID
|
||||
else
|
||||
{
|
||||
signalling = 0;
|
||||
EXCEPTION(EX_INTERNAL|0x113);
|
||||
x = &CONST_QNaN;
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
|
||||
if ( (!signalling) || (control_word & CW_Invalid) )
|
||||
{
|
||||
if ( ! x )
|
||||
x = b;
|
||||
|
||||
if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */
|
||||
x = &CONST_QNaN;
|
||||
|
||||
FPU_copy_to_regi(x, TAG_Special, deststnr);
|
||||
|
||||
if ( !signalling )
|
||||
return TAG_Special;
|
||||
|
||||
/* ensure a Quiet NaN */
|
||||
dest->sigh |= 0x40000000;
|
||||
}
|
||||
|
||||
EXCEPTION(EX_Invalid);
|
||||
|
||||
return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
|
||||
}
|
||||
|
||||
|
||||
/* Invalid arith operation on Valid registers */
|
||||
/* Returns < 0 if the exception is unmasked */
|
||||
asmlinkage int arith_invalid(int deststnr)
|
||||
{
|
||||
|
||||
EXCEPTION(EX_Invalid);
|
||||
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* The masked response */
|
||||
FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
|
||||
}
|
||||
|
||||
return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Divide a finite number by zero */
|
||||
asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
|
||||
{
|
||||
FPU_REG *dest = &st(deststnr);
|
||||
int tag = TAG_Valid;
|
||||
|
||||
if ( control_word & CW_ZeroDiv )
|
||||
{
|
||||
/* The masked response */
|
||||
FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
|
||||
setsign(dest, sign);
|
||||
tag = TAG_Special;
|
||||
}
|
||||
|
||||
EXCEPTION(EX_ZeroDiv);
|
||||
|
||||
return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* This may be called often, so keep it lean */
|
||||
int set_precision_flag(int flags)
|
||||
{
|
||||
if ( control_word & CW_Precision )
|
||||
{
|
||||
partial_status &= ~(SW_C1 & flags);
|
||||
partial_status |= flags; /* The masked response */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
EXCEPTION(flags);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* This may be called often, so keep it lean */
|
||||
asmlinkage void set_precision_flag_up(void)
|
||||
{
|
||||
if ( control_word & CW_Precision )
|
||||
partial_status |= (SW_Precision | SW_C1); /* The masked response */
|
||||
else
|
||||
EXCEPTION(EX_Precision | SW_C1);
|
||||
}
|
||||
|
||||
|
||||
/* This may be called often, so keep it lean */
|
||||
asmlinkage void set_precision_flag_down(void)
|
||||
{
|
||||
if ( control_word & CW_Precision )
|
||||
{ /* The masked response */
|
||||
partial_status &= ~SW_C1;
|
||||
partial_status |= SW_Precision;
|
||||
}
|
||||
else
|
||||
EXCEPTION(EX_Precision);
|
||||
}
|
||||
|
||||
|
||||
asmlinkage int denormal_operand(void)
|
||||
{
|
||||
if ( control_word & CW_Denormal )
|
||||
{ /* The masked response */
|
||||
partial_status |= SW_Denorm_Op;
|
||||
return TAG_Special;
|
||||
}
|
||||
else
|
||||
{
|
||||
EXCEPTION(EX_Denormal);
|
||||
return TAG_Special | FPU_Exception;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
asmlinkage int arith_overflow(FPU_REG *dest)
|
||||
{
|
||||
int tag = TAG_Valid;
|
||||
|
||||
if ( control_word & CW_Overflow )
|
||||
{
|
||||
/* The masked response */
|
||||
/* ###### The response here depends upon the rounding mode */
|
||||
reg_copy(&CONST_INF, dest);
|
||||
tag = TAG_Special;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Subtract the magic number from the exponent */
|
||||
addexponent(dest, (-3 * (1 << 13)));
|
||||
}
|
||||
|
||||
EXCEPTION(EX_Overflow);
|
||||
if ( control_word & CW_Overflow )
|
||||
{
|
||||
/* The overflow exception is masked. */
|
||||
/* By definition, precision is lost.
|
||||
The roundup bit (C1) is also set because we have
|
||||
"rounded" upwards to Infinity. */
|
||||
EXCEPTION(EX_Precision | SW_C1);
|
||||
return tag;
|
||||
}
|
||||
|
||||
return tag;
|
||||
|
||||
}
|
||||
|
||||
|
||||
asmlinkage int arith_underflow(FPU_REG *dest)
|
||||
{
|
||||
int tag = TAG_Valid;
|
||||
|
||||
if ( control_word & CW_Underflow )
|
||||
{
|
||||
/* The masked response */
|
||||
if ( exponent16(dest) <= EXP_UNDER - 63 )
|
||||
{
|
||||
reg_copy(&CONST_Z, dest);
|
||||
partial_status &= ~SW_C1; /* Round down. */
|
||||
tag = TAG_Zero;
|
||||
}
|
||||
else
|
||||
{
|
||||
stdexp(dest);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Add the magic number to the exponent. */
|
||||
addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
|
||||
}
|
||||
|
||||
EXCEPTION(EX_Underflow);
|
||||
if ( control_word & CW_Underflow )
|
||||
{
|
||||
/* The underflow exception is masked. */
|
||||
EXCEPTION(EX_Precision);
|
||||
return tag;
|
||||
}
|
||||
|
||||
return tag;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void FPU_stack_overflow(void)
|
||||
{
|
||||
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* The masked response */
|
||||
top--;
|
||||
FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
|
||||
}
|
||||
|
||||
EXCEPTION(EX_StackOver);
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void FPU_stack_underflow(void)
|
||||
{
|
||||
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* The masked response */
|
||||
FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
|
||||
}
|
||||
|
||||
EXCEPTION(EX_StackUnder);
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void FPU_stack_underflow_i(int i)
|
||||
{
|
||||
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* The masked response */
|
||||
FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
|
||||
}
|
||||
|
||||
EXCEPTION(EX_StackUnder);
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void FPU_stack_underflow_pop(int i)
|
||||
{
|
||||
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* The masked response */
|
||||
FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
EXCEPTION(EX_StackUnder);
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
53
arch/x86/math-emu/exception.h
Normal file
53
arch/x86/math-emu/exception.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| exception.h |
|
||||
| |
|
||||
| Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef _EXCEPTION_H_
|
||||
#define _EXCEPTION_H_
|
||||
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
#define Const_(x) $##x
|
||||
#else
|
||||
#define Const_(x) x
|
||||
#endif
|
||||
|
||||
#ifndef SW_C1
|
||||
#include "fpu_emu.h"
|
||||
#endif /* SW_C1 */
|
||||
|
||||
#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */
|
||||
#define EX_ErrorSummary Const_(0x0080) /* Error summary status */
|
||||
/* Special exceptions: */
|
||||
#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */
|
||||
#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */
|
||||
#define EX_StackUnder Const_(0x0041) /* stack underflow */
|
||||
/* Exception flags: */
|
||||
#define EX_Precision Const_(0x0020) /* loss of precision */
|
||||
#define EX_Underflow Const_(0x0010) /* underflow */
|
||||
#define EX_Overflow Const_(0x0008) /* overflow */
|
||||
#define EX_ZeroDiv Const_(0x0004) /* divide by zero */
|
||||
#define EX_Denormal Const_(0x0002) /* denormalized operand */
|
||||
#define EX_Invalid Const_(0x0001) /* invalid operation */
|
||||
|
||||
|
||||
#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1))
|
||||
#define PRECISION_LOST_DOWN Const_(EX_Precision)
|
||||
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#ifdef DEBUG
|
||||
#define EXCEPTION(x) { printk("exception in %s at line %d\n", \
|
||||
__FILE__, __LINE__); FPU_exception(x); }
|
||||
#else
|
||||
#define EXCEPTION(x) FPU_exception(x)
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _EXCEPTION_H_ */
|
174
arch/x86/math-emu/fpu_arith.c
Normal file
174
arch/x86/math-emu/fpu_arith.c
Normal file
@@ -0,0 +1,174 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| fpu_arith.c |
|
||||
| |
|
||||
| Code to implement the FPU register/register arithmetic instructions |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
#include "status_w.h"
|
||||
|
||||
|
||||
void fadd__(void)
|
||||
{
|
||||
/* fadd st,st(i) */
|
||||
int i = FPU_rm;
|
||||
clear_C1();
|
||||
FPU_add(&st(i), FPU_gettagi(i), 0, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fmul__(void)
|
||||
{
|
||||
/* fmul st,st(i) */
|
||||
int i = FPU_rm;
|
||||
clear_C1();
|
||||
FPU_mul(&st(i), FPU_gettagi(i), 0, control_word);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void fsub__(void)
|
||||
{
|
||||
/* fsub st,st(i) */
|
||||
clear_C1();
|
||||
FPU_sub(0, FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fsubr_(void)
|
||||
{
|
||||
/* fsubr st,st(i) */
|
||||
clear_C1();
|
||||
FPU_sub(REV, FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fdiv__(void)
|
||||
{
|
||||
/* fdiv st,st(i) */
|
||||
clear_C1();
|
||||
FPU_div(0, FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fdivr_(void)
|
||||
{
|
||||
/* fdivr st,st(i) */
|
||||
clear_C1();
|
||||
FPU_div(REV, FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void fadd_i(void)
|
||||
{
|
||||
/* fadd st(i),st */
|
||||
int i = FPU_rm;
|
||||
clear_C1();
|
||||
FPU_add(&st(i), FPU_gettagi(i), i, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fmul_i(void)
|
||||
{
|
||||
/* fmul st(i),st */
|
||||
clear_C1();
|
||||
FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fsubri(void)
|
||||
{
|
||||
/* fsubr st(i),st */
|
||||
clear_C1();
|
||||
FPU_sub(DEST_RM, FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fsub_i(void)
|
||||
{
|
||||
/* fsub st(i),st */
|
||||
clear_C1();
|
||||
FPU_sub(REV|DEST_RM, FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fdivri(void)
|
||||
{
|
||||
/* fdivr st(i),st */
|
||||
clear_C1();
|
||||
FPU_div(DEST_RM, FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
void fdiv_i(void)
|
||||
{
|
||||
/* fdiv st(i),st */
|
||||
clear_C1();
|
||||
FPU_div(REV|DEST_RM, FPU_rm, control_word);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void faddp_(void)
|
||||
{
|
||||
/* faddp st(i),st */
|
||||
int i = FPU_rm;
|
||||
clear_C1();
|
||||
if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 )
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
|
||||
void fmulp_(void)
|
||||
{
|
||||
/* fmulp st(i),st */
|
||||
clear_C1();
|
||||
if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 )
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
|
||||
|
||||
void fsubrp(void)
|
||||
{
|
||||
/* fsubrp st(i),st */
|
||||
clear_C1();
|
||||
if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 )
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
|
||||
void fsubp_(void)
|
||||
{
|
||||
/* fsubp st(i),st */
|
||||
clear_C1();
|
||||
if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 )
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
|
||||
void fdivrp(void)
|
||||
{
|
||||
/* fdivrp st(i),st */
|
||||
clear_C1();
|
||||
if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 )
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
|
||||
void fdivp_(void)
|
||||
{
|
||||
/* fdivp st(i),st */
|
||||
clear_C1();
|
||||
if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 )
|
||||
FPU_pop();
|
||||
}
|
32
arch/x86/math-emu/fpu_asm.h
Normal file
32
arch/x86/math-emu/fpu_asm.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| fpu_asm.h |
|
||||
| |
|
||||
| Copyright (C) 1992,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef _FPU_ASM_H_
|
||||
#define _FPU_ASM_H_
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#define EXCEPTION FPU_exception
|
||||
|
||||
|
||||
#define PARAM1 8(%ebp)
|
||||
#define PARAM2 12(%ebp)
|
||||
#define PARAM3 16(%ebp)
|
||||
#define PARAM4 20(%ebp)
|
||||
#define PARAM5 24(%ebp)
|
||||
#define PARAM6 28(%ebp)
|
||||
#define PARAM7 32(%ebp)
|
||||
|
||||
#define SIGL_OFFSET 0
|
||||
#define EXP(x) 8(x)
|
||||
#define SIG(x) SIGL_OFFSET##(x)
|
||||
#define SIGL(x) SIGL_OFFSET##(x)
|
||||
#define SIGH(x) 4(x)
|
||||
|
||||
#endif /* _FPU_ASM_H_ */
|
204
arch/x86/math-emu/fpu_aux.c
Normal file
204
arch/x86/math-emu/fpu_aux.c
Normal file
@@ -0,0 +1,204 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| fpu_aux.c |
|
||||
| |
|
||||
| Code to implement some of the FPU auxiliary instructions. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "status_w.h"
|
||||
#include "control_w.h"
|
||||
|
||||
|
||||
static void fnop(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void fclex(void)
|
||||
{
|
||||
partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision|
|
||||
SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op|
|
||||
SW_Invalid);
|
||||
no_ip_update = 1;
|
||||
}
|
||||
|
||||
/* Needs to be externally visible */
|
||||
void finit(void)
|
||||
{
|
||||
control_word = 0x037f;
|
||||
partial_status = 0;
|
||||
top = 0; /* We don't keep top in the status word internally. */
|
||||
fpu_tag_word = 0xffff;
|
||||
/* The behaviour is different from that detailed in
|
||||
Section 15.1.6 of the Intel manual */
|
||||
operand_address.offset = 0;
|
||||
operand_address.selector = 0;
|
||||
instruction_address.offset = 0;
|
||||
instruction_address.selector = 0;
|
||||
instruction_address.opcode = 0;
|
||||
no_ip_update = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* These are nops on the i387..
|
||||
*/
|
||||
#define feni fnop
|
||||
#define fdisi fnop
|
||||
#define fsetpm fnop
|
||||
|
||||
static FUNC const finit_table[] = {
|
||||
feni, fdisi, fclex, finit,
|
||||
fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
|
||||
};
|
||||
|
||||
void finit_(void)
|
||||
{
|
||||
(finit_table[FPU_rm])();
|
||||
}
|
||||
|
||||
|
||||
static void fstsw_ax(void)
|
||||
{
|
||||
*(short *) &FPU_EAX = status_word();
|
||||
no_ip_update = 1;
|
||||
}
|
||||
|
||||
static FUNC const fstsw_table[] = {
|
||||
fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
|
||||
FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
|
||||
};
|
||||
|
||||
void fstsw_(void)
|
||||
{
|
||||
(fstsw_table[FPU_rm])();
|
||||
}
|
||||
|
||||
|
||||
static FUNC const fp_nop_table[] = {
|
||||
fnop, FPU_illegal, FPU_illegal, FPU_illegal,
|
||||
FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
|
||||
};
|
||||
|
||||
void fp_nop(void)
|
||||
{
|
||||
(fp_nop_table[FPU_rm])();
|
||||
}
|
||||
|
||||
|
||||
void fld_i_(void)
|
||||
{
|
||||
FPU_REG *st_new_ptr;
|
||||
int i;
|
||||
u_char tag;
|
||||
|
||||
if ( STACK_OVERFLOW )
|
||||
{ FPU_stack_overflow(); return; }
|
||||
|
||||
/* fld st(i) */
|
||||
i = FPU_rm;
|
||||
if ( NOT_EMPTY(i) )
|
||||
{
|
||||
reg_copy(&st(i), st_new_ptr);
|
||||
tag = FPU_gettagi(i);
|
||||
push();
|
||||
FPU_settag0(tag);
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* The masked response */
|
||||
FPU_stack_underflow();
|
||||
}
|
||||
else
|
||||
EXCEPTION(EX_StackUnder);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void fxch_i(void)
|
||||
{
|
||||
/* fxch st(i) */
|
||||
FPU_REG t;
|
||||
int i = FPU_rm;
|
||||
FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
|
||||
long tag_word = fpu_tag_word;
|
||||
int regnr = top & 7, regnri = ((regnr + i) & 7);
|
||||
u_char st0_tag = (tag_word >> (regnr*2)) & 3;
|
||||
u_char sti_tag = (tag_word >> (regnri*2)) & 3;
|
||||
|
||||
if ( st0_tag == TAG_Empty )
|
||||
{
|
||||
if ( sti_tag == TAG_Empty )
|
||||
{
|
||||
FPU_stack_underflow();
|
||||
FPU_stack_underflow_i(i);
|
||||
return;
|
||||
}
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* Masked response */
|
||||
FPU_copy_to_reg0(sti_ptr, sti_tag);
|
||||
}
|
||||
FPU_stack_underflow_i(i);
|
||||
return;
|
||||
}
|
||||
if ( sti_tag == TAG_Empty )
|
||||
{
|
||||
if ( control_word & CW_Invalid )
|
||||
{
|
||||
/* Masked response */
|
||||
FPU_copy_to_regi(st0_ptr, st0_tag, i);
|
||||
}
|
||||
FPU_stack_underflow();
|
||||
return;
|
||||
}
|
||||
clear_C1();
|
||||
|
||||
reg_copy(st0_ptr, &t);
|
||||
reg_copy(sti_ptr, st0_ptr);
|
||||
reg_copy(&t, sti_ptr);
|
||||
|
||||
tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2));
|
||||
tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2));
|
||||
fpu_tag_word = tag_word;
|
||||
}
|
||||
|
||||
|
||||
void ffree_(void)
|
||||
{
|
||||
/* ffree st(i) */
|
||||
FPU_settagi(FPU_rm, TAG_Empty);
|
||||
}
|
||||
|
||||
|
||||
void ffreep(void)
|
||||
{
|
||||
/* ffree st(i) + pop - unofficial code */
|
||||
FPU_settagi(FPU_rm, TAG_Empty);
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
|
||||
void fst_i_(void)
|
||||
{
|
||||
/* fst st(i) */
|
||||
FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
|
||||
}
|
||||
|
||||
|
||||
void fstp_i(void)
|
||||
{
|
||||
/* fstp st(i) */
|
||||
FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
|
||||
FPU_pop();
|
||||
}
|
||||
|
218
arch/x86/math-emu/fpu_emu.h
Normal file
218
arch/x86/math-emu/fpu_emu.h
Normal file
@@ -0,0 +1,218 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| fpu_emu.h |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
#ifndef _FPU_EMU_H_
|
||||
#define _FPU_EMU_H_
|
||||
|
||||
/*
|
||||
* Define PECULIAR_486 to get a closer approximation to 80486 behaviour,
|
||||
* rather than behaviour which appears to be cleaner.
|
||||
* This is a matter of opinion: for all I know, the 80486 may simply
|
||||
* be complying with the IEEE spec. Maybe one day I'll get to see the
|
||||
* spec...
|
||||
*/
|
||||
#define PECULIAR_486
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
#include "fpu_asm.h"
|
||||
#define Const(x) $##x
|
||||
#else
|
||||
#define Const(x) x
|
||||
#endif
|
||||
|
||||
#define EXP_BIAS Const(0)
|
||||
#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */
|
||||
#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */
|
||||
#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but
|
||||
still a 16 bit nr. */
|
||||
#define EXP_Infinity EXP_OVER
|
||||
#define EXP_NaN EXP_OVER
|
||||
|
||||
#define EXTENDED_Ebias Const(0x3fff)
|
||||
#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */
|
||||
|
||||
#define SIGN_POS Const(0)
|
||||
#define SIGN_NEG Const(0x80)
|
||||
|
||||
#define SIGN_Positive Const(0)
|
||||
#define SIGN_Negative Const(0x8000)
|
||||
|
||||
|
||||
/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */
|
||||
/* The following fold to 2 (Special) in the Tag Word */
|
||||
#define TW_Denormal Const(4) /* De-normal */
|
||||
#define TW_Infinity Const(5) /* + or - infinity */
|
||||
#define TW_NaN Const(6) /* Not a Number */
|
||||
#define TW_Unsupported Const(7) /* Not supported by an 80486 */
|
||||
|
||||
#define TAG_Valid Const(0) /* valid */
|
||||
#define TAG_Zero Const(1) /* zero */
|
||||
#define TAG_Special Const(2) /* De-normal, + or - infinity,
|
||||
or Not a Number */
|
||||
#define TAG_Empty Const(3) /* empty */
|
||||
#define TAG_Error Const(0x80) /* probably need to abort */
|
||||
|
||||
#define LOADED_DATA Const(10101) /* Special st() number to identify
|
||||
loaded data (not on stack). */
|
||||
|
||||
/* A few flags (must be >= 0x10). */
|
||||
#define REV 0x10
|
||||
#define DEST_RM 0x20
|
||||
#define LOADED 0x40
|
||||
|
||||
#define FPU_Exception Const(0x80000000) /* Added to tag returns. */
|
||||
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include "fpu_system.h"
|
||||
|
||||
#include <asm/sigcontext.h> /* for struct _fpstate */
|
||||
#include <asm/math_emu.h>
|
||||
#include <linux/linkage.h>
|
||||
|
||||
/*
|
||||
#define RE_ENTRANT_CHECKING
|
||||
*/
|
||||
|
||||
#ifdef RE_ENTRANT_CHECKING
|
||||
extern u_char emulating;
|
||||
# define RE_ENTRANT_CHECK_OFF emulating = 0
|
||||
# define RE_ENTRANT_CHECK_ON emulating = 1
|
||||
#else
|
||||
# define RE_ENTRANT_CHECK_OFF
|
||||
# define RE_ENTRANT_CHECK_ON
|
||||
#endif /* RE_ENTRANT_CHECKING */
|
||||
|
||||
#define FWAIT_OPCODE 0x9b
|
||||
#define OP_SIZE_PREFIX 0x66
|
||||
#define ADDR_SIZE_PREFIX 0x67
|
||||
#define PREFIX_CS 0x2e
|
||||
#define PREFIX_DS 0x3e
|
||||
#define PREFIX_ES 0x26
|
||||
#define PREFIX_SS 0x36
|
||||
#define PREFIX_FS 0x64
|
||||
#define PREFIX_GS 0x65
|
||||
#define PREFIX_REPE 0xf3
|
||||
#define PREFIX_REPNE 0xf2
|
||||
#define PREFIX_LOCK 0xf0
|
||||
#define PREFIX_CS_ 1
|
||||
#define PREFIX_DS_ 2
|
||||
#define PREFIX_ES_ 3
|
||||
#define PREFIX_FS_ 4
|
||||
#define PREFIX_GS_ 5
|
||||
#define PREFIX_SS_ 6
|
||||
#define PREFIX_DEFAULT 7
|
||||
|
||||
struct address {
|
||||
unsigned int offset;
|
||||
unsigned int selector:16;
|
||||
unsigned int opcode:11;
|
||||
unsigned int empty:5;
|
||||
};
|
||||
struct fpu__reg {
|
||||
unsigned sigl;
|
||||
unsigned sigh;
|
||||
short exp;
|
||||
};
|
||||
|
||||
typedef void (*FUNC)(void);
|
||||
typedef struct fpu__reg FPU_REG;
|
||||
typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag);
|
||||
typedef struct { u_char address_size, operand_size, segment; }
|
||||
overrides;
|
||||
/* This structure is 32 bits: */
|
||||
typedef struct { overrides override;
|
||||
u_char default_mode; } fpu_addr_modes;
|
||||
/* PROTECTED has a restricted meaning in the emulator; it is used
|
||||
to signal that the emulator needs to do special things to ensure
|
||||
that protection is respected in a segmented model. */
|
||||
#define PROTECTED 4
|
||||
#define SIXTEEN 1 /* We rely upon this being 1 (true) */
|
||||
#define VM86 SIXTEEN
|
||||
#define PM16 (SIXTEEN | PROTECTED)
|
||||
#define SEG32 PROTECTED
|
||||
extern u_char const data_sizes_16[32];
|
||||
|
||||
#define register_base ((u_char *) registers )
|
||||
#define fpu_register(x) ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) )
|
||||
#define st(x) ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) )
|
||||
|
||||
#define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr))
|
||||
#define NOT_EMPTY(i) (!FPU_empty_i(i))
|
||||
|
||||
#define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty)
|
||||
|
||||
#define poppop() { FPU_pop(); FPU_pop(); }
|
||||
|
||||
/* push() does not affect the tags */
|
||||
#define push() { top--; }
|
||||
|
||||
#define signbyte(a) (((u_char *)(a))[9])
|
||||
#define getsign(a) (signbyte(a) & 0x80)
|
||||
#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
|
||||
#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
|
||||
else signbyte(b) &= 0x7f; }
|
||||
#define changesign(a) { signbyte(a) ^= 0x80; }
|
||||
#define setpositive(a) { signbyte(a) &= 0x7f; }
|
||||
#define setnegative(a) { signbyte(a) |= 0x80; }
|
||||
#define signpositive(a) ( (signbyte(a) & 0x80) == 0 )
|
||||
#define signnegative(a) (signbyte(a) & 0x80)
|
||||
|
||||
static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
|
||||
{
|
||||
*(short *)&(y->exp) = *(const short *)&(x->exp);
|
||||
*(long long *)&(y->sigl) = *(const long long *)&(x->sigl);
|
||||
}
|
||||
|
||||
#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias)
|
||||
#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
|
||||
((y) + EXTENDED_Ebias) & 0x7fff; }
|
||||
#define exponent16(x) (*(short *)&((x)->exp))
|
||||
#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); }
|
||||
#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); }
|
||||
#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
|
||||
|
||||
#define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER)
|
||||
|
||||
#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )
|
||||
|
||||
|
||||
/*----- Prototypes for functions written in assembler -----*/
|
||||
/* extern void reg_move(FPU_REG *a, FPU_REG *b); */
|
||||
|
||||
asmlinkage int FPU_normalize(FPU_REG *x);
|
||||
asmlinkage int FPU_normalize_nuo(FPU_REG *x);
|
||||
asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
|
||||
FPU_REG *answ, unsigned int control_w, u_char sign,
|
||||
int expa, int expb);
|
||||
asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
|
||||
FPU_REG *answ, unsigned int control_w, u_char sign,
|
||||
int expon);
|
||||
asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
|
||||
FPU_REG *answ, unsigned int control_w, u_char sign);
|
||||
asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
|
||||
FPU_REG *answ, unsigned int control_w, u_char sign,
|
||||
int expa, int expb);
|
||||
asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2,
|
||||
unsigned int control_w, u_char sign);
|
||||
asmlinkage unsigned FPU_shrx(void *l, unsigned x);
|
||||
asmlinkage unsigned FPU_shrxs(void *v, unsigned x);
|
||||
asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y);
|
||||
asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,
|
||||
unsigned int control_w, u_char sign);
|
||||
|
||||
#ifndef MAKING_PROTO
|
||||
#include "fpu_proto.h"
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _FPU_EMU_H_ */
|
761
arch/x86/math-emu/fpu_entry.c
Normal file
761
arch/x86/math-emu/fpu_entry.c
Normal file
@@ -0,0 +1,761 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| fpu_entry.c |
|
||||
| |
|
||||
| The entry functions for wm-FPU-emu |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1996,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| See the files "README" and "COPYING" for further copyright and warranty |
|
||||
| information. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Note: |
|
||||
| The file contains code which accesses user memory. |
|
||||
| Emulator static data may change when user memory is accessed, due to |
|
||||
| other processes using the emulator while swapping is in progress. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| math_emulate(), restore_i387_soft() and save_i387_soft() are the only |
|
||||
| entry points for wm-FPU-emu. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include <linux/signal.h>
|
||||
#include <linux/ptrace.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/desc.h>
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "exception.h"
|
||||
#include "control_w.h"
|
||||
#include "status_w.h"
|
||||
|
||||
#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */
|
||||
|
||||
#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */
|
||||
|
||||
/* WARNING: These codes are not documented by Intel in their 80486 manual
|
||||
and may not work on FPU clones or later Intel FPUs. */
|
||||
|
||||
/* Changes to support the un-doc codes provided by Linus Torvalds. */
|
||||
|
||||
#define _d9_d8_ fstp_i /* unofficial code (19) */
|
||||
#define _dc_d0_ fcom_st /* unofficial code (14) */
|
||||
#define _dc_d8_ fcompst /* unofficial code (1c) */
|
||||
#define _dd_c8_ fxch_i /* unofficial code (0d) */
|
||||
#define _de_d0_ fcompst /* unofficial code (16) */
|
||||
#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */
|
||||
#define _df_c8_ fxch_i /* unofficial code (0f) */
|
||||
#define _df_d0_ fstp_i /* unofficial code (17) */
|
||||
#define _df_d8_ fstp_i /* unofficial code (1f) */
|
||||
|
||||
static FUNC const st_instr_table[64] = {
|
||||
fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
|
||||
fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
|
||||
fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
|
||||
fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
|
||||
fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
|
||||
fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
|
||||
fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
|
||||
fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
|
||||
};
|
||||
|
||||
#else /* Support only documented FPU op-codes */
|
||||
|
||||
static FUNC const st_instr_table[64] = {
|
||||
fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
|
||||
fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
|
||||
fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
|
||||
fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
|
||||
fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
|
||||
fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
|
||||
fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
|
||||
fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
|
||||
};
|
||||
|
||||
#endif /* NO_UNDOC_CODE */
|
||||
|
||||
|
||||
#define _NONE_ 0 /* Take no special action */
|
||||
#define _REG0_ 1 /* Need to check for not empty st(0) */
|
||||
#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */
|
||||
#define _REGi_ 0 /* Uses st(rm) */
|
||||
#define _PUSH_ 3 /* Need to check for space to push onto stack */
|
||||
#define _null_ 4 /* Function illegal or not implemented */
|
||||
#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */
|
||||
#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */
|
||||
#define _REGIc 0 /* Compare st(0) and st(rm) */
|
||||
#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */
|
||||
|
||||
#ifndef NO_UNDOC_CODE
|
||||
|
||||
/* Un-documented FPU op-codes supported by default. (see above) */
|
||||
|
||||
static u_char const type_table[64] = {
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
|
||||
_REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
|
||||
_REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
|
||||
_REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
|
||||
_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
|
||||
_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
|
||||
};
|
||||
|
||||
#else /* Support only documented FPU op-codes */
|
||||
|
||||
static u_char const type_table[64] = {
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
|
||||
_REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
|
||||
_REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
|
||||
_REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
|
||||
_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
|
||||
_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
|
||||
_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
|
||||
};
|
||||
|
||||
#endif /* NO_UNDOC_CODE */
|
||||
|
||||
|
||||
#ifdef RE_ENTRANT_CHECKING
|
||||
u_char emulating=0;
|
||||
#endif /* RE_ENTRANT_CHECKING */
|
||||
|
||||
static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
|
||||
overrides *override);
|
||||
|
||||
asmlinkage void math_emulate(long arg)
|
||||
{
|
||||
u_char FPU_modrm, byte1;
|
||||
unsigned short code;
|
||||
fpu_addr_modes addr_modes;
|
||||
int unmasked;
|
||||
FPU_REG loaded_data;
|
||||
FPU_REG *st0_ptr;
|
||||
u_char loaded_tag, st0_tag;
|
||||
void __user *data_address;
|
||||
struct address data_sel_off;
|
||||
struct address entry_sel_off;
|
||||
unsigned long code_base = 0;
|
||||
unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
|
||||
struct desc_struct code_descriptor;
|
||||
|
||||
#ifdef RE_ENTRANT_CHECKING
|
||||
if ( emulating )
|
||||
{
|
||||
printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
|
||||
}
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
#endif /* RE_ENTRANT_CHECKING */
|
||||
|
||||
if (!used_math())
|
||||
{
|
||||
finit();
|
||||
set_used_math();
|
||||
}
|
||||
|
||||
SETUP_DATA_AREA(arg);
|
||||
|
||||
FPU_ORIG_EIP = FPU_EIP;
|
||||
|
||||
if ( (FPU_EFLAGS & 0x00020000) != 0 )
|
||||
{
|
||||
/* Virtual 8086 mode */
|
||||
addr_modes.default_mode = VM86;
|
||||
FPU_EIP += code_base = FPU_CS << 4;
|
||||
code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */
|
||||
}
|
||||
else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS )
|
||||
{
|
||||
addr_modes.default_mode = 0;
|
||||
}
|
||||
else if ( FPU_CS == __KERNEL_CS )
|
||||
{
|
||||
printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP);
|
||||
panic("Math emulation needed in kernel");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */
|
||||
{
|
||||
/* Can only handle segmented addressing via the LDT
|
||||
for now, and it must be 16 bit */
|
||||
printk("FPU emulator: Unsupported addressing mode\n");
|
||||
math_abort(FPU_info, SIGILL);
|
||||
}
|
||||
|
||||
code_descriptor = LDT_DESCRIPTOR(FPU_CS);
|
||||
if ( SEG_D_SIZE(code_descriptor) )
|
||||
{
|
||||
/* The above test may be wrong, the book is not clear */
|
||||
/* Segmented 32 bit protected mode */
|
||||
addr_modes.default_mode = SEG32;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* 16 bit protected mode */
|
||||
addr_modes.default_mode = PM16;
|
||||
}
|
||||
FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
|
||||
code_limit = code_base
|
||||
+ (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor)
|
||||
- 1;
|
||||
if ( code_limit < code_base ) code_limit = 0xffffffff;
|
||||
}
|
||||
|
||||
FPU_lookahead = 1;
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
FPU_lookahead = 0;
|
||||
|
||||
if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
|
||||
&addr_modes.override) )
|
||||
{
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
|
||||
"FPU emulator: self-modifying code! (emulation impossible)\n",
|
||||
byte1);
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
EXCEPTION(EX_INTERNAL|0x126);
|
||||
math_abort(FPU_info,SIGILL);
|
||||
}
|
||||
|
||||
do_another_FPU_instruction:
|
||||
|
||||
no_ip_update = 0;
|
||||
|
||||
FPU_EIP++; /* We have fetched the prefix and first code bytes. */
|
||||
|
||||
if ( addr_modes.default_mode )
|
||||
{
|
||||
/* This checks for the minimum instruction bytes.
|
||||
We also need to check any extra (address mode) code access. */
|
||||
if ( FPU_EIP > code_limit )
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
|
||||
if ( (byte1 & 0xf8) != 0xd8 )
|
||||
{
|
||||
if ( byte1 == FWAIT_OPCODE )
|
||||
{
|
||||
if (partial_status & SW_Summary)
|
||||
goto do_the_FPU_interrupt;
|
||||
else
|
||||
goto FPU_fwait_done;
|
||||
}
|
||||
#ifdef PARANOID
|
||||
EXCEPTION(EX_INTERNAL|0x128);
|
||||
math_abort(FPU_info,SIGILL);
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(1);
|
||||
FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP);
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
FPU_EIP++;
|
||||
|
||||
if (partial_status & SW_Summary)
|
||||
{
|
||||
/* Ignore the error for now if the current instruction is a no-wait
|
||||
control instruction */
|
||||
/* The 80486 manual contradicts itself on this topic,
|
||||
but a real 80486 uses the following instructions:
|
||||
fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex.
|
||||
*/
|
||||
code = (FPU_modrm << 8) | byte1;
|
||||
if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */
|
||||
(((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv,
|
||||
fnstsw */
|
||||
((code & 0xc000) != 0xc000))) ) )
|
||||
{
|
||||
/*
|
||||
* We need to simulate the action of the kernel to FPU
|
||||
* interrupts here.
|
||||
*/
|
||||
do_the_FPU_interrupt:
|
||||
|
||||
FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
current->thread.trap_no = 16;
|
||||
current->thread.error_code = 0;
|
||||
send_sig(SIGFPE, current, 1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
entry_sel_off.offset = FPU_ORIG_EIP;
|
||||
entry_sel_off.selector = FPU_CS;
|
||||
entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
|
||||
|
||||
FPU_rm = FPU_modrm & 7;
|
||||
|
||||
if ( FPU_modrm < 0300 )
|
||||
{
|
||||
/* All of these instructions use the mod/rm byte to get a data address */
|
||||
|
||||
if ( (addr_modes.default_mode & SIXTEEN)
|
||||
^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) )
|
||||
data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off,
|
||||
addr_modes);
|
||||
else
|
||||
data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
|
||||
addr_modes);
|
||||
|
||||
if ( addr_modes.default_mode )
|
||||
{
|
||||
if ( FPU_EIP-1 > code_limit )
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
|
||||
if ( !(byte1 & 1) )
|
||||
{
|
||||
unsigned short status1 = partial_status;
|
||||
|
||||
st0_ptr = &st(0);
|
||||
st0_tag = FPU_gettag0();
|
||||
|
||||
/* Stack underflow has priority */
|
||||
if ( NOT_EMPTY_ST0 )
|
||||
{
|
||||
if ( addr_modes.default_mode & PROTECTED )
|
||||
{
|
||||
/* This table works for 16 and 32 bit protected mode */
|
||||
if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] )
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
|
||||
unmasked = 0; /* Do this here to stop compiler warnings. */
|
||||
switch ( (byte1 >> 1) & 3 )
|
||||
{
|
||||
case 0:
|
||||
unmasked = FPU_load_single((float __user *)data_address,
|
||||
&loaded_data);
|
||||
loaded_tag = unmasked & 0xff;
|
||||
unmasked &= ~0xff;
|
||||
break;
|
||||
case 1:
|
||||
loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
|
||||
break;
|
||||
case 2:
|
||||
unmasked = FPU_load_double((double __user *)data_address,
|
||||
&loaded_data);
|
||||
loaded_tag = unmasked & 0xff;
|
||||
unmasked &= ~0xff;
|
||||
break;
|
||||
case 3:
|
||||
default: /* Used here to suppress gcc warnings. */
|
||||
loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
|
||||
break;
|
||||
}
|
||||
|
||||
/* No more access to user memory, it is safe
|
||||
to use static data now */
|
||||
|
||||
/* NaN operands have the next priority. */
|
||||
/* We have to delay looking at st(0) until after
|
||||
loading the data, because that data might contain an SNaN */
|
||||
if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) ||
|
||||
((loaded_tag == TAG_Special) && isNaN(&loaded_data)) )
|
||||
{
|
||||
/* Restore the status word; we might have loaded a
|
||||
denormal. */
|
||||
partial_status = status1;
|
||||
if ( (FPU_modrm & 0x30) == 0x10 )
|
||||
{
|
||||
/* fcom or fcomp */
|
||||
EXCEPTION(EX_Invalid);
|
||||
setcc(SW_C3 | SW_C2 | SW_C0);
|
||||
if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
|
||||
FPU_pop(); /* fcomp, masked, so we pop. */
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( loaded_tag == TAG_Special )
|
||||
loaded_tag = FPU_Special(&loaded_data);
|
||||
#ifdef PECULIAR_486
|
||||
/* This is not really needed, but gives behaviour
|
||||
identical to an 80486 */
|
||||
if ( (FPU_modrm & 0x28) == 0x20 )
|
||||
/* fdiv or fsub */
|
||||
real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data);
|
||||
else
|
||||
#endif /* PECULIAR_486 */
|
||||
/* fadd, fdivr, fmul, or fsubr */
|
||||
real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr);
|
||||
}
|
||||
goto reg_mem_instr_done;
|
||||
}
|
||||
|
||||
if ( unmasked && !((FPU_modrm & 0x30) == 0x10) )
|
||||
{
|
||||
/* Is not a comparison instruction. */
|
||||
if ( (FPU_modrm & 0x38) == 0x38 )
|
||||
{
|
||||
/* fdivr */
|
||||
if ( (st0_tag == TAG_Zero) &&
|
||||
((loaded_tag == TAG_Valid)
|
||||
|| (loaded_tag == TAG_Special
|
||||
&& isdenormal(&loaded_data))) )
|
||||
{
|
||||
if ( FPU_divide_by_zero(0, getsign(&loaded_data))
|
||||
< 0 )
|
||||
{
|
||||
/* We use the fact here that the unmasked
|
||||
exception in the loaded data was for a
|
||||
denormal operand */
|
||||
/* Restore the state of the denormal op bit */
|
||||
partial_status &= ~SW_Denorm_Op;
|
||||
partial_status |= status1 & SW_Denorm_Op;
|
||||
}
|
||||
else
|
||||
setsign(st0_ptr, getsign(&loaded_data));
|
||||
}
|
||||
}
|
||||
goto reg_mem_instr_done;
|
||||
}
|
||||
|
||||
switch ( (FPU_modrm >> 3) & 7 )
|
||||
{
|
||||
case 0: /* fadd */
|
||||
clear_C1();
|
||||
FPU_add(&loaded_data, loaded_tag, 0, control_word);
|
||||
break;
|
||||
case 1: /* fmul */
|
||||
clear_C1();
|
||||
FPU_mul(&loaded_data, loaded_tag, 0, control_word);
|
||||
break;
|
||||
case 2: /* fcom */
|
||||
FPU_compare_st_data(&loaded_data, loaded_tag);
|
||||
break;
|
||||
case 3: /* fcomp */
|
||||
if ( !FPU_compare_st_data(&loaded_data, loaded_tag)
|
||||
&& !unmasked )
|
||||
FPU_pop();
|
||||
break;
|
||||
case 4: /* fsub */
|
||||
clear_C1();
|
||||
FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word);
|
||||
break;
|
||||
case 5: /* fsubr */
|
||||
clear_C1();
|
||||
FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
|
||||
break;
|
||||
case 6: /* fdiv */
|
||||
clear_C1();
|
||||
FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word);
|
||||
break;
|
||||
case 7: /* fdivr */
|
||||
clear_C1();
|
||||
if ( st0_tag == TAG_Zero )
|
||||
partial_status = status1; /* Undo any denorm tag,
|
||||
zero-divide has priority. */
|
||||
FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( (FPU_modrm & 0x30) == 0x10 )
|
||||
{
|
||||
/* The instruction is fcom or fcomp */
|
||||
EXCEPTION(EX_StackUnder);
|
||||
setcc(SW_C3 | SW_C2 | SW_C0);
|
||||
if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
|
||||
FPU_pop(); /* fcomp */
|
||||
}
|
||||
else
|
||||
FPU_stack_underflow();
|
||||
}
|
||||
reg_mem_instr_done:
|
||||
operand_address = data_sel_off;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( !(no_ip_update =
|
||||
FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
|
||||
addr_modes, data_address)) )
|
||||
{
|
||||
operand_address = data_sel_off;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
/* None of these instructions access user memory */
|
||||
u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
|
||||
|
||||
#ifdef PECULIAR_486
|
||||
/* This is supposed to be undefined, but a real 80486 seems
|
||||
to do this: */
|
||||
operand_address.offset = 0;
|
||||
operand_address.selector = FPU_DS;
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
st0_ptr = &st(0);
|
||||
st0_tag = FPU_gettag0();
|
||||
switch ( type_table[(int) instr_index] )
|
||||
{
|
||||
case _NONE_: /* also _REGIc: _REGIn */
|
||||
break;
|
||||
case _REG0_:
|
||||
if ( !NOT_EMPTY_ST0 )
|
||||
{
|
||||
FPU_stack_underflow();
|
||||
goto FPU_instruction_done;
|
||||
}
|
||||
break;
|
||||
case _REGIi:
|
||||
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
|
||||
{
|
||||
FPU_stack_underflow_i(FPU_rm);
|
||||
goto FPU_instruction_done;
|
||||
}
|
||||
break;
|
||||
case _REGIp:
|
||||
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
|
||||
{
|
||||
FPU_stack_underflow_pop(FPU_rm);
|
||||
goto FPU_instruction_done;
|
||||
}
|
||||
break;
|
||||
case _REGI_:
|
||||
if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
|
||||
{
|
||||
FPU_stack_underflow();
|
||||
goto FPU_instruction_done;
|
||||
}
|
||||
break;
|
||||
case _PUSH_: /* Only used by the fld st(i) instruction */
|
||||
break;
|
||||
case _null_:
|
||||
FPU_illegal();
|
||||
goto FPU_instruction_done;
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL|0x111);
|
||||
goto FPU_instruction_done;
|
||||
}
|
||||
(*st_instr_table[(int) instr_index])();
|
||||
|
||||
FPU_instruction_done:
|
||||
;
|
||||
}
|
||||
|
||||
if ( ! no_ip_update )
|
||||
instruction_address = entry_sel_off;
|
||||
|
||||
FPU_fwait_done:
|
||||
|
||||
#ifdef DEBUG
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_printall();
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
#endif /* DEBUG */
|
||||
|
||||
if (FPU_lookahead && !need_resched())
|
||||
{
|
||||
FPU_ORIG_EIP = FPU_EIP - code_base;
|
||||
if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP,
|
||||
&addr_modes.override) )
|
||||
goto do_another_FPU_instruction;
|
||||
}
|
||||
|
||||
if ( addr_modes.default_mode )
|
||||
FPU_EIP -= code_base;
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
}
|
||||
|
||||
|
||||
/* Support for prefix bytes is not yet complete. To properly handle
|
||||
all prefix bytes, further changes are needed in the emulator code
|
||||
which accesses user address space. Access to separate segments is
|
||||
important for msdos emulation. */
|
||||
static int valid_prefix(u_char *Byte, u_char __user **fpu_eip,
|
||||
overrides *override)
|
||||
{
|
||||
u_char byte;
|
||||
u_char __user *ip = *fpu_eip;
|
||||
|
||||
*override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(1);
|
||||
FPU_get_user(byte, ip);
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
|
||||
while ( 1 )
|
||||
{
|
||||
switch ( byte )
|
||||
{
|
||||
case ADDR_SIZE_PREFIX:
|
||||
override->address_size = ADDR_SIZE_PREFIX;
|
||||
goto do_next_byte;
|
||||
|
||||
case OP_SIZE_PREFIX:
|
||||
override->operand_size = OP_SIZE_PREFIX;
|
||||
goto do_next_byte;
|
||||
|
||||
case PREFIX_CS:
|
||||
override->segment = PREFIX_CS_;
|
||||
goto do_next_byte;
|
||||
case PREFIX_ES:
|
||||
override->segment = PREFIX_ES_;
|
||||
goto do_next_byte;
|
||||
case PREFIX_SS:
|
||||
override->segment = PREFIX_SS_;
|
||||
goto do_next_byte;
|
||||
case PREFIX_FS:
|
||||
override->segment = PREFIX_FS_;
|
||||
goto do_next_byte;
|
||||
case PREFIX_GS:
|
||||
override->segment = PREFIX_GS_;
|
||||
goto do_next_byte;
|
||||
case PREFIX_DS:
|
||||
override->segment = PREFIX_DS_;
|
||||
goto do_next_byte;
|
||||
|
||||
/* lock is not a valid prefix for FPU instructions,
|
||||
let the cpu handle it to generate a SIGILL. */
|
||||
/* case PREFIX_LOCK: */
|
||||
|
||||
/* rep.. prefixes have no meaning for FPU instructions */
|
||||
case PREFIX_REPE:
|
||||
case PREFIX_REPNE:
|
||||
|
||||
do_next_byte:
|
||||
ip++;
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(1);
|
||||
FPU_get_user(byte, ip);
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
break;
|
||||
case FWAIT_OPCODE:
|
||||
*Byte = byte;
|
||||
return 1;
|
||||
default:
|
||||
if ( (byte & 0xf8) == 0xd8 )
|
||||
{
|
||||
*Byte = byte;
|
||||
*fpu_eip = ip;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Not a valid sequence of prefix bytes followed by
|
||||
an FPU instruction. */
|
||||
*Byte = byte; /* Needed for error message. */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void math_abort(struct info * info, unsigned int signal)
|
||||
{
|
||||
FPU_EIP = FPU_ORIG_EIP;
|
||||
current->thread.trap_no = 16;
|
||||
current->thread.error_code = 0;
|
||||
send_sig(signal,current,1);
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
__asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4));
|
||||
#ifdef PARANOID
|
||||
printk("ERROR: wm-FPU-emu math_abort failed!\n");
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define S387 ((struct i387_soft_struct *)s387)
|
||||
#define sstatus_word() \
|
||||
((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
|
||||
|
||||
int restore_i387_soft(void *s387, struct _fpstate __user *buf)
|
||||
{
|
||||
u_char __user *d = (u_char __user *)buf;
|
||||
int offset, other, i, tags, regnr, tag, newtop;
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10);
|
||||
if (__copy_from_user(&S387->cwd, d, 7*4))
|
||||
return -1;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
|
||||
d += 7*4;
|
||||
|
||||
S387->ftop = (S387->swd >> SW_Top_Shift) & 7;
|
||||
offset = (S387->ftop & 7) * 10;
|
||||
other = 80 - offset;
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
/* Copy all registers in stack order. */
|
||||
if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other))
|
||||
return -1;
|
||||
if ( offset )
|
||||
if (__copy_from_user((u_char *)&S387->st_space, d+other, offset))
|
||||
return -1;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
|
||||
/* The tags may need to be corrected now. */
|
||||
tags = S387->twd;
|
||||
newtop = S387->ftop;
|
||||
for ( i = 0; i < 8; i++ )
|
||||
{
|
||||
regnr = (i+newtop) & 7;
|
||||
if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty )
|
||||
{
|
||||
/* The loaded data over-rides all other cases. */
|
||||
tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr));
|
||||
tags &= ~(3 << (regnr*2));
|
||||
tags |= (tag & 3) << (regnr*2);
|
||||
}
|
||||
}
|
||||
S387->twd = tags;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int save_i387_soft(void *s387, struct _fpstate __user * buf)
|
||||
{
|
||||
u_char __user *d = (u_char __user *)buf;
|
||||
int offset = (S387->ftop & 7) * 10, other = 80 - offset;
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10);
|
||||
#ifdef PECULIAR_486
|
||||
S387->cwd &= ~0xe080;
|
||||
/* An 80486 sets nearly all of the reserved bits to 1. */
|
||||
S387->cwd |= 0xffff0040;
|
||||
S387->swd = sstatus_word() | 0xffff0000;
|
||||
S387->twd |= 0xffff0000;
|
||||
S387->fcs &= ~0xf8000000;
|
||||
S387->fos |= 0xffff0000;
|
||||
#endif /* PECULIAR_486 */
|
||||
if (__copy_to_user(d, &S387->cwd, 7*4))
|
||||
return -1;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
|
||||
d += 7*4;
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
/* Copy all registers in stack order. */
|
||||
if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other))
|
||||
return -1;
|
||||
if ( offset )
|
||||
if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset))
|
||||
return -1;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
|
||||
return 1;
|
||||
}
|
143
arch/x86/math-emu/fpu_etc.c
Normal file
143
arch/x86/math-emu/fpu_etc.c
Normal file
@@ -0,0 +1,143 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| fpu_etc.c |
|
||||
| |
|
||||
| Implement a few FPU instructions. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "status_w.h"
|
||||
#include "reg_constant.h"
|
||||
|
||||
|
||||
static void fchs(FPU_REG *st0_ptr, u_char st0tag)
|
||||
{
|
||||
if ( st0tag ^ TAG_Empty )
|
||||
{
|
||||
signbyte(st0_ptr) ^= SIGN_NEG;
|
||||
clear_C1();
|
||||
}
|
||||
else
|
||||
FPU_stack_underflow();
|
||||
}
|
||||
|
||||
|
||||
static void fabs(FPU_REG *st0_ptr, u_char st0tag)
|
||||
{
|
||||
if ( st0tag ^ TAG_Empty )
|
||||
{
|
||||
setpositive(st0_ptr);
|
||||
clear_C1();
|
||||
}
|
||||
else
|
||||
FPU_stack_underflow();
|
||||
}
|
||||
|
||||
|
||||
static void ftst_(FPU_REG *st0_ptr, u_char st0tag)
|
||||
{
|
||||
switch (st0tag)
|
||||
{
|
||||
case TAG_Zero:
|
||||
setcc(SW_C3);
|
||||
break;
|
||||
case TAG_Valid:
|
||||
if (getsign(st0_ptr) == SIGN_POS)
|
||||
setcc(0);
|
||||
else
|
||||
setcc(SW_C0);
|
||||
break;
|
||||
case TAG_Special:
|
||||
switch ( FPU_Special(st0_ptr) )
|
||||
{
|
||||
case TW_Denormal:
|
||||
if (getsign(st0_ptr) == SIGN_POS)
|
||||
setcc(0);
|
||||
else
|
||||
setcc(SW_C0);
|
||||
if ( denormal_operand() < 0 )
|
||||
{
|
||||
#ifdef PECULIAR_486
|
||||
/* This is weird! */
|
||||
if (getsign(st0_ptr) == SIGN_POS)
|
||||
setcc(SW_C3);
|
||||
#endif /* PECULIAR_486 */
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case TW_NaN:
|
||||
setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */
|
||||
EXCEPTION(EX_Invalid);
|
||||
break;
|
||||
case TW_Infinity:
|
||||
if (getsign(st0_ptr) == SIGN_POS)
|
||||
setcc(0);
|
||||
else
|
||||
setcc(SW_C0);
|
||||
break;
|
||||
default:
|
||||
setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */
|
||||
EXCEPTION(EX_INTERNAL|0x14);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TAG_Empty:
|
||||
setcc(SW_C0|SW_C2|SW_C3);
|
||||
EXCEPTION(EX_StackUnder);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void fxam(FPU_REG *st0_ptr, u_char st0tag)
|
||||
{
|
||||
int c = 0;
|
||||
switch (st0tag)
|
||||
{
|
||||
case TAG_Empty:
|
||||
c = SW_C3|SW_C0;
|
||||
break;
|
||||
case TAG_Zero:
|
||||
c = SW_C3;
|
||||
break;
|
||||
case TAG_Valid:
|
||||
c = SW_C2;
|
||||
break;
|
||||
case TAG_Special:
|
||||
switch ( FPU_Special(st0_ptr) )
|
||||
{
|
||||
case TW_Denormal:
|
||||
c = SW_C2|SW_C3; /* Denormal */
|
||||
break;
|
||||
case TW_NaN:
|
||||
/* We also use NaN for unsupported types. */
|
||||
if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) )
|
||||
c = SW_C0;
|
||||
break;
|
||||
case TW_Infinity:
|
||||
c = SW_C2|SW_C0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( getsign(st0_ptr) == SIGN_NEG )
|
||||
c |= SW_C1;
|
||||
setcc(c);
|
||||
}
|
||||
|
||||
|
||||
static FUNC_ST0 const fp_etc_table[] = {
|
||||
fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal,
|
||||
ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal
|
||||
};
|
||||
|
||||
void FPU_etc(void)
|
||||
{
|
||||
(fp_etc_table[FPU_rm])(&st(0), FPU_gettag0());
|
||||
}
|
140
arch/x86/math-emu/fpu_proto.h
Normal file
140
arch/x86/math-emu/fpu_proto.h
Normal file
@@ -0,0 +1,140 @@
|
||||
#ifndef _FPU_PROTO_H
|
||||
#define _FPU_PROTO_H
|
||||
|
||||
/* errors.c */
|
||||
extern void FPU_illegal(void);
|
||||
extern void FPU_printall(void);
|
||||
asmlinkage void FPU_exception(int n);
|
||||
extern int real_1op_NaN(FPU_REG *a);
|
||||
extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr,
|
||||
FPU_REG const *defaultNaN);
|
||||
asmlinkage int arith_invalid(int deststnr);
|
||||
asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign);
|
||||
extern int set_precision_flag(int flags);
|
||||
asmlinkage void set_precision_flag_up(void);
|
||||
asmlinkage void set_precision_flag_down(void);
|
||||
asmlinkage int denormal_operand(void);
|
||||
asmlinkage int arith_overflow(FPU_REG *dest);
|
||||
asmlinkage int arith_underflow(FPU_REG *dest);
|
||||
extern void FPU_stack_overflow(void);
|
||||
extern void FPU_stack_underflow(void);
|
||||
extern void FPU_stack_underflow_i(int i);
|
||||
extern void FPU_stack_underflow_pop(int i);
|
||||
/* fpu_arith.c */
|
||||
extern void fadd__(void);
|
||||
extern void fmul__(void);
|
||||
extern void fsub__(void);
|
||||
extern void fsubr_(void);
|
||||
extern void fdiv__(void);
|
||||
extern void fdivr_(void);
|
||||
extern void fadd_i(void);
|
||||
extern void fmul_i(void);
|
||||
extern void fsubri(void);
|
||||
extern void fsub_i(void);
|
||||
extern void fdivri(void);
|
||||
extern void fdiv_i(void);
|
||||
extern void faddp_(void);
|
||||
extern void fmulp_(void);
|
||||
extern void fsubrp(void);
|
||||
extern void fsubp_(void);
|
||||
extern void fdivrp(void);
|
||||
extern void fdivp_(void);
|
||||
/* fpu_aux.c */
|
||||
extern void finit(void);
|
||||
extern void finit_(void);
|
||||
extern void fstsw_(void);
|
||||
extern void fp_nop(void);
|
||||
extern void fld_i_(void);
|
||||
extern void fxch_i(void);
|
||||
extern void ffree_(void);
|
||||
extern void ffreep(void);
|
||||
extern void fst_i_(void);
|
||||
extern void fstp_i(void);
|
||||
/* fpu_entry.c */
|
||||
asmlinkage extern void math_emulate(long arg);
|
||||
extern void math_abort(struct info *info, unsigned int signal);
|
||||
/* fpu_etc.c */
|
||||
extern void FPU_etc(void);
|
||||
/* fpu_tags.c */
|
||||
extern int FPU_gettag0(void);
|
||||
extern int FPU_gettagi(int stnr);
|
||||
extern int FPU_gettag(int regnr);
|
||||
extern void FPU_settag0(int tag);
|
||||
extern void FPU_settagi(int stnr, int tag);
|
||||
extern void FPU_settag(int regnr, int tag);
|
||||
extern int FPU_Special(FPU_REG const *ptr);
|
||||
extern int isNaN(FPU_REG const *ptr);
|
||||
extern void FPU_pop(void);
|
||||
extern int FPU_empty_i(int stnr);
|
||||
extern int FPU_stackoverflow(FPU_REG **st_new_ptr);
|
||||
extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr);
|
||||
extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag);
|
||||
extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag);
|
||||
/* fpu_trig.c */
|
||||
extern void FPU_triga(void);
|
||||
extern void FPU_trigb(void);
|
||||
/* get_address.c */
|
||||
extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
|
||||
struct address *addr, fpu_addr_modes addr_modes);
|
||||
extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
|
||||
struct address *addr, fpu_addr_modes addr_modes);
|
||||
/* load_store.c */
|
||||
extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
|
||||
void __user *data_address);
|
||||
/* poly_2xm1.c */
|
||||
extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result);
|
||||
/* poly_atan.c */
|
||||
extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
|
||||
u_char st1_tag);
|
||||
/* poly_l2.c */
|
||||
extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign);
|
||||
extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1,
|
||||
FPU_REG *d);
|
||||
/* poly_sin.c */
|
||||
extern void poly_sine(FPU_REG *st0_ptr);
|
||||
extern void poly_cos(FPU_REG *st0_ptr);
|
||||
/* poly_tan.c */
|
||||
extern void poly_tan(FPU_REG *st0_ptr);
|
||||
/* reg_add_sub.c */
|
||||
extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w);
|
||||
extern int FPU_sub(int flags, int rm, int control_w);
|
||||
/* reg_compare.c */
|
||||
extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag);
|
||||
extern void fcom_st(void);
|
||||
extern void fcompst(void);
|
||||
extern void fcompp(void);
|
||||
extern void fucom_(void);
|
||||
extern void fucomp(void);
|
||||
extern void fucompp(void);
|
||||
/* reg_constant.c */
|
||||
extern void fconst(void);
|
||||
/* reg_ld_str.c */
|
||||
extern int FPU_load_extended(long double __user *s, int stnr);
|
||||
extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data);
|
||||
extern int FPU_load_single(float __user *single, FPU_REG *loaded_data);
|
||||
extern int FPU_load_int64(long long __user *_s);
|
||||
extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data);
|
||||
extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data);
|
||||
extern int FPU_load_bcd(u_char __user *s);
|
||||
extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
|
||||
long double __user *d);
|
||||
extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat);
|
||||
extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single);
|
||||
extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d);
|
||||
extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d);
|
||||
extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d);
|
||||
extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d);
|
||||
extern int FPU_round_to_int(FPU_REG *r, u_char tag);
|
||||
extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s);
|
||||
extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address);
|
||||
extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d);
|
||||
extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address);
|
||||
extern int FPU_tagof(FPU_REG *ptr);
|
||||
/* reg_mul.c */
|
||||
extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w);
|
||||
|
||||
extern int FPU_div(int flags, int regrm, int control_w);
|
||||
/* reg_convert.c */
|
||||
extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x);
|
||||
#endif /* _FPU_PROTO_H */
|
||||
|
90
arch/x86/math-emu/fpu_system.h
Normal file
90
arch/x86/math-emu/fpu_system.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| fpu_system.h |
|
||||
| |
|
||||
| Copyright (C) 1992,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef _FPU_SYSTEM_H
|
||||
#define _FPU_SYSTEM_H
|
||||
|
||||
/* system dependent definitions */
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
/* This sets the pointer FPU_info to point to the argument part
|
||||
of the stack frame of math_emulate() */
|
||||
#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg
|
||||
|
||||
/* s is always from a cpu register, and the cpu does bounds checking
|
||||
* during register load --> no further bounds checks needed */
|
||||
#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3])
|
||||
#define SEG_D_SIZE(x) ((x).b & (3 << 21))
|
||||
#define SEG_G_BIT(x) ((x).b & (1 << 23))
|
||||
#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1)
|
||||
#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
|
||||
#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \
|
||||
| (((s).b & 0xff) << 16) | ((s).a >> 16))
|
||||
#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff))
|
||||
#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
|
||||
#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
|
||||
#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
|
||||
== (1 << 10))
|
||||
|
||||
#define I387 (current->thread.i387)
|
||||
#define FPU_info (I387.soft.info)
|
||||
|
||||
#define FPU_CS (*(unsigned short *) &(FPU_info->___cs))
|
||||
#define FPU_SS (*(unsigned short *) &(FPU_info->___ss))
|
||||
#define FPU_DS (*(unsigned short *) &(FPU_info->___ds))
|
||||
#define FPU_EAX (FPU_info->___eax)
|
||||
#define FPU_EFLAGS (FPU_info->___eflags)
|
||||
#define FPU_EIP (FPU_info->___eip)
|
||||
#define FPU_ORIG_EIP (FPU_info->___orig_eip)
|
||||
|
||||
#define FPU_lookahead (I387.soft.lookahead)
|
||||
|
||||
/* nz if ip_offset and cs_selector are not to be set for the current
|
||||
instruction. */
|
||||
#define no_ip_update (*(u_char *)&(I387.soft.no_update))
|
||||
#define FPU_rm (*(u_char *)&(I387.soft.rm))
|
||||
|
||||
/* Number of bytes of data which can be legally accessed by the current
|
||||
instruction. This only needs to hold a number <= 108, so a byte will do. */
|
||||
#define access_limit (*(u_char *)&(I387.soft.alimit))
|
||||
|
||||
#define partial_status (I387.soft.swd)
|
||||
#define control_word (I387.soft.cwd)
|
||||
#define fpu_tag_word (I387.soft.twd)
|
||||
#define registers (I387.soft.st_space)
|
||||
#define top (I387.soft.ftop)
|
||||
|
||||
#define instruction_address (*(struct address *)&I387.soft.fip)
|
||||
#define operand_address (*(struct address *)&I387.soft.foo)
|
||||
|
||||
#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
|
||||
math_abort(FPU_info,SIGSEGV)
|
||||
#define FPU_abort math_abort(FPU_info, SIGSEGV)
|
||||
|
||||
#undef FPU_IGNORE_CODE_SEGV
|
||||
#ifdef FPU_IGNORE_CODE_SEGV
|
||||
/* access_ok() is very expensive, and causes the emulator to run
|
||||
about 20% slower if applied to the code. Anyway, errors due to bad
|
||||
code addresses should be much rarer than errors due to bad data
|
||||
addresses. */
|
||||
#define FPU_code_access_ok(z)
|
||||
#else
|
||||
/* A simpler test than access_ok() can probably be done for
|
||||
FPU_code_access_ok() because the only possible error is to step
|
||||
past the upper boundary of a legal code area. */
|
||||
#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z)
|
||||
#endif
|
||||
|
||||
#define FPU_get_user(x,y) get_user((x),(y))
|
||||
#define FPU_put_user(x,y) put_user((x),(y))
|
||||
|
||||
#endif
|
127
arch/x86/math-emu/fpu_tags.c
Normal file
127
arch/x86/math-emu/fpu_tags.c
Normal file
@@ -0,0 +1,127 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| fpu_tags.c |
|
||||
| |
|
||||
| Set FPU register tags. |
|
||||
| |
|
||||
| Copyright (C) 1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
#include "fpu_system.h"
|
||||
#include "exception.h"
|
||||
|
||||
|
||||
void FPU_pop(void)
|
||||
{
|
||||
fpu_tag_word |= 3 << ((top & 7)*2);
|
||||
top++;
|
||||
}
|
||||
|
||||
|
||||
int FPU_gettag0(void)
|
||||
{
|
||||
return (fpu_tag_word >> ((top & 7)*2)) & 3;
|
||||
}
|
||||
|
||||
|
||||
int FPU_gettagi(int stnr)
|
||||
{
|
||||
return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3;
|
||||
}
|
||||
|
||||
|
||||
int FPU_gettag(int regnr)
|
||||
{
|
||||
return (fpu_tag_word >> ((regnr & 7)*2)) & 3;
|
||||
}
|
||||
|
||||
|
||||
void FPU_settag0(int tag)
|
||||
{
|
||||
int regnr = top;
|
||||
regnr &= 7;
|
||||
fpu_tag_word &= ~(3 << (regnr*2));
|
||||
fpu_tag_word |= (tag & 3) << (regnr*2);
|
||||
}
|
||||
|
||||
|
||||
void FPU_settagi(int stnr, int tag)
|
||||
{
|
||||
int regnr = stnr+top;
|
||||
regnr &= 7;
|
||||
fpu_tag_word &= ~(3 << (regnr*2));
|
||||
fpu_tag_word |= (tag & 3) << (regnr*2);
|
||||
}
|
||||
|
||||
|
||||
void FPU_settag(int regnr, int tag)
|
||||
{
|
||||
regnr &= 7;
|
||||
fpu_tag_word &= ~(3 << (regnr*2));
|
||||
fpu_tag_word |= (tag & 3) << (regnr*2);
|
||||
}
|
||||
|
||||
|
||||
int FPU_Special(FPU_REG const *ptr)
|
||||
{
|
||||
int exp = exponent(ptr);
|
||||
|
||||
if ( exp == EXP_BIAS+EXP_UNDER )
|
||||
return TW_Denormal;
|
||||
else if ( exp != EXP_BIAS+EXP_OVER )
|
||||
return TW_NaN;
|
||||
else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) )
|
||||
return TW_Infinity;
|
||||
return TW_NaN;
|
||||
}
|
||||
|
||||
|
||||
int isNaN(FPU_REG const *ptr)
|
||||
{
|
||||
return ( (exponent(ptr) == EXP_BIAS+EXP_OVER)
|
||||
&& !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) );
|
||||
}
|
||||
|
||||
|
||||
int FPU_empty_i(int stnr)
|
||||
{
|
||||
int regnr = (top+stnr) & 7;
|
||||
|
||||
return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty;
|
||||
}
|
||||
|
||||
|
||||
int FPU_stackoverflow(FPU_REG **st_new_ptr)
|
||||
{
|
||||
*st_new_ptr = &st(-1);
|
||||
|
||||
return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty;
|
||||
}
|
||||
|
||||
|
||||
void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr)
|
||||
{
|
||||
reg_copy(r, &st(stnr));
|
||||
FPU_settagi(stnr, tag);
|
||||
}
|
||||
|
||||
void FPU_copy_to_reg1(FPU_REG const *r, u_char tag)
|
||||
{
|
||||
reg_copy(r, &st(1));
|
||||
FPU_settagi(1, tag);
|
||||
}
|
||||
|
||||
void FPU_copy_to_reg0(FPU_REG const *r, u_char tag)
|
||||
{
|
||||
int regnr = top;
|
||||
regnr &= 7;
|
||||
|
||||
reg_copy(r, &st(0));
|
||||
|
||||
fpu_tag_word &= ~(3 << (regnr*2));
|
||||
fpu_tag_word |= (tag & 3) << (regnr*2);
|
||||
}
|
1845
arch/x86/math-emu/fpu_trig.c
Normal file
1845
arch/x86/math-emu/fpu_trig.c
Normal file
File diff suppressed because it is too large
Load Diff
438
arch/x86/math-emu/get_address.c
Normal file
438
arch/x86/math-emu/get_address.c
Normal file
@@ -0,0 +1,438 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| get_address.c |
|
||||
| |
|
||||
| Get the effective address from an FPU instruction. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Note: |
|
||||
| The file contains code which accesses user memory. |
|
||||
| Emulator static data may change when user memory is accessed, due to |
|
||||
| other processes using the emulator while swapping is in progress. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
#include <linux/stddef.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/desc.h>
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
#define FPU_WRITE_BIT 0x10
|
||||
|
||||
static int reg_offset[] = {
|
||||
offsetof(struct info,___eax),
|
||||
offsetof(struct info,___ecx),
|
||||
offsetof(struct info,___edx),
|
||||
offsetof(struct info,___ebx),
|
||||
offsetof(struct info,___esp),
|
||||
offsetof(struct info,___ebp),
|
||||
offsetof(struct info,___esi),
|
||||
offsetof(struct info,___edi)
|
||||
};
|
||||
|
||||
#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info))
|
||||
|
||||
static int reg_offset_vm86[] = {
|
||||
offsetof(struct info,___cs),
|
||||
offsetof(struct info,___vm86_ds),
|
||||
offsetof(struct info,___vm86_es),
|
||||
offsetof(struct info,___vm86_fs),
|
||||
offsetof(struct info,___vm86_gs),
|
||||
offsetof(struct info,___ss),
|
||||
offsetof(struct info,___vm86_ds)
|
||||
};
|
||||
|
||||
#define VM86_REG_(x) (*(unsigned short *) \
|
||||
(reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
|
||||
|
||||
/* This dummy, gs is not saved on the stack. */
|
||||
#define ___GS ___ds
|
||||
|
||||
static int reg_offset_pm[] = {
|
||||
offsetof(struct info,___cs),
|
||||
offsetof(struct info,___ds),
|
||||
offsetof(struct info,___es),
|
||||
offsetof(struct info,___fs),
|
||||
offsetof(struct info,___GS),
|
||||
offsetof(struct info,___ss),
|
||||
offsetof(struct info,___ds)
|
||||
};
|
||||
|
||||
#define PM_REG_(x) (*(unsigned short *) \
|
||||
(reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info))
|
||||
|
||||
|
||||
/* Decode the SIB byte. This function assumes mod != 0 */
|
||||
static int sib(int mod, unsigned long *fpu_eip)
|
||||
{
|
||||
u_char ss,index,base;
|
||||
long offset;
|
||||
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(1);
|
||||
FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
(*fpu_eip)++;
|
||||
ss = base >> 6;
|
||||
index = (base >> 3) & 7;
|
||||
base &= 7;
|
||||
|
||||
if ((mod == 0) && (base == 5))
|
||||
offset = 0; /* No base register */
|
||||
else
|
||||
offset = REG_(base);
|
||||
|
||||
if (index == 4)
|
||||
{
|
||||
/* No index register */
|
||||
/* A non-zero ss is illegal */
|
||||
if ( ss )
|
||||
EXCEPTION(EX_Invalid);
|
||||
}
|
||||
else
|
||||
{
|
||||
offset += (REG_(index)) << ss;
|
||||
}
|
||||
|
||||
if (mod == 1)
|
||||
{
|
||||
/* 8 bit signed displacement */
|
||||
long displacement;
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(1);
|
||||
FPU_get_user(displacement, (signed char __user *) (*fpu_eip));
|
||||
offset += displacement;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
(*fpu_eip)++;
|
||||
}
|
||||
else if (mod == 2 || base == 5) /* The second condition also has mod==0 */
|
||||
{
|
||||
/* 32 bit displacement */
|
||||
long displacement;
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(4);
|
||||
FPU_get_user(displacement, (long __user *) (*fpu_eip));
|
||||
offset += displacement;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
(*fpu_eip) += 4;
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
|
||||
static unsigned long vm86_segment(u_char segment,
|
||||
struct address *addr)
|
||||
{
|
||||
segment--;
|
||||
#ifdef PARANOID
|
||||
if ( segment > PREFIX_SS_ )
|
||||
{
|
||||
EXCEPTION(EX_INTERNAL|0x130);
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
addr->selector = VM86_REG_(segment);
|
||||
return (unsigned long)VM86_REG_(segment) << 4;
|
||||
}
|
||||
|
||||
|
||||
/* This should work for 16 and 32 bit protected mode. */
|
||||
static long pm_address(u_char FPU_modrm, u_char segment,
|
||||
struct address *addr, long offset)
|
||||
{
|
||||
struct desc_struct descriptor;
|
||||
unsigned long base_address, limit, address, seg_top;
|
||||
|
||||
segment--;
|
||||
|
||||
#ifdef PARANOID
|
||||
/* segment is unsigned, so this also detects if segment was 0: */
|
||||
if ( segment > PREFIX_SS_ )
|
||||
{
|
||||
EXCEPTION(EX_INTERNAL|0x132);
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
|
||||
switch ( segment )
|
||||
{
|
||||
/* gs isn't used by the kernel, so it still has its
|
||||
user-space value. */
|
||||
case PREFIX_GS_-1:
|
||||
/* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
|
||||
savesegment(gs, addr->selector);
|
||||
break;
|
||||
default:
|
||||
addr->selector = PM_REG_(segment);
|
||||
}
|
||||
|
||||
descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
|
||||
base_address = SEG_BASE_ADDR(descriptor);
|
||||
address = base_address + offset;
|
||||
limit = base_address
|
||||
+ (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
|
||||
if ( limit < base_address ) limit = 0xffffffff;
|
||||
|
||||
if ( SEG_EXPAND_DOWN(descriptor) )
|
||||
{
|
||||
if ( SEG_G_BIT(descriptor) )
|
||||
seg_top = 0xffffffff;
|
||||
else
|
||||
{
|
||||
seg_top = base_address + (1 << 20);
|
||||
if ( seg_top < base_address ) seg_top = 0xffffffff;
|
||||
}
|
||||
access_limit =
|
||||
(address <= limit) || (address >= seg_top) ? 0 :
|
||||
((seg_top-address) >= 255 ? 255 : seg_top-address);
|
||||
}
|
||||
else
|
||||
{
|
||||
access_limit =
|
||||
(address > limit) || (address < base_address) ? 0 :
|
||||
((limit-address) >= 254 ? 255 : limit-address+1);
|
||||
}
|
||||
if ( SEG_EXECUTE_ONLY(descriptor) ||
|
||||
(!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) )
|
||||
{
|
||||
access_limit = 0;
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
MOD R/M byte: MOD == 3 has a special use for the FPU
|
||||
SIB byte used iff R/M = 100b
|
||||
|
||||
7 6 5 4 3 2 1 0
|
||||
..... ......... .........
|
||||
MOD OPCODE(2) R/M
|
||||
|
||||
|
||||
SIB byte
|
||||
|
||||
7 6 5 4 3 2 1 0
|
||||
..... ......... .........
|
||||
SS INDEX BASE
|
||||
|
||||
*/
|
||||
|
||||
void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip,
|
||||
struct address *addr,
|
||||
fpu_addr_modes addr_modes)
|
||||
{
|
||||
u_char mod;
|
||||
unsigned rm = FPU_modrm & 7;
|
||||
long *cpu_reg_ptr;
|
||||
int address = 0; /* Initialized just to stop compiler warnings. */
|
||||
|
||||
/* Memory accessed via the cs selector is write protected
|
||||
in `non-segmented' 32 bit protected mode. */
|
||||
if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
|
||||
&& (addr_modes.override.segment == PREFIX_CS_) )
|
||||
{
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
|
||||
addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
|
||||
|
||||
mod = (FPU_modrm >> 6) & 3;
|
||||
|
||||
if (rm == 4 && mod != 3)
|
||||
{
|
||||
address = sib(mod, fpu_eip);
|
||||
}
|
||||
else
|
||||
{
|
||||
cpu_reg_ptr = & REG_(rm);
|
||||
switch (mod)
|
||||
{
|
||||
case 0:
|
||||
if (rm == 5)
|
||||
{
|
||||
/* Special case: disp32 */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(4);
|
||||
FPU_get_user(address, (unsigned long __user *) (*fpu_eip));
|
||||
(*fpu_eip) += 4;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
addr->offset = address;
|
||||
return (void __user *) address;
|
||||
}
|
||||
else
|
||||
{
|
||||
address = *cpu_reg_ptr; /* Just return the contents
|
||||
of the cpu register */
|
||||
addr->offset = address;
|
||||
return (void __user *) address;
|
||||
}
|
||||
case 1:
|
||||
/* 8 bit signed displacement */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(1);
|
||||
FPU_get_user(address, (signed char __user *) (*fpu_eip));
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
(*fpu_eip)++;
|
||||
break;
|
||||
case 2:
|
||||
/* 32 bit displacement */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(4);
|
||||
FPU_get_user(address, (long __user *) (*fpu_eip));
|
||||
(*fpu_eip) += 4;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
break;
|
||||
case 3:
|
||||
/* Not legal for the FPU */
|
||||
EXCEPTION(EX_Invalid);
|
||||
}
|
||||
address += *cpu_reg_ptr;
|
||||
}
|
||||
|
||||
addr->offset = address;
|
||||
|
||||
switch ( addr_modes.default_mode )
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case VM86:
|
||||
address += vm86_segment(addr_modes.override.segment, addr);
|
||||
break;
|
||||
case PM16:
|
||||
case SEG32:
|
||||
address = pm_address(FPU_modrm, addr_modes.override.segment,
|
||||
addr, address);
|
||||
break;
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL|0x133);
|
||||
}
|
||||
|
||||
return (void __user *)address;
|
||||
}
|
||||
|
||||
|
||||
void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip,
|
||||
struct address *addr,
|
||||
fpu_addr_modes addr_modes)
|
||||
{
|
||||
u_char mod;
|
||||
unsigned rm = FPU_modrm & 7;
|
||||
int address = 0; /* Default used for mod == 0 */
|
||||
|
||||
/* Memory accessed via the cs selector is write protected
|
||||
in `non-segmented' 32 bit protected mode. */
|
||||
if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
|
||||
&& (addr_modes.override.segment == PREFIX_CS_) )
|
||||
{
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
|
||||
addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */
|
||||
|
||||
mod = (FPU_modrm >> 6) & 3;
|
||||
|
||||
switch (mod)
|
||||
{
|
||||
case 0:
|
||||
if (rm == 6)
|
||||
{
|
||||
/* Special case: disp16 */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(2);
|
||||
FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
|
||||
(*fpu_eip) += 2;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
goto add_segment;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
/* 8 bit signed displacement */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(1);
|
||||
FPU_get_user(address, (signed char __user *) (*fpu_eip));
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
(*fpu_eip)++;
|
||||
break;
|
||||
case 2:
|
||||
/* 16 bit displacement */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_code_access_ok(2);
|
||||
FPU_get_user(address, (unsigned short __user *) (*fpu_eip));
|
||||
(*fpu_eip) += 2;
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
break;
|
||||
case 3:
|
||||
/* Not legal for the FPU */
|
||||
EXCEPTION(EX_Invalid);
|
||||
break;
|
||||
}
|
||||
switch ( rm )
|
||||
{
|
||||
case 0:
|
||||
address += FPU_info->___ebx + FPU_info->___esi;
|
||||
break;
|
||||
case 1:
|
||||
address += FPU_info->___ebx + FPU_info->___edi;
|
||||
break;
|
||||
case 2:
|
||||
address += FPU_info->___ebp + FPU_info->___esi;
|
||||
if ( addr_modes.override.segment == PREFIX_DEFAULT )
|
||||
addr_modes.override.segment = PREFIX_SS_;
|
||||
break;
|
||||
case 3:
|
||||
address += FPU_info->___ebp + FPU_info->___edi;
|
||||
if ( addr_modes.override.segment == PREFIX_DEFAULT )
|
||||
addr_modes.override.segment = PREFIX_SS_;
|
||||
break;
|
||||
case 4:
|
||||
address += FPU_info->___esi;
|
||||
break;
|
||||
case 5:
|
||||
address += FPU_info->___edi;
|
||||
break;
|
||||
case 6:
|
||||
address += FPU_info->___ebp;
|
||||
if ( addr_modes.override.segment == PREFIX_DEFAULT )
|
||||
addr_modes.override.segment = PREFIX_SS_;
|
||||
break;
|
||||
case 7:
|
||||
address += FPU_info->___ebx;
|
||||
break;
|
||||
}
|
||||
|
||||
add_segment:
|
||||
address &= 0xffff;
|
||||
|
||||
addr->offset = address;
|
||||
|
||||
switch ( addr_modes.default_mode )
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case VM86:
|
||||
address += vm86_segment(addr_modes.override.segment, addr);
|
||||
break;
|
||||
case PM16:
|
||||
case SEG32:
|
||||
address = pm_address(FPU_modrm, addr_modes.override.segment,
|
||||
addr, address);
|
||||
break;
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL|0x131);
|
||||
}
|
||||
|
||||
return (void __user *)address ;
|
||||
}
|
272
arch/x86/math-emu/load_store.c
Normal file
272
arch/x86/math-emu/load_store.c
Normal file
@@ -0,0 +1,272 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| load_store.c |
|
||||
| |
|
||||
| This file contains most of the code to interpret the FPU instructions |
|
||||
| which load and store from user memory. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Note: |
|
||||
| The file contains code which accesses user memory. |
|
||||
| Emulator static data may change when user memory is accessed, due to |
|
||||
| other processes using the emulator while swapping is in progress. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "status_w.h"
|
||||
#include "control_w.h"
|
||||
|
||||
|
||||
#define _NONE_ 0 /* st0_ptr etc not needed */
|
||||
#define _REG0_ 1 /* Will be storing st(0) */
|
||||
#define _PUSH_ 3 /* Need to check for space to push onto stack */
|
||||
#define _null_ 4 /* Function illegal or not implemented */
|
||||
|
||||
#define pop_0() { FPU_settag0(TAG_Empty); top++; }
|
||||
|
||||
|
||||
static u_char const type_table[32] = {
|
||||
_PUSH_, _PUSH_, _PUSH_, _PUSH_,
|
||||
_null_, _null_, _null_, _null_,
|
||||
_REG0_, _REG0_, _REG0_, _REG0_,
|
||||
_REG0_, _REG0_, _REG0_, _REG0_,
|
||||
_NONE_, _null_, _NONE_, _PUSH_,
|
||||
_NONE_, _PUSH_, _null_, _PUSH_,
|
||||
_NONE_, _null_, _NONE_, _REG0_,
|
||||
_NONE_, _REG0_, _NONE_, _REG0_
|
||||
};
|
||||
|
||||
u_char const data_sizes_16[32] = {
|
||||
4, 4, 8, 2, 0, 0, 0, 0,
|
||||
4, 4, 8, 2, 4, 4, 8, 2,
|
||||
14, 0, 94, 10, 2, 10, 0, 8,
|
||||
14, 0, 94, 10, 2, 10, 2, 8
|
||||
};
|
||||
|
||||
static u_char const data_sizes_32[32] = {
|
||||
4, 4, 8, 2, 0, 0, 0, 0,
|
||||
4, 4, 8, 2, 4, 4, 8, 2,
|
||||
28, 0,108, 10, 2, 10, 0, 8,
|
||||
28, 0,108, 10, 2, 10, 2, 8
|
||||
};
|
||||
|
||||
int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
|
||||
void __user *data_address)
|
||||
{
|
||||
FPU_REG loaded_data;
|
||||
FPU_REG *st0_ptr;
|
||||
u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */
|
||||
u_char loaded_tag;
|
||||
|
||||
st0_ptr = NULL; /* Initialized just to stop compiler warnings. */
|
||||
|
||||
if ( addr_modes.default_mode & PROTECTED )
|
||||
{
|
||||
if ( addr_modes.default_mode == SEG32 )
|
||||
{
|
||||
if ( access_limit < data_sizes_32[type] )
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
else if ( addr_modes.default_mode == PM16 )
|
||||
{
|
||||
if ( access_limit < data_sizes_16[type] )
|
||||
math_abort(FPU_info,SIGSEGV);
|
||||
}
|
||||
#ifdef PARANOID
|
||||
else
|
||||
EXCEPTION(EX_INTERNAL|0x140);
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
|
||||
switch ( type_table[type] )
|
||||
{
|
||||
case _NONE_:
|
||||
break;
|
||||
case _REG0_:
|
||||
st0_ptr = &st(0); /* Some of these instructions pop after
|
||||
storing */
|
||||
st0_tag = FPU_gettag0();
|
||||
break;
|
||||
case _PUSH_:
|
||||
{
|
||||
if ( FPU_gettagi(-1) != TAG_Empty )
|
||||
{ FPU_stack_overflow(); return 0; }
|
||||
top--;
|
||||
st0_ptr = &st(0);
|
||||
}
|
||||
break;
|
||||
case _null_:
|
||||
FPU_illegal();
|
||||
return 0;
|
||||
#ifdef PARANOID
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL|0x141);
|
||||
return 0;
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
|
||||
switch ( type )
|
||||
{
|
||||
case 000: /* fld m32real */
|
||||
clear_C1();
|
||||
loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data);
|
||||
if ( (loaded_tag == TAG_Special)
|
||||
&& isNaN(&loaded_data)
|
||||
&& (real_1op_NaN(&loaded_data) < 0) )
|
||||
{
|
||||
top++;
|
||||
break;
|
||||
}
|
||||
FPU_copy_to_reg0(&loaded_data, loaded_tag);
|
||||
break;
|
||||
case 001: /* fild m32int */
|
||||
clear_C1();
|
||||
loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data);
|
||||
FPU_copy_to_reg0(&loaded_data, loaded_tag);
|
||||
break;
|
||||
case 002: /* fld m64real */
|
||||
clear_C1();
|
||||
loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data);
|
||||
if ( (loaded_tag == TAG_Special)
|
||||
&& isNaN(&loaded_data)
|
||||
&& (real_1op_NaN(&loaded_data) < 0) )
|
||||
{
|
||||
top++;
|
||||
break;
|
||||
}
|
||||
FPU_copy_to_reg0(&loaded_data, loaded_tag);
|
||||
break;
|
||||
case 003: /* fild m16int */
|
||||
clear_C1();
|
||||
loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data);
|
||||
FPU_copy_to_reg0(&loaded_data, loaded_tag);
|
||||
break;
|
||||
case 010: /* fst m32real */
|
||||
clear_C1();
|
||||
FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address);
|
||||
break;
|
||||
case 011: /* fist m32int */
|
||||
clear_C1();
|
||||
FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address);
|
||||
break;
|
||||
case 012: /* fst m64real */
|
||||
clear_C1();
|
||||
FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address);
|
||||
break;
|
||||
case 013: /* fist m16int */
|
||||
clear_C1();
|
||||
FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address);
|
||||
break;
|
||||
case 014: /* fstp m32real */
|
||||
clear_C1();
|
||||
if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) )
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
break;
|
||||
case 015: /* fistp m32int */
|
||||
clear_C1();
|
||||
if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) )
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
break;
|
||||
case 016: /* fstp m64real */
|
||||
clear_C1();
|
||||
if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) )
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
break;
|
||||
case 017: /* fistp m16int */
|
||||
clear_C1();
|
||||
if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) )
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
break;
|
||||
case 020: /* fldenv m14/28byte */
|
||||
fldenv(addr_modes, (u_char __user *)data_address);
|
||||
/* Ensure that the values just loaded are not changed by
|
||||
fix-up operations. */
|
||||
return 1;
|
||||
case 022: /* frstor m94/108byte */
|
||||
frstor(addr_modes, (u_char __user *)data_address);
|
||||
/* Ensure that the values just loaded are not changed by
|
||||
fix-up operations. */
|
||||
return 1;
|
||||
case 023: /* fbld m80dec */
|
||||
clear_C1();
|
||||
loaded_tag = FPU_load_bcd((u_char __user *)data_address);
|
||||
FPU_settag0(loaded_tag);
|
||||
break;
|
||||
case 024: /* fldcw */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_access_ok(VERIFY_READ, data_address, 2);
|
||||
FPU_get_user(control_word, (unsigned short __user *) data_address);
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
if ( partial_status & ~control_word & CW_Exceptions )
|
||||
partial_status |= (SW_Summary | SW_Backward);
|
||||
else
|
||||
partial_status &= ~(SW_Summary | SW_Backward);
|
||||
#ifdef PECULIAR_486
|
||||
control_word |= 0x40; /* An 80486 appears to always set this bit */
|
||||
#endif /* PECULIAR_486 */
|
||||
return 1;
|
||||
case 025: /* fld m80real */
|
||||
clear_C1();
|
||||
loaded_tag = FPU_load_extended((long double __user *)data_address, 0);
|
||||
FPU_settag0(loaded_tag);
|
||||
break;
|
||||
case 027: /* fild m64int */
|
||||
clear_C1();
|
||||
loaded_tag = FPU_load_int64((long long __user *)data_address);
|
||||
if (loaded_tag == TAG_Error)
|
||||
return 0;
|
||||
FPU_settag0(loaded_tag);
|
||||
break;
|
||||
case 030: /* fstenv m14/28byte */
|
||||
fstenv(addr_modes, (u_char __user *)data_address);
|
||||
return 1;
|
||||
case 032: /* fsave */
|
||||
fsave(addr_modes, (u_char __user *)data_address);
|
||||
return 1;
|
||||
case 033: /* fbstp m80dec */
|
||||
clear_C1();
|
||||
if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) )
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
break;
|
||||
case 034: /* fstcw m16int */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_access_ok(VERIFY_WRITE,data_address,2);
|
||||
FPU_put_user(control_word, (unsigned short __user *) data_address);
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
return 1;
|
||||
case 035: /* fstp m80real */
|
||||
clear_C1();
|
||||
if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) )
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
break;
|
||||
case 036: /* fstsw m2byte */
|
||||
RE_ENTRANT_CHECK_OFF;
|
||||
FPU_access_ok(VERIFY_WRITE,data_address,2);
|
||||
FPU_put_user(status_word(),(unsigned short __user *) data_address);
|
||||
RE_ENTRANT_CHECK_ON;
|
||||
return 1;
|
||||
case 037: /* fistp m64int */
|
||||
clear_C1();
|
||||
if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) )
|
||||
pop_0(); /* pop only if the number was actually stored
|
||||
(see the 80486 manual p16-28) */
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
176
arch/x86/math-emu/mul_Xsig.S
Normal file
176
arch/x86/math-emu/mul_Xsig.S
Normal file
@@ -0,0 +1,176 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| mul_Xsig.S |
|
||||
| |
|
||||
| Multiply a 12 byte fixed point number by another fixed point number. |
|
||||
| |
|
||||
| Copyright (C) 1992,1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| void mul32_Xsig(Xsig *x, unsigned b) |
|
||||
| |
|
||||
| void mul64_Xsig(Xsig *x, unsigned long long *b) |
|
||||
| |
|
||||
| void mul_Xsig_Xsig(Xsig *x, unsigned *b) |
|
||||
| |
|
||||
| The result is neither rounded nor normalized, and the ls bit or so may |
|
||||
| be wrong. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
.file "mul_Xsig.S"
|
||||
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
.text
|
||||
ENTRY(mul32_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $16,%esp
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
movl PARAM2,%ecx
|
||||
|
||||
xor %eax,%eax
|
||||
movl %eax,-4(%ebp)
|
||||
movl %eax,-8(%ebp)
|
||||
|
||||
movl (%esi),%eax /* lsl of Xsig */
|
||||
mull %ecx /* msl of b */
|
||||
movl %edx,-12(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull %ecx /* msl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull %ecx /* msl of b */
|
||||
addl %eax,-8(%ebp)
|
||||
adcl %edx,-4(%ebp)
|
||||
|
||||
movl -12(%ebp),%eax
|
||||
movl %eax,(%esi)
|
||||
movl -8(%ebp),%eax
|
||||
movl %eax,4(%esi)
|
||||
movl -4(%ebp),%eax
|
||||
movl %eax,8(%esi)
|
||||
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
ENTRY(mul64_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $16,%esp
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
movl PARAM2,%ecx
|
||||
|
||||
xor %eax,%eax
|
||||
movl %eax,-4(%ebp)
|
||||
movl %eax,-8(%ebp)
|
||||
|
||||
movl (%esi),%eax /* lsl of Xsig */
|
||||
mull 4(%ecx) /* msl of b */
|
||||
movl %edx,-12(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull (%ecx) /* lsl of b */
|
||||
addl %edx,-12(%ebp)
|
||||
adcl $0,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull 4(%ecx) /* msl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull (%ecx) /* lsl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull 4(%ecx) /* msl of b */
|
||||
addl %eax,-8(%ebp)
|
||||
adcl %edx,-4(%ebp)
|
||||
|
||||
movl -12(%ebp),%eax
|
||||
movl %eax,(%esi)
|
||||
movl -8(%ebp),%eax
|
||||
movl %eax,4(%esi)
|
||||
movl -4(%ebp),%eax
|
||||
movl %eax,8(%esi)
|
||||
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
|
||||
ENTRY(mul_Xsig_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $16,%esp
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
movl PARAM2,%ecx
|
||||
|
||||
xor %eax,%eax
|
||||
movl %eax,-4(%ebp)
|
||||
movl %eax,-8(%ebp)
|
||||
|
||||
movl (%esi),%eax /* lsl of Xsig */
|
||||
mull 8(%ecx) /* msl of b */
|
||||
movl %edx,-12(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull 4(%ecx) /* midl of b */
|
||||
addl %edx,-12(%ebp)
|
||||
adcl $0,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull (%ecx) /* lsl of b */
|
||||
addl %edx,-12(%ebp)
|
||||
adcl $0,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull 8(%ecx) /* msl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull 4(%ecx) /* midl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull 8(%ecx) /* msl of b */
|
||||
addl %eax,-8(%ebp)
|
||||
adcl %edx,-4(%ebp)
|
||||
|
||||
movl -12(%ebp),%edx
|
||||
movl %edx,(%esi)
|
||||
movl -8(%ebp),%edx
|
||||
movl %edx,4(%esi)
|
||||
movl -4(%ebp),%edx
|
||||
movl %edx,8(%esi)
|
||||
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
121
arch/x86/math-emu/poly.h
Normal file
121
arch/x86/math-emu/poly.h
Normal file
@@ -0,0 +1,121 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| poly.h |
|
||||
| |
|
||||
| Header file for the FPU-emu poly*.c source files. |
|
||||
| |
|
||||
| Copyright (C) 1994,1999 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@melbpc.org.au |
|
||||
| |
|
||||
| Declarations and definitions for functions operating on Xsig (12-byte |
|
||||
| extended-significand) quantities. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef _POLY_H
|
||||
#define _POLY_H
|
||||
|
||||
/* This 12-byte structure is used to improve the accuracy of computation
|
||||
of transcendental functions.
|
||||
Intended to be used to get results better than 8-byte computation
|
||||
allows. 9-byte would probably be sufficient.
|
||||
*/
|
||||
typedef struct {
|
||||
unsigned long lsw;
|
||||
unsigned long midw;
|
||||
unsigned long msw;
|
||||
} Xsig;
|
||||
|
||||
asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
|
||||
unsigned long long *result);
|
||||
asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
|
||||
const unsigned long long terms[], const int n);
|
||||
|
||||
asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
|
||||
asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
|
||||
asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
|
||||
|
||||
asmlinkage void shr_Xsig(Xsig *, const int n);
|
||||
asmlinkage int round_Xsig(Xsig *);
|
||||
asmlinkage int norm_Xsig(Xsig *);
|
||||
asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
|
||||
|
||||
/* Macro to extract the most significant 32 bits from a long long */
|
||||
#define LL_MSW(x) (((unsigned long *)&x)[1])
|
||||
|
||||
/* Macro to initialize an Xsig struct */
|
||||
#define MK_XSIG(a,b,c) { c, b, a }
|
||||
|
||||
/* Macro to access the 8 ms bytes of an Xsig as a long long */
|
||||
#define XSIG_LL(x) (*(unsigned long long *)&x.midw)
|
||||
|
||||
|
||||
/*
|
||||
Need to run gcc with optimizations on to get these to
|
||||
actually be in-line.
|
||||
*/
|
||||
|
||||
/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result.
|
||||
The answer is the ms word of the product. */
|
||||
/* Some versions of gcc make it difficult to stop eax from being clobbered.
|
||||
Merely specifying that it is used doesn't work...
|
||||
*/
|
||||
static inline unsigned long mul_32_32(const unsigned long arg1,
|
||||
const unsigned long arg2)
|
||||
{
|
||||
int retval;
|
||||
asm volatile ("mull %2; movl %%edx,%%eax" \
|
||||
:"=a" (retval) \
|
||||
:"0" (arg1), "g" (arg2) \
|
||||
:"dx");
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
|
||||
static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
|
||||
{
|
||||
asm volatile ("movl %1,%%edi; movl %2,%%esi;\n"
|
||||
"movl (%%esi),%%eax; addl %%eax,(%%edi);\n"
|
||||
"movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n"
|
||||
"movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n"
|
||||
:"=g" (*dest):"g" (dest), "g" (x2)
|
||||
:"ax","si","di");
|
||||
}
|
||||
|
||||
|
||||
/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
|
||||
/* Note: the constraints in the asm statement didn't always work properly
|
||||
with gcc 2.5.8. Changing from using edi to using ecx got around the
|
||||
problem, but keep fingers crossed! */
|
||||
static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
|
||||
{
|
||||
asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n"
|
||||
"movl (%%esi),%%eax; addl %%eax,(%%ecx);\n"
|
||||
"movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n"
|
||||
"movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n"
|
||||
"jnc 0f;\n"
|
||||
"rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n"
|
||||
"movl %4,%%ecx; incl (%%ecx)\n"
|
||||
"movl $1,%%eax; jmp 1f;\n"
|
||||
"0: xorl %%eax,%%eax;\n"
|
||||
"1:\n"
|
||||
:"=g" (*exp), "=g" (*dest)
|
||||
:"g" (dest), "g" (x2), "g" (exp)
|
||||
:"cx","si","ax");
|
||||
}
|
||||
|
||||
|
||||
/* Negate (subtract from 1.0) the 12 byte Xsig */
|
||||
/* This is faster in a loop on my 386 than using the "neg" instruction. */
|
||||
static inline void negate_Xsig(Xsig *x)
|
||||
{
|
||||
asm volatile("movl %1,%%esi;\n"
|
||||
"xorl %%ecx,%%ecx;\n"
|
||||
"movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n"
|
||||
"movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n"
|
||||
"movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n"
|
||||
:"=g" (*x):"g" (x):"si","ax","cx");
|
||||
}
|
||||
|
||||
#endif /* _POLY_H */
|
156
arch/x86/math-emu/poly_2xm1.c
Normal file
156
arch/x86/math-emu/poly_2xm1.c
Normal file
@@ -0,0 +1,156 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| poly_2xm1.c |
|
||||
| |
|
||||
| Function to compute 2^x-1 by a polynomial approximation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "reg_constant.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "fpu_system.h"
|
||||
#include "control_w.h"
|
||||
#include "poly.h"
|
||||
|
||||
|
||||
#define HIPOWER 11
|
||||
static const unsigned long long lterms[HIPOWER] =
|
||||
{
|
||||
0x0000000000000000LL, /* This term done separately as 12 bytes */
|
||||
0xf5fdeffc162c7543LL,
|
||||
0x1c6b08d704a0bfa6LL,
|
||||
0x0276556df749cc21LL,
|
||||
0x002bb0ffcf14f6b8LL,
|
||||
0x0002861225ef751cLL,
|
||||
0x00001ffcbfcd5422LL,
|
||||
0x00000162c005d5f1LL,
|
||||
0x0000000da96ccb1bLL,
|
||||
0x0000000078d1b897LL,
|
||||
0x000000000422b029LL
|
||||
};
|
||||
|
||||
static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
|
||||
|
||||
/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0,
|
||||
These numbers are 2^(1/4), 2^(1/2), and 2^(3/4)
|
||||
*/
|
||||
static const Xsig shiftterm0 = MK_XSIG(0, 0, 0);
|
||||
static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318);
|
||||
static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
|
||||
static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
|
||||
|
||||
static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
|
||||
&shiftterm2, &shiftterm3 };
|
||||
|
||||
|
||||
/*--- poly_2xm1() -----------------------------------------------------------+
|
||||
| Requires st(0) which is TAG_Valid and < 1. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
|
||||
{
|
||||
long int exponent, shift;
|
||||
unsigned long long Xll;
|
||||
Xsig accumulator, Denom, argSignif;
|
||||
u_char tag;
|
||||
|
||||
exponent = exponent16(arg);
|
||||
|
||||
#ifdef PARANOID
|
||||
if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */
|
||||
{
|
||||
/* Number negative, too large, or not Valid. */
|
||||
EXCEPTION(EX_INTERNAL|0x127);
|
||||
return 1;
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
|
||||
argSignif.lsw = 0;
|
||||
XSIG_LL(argSignif) = Xll = significand(arg);
|
||||
|
||||
if ( exponent == -1 )
|
||||
{
|
||||
shift = (argSignif.msw & 0x40000000) ? 3 : 2;
|
||||
/* subtract 0.5 or 0.75 */
|
||||
exponent -= 2;
|
||||
XSIG_LL(argSignif) <<= 2;
|
||||
Xll <<= 2;
|
||||
}
|
||||
else if ( exponent == -2 )
|
||||
{
|
||||
shift = 1;
|
||||
/* subtract 0.25 */
|
||||
exponent--;
|
||||
XSIG_LL(argSignif) <<= 1;
|
||||
Xll <<= 1;
|
||||
}
|
||||
else
|
||||
shift = 0;
|
||||
|
||||
if ( exponent < -2 )
|
||||
{
|
||||
/* Shift the argument right by the required places. */
|
||||
if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U )
|
||||
Xll++; /* round up */
|
||||
}
|
||||
|
||||
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
|
||||
polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
|
||||
mul_Xsig_Xsig(&accumulator, &argSignif);
|
||||
shr_Xsig(&accumulator, 3);
|
||||
|
||||
mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */
|
||||
add_two_Xsig(&accumulator, &argSignif, &exponent);
|
||||
|
||||
if ( shift )
|
||||
{
|
||||
/* The argument is large, use the identity:
|
||||
f(x+a) = f(a) * (f(x) + 1) - 1;
|
||||
*/
|
||||
shr_Xsig(&accumulator, - exponent);
|
||||
accumulator.msw |= 0x80000000; /* add 1.0 */
|
||||
mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
|
||||
accumulator.msw &= 0x3fffffff; /* subtract 1.0 */
|
||||
exponent = 1;
|
||||
}
|
||||
|
||||
if ( sign != SIGN_POS )
|
||||
{
|
||||
/* The argument is negative, use the identity:
|
||||
f(-x) = -f(x) / (1 + f(x))
|
||||
*/
|
||||
Denom.lsw = accumulator.lsw;
|
||||
XSIG_LL(Denom) = XSIG_LL(accumulator);
|
||||
if ( exponent < 0 )
|
||||
shr_Xsig(&Denom, - exponent);
|
||||
else if ( exponent > 0 )
|
||||
{
|
||||
/* exponent must be 1 here */
|
||||
XSIG_LL(Denom) <<= 1;
|
||||
if ( Denom.lsw & 0x80000000 )
|
||||
XSIG_LL(Denom) |= 1;
|
||||
(Denom.lsw) <<= 1;
|
||||
}
|
||||
Denom.msw |= 0x80000000; /* add 1.0 */
|
||||
div_Xsig(&accumulator, &Denom, &accumulator);
|
||||
}
|
||||
|
||||
/* Convert to 64 bit signed-compatible */
|
||||
exponent += round_Xsig(&accumulator);
|
||||
|
||||
result = &st(0);
|
||||
significand(result) = XSIG_LL(accumulator);
|
||||
setexponent16(result, exponent);
|
||||
|
||||
tag = FPU_round(result, 1, 0, FULL_PRECISION, sign);
|
||||
|
||||
setsign(result, sign);
|
||||
FPU_settag0(tag);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
229
arch/x86/math-emu/poly_atan.c
Normal file
229
arch/x86/math-emu/poly_atan.c
Normal file
@@ -0,0 +1,229 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| poly_atan.c |
|
||||
| |
|
||||
| Compute the arctan of a FPU_REG, using a polynomial approximation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "reg_constant.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "fpu_system.h"
|
||||
#include "status_w.h"
|
||||
#include "control_w.h"
|
||||
#include "poly.h"
|
||||
|
||||
|
||||
#define HIPOWERon 6 /* odd poly, negative terms */
|
||||
static const unsigned long long oddnegterms[HIPOWERon] =
|
||||
{
|
||||
0x0000000000000000LL, /* Dummy (not for - 1.0) */
|
||||
0x015328437f756467LL,
|
||||
0x0005dda27b73dec6LL,
|
||||
0x0000226bf2bfb91aLL,
|
||||
0x000000ccc439c5f7LL,
|
||||
0x0000000355438407LL
|
||||
} ;
|
||||
|
||||
#define HIPOWERop 6 /* odd poly, positive terms */
|
||||
static const unsigned long long oddplterms[HIPOWERop] =
|
||||
{
|
||||
/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */
|
||||
0x0db55a71875c9ac2LL,
|
||||
0x0029fce2d67880b0LL,
|
||||
0x0000dfd3908b4596LL,
|
||||
0x00000550fd61dab4LL,
|
||||
0x0000001c9422b3f9LL,
|
||||
0x000000003e3301e1LL
|
||||
};
|
||||
|
||||
static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL;
|
||||
|
||||
static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);
|
||||
|
||||
static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
|
||||
|
||||
|
||||
/*--- poly_atan() -----------------------------------------------------------+
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
void poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
|
||||
FPU_REG *st1_ptr, u_char st1_tag)
|
||||
{
|
||||
u_char transformed, inverted,
|
||||
sign1, sign2;
|
||||
int exponent;
|
||||
long int dummy_exp;
|
||||
Xsig accumulator, Numer, Denom, accumulatore, argSignif,
|
||||
argSq, argSqSq;
|
||||
u_char tag;
|
||||
|
||||
sign1 = getsign(st0_ptr);
|
||||
sign2 = getsign(st1_ptr);
|
||||
if ( st0_tag == TAG_Valid )
|
||||
{
|
||||
exponent = exponent(st0_ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This gives non-compatible stack contents... */
|
||||
FPU_to_exp16(st0_ptr, st0_ptr);
|
||||
exponent = exponent16(st0_ptr);
|
||||
}
|
||||
if ( st1_tag == TAG_Valid )
|
||||
{
|
||||
exponent -= exponent(st1_ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This gives non-compatible stack contents... */
|
||||
FPU_to_exp16(st1_ptr, st1_ptr);
|
||||
exponent -= exponent16(st1_ptr);
|
||||
}
|
||||
|
||||
if ( (exponent < 0) || ((exponent == 0) &&
|
||||
((st0_ptr->sigh < st1_ptr->sigh) ||
|
||||
((st0_ptr->sigh == st1_ptr->sigh) &&
|
||||
(st0_ptr->sigl < st1_ptr->sigl))) ) )
|
||||
{
|
||||
inverted = 1;
|
||||
Numer.lsw = Denom.lsw = 0;
|
||||
XSIG_LL(Numer) = significand(st0_ptr);
|
||||
XSIG_LL(Denom) = significand(st1_ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
inverted = 0;
|
||||
exponent = -exponent;
|
||||
Numer.lsw = Denom.lsw = 0;
|
||||
XSIG_LL(Numer) = significand(st1_ptr);
|
||||
XSIG_LL(Denom) = significand(st0_ptr);
|
||||
}
|
||||
div_Xsig(&Numer, &Denom, &argSignif);
|
||||
exponent += norm_Xsig(&argSignif);
|
||||
|
||||
if ( (exponent >= -1)
|
||||
|| ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
|
||||
{
|
||||
/* The argument is greater than sqrt(2)-1 (=0.414213562...) */
|
||||
/* Convert the argument by an identity for atan */
|
||||
transformed = 1;
|
||||
|
||||
if ( exponent >= 0 )
|
||||
{
|
||||
#ifdef PARANOID
|
||||
if ( !( (exponent == 0) &&
|
||||
(argSignif.lsw == 0) && (argSignif.midw == 0) &&
|
||||
(argSignif.msw == 0x80000000) ) )
|
||||
{
|
||||
EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */
|
||||
return;
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
argSignif.msw = 0; /* Make the transformed arg -> 0.0 */
|
||||
}
|
||||
else
|
||||
{
|
||||
Numer.lsw = Denom.lsw = argSignif.lsw;
|
||||
XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
|
||||
|
||||
if ( exponent < -1 )
|
||||
shr_Xsig(&Numer, -1-exponent);
|
||||
negate_Xsig(&Numer);
|
||||
|
||||
shr_Xsig(&Denom, -exponent);
|
||||
Denom.msw |= 0x80000000;
|
||||
|
||||
div_Xsig(&Numer, &Denom, &argSignif);
|
||||
|
||||
exponent = -1 + norm_Xsig(&argSignif);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
transformed = 0;
|
||||
}
|
||||
|
||||
argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
|
||||
argSq.msw = argSignif.msw;
|
||||
mul_Xsig_Xsig(&argSq, &argSq);
|
||||
|
||||
argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
|
||||
mul_Xsig_Xsig(&argSqSq, &argSqSq);
|
||||
|
||||
accumulatore.lsw = argSq.lsw;
|
||||
XSIG_LL(accumulatore) = XSIG_LL(argSq);
|
||||
|
||||
shr_Xsig(&argSq, 2*(-1-exponent-1));
|
||||
shr_Xsig(&argSqSq, 4*(-1-exponent-1));
|
||||
|
||||
/* Now have argSq etc with binary point at the left
|
||||
.1xxxxxxxx */
|
||||
|
||||
/* Do the basic fixed point polynomial evaluation */
|
||||
accumulator.msw = accumulator.midw = accumulator.lsw = 0;
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
|
||||
oddplterms, HIPOWERop-1);
|
||||
mul64_Xsig(&accumulator, &XSIG_LL(argSq));
|
||||
negate_Xsig(&accumulator);
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
|
||||
negate_Xsig(&accumulator);
|
||||
add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
|
||||
|
||||
mul64_Xsig(&accumulatore, &denomterm);
|
||||
shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
|
||||
accumulatore.msw |= 0x80000000;
|
||||
|
||||
div_Xsig(&accumulator, &accumulatore, &accumulator);
|
||||
|
||||
mul_Xsig_Xsig(&accumulator, &argSignif);
|
||||
mul_Xsig_Xsig(&accumulator, &argSq);
|
||||
|
||||
shr_Xsig(&accumulator, 3);
|
||||
negate_Xsig(&accumulator);
|
||||
add_Xsig_Xsig(&accumulator, &argSignif);
|
||||
|
||||
if ( transformed )
|
||||
{
|
||||
/* compute pi/4 - accumulator */
|
||||
shr_Xsig(&accumulator, -1-exponent);
|
||||
negate_Xsig(&accumulator);
|
||||
add_Xsig_Xsig(&accumulator, &pi_signif);
|
||||
exponent = -1;
|
||||
}
|
||||
|
||||
if ( inverted )
|
||||
{
|
||||
/* compute pi/2 - accumulator */
|
||||
shr_Xsig(&accumulator, -exponent);
|
||||
negate_Xsig(&accumulator);
|
||||
add_Xsig_Xsig(&accumulator, &pi_signif);
|
||||
exponent = 0;
|
||||
}
|
||||
|
||||
if ( sign1 )
|
||||
{
|
||||
/* compute pi - accumulator */
|
||||
shr_Xsig(&accumulator, 1 - exponent);
|
||||
negate_Xsig(&accumulator);
|
||||
add_Xsig_Xsig(&accumulator, &pi_signif);
|
||||
exponent = 1;
|
||||
}
|
||||
|
||||
exponent += round_Xsig(&accumulator);
|
||||
|
||||
significand(st1_ptr) = XSIG_LL(accumulator);
|
||||
setexponent16(st1_ptr, exponent);
|
||||
|
||||
tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2);
|
||||
FPU_settagi(1, tag);
|
||||
|
||||
set_precision_flag_up(); /* We do not really know if up or down,
|
||||
use this as the default. */
|
||||
|
||||
}
|
272
arch/x86/math-emu/poly_l2.c
Normal file
272
arch/x86/math-emu/poly_l2.c
Normal file
@@ -0,0 +1,272 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| poly_l2.c |
|
||||
| |
|
||||
| Compute the base 2 log of a FPU_REG, using a polynomial approximation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
#include "exception.h"
|
||||
#include "reg_constant.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "fpu_system.h"
|
||||
#include "control_w.h"
|
||||
#include "poly.h"
|
||||
|
||||
|
||||
static void log2_kernel(FPU_REG const *arg, u_char argsign,
|
||||
Xsig *accum_result, long int *expon);
|
||||
|
||||
|
||||
/*--- poly_l2() -------------------------------------------------------------+
|
||||
| Base 2 logarithm by a polynomial approximation. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
|
||||
{
|
||||
long int exponent, expon, expon_expon;
|
||||
Xsig accumulator, expon_accum, yaccum;
|
||||
u_char sign, argsign;
|
||||
FPU_REG x;
|
||||
int tag;
|
||||
|
||||
exponent = exponent16(st0_ptr);
|
||||
|
||||
/* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
|
||||
if ( st0_ptr->sigh > (unsigned)0xb504f334 )
|
||||
{
|
||||
/* Treat as sqrt(2)/2 < st0_ptr < 1 */
|
||||
significand(&x) = - significand(st0_ptr);
|
||||
setexponent16(&x, -1);
|
||||
exponent++;
|
||||
argsign = SIGN_NEG;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Treat as 1 <= st0_ptr < sqrt(2) */
|
||||
x.sigh = st0_ptr->sigh - 0x80000000;
|
||||
x.sigl = st0_ptr->sigl;
|
||||
setexponent16(&x, 0);
|
||||
argsign = SIGN_POS;
|
||||
}
|
||||
tag = FPU_normalize_nuo(&x);
|
||||
|
||||
if ( tag == TAG_Zero )
|
||||
{
|
||||
expon = 0;
|
||||
accumulator.msw = accumulator.midw = accumulator.lsw = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
log2_kernel(&x, argsign, &accumulator, &expon);
|
||||
}
|
||||
|
||||
if ( exponent < 0 )
|
||||
{
|
||||
sign = SIGN_NEG;
|
||||
exponent = -exponent;
|
||||
}
|
||||
else
|
||||
sign = SIGN_POS;
|
||||
expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
|
||||
if ( exponent )
|
||||
{
|
||||
expon_expon = 31 + norm_Xsig(&expon_accum);
|
||||
shr_Xsig(&accumulator, expon_expon - expon);
|
||||
|
||||
if ( sign ^ argsign )
|
||||
negate_Xsig(&accumulator);
|
||||
add_Xsig_Xsig(&accumulator, &expon_accum);
|
||||
}
|
||||
else
|
||||
{
|
||||
expon_expon = expon;
|
||||
sign = argsign;
|
||||
}
|
||||
|
||||
yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr);
|
||||
mul_Xsig_Xsig(&accumulator, &yaccum);
|
||||
|
||||
expon_expon += round_Xsig(&accumulator);
|
||||
|
||||
if ( accumulator.msw == 0 )
|
||||
{
|
||||
FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
|
||||
return;
|
||||
}
|
||||
|
||||
significand(st1_ptr) = XSIG_LL(accumulator);
|
||||
setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
|
||||
|
||||
tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
|
||||
FPU_settagi(1, tag);
|
||||
|
||||
set_precision_flag_up(); /* 80486 appears to always do this */
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*--- poly_l2p1() -----------------------------------------------------------+
|
||||
| Base 2 logarithm by a polynomial approximation. |
|
||||
| log2(x+1) |
|
||||
+---------------------------------------------------------------------------*/
|
||||
int poly_l2p1(u_char sign0, u_char sign1,
|
||||
FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest)
|
||||
{
|
||||
u_char tag;
|
||||
long int exponent;
|
||||
Xsig accumulator, yaccum;
|
||||
|
||||
if ( exponent16(st0_ptr) < 0 )
|
||||
{
|
||||
log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
|
||||
|
||||
yaccum.lsw = 0;
|
||||
XSIG_LL(yaccum) = significand(st1_ptr);
|
||||
mul_Xsig_Xsig(&accumulator, &yaccum);
|
||||
|
||||
exponent += round_Xsig(&accumulator);
|
||||
|
||||
exponent += exponent16(st1_ptr) + 1;
|
||||
if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER;
|
||||
|
||||
significand(dest) = XSIG_LL(accumulator);
|
||||
setexponent16(dest, exponent);
|
||||
|
||||
tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
|
||||
FPU_settagi(1, tag);
|
||||
|
||||
if ( tag == TAG_Valid )
|
||||
set_precision_flag_up(); /* 80486 appears to always do this */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The magnitude of st0_ptr is far too large. */
|
||||
|
||||
if ( sign0 != SIGN_POS )
|
||||
{
|
||||
/* Trying to get the log of a negative number. */
|
||||
#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */
|
||||
changesign(st1_ptr);
|
||||
#else
|
||||
if ( arith_invalid(1) < 0 )
|
||||
return 1;
|
||||
#endif /* PECULIAR_486 */
|
||||
}
|
||||
|
||||
/* 80486 appears to do this */
|
||||
if ( sign0 == SIGN_NEG )
|
||||
set_precision_flag_down();
|
||||
else
|
||||
set_precision_flag_up();
|
||||
}
|
||||
|
||||
if ( exponent(dest) <= EXP_UNDER )
|
||||
EXCEPTION(EX_Underflow);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#undef HIPOWER
|
||||
#define HIPOWER 10
|
||||
static const unsigned long long logterms[HIPOWER] =
|
||||
{
|
||||
0x2a8eca5705fc2ef0LL,
|
||||
0xf6384ee1d01febceLL,
|
||||
0x093bb62877cdf642LL,
|
||||
0x006985d8a9ec439bLL,
|
||||
0x0005212c4f55a9c8LL,
|
||||
0x00004326a16927f0LL,
|
||||
0x0000038d1d80a0e7LL,
|
||||
0x0000003141cc80c6LL,
|
||||
0x00000002b1668c9fLL,
|
||||
0x000000002c7a46aaLL
|
||||
};
|
||||
|
||||
static const unsigned long leadterm = 0xb8000000;
|
||||
|
||||
|
||||
/*--- log2_kernel() ---------------------------------------------------------+
|
||||
| Base 2 logarithm by a polynomial approximation. |
|
||||
| log2(x+1) |
|
||||
+---------------------------------------------------------------------------*/
|
||||
static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
|
||||
long int *expon)
|
||||
{
|
||||
long int exponent, adj;
|
||||
unsigned long long Xsq;
|
||||
Xsig accumulator, Numer, Denom, argSignif, arg_signif;
|
||||
|
||||
exponent = exponent16(arg);
|
||||
Numer.lsw = Denom.lsw = 0;
|
||||
XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
|
||||
if ( argsign == SIGN_POS )
|
||||
{
|
||||
shr_Xsig(&Denom, 2 - (1 + exponent));
|
||||
Denom.msw |= 0x80000000;
|
||||
div_Xsig(&Numer, &Denom, &argSignif);
|
||||
}
|
||||
else
|
||||
{
|
||||
shr_Xsig(&Denom, 1 - (1 + exponent));
|
||||
negate_Xsig(&Denom);
|
||||
if ( Denom.msw & 0x80000000 )
|
||||
{
|
||||
div_Xsig(&Numer, &Denom, &argSignif);
|
||||
exponent ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Denom must be 1.0 */
|
||||
argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
|
||||
argSignif.msw = Numer.msw;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef PECULIAR_486
|
||||
/* Should check here that |local_arg| is within the valid range */
|
||||
if ( exponent >= -2 )
|
||||
{
|
||||
if ( (exponent > -2) ||
|
||||
(argSignif.msw > (unsigned)0xafb0ccc0) )
|
||||
{
|
||||
/* The argument is too large */
|
||||
}
|
||||
}
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
|
||||
adj = norm_Xsig(&argSignif);
|
||||
accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
|
||||
mul_Xsig_Xsig(&accumulator, &accumulator);
|
||||
shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
|
||||
Xsq = XSIG_LL(accumulator);
|
||||
if ( accumulator.lsw & 0x80000000 )
|
||||
Xsq++;
|
||||
|
||||
accumulator.msw = accumulator.midw = accumulator.lsw = 0;
|
||||
/* Do the basic fixed point polynomial evaluation */
|
||||
polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);
|
||||
|
||||
mul_Xsig_Xsig(&accumulator, &argSignif);
|
||||
shr_Xsig(&accumulator, 6 - adj);
|
||||
|
||||
mul32_Xsig(&arg_signif, leadterm);
|
||||
add_two_Xsig(&accumulator, &arg_signif, &exponent);
|
||||
|
||||
*expon = exponent + 1;
|
||||
accum_result->lsw = accumulator.lsw;
|
||||
accum_result->midw = accumulator.midw;
|
||||
accum_result->msw = accumulator.msw;
|
||||
|
||||
}
|
397
arch/x86/math-emu/poly_sin.c
Normal file
397
arch/x86/math-emu/poly_sin.c
Normal file
@@ -0,0 +1,397 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| poly_sin.c |
|
||||
| |
|
||||
| Computation of an approximation of the sin function and the cosine |
|
||||
| function by a polynomial. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997,1999 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@melbpc.org.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
#include "exception.h"
|
||||
#include "reg_constant.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "fpu_system.h"
|
||||
#include "control_w.h"
|
||||
#include "poly.h"
|
||||
|
||||
|
||||
#define N_COEFF_P 4
|
||||
#define N_COEFF_N 4
|
||||
|
||||
static const unsigned long long pos_terms_l[N_COEFF_P] =
|
||||
{
|
||||
0xaaaaaaaaaaaaaaabLL,
|
||||
0x00d00d00d00cf906LL,
|
||||
0x000006b99159a8bbLL,
|
||||
0x000000000d7392e6LL
|
||||
};
|
||||
|
||||
static const unsigned long long neg_terms_l[N_COEFF_N] =
|
||||
{
|
||||
0x2222222222222167LL,
|
||||
0x0002e3bc74aab624LL,
|
||||
0x0000000b09229062LL,
|
||||
0x00000000000c7973LL
|
||||
};
|
||||
|
||||
|
||||
|
||||
#define N_COEFF_PH 4
|
||||
#define N_COEFF_NH 4
|
||||
static const unsigned long long pos_terms_h[N_COEFF_PH] =
|
||||
{
|
||||
0x0000000000000000LL,
|
||||
0x05b05b05b05b0406LL,
|
||||
0x000049f93edd91a9LL,
|
||||
0x00000000c9c9ed62LL
|
||||
};
|
||||
|
||||
static const unsigned long long neg_terms_h[N_COEFF_NH] =
|
||||
{
|
||||
0xaaaaaaaaaaaaaa98LL,
|
||||
0x001a01a01a019064LL,
|
||||
0x0000008f76c68a77LL,
|
||||
0x0000000000d58f5eLL
|
||||
};
|
||||
|
||||
|
||||
/*--- poly_sine() -----------------------------------------------------------+
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
void poly_sine(FPU_REG *st0_ptr)
|
||||
{
|
||||
int exponent, echange;
|
||||
Xsig accumulator, argSqrd, argTo4;
|
||||
unsigned long fix_up, adj;
|
||||
unsigned long long fixed_arg;
|
||||
FPU_REG result;
|
||||
|
||||
exponent = exponent(st0_ptr);
|
||||
|
||||
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
|
||||
|
||||
/* Split into two ranges, for arguments below and above 1.0 */
|
||||
/* The boundary between upper and lower is approx 0.88309101259 */
|
||||
if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
|
||||
{
|
||||
/* The argument is <= 0.88309101259 */
|
||||
|
||||
argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
|
||||
mul64_Xsig(&argSqrd, &significand(st0_ptr));
|
||||
shr_Xsig(&argSqrd, 2*(-1-exponent));
|
||||
argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
|
||||
argTo4.lsw = argSqrd.lsw;
|
||||
mul_Xsig_Xsig(&argTo4, &argTo4);
|
||||
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
|
||||
N_COEFF_N-1);
|
||||
mul_Xsig_Xsig(&accumulator, &argSqrd);
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
|
||||
N_COEFF_P-1);
|
||||
|
||||
shr_Xsig(&accumulator, 2); /* Divide by four */
|
||||
accumulator.msw |= 0x80000000; /* Add 1.0 */
|
||||
|
||||
mul64_Xsig(&accumulator, &significand(st0_ptr));
|
||||
mul64_Xsig(&accumulator, &significand(st0_ptr));
|
||||
mul64_Xsig(&accumulator, &significand(st0_ptr));
|
||||
|
||||
/* Divide by four, FPU_REG compatible, etc */
|
||||
exponent = 3*exponent;
|
||||
|
||||
/* The minimum exponent difference is 3 */
|
||||
shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
|
||||
|
||||
negate_Xsig(&accumulator);
|
||||
XSIG_LL(accumulator) += significand(st0_ptr);
|
||||
|
||||
echange = round_Xsig(&accumulator);
|
||||
|
||||
setexponentpos(&result, exponent(st0_ptr) + echange);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The argument is > 0.88309101259 */
|
||||
/* We use sin(st(0)) = cos(pi/2-st(0)) */
|
||||
|
||||
fixed_arg = significand(st0_ptr);
|
||||
|
||||
if ( exponent == 0 )
|
||||
{
|
||||
/* The argument is >= 1.0 */
|
||||
|
||||
/* Put the binary point at the left. */
|
||||
fixed_arg <<= 1;
|
||||
}
|
||||
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
|
||||
fixed_arg = 0x921fb54442d18469LL - fixed_arg;
|
||||
/* There is a special case which arises due to rounding, to fix here. */
|
||||
if ( fixed_arg == 0xffffffffffffffffLL )
|
||||
fixed_arg = 0;
|
||||
|
||||
XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
|
||||
mul64_Xsig(&argSqrd, &fixed_arg);
|
||||
|
||||
XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
|
||||
mul_Xsig_Xsig(&argTo4, &argTo4);
|
||||
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
|
||||
N_COEFF_NH-1);
|
||||
mul_Xsig_Xsig(&accumulator, &argSqrd);
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
|
||||
N_COEFF_PH-1);
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
mul64_Xsig(&accumulator, &fixed_arg);
|
||||
mul64_Xsig(&accumulator, &fixed_arg);
|
||||
|
||||
shr_Xsig(&accumulator, 3);
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
add_Xsig_Xsig(&accumulator, &argSqrd);
|
||||
|
||||
shr_Xsig(&accumulator, 1);
|
||||
|
||||
accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
/* The basic computation is complete. Now fix the answer to
|
||||
compensate for the error due to the approximation used for
|
||||
pi/2
|
||||
*/
|
||||
|
||||
/* This has an exponent of -65 */
|
||||
fix_up = 0x898cc517;
|
||||
/* The fix-up needs to be improved for larger args */
|
||||
if ( argSqrd.msw & 0xffc00000 )
|
||||
{
|
||||
/* Get about 32 bit precision in these: */
|
||||
fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
|
||||
}
|
||||
fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
|
||||
|
||||
adj = accumulator.lsw; /* temp save */
|
||||
accumulator.lsw -= fix_up;
|
||||
if ( accumulator.lsw > adj )
|
||||
XSIG_LL(accumulator) --;
|
||||
|
||||
echange = round_Xsig(&accumulator);
|
||||
|
||||
setexponentpos(&result, echange - 1);
|
||||
}
|
||||
|
||||
significand(&result) = XSIG_LL(accumulator);
|
||||
setsign(&result, getsign(st0_ptr));
|
||||
FPU_copy_to_reg0(&result, TAG_Valid);
|
||||
|
||||
#ifdef PARANOID
|
||||
if ( (exponent(&result) >= 0)
|
||||
&& (significand(&result) > 0x8000000000000000LL) )
|
||||
{
|
||||
EXCEPTION(EX_INTERNAL|0x150);
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*--- poly_cos() ------------------------------------------------------------+
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
void poly_cos(FPU_REG *st0_ptr)
|
||||
{
|
||||
FPU_REG result;
|
||||
long int exponent, exp2, echange;
|
||||
Xsig accumulator, argSqrd, fix_up, argTo4;
|
||||
unsigned long long fixed_arg;
|
||||
|
||||
#ifdef PARANOID
|
||||
if ( (exponent(st0_ptr) > 0)
|
||||
|| ((exponent(st0_ptr) == 0)
|
||||
&& (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
|
||||
{
|
||||
EXCEPTION(EX_Invalid);
|
||||
FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
|
||||
return;
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
|
||||
exponent = exponent(st0_ptr);
|
||||
|
||||
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
|
||||
|
||||
if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
|
||||
{
|
||||
/* arg is < 0.687705 */
|
||||
|
||||
argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
|
||||
argSqrd.lsw = 0;
|
||||
mul64_Xsig(&argSqrd, &significand(st0_ptr));
|
||||
|
||||
if ( exponent < -1 )
|
||||
{
|
||||
/* shift the argument right by the required places */
|
||||
shr_Xsig(&argSqrd, 2*(-1-exponent));
|
||||
}
|
||||
|
||||
argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
|
||||
argTo4.lsw = argSqrd.lsw;
|
||||
mul_Xsig_Xsig(&argTo4, &argTo4);
|
||||
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
|
||||
N_COEFF_NH-1);
|
||||
mul_Xsig_Xsig(&accumulator, &argSqrd);
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
|
||||
N_COEFF_PH-1);
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
mul64_Xsig(&accumulator, &significand(st0_ptr));
|
||||
mul64_Xsig(&accumulator, &significand(st0_ptr));
|
||||
shr_Xsig(&accumulator, -2*(1+exponent));
|
||||
|
||||
shr_Xsig(&accumulator, 3);
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
add_Xsig_Xsig(&accumulator, &argSqrd);
|
||||
|
||||
shr_Xsig(&accumulator, 1);
|
||||
|
||||
/* It doesn't matter if accumulator is all zero here, the
|
||||
following code will work ok */
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
if ( accumulator.lsw & 0x80000000 )
|
||||
XSIG_LL(accumulator) ++;
|
||||
if ( accumulator.msw == 0 )
|
||||
{
|
||||
/* The result is 1.0 */
|
||||
FPU_copy_to_reg0(&CONST_1, TAG_Valid);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
significand(&result) = XSIG_LL(accumulator);
|
||||
|
||||
/* will be a valid positive nr with expon = -1 */
|
||||
setexponentpos(&result, -1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fixed_arg = significand(st0_ptr);
|
||||
|
||||
if ( exponent == 0 )
|
||||
{
|
||||
/* The argument is >= 1.0 */
|
||||
|
||||
/* Put the binary point at the left. */
|
||||
fixed_arg <<= 1;
|
||||
}
|
||||
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
|
||||
fixed_arg = 0x921fb54442d18469LL - fixed_arg;
|
||||
/* There is a special case which arises due to rounding, to fix here. */
|
||||
if ( fixed_arg == 0xffffffffffffffffLL )
|
||||
fixed_arg = 0;
|
||||
|
||||
exponent = -1;
|
||||
exp2 = -1;
|
||||
|
||||
/* A shift is needed here only for a narrow range of arguments,
|
||||
i.e. for fixed_arg approx 2^-32, but we pick up more... */
|
||||
if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
|
||||
{
|
||||
fixed_arg <<= 16;
|
||||
exponent -= 16;
|
||||
exp2 -= 16;
|
||||
}
|
||||
|
||||
XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
|
||||
mul64_Xsig(&argSqrd, &fixed_arg);
|
||||
|
||||
if ( exponent < -1 )
|
||||
{
|
||||
/* shift the argument right by the required places */
|
||||
shr_Xsig(&argSqrd, 2*(-1-exponent));
|
||||
}
|
||||
|
||||
argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
|
||||
argTo4.lsw = argSqrd.lsw;
|
||||
mul_Xsig_Xsig(&argTo4, &argTo4);
|
||||
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
|
||||
N_COEFF_N-1);
|
||||
mul_Xsig_Xsig(&accumulator, &argSqrd);
|
||||
negate_Xsig(&accumulator);
|
||||
|
||||
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
|
||||
N_COEFF_P-1);
|
||||
|
||||
shr_Xsig(&accumulator, 2); /* Divide by four */
|
||||
accumulator.msw |= 0x80000000; /* Add 1.0 */
|
||||
|
||||
mul64_Xsig(&accumulator, &fixed_arg);
|
||||
mul64_Xsig(&accumulator, &fixed_arg);
|
||||
mul64_Xsig(&accumulator, &fixed_arg);
|
||||
|
||||
/* Divide by four, FPU_REG compatible, etc */
|
||||
exponent = 3*exponent;
|
||||
|
||||
/* The minimum exponent difference is 3 */
|
||||
shr_Xsig(&accumulator, exp2 - exponent);
|
||||
|
||||
negate_Xsig(&accumulator);
|
||||
XSIG_LL(accumulator) += fixed_arg;
|
||||
|
||||
/* The basic computation is complete. Now fix the answer to
|
||||
compensate for the error due to the approximation used for
|
||||
pi/2
|
||||
*/
|
||||
|
||||
/* This has an exponent of -65 */
|
||||
XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
|
||||
fix_up.lsw = 0;
|
||||
|
||||
/* The fix-up needs to be improved for larger args */
|
||||
if ( argSqrd.msw & 0xffc00000 )
|
||||
{
|
||||
/* Get about 32 bit precision in these: */
|
||||
fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
|
||||
fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
|
||||
}
|
||||
|
||||
exp2 += norm_Xsig(&accumulator);
|
||||
shr_Xsig(&accumulator, 1); /* Prevent overflow */
|
||||
exp2++;
|
||||
shr_Xsig(&fix_up, 65 + exp2);
|
||||
|
||||
add_Xsig_Xsig(&accumulator, &fix_up);
|
||||
|
||||
echange = round_Xsig(&accumulator);
|
||||
|
||||
setexponentpos(&result, exp2 + echange);
|
||||
significand(&result) = XSIG_LL(accumulator);
|
||||
}
|
||||
|
||||
FPU_copy_to_reg0(&result, TAG_Valid);
|
||||
|
||||
#ifdef PARANOID
|
||||
if ( (exponent(&result) >= 0)
|
||||
&& (significand(&result) > 0x8000000000000000LL) )
|
||||
{
|
||||
EXCEPTION(EX_INTERNAL|0x151);
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
|
||||
}
|
222
arch/x86/math-emu/poly_tan.c
Normal file
222
arch/x86/math-emu/poly_tan.c
Normal file
@@ -0,0 +1,222 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| poly_tan.c |
|
||||
| |
|
||||
| Compute the tan of a FPU_REG, using a polynomial approximation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997,1999 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@melbpc.org.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "reg_constant.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "fpu_system.h"
|
||||
#include "control_w.h"
|
||||
#include "poly.h"
|
||||
|
||||
|
||||
#define HiPOWERop 3 /* odd poly, positive terms */
|
||||
static const unsigned long long oddplterm[HiPOWERop] =
|
||||
{
|
||||
0x0000000000000000LL,
|
||||
0x0051a1cf08fca228LL,
|
||||
0x0000000071284ff7LL
|
||||
};
|
||||
|
||||
#define HiPOWERon 2 /* odd poly, negative terms */
|
||||
static const unsigned long long oddnegterm[HiPOWERon] =
|
||||
{
|
||||
0x1291a9a184244e80LL,
|
||||
0x0000583245819c21LL
|
||||
};
|
||||
|
||||
#define HiPOWERep 2 /* even poly, positive terms */
|
||||
static const unsigned long long evenplterm[HiPOWERep] =
|
||||
{
|
||||
0x0e848884b539e888LL,
|
||||
0x00003c7f18b887daLL
|
||||
};
|
||||
|
||||
#define HiPOWERen 2 /* even poly, negative terms */
|
||||
static const unsigned long long evennegterm[HiPOWERen] =
|
||||
{
|
||||
0xf1f0200fd51569ccLL,
|
||||
0x003afb46105c4432LL
|
||||
};
|
||||
|
||||
static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
|
||||
|
||||
|
||||
/*--- poly_tan() ------------------------------------------------------------+
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
void poly_tan(FPU_REG *st0_ptr)
|
||||
{
|
||||
long int exponent;
|
||||
int invert;
|
||||
Xsig argSq, argSqSq, accumulatoro, accumulatore, accum,
|
||||
argSignif, fix_up;
|
||||
unsigned long adj;
|
||||
|
||||
exponent = exponent(st0_ptr);
|
||||
|
||||
#ifdef PARANOID
|
||||
if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */
|
||||
{ arith_invalid(0); return; } /* Need a positive number */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* Split the problem into two domains, smaller and larger than pi/4 */
|
||||
if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) )
|
||||
{
|
||||
/* The argument is greater than (approx) pi/4 */
|
||||
invert = 1;
|
||||
accum.lsw = 0;
|
||||
XSIG_LL(accum) = significand(st0_ptr);
|
||||
|
||||
if ( exponent == 0 )
|
||||
{
|
||||
/* The argument is >= 1.0 */
|
||||
/* Put the binary point at the left. */
|
||||
XSIG_LL(accum) <<= 1;
|
||||
}
|
||||
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
|
||||
XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
|
||||
/* This is a special case which arises due to rounding. */
|
||||
if ( XSIG_LL(accum) == 0xffffffffffffffffLL )
|
||||
{
|
||||
FPU_settag0(TAG_Valid);
|
||||
significand(st0_ptr) = 0x8a51e04daabda360LL;
|
||||
setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative);
|
||||
return;
|
||||
}
|
||||
|
||||
argSignif.lsw = accum.lsw;
|
||||
XSIG_LL(argSignif) = XSIG_LL(accum);
|
||||
exponent = -1 + norm_Xsig(&argSignif);
|
||||
}
|
||||
else
|
||||
{
|
||||
invert = 0;
|
||||
argSignif.lsw = 0;
|
||||
XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
|
||||
|
||||
if ( exponent < -1 )
|
||||
{
|
||||
/* shift the argument right by the required places */
|
||||
if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
|
||||
XSIG_LL(accum) ++; /* round up */
|
||||
}
|
||||
}
|
||||
|
||||
XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
|
||||
mul_Xsig_Xsig(&argSq, &argSq);
|
||||
XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
|
||||
mul_Xsig_Xsig(&argSqSq, &argSqSq);
|
||||
|
||||
/* Compute the negative terms for the numerator polynomial */
|
||||
accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
|
||||
polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
|
||||
mul_Xsig_Xsig(&accumulatoro, &argSq);
|
||||
negate_Xsig(&accumulatoro);
|
||||
/* Add the positive terms */
|
||||
polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);
|
||||
|
||||
|
||||
/* Compute the positive terms for the denominator polynomial */
|
||||
accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
|
||||
polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
|
||||
mul_Xsig_Xsig(&accumulatore, &argSq);
|
||||
negate_Xsig(&accumulatore);
|
||||
/* Add the negative terms */
|
||||
polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
|
||||
/* Multiply by arg^2 */
|
||||
mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
|
||||
mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
|
||||
/* de-normalize and divide by 2 */
|
||||
shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
|
||||
negate_Xsig(&accumulatore); /* This does 1 - accumulator */
|
||||
|
||||
/* Now find the ratio. */
|
||||
if ( accumulatore.msw == 0 )
|
||||
{
|
||||
/* accumulatoro must contain 1.0 here, (actually, 0) but it
|
||||
really doesn't matter what value we use because it will
|
||||
have negligible effect in later calculations
|
||||
*/
|
||||
XSIG_LL(accum) = 0x8000000000000000LL;
|
||||
accum.lsw = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
div_Xsig(&accumulatoro, &accumulatore, &accum);
|
||||
}
|
||||
|
||||
/* Multiply by 1/3 * arg^3 */
|
||||
mul64_Xsig(&accum, &XSIG_LL(argSignif));
|
||||
mul64_Xsig(&accum, &XSIG_LL(argSignif));
|
||||
mul64_Xsig(&accum, &XSIG_LL(argSignif));
|
||||
mul64_Xsig(&accum, &twothirds);
|
||||
shr_Xsig(&accum, -2*(exponent+1));
|
||||
|
||||
/* tan(arg) = arg + accum */
|
||||
add_two_Xsig(&accum, &argSignif, &exponent);
|
||||
|
||||
if ( invert )
|
||||
{
|
||||
/* We now have the value of tan(pi_2 - arg) where pi_2 is an
|
||||
approximation for pi/2
|
||||
*/
|
||||
/* The next step is to fix the answer to compensate for the
|
||||
error due to the approximation used for pi/2
|
||||
*/
|
||||
|
||||
/* This is (approx) delta, the error in our approx for pi/2
|
||||
(see above). It has an exponent of -65
|
||||
*/
|
||||
XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
|
||||
fix_up.lsw = 0;
|
||||
|
||||
if ( exponent == 0 )
|
||||
adj = 0xffffffff; /* We want approx 1.0 here, but
|
||||
this is close enough. */
|
||||
else if ( exponent > -30 )
|
||||
{
|
||||
adj = accum.msw >> -(exponent+1); /* tan */
|
||||
adj = mul_32_32(adj, adj); /* tan^2 */
|
||||
}
|
||||
else
|
||||
adj = 0;
|
||||
adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */
|
||||
|
||||
fix_up.msw += adj;
|
||||
if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */
|
||||
{
|
||||
/* Yes, we need to add an msb */
|
||||
shr_Xsig(&fix_up, 1);
|
||||
fix_up.msw |= 0x80000000;
|
||||
shr_Xsig(&fix_up, 64 + exponent);
|
||||
}
|
||||
else
|
||||
shr_Xsig(&fix_up, 65 + exponent);
|
||||
|
||||
add_two_Xsig(&accum, &fix_up, &exponent);
|
||||
|
||||
/* accum now contains tan(pi/2 - arg).
|
||||
Use tan(arg) = 1.0 / tan(pi/2 - arg)
|
||||
*/
|
||||
accumulatoro.lsw = accumulatoro.midw = 0;
|
||||
accumulatoro.msw = 0x80000000;
|
||||
div_Xsig(&accumulatoro, &accum, &accum);
|
||||
exponent = - exponent - 1;
|
||||
}
|
||||
|
||||
/* Transfer the result */
|
||||
round_Xsig(&accum);
|
||||
FPU_settag0(TAG_Valid);
|
||||
significand(st0_ptr) = XSIG_LL(accum);
|
||||
setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */
|
||||
|
||||
}
|
135
arch/x86/math-emu/polynom_Xsig.S
Normal file
135
arch/x86/math-emu/polynom_Xsig.S
Normal file
@@ -0,0 +1,135 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| polynomial_Xsig.S |
|
||||
| |
|
||||
| Fixed point arithmetic polynomial evaluation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| void polynomial_Xsig(Xsig *accum, unsigned long long x, |
|
||||
| unsigned long long terms[], int n) |
|
||||
| |
|
||||
| Computes: |
|
||||
| terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x |
|
||||
| and adds the result to the 12 byte Xsig. |
|
||||
| The terms[] are each 8 bytes, but all computation is performed to 12 byte |
|
||||
| precision. |
|
||||
| |
|
||||
| This function must be used carefully: most overflow of intermediate |
|
||||
| results is controlled, but overflow of the result is not. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
.file "polynomial_Xsig.S"
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
#define TERM_SIZE $8
|
||||
#define SUM_MS -20(%ebp) /* sum ms long */
|
||||
#define SUM_MIDDLE -24(%ebp) /* sum middle long */
|
||||
#define SUM_LS -28(%ebp) /* sum ls long */
|
||||
#define ACCUM_MS -4(%ebp) /* accum ms long */
|
||||
#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
|
||||
#define ACCUM_LS -12(%ebp) /* accum ls long */
|
||||
#define OVERFLOWED -16(%ebp) /* addition overflow flag */
|
||||
|
||||
.text
|
||||
ENTRY(polynomial_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $32,%esp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM2,%esi /* x */
|
||||
movl PARAM3,%edi /* terms */
|
||||
|
||||
movl TERM_SIZE,%eax
|
||||
mull PARAM4 /* n */
|
||||
addl %eax,%edi
|
||||
|
||||
movl 4(%edi),%edx /* terms[n] */
|
||||
movl %edx,SUM_MS
|
||||
movl (%edi),%edx /* terms[n] */
|
||||
movl %edx,SUM_MIDDLE
|
||||
xor %eax,%eax
|
||||
movl %eax,SUM_LS
|
||||
movb %al,OVERFLOWED
|
||||
|
||||
subl TERM_SIZE,%edi
|
||||
decl PARAM4
|
||||
js L_accum_done
|
||||
|
||||
L_accum_loop:
|
||||
xor %eax,%eax
|
||||
movl %eax,ACCUM_MS
|
||||
movl %eax,ACCUM_MIDDLE
|
||||
|
||||
movl SUM_MIDDLE,%eax
|
||||
mull (%esi) /* x ls long */
|
||||
movl %edx,ACCUM_LS
|
||||
|
||||
movl SUM_MIDDLE,%eax
|
||||
mull 4(%esi) /* x ms long */
|
||||
addl %eax,ACCUM_LS
|
||||
adcl %edx,ACCUM_MIDDLE
|
||||
adcl $0,ACCUM_MS
|
||||
|
||||
movl SUM_MS,%eax
|
||||
mull (%esi) /* x ls long */
|
||||
addl %eax,ACCUM_LS
|
||||
adcl %edx,ACCUM_MIDDLE
|
||||
adcl $0,ACCUM_MS
|
||||
|
||||
movl SUM_MS,%eax
|
||||
mull 4(%esi) /* x ms long */
|
||||
addl %eax,ACCUM_MIDDLE
|
||||
adcl %edx,ACCUM_MS
|
||||
|
||||
testb $0xff,OVERFLOWED
|
||||
jz L_no_overflow
|
||||
|
||||
movl (%esi),%eax
|
||||
addl %eax,ACCUM_MIDDLE
|
||||
movl 4(%esi),%eax
|
||||
adcl %eax,ACCUM_MS /* This could overflow too */
|
||||
|
||||
L_no_overflow:
|
||||
|
||||
/*
|
||||
* Now put the sum of next term and the accumulator
|
||||
* into the sum register
|
||||
*/
|
||||
movl ACCUM_LS,%eax
|
||||
addl (%edi),%eax /* term ls long */
|
||||
movl %eax,SUM_LS
|
||||
movl ACCUM_MIDDLE,%eax
|
||||
adcl (%edi),%eax /* term ls long */
|
||||
movl %eax,SUM_MIDDLE
|
||||
movl ACCUM_MS,%eax
|
||||
adcl 4(%edi),%eax /* term ms long */
|
||||
movl %eax,SUM_MS
|
||||
sbbb %al,%al
|
||||
movb %al,OVERFLOWED /* Used in the next iteration */
|
||||
|
||||
subl TERM_SIZE,%edi
|
||||
decl PARAM4
|
||||
jns L_accum_loop
|
||||
|
||||
L_accum_done:
|
||||
movl PARAM1,%edi /* accum */
|
||||
movl SUM_LS,%eax
|
||||
addl %eax,(%edi)
|
||||
movl SUM_MIDDLE,%eax
|
||||
adcl %eax,4(%edi)
|
||||
movl SUM_MS,%eax
|
||||
adcl %eax,8(%edi)
|
||||
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
374
arch/x86/math-emu/reg_add_sub.c
Normal file
374
arch/x86/math-emu/reg_add_sub.c
Normal file
@@ -0,0 +1,374 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_add_sub.c |
|
||||
| |
|
||||
| Functions to add or subtract two registers and put the result in a third. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| For each function, the destination may be any FPU_REG, including one of |
|
||||
| the source FPU_REGs. |
|
||||
| Each function returns 0 if the answer is o.k., otherwise a non-zero |
|
||||
| value is returned, indicating either an exception condition or an |
|
||||
| internal error. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "reg_constant.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
#include "fpu_system.h"
|
||||
|
||||
static
|
||||
int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
|
||||
FPU_REG const *b, u_char tagb, u_char signb,
|
||||
FPU_REG *dest, int deststnr, int control_w);
|
||||
|
||||
/*
|
||||
Operates on st(0) and st(n), or on st(0) and temporary data.
|
||||
The destination must be one of the source st(x).
|
||||
*/
|
||||
int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
|
||||
{
|
||||
FPU_REG *a = &st(0);
|
||||
FPU_REG *dest = &st(deststnr);
|
||||
u_char signb = getsign(b);
|
||||
u_char taga = FPU_gettag0();
|
||||
u_char signa = getsign(a);
|
||||
u_char saved_sign = getsign(dest);
|
||||
int diff, tag, expa, expb;
|
||||
|
||||
if ( !(taga | tagb) )
|
||||
{
|
||||
expa = exponent(a);
|
||||
expb = exponent(b);
|
||||
|
||||
valid_add:
|
||||
/* Both registers are valid */
|
||||
if (!(signa ^ signb))
|
||||
{
|
||||
/* signs are the same */
|
||||
tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The signs are different, so do a subtraction */
|
||||
diff = expa - expb;
|
||||
if (!diff)
|
||||
{
|
||||
diff = a->sigh - b->sigh; /* This works only if the ms bits
|
||||
are identical. */
|
||||
if (!diff)
|
||||
{
|
||||
diff = a->sigl > b->sigl;
|
||||
if (!diff)
|
||||
diff = -(a->sigl < b->sigl);
|
||||
}
|
||||
}
|
||||
|
||||
if (diff > 0)
|
||||
{
|
||||
tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
|
||||
}
|
||||
else if ( diff < 0 )
|
||||
{
|
||||
tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa);
|
||||
}
|
||||
else
|
||||
{
|
||||
FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
|
||||
/* sign depends upon rounding mode */
|
||||
setsign(dest, ((control_w & CW_RC) != RC_DOWN)
|
||||
? SIGN_POS : SIGN_NEG);
|
||||
return TAG_Zero;
|
||||
}
|
||||
}
|
||||
|
||||
if ( tag < 0 )
|
||||
{
|
||||
setsign(dest, saved_sign);
|
||||
return tag;
|
||||
}
|
||||
FPU_settagi(deststnr, tag);
|
||||
return tag;
|
||||
}
|
||||
|
||||
if ( taga == TAG_Special )
|
||||
taga = FPU_Special(a);
|
||||
if ( tagb == TAG_Special )
|
||||
tagb = FPU_Special(b);
|
||||
|
||||
if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
|
||||
|| ((taga == TW_Denormal) && (tagb == TAG_Valid))
|
||||
|| ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
|
||||
{
|
||||
FPU_REG x, y;
|
||||
|
||||
if ( denormal_operand() < 0 )
|
||||
return FPU_Exception;
|
||||
|
||||
FPU_to_exp16(a, &x);
|
||||
FPU_to_exp16(b, &y);
|
||||
a = &x;
|
||||
b = &y;
|
||||
expa = exponent16(a);
|
||||
expb = exponent16(b);
|
||||
goto valid_add;
|
||||
}
|
||||
|
||||
if ( (taga == TW_NaN) || (tagb == TW_NaN) )
|
||||
{
|
||||
if ( deststnr == 0 )
|
||||
return real_2op_NaN(b, tagb, deststnr, a);
|
||||
else
|
||||
return real_2op_NaN(a, taga, deststnr, a);
|
||||
}
|
||||
|
||||
return add_sub_specials(a, taga, signa, b, tagb, signb,
|
||||
dest, deststnr, control_w);
|
||||
}
|
||||
|
||||
|
||||
/* Subtract b from a. (a-b) -> dest */
|
||||
int FPU_sub(int flags, int rm, int control_w)
|
||||
{
|
||||
FPU_REG const *a, *b;
|
||||
FPU_REG *dest;
|
||||
u_char taga, tagb, signa, signb, saved_sign, sign;
|
||||
int diff, tag = 0, expa, expb, deststnr;
|
||||
|
||||
a = &st(0);
|
||||
taga = FPU_gettag0();
|
||||
|
||||
deststnr = 0;
|
||||
if ( flags & LOADED )
|
||||
{
|
||||
b = (FPU_REG *)rm;
|
||||
tagb = flags & 0x0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
b = &st(rm);
|
||||
tagb = FPU_gettagi(rm);
|
||||
|
||||
if ( flags & DEST_RM )
|
||||
deststnr = rm;
|
||||
}
|
||||
|
||||
signa = getsign(a);
|
||||
signb = getsign(b);
|
||||
|
||||
if ( flags & REV )
|
||||
{
|
||||
signa ^= SIGN_NEG;
|
||||
signb ^= SIGN_NEG;
|
||||
}
|
||||
|
||||
dest = &st(deststnr);
|
||||
saved_sign = getsign(dest);
|
||||
|
||||
if ( !(taga | tagb) )
|
||||
{
|
||||
expa = exponent(a);
|
||||
expb = exponent(b);
|
||||
|
||||
valid_subtract:
|
||||
/* Both registers are valid */
|
||||
|
||||
diff = expa - expb;
|
||||
|
||||
if (!diff)
|
||||
{
|
||||
diff = a->sigh - b->sigh; /* Works only if ms bits are identical */
|
||||
if (!diff)
|
||||
{
|
||||
diff = a->sigl > b->sigl;
|
||||
if (!diff)
|
||||
diff = -(a->sigl < b->sigl);
|
||||
}
|
||||
}
|
||||
|
||||
switch ( (((int)signa)*2 + signb) / SIGN_NEG )
|
||||
{
|
||||
case 0: /* P - P */
|
||||
case 3: /* N - N */
|
||||
if (diff > 0)
|
||||
{
|
||||
/* |a| > |b| */
|
||||
tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
|
||||
}
|
||||
else if ( diff == 0 )
|
||||
{
|
||||
FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
|
||||
|
||||
/* sign depends upon rounding mode */
|
||||
setsign(dest, ((control_w & CW_RC) != RC_DOWN)
|
||||
? SIGN_POS : SIGN_NEG);
|
||||
return TAG_Zero;
|
||||
}
|
||||
else
|
||||
{
|
||||
sign = signa ^ SIGN_NEG;
|
||||
tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa);
|
||||
}
|
||||
break;
|
||||
case 1: /* P - N */
|
||||
tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb);
|
||||
break;
|
||||
case 2: /* N - P */
|
||||
tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb);
|
||||
break;
|
||||
#ifdef PARANOID
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL|0x111);
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
if ( tag < 0 )
|
||||
{
|
||||
setsign(dest, saved_sign);
|
||||
return tag;
|
||||
}
|
||||
FPU_settagi(deststnr, tag);
|
||||
return tag;
|
||||
}
|
||||
|
||||
if ( taga == TAG_Special )
|
||||
taga = FPU_Special(a);
|
||||
if ( tagb == TAG_Special )
|
||||
tagb = FPU_Special(b);
|
||||
|
||||
if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
|
||||
|| ((taga == TW_Denormal) && (tagb == TAG_Valid))
|
||||
|| ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
|
||||
{
|
||||
FPU_REG x, y;
|
||||
|
||||
if ( denormal_operand() < 0 )
|
||||
return FPU_Exception;
|
||||
|
||||
FPU_to_exp16(a, &x);
|
||||
FPU_to_exp16(b, &y);
|
||||
a = &x;
|
||||
b = &y;
|
||||
expa = exponent16(a);
|
||||
expb = exponent16(b);
|
||||
|
||||
goto valid_subtract;
|
||||
}
|
||||
|
||||
if ( (taga == TW_NaN) || (tagb == TW_NaN) )
|
||||
{
|
||||
FPU_REG const *d1, *d2;
|
||||
if ( flags & REV )
|
||||
{
|
||||
d1 = b;
|
||||
d2 = a;
|
||||
}
|
||||
else
|
||||
{
|
||||
d1 = a;
|
||||
d2 = b;
|
||||
}
|
||||
if ( flags & LOADED )
|
||||
return real_2op_NaN(b, tagb, deststnr, d1);
|
||||
if ( flags & DEST_RM )
|
||||
return real_2op_NaN(a, taga, deststnr, d2);
|
||||
else
|
||||
return real_2op_NaN(b, tagb, deststnr, d2);
|
||||
}
|
||||
|
||||
return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
|
||||
dest, deststnr, control_w);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
|
||||
FPU_REG const *b, u_char tagb, u_char signb,
|
||||
FPU_REG *dest, int deststnr, int control_w)
|
||||
{
|
||||
if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
|
||||
&& (denormal_operand() < 0) )
|
||||
return FPU_Exception;
|
||||
|
||||
if (taga == TAG_Zero)
|
||||
{
|
||||
if (tagb == TAG_Zero)
|
||||
{
|
||||
/* Both are zero, result will be zero. */
|
||||
u_char different_signs = signa ^ signb;
|
||||
|
||||
FPU_copy_to_regi(a, TAG_Zero, deststnr);
|
||||
if ( different_signs )
|
||||
{
|
||||
/* Signs are different. */
|
||||
/* Sign of answer depends upon rounding mode. */
|
||||
setsign(dest, ((control_w & CW_RC) != RC_DOWN)
|
||||
? SIGN_POS : SIGN_NEG);
|
||||
}
|
||||
else
|
||||
setsign(dest, signa); /* signa may differ from the sign of a. */
|
||||
return TAG_Zero;
|
||||
}
|
||||
else
|
||||
{
|
||||
reg_copy(b, dest);
|
||||
if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) )
|
||||
{
|
||||
/* A pseudoDenormal, convert it. */
|
||||
addexponent(dest, 1);
|
||||
tagb = TAG_Valid;
|
||||
}
|
||||
else if ( tagb > TAG_Empty )
|
||||
tagb = TAG_Special;
|
||||
setsign(dest, signb); /* signb may differ from the sign of b. */
|
||||
FPU_settagi(deststnr, tagb);
|
||||
return tagb;
|
||||
}
|
||||
}
|
||||
else if (tagb == TAG_Zero)
|
||||
{
|
||||
reg_copy(a, dest);
|
||||
if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) )
|
||||
{
|
||||
/* A pseudoDenormal */
|
||||
addexponent(dest, 1);
|
||||
taga = TAG_Valid;
|
||||
}
|
||||
else if ( taga > TAG_Empty )
|
||||
taga = TAG_Special;
|
||||
setsign(dest, signa); /* signa may differ from the sign of a. */
|
||||
FPU_settagi(deststnr, taga);
|
||||
return taga;
|
||||
}
|
||||
else if (taga == TW_Infinity)
|
||||
{
|
||||
if ( (tagb != TW_Infinity) || (signa == signb) )
|
||||
{
|
||||
FPU_copy_to_regi(a, TAG_Special, deststnr);
|
||||
setsign(dest, signa); /* signa may differ from the sign of a. */
|
||||
return taga;
|
||||
}
|
||||
/* Infinity-Infinity is undefined. */
|
||||
return arith_invalid(deststnr);
|
||||
}
|
||||
else if (tagb == TW_Infinity)
|
||||
{
|
||||
FPU_copy_to_regi(b, TAG_Special, deststnr);
|
||||
setsign(dest, signb); /* signb may differ from the sign of b. */
|
||||
return tagb;
|
||||
}
|
||||
|
||||
#ifdef PARANOID
|
||||
EXCEPTION(EX_INTERNAL|0x101);
|
||||
#endif
|
||||
|
||||
return FPU_Exception;
|
||||
}
|
||||
|
381
arch/x86/math-emu/reg_compare.c
Normal file
381
arch/x86/math-emu/reg_compare.c
Normal file
@@ -0,0 +1,381 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_compare.c |
|
||||
| |
|
||||
| Compare two floating point registers |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| compare() is the core FPU_REG comparison function |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
#include "status_w.h"
|
||||
|
||||
|
||||
static int compare(FPU_REG const *b, int tagb)
|
||||
{
|
||||
int diff, exp0, expb;
|
||||
u_char st0_tag;
|
||||
FPU_REG *st0_ptr;
|
||||
FPU_REG x, y;
|
||||
u_char st0_sign, signb = getsign(b);
|
||||
|
||||
st0_ptr = &st(0);
|
||||
st0_tag = FPU_gettag0();
|
||||
st0_sign = getsign(st0_ptr);
|
||||
|
||||
if ( tagb == TAG_Special )
|
||||
tagb = FPU_Special(b);
|
||||
if ( st0_tag == TAG_Special )
|
||||
st0_tag = FPU_Special(st0_ptr);
|
||||
|
||||
if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
|
||||
|| ((tagb != TAG_Valid) && (tagb != TW_Denormal)) )
|
||||
{
|
||||
if ( st0_tag == TAG_Zero )
|
||||
{
|
||||
if ( tagb == TAG_Zero ) return COMP_A_eq_B;
|
||||
if ( tagb == TAG_Valid )
|
||||
return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
|
||||
if ( tagb == TW_Denormal )
|
||||
return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
|
||||
| COMP_Denormal;
|
||||
}
|
||||
else if ( tagb == TAG_Zero )
|
||||
{
|
||||
if ( st0_tag == TAG_Valid )
|
||||
return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
|
||||
if ( st0_tag == TW_Denormal )
|
||||
return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
|
||||
| COMP_Denormal;
|
||||
}
|
||||
|
||||
if ( st0_tag == TW_Infinity )
|
||||
{
|
||||
if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) )
|
||||
return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
|
||||
else if ( tagb == TW_Denormal )
|
||||
return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
|
||||
| COMP_Denormal;
|
||||
else if ( tagb == TW_Infinity )
|
||||
{
|
||||
/* The 80486 book says that infinities can be equal! */
|
||||
return (st0_sign == signb) ? COMP_A_eq_B :
|
||||
((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
|
||||
}
|
||||
/* Fall through to the NaN code */
|
||||
}
|
||||
else if ( tagb == TW_Infinity )
|
||||
{
|
||||
if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) )
|
||||
return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
|
||||
if ( st0_tag == TW_Denormal )
|
||||
return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
|
||||
| COMP_Denormal;
|
||||
/* Fall through to the NaN code */
|
||||
}
|
||||
|
||||
/* The only possibility now should be that one of the arguments
|
||||
is a NaN */
|
||||
if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) )
|
||||
{
|
||||
int signalling = 0, unsupported = 0;
|
||||
if ( st0_tag == TW_NaN )
|
||||
{
|
||||
signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000;
|
||||
unsupported = !((exponent(st0_ptr) == EXP_OVER)
|
||||
&& (st0_ptr->sigh & 0x80000000));
|
||||
}
|
||||
if ( tagb == TW_NaN )
|
||||
{
|
||||
signalling |= (b->sigh & 0xc0000000) == 0x80000000;
|
||||
unsupported |= !((exponent(b) == EXP_OVER)
|
||||
&& (b->sigh & 0x80000000));
|
||||
}
|
||||
if ( signalling || unsupported )
|
||||
return COMP_No_Comp | COMP_SNaN | COMP_NaN;
|
||||
else
|
||||
/* Neither is a signaling NaN */
|
||||
return COMP_No_Comp | COMP_NaN;
|
||||
}
|
||||
|
||||
EXCEPTION(EX_Invalid);
|
||||
}
|
||||
|
||||
if (st0_sign != signb)
|
||||
{
|
||||
return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
|
||||
| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
|
||||
COMP_Denormal : 0);
|
||||
}
|
||||
|
||||
if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) )
|
||||
{
|
||||
FPU_to_exp16(st0_ptr, &x);
|
||||
FPU_to_exp16(b, &y);
|
||||
st0_ptr = &x;
|
||||
b = &y;
|
||||
exp0 = exponent16(st0_ptr);
|
||||
expb = exponent16(b);
|
||||
}
|
||||
else
|
||||
{
|
||||
exp0 = exponent(st0_ptr);
|
||||
expb = exponent(b);
|
||||
}
|
||||
|
||||
#ifdef PARANOID
|
||||
if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
|
||||
if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
|
||||
#endif /* PARANOID */
|
||||
|
||||
diff = exp0 - expb;
|
||||
if ( diff == 0 )
|
||||
{
|
||||
diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are
|
||||
identical */
|
||||
if ( diff == 0 )
|
||||
{
|
||||
diff = st0_ptr->sigl > b->sigl;
|
||||
if ( diff == 0 )
|
||||
diff = -(st0_ptr->sigl < b->sigl);
|
||||
}
|
||||
}
|
||||
|
||||
if ( diff > 0 )
|
||||
{
|
||||
return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
|
||||
| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
|
||||
COMP_Denormal : 0);
|
||||
}
|
||||
if ( diff < 0 )
|
||||
{
|
||||
return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
|
||||
| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
|
||||
COMP_Denormal : 0);
|
||||
}
|
||||
|
||||
return COMP_A_eq_B
|
||||
| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
|
||||
COMP_Denormal : 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* This function requires that st(0) is not empty */
|
||||
int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
|
||||
{
|
||||
int f = 0, c;
|
||||
|
||||
c = compare(loaded_data, loaded_tag);
|
||||
|
||||
if (c & COMP_NaN)
|
||||
{
|
||||
EXCEPTION(EX_Invalid);
|
||||
f = SW_C3 | SW_C2 | SW_C0;
|
||||
}
|
||||
else
|
||||
switch (c & 7)
|
||||
{
|
||||
case COMP_A_lt_B:
|
||||
f = SW_C0;
|
||||
break;
|
||||
case COMP_A_eq_B:
|
||||
f = SW_C3;
|
||||
break;
|
||||
case COMP_A_gt_B:
|
||||
f = 0;
|
||||
break;
|
||||
case COMP_No_Comp:
|
||||
f = SW_C3 | SW_C2 | SW_C0;
|
||||
break;
|
||||
#ifdef PARANOID
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL|0x121);
|
||||
f = SW_C3 | SW_C2 | SW_C0;
|
||||
break;
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
setcc(f);
|
||||
if (c & COMP_Denormal)
|
||||
{
|
||||
return denormal_operand() < 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int compare_st_st(int nr)
|
||||
{
|
||||
int f = 0, c;
|
||||
FPU_REG *st_ptr;
|
||||
|
||||
if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
|
||||
{
|
||||
setcc(SW_C3 | SW_C2 | SW_C0);
|
||||
/* Stack fault */
|
||||
EXCEPTION(EX_StackUnder);
|
||||
return !(control_word & CW_Invalid);
|
||||
}
|
||||
|
||||
st_ptr = &st(nr);
|
||||
c = compare(st_ptr, FPU_gettagi(nr));
|
||||
if (c & COMP_NaN)
|
||||
{
|
||||
setcc(SW_C3 | SW_C2 | SW_C0);
|
||||
EXCEPTION(EX_Invalid);
|
||||
return !(control_word & CW_Invalid);
|
||||
}
|
||||
else
|
||||
switch (c & 7)
|
||||
{
|
||||
case COMP_A_lt_B:
|
||||
f = SW_C0;
|
||||
break;
|
||||
case COMP_A_eq_B:
|
||||
f = SW_C3;
|
||||
break;
|
||||
case COMP_A_gt_B:
|
||||
f = 0;
|
||||
break;
|
||||
case COMP_No_Comp:
|
||||
f = SW_C3 | SW_C2 | SW_C0;
|
||||
break;
|
||||
#ifdef PARANOID
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL|0x122);
|
||||
f = SW_C3 | SW_C2 | SW_C0;
|
||||
break;
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
setcc(f);
|
||||
if (c & COMP_Denormal)
|
||||
{
|
||||
return denormal_operand() < 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int compare_u_st_st(int nr)
|
||||
{
|
||||
int f = 0, c;
|
||||
FPU_REG *st_ptr;
|
||||
|
||||
if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
|
||||
{
|
||||
setcc(SW_C3 | SW_C2 | SW_C0);
|
||||
/* Stack fault */
|
||||
EXCEPTION(EX_StackUnder);
|
||||
return !(control_word & CW_Invalid);
|
||||
}
|
||||
|
||||
st_ptr = &st(nr);
|
||||
c = compare(st_ptr, FPU_gettagi(nr));
|
||||
if (c & COMP_NaN)
|
||||
{
|
||||
setcc(SW_C3 | SW_C2 | SW_C0);
|
||||
if (c & COMP_SNaN) /* This is the only difference between
|
||||
un-ordered and ordinary comparisons */
|
||||
{
|
||||
EXCEPTION(EX_Invalid);
|
||||
return !(control_word & CW_Invalid);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
switch (c & 7)
|
||||
{
|
||||
case COMP_A_lt_B:
|
||||
f = SW_C0;
|
||||
break;
|
||||
case COMP_A_eq_B:
|
||||
f = SW_C3;
|
||||
break;
|
||||
case COMP_A_gt_B:
|
||||
f = 0;
|
||||
break;
|
||||
case COMP_No_Comp:
|
||||
f = SW_C3 | SW_C2 | SW_C0;
|
||||
break;
|
||||
#ifdef PARANOID
|
||||
default:
|
||||
EXCEPTION(EX_INTERNAL|0x123);
|
||||
f = SW_C3 | SW_C2 | SW_C0;
|
||||
break;
|
||||
#endif /* PARANOID */
|
||||
}
|
||||
setcc(f);
|
||||
if (c & COMP_Denormal)
|
||||
{
|
||||
return denormal_operand() < 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
void fcom_st(void)
|
||||
{
|
||||
/* fcom st(i) */
|
||||
compare_st_st(FPU_rm);
|
||||
}
|
||||
|
||||
|
||||
void fcompst(void)
|
||||
{
|
||||
/* fcomp st(i) */
|
||||
if ( !compare_st_st(FPU_rm) )
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
|
||||
void fcompp(void)
|
||||
{
|
||||
/* fcompp */
|
||||
if (FPU_rm != 1)
|
||||
{
|
||||
FPU_illegal();
|
||||
return;
|
||||
}
|
||||
if ( !compare_st_st(1) )
|
||||
poppop();
|
||||
}
|
||||
|
||||
|
||||
void fucom_(void)
|
||||
{
|
||||
/* fucom st(i) */
|
||||
compare_u_st_st(FPU_rm);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void fucomp(void)
|
||||
{
|
||||
/* fucomp st(i) */
|
||||
if ( !compare_u_st_st(FPU_rm) )
|
||||
FPU_pop();
|
||||
}
|
||||
|
||||
|
||||
void fucompp(void)
|
||||
{
|
||||
/* fucompp */
|
||||
if (FPU_rm == 1)
|
||||
{
|
||||
if ( !compare_u_st_st(1) )
|
||||
poppop();
|
||||
}
|
||||
else
|
||||
FPU_illegal();
|
||||
}
|
120
arch/x86/math-emu/reg_constant.c
Normal file
120
arch/x86/math-emu/reg_constant.c
Normal file
@@ -0,0 +1,120 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_constant.c |
|
||||
| |
|
||||
| All of the constant FPU_REGs |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_system.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "status_w.h"
|
||||
#include "reg_constant.h"
|
||||
#include "control_w.h"
|
||||
|
||||
|
||||
#define MAKE_REG(s,e,l,h) { l, h, \
|
||||
((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
|
||||
|
||||
FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
|
||||
#if 0
|
||||
FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000);
|
||||
FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000);
|
||||
#endif /* 0 */
|
||||
static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
|
||||
static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
|
||||
FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
|
||||
FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
|
||||
FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
|
||||
static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
|
||||
static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);
|
||||
|
||||
/* Extra bits to take pi/2 to more than 128 bits precision. */
|
||||
FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66,
|
||||
0xfc8f8cbb, 0xece675d1);
|
||||
|
||||
/* Only the sign (and tag) is used in internal zeroes */
|
||||
FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
|
||||
|
||||
/* Only the sign and significand (and tag) are used in internal NaNs */
|
||||
/* The 80486 never generates one of these
|
||||
FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000);
|
||||
*/
|
||||
/* This is the real indefinite QNaN */
|
||||
FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000);
|
||||
|
||||
/* Only the sign (and tag) is used in internal infinities */
|
||||
FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
|
||||
|
||||
|
||||
static void fld_const(FPU_REG const *c, int adj, u_char tag)
|
||||
{
|
||||
FPU_REG *st_new_ptr;
|
||||
|
||||
if ( STACK_OVERFLOW )
|
||||
{
|
||||
FPU_stack_overflow();
|
||||
return;
|
||||
}
|
||||
push();
|
||||
reg_copy(c, st_new_ptr);
|
||||
st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to
|
||||
borrow or carry. */
|
||||
FPU_settag0(tag);
|
||||
clear_C1();
|
||||
}
|
||||
|
||||
/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
|
||||
(and not one of RC_RND or RC_UP).
|
||||
*/
|
||||
#define DOWN_OR_CHOP(x) (x & RC_DOWN)
|
||||
|
||||
static void fld1(int rc)
|
||||
{
|
||||
fld_const(&CONST_1, 0, TAG_Valid);
|
||||
}
|
||||
|
||||
static void fldl2t(int rc)
|
||||
{
|
||||
fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
|
||||
}
|
||||
|
||||
static void fldl2e(int rc)
|
||||
{
|
||||
fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
|
||||
}
|
||||
|
||||
static void fldpi(int rc)
|
||||
{
|
||||
fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
|
||||
}
|
||||
|
||||
static void fldlg2(int rc)
|
||||
{
|
||||
fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
|
||||
}
|
||||
|
||||
static void fldln2(int rc)
|
||||
{
|
||||
fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
|
||||
}
|
||||
|
||||
static void fldz(int rc)
|
||||
{
|
||||
fld_const(&CONST_Z, 0, TAG_Zero);
|
||||
}
|
||||
|
||||
typedef void (*FUNC_RC)(int);
|
||||
|
||||
static FUNC_RC constants_table[] = {
|
||||
fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal
|
||||
};
|
||||
|
||||
void fconst(void)
|
||||
{
|
||||
(constants_table[FPU_rm])(control_word & CW_RC);
|
||||
}
|
25
arch/x86/math-emu/reg_constant.h
Normal file
25
arch/x86/math-emu/reg_constant.h
Normal file
@@ -0,0 +1,25 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_constant.h |
|
||||
| |
|
||||
| Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef _REG_CONSTANT_H_
|
||||
#define _REG_CONSTANT_H_
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
extern FPU_REG const CONST_1;
|
||||
extern FPU_REG const CONST_PI;
|
||||
extern FPU_REG const CONST_PI2;
|
||||
extern FPU_REG const CONST_PI2extra;
|
||||
extern FPU_REG const CONST_PI4;
|
||||
extern FPU_REG const CONST_Z;
|
||||
extern FPU_REG const CONST_PINF;
|
||||
extern FPU_REG const CONST_INF;
|
||||
extern FPU_REG const CONST_MINF;
|
||||
extern FPU_REG const CONST_QNaN;
|
||||
|
||||
#endif /* _REG_CONSTANT_H_ */
|
53
arch/x86/math-emu/reg_convert.c
Normal file
53
arch/x86/math-emu/reg_convert.c
Normal file
@@ -0,0 +1,53 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_convert.c |
|
||||
| |
|
||||
| Convert register representation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1996,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
int FPU_to_exp16(FPU_REG const *a, FPU_REG *x)
|
||||
{
|
||||
int sign = getsign(a);
|
||||
|
||||
*(long long *)&(x->sigl) = *(const long long *)&(a->sigl);
|
||||
|
||||
/* Set up the exponent as a 16 bit quantity. */
|
||||
setexponent16(x, exponent(a));
|
||||
|
||||
if ( exponent16(x) == EXP_UNDER )
|
||||
{
|
||||
/* The number is a de-normal or pseudodenormal. */
|
||||
/* We only deal with the significand and exponent. */
|
||||
|
||||
if (x->sigh & 0x80000000)
|
||||
{
|
||||
/* Is a pseudodenormal. */
|
||||
/* This is non-80486 behaviour because the number
|
||||
loses its 'denormal' identity. */
|
||||
addexponent(x, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Is a denormal. */
|
||||
addexponent(x, 1);
|
||||
FPU_normalize_nuo(x);
|
||||
}
|
||||
}
|
||||
|
||||
if ( !(x->sigh & 0x80000000) )
|
||||
{
|
||||
EXCEPTION(EX_INTERNAL | 0x180);
|
||||
}
|
||||
|
||||
return sign;
|
||||
}
|
||||
|
207
arch/x86/math-emu/reg_divide.c
Normal file
207
arch/x86/math-emu/reg_divide.c
Normal file
@@ -0,0 +1,207 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_divide.c |
|
||||
| |
|
||||
| Divide one FPU_REG by another and put the result in a destination FPU_REG.|
|
||||
| |
|
||||
| Copyright (C) 1996 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| The destination may be any FPU_REG, including one of the source FPU_REGs. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "reg_constant.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "fpu_system.h"
|
||||
|
||||
/*
|
||||
Divide one register by another and put the result into a third register.
|
||||
*/
|
||||
int FPU_div(int flags, int rm, int control_w)
|
||||
{
|
||||
FPU_REG x, y;
|
||||
FPU_REG const *a, *b, *st0_ptr, *st_ptr;
|
||||
FPU_REG *dest;
|
||||
u_char taga, tagb, signa, signb, sign, saved_sign;
|
||||
int tag, deststnr;
|
||||
|
||||
if ( flags & DEST_RM )
|
||||
deststnr = rm;
|
||||
else
|
||||
deststnr = 0;
|
||||
|
||||
if ( flags & REV )
|
||||
{
|
||||
b = &st(0);
|
||||
st0_ptr = b;
|
||||
tagb = FPU_gettag0();
|
||||
if ( flags & LOADED )
|
||||
{
|
||||
a = (FPU_REG *)rm;
|
||||
taga = flags & 0x0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
a = &st(rm);
|
||||
st_ptr = a;
|
||||
taga = FPU_gettagi(rm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
a = &st(0);
|
||||
st0_ptr = a;
|
||||
taga = FPU_gettag0();
|
||||
if ( flags & LOADED )
|
||||
{
|
||||
b = (FPU_REG *)rm;
|
||||
tagb = flags & 0x0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
b = &st(rm);
|
||||
st_ptr = b;
|
||||
tagb = FPU_gettagi(rm);
|
||||
}
|
||||
}
|
||||
|
||||
signa = getsign(a);
|
||||
signb = getsign(b);
|
||||
|
||||
sign = signa ^ signb;
|
||||
|
||||
dest = &st(deststnr);
|
||||
saved_sign = getsign(dest);
|
||||
|
||||
if ( !(taga | tagb) )
|
||||
{
|
||||
/* Both regs Valid, this should be the most common case. */
|
||||
reg_copy(a, &x);
|
||||
reg_copy(b, &y);
|
||||
setpositive(&x);
|
||||
setpositive(&y);
|
||||
tag = FPU_u_div(&x, &y, dest, control_w, sign);
|
||||
|
||||
if ( tag < 0 )
|
||||
return tag;
|
||||
|
||||
FPU_settagi(deststnr, tag);
|
||||
return tag;
|
||||
}
|
||||
|
||||
if ( taga == TAG_Special )
|
||||
taga = FPU_Special(a);
|
||||
if ( tagb == TAG_Special )
|
||||
tagb = FPU_Special(b);
|
||||
|
||||
if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
|
||||
|| ((taga == TW_Denormal) && (tagb == TAG_Valid))
|
||||
|| ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
|
||||
{
|
||||
if ( denormal_operand() < 0 )
|
||||
return FPU_Exception;
|
||||
|
||||
FPU_to_exp16(a, &x);
|
||||
FPU_to_exp16(b, &y);
|
||||
tag = FPU_u_div(&x, &y, dest, control_w, sign);
|
||||
if ( tag < 0 )
|
||||
return tag;
|
||||
|
||||
FPU_settagi(deststnr, tag);
|
||||
return tag;
|
||||
}
|
||||
else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
|
||||
{
|
||||
if ( tagb != TAG_Zero )
|
||||
{
|
||||
/* Want to find Zero/Valid */
|
||||
if ( tagb == TW_Denormal )
|
||||
{
|
||||
if ( denormal_operand() < 0 )
|
||||
return FPU_Exception;
|
||||
}
|
||||
|
||||
/* The result is zero. */
|
||||
FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
|
||||
setsign(dest, sign);
|
||||
return TAG_Zero;
|
||||
}
|
||||
/* We have an exception condition, either 0/0 or Valid/Zero. */
|
||||
if ( taga == TAG_Zero )
|
||||
{
|
||||
/* 0/0 */
|
||||
return arith_invalid(deststnr);
|
||||
}
|
||||
/* Valid/Zero */
|
||||
return FPU_divide_by_zero(deststnr, sign);
|
||||
}
|
||||
/* Must have infinities, NaNs, etc */
|
||||
else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
|
||||
{
|
||||
if ( flags & LOADED )
|
||||
return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr);
|
||||
|
||||
if ( flags & DEST_RM )
|
||||
{
|
||||
int tag;
|
||||
tag = FPU_gettag0();
|
||||
if ( tag == TAG_Special )
|
||||
tag = FPU_Special(st0_ptr);
|
||||
return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm));
|
||||
}
|
||||
else
|
||||
{
|
||||
int tag;
|
||||
tag = FPU_gettagi(rm);
|
||||
if ( tag == TAG_Special )
|
||||
tag = FPU_Special(&st(rm));
|
||||
return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm));
|
||||
}
|
||||
}
|
||||
else if (taga == TW_Infinity)
|
||||
{
|
||||
if (tagb == TW_Infinity)
|
||||
{
|
||||
/* infinity/infinity */
|
||||
return arith_invalid(deststnr);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* tagb must be Valid or Zero */
|
||||
if ( (tagb == TW_Denormal) && (denormal_operand() < 0) )
|
||||
return FPU_Exception;
|
||||
|
||||
/* Infinity divided by Zero or Valid does
|
||||
not raise and exception, but returns Infinity */
|
||||
FPU_copy_to_regi(a, TAG_Special, deststnr);
|
||||
setsign(dest, sign);
|
||||
return taga;
|
||||
}
|
||||
}
|
||||
else if (tagb == TW_Infinity)
|
||||
{
|
||||
if ( (taga == TW_Denormal) && (denormal_operand() < 0) )
|
||||
return FPU_Exception;
|
||||
|
||||
/* The result is zero. */
|
||||
FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
|
||||
setsign(dest, sign);
|
||||
return TAG_Zero;
|
||||
}
|
||||
#ifdef PARANOID
|
||||
else
|
||||
{
|
||||
EXCEPTION(EX_INTERNAL|0x102);
|
||||
return FPU_Exception;
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
|
||||
return 0;
|
||||
}
|
1375
arch/x86/math-emu/reg_ld_str.c
Normal file
1375
arch/x86/math-emu/reg_ld_str.c
Normal file
File diff suppressed because it is too large
Load Diff
132
arch/x86/math-emu/reg_mul.c
Normal file
132
arch/x86/math-emu/reg_mul.c
Normal file
@@ -0,0 +1,132 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_mul.c |
|
||||
| |
|
||||
| Multiply one FPU_REG by another, put the result in a destination FPU_REG. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| Returns the tag of the result if no exceptions or errors occurred. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| The destination may be any FPU_REG, including one of the source FPU_REGs. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
#include "exception.h"
|
||||
#include "reg_constant.h"
|
||||
#include "fpu_system.h"
|
||||
|
||||
|
||||
/*
|
||||
Multiply two registers to give a register result.
|
||||
The sources are st(deststnr) and (b,tagb,signb).
|
||||
The destination is st(deststnr).
|
||||
*/
|
||||
/* This routine must be called with non-empty source registers */
|
||||
int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
|
||||
{
|
||||
FPU_REG *a = &st(deststnr);
|
||||
FPU_REG *dest = a;
|
||||
u_char taga = FPU_gettagi(deststnr);
|
||||
u_char saved_sign = getsign(dest);
|
||||
u_char sign = (getsign(a) ^ getsign(b));
|
||||
int tag;
|
||||
|
||||
|
||||
if ( !(taga | tagb) )
|
||||
{
|
||||
/* Both regs Valid, this should be the most common case. */
|
||||
|
||||
tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b));
|
||||
if ( tag < 0 )
|
||||
{
|
||||
setsign(dest, saved_sign);
|
||||
return tag;
|
||||
}
|
||||
FPU_settagi(deststnr, tag);
|
||||
return tag;
|
||||
}
|
||||
|
||||
if ( taga == TAG_Special )
|
||||
taga = FPU_Special(a);
|
||||
if ( tagb == TAG_Special )
|
||||
tagb = FPU_Special(b);
|
||||
|
||||
if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
|
||||
|| ((taga == TW_Denormal) && (tagb == TAG_Valid))
|
||||
|| ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
|
||||
{
|
||||
FPU_REG x, y;
|
||||
if ( denormal_operand() < 0 )
|
||||
return FPU_Exception;
|
||||
|
||||
FPU_to_exp16(a, &x);
|
||||
FPU_to_exp16(b, &y);
|
||||
tag = FPU_u_mul(&x, &y, dest, control_w, sign,
|
||||
exponent16(&x) + exponent16(&y));
|
||||
if ( tag < 0 )
|
||||
{
|
||||
setsign(dest, saved_sign);
|
||||
return tag;
|
||||
}
|
||||
FPU_settagi(deststnr, tag);
|
||||
return tag;
|
||||
}
|
||||
else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
|
||||
{
|
||||
if ( ((tagb == TW_Denormal) || (taga == TW_Denormal))
|
||||
&& (denormal_operand() < 0) )
|
||||
return FPU_Exception;
|
||||
|
||||
/* Must have either both arguments == zero, or
|
||||
one valid and the other zero.
|
||||
The result is therefore zero. */
|
||||
FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
|
||||
/* The 80486 book says that the answer is +0, but a real
|
||||
80486 behaves this way.
|
||||
IEEE-754 apparently says it should be this way. */
|
||||
setsign(dest, sign);
|
||||
return TAG_Zero;
|
||||
}
|
||||
/* Must have infinities, NaNs, etc */
|
||||
else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
|
||||
{
|
||||
return real_2op_NaN(b, tagb, deststnr, &st(0));
|
||||
}
|
||||
else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero))
|
||||
|| ((tagb == TW_Infinity) && (taga == TAG_Zero)) )
|
||||
{
|
||||
return arith_invalid(deststnr); /* Zero*Infinity is invalid */
|
||||
}
|
||||
else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
|
||||
&& (denormal_operand() < 0) )
|
||||
{
|
||||
return FPU_Exception;
|
||||
}
|
||||
else if (taga == TW_Infinity)
|
||||
{
|
||||
FPU_copy_to_regi(a, TAG_Special, deststnr);
|
||||
setsign(dest, sign);
|
||||
return TAG_Special;
|
||||
}
|
||||
else if (tagb == TW_Infinity)
|
||||
{
|
||||
FPU_copy_to_regi(b, TAG_Special, deststnr);
|
||||
setsign(dest, sign);
|
||||
return TAG_Special;
|
||||
}
|
||||
|
||||
#ifdef PARANOID
|
||||
else
|
||||
{
|
||||
EXCEPTION(EX_INTERNAL|0x102);
|
||||
return FPU_Exception;
|
||||
}
|
||||
#endif /* PARANOID */
|
||||
|
||||
return 0;
|
||||
}
|
147
arch/x86/math-emu/reg_norm.S
Normal file
147
arch/x86/math-emu/reg_norm.S
Normal file
@@ -0,0 +1,147 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_norm.S |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| Normalize the value in a FPU_REG. |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| int FPU_normalize(FPU_REG *n) |
|
||||
| |
|
||||
| int FPU_normalize_nuo(FPU_REG *n) |
|
||||
| |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(FPU_normalize)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%ebx
|
||||
|
||||
movl SIGH(%ebx),%edx
|
||||
movl SIGL(%ebx),%eax
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_done /* Already normalized */
|
||||
jnz L_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
orl %eax,%eax
|
||||
jz L_zero /* The contents are zero */
|
||||
|
||||
movl %eax,%edx
|
||||
xorl %eax,%eax
|
||||
subw $32,EXP(%ebx) /* This can cause an underflow */
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_shift_1:
|
||||
bsrl %edx,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
shld %cl,%eax,%edx
|
||||
shl %cl,%eax
|
||||
subw %cx,EXP(%ebx) /* This can cause an underflow */
|
||||
|
||||
movl %edx,SIGH(%ebx)
|
||||
movl %eax,SIGL(%ebx)
|
||||
|
||||
L_done:
|
||||
cmpw EXP_OVER,EXP(%ebx)
|
||||
jge L_overflow
|
||||
|
||||
cmpw EXP_UNDER,EXP(%ebx)
|
||||
jle L_underflow
|
||||
|
||||
L_exit_valid:
|
||||
movl TAG_Valid,%eax
|
||||
|
||||
/* Convert the exponent to 80x87 form. */
|
||||
addw EXTENDED_Ebias,EXP(%ebx)
|
||||
andw $0x7fff,EXP(%ebx)
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
L_zero:
|
||||
movw $0,EXP(%ebx)
|
||||
movl TAG_Zero,%eax
|
||||
jmp L_exit
|
||||
|
||||
L_underflow:
|
||||
/* Convert the exponent to 80x87 form. */
|
||||
addw EXTENDED_Ebias,EXP(%ebx)
|
||||
push %ebx
|
||||
call arith_underflow
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_overflow:
|
||||
/* Convert the exponent to 80x87 form. */
|
||||
addw EXTENDED_Ebias,EXP(%ebx)
|
||||
push %ebx
|
||||
call arith_overflow
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
|
||||
|
||||
/* Normalise without reporting underflow or overflow */
|
||||
ENTRY(FPU_normalize_nuo)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%ebx
|
||||
|
||||
movl SIGH(%ebx),%edx
|
||||
movl SIGL(%ebx),%eax
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_exit_nuo_valid /* Already normalized */
|
||||
jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
orl %eax,%eax
|
||||
jz L_exit_nuo_zero /* The contents are zero */
|
||||
|
||||
movl %eax,%edx
|
||||
xorl %eax,%eax
|
||||
subw $32,EXP(%ebx) /* This can cause an underflow */
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_nuo_shift_1:
|
||||
bsrl %edx,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
shld %cl,%eax,%edx
|
||||
shl %cl,%eax
|
||||
subw %cx,EXP(%ebx) /* This can cause an underflow */
|
||||
|
||||
movl %edx,SIGH(%ebx)
|
||||
movl %eax,SIGL(%ebx)
|
||||
|
||||
L_exit_nuo_valid:
|
||||
movl TAG_Valid,%eax
|
||||
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
|
||||
L_exit_nuo_zero:
|
||||
movl TAG_Zero,%eax
|
||||
movw EXP_UNDER,EXP(%ebx)
|
||||
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
708
arch/x86/math-emu/reg_round.S
Normal file
708
arch/x86/math-emu/reg_round.S
Normal file
@@ -0,0 +1,708 @@
|
||||
.file "reg_round.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_round.S |
|
||||
| |
|
||||
| Rounding/truncation/etc for FPU basic arithmetic functions. |
|
||||
| |
|
||||
| Copyright (C) 1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| This code has four possible entry points. |
|
||||
| The following must be entered by a jmp instruction: |
|
||||
| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
|
||||
| |
|
||||
| The FPU_round entry point is intended to be used by C code. |
|
||||
| From C, call as: |
|
||||
| int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
|
||||
| |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
| |
|
||||
| For correct "up" and "down" rounding, the argument must have the correct |
|
||||
| sign. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Four entry points. |
|
||||
| |
|
||||
| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
|
||||
| %eax:%ebx 64 bit significand |
|
||||
| %edx 32 bit extension of the significand |
|
||||
| %edi pointer to an FPU_REG for the result to be stored |
|
||||
| stack calling function must have set up a C stack frame and |
|
||||
| pushed %esi, %edi, and %ebx |
|
||||
| |
|
||||
| Needed just for the fpu_reg_round_sqrt entry point: |
|
||||
| %cx A control word in the same format as the FPU control word. |
|
||||
| Otherwise, PARAM4 must give such a value. |
|
||||
| |
|
||||
| |
|
||||
| The significand and its extension are assumed to be exact in the |
|
||||
| following sense: |
|
||||
| If the significand by itself is the exact result then the significand |
|
||||
| extension (%edx) must contain 0, otherwise the significand extension |
|
||||
| must be non-zero. |
|
||||
| If the significand extension is non-zero then the significand is |
|
||||
| smaller than the magnitude of the correct exact result by an amount |
|
||||
| greater than zero and less than one ls bit of the significand. |
|
||||
| The significand extension is only required to have three possible |
|
||||
| non-zero values: |
|
||||
| less than 0x80000000 <=> the significand is less than 1/2 an ls |
|
||||
| bit smaller than the magnitude of the |
|
||||
| true exact result. |
|
||||
| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
|
||||
| smaller than the magnitude of the true |
|
||||
| exact result. |
|
||||
| greater than 0x80000000 <=> the significand is more than 1/2 an ls |
|
||||
| bit smaller than the magnitude of the |
|
||||
| true exact result. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| The code in this module has become quite complex, but it should handle |
|
||||
| all of the FPU flags which are set at this stage of the basic arithmetic |
|
||||
| computations. |
|
||||
| There are a few rare cases where the results are not set identically to |
|
||||
| a real FPU. These require a bit more thought because at this stage the |
|
||||
| results of the code here appear to be more consistent... |
|
||||
| This may be changed in a future version. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
#include "fpu_emu.h"
|
||||
#include "exception.h"
|
||||
#include "control_w.h"
|
||||
|
||||
/* Flags for FPU_bits_lost */
|
||||
#define LOST_DOWN $1
|
||||
#define LOST_UP $2
|
||||
|
||||
/* Flags for FPU_denormal */
|
||||
#define DENORMAL $1
|
||||
#define UNMASKED_UNDERFLOW $2
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/* Make the code re-entrant by putting
|
||||
local storage on the stack: */
|
||||
#define FPU_bits_lost (%esp)
|
||||
#define FPU_denormal 1(%esp)
|
||||
|
||||
#else
|
||||
/* Not re-entrant, so we can gain speed by putting
|
||||
local storage in a static area: */
|
||||
.data
|
||||
.align 4,0
|
||||
FPU_bits_lost:
|
||||
.byte 0
|
||||
FPU_denormal:
|
||||
.byte 0
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
|
||||
.text
|
||||
.globl fpu_reg_round
|
||||
.globl fpu_Arith_exit
|
||||
|
||||
/* Entry point when called from C */
|
||||
ENTRY(FPU_round)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%edi
|
||||
movl SIGH(%edi),%eax
|
||||
movl SIGL(%edi),%ebx
|
||||
movl PARAM2,%edx
|
||||
|
||||
fpu_reg_round: /* Normal entry point */
|
||||
movl PARAM4,%ecx
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
pushl %ebx /* adjust the stack pointer */
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
#ifdef PARANOID
|
||||
/* Cannot use this here yet */
|
||||
/* orl %eax,%eax */
|
||||
/* jns L_entry_bugged */
|
||||
#endif /* PARANOID */
|
||||
|
||||
cmpw EXP_UNDER,EXP(%edi)
|
||||
jle L_Make_denorm /* The number is a de-normal */
|
||||
|
||||
movb $0,FPU_denormal /* 0 -> not a de-normal */
|
||||
|
||||
Denorm_done:
|
||||
movb $0,FPU_bits_lost /* No bits yet lost in rounding */
|
||||
|
||||
movl %ecx,%esi
|
||||
andl CW_PC,%ecx
|
||||
cmpl PR_64_BITS,%ecx
|
||||
je LRound_To_64
|
||||
|
||||
cmpl PR_53_BITS,%ecx
|
||||
je LRound_To_53
|
||||
|
||||
cmpl PR_24_BITS,%ecx
|
||||
je LRound_To_24
|
||||
|
||||
#ifdef PECULIAR_486
|
||||
/* With the precision control bits set to 01 "(reserved)", a real 80486
|
||||
behaves as if the precision control bits were set to 11 "64 bits" */
|
||||
cmpl PR_RESERVED_BITS,%ecx
|
||||
je LRound_To_64
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_denorm_486
|
||||
#endif /* PARANOID */
|
||||
#else
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_denorm /* There is no bug, just a bad control word */
|
||||
#endif /* PARANOID */
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
|
||||
/* Round etc to 24 bit precision */
|
||||
LRound_To_24:
|
||||
movl %esi,%ecx
|
||||
andl CW_RC,%ecx
|
||||
cmpl RC_RND,%ecx
|
||||
je LRound_nearest_24
|
||||
|
||||
cmpl RC_CHOP,%ecx
|
||||
je LCheck_truncate_24
|
||||
|
||||
cmpl RC_UP,%ecx /* Towards +infinity */
|
||||
je LUp_24
|
||||
|
||||
cmpl RC_DOWN,%ecx /* Towards -infinity */
|
||||
je LDown_24
|
||||
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_round24
|
||||
#endif /* PARANOID */
|
||||
|
||||
LUp_24:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
jne LCheck_truncate_24 /* If negative then up==truncate */
|
||||
|
||||
jmp LCheck_24_round_up
|
||||
|
||||
LDown_24:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
je LCheck_truncate_24 /* If positive then down==truncate */
|
||||
|
||||
LCheck_24_round_up:
|
||||
movl %eax,%ecx
|
||||
andl $0x000000ff,%ecx
|
||||
orl %ebx,%ecx
|
||||
orl %edx,%ecx
|
||||
jnz LDo_24_round_up
|
||||
jmp L_Re_normalise
|
||||
|
||||
LRound_nearest_24:
|
||||
/* Do rounding of the 24th bit if needed (nearest or even) */
|
||||
movl %eax,%ecx
|
||||
andl $0x000000ff,%ecx
|
||||
cmpl $0x00000080,%ecx
|
||||
jc LCheck_truncate_24 /* less than half, no increment needed */
|
||||
|
||||
jne LGreater_Half_24 /* greater than half, increment needed */
|
||||
|
||||
/* Possibly half, we need to check the ls bits */
|
||||
orl %ebx,%ebx
|
||||
jnz LGreater_Half_24 /* greater than half, increment needed */
|
||||
|
||||
orl %edx,%edx
|
||||
jnz LGreater_Half_24 /* greater than half, increment needed */
|
||||
|
||||
/* Exactly half, increment only if 24th bit is 1 (round to even) */
|
||||
testl $0x00000100,%eax
|
||||
jz LDo_truncate_24
|
||||
|
||||
LGreater_Half_24: /* Rounding: increment at the 24th bit */
|
||||
LDo_24_round_up:
|
||||
andl $0xffffff00,%eax /* Truncate to 24 bits */
|
||||
xorl %ebx,%ebx
|
||||
movb LOST_UP,FPU_bits_lost
|
||||
addl $0x00000100,%eax
|
||||
jmp LCheck_Round_Overflow
|
||||
|
||||
LCheck_truncate_24:
|
||||
movl %eax,%ecx
|
||||
andl $0x000000ff,%ecx
|
||||
orl %ebx,%ecx
|
||||
orl %edx,%ecx
|
||||
jz L_Re_normalise /* No truncation needed */
|
||||
|
||||
LDo_truncate_24:
|
||||
andl $0xffffff00,%eax /* Truncate to 24 bits */
|
||||
xorl %ebx,%ebx
|
||||
movb LOST_DOWN,FPU_bits_lost
|
||||
jmp L_Re_normalise
|
||||
|
||||
|
||||
/* Round etc to 53 bit precision */
|
||||
LRound_To_53:
|
||||
movl %esi,%ecx
|
||||
andl CW_RC,%ecx
|
||||
cmpl RC_RND,%ecx
|
||||
je LRound_nearest_53
|
||||
|
||||
cmpl RC_CHOP,%ecx
|
||||
je LCheck_truncate_53
|
||||
|
||||
cmpl RC_UP,%ecx /* Towards +infinity */
|
||||
je LUp_53
|
||||
|
||||
cmpl RC_DOWN,%ecx /* Towards -infinity */
|
||||
je LDown_53
|
||||
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_round53
|
||||
#endif /* PARANOID */
|
||||
|
||||
LUp_53:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
jne LCheck_truncate_53 /* If negative then up==truncate */
|
||||
|
||||
jmp LCheck_53_round_up
|
||||
|
||||
LDown_53:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
je LCheck_truncate_53 /* If positive then down==truncate */
|
||||
|
||||
LCheck_53_round_up:
|
||||
movl %ebx,%ecx
|
||||
andl $0x000007ff,%ecx
|
||||
orl %edx,%ecx
|
||||
jnz LDo_53_round_up
|
||||
jmp L_Re_normalise
|
||||
|
||||
LRound_nearest_53:
|
||||
/* Do rounding of the 53rd bit if needed (nearest or even) */
|
||||
movl %ebx,%ecx
|
||||
andl $0x000007ff,%ecx
|
||||
cmpl $0x00000400,%ecx
|
||||
jc LCheck_truncate_53 /* less than half, no increment needed */
|
||||
|
||||
jnz LGreater_Half_53 /* greater than half, increment needed */
|
||||
|
||||
/* Possibly half, we need to check the ls bits */
|
||||
orl %edx,%edx
|
||||
jnz LGreater_Half_53 /* greater than half, increment needed */
|
||||
|
||||
/* Exactly half, increment only if 53rd bit is 1 (round to even) */
|
||||
testl $0x00000800,%ebx
|
||||
jz LTruncate_53
|
||||
|
||||
LGreater_Half_53: /* Rounding: increment at the 53rd bit */
|
||||
LDo_53_round_up:
|
||||
movb LOST_UP,FPU_bits_lost
|
||||
andl $0xfffff800,%ebx /* Truncate to 53 bits */
|
||||
addl $0x00000800,%ebx
|
||||
adcl $0,%eax
|
||||
jmp LCheck_Round_Overflow
|
||||
|
||||
LCheck_truncate_53:
|
||||
movl %ebx,%ecx
|
||||
andl $0x000007ff,%ecx
|
||||
orl %edx,%ecx
|
||||
jz L_Re_normalise
|
||||
|
||||
LTruncate_53:
|
||||
movb LOST_DOWN,FPU_bits_lost
|
||||
andl $0xfffff800,%ebx /* Truncate to 53 bits */
|
||||
jmp L_Re_normalise
|
||||
|
||||
|
||||
/* Round etc to 64 bit precision */
|
||||
LRound_To_64:
|
||||
movl %esi,%ecx
|
||||
andl CW_RC,%ecx
|
||||
cmpl RC_RND,%ecx
|
||||
je LRound_nearest_64
|
||||
|
||||
cmpl RC_CHOP,%ecx
|
||||
je LCheck_truncate_64
|
||||
|
||||
cmpl RC_UP,%ecx /* Towards +infinity */
|
||||
je LUp_64
|
||||
|
||||
cmpl RC_DOWN,%ecx /* Towards -infinity */
|
||||
je LDown_64
|
||||
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_round64
|
||||
#endif /* PARANOID */
|
||||
|
||||
LUp_64:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
jne LCheck_truncate_64 /* If negative then up==truncate */
|
||||
|
||||
orl %edx,%edx
|
||||
jnz LDo_64_round_up
|
||||
jmp L_Re_normalise
|
||||
|
||||
LDown_64:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
je LCheck_truncate_64 /* If positive then down==truncate */
|
||||
|
||||
orl %edx,%edx
|
||||
jnz LDo_64_round_up
|
||||
jmp L_Re_normalise
|
||||
|
||||
LRound_nearest_64:
|
||||
cmpl $0x80000000,%edx
|
||||
jc LCheck_truncate_64
|
||||
|
||||
jne LDo_64_round_up
|
||||
|
||||
/* Now test for round-to-even */
|
||||
testb $1,%bl
|
||||
jz LCheck_truncate_64
|
||||
|
||||
LDo_64_round_up:
|
||||
movb LOST_UP,FPU_bits_lost
|
||||
addl $1,%ebx
|
||||
adcl $0,%eax
|
||||
|
||||
LCheck_Round_Overflow:
|
||||
jnc L_Re_normalise
|
||||
|
||||
/* Overflow, adjust the result (significand to 1.0) */
|
||||
rcrl $1,%eax
|
||||
rcrl $1,%ebx
|
||||
incw EXP(%edi)
|
||||
jmp L_Re_normalise
|
||||
|
||||
LCheck_truncate_64:
|
||||
orl %edx,%edx
|
||||
jz L_Re_normalise
|
||||
|
||||
LTruncate_64:
|
||||
movb LOST_DOWN,FPU_bits_lost
|
||||
|
||||
L_Re_normalise:
|
||||
testb $0xff,FPU_denormal
|
||||
jnz Normalise_result
|
||||
|
||||
L_Normalised:
|
||||
movl TAG_Valid,%edx
|
||||
|
||||
L_deNormalised:
|
||||
cmpb LOST_UP,FPU_bits_lost
|
||||
je L_precision_lost_up
|
||||
|
||||
cmpb LOST_DOWN,FPU_bits_lost
|
||||
je L_precision_lost_down
|
||||
|
||||
L_no_precision_loss:
|
||||
/* store the result */
|
||||
|
||||
L_Store_significand:
|
||||
movl %eax,SIGH(%edi)
|
||||
movl %ebx,SIGL(%edi)
|
||||
|
||||
cmpw EXP_OVER,EXP(%edi)
|
||||
jge L_overflow
|
||||
|
||||
movl %edx,%eax
|
||||
|
||||
/* Convert the exponent to 80x87 form. */
|
||||
addw EXTENDED_Ebias,EXP(%edi)
|
||||
andw $0x7fff,EXP(%edi)
|
||||
|
||||
fpu_reg_round_signed_special_exit:
|
||||
|
||||
cmpb SIGN_POS,PARAM5
|
||||
je fpu_reg_round_special_exit
|
||||
|
||||
orw $0x8000,EXP(%edi) /* Negative sign for the result. */
|
||||
|
||||
fpu_reg_round_special_exit:
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
popl %ebx /* adjust the stack pointer */
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
fpu_Arith_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
/*
|
||||
* Set the FPU status flags to represent precision loss due to
|
||||
* round-up.
|
||||
*/
|
||||
L_precision_lost_up:
|
||||
push %edx
|
||||
push %eax
|
||||
call set_precision_flag_up
|
||||
popl %eax
|
||||
popl %edx
|
||||
jmp L_no_precision_loss
|
||||
|
||||
/*
|
||||
* Set the FPU status flags to represent precision loss due to
|
||||
* truncation.
|
||||
*/
|
||||
L_precision_lost_down:
|
||||
push %edx
|
||||
push %eax
|
||||
call set_precision_flag_down
|
||||
popl %eax
|
||||
popl %edx
|
||||
jmp L_no_precision_loss
|
||||
|
||||
|
||||
/*
|
||||
* The number is a denormal (which might get rounded up to a normal)
|
||||
* Shift the number right the required number of bits, which will
|
||||
* have to be undone later...
|
||||
*/
|
||||
L_Make_denorm:
|
||||
/* The action to be taken depends upon whether the underflow
|
||||
exception is masked */
|
||||
testb CW_Underflow,%cl /* Underflow mask. */
|
||||
jz Unmasked_underflow /* Do not make a denormal. */
|
||||
|
||||
movb DENORMAL,FPU_denormal
|
||||
|
||||
pushl %ecx /* Save */
|
||||
movw EXP_UNDER+1,%cx
|
||||
subw EXP(%edi),%cx
|
||||
|
||||
cmpw $64,%cx /* shrd only works for 0..31 bits */
|
||||
jnc Denorm_shift_more_than_63
|
||||
|
||||
cmpw $32,%cx /* shrd only works for 0..31 bits */
|
||||
jnc Denorm_shift_more_than_32
|
||||
|
||||
/*
|
||||
* We got here without jumps by assuming that the most common requirement
|
||||
* is for a small de-normalising shift.
|
||||
* Shift by [1..31] bits
|
||||
*/
|
||||
addw %cx,EXP(%edi)
|
||||
orl %edx,%edx /* extension */
|
||||
setne %ch /* Save whether %edx is non-zero */
|
||||
xorl %edx,%edx
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
orb %ch,%dl
|
||||
popl %ecx
|
||||
jmp Denorm_done
|
||||
|
||||
/* Shift by [32..63] bits */
|
||||
Denorm_shift_more_than_32:
|
||||
addw %cx,EXP(%edi)
|
||||
subb $32,%cl
|
||||
orl %edx,%edx
|
||||
setne %ch
|
||||
orb %ch,%bl
|
||||
xorl %edx,%edx
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
orl %edx,%edx /* test these 32 bits */
|
||||
setne %cl
|
||||
orb %ch,%bl
|
||||
orb %cl,%bl
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
popl %ecx
|
||||
jmp Denorm_done
|
||||
|
||||
/* Shift by [64..) bits */
|
||||
Denorm_shift_more_than_63:
|
||||
cmpw $64,%cx
|
||||
jne Denorm_shift_more_than_64
|
||||
|
||||
/* Exactly 64 bit shift */
|
||||
addw %cx,EXP(%edi)
|
||||
xorl %ecx,%ecx
|
||||
orl %edx,%edx
|
||||
setne %cl
|
||||
orl %ebx,%ebx
|
||||
setne %ch
|
||||
orb %ch,%cl
|
||||
orb %cl,%al
|
||||
movl %eax,%edx
|
||||
xorl %eax,%eax
|
||||
xorl %ebx,%ebx
|
||||
popl %ecx
|
||||
jmp Denorm_done
|
||||
|
||||
Denorm_shift_more_than_64:
|
||||
movw EXP_UNDER+1,EXP(%edi)
|
||||
/* This is easy, %eax must be non-zero, so.. */
|
||||
movl $1,%edx
|
||||
xorl %eax,%eax
|
||||
xorl %ebx,%ebx
|
||||
popl %ecx
|
||||
jmp Denorm_done
|
||||
|
||||
|
||||
Unmasked_underflow:
|
||||
movb UNMASKED_UNDERFLOW,FPU_denormal
|
||||
jmp Denorm_done
|
||||
|
||||
|
||||
/* Undo the de-normalisation. */
|
||||
Normalise_result:
|
||||
cmpb UNMASKED_UNDERFLOW,FPU_denormal
|
||||
je Signal_underflow
|
||||
|
||||
/* The number must be a denormal if we got here. */
|
||||
#ifdef PARANOID
|
||||
/* But check it... just in case. */
|
||||
cmpw EXP_UNDER+1,EXP(%edi)
|
||||
jne L_norm_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
#ifdef PECULIAR_486
|
||||
/*
|
||||
* This implements a special feature of 80486 behaviour.
|
||||
* Underflow will be signalled even if the number is
|
||||
* not a denormal after rounding.
|
||||
* This difference occurs only for masked underflow, and not
|
||||
* in the unmasked case.
|
||||
* Actual 80486 behaviour differs from this in some circumstances.
|
||||
*/
|
||||
orl %eax,%eax /* ms bits */
|
||||
js LPseudoDenormal /* Will be masked underflow */
|
||||
#else
|
||||
orl %eax,%eax /* ms bits */
|
||||
js L_Normalised /* No longer a denormal */
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
jnz LDenormal_adj_exponent
|
||||
|
||||
orl %ebx,%ebx
|
||||
jz L_underflow_to_zero /* The contents are zero */
|
||||
|
||||
LDenormal_adj_exponent:
|
||||
decw EXP(%edi)
|
||||
|
||||
LPseudoDenormal:
|
||||
testb $0xff,FPU_bits_lost /* bits lost == underflow */
|
||||
movl TAG_Special,%edx
|
||||
jz L_deNormalised
|
||||
|
||||
/* There must be a masked underflow */
|
||||
push %eax
|
||||
pushl EX_Underflow
|
||||
call EXCEPTION
|
||||
popl %eax
|
||||
popl %eax
|
||||
movl TAG_Special,%edx
|
||||
jmp L_deNormalised
|
||||
|
||||
|
||||
/*
|
||||
* The operations resulted in a number too small to represent.
|
||||
* Masked response.
|
||||
*/
|
||||
L_underflow_to_zero:
|
||||
push %eax
|
||||
call set_precision_flag_down
|
||||
popl %eax
|
||||
|
||||
push %eax
|
||||
pushl EX_Underflow
|
||||
call EXCEPTION
|
||||
popl %eax
|
||||
popl %eax
|
||||
|
||||
/* Reduce the exponent to EXP_UNDER */
|
||||
movw EXP_UNDER,EXP(%edi)
|
||||
movl TAG_Zero,%edx
|
||||
jmp L_Store_significand
|
||||
|
||||
|
||||
/* The operations resulted in a number too large to represent. */
|
||||
L_overflow:
|
||||
addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
|
||||
push %edi
|
||||
call arith_overflow
|
||||
pop %edi
|
||||
jmp fpu_reg_round_signed_special_exit
|
||||
|
||||
|
||||
Signal_underflow:
|
||||
/* The number may have been changed to a non-denormal */
|
||||
/* by the rounding operations. */
|
||||
cmpw EXP_UNDER,EXP(%edi)
|
||||
jle Do_unmasked_underflow
|
||||
|
||||
jmp L_Normalised
|
||||
|
||||
Do_unmasked_underflow:
|
||||
/* Increase the exponent by the magic number */
|
||||
addw $(3*(1<<13)),EXP(%edi)
|
||||
push %eax
|
||||
pushl EX_Underflow
|
||||
call EXCEPTION
|
||||
popl %eax
|
||||
popl %eax
|
||||
jmp L_Normalised
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
#ifdef PECULIAR_486
|
||||
L_bugged_denorm_486:
|
||||
pushl EX_INTERNAL|0x236
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
#else
|
||||
L_bugged_denorm:
|
||||
pushl EX_INTERNAL|0x230
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
L_bugged_round24:
|
||||
pushl EX_INTERNAL|0x231
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
|
||||
L_bugged_round53:
|
||||
pushl EX_INTERNAL|0x232
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
|
||||
L_bugged_round64:
|
||||
pushl EX_INTERNAL|0x233
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
|
||||
L_norm_bugged:
|
||||
pushl EX_INTERNAL|0x234
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
|
||||
L_entry_bugged:
|
||||
pushl EX_INTERNAL|0x235
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
L_exception_exit:
|
||||
mov $-1,%eax
|
||||
jmp fpu_reg_round_special_exit
|
||||
#endif /* PARANOID */
|
167
arch/x86/math-emu/reg_u_add.S
Normal file
167
arch/x86/math-emu/reg_u_add.S
Normal file
@@ -0,0 +1,167 @@
|
||||
.file "reg_u_add.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_u_add.S |
|
||||
| |
|
||||
| Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the |
|
||||
| result in a destination FPU_REG. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
|
||||
| int control_w) |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
| Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ).
|
||||
| Takes two valid reg f.p. numbers (TAG_Valid), which are
|
||||
| treated as unsigned numbers,
|
||||
| and returns their sum as a TAG_Valid or TAG_Special f.p. number.
|
||||
| The returned number is normalized.
|
||||
| Basic checks are performed if PARANOID is defined.
|
||||
*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
|
||||
.text
|
||||
ENTRY(FPU_u_add)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi /* source 1 */
|
||||
movl PARAM2,%edi /* source 2 */
|
||||
|
||||
movl PARAM6,%ecx
|
||||
movl %ecx,%edx
|
||||
subl PARAM7,%ecx /* exp1 - exp2 */
|
||||
jge L_arg1_larger
|
||||
|
||||
/* num1 is smaller */
|
||||
movl SIGL(%esi),%ebx
|
||||
movl SIGH(%esi),%eax
|
||||
|
||||
movl %edi,%esi
|
||||
movl PARAM7,%edx
|
||||
negw %cx
|
||||
jmp L_accum_loaded
|
||||
|
||||
L_arg1_larger:
|
||||
/* num1 has larger or equal exponent */
|
||||
movl SIGL(%edi),%ebx
|
||||
movl SIGH(%edi),%eax
|
||||
|
||||
L_accum_loaded:
|
||||
movl PARAM3,%edi /* destination */
|
||||
movw %dx,EXP(%edi) /* Copy exponent to destination */
|
||||
|
||||
xorl %edx,%edx /* clear the extension */
|
||||
|
||||
#ifdef PARANOID
|
||||
testl $0x80000000,%eax
|
||||
je L_bugged
|
||||
|
||||
testl $0x80000000,SIGH(%esi)
|
||||
je L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* The number to be shifted is in %eax:%ebx:%edx */
|
||||
cmpw $32,%cx /* shrd only works for 0..31 bits */
|
||||
jnc L_more_than_31
|
||||
|
||||
/* less than 32 bits */
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_more_than_31:
|
||||
cmpw $64,%cx
|
||||
jnc L_more_than_63
|
||||
|
||||
subb $32,%cl
|
||||
jz L_exactly_32
|
||||
|
||||
shrd %cl,%eax,%edx
|
||||
shr %cl,%eax
|
||||
orl %ebx,%ebx
|
||||
jz L_more_31_no_low /* none of the lowest bits is set */
|
||||
|
||||
orl $1,%edx /* record the fact in the extension */
|
||||
|
||||
L_more_31_no_low:
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_exactly_32:
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_more_than_63:
|
||||
cmpw $65,%cx
|
||||
jnc L_more_than_64
|
||||
|
||||
movl %eax,%edx
|
||||
orl %ebx,%ebx
|
||||
jz L_more_63_no_low
|
||||
|
||||
orl $1,%edx
|
||||
jmp L_more_63_no_low
|
||||
|
||||
L_more_than_64:
|
||||
movl $1,%edx /* The shifted nr always at least one '1' */
|
||||
|
||||
L_more_63_no_low:
|
||||
xorl %ebx,%ebx
|
||||
xorl %eax,%eax
|
||||
|
||||
L_shift_done:
|
||||
/* Now do the addition */
|
||||
addl SIGL(%esi),%ebx
|
||||
adcl SIGH(%esi),%eax
|
||||
jnc L_round_the_result
|
||||
|
||||
/* Overflow, adjust the result */
|
||||
rcrl $1,%eax
|
||||
rcrl $1,%ebx
|
||||
rcrl $1,%edx
|
||||
jnc L_no_bit_lost
|
||||
|
||||
orl $1,%edx
|
||||
|
||||
L_no_bit_lost:
|
||||
incw EXP(%edi)
|
||||
|
||||
L_round_the_result:
|
||||
jmp fpu_reg_round /* Round the result */
|
||||
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
/* If we ever get here then we have problems! */
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x201
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
movl $-1,%eax
|
||||
jmp L_exit
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
#endif /* PARANOID */
|
471
arch/x86/math-emu/reg_u_div.S
Normal file
471
arch/x86/math-emu/reg_u_div.S
Normal file
@@ -0,0 +1,471 @@
|
||||
.file "reg_u_div.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_u_div.S |
|
||||
| |
|
||||
| Divide one FPU_REG by another and put the result in a destination FPU_REG.|
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Call from C as: |
|
||||
| int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, |
|
||||
| unsigned int control_word, char *sign) |
|
||||
| |
|
||||
| Does not compute the destination exponent, but does adjust it. |
|
||||
| |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
|
||||
|
||||
/* #define dSIGL(x) (x) */
|
||||
/* #define dSIGH(x) 4(x) */
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/*
|
||||
Local storage on the stack:
|
||||
Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
|
||||
Overflow flag: ovfl_flag
|
||||
*/
|
||||
#define FPU_accum_3 -4(%ebp)
|
||||
#define FPU_accum_2 -8(%ebp)
|
||||
#define FPU_accum_1 -12(%ebp)
|
||||
#define FPU_accum_0 -16(%ebp)
|
||||
#define FPU_result_1 -20(%ebp)
|
||||
#define FPU_result_2 -24(%ebp)
|
||||
#define FPU_ovfl_flag -28(%ebp)
|
||||
|
||||
#else
|
||||
.data
|
||||
/*
|
||||
Local storage in a static area:
|
||||
Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
|
||||
Overflow flag: ovfl_flag
|
||||
*/
|
||||
.align 4,0
|
||||
FPU_accum_3:
|
||||
.long 0
|
||||
FPU_accum_2:
|
||||
.long 0
|
||||
FPU_accum_1:
|
||||
.long 0
|
||||
FPU_accum_0:
|
||||
.long 0
|
||||
FPU_result_1:
|
||||
.long 0
|
||||
FPU_result_2:
|
||||
.long 0
|
||||
FPU_ovfl_flag:
|
||||
.byte 0
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
#define REGA PARAM1
|
||||
#define REGB PARAM2
|
||||
#define DEST PARAM3
|
||||
|
||||
.text
|
||||
ENTRY(FPU_u_div)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
subl $28,%esp
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl REGA,%esi
|
||||
movl REGB,%ebx
|
||||
movl DEST,%edi
|
||||
|
||||
movswl EXP(%esi),%edx
|
||||
movswl EXP(%ebx),%eax
|
||||
subl %eax,%edx
|
||||
addl EXP_BIAS,%edx
|
||||
|
||||
/* A denormal and a large number can cause an exponent underflow */
|
||||
cmpl EXP_WAY_UNDER,%edx
|
||||
jg xExp_not_underflow
|
||||
|
||||
/* Set to a really low value allow correct handling */
|
||||
movl EXP_WAY_UNDER,%edx
|
||||
|
||||
xExp_not_underflow:
|
||||
|
||||
movw %dx,EXP(%edi)
|
||||
|
||||
#ifdef PARANOID
|
||||
/* testl $0x80000000, SIGH(%esi) // Dividend */
|
||||
/* je L_bugged */
|
||||
testl $0x80000000, SIGH(%ebx) /* Divisor */
|
||||
je L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* Check if the divisor can be treated as having just 32 bits */
|
||||
cmpl $0,SIGL(%ebx)
|
||||
jnz L_Full_Division /* Can't do a quick divide */
|
||||
|
||||
/* We should be able to zip through the division here */
|
||||
movl SIGH(%ebx),%ecx /* The divisor */
|
||||
movl SIGH(%esi),%edx /* Dividend */
|
||||
movl SIGL(%esi),%eax /* Dividend */
|
||||
|
||||
cmpl %ecx,%edx
|
||||
setaeb FPU_ovfl_flag /* Keep a record */
|
||||
jb L_no_adjust
|
||||
|
||||
subl %ecx,%edx /* Prevent the overflow */
|
||||
|
||||
L_no_adjust:
|
||||
/* Divide the 64 bit number by the 32 bit denominator */
|
||||
divl %ecx
|
||||
movl %eax,FPU_result_2
|
||||
|
||||
/* Work on the remainder of the first division */
|
||||
xorl %eax,%eax
|
||||
divl %ecx
|
||||
movl %eax,FPU_result_1
|
||||
|
||||
/* Work on the remainder of the 64 bit division */
|
||||
xorl %eax,%eax
|
||||
divl %ecx
|
||||
|
||||
testb $255,FPU_ovfl_flag /* was the num > denom ? */
|
||||
je L_no_overflow
|
||||
|
||||
/* Do the shifting here */
|
||||
/* increase the exponent */
|
||||
incw EXP(%edi)
|
||||
|
||||
/* shift the mantissa right one bit */
|
||||
stc /* To set the ms bit */
|
||||
rcrl FPU_result_2
|
||||
rcrl FPU_result_1
|
||||
rcrl %eax
|
||||
|
||||
L_no_overflow:
|
||||
jmp LRound_precision /* Do the rounding as required */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Divide: Return arg1/arg2 to arg3. |
|
||||
| |
|
||||
| This routine does not use the exponents of arg1 and arg2, but does |
|
||||
| adjust the exponent of arg3. |
|
||||
| |
|
||||
| The maximum returned value is (ignoring exponents) |
|
||||
| .ffffffff ffffffff |
|
||||
| ------------------ = 1.ffffffff fffffffe |
|
||||
| .80000000 00000000 |
|
||||
| and the minimum is |
|
||||
| .80000000 00000000 |
|
||||
| ------------------ = .80000000 00000001 (rounded) |
|
||||
| .ffffffff ffffffff |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
L_Full_Division:
|
||||
/* Save extended dividend in local register */
|
||||
movl SIGL(%esi),%eax
|
||||
movl %eax,FPU_accum_2
|
||||
movl SIGH(%esi),%eax
|
||||
movl %eax,FPU_accum_3
|
||||
xorl %eax,%eax
|
||||
movl %eax,FPU_accum_1 /* zero the extension */
|
||||
movl %eax,FPU_accum_0 /* zero the extension */
|
||||
|
||||
movl SIGL(%esi),%eax /* Get the current num */
|
||||
movl SIGH(%esi),%edx
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Initialization done.
|
||||
Do the first 32 bits. */
|
||||
|
||||
movb $0,FPU_ovfl_flag
|
||||
cmpl SIGH(%ebx),%edx /* Test for imminent overflow */
|
||||
jb LLess_than_1
|
||||
ja LGreater_than_1
|
||||
|
||||
cmpl SIGL(%ebx),%eax
|
||||
jb LLess_than_1
|
||||
|
||||
LGreater_than_1:
|
||||
/* The dividend is greater or equal, would cause overflow */
|
||||
setaeb FPU_ovfl_flag /* Keep a record */
|
||||
|
||||
subl SIGL(%ebx),%eax
|
||||
sbbl SIGH(%ebx),%edx /* Prevent the overflow */
|
||||
movl %eax,FPU_accum_2
|
||||
movl %edx,FPU_accum_3
|
||||
|
||||
LLess_than_1:
|
||||
/* At this point, we have a dividend < divisor, with a record of
|
||||
adjustment in FPU_ovfl_flag */
|
||||
|
||||
/* We will divide by a number which is too large */
|
||||
movl SIGH(%ebx),%ecx
|
||||
addl $1,%ecx
|
||||
jnc LFirst_div_not_1
|
||||
|
||||
/* here we need to divide by 100000000h,
|
||||
i.e., no division at all.. */
|
||||
mov %edx,%eax
|
||||
jmp LFirst_div_done
|
||||
|
||||
LFirst_div_not_1:
|
||||
divl %ecx /* Divide the numerator by the augmented
|
||||
denom ms dw */
|
||||
|
||||
LFirst_div_done:
|
||||
movl %eax,FPU_result_2 /* Put the result in the answer */
|
||||
|
||||
mull SIGH(%ebx) /* mul by the ms dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_2 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_3
|
||||
|
||||
movl FPU_result_2,%eax /* Get the result back */
|
||||
mull SIGL(%ebx) /* now mul the ls dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
sbbl $0,FPU_accum_3
|
||||
je LDo_2nd_32_bits /* Must check for non-zero result here */
|
||||
|
||||
#ifdef PARANOID
|
||||
jb L_bugged_1
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* need to subtract another once of the denom */
|
||||
incl FPU_result_2 /* Correct the answer */
|
||||
|
||||
movl SIGL(%ebx),%eax
|
||||
movl SIGH(%ebx),%edx
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
sbbl $0,FPU_accum_3
|
||||
jne L_bugged_1 /* Must check for non-zero result here */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Half of the main problem is done, there is just a reduced numerator
|
||||
to handle now.
|
||||
Work with the second 32 bits, FPU_accum_0 not used from now on */
|
||||
LDo_2nd_32_bits:
|
||||
movl FPU_accum_2,%edx /* get the reduced num */
|
||||
movl FPU_accum_1,%eax
|
||||
|
||||
/* need to check for possible subsequent overflow */
|
||||
cmpl SIGH(%ebx),%edx
|
||||
jb LDo_2nd_div
|
||||
ja LPrevent_2nd_overflow
|
||||
|
||||
cmpl SIGL(%ebx),%eax
|
||||
jb LDo_2nd_div
|
||||
|
||||
LPrevent_2nd_overflow:
|
||||
/* The numerator is greater or equal, would cause overflow */
|
||||
/* prevent overflow */
|
||||
subl SIGL(%ebx),%eax
|
||||
sbbl SIGH(%ebx),%edx
|
||||
movl %edx,FPU_accum_2
|
||||
movl %eax,FPU_accum_1
|
||||
|
||||
incl FPU_result_2 /* Reflect the subtraction in the answer */
|
||||
|
||||
#ifdef PARANOID
|
||||
je L_bugged_2 /* Can't bump the result to 1.0 */
|
||||
#endif /* PARANOID */
|
||||
|
||||
LDo_2nd_div:
|
||||
cmpl $0,%ecx /* augmented denom msw */
|
||||
jnz LSecond_div_not_1
|
||||
|
||||
/* %ecx == 0, we are dividing by 1.0 */
|
||||
mov %edx,%eax
|
||||
jmp LSecond_div_done
|
||||
|
||||
LSecond_div_not_1:
|
||||
divl %ecx /* Divide the numerator by the denom ms dw */
|
||||
|
||||
LSecond_div_done:
|
||||
movl %eax,FPU_result_1 /* Put the result in the answer */
|
||||
|
||||
mull SIGH(%ebx) /* mul by the ms dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
movl FPU_result_1,%eax /* Get the result back */
|
||||
mull SIGL(%ebx) /* now mul the ls dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
jz LDo_3rd_32_bits
|
||||
|
||||
#ifdef PARANOID
|
||||
cmpl $1,FPU_accum_2
|
||||
jne L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* need to subtract another once of the denom */
|
||||
movl SIGL(%ebx),%eax
|
||||
movl SIGH(%ebx),%edx
|
||||
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_1
|
||||
sbbl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
jne L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
addl $1,FPU_result_1 /* Correct the answer */
|
||||
adcl $0,FPU_result_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2 /* Must check for non-zero result here */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* The division is essentially finished here, we just need to perform
|
||||
tidying operations.
|
||||
Deal with the 3rd 32 bits */
|
||||
LDo_3rd_32_bits:
|
||||
movl FPU_accum_1,%edx /* get the reduced num */
|
||||
movl FPU_accum_0,%eax
|
||||
|
||||
/* need to check for possible subsequent overflow */
|
||||
cmpl SIGH(%ebx),%edx /* denom */
|
||||
jb LRound_prep
|
||||
ja LPrevent_3rd_overflow
|
||||
|
||||
cmpl SIGL(%ebx),%eax /* denom */
|
||||
jb LRound_prep
|
||||
|
||||
LPrevent_3rd_overflow:
|
||||
/* prevent overflow */
|
||||
subl SIGL(%ebx),%eax
|
||||
sbbl SIGH(%ebx),%edx
|
||||
movl %edx,FPU_accum_1
|
||||
movl %eax,FPU_accum_0
|
||||
|
||||
addl $1,FPU_result_1 /* Reflect the subtraction in the answer */
|
||||
adcl $0,FPU_result_2
|
||||
jne LRound_prep
|
||||
jnc LRound_prep
|
||||
|
||||
/* This is a tricky spot, there is an overflow of the answer */
|
||||
movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */
|
||||
|
||||
LRound_prep:
|
||||
/*
|
||||
* Prepare for rounding.
|
||||
* To test for rounding, we just need to compare 2*accum with the
|
||||
* denom.
|
||||
*/
|
||||
movl FPU_accum_0,%ecx
|
||||
movl FPU_accum_1,%edx
|
||||
movl %ecx,%eax
|
||||
orl %edx,%eax
|
||||
jz LRound_ovfl /* The accumulator contains zero. */
|
||||
|
||||
/* Multiply by 2 */
|
||||
clc
|
||||
rcll $1,%ecx
|
||||
rcll $1,%edx
|
||||
jc LRound_large /* No need to compare, denom smaller */
|
||||
|
||||
subl SIGL(%ebx),%ecx
|
||||
sbbl SIGH(%ebx),%edx
|
||||
jnc LRound_not_small
|
||||
|
||||
movl $0x70000000,%eax /* Denom was larger */
|
||||
jmp LRound_ovfl
|
||||
|
||||
LRound_not_small:
|
||||
jnz LRound_large
|
||||
|
||||
movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */
|
||||
jmp LRound_ovfl
|
||||
|
||||
LRound_large:
|
||||
movl $0xff000000,%eax /* Denom was smaller */
|
||||
|
||||
LRound_ovfl:
|
||||
/* We are now ready to deal with rounding, but first we must get
|
||||
the bits properly aligned */
|
||||
testb $255,FPU_ovfl_flag /* was the num > denom ? */
|
||||
je LRound_precision
|
||||
|
||||
incw EXP(%edi)
|
||||
|
||||
/* shift the mantissa right one bit */
|
||||
stc /* Will set the ms bit */
|
||||
rcrl FPU_result_2
|
||||
rcrl FPU_result_1
|
||||
rcrl %eax
|
||||
|
||||
/* Round the result as required */
|
||||
LRound_precision:
|
||||
decw EXP(%edi) /* binary point between 1st & 2nd bits */
|
||||
|
||||
movl %eax,%edx
|
||||
movl FPU_result_1,%ebx
|
||||
movl FPU_result_2,%eax
|
||||
jmp fpu_reg_round
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
/* The logic is wrong if we got here */
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x202
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_bugged_1:
|
||||
pushl EX_INTERNAL|0x203
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_bugged_2:
|
||||
pushl EX_INTERNAL|0x204
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_exit:
|
||||
movl $-1,%eax
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
|
||||
leave
|
||||
ret
|
||||
#endif /* PARANOID */
|
148
arch/x86/math-emu/reg_u_mul.S
Normal file
148
arch/x86/math-emu/reg_u_mul.S
Normal file
@@ -0,0 +1,148 @@
|
||||
.file "reg_u_mul.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_u_mul.S |
|
||||
| |
|
||||
| Core multiplication routine |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Basic multiplication routine. |
|
||||
| Does not check the resulting exponent for overflow/underflow |
|
||||
| |
|
||||
| FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); |
|
||||
| |
|
||||
| Internal working is at approx 128 bits. |
|
||||
| Result is rounded to nearest 53 or 64 bits, using "nearest or even". |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/* Local storage on the stack: */
|
||||
#define FPU_accum_0 -4(%ebp) /* ms word */
|
||||
#define FPU_accum_1 -8(%ebp)
|
||||
|
||||
#else
|
||||
/* Local storage in a static area: */
|
||||
.data
|
||||
.align 4,0
|
||||
FPU_accum_0:
|
||||
.long 0
|
||||
FPU_accum_1:
|
||||
.long 0
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(FPU_u_mul)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
subl $8,%esp
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi
|
||||
movl PARAM2,%edi
|
||||
|
||||
#ifdef PARANOID
|
||||
testl $0x80000000,SIGH(%esi)
|
||||
jz L_bugged
|
||||
testl $0x80000000,SIGH(%edi)
|
||||
jz L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
xorl %ecx,%ecx
|
||||
xorl %ebx,%ebx
|
||||
|
||||
movl SIGL(%esi),%eax
|
||||
mull SIGL(%edi)
|
||||
movl %eax,FPU_accum_0
|
||||
movl %edx,FPU_accum_1
|
||||
|
||||
movl SIGL(%esi),%eax
|
||||
mull SIGH(%edi)
|
||||
addl %eax,FPU_accum_1
|
||||
adcl %edx,%ebx
|
||||
/* adcl $0,%ecx // overflow here is not possible */
|
||||
|
||||
movl SIGH(%esi),%eax
|
||||
mull SIGL(%edi)
|
||||
addl %eax,FPU_accum_1
|
||||
adcl %edx,%ebx
|
||||
adcl $0,%ecx
|
||||
|
||||
movl SIGH(%esi),%eax
|
||||
mull SIGH(%edi)
|
||||
addl %eax,%ebx
|
||||
adcl %edx,%ecx
|
||||
|
||||
/* Get the sum of the exponents. */
|
||||
movl PARAM6,%eax
|
||||
subl EXP_BIAS-1,%eax
|
||||
|
||||
/* Two denormals can cause an exponent underflow */
|
||||
cmpl EXP_WAY_UNDER,%eax
|
||||
jg Exp_not_underflow
|
||||
|
||||
/* Set to a really low value allow correct handling */
|
||||
movl EXP_WAY_UNDER,%eax
|
||||
|
||||
Exp_not_underflow:
|
||||
|
||||
/* Have now finished with the sources */
|
||||
movl PARAM3,%edi /* Point to the destination */
|
||||
movw %ax,EXP(%edi)
|
||||
|
||||
/* Now make sure that the result is normalized */
|
||||
testl $0x80000000,%ecx
|
||||
jnz LResult_Normalised
|
||||
|
||||
/* Normalize by shifting left one bit */
|
||||
shll $1,FPU_accum_0
|
||||
rcll $1,FPU_accum_1
|
||||
rcll $1,%ebx
|
||||
rcll $1,%ecx
|
||||
decw EXP(%edi)
|
||||
|
||||
LResult_Normalised:
|
||||
movl FPU_accum_0,%eax
|
||||
movl FPU_accum_1,%edx
|
||||
orl %eax,%eax
|
||||
jz L_extent_zero
|
||||
|
||||
orl $1,%edx
|
||||
|
||||
L_extent_zero:
|
||||
movl %ecx,%eax
|
||||
jmp fpu_reg_round
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x205
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
#endif /* PARANOID */
|
||||
|
272
arch/x86/math-emu/reg_u_sub.S
Normal file
272
arch/x86/math-emu/reg_u_sub.S
Normal file
@@ -0,0 +1,272 @@
|
||||
.file "reg_u_sub.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_u_sub.S |
|
||||
| |
|
||||
| Core floating point subtraction routine. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
|
||||
| int control_w) |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
| Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ).
|
||||
| Takes two valid reg f.p. numbers (TAG_Valid), which are
|
||||
| treated as unsigned numbers,
|
||||
| and returns their difference as a TAG_Valid or TAG_Zero f.p.
|
||||
| number.
|
||||
| The first number (arg1) must be the larger.
|
||||
| The returned number is normalized.
|
||||
| Basic checks are performed if PARANOID is defined.
|
||||
*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
|
||||
.text
|
||||
ENTRY(FPU_u_sub)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi /* source 1 */
|
||||
movl PARAM2,%edi /* source 2 */
|
||||
|
||||
movl PARAM6,%ecx
|
||||
subl PARAM7,%ecx /* exp1 - exp2 */
|
||||
|
||||
#ifdef PARANOID
|
||||
/* source 2 is always smaller than source 1 */
|
||||
js L_bugged_1
|
||||
|
||||
testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */
|
||||
je L_bugged_2
|
||||
|
||||
testl $0x80000000,SIGH(%esi)
|
||||
je L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*--------------------------------------+
|
||||
| Form a register holding the |
|
||||
| smaller number |
|
||||
+--------------------------------------*/
|
||||
movl SIGH(%edi),%eax /* register ms word */
|
||||
movl SIGL(%edi),%ebx /* register ls word */
|
||||
|
||||
movl PARAM3,%edi /* destination */
|
||||
movl PARAM6,%edx
|
||||
movw %dx,EXP(%edi) /* Copy exponent to destination */
|
||||
|
||||
xorl %edx,%edx /* register extension */
|
||||
|
||||
/*--------------------------------------+
|
||||
| Shift the temporary register |
|
||||
| right the required number of |
|
||||
| places. |
|
||||
+--------------------------------------*/
|
||||
|
||||
cmpw $32,%cx /* shrd only works for 0..31 bits */
|
||||
jnc L_more_than_31
|
||||
|
||||
/* less than 32 bits */
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_more_than_31:
|
||||
cmpw $64,%cx
|
||||
jnc L_more_than_63
|
||||
|
||||
subb $32,%cl
|
||||
jz L_exactly_32
|
||||
|
||||
shrd %cl,%eax,%edx
|
||||
shr %cl,%eax
|
||||
orl %ebx,%ebx
|
||||
jz L_more_31_no_low /* none of the lowest bits is set */
|
||||
|
||||
orl $1,%edx /* record the fact in the extension */
|
||||
|
||||
L_more_31_no_low:
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_exactly_32:
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_more_than_63:
|
||||
cmpw $65,%cx
|
||||
jnc L_more_than_64
|
||||
|
||||
/* Shift right by 64 bits */
|
||||
movl %eax,%edx
|
||||
orl %ebx,%ebx
|
||||
jz L_more_63_no_low
|
||||
|
||||
orl $1,%edx
|
||||
jmp L_more_63_no_low
|
||||
|
||||
L_more_than_64:
|
||||
jne L_more_than_65
|
||||
|
||||
/* Shift right by 65 bits */
|
||||
/* Carry is clear if we get here */
|
||||
movl %eax,%edx
|
||||
rcrl %edx
|
||||
jnc L_shift_65_nc
|
||||
|
||||
orl $1,%edx
|
||||
jmp L_more_63_no_low
|
||||
|
||||
L_shift_65_nc:
|
||||
orl %ebx,%ebx
|
||||
jz L_more_63_no_low
|
||||
|
||||
orl $1,%edx
|
||||
jmp L_more_63_no_low
|
||||
|
||||
L_more_than_65:
|
||||
movl $1,%edx /* The shifted nr always at least one '1' */
|
||||
|
||||
L_more_63_no_low:
|
||||
xorl %ebx,%ebx
|
||||
xorl %eax,%eax
|
||||
|
||||
L_shift_done:
|
||||
L_subtr:
|
||||
/*------------------------------+
|
||||
| Do the subtraction |
|
||||
+------------------------------*/
|
||||
xorl %ecx,%ecx
|
||||
subl %edx,%ecx
|
||||
movl %ecx,%edx
|
||||
movl SIGL(%esi),%ecx
|
||||
sbbl %ebx,%ecx
|
||||
movl %ecx,%ebx
|
||||
movl SIGH(%esi),%ecx
|
||||
sbbl %eax,%ecx
|
||||
movl %ecx,%eax
|
||||
|
||||
#ifdef PARANOID
|
||||
/* We can never get a borrow */
|
||||
jc L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*--------------------------------------+
|
||||
| Normalize the result |
|
||||
+--------------------------------------*/
|
||||
testl $0x80000000,%eax
|
||||
jnz L_round /* no shifting needed */
|
||||
|
||||
orl %eax,%eax
|
||||
jnz L_shift_1 /* shift left 1 - 31 bits */
|
||||
|
||||
orl %ebx,%ebx
|
||||
jnz L_shift_32 /* shift left 32 - 63 bits */
|
||||
|
||||
/*
|
||||
* A rare case, the only one which is non-zero if we got here
|
||||
* is: 1000000 .... 0000
|
||||
* -0111111 .... 1111 1
|
||||
* --------------------
|
||||
* 0000000 .... 0000 1
|
||||
*/
|
||||
|
||||
cmpl $0x80000000,%edx
|
||||
jnz L_must_be_zero
|
||||
|
||||
/* Shift left 64 bits */
|
||||
subw $64,EXP(%edi)
|
||||
xchg %edx,%eax
|
||||
jmp fpu_reg_round
|
||||
|
||||
L_must_be_zero:
|
||||
#ifdef PARANOID
|
||||
orl %edx,%edx
|
||||
jnz L_bugged_3
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* The result is zero */
|
||||
movw $0,EXP(%edi) /* exponent */
|
||||
movl $0,SIGL(%edi)
|
||||
movl $0,SIGH(%edi)
|
||||
movl TAG_Zero,%eax
|
||||
jmp L_exit
|
||||
|
||||
L_shift_32:
|
||||
movl %ebx,%eax
|
||||
movl %edx,%ebx
|
||||
movl $0,%edx
|
||||
subw $32,EXP(%edi) /* Can get underflow here */
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_shift_1:
|
||||
bsrl %eax,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
shld %cl,%ebx,%eax
|
||||
shld %cl,%edx,%ebx
|
||||
shl %cl,%edx
|
||||
subw %cx,EXP(%edi) /* Can get underflow here */
|
||||
|
||||
L_round:
|
||||
jmp fpu_reg_round /* Round the result */
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
L_bugged_1:
|
||||
pushl EX_INTERNAL|0x206
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_bugged_2:
|
||||
pushl EX_INTERNAL|0x209
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_bugged_3:
|
||||
pushl EX_INTERNAL|0x210
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_bugged_4:
|
||||
pushl EX_INTERNAL|0x211
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x212
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_error_exit:
|
||||
movl $-1,%eax
|
||||
|
||||
#endif /* PARANOID */
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
141
arch/x86/math-emu/round_Xsig.S
Normal file
141
arch/x86/math-emu/round_Xsig.S
Normal file
@@ -0,0 +1,141 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| round_Xsig.S |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Normalize and round a 12 byte quantity. |
|
||||
| Call from C as: |
|
||||
| int round_Xsig(Xsig *n) |
|
||||
| |
|
||||
| Normalize a 12 byte quantity. |
|
||||
| Call from C as: |
|
||||
| int norm_Xsig(Xsig *n) |
|
||||
| |
|
||||
| Each function returns the size of the shift (nr of bits). |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
.file "round_Xsig.S"
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(round_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %ebx /* Reserve some space */
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
|
||||
movl 8(%esi),%edx
|
||||
movl 4(%esi),%ebx
|
||||
movl (%esi),%eax
|
||||
|
||||
movl $0,-4(%ebp)
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_round /* Already normalized */
|
||||
jnz L_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
movl $-32,-4(%ebp)
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_shift_1:
|
||||
bsrl %edx,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
subl %ecx,-4(%ebp)
|
||||
shld %cl,%ebx,%edx
|
||||
shld %cl,%eax,%ebx
|
||||
shl %cl,%eax
|
||||
|
||||
L_round:
|
||||
testl $0x80000000,%eax
|
||||
jz L_exit
|
||||
|
||||
addl $1,%ebx
|
||||
adcl $0,%edx
|
||||
jnz L_exit
|
||||
|
||||
movl $0x80000000,%edx
|
||||
incl -4(%ebp)
|
||||
|
||||
L_exit:
|
||||
movl %edx,8(%esi)
|
||||
movl %ebx,4(%esi)
|
||||
movl %eax,(%esi)
|
||||
|
||||
movl -4(%ebp),%eax
|
||||
|
||||
popl %esi
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
ENTRY(norm_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %ebx /* Reserve some space */
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
|
||||
movl 8(%esi),%edx
|
||||
movl 4(%esi),%ebx
|
||||
movl (%esi),%eax
|
||||
|
||||
movl $0,-4(%ebp)
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_n_exit /* Already normalized */
|
||||
jnz L_n_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
movl $-32,-4(%ebp)
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_n_exit /* Normalized now */
|
||||
jnz L_n_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
addl $-32,-4(%ebp)
|
||||
jmp L_n_exit /* Might not be normalized,
|
||||
but shift no more. */
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_n_shift_1:
|
||||
bsrl %edx,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
subl %ecx,-4(%ebp)
|
||||
shld %cl,%ebx,%edx
|
||||
shld %cl,%eax,%ebx
|
||||
shl %cl,%eax
|
||||
|
||||
L_n_exit:
|
||||
movl %edx,8(%esi)
|
||||
movl %ebx,4(%esi)
|
||||
movl %eax,(%esi)
|
||||
|
||||
movl -4(%ebp),%eax
|
||||
|
||||
popl %esi
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
|
87
arch/x86/math-emu/shr_Xsig.S
Normal file
87
arch/x86/math-emu/shr_Xsig.S
Normal file
@@ -0,0 +1,87 @@
|
||||
.file "shr_Xsig.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| shr_Xsig.S |
|
||||
| |
|
||||
| 12 byte right shift function |
|
||||
| |
|
||||
| Copyright (C) 1992,1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| void shr_Xsig(Xsig *arg, unsigned nr) |
|
||||
| |
|
||||
| Extended shift right function. |
|
||||
| Fastest for small shifts. |
|
||||
| Shifts the 12 byte quantity pointed to by the first arg (arg) |
|
||||
| right by the number of bits specified by the second arg (nr). |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
.text
|
||||
ENTRY(shr_Xsig)
|
||||
push %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
movl PARAM2,%ecx
|
||||
movl PARAM1,%esi
|
||||
cmpl $32,%ecx /* shrd only works for 0..31 bits */
|
||||
jnc L_more_than_31
|
||||
|
||||
/* less than 32 bits */
|
||||
pushl %ebx
|
||||
movl (%esi),%eax /* lsl */
|
||||
movl 4(%esi),%ebx /* midl */
|
||||
movl 8(%esi),%edx /* msl */
|
||||
shrd %cl,%ebx,%eax
|
||||
shrd %cl,%edx,%ebx
|
||||
shr %cl,%edx
|
||||
movl %eax,(%esi)
|
||||
movl %ebx,4(%esi)
|
||||
movl %edx,8(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_31:
|
||||
cmpl $64,%ecx
|
||||
jnc L_more_than_63
|
||||
|
||||
subb $32,%cl
|
||||
movl 4(%esi),%eax /* midl */
|
||||
movl 8(%esi),%edx /* msl */
|
||||
shrd %cl,%edx,%eax
|
||||
shr %cl,%edx
|
||||
movl %eax,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
movl $0,8(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_63:
|
||||
cmpl $96,%ecx
|
||||
jnc L_more_than_95
|
||||
|
||||
subb $64,%cl
|
||||
movl 8(%esi),%eax /* msl */
|
||||
shr %cl,%eax
|
||||
xorl %edx,%edx
|
||||
movl %eax,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
movl %edx,8(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_95:
|
||||
xorl %eax,%eax
|
||||
movl %eax,(%esi)
|
||||
movl %eax,4(%esi)
|
||||
movl %eax,8(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
67
arch/x86/math-emu/status_w.h
Normal file
67
arch/x86/math-emu/status_w.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| status_w.h |
|
||||
| |
|
||||
| Copyright (C) 1992,1993 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#ifndef _STATUS_H_
|
||||
#define _STATUS_H_
|
||||
|
||||
#include "fpu_emu.h" /* for definition of PECULIAR_486 */
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
#define Const__(x) $##x
|
||||
#else
|
||||
#define Const__(x) x
|
||||
#endif
|
||||
|
||||
#define SW_Backward Const__(0x8000) /* backward compatibility */
|
||||
#define SW_C3 Const__(0x4000) /* condition bit 3 */
|
||||
#define SW_Top Const__(0x3800) /* top of stack */
|
||||
#define SW_Top_Shift Const__(11) /* shift for top of stack bits */
|
||||
#define SW_C2 Const__(0x0400) /* condition bit 2 */
|
||||
#define SW_C1 Const__(0x0200) /* condition bit 1 */
|
||||
#define SW_C0 Const__(0x0100) /* condition bit 0 */
|
||||
#define SW_Summary Const__(0x0080) /* exception summary */
|
||||
#define SW_Stack_Fault Const__(0x0040) /* stack fault */
|
||||
#define SW_Precision Const__(0x0020) /* loss of precision */
|
||||
#define SW_Underflow Const__(0x0010) /* underflow */
|
||||
#define SW_Overflow Const__(0x0008) /* overflow */
|
||||
#define SW_Zero_Div Const__(0x0004) /* divide by zero */
|
||||
#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */
|
||||
#define SW_Invalid Const__(0x0001) /* invalid operation */
|
||||
|
||||
#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#define COMP_A_gt_B 1
|
||||
#define COMP_A_eq_B 2
|
||||
#define COMP_A_lt_B 3
|
||||
#define COMP_No_Comp 4
|
||||
#define COMP_Denormal 0x20
|
||||
#define COMP_NaN 0x40
|
||||
#define COMP_SNaN 0x80
|
||||
|
||||
#define status_word() \
|
||||
((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
|
||||
static inline void setcc(int cc)
|
||||
{
|
||||
partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3);
|
||||
partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3);
|
||||
}
|
||||
|
||||
#ifdef PECULIAR_486
|
||||
/* Default, this conveys no information, but an 80486 does it. */
|
||||
/* Clear the SW_C1 bit, "other bits undefined". */
|
||||
# define clear_C1() { partial_status &= ~SW_C1; }
|
||||
# else
|
||||
# define clear_C1()
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _STATUS_H_ */
|
12
arch/x86/math-emu/version.h
Normal file
12
arch/x86/math-emu/version.h
Normal file
@@ -0,0 +1,12 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| version.h |
|
||||
| |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1996,1997,1999 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@melbpc.org.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#define FPU_VERSION "wm-FPU-emu version 2.01"
|
204
arch/x86/math-emu/wm_shrx.S
Normal file
204
arch/x86/math-emu/wm_shrx.S
Normal file
@@ -0,0 +1,204 @@
|
||||
.file "wm_shrx.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| wm_shrx.S |
|
||||
| |
|
||||
| 64 bit right shift functions |
|
||||
| |
|
||||
| Copyright (C) 1992,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| unsigned FPU_shrx(void *arg1, unsigned arg2) |
|
||||
| and |
|
||||
| unsigned FPU_shrxs(void *arg1, unsigned arg2) |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
.text
|
||||
/*---------------------------------------------------------------------------+
|
||||
| unsigned FPU_shrx(void *arg1, unsigned arg2) |
|
||||
| |
|
||||
| Extended shift right function. |
|
||||
| Fastest for small shifts. |
|
||||
| Shifts the 64 bit quantity pointed to by the first arg (arg1) |
|
||||
| right by the number of bits specified by the second arg (arg2). |
|
||||
| Forms a 96 bit quantity from the 64 bit arg and eax: |
|
||||
| [ 64 bit arg ][ eax ] |
|
||||
| shift right ---------> |
|
||||
| The eax register is initialized to 0 before the shifting. |
|
||||
| Results returned in the 64 bit arg and eax. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
ENTRY(FPU_shrx)
|
||||
push %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
movl PARAM2,%ecx
|
||||
movl PARAM1,%esi
|
||||
cmpl $32,%ecx /* shrd only works for 0..31 bits */
|
||||
jnc L_more_than_31
|
||||
|
||||
/* less than 32 bits */
|
||||
pushl %ebx
|
||||
movl (%esi),%ebx /* lsl */
|
||||
movl 4(%esi),%edx /* msl */
|
||||
xorl %eax,%eax /* extension */
|
||||
shrd %cl,%ebx,%eax
|
||||
shrd %cl,%edx,%ebx
|
||||
shr %cl,%edx
|
||||
movl %ebx,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_31:
|
||||
cmpl $64,%ecx
|
||||
jnc L_more_than_63
|
||||
|
||||
subb $32,%cl
|
||||
movl (%esi),%eax /* lsl */
|
||||
movl 4(%esi),%edx /* msl */
|
||||
shrd %cl,%edx,%eax
|
||||
shr %cl,%edx
|
||||
movl %edx,(%esi)
|
||||
movl $0,4(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_63:
|
||||
cmpl $96,%ecx
|
||||
jnc L_more_than_95
|
||||
|
||||
subb $64,%cl
|
||||
movl 4(%esi),%eax /* msl */
|
||||
shr %cl,%eax
|
||||
xorl %edx,%edx
|
||||
movl %edx,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_95:
|
||||
xorl %eax,%eax
|
||||
movl %eax,(%esi)
|
||||
movl %eax,4(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| unsigned FPU_shrxs(void *arg1, unsigned arg2) |
|
||||
| |
|
||||
| Extended shift right function (optimized for small floating point |
|
||||
| integers). |
|
||||
| Shifts the 64 bit quantity pointed to by the first arg (arg1) |
|
||||
| right by the number of bits specified by the second arg (arg2). |
|
||||
| Forms a 96 bit quantity from the 64 bit arg and eax: |
|
||||
| [ 64 bit arg ][ eax ] |
|
||||
| shift right ---------> |
|
||||
| The eax register is initialized to 0 before the shifting. |
|
||||
| The lower 8 bits of eax are lost and replaced by a flag which is |
|
||||
| set (to 0x01) if any bit, apart from the first one, is set in the |
|
||||
| part which has been shifted out of the arg. |
|
||||
| Results returned in the 64 bit arg and eax. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
ENTRY(FPU_shrxs)
|
||||
push %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl PARAM2,%ecx
|
||||
movl PARAM1,%esi
|
||||
cmpl $64,%ecx /* shrd only works for 0..31 bits */
|
||||
jnc Ls_more_than_63
|
||||
|
||||
cmpl $32,%ecx /* shrd only works for 0..31 bits */
|
||||
jc Ls_less_than_32
|
||||
|
||||
/* We got here without jumps by assuming that the most common requirement
|
||||
is for small integers */
|
||||
/* Shift by [32..63] bits */
|
||||
subb $32,%cl
|
||||
movl (%esi),%eax /* lsl */
|
||||
movl 4(%esi),%edx /* msl */
|
||||
xorl %ebx,%ebx
|
||||
shrd %cl,%eax,%ebx
|
||||
shrd %cl,%edx,%eax
|
||||
shr %cl,%edx
|
||||
orl %ebx,%ebx /* test these 32 bits */
|
||||
setne %bl
|
||||
test $0x7fffffff,%eax /* and 31 bits here */
|
||||
setne %bh
|
||||
orw %bx,%bx /* Any of the 63 bit set ? */
|
||||
setne %al
|
||||
movl %edx,(%esi)
|
||||
movl $0,4(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
/* Shift by [0..31] bits */
|
||||
Ls_less_than_32:
|
||||
movl (%esi),%ebx /* lsl */
|
||||
movl 4(%esi),%edx /* msl */
|
||||
xorl %eax,%eax /* extension */
|
||||
shrd %cl,%ebx,%eax
|
||||
shrd %cl,%edx,%ebx
|
||||
shr %cl,%edx
|
||||
test $0x7fffffff,%eax /* only need to look at eax here */
|
||||
setne %al
|
||||
movl %ebx,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
/* Shift by [64..95] bits */
|
||||
Ls_more_than_63:
|
||||
cmpl $96,%ecx
|
||||
jnc Ls_more_than_95
|
||||
|
||||
subb $64,%cl
|
||||
movl (%esi),%ebx /* lsl */
|
||||
movl 4(%esi),%eax /* msl */
|
||||
xorl %edx,%edx /* extension */
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
orl %ebx,%edx
|
||||
setne %bl
|
||||
test $0x7fffffff,%eax /* only need to look at eax here */
|
||||
setne %bh
|
||||
orw %bx,%bx
|
||||
setne %al
|
||||
xorl %edx,%edx
|
||||
movl %edx,(%esi) /* set to zero */
|
||||
movl %edx,4(%esi) /* set to zero */
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
Ls_more_than_95:
|
||||
/* Shift by [96..inf) bits */
|
||||
xorl %eax,%eax
|
||||
movl (%esi),%ebx
|
||||
orl 4(%esi),%ebx
|
||||
setne %al
|
||||
xorl %ebx,%ebx
|
||||
movl %ebx,(%esi)
|
||||
movl %ebx,4(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
470
arch/x86/math-emu/wm_sqrt.S
Normal file
470
arch/x86/math-emu/wm_sqrt.S
Normal file
@@ -0,0 +1,470 @@
|
||||
.file "wm_sqrt.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| wm_sqrt.S |
|
||||
| |
|
||||
| Fixed point arithmetic square root evaluation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| int wm_sqrt(FPU_REG *n, unsigned int control_word) |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| wm_sqrt(FPU_REG *n, unsigned int control_word) |
|
||||
| returns the square root of n in n. |
|
||||
| |
|
||||
| Use Newton's method to compute the square root of a number, which must |
|
||||
| be in the range [1.0 .. 4.0), to 64 bits accuracy. |
|
||||
| Does not check the sign or tag of the argument. |
|
||||
| Sets the exponent, but not the sign or tag of the result. |
|
||||
| |
|
||||
| The guess is kept in %esi:%edi |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/* Local storage on the stack: */
|
||||
#define FPU_accum_3 -4(%ebp) /* ms word */
|
||||
#define FPU_accum_2 -8(%ebp)
|
||||
#define FPU_accum_1 -12(%ebp)
|
||||
#define FPU_accum_0 -16(%ebp)
|
||||
|
||||
/*
|
||||
* The de-normalised argument:
|
||||
* sq_2 sq_1 sq_0
|
||||
* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
|
||||
* ^ binary point here
|
||||
*/
|
||||
#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
|
||||
#define FPU_fsqrt_arg_1 -24(%ebp)
|
||||
#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
|
||||
|
||||
#else
|
||||
/* Local storage in a static area: */
|
||||
.data
|
||||
.align 4,0
|
||||
FPU_accum_3:
|
||||
.long 0 /* ms word */
|
||||
FPU_accum_2:
|
||||
.long 0
|
||||
FPU_accum_1:
|
||||
.long 0
|
||||
FPU_accum_0:
|
||||
.long 0
|
||||
|
||||
/* The de-normalised argument:
|
||||
sq_2 sq_1 sq_0
|
||||
b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
|
||||
^ binary point here
|
||||
*/
|
||||
FPU_fsqrt_arg_2:
|
||||
.long 0 /* ms word */
|
||||
FPU_fsqrt_arg_1:
|
||||
.long 0
|
||||
FPU_fsqrt_arg_0:
|
||||
.long 0 /* ls word, at most the ms bit is set */
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(wm_sqrt)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
subl $28,%esp
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi
|
||||
|
||||
movl SIGH(%esi),%eax
|
||||
movl SIGL(%esi),%ecx
|
||||
xorl %edx,%edx
|
||||
|
||||
/* We use a rough linear estimate for the first guess.. */
|
||||
|
||||
cmpw EXP_BIAS,EXP(%esi)
|
||||
jnz sqrt_arg_ge_2
|
||||
|
||||
shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
|
||||
rcrl $1,%ecx
|
||||
rcrl $1,%edx
|
||||
|
||||
sqrt_arg_ge_2:
|
||||
/* From here on, n is never accessed directly again until it is
|
||||
replaced by the answer. */
|
||||
|
||||
movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
|
||||
movl %ecx,FPU_fsqrt_arg_1
|
||||
movl %edx,FPU_fsqrt_arg_0
|
||||
|
||||
/* Make a linear first estimate */
|
||||
shrl $1,%eax
|
||||
addl $0x40000000,%eax
|
||||
movl $0xaaaaaaaa,%ecx
|
||||
mull %ecx
|
||||
shll %edx /* max result was 7fff... */
|
||||
testl $0x80000000,%edx /* but min was 3fff... */
|
||||
jnz sqrt_prelim_no_adjust
|
||||
|
||||
movl $0x80000000,%edx /* round up */
|
||||
|
||||
sqrt_prelim_no_adjust:
|
||||
movl %edx,%esi /* Our first guess */
|
||||
|
||||
/* We have now computed (approx) (2 + x) / 3, which forms the basis
|
||||
for a few iterations of Newton's method */
|
||||
|
||||
movl FPU_fsqrt_arg_2,%ecx /* ms word */
|
||||
|
||||
/*
|
||||
* From our initial estimate, three iterations are enough to get us
|
||||
* to 30 bits or so. This will then allow two iterations at better
|
||||
* precision to complete the process.
|
||||
*/
|
||||
|
||||
/* Compute (g + n/g)/2 at each iteration (g is the guess). */
|
||||
shrl %ecx /* Doing this first will prevent a divide */
|
||||
/* overflow later. */
|
||||
|
||||
movl %ecx,%edx /* msw of the arg / 2 */
|
||||
divl %esi /* current estimate */
|
||||
shrl %esi /* divide by 2 */
|
||||
addl %eax,%esi /* the new estimate */
|
||||
|
||||
movl %ecx,%edx
|
||||
divl %esi
|
||||
shrl %esi
|
||||
addl %eax,%esi
|
||||
|
||||
movl %ecx,%edx
|
||||
divl %esi
|
||||
shrl %esi
|
||||
addl %eax,%esi
|
||||
|
||||
/*
|
||||
* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
|
||||
* we improve it to 60 bits or so.
|
||||
*
|
||||
* The strategy from now on is to compute new estimates from
|
||||
* guess := guess + (n - guess^2) / (2 * guess)
|
||||
*/
|
||||
|
||||
/* First, find the square of the guess */
|
||||
movl %esi,%eax
|
||||
mull %esi
|
||||
/* guess^2 now in %edx:%eax */
|
||||
|
||||
movl FPU_fsqrt_arg_1,%ecx
|
||||
subl %ecx,%eax
|
||||
movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
|
||||
sbbl %ecx,%edx
|
||||
jnc sqrt_stage_2_positive
|
||||
|
||||
/* Subtraction gives a negative result,
|
||||
negate the result before division. */
|
||||
notl %edx
|
||||
notl %eax
|
||||
addl $1,%eax
|
||||
adcl $0,%edx
|
||||
|
||||
divl %esi
|
||||
movl %eax,%ecx
|
||||
|
||||
movl %edx,%eax
|
||||
divl %esi
|
||||
jmp sqrt_stage_2_finish
|
||||
|
||||
sqrt_stage_2_positive:
|
||||
divl %esi
|
||||
movl %eax,%ecx
|
||||
|
||||
movl %edx,%eax
|
||||
divl %esi
|
||||
|
||||
notl %ecx
|
||||
notl %eax
|
||||
addl $1,%eax
|
||||
adcl $0,%ecx
|
||||
|
||||
sqrt_stage_2_finish:
|
||||
sarl $1,%ecx /* divide by 2 */
|
||||
rcrl $1,%eax
|
||||
|
||||
/* Form the new estimate in %esi:%edi */
|
||||
movl %eax,%edi
|
||||
addl %ecx,%esi
|
||||
|
||||
jnz sqrt_stage_2_done /* result should be [1..2) */
|
||||
|
||||
#ifdef PARANOID
|
||||
/* It should be possible to get here only if the arg is ffff....ffff */
|
||||
cmp $0xffffffff,FPU_fsqrt_arg_1
|
||||
jnz sqrt_stage_2_error
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* The best rounded result. */
|
||||
xorl %eax,%eax
|
||||
decl %eax
|
||||
movl %eax,%edi
|
||||
movl %eax,%esi
|
||||
movl $0x7fffffff,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
#ifdef PARANOID
|
||||
sqrt_stage_2_error:
|
||||
pushl EX_INTERNAL|0x213
|
||||
call EXCEPTION
|
||||
#endif /* PARANOID */
|
||||
|
||||
sqrt_stage_2_done:
|
||||
|
||||
/* Now the square root has been computed to better than 60 bits. */
|
||||
|
||||
/* Find the square of the guess. */
|
||||
movl %edi,%eax /* ls word of guess */
|
||||
mull %edi
|
||||
movl %edx,FPU_accum_1
|
||||
|
||||
movl %esi,%eax
|
||||
mull %esi
|
||||
movl %edx,FPU_accum_3
|
||||
movl %eax,FPU_accum_2
|
||||
|
||||
movl %edi,%eax
|
||||
mull %esi
|
||||
addl %eax,FPU_accum_1
|
||||
adcl %edx,FPU_accum_2
|
||||
adcl $0,FPU_accum_3
|
||||
|
||||
/* movl %esi,%eax */
|
||||
/* mull %edi */
|
||||
addl %eax,FPU_accum_1
|
||||
adcl %edx,FPU_accum_2
|
||||
adcl $0,FPU_accum_3
|
||||
|
||||
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
|
||||
|
||||
movl FPU_fsqrt_arg_0,%eax /* get normalized n */
|
||||
subl %eax,FPU_accum_1
|
||||
movl FPU_fsqrt_arg_1,%eax
|
||||
sbbl %eax,FPU_accum_2
|
||||
movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
|
||||
sbbl %eax,FPU_accum_3
|
||||
jnc sqrt_stage_3_positive
|
||||
|
||||
/* Subtraction gives a negative result,
|
||||
negate the result before division */
|
||||
notl FPU_accum_1
|
||||
notl FPU_accum_2
|
||||
notl FPU_accum_3
|
||||
addl $1,FPU_accum_1
|
||||
adcl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
adcl $0,FPU_accum_3 /* This must be zero */
|
||||
jz sqrt_stage_3_no_error
|
||||
|
||||
sqrt_stage_3_error:
|
||||
pushl EX_INTERNAL|0x207
|
||||
call EXCEPTION
|
||||
|
||||
sqrt_stage_3_no_error:
|
||||
#endif /* PARANOID */
|
||||
|
||||
movl FPU_accum_2,%edx
|
||||
movl FPU_accum_1,%eax
|
||||
divl %esi
|
||||
movl %eax,%ecx
|
||||
|
||||
movl %edx,%eax
|
||||
divl %esi
|
||||
|
||||
sarl $1,%ecx /* divide by 2 */
|
||||
rcrl $1,%eax
|
||||
|
||||
/* prepare to round the result */
|
||||
|
||||
addl %ecx,%edi
|
||||
adcl $0,%esi
|
||||
|
||||
jmp sqrt_stage_3_finished
|
||||
|
||||
sqrt_stage_3_positive:
|
||||
movl FPU_accum_2,%edx
|
||||
movl FPU_accum_1,%eax
|
||||
divl %esi
|
||||
movl %eax,%ecx
|
||||
|
||||
movl %edx,%eax
|
||||
divl %esi
|
||||
|
||||
sarl $1,%ecx /* divide by 2 */
|
||||
rcrl $1,%eax
|
||||
|
||||
/* prepare to round the result */
|
||||
|
||||
notl %eax /* Negate the correction term */
|
||||
notl %ecx
|
||||
addl $1,%eax
|
||||
adcl $0,%ecx /* carry here ==> correction == 0 */
|
||||
adcl $0xffffffff,%esi
|
||||
|
||||
addl %ecx,%edi
|
||||
adcl $0,%esi
|
||||
|
||||
sqrt_stage_3_finished:
|
||||
|
||||
/*
|
||||
* The result in %esi:%edi:%esi should be good to about 90 bits here,
|
||||
* and the rounding information here does not have sufficient accuracy
|
||||
* in a few rare cases.
|
||||
*/
|
||||
cmpl $0xffffffe0,%eax
|
||||
ja sqrt_near_exact_x
|
||||
|
||||
cmpl $0x00000020,%eax
|
||||
jb sqrt_near_exact
|
||||
|
||||
cmpl $0x7fffffe0,%eax
|
||||
jb sqrt_round_result
|
||||
|
||||
cmpl $0x80000020,%eax
|
||||
jb sqrt_get_more_precision
|
||||
|
||||
sqrt_round_result:
|
||||
/* Set up for rounding operations */
|
||||
movl %eax,%edx
|
||||
movl %esi,%eax
|
||||
movl %edi,%ebx
|
||||
movl PARAM1,%edi
|
||||
movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
|
||||
jmp fpu_reg_round
|
||||
|
||||
|
||||
sqrt_near_exact_x:
|
||||
/* First, the estimate must be rounded up. */
|
||||
addl $1,%edi
|
||||
adcl $0,%esi
|
||||
|
||||
sqrt_near_exact:
|
||||
/*
|
||||
* This is an easy case because x^1/2 is monotonic.
|
||||
* We need just find the square of our estimate, compare it
|
||||
* with the argument, and deduce whether our estimate is
|
||||
* above, below, or exact. We use the fact that the estimate
|
||||
* is known to be accurate to about 90 bits.
|
||||
*/
|
||||
movl %edi,%eax /* ls word of guess */
|
||||
mull %edi
|
||||
movl %edx,%ebx /* 2nd ls word of square */
|
||||
movl %eax,%ecx /* ls word of square */
|
||||
|
||||
movl %edi,%eax
|
||||
mull %esi
|
||||
addl %eax,%ebx
|
||||
addl %eax,%ebx
|
||||
|
||||
#ifdef PARANOID
|
||||
cmp $0xffffffb0,%ebx
|
||||
jb sqrt_near_exact_ok
|
||||
|
||||
cmp $0x00000050,%ebx
|
||||
ja sqrt_near_exact_ok
|
||||
|
||||
pushl EX_INTERNAL|0x214
|
||||
call EXCEPTION
|
||||
|
||||
sqrt_near_exact_ok:
|
||||
#endif /* PARANOID */
|
||||
|
||||
or %ebx,%ebx
|
||||
js sqrt_near_exact_small
|
||||
|
||||
jnz sqrt_near_exact_large
|
||||
|
||||
or %ebx,%edx
|
||||
jnz sqrt_near_exact_large
|
||||
|
||||
/* Our estimate is exactly the right answer */
|
||||
xorl %eax,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
sqrt_near_exact_small:
|
||||
/* Our estimate is too small */
|
||||
movl $0x000000ff,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
sqrt_near_exact_large:
|
||||
/* Our estimate is too large, we need to decrement it */
|
||||
subl $1,%edi
|
||||
sbbl $0,%esi
|
||||
movl $0xffffff00,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
|
||||
sqrt_get_more_precision:
|
||||
/* This case is almost the same as the above, except we start
|
||||
with an extra bit of precision in the estimate. */
|
||||
stc /* The extra bit. */
|
||||
rcll $1,%edi /* Shift the estimate left one bit */
|
||||
rcll $1,%esi
|
||||
|
||||
movl %edi,%eax /* ls word of guess */
|
||||
mull %edi
|
||||
movl %edx,%ebx /* 2nd ls word of square */
|
||||
movl %eax,%ecx /* ls word of square */
|
||||
|
||||
movl %edi,%eax
|
||||
mull %esi
|
||||
addl %eax,%ebx
|
||||
addl %eax,%ebx
|
||||
|
||||
/* Put our estimate back to its original value */
|
||||
stc /* The ms bit. */
|
||||
rcrl $1,%esi /* Shift the estimate left one bit */
|
||||
rcrl $1,%edi
|
||||
|
||||
#ifdef PARANOID
|
||||
cmp $0xffffff60,%ebx
|
||||
jb sqrt_more_prec_ok
|
||||
|
||||
cmp $0x000000a0,%ebx
|
||||
ja sqrt_more_prec_ok
|
||||
|
||||
pushl EX_INTERNAL|0x215
|
||||
call EXCEPTION
|
||||
|
||||
sqrt_more_prec_ok:
|
||||
#endif /* PARANOID */
|
||||
|
||||
or %ebx,%ebx
|
||||
js sqrt_more_prec_small
|
||||
|
||||
jnz sqrt_more_prec_large
|
||||
|
||||
or %ebx,%ecx
|
||||
jnz sqrt_more_prec_large
|
||||
|
||||
/* Our estimate is exactly the right answer */
|
||||
movl $0x80000000,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
sqrt_more_prec_small:
|
||||
/* Our estimate is too small */
|
||||
movl $0x800000ff,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
sqrt_more_prec_large:
|
||||
/* Our estimate is too large */
|
||||
movl $0x7fffff00,%eax
|
||||
jmp sqrt_round_result
|
Reference in New Issue
Block a user