...
Run Format

Source file src/math/big/float.go

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// This file implements multi-precision floating-point numbers.
     6	// Like in the GNU MPFR library (http://www.mpfr.org/), operands
     7	// can be of mixed precision. Unlike MPFR, the rounding mode is
     8	// not specified with each operation, but with each operand. The
     9	// rounding mode of the result operand determines the rounding
    10	// mode of an operation. This is a from-scratch implementation.
    11	
    12	package big
    13	
    14	import (
    15		"fmt"
    16		"math"
    17	)
    18	
    19	const debugFloat = false // enable for debugging
    20	
    21	// A nonzero finite Float represents a multi-precision floating point number
    22	//
    23	//   sign × mantissa × 2**exponent
    24	//
    25	// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
    26	// A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
    27	// All Floats are ordered, and the ordering of two Floats x and y
    28	// is defined by x.Cmp(y).
    29	//
    30	// Each Float value also has a precision, rounding mode, and accuracy.
    31	// The precision is the maximum number of mantissa bits available to
    32	// represent the value. The rounding mode specifies how a result should
    33	// be rounded to fit into the mantissa bits, and accuracy describes the
    34	// rounding error with respect to the exact result.
    35	//
    36	// Unless specified otherwise, all operations (including setters) that
    37	// specify a *Float variable for the result (usually via the receiver
    38	// with the exception of MantExp), round the numeric result according
    39	// to the precision and rounding mode of the result variable.
    40	//
    41	// If the provided result precision is 0 (see below), it is set to the
    42	// precision of the argument with the largest precision value before any
    43	// rounding takes place, and the rounding mode remains unchanged. Thus,
    44	// uninitialized Floats provided as result arguments will have their
    45	// precision set to a reasonable value determined by the operands and
    46	// their mode is the zero value for RoundingMode (ToNearestEven).
    47	//
    48	// By setting the desired precision to 24 or 53 and using matching rounding
    49	// mode (typically ToNearestEven), Float operations produce the same results
    50	// as the corresponding float32 or float64 IEEE-754 arithmetic for operands
    51	// that correspond to normal (i.e., not denormal) float32 or float64 numbers.
    52	// Exponent underflow and overflow lead to a 0 or an Infinity for different
    53	// values than IEEE-754 because Float exponents have a much larger range.
    54	//
    55	// The zero (uninitialized) value for a Float is ready to use and represents
    56	// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
    57	//
    58	type Float struct {
    59		prec uint32
    60		mode RoundingMode
    61		acc  Accuracy
    62		form form
    63		neg  bool
    64		mant nat
    65		exp  int32
    66	}
    67	
    68	// An ErrNaN panic is raised by a Float operation that would lead to
    69	// a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
    70	type ErrNaN struct {
    71		msg string
    72	}
    73	
    74	func (err ErrNaN) Error() string {
    75		return err.msg
    76	}
    77	
    78	// NewFloat allocates and returns a new Float set to x,
    79	// with precision 53 and rounding mode ToNearestEven.
    80	// NewFloat panics with ErrNaN if x is a NaN.
    81	func NewFloat(x float64) *Float {
    82		if math.IsNaN(x) {
    83			panic(ErrNaN{"NewFloat(NaN)"})
    84		}
    85		return new(Float).SetFloat64(x)
    86	}
    87	
    88	// Exponent and precision limits.
    89	const (
    90		MaxExp  = math.MaxInt32  // largest supported exponent
    91		MinExp  = math.MinInt32  // smallest supported exponent
    92		MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
    93	)
    94	
    95	// Internal representation: The mantissa bits x.mant of a nonzero finite
    96	// Float x are stored in a nat slice long enough to hold up to x.prec bits;
    97	// the slice may (but doesn't have to) be shorter if the mantissa contains
    98	// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
    99	// the msb is shifted all the way "to the left"). Thus, if the mantissa has
   100	// trailing 0 bits or x.prec is not a multiple of the the Word size _W,
   101	// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
   102	// to the value 0.5; the exponent x.exp shifts the binary point as needed.
   103	//
   104	// A zero or non-finite Float x ignores x.mant and x.exp.
   105	//
   106	// x                 form      neg      mant         exp
   107	// ----------------------------------------------------------
   108	// ±0                zero      sign     -            -
   109	// 0 < |x| < +Inf    finite    sign     mantissa     exponent
   110	// ±Inf              inf       sign     -            -
   111	
   112	// A form value describes the internal representation.
   113	type form byte
   114	
   115	// The form value order is relevant - do not change!
   116	const (
   117		zero form = iota
   118		finite
   119		inf
   120	)
   121	
   122	// RoundingMode determines how a Float value is rounded to the
   123	// desired precision. Rounding may change the Float value; the
   124	// rounding error is described by the Float's Accuracy.
   125	type RoundingMode byte
   126	
   127	// These constants define supported rounding modes.
   128	const (
   129		ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
   130		ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
   131		ToZero                            // == IEEE 754-2008 roundTowardZero
   132		AwayFromZero                      // no IEEE 754-2008 equivalent
   133		ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
   134		ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
   135	)
   136	
   137	//go:generate stringer -type=RoundingMode
   138	
   139	// Accuracy describes the rounding error produced by the most recent
   140	// operation that generated a Float value, relative to the exact value.
   141	type Accuracy int8
   142	
   143	// Constants describing the Accuracy of a Float.
   144	const (
   145		Below Accuracy = -1
   146		Exact Accuracy = 0
   147		Above Accuracy = +1
   148	)
   149	
   150	//go:generate stringer -type=Accuracy
   151	
   152	// SetPrec sets z's precision to prec and returns the (possibly) rounded
   153	// value of z. Rounding occurs according to z's rounding mode if the mantissa
   154	// cannot be represented in prec bits without loss of precision.
   155	// SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
   156	// If prec > MaxPrec, it is set to MaxPrec.
   157	func (z *Float) SetPrec(prec uint) *Float {
   158		z.acc = Exact // optimistically assume no rounding is needed
   159	
   160		// special case
   161		if prec == 0 {
   162			z.prec = 0
   163			if z.form == finite {
   164				// truncate z to 0
   165				z.acc = makeAcc(z.neg)
   166				z.form = zero
   167			}
   168			return z
   169		}
   170	
   171		// general case
   172		if prec > MaxPrec {
   173			prec = MaxPrec
   174		}
   175		old := z.prec
   176		z.prec = uint32(prec)
   177		if z.prec < old {
   178			z.round(0)
   179		}
   180		return z
   181	}
   182	
   183	func makeAcc(above bool) Accuracy {
   184		if above {
   185			return Above
   186		}
   187		return Below
   188	}
   189	
   190	// SetMode sets z's rounding mode to mode and returns an exact z.
   191	// z remains unchanged otherwise.
   192	// z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
   193	func (z *Float) SetMode(mode RoundingMode) *Float {
   194		z.mode = mode
   195		z.acc = Exact
   196		return z
   197	}
   198	
   199	// Prec returns the mantissa precision of x in bits.
   200	// The result may be 0 for |x| == 0 and |x| == Inf.
   201	func (x *Float) Prec() uint {
   202		return uint(x.prec)
   203	}
   204	
   205	// MinPrec returns the minimum precision required to represent x exactly
   206	// (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
   207	// The result is 0 for |x| == 0 and |x| == Inf.
   208	func (x *Float) MinPrec() uint {
   209		if x.form != finite {
   210			return 0
   211		}
   212		return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
   213	}
   214	
   215	// Mode returns the rounding mode of x.
   216	func (x *Float) Mode() RoundingMode {
   217		return x.mode
   218	}
   219	
   220	// Acc returns the accuracy of x produced by the most recent operation.
   221	func (x *Float) Acc() Accuracy {
   222		return x.acc
   223	}
   224	
   225	// Sign returns:
   226	//
   227	//	-1 if x <   0
   228	//	 0 if x is ±0
   229	//	+1 if x >   0
   230	//
   231	func (x *Float) Sign() int {
   232		if debugFloat {
   233			x.validate()
   234		}
   235		if x.form == zero {
   236			return 0
   237		}
   238		if x.neg {
   239			return -1
   240		}
   241		return 1
   242	}
   243	
   244	// MantExp breaks x into its mantissa and exponent components
   245	// and returns the exponent. If a non-nil mant argument is
   246	// provided its value is set to the mantissa of x, with the
   247	// same precision and rounding mode as x. The components
   248	// satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
   249	// Calling MantExp with a nil argument is an efficient way to
   250	// get the exponent of the receiver.
   251	//
   252	// Special cases are:
   253	//
   254	//	(  ±0).MantExp(mant) = 0, with mant set to   ±0
   255	//	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
   256	//
   257	// x and mant may be the same in which case x is set to its
   258	// mantissa value.
   259	func (x *Float) MantExp(mant *Float) (exp int) {
   260		if debugFloat {
   261			x.validate()
   262		}
   263		if x.form == finite {
   264			exp = int(x.exp)
   265		}
   266		if mant != nil {
   267			mant.Copy(x)
   268			if mant.form == finite {
   269				mant.exp = 0
   270			}
   271		}
   272		return
   273	}
   274	
   275	func (z *Float) setExpAndRound(exp int64, sbit uint) {
   276		if exp < MinExp {
   277			// underflow
   278			z.acc = makeAcc(z.neg)
   279			z.form = zero
   280			return
   281		}
   282	
   283		if exp > MaxExp {
   284			// overflow
   285			z.acc = makeAcc(!z.neg)
   286			z.form = inf
   287			return
   288		}
   289	
   290		z.form = finite
   291		z.exp = int32(exp)
   292		z.round(sbit)
   293	}
   294	
   295	// SetMantExp sets z to mant × 2**exp and and returns z.
   296	// The result z has the same precision and rounding mode
   297	// as mant. SetMantExp is an inverse of MantExp but does
   298	// not require 0.5 <= |mant| < 1.0. Specifically:
   299	//
   300	//	mant := new(Float)
   301	//	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
   302	//
   303	// Special cases are:
   304	//
   305	//	z.SetMantExp(  ±0, exp) =   ±0
   306	//	z.SetMantExp(±Inf, exp) = ±Inf
   307	//
   308	// z and mant may be the same in which case z's exponent
   309	// is set to exp.
   310	func (z *Float) SetMantExp(mant *Float, exp int) *Float {
   311		if debugFloat {
   312			z.validate()
   313			mant.validate()
   314		}
   315		z.Copy(mant)
   316		if z.form != finite {
   317			return z
   318		}
   319		z.setExpAndRound(int64(z.exp)+int64(exp), 0)
   320		return z
   321	}
   322	
   323	// Signbit returns true if x is negative or negative zero.
   324	func (x *Float) Signbit() bool {
   325		return x.neg
   326	}
   327	
   328	// IsInf reports whether x is +Inf or -Inf.
   329	func (x *Float) IsInf() bool {
   330		return x.form == inf
   331	}
   332	
   333	// IsInt reports whether x is an integer.
   334	// ±Inf values are not integers.
   335	func (x *Float) IsInt() bool {
   336		if debugFloat {
   337			x.validate()
   338		}
   339		// special cases
   340		if x.form != finite {
   341			return x.form == zero
   342		}
   343		// x.form == finite
   344		if x.exp <= 0 {
   345			return false
   346		}
   347		// x.exp > 0
   348		return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
   349	}
   350	
   351	// debugging support
   352	func (x *Float) validate() {
   353		if !debugFloat {
   354			// avoid performance bugs
   355			panic("validate called but debugFloat is not set")
   356		}
   357		if x.form != finite {
   358			return
   359		}
   360		m := len(x.mant)
   361		if m == 0 {
   362			panic("nonzero finite number with empty mantissa")
   363		}
   364		const msb = 1 << (_W - 1)
   365		if x.mant[m-1]&msb == 0 {
   366			panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
   367		}
   368		if x.prec == 0 {
   369			panic("zero precision finite number")
   370		}
   371	}
   372	
   373	// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
   374	// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
   375	// have before calling round. z's mantissa must be normalized (with the msb set)
   376	// or empty.
   377	//
   378	// CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
   379	// sign of z. For correct rounding, the sign of z must be set correctly before
   380	// calling round.
   381	func (z *Float) round(sbit uint) {
   382		if debugFloat {
   383			z.validate()
   384		}
   385	
   386		z.acc = Exact
   387		if z.form != finite {
   388			// ±0 or ±Inf => nothing left to do
   389			return
   390		}
   391		// z.form == finite && len(z.mant) > 0
   392		// m > 0 implies z.prec > 0 (checked by validate)
   393	
   394		m := uint32(len(z.mant)) // present mantissa length in words
   395		bits := m * _W           // present mantissa bits; bits > 0
   396		if bits <= z.prec {
   397			// mantissa fits => nothing to do
   398			return
   399		}
   400		// bits > z.prec
   401	
   402		// Rounding is based on two bits: the rounding bit (rbit) and the
   403		// sticky bit (sbit). The rbit is the bit immediately before the
   404		// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
   405		// of the bits before the rbit are set (the "0.25", "0.125", etc.):
   406		//
   407		//   rbit  sbit  => "fractional part"
   408		//
   409		//   0     0        == 0
   410		//   0     1        >  0  , < 0.5
   411		//   1     0        == 0.5
   412		//   1     1        >  0.5, < 1.0
   413	
   414		// bits > z.prec: mantissa too large => round
   415		r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
   416		rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
   417		if sbit == 0 {
   418			// TODO(gri) if rbit != 0 we don't need to compute sbit for some rounding modes (optimization)
   419			sbit = z.mant.sticky(r)
   420		}
   421		sbit &= 1 // be safe and ensure it's a single bit
   422	
   423		// cut off extra words
   424		n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
   425		if m > n {
   426			copy(z.mant, z.mant[m-n:]) // move n last words to front
   427			z.mant = z.mant[:n]
   428		}
   429	
   430		// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
   431		ntz := n*_W - z.prec // 0 <= ntz < _W
   432		lsb := Word(1) << ntz
   433	
   434		// round if result is inexact
   435		if rbit|sbit != 0 {
   436			// Make rounding decision: The result mantissa is truncated ("rounded down")
   437			// by default. Decide if we need to increment, or "round up", the (unsigned)
   438			// mantissa.
   439			inc := false
   440			switch z.mode {
   441			case ToNegativeInf:
   442				inc = z.neg
   443			case ToZero:
   444				// nothing to do
   445			case ToNearestEven:
   446				inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
   447			case ToNearestAway:
   448				inc = rbit != 0
   449			case AwayFromZero:
   450				inc = true
   451			case ToPositiveInf:
   452				inc = !z.neg
   453			default:
   454				panic("unreachable")
   455			}
   456	
   457			// A positive result (!z.neg) is Above the exact result if we increment,
   458			// and it's Below if we truncate (Exact results require no rounding).
   459			// For a negative result (z.neg) it is exactly the opposite.
   460			z.acc = makeAcc(inc != z.neg)
   461	
   462			if inc {
   463				// add 1 to mantissa
   464				if addVW(z.mant, z.mant, lsb) != 0 {
   465					// mantissa overflow => adjust exponent
   466					if z.exp >= MaxExp {
   467						// exponent overflow
   468						z.form = inf
   469						return
   470					}
   471					z.exp++
   472					// adjust mantissa: divide by 2 to compensate for exponent adjustment
   473					shrVU(z.mant, z.mant, 1)
   474					// set msb == carry == 1 from the mantissa overflow above
   475					const msb = 1 << (_W - 1)
   476					z.mant[n-1] |= msb
   477				}
   478			}
   479		}
   480	
   481		// zero out trailing bits in least-significant word
   482		z.mant[0] &^= lsb - 1
   483	
   484		if debugFloat {
   485			z.validate()
   486		}
   487	}
   488	
   489	func (z *Float) setBits64(neg bool, x uint64) *Float {
   490		if z.prec == 0 {
   491			z.prec = 64
   492		}
   493		z.acc = Exact
   494		z.neg = neg
   495		if x == 0 {
   496			z.form = zero
   497			return z
   498		}
   499		// x != 0
   500		z.form = finite
   501		s := nlz64(x)
   502		z.mant = z.mant.setUint64(x << s)
   503		z.exp = int32(64 - s) // always fits
   504		if z.prec < 64 {
   505			z.round(0)
   506		}
   507		return z
   508	}
   509	
   510	// SetUint64 sets z to the (possibly rounded) value of x and returns z.
   511	// If z's precision is 0, it is changed to 64 (and rounding will have
   512	// no effect).
   513	func (z *Float) SetUint64(x uint64) *Float {
   514		return z.setBits64(false, x)
   515	}
   516	
   517	// SetInt64 sets z to the (possibly rounded) value of x and returns z.
   518	// If z's precision is 0, it is changed to 64 (and rounding will have
   519	// no effect).
   520	func (z *Float) SetInt64(x int64) *Float {
   521		u := x
   522		if u < 0 {
   523			u = -u
   524		}
   525		// We cannot simply call z.SetUint64(uint64(u)) and change
   526		// the sign afterwards because the sign affects rounding.
   527		return z.setBits64(x < 0, uint64(u))
   528	}
   529	
   530	// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
   531	// If z's precision is 0, it is changed to 53 (and rounding will have
   532	// no effect). SetFloat64 panics with ErrNaN if x is a NaN.
   533	func (z *Float) SetFloat64(x float64) *Float {
   534		if z.prec == 0 {
   535			z.prec = 53
   536		}
   537		if math.IsNaN(x) {
   538			panic(ErrNaN{"Float.SetFloat64(NaN)"})
   539		}
   540		z.acc = Exact
   541		z.neg = math.Signbit(x) // handle -0, -Inf correctly
   542		if x == 0 {
   543			z.form = zero
   544			return z
   545		}
   546		if math.IsInf(x, 0) {
   547			z.form = inf
   548			return z
   549		}
   550		// normalized x != 0
   551		z.form = finite
   552		fmant, exp := math.Frexp(x) // get normalized mantissa
   553		z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
   554		z.exp = int32(exp) // always fits
   555		if z.prec < 53 {
   556			z.round(0)
   557		}
   558		return z
   559	}
   560	
   561	// fnorm normalizes mantissa m by shifting it to the left
   562	// such that the msb of the most-significant word (msw) is 1.
   563	// It returns the shift amount. It assumes that len(m) != 0.
   564	func fnorm(m nat) int64 {
   565		if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
   566			panic("msw of mantissa is 0")
   567		}
   568		s := nlz(m[len(m)-1])
   569		if s > 0 {
   570			c := shlVU(m, m, s)
   571			if debugFloat && c != 0 {
   572				panic("nlz or shlVU incorrect")
   573			}
   574		}
   575		return int64(s)
   576	}
   577	
   578	// SetInt sets z to the (possibly rounded) value of x and returns z.
   579	// If z's precision is 0, it is changed to the larger of x.BitLen()
   580	// or 64 (and rounding will have no effect).
   581	func (z *Float) SetInt(x *Int) *Float {
   582		// TODO(gri) can be more efficient if z.prec > 0
   583		// but small compared to the size of x, or if there
   584		// are many trailing 0's.
   585		bits := uint32(x.BitLen())
   586		if z.prec == 0 {
   587			z.prec = umax32(bits, 64)
   588		}
   589		z.acc = Exact
   590		z.neg = x.neg
   591		if len(x.abs) == 0 {
   592			z.form = zero
   593			return z
   594		}
   595		// x != 0
   596		z.mant = z.mant.set(x.abs)
   597		fnorm(z.mant)
   598		z.setExpAndRound(int64(bits), 0)
   599		return z
   600	}
   601	
   602	// SetRat sets z to the (possibly rounded) value of x and returns z.
   603	// If z's precision is 0, it is changed to the largest of a.BitLen(),
   604	// b.BitLen(), or 64; with x = a/b.
   605	func (z *Float) SetRat(x *Rat) *Float {
   606		if x.IsInt() {
   607			return z.SetInt(x.Num())
   608		}
   609		var a, b Float
   610		a.SetInt(x.Num())
   611		b.SetInt(x.Denom())
   612		if z.prec == 0 {
   613			z.prec = umax32(a.prec, b.prec)
   614		}
   615		return z.Quo(&a, &b)
   616	}
   617	
   618	// SetInf sets z to the infinite Float -Inf if signbit is
   619	// set, or +Inf if signbit is not set, and returns z. The
   620	// precision of z is unchanged and the result is always
   621	// Exact.
   622	func (z *Float) SetInf(signbit bool) *Float {
   623		z.acc = Exact
   624		z.form = inf
   625		z.neg = signbit
   626		return z
   627	}
   628	
   629	// Set sets z to the (possibly rounded) value of x and returns z.
   630	// If z's precision is 0, it is changed to the precision of x
   631	// before setting z (and rounding will have no effect).
   632	// Rounding is performed according to z's precision and rounding
   633	// mode; and z's accuracy reports the result error relative to the
   634	// exact (not rounded) result.
   635	func (z *Float) Set(x *Float) *Float {
   636		if debugFloat {
   637			x.validate()
   638		}
   639		z.acc = Exact
   640		if z != x {
   641			z.form = x.form
   642			z.neg = x.neg
   643			if x.form == finite {
   644				z.exp = x.exp
   645				z.mant = z.mant.set(x.mant)
   646			}
   647			if z.prec == 0 {
   648				z.prec = x.prec
   649			} else if z.prec < x.prec {
   650				z.round(0)
   651			}
   652		}
   653		return z
   654	}
   655	
   656	// Copy sets z to x, with the same precision, rounding mode, and
   657	// accuracy as x, and returns z. x is not changed even if z and
   658	// x are the same.
   659	func (z *Float) Copy(x *Float) *Float {
   660		if debugFloat {
   661			x.validate()
   662		}
   663		if z != x {
   664			z.prec = x.prec
   665			z.mode = x.mode
   666			z.acc = x.acc
   667			z.form = x.form
   668			z.neg = x.neg
   669			if z.form == finite {
   670				z.mant = z.mant.set(x.mant)
   671				z.exp = x.exp
   672			}
   673		}
   674		return z
   675	}
   676	
   677	// msb32 returns the 32 most significant bits of x.
   678	func msb32(x nat) uint32 {
   679		i := len(x) - 1
   680		if i < 0 {
   681			return 0
   682		}
   683		if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   684			panic("x not normalized")
   685		}
   686		switch _W {
   687		case 32:
   688			return uint32(x[i])
   689		case 64:
   690			return uint32(x[i] >> 32)
   691		}
   692		panic("unreachable")
   693	}
   694	
   695	// msb64 returns the 64 most significant bits of x.
   696	func msb64(x nat) uint64 {
   697		i := len(x) - 1
   698		if i < 0 {
   699			return 0
   700		}
   701		if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   702			panic("x not normalized")
   703		}
   704		switch _W {
   705		case 32:
   706			v := uint64(x[i]) << 32
   707			if i > 0 {
   708				v |= uint64(x[i-1])
   709			}
   710			return v
   711		case 64:
   712			return uint64(x[i])
   713		}
   714		panic("unreachable")
   715	}
   716	
   717	// Uint64 returns the unsigned integer resulting from truncating x
   718	// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
   719	// if x is an integer and Below otherwise.
   720	// The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
   721	// for x > math.MaxUint64.
   722	func (x *Float) Uint64() (uint64, Accuracy) {
   723		if debugFloat {
   724			x.validate()
   725		}
   726	
   727		switch x.form {
   728		case finite:
   729			if x.neg {
   730				return 0, Above
   731			}
   732			// 0 < x < +Inf
   733			if x.exp <= 0 {
   734				// 0 < x < 1
   735				return 0, Below
   736			}
   737			// 1 <= x < Inf
   738			if x.exp <= 64 {
   739				// u = trunc(x) fits into a uint64
   740				u := msb64(x.mant) >> (64 - uint32(x.exp))
   741				if x.MinPrec() <= 64 {
   742					return u, Exact
   743				}
   744				return u, Below // x truncated
   745			}
   746			// x too large
   747			return math.MaxUint64, Below
   748	
   749		case zero:
   750			return 0, Exact
   751	
   752		case inf:
   753			if x.neg {
   754				return 0, Above
   755			}
   756			return math.MaxUint64, Below
   757		}
   758	
   759		panic("unreachable")
   760	}
   761	
   762	// Int64 returns the integer resulting from truncating x towards zero.
   763	// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
   764	// an integer, and Above (x < 0) or Below (x > 0) otherwise.
   765	// The result is (math.MinInt64, Above) for x < math.MinInt64,
   766	// and (math.MaxInt64, Below) for x > math.MaxInt64.
   767	func (x *Float) Int64() (int64, Accuracy) {
   768		if debugFloat {
   769			x.validate()
   770		}
   771	
   772		switch x.form {
   773		case finite:
   774			// 0 < |x| < +Inf
   775			acc := makeAcc(x.neg)
   776			if x.exp <= 0 {
   777				// 0 < |x| < 1
   778				return 0, acc
   779			}
   780			// x.exp > 0
   781	
   782			// 1 <= |x| < +Inf
   783			if x.exp <= 63 {
   784				// i = trunc(x) fits into an int64 (excluding math.MinInt64)
   785				i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
   786				if x.neg {
   787					i = -i
   788				}
   789				if x.MinPrec() <= uint(x.exp) {
   790					return i, Exact
   791				}
   792				return i, acc // x truncated
   793			}
   794			if x.neg {
   795				// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
   796				if x.exp == 64 && x.MinPrec() == 1 {
   797					acc = Exact
   798				}
   799				return math.MinInt64, acc
   800			}
   801			// x too large
   802			return math.MaxInt64, Below
   803	
   804		case zero:
   805			return 0, Exact
   806	
   807		case inf:
   808			if x.neg {
   809				return math.MinInt64, Above
   810			}
   811			return math.MaxInt64, Below
   812		}
   813	
   814		panic("unreachable")
   815	}
   816	
   817	// Float32 returns the float32 value nearest to x. If x is too small to be
   818	// represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
   819	// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   820	// If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
   821	// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   822	func (x *Float) Float32() (float32, Accuracy) {
   823		if debugFloat {
   824			x.validate()
   825		}
   826	
   827		switch x.form {
   828		case finite:
   829			// 0 < |x| < +Inf
   830	
   831			const (
   832				fbits = 32                //        float size
   833				mbits = 23                //        mantissa size (excluding implicit msb)
   834				ebits = fbits - mbits - 1 //     8  exponent size
   835				bias  = 1<<(ebits-1) - 1  //   127  exponent bias
   836				dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
   837				emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
   838				emax  = bias              //   127  largest unbiased exponent (normal)
   839			)
   840	
   841			// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
   842			e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
   843	
   844			// Compute precision p for float32 mantissa.
   845			// If the exponent is too small, we have a denormal number before
   846			// rounding and fewer than p mantissa bits of precision available
   847			// (the exponent remains fixed but the mantissa gets shifted right).
   848			p := mbits + 1 // precision of normal float
   849			if e < emin {
   850				// recompute precision
   851				p = mbits + 1 - emin + int(e)
   852				// If p == 0, the mantissa of x is shifted so much to the right
   853				// that its msb falls immediately to the right of the float32
   854				// mantissa space. In other words, if the smallest denormal is
   855				// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
   856				// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
   857				// If m == 0.5, it is rounded down to even, i.e., 0.0.
   858				// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
   859				if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
   860					// underflow to ±0
   861					if x.neg {
   862						var z float32
   863						return -z, Above
   864					}
   865					return 0.0, Below
   866				}
   867				// otherwise, round up
   868				// We handle p == 0 explicitly because it's easy and because
   869				// Float.round doesn't support rounding to 0 bits of precision.
   870				if p == 0 {
   871					if x.neg {
   872						return -math.SmallestNonzeroFloat32, Below
   873					}
   874					return math.SmallestNonzeroFloat32, Above
   875				}
   876			}
   877			// p > 0
   878	
   879			// round
   880			var r Float
   881			r.prec = uint32(p)
   882			r.Set(x)
   883			e = r.exp - 1
   884	
   885			// Rounding may have caused r to overflow to ±Inf
   886			// (rounding never causes underflows to 0).
   887			// If the exponent is too large, also overflow to ±Inf.
   888			if r.form == inf || e > emax {
   889				// overflow
   890				if x.neg {
   891					return float32(math.Inf(-1)), Below
   892				}
   893				return float32(math.Inf(+1)), Above
   894			}
   895			// e <= emax
   896	
   897			// Determine sign, biased exponent, and mantissa.
   898			var sign, bexp, mant uint32
   899			if x.neg {
   900				sign = 1 << (fbits - 1)
   901			}
   902	
   903			// Rounding may have caused a denormal number to
   904			// become normal. Check again.
   905			if e < emin {
   906				// denormal number: recompute precision
   907				// Since rounding may have at best increased precision
   908				// and we have eliminated p <= 0 early, we know p > 0.
   909				// bexp == 0 for denormals
   910				p = mbits + 1 - emin + int(e)
   911				mant = msb32(r.mant) >> uint(fbits-p)
   912			} else {
   913				// normal number: emin <= e <= emax
   914				bexp = uint32(e+bias) << mbits
   915				mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
   916			}
   917	
   918			return math.Float32frombits(sign | bexp | mant), r.acc
   919	
   920		case zero:
   921			if x.neg {
   922				var z float32
   923				return -z, Exact
   924			}
   925			return 0.0, Exact
   926	
   927		case inf:
   928			if x.neg {
   929				return float32(math.Inf(-1)), Exact
   930			}
   931			return float32(math.Inf(+1)), Exact
   932		}
   933	
   934		panic("unreachable")
   935	}
   936	
   937	// Float64 returns the float64 value nearest to x. If x is too small to be
   938	// represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
   939	// is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   940	// If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
   941	// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   942	func (x *Float) Float64() (float64, Accuracy) {
   943		if debugFloat {
   944			x.validate()
   945		}
   946	
   947		switch x.form {
   948		case finite:
   949			// 0 < |x| < +Inf
   950	
   951			const (
   952				fbits = 64                //        float size
   953				mbits = 52                //        mantissa size (excluding implicit msb)
   954				ebits = fbits - mbits - 1 //    11  exponent size
   955				bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
   956				dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
   957				emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
   958				emax  = bias              //  1023  largest unbiased exponent (normal)
   959			)
   960	
   961			// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
   962			e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
   963	
   964			// Compute precision p for float64 mantissa.
   965			// If the exponent is too small, we have a denormal number before
   966			// rounding and fewer than p mantissa bits of precision available
   967			// (the exponent remains fixed but the mantissa gets shifted right).
   968			p := mbits + 1 // precision of normal float
   969			if e < emin {
   970				// recompute precision
   971				p = mbits + 1 - emin + int(e)
   972				// If p == 0, the mantissa of x is shifted so much to the right
   973				// that its msb falls immediately to the right of the float64
   974				// mantissa space. In other words, if the smallest denormal is
   975				// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
   976				// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
   977				// If m == 0.5, it is rounded down to even, i.e., 0.0.
   978				// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
   979				if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
   980					// underflow to ±0
   981					if x.neg {
   982						var z float64
   983						return -z, Above
   984					}
   985					return 0.0, Below
   986				}
   987				// otherwise, round up
   988				// We handle p == 0 explicitly because it's easy and because
   989				// Float.round doesn't support rounding to 0 bits of precision.
   990				if p == 0 {
   991					if x.neg {
   992						return -math.SmallestNonzeroFloat64, Below
   993					}
   994					return math.SmallestNonzeroFloat64, Above
   995				}
   996			}
   997			// p > 0
   998	
   999			// round
  1000			var r Float
  1001			r.prec = uint32(p)
  1002			r.Set(x)
  1003			e = r.exp - 1
  1004	
  1005			// Rounding may have caused r to overflow to ±Inf
  1006			// (rounding never causes underflows to 0).
  1007			// If the exponent is too large, also overflow to ±Inf.
  1008			if r.form == inf || e > emax {
  1009				// overflow
  1010				if x.neg {
  1011					return math.Inf(-1), Below
  1012				}
  1013				return math.Inf(+1), Above
  1014			}
  1015			// e <= emax
  1016	
  1017			// Determine sign, biased exponent, and mantissa.
  1018			var sign, bexp, mant uint64
  1019			if x.neg {
  1020				sign = 1 << (fbits - 1)
  1021			}
  1022	
  1023			// Rounding may have caused a denormal number to
  1024			// become normal. Check again.
  1025			if e < emin {
  1026				// denormal number: recompute precision
  1027				// Since rounding may have at best increased precision
  1028				// and we have eliminated p <= 0 early, we know p > 0.
  1029				// bexp == 0 for denormals
  1030				p = mbits + 1 - emin + int(e)
  1031				mant = msb64(r.mant) >> uint(fbits-p)
  1032			} else {
  1033				// normal number: emin <= e <= emax
  1034				bexp = uint64(e+bias) << mbits
  1035				mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
  1036			}
  1037	
  1038			return math.Float64frombits(sign | bexp | mant), r.acc
  1039	
  1040		case zero:
  1041			if x.neg {
  1042				var z float64
  1043				return -z, Exact
  1044			}
  1045			return 0.0, Exact
  1046	
  1047		case inf:
  1048			if x.neg {
  1049				return math.Inf(-1), Exact
  1050			}
  1051			return math.Inf(+1), Exact
  1052		}
  1053	
  1054		panic("unreachable")
  1055	}
  1056	
  1057	// Int returns the result of truncating x towards zero;
  1058	// or nil if x is an infinity.
  1059	// The result is Exact if x.IsInt(); otherwise it is Below
  1060	// for x > 0, and Above for x < 0.
  1061	// If a non-nil *Int argument z is provided, Int stores
  1062	// the result in z instead of allocating a new Int.
  1063	func (x *Float) Int(z *Int) (*Int, Accuracy) {
  1064		if debugFloat {
  1065			x.validate()
  1066		}
  1067	
  1068		if z == nil && x.form <= finite {
  1069			z = new(Int)
  1070		}
  1071	
  1072		switch x.form {
  1073		case finite:
  1074			// 0 < |x| < +Inf
  1075			acc := makeAcc(x.neg)
  1076			if x.exp <= 0 {
  1077				// 0 < |x| < 1
  1078				return z.SetInt64(0), acc
  1079			}
  1080			// x.exp > 0
  1081	
  1082			// 1 <= |x| < +Inf
  1083			// determine minimum required precision for x
  1084			allBits := uint(len(x.mant)) * _W
  1085			exp := uint(x.exp)
  1086			if x.MinPrec() <= exp {
  1087				acc = Exact
  1088			}
  1089			// shift mantissa as needed
  1090			if z == nil {
  1091				z = new(Int)
  1092			}
  1093			z.neg = x.neg
  1094			switch {
  1095			case exp > allBits:
  1096				z.abs = z.abs.shl(x.mant, exp-allBits)
  1097			default:
  1098				z.abs = z.abs.set(x.mant)
  1099			case exp < allBits:
  1100				z.abs = z.abs.shr(x.mant, allBits-exp)
  1101			}
  1102			return z, acc
  1103	
  1104		case zero:
  1105			return z.SetInt64(0), Exact
  1106	
  1107		case inf:
  1108			return nil, makeAcc(x.neg)
  1109		}
  1110	
  1111		panic("unreachable")
  1112	}
  1113	
  1114	// Rat returns the rational number corresponding to x;
  1115	// or nil if x is an infinity.
  1116	// The result is Exact if x is not an Inf.
  1117	// If a non-nil *Rat argument z is provided, Rat stores
  1118	// the result in z instead of allocating a new Rat.
  1119	func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
  1120		if debugFloat {
  1121			x.validate()
  1122		}
  1123	
  1124		if z == nil && x.form <= finite {
  1125			z = new(Rat)
  1126		}
  1127	
  1128		switch x.form {
  1129		case finite:
  1130			// 0 < |x| < +Inf
  1131			allBits := int32(len(x.mant)) * _W
  1132			// build up numerator and denominator
  1133			z.a.neg = x.neg
  1134			switch {
  1135			case x.exp > allBits:
  1136				z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
  1137				z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1138				// z already in normal form
  1139			default:
  1140				z.a.abs = z.a.abs.set(x.mant)
  1141				z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1142				// z already in normal form
  1143			case x.exp < allBits:
  1144				z.a.abs = z.a.abs.set(x.mant)
  1145				t := z.b.abs.setUint64(1)
  1146				z.b.abs = t.shl(t, uint(allBits-x.exp))
  1147				z.norm()
  1148			}
  1149			return z, Exact
  1150	
  1151		case zero:
  1152			return z.SetInt64(0), Exact
  1153	
  1154		case inf:
  1155			return nil, makeAcc(x.neg)
  1156		}
  1157	
  1158		panic("unreachable")
  1159	}
  1160	
  1161	// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
  1162	// and returns z.
  1163	func (z *Float) Abs(x *Float) *Float {
  1164		z.Set(x)
  1165		z.neg = false
  1166		return z
  1167	}
  1168	
  1169	// Neg sets z to the (possibly rounded) value of x with its sign negated,
  1170	// and returns z.
  1171	func (z *Float) Neg(x *Float) *Float {
  1172		z.Set(x)
  1173		z.neg = !z.neg
  1174		return z
  1175	}
  1176	
  1177	func validateBinaryOperands(x, y *Float) {
  1178		if !debugFloat {
  1179			// avoid performance bugs
  1180			panic("validateBinaryOperands called but debugFloat is not set")
  1181		}
  1182		if len(x.mant) == 0 {
  1183			panic("empty mantissa for x")
  1184		}
  1185		if len(y.mant) == 0 {
  1186			panic("empty mantissa for y")
  1187		}
  1188	}
  1189	
  1190	// z = x + y, ignoring signs of x and y for the addition
  1191	// but using the sign of z for rounding the result.
  1192	// x and y must have a non-empty mantissa and valid exponent.
  1193	func (z *Float) uadd(x, y *Float) {
  1194		// Note: This implementation requires 2 shifts most of the
  1195		// time. It is also inefficient if exponents or precisions
  1196		// differ by wide margins. The following article describes
  1197		// an efficient (but much more complicated) implementation
  1198		// compatible with the internal representation used here:
  1199		//
  1200		// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
  1201		// Point Addition With Exact Rounding (as in the MPFR Library)"
  1202		// http://www.vinc17.net/research/papers/rnc6.pdf
  1203	
  1204		if debugFloat {
  1205			validateBinaryOperands(x, y)
  1206		}
  1207	
  1208		// compute exponents ex, ey for mantissa with "binary point"
  1209		// on the right (mantissa.0) - use int64 to avoid overflow
  1210		ex := int64(x.exp) - int64(len(x.mant))*_W
  1211		ey := int64(y.exp) - int64(len(y.mant))*_W
  1212	
  1213		al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
  1214	
  1215		// TODO(gri) having a combined add-and-shift primitive
  1216		//           could make this code significantly faster
  1217		switch {
  1218		case ex < ey:
  1219			if al {
  1220				t := nat(nil).shl(y.mant, uint(ey-ex))
  1221				z.mant = z.mant.add(x.mant, t)
  1222			} else {
  1223				z.mant = z.mant.shl(y.mant, uint(ey-ex))
  1224				z.mant = z.mant.add(x.mant, z.mant)
  1225			}
  1226		default:
  1227			// ex == ey, no shift needed
  1228			z.mant = z.mant.add(x.mant, y.mant)
  1229		case ex > ey:
  1230			if al {
  1231				t := nat(nil).shl(x.mant, uint(ex-ey))
  1232				z.mant = z.mant.add(t, y.mant)
  1233			} else {
  1234				z.mant = z.mant.shl(x.mant, uint(ex-ey))
  1235				z.mant = z.mant.add(z.mant, y.mant)
  1236			}
  1237			ex = ey
  1238		}
  1239		// len(z.mant) > 0
  1240	
  1241		z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1242	}
  1243	
  1244	// z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
  1245	// but using the sign of z for rounding the result.
  1246	// x and y must have a non-empty mantissa and valid exponent.
  1247	func (z *Float) usub(x, y *Float) {
  1248		// This code is symmetric to uadd.
  1249		// We have not factored the common code out because
  1250		// eventually uadd (and usub) should be optimized
  1251		// by special-casing, and the code will diverge.
  1252	
  1253		if debugFloat {
  1254			validateBinaryOperands(x, y)
  1255		}
  1256	
  1257		ex := int64(x.exp) - int64(len(x.mant))*_W
  1258		ey := int64(y.exp) - int64(len(y.mant))*_W
  1259	
  1260		al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
  1261	
  1262		switch {
  1263		case ex < ey:
  1264			if al {
  1265				t := nat(nil).shl(y.mant, uint(ey-ex))
  1266				z.mant = t.sub(x.mant, t)
  1267			} else {
  1268				z.mant = z.mant.shl(y.mant, uint(ey-ex))
  1269				z.mant = z.mant.sub(x.mant, z.mant)
  1270			}
  1271		default:
  1272			// ex == ey, no shift needed
  1273			z.mant = z.mant.sub(x.mant, y.mant)
  1274		case ex > ey:
  1275			if al {
  1276				t := nat(nil).shl(x.mant, uint(ex-ey))
  1277				z.mant = t.sub(t, y.mant)
  1278			} else {
  1279				z.mant = z.mant.shl(x.mant, uint(ex-ey))
  1280				z.mant = z.mant.sub(z.mant, y.mant)
  1281			}
  1282			ex = ey
  1283		}
  1284	
  1285		// operands may have canceled each other out
  1286		if len(z.mant) == 0 {
  1287			z.acc = Exact
  1288			z.form = zero
  1289			z.neg = false
  1290			return
  1291		}
  1292		// len(z.mant) > 0
  1293	
  1294		z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1295	}
  1296	
  1297	// z = x * y, ignoring signs of x and y for the multiplication
  1298	// but using the sign of z for rounding the result.
  1299	// x and y must have a non-empty mantissa and valid exponent.
  1300	func (z *Float) umul(x, y *Float) {
  1301		if debugFloat {
  1302			validateBinaryOperands(x, y)
  1303		}
  1304	
  1305		// Note: This is doing too much work if the precision
  1306		// of z is less than the sum of the precisions of x
  1307		// and y which is often the case (e.g., if all floats
  1308		// have the same precision).
  1309		// TODO(gri) Optimize this for the common case.
  1310	
  1311		e := int64(x.exp) + int64(y.exp)
  1312		z.mant = z.mant.mul(x.mant, y.mant)
  1313	
  1314		z.setExpAndRound(e-fnorm(z.mant), 0)
  1315	}
  1316	
  1317	// z = x / y, ignoring signs of x and y for the division
  1318	// but using the sign of z for rounding the result.
  1319	// x and y must have a non-empty mantissa and valid exponent.
  1320	func (z *Float) uquo(x, y *Float) {
  1321		if debugFloat {
  1322			validateBinaryOperands(x, y)
  1323		}
  1324	
  1325		// mantissa length in words for desired result precision + 1
  1326		// (at least one extra bit so we get the rounding bit after
  1327		// the division)
  1328		n := int(z.prec/_W) + 1
  1329	
  1330		// compute adjusted x.mant such that we get enough result precision
  1331		xadj := x.mant
  1332		if d := n - len(x.mant) + len(y.mant); d > 0 {
  1333			// d extra words needed => add d "0 digits" to x
  1334			xadj = make(nat, len(x.mant)+d)
  1335			copy(xadj[d:], x.mant)
  1336		}
  1337		// TODO(gri): If we have too many digits (d < 0), we should be able
  1338		// to shorten x for faster division. But we must be extra careful
  1339		// with rounding in that case.
  1340	
  1341		// Compute d before division since there may be aliasing of x.mant
  1342		// (via xadj) or y.mant with z.mant.
  1343		d := len(xadj) - len(y.mant)
  1344	
  1345		// divide
  1346		var r nat
  1347		z.mant, r = z.mant.div(nil, xadj, y.mant)
  1348		e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
  1349	
  1350		// The result is long enough to include (at least) the rounding bit.
  1351		// If there's a non-zero remainder, the corresponding fractional part
  1352		// (if it were computed), would have a non-zero sticky bit (if it were
  1353		// zero, it couldn't have a non-zero remainder).
  1354		var sbit uint
  1355		if len(r) > 0 {
  1356			sbit = 1
  1357		}
  1358	
  1359		z.setExpAndRound(e-fnorm(z.mant), sbit)
  1360	}
  1361	
  1362	// ucmp returns -1, 0, or +1, depending on whether
  1363	// |x| < |y|, |x| == |y|, or |x| > |y|.
  1364	// x and y must have a non-empty mantissa and valid exponent.
  1365	func (x *Float) ucmp(y *Float) int {
  1366		if debugFloat {
  1367			validateBinaryOperands(x, y)
  1368		}
  1369	
  1370		switch {
  1371		case x.exp < y.exp:
  1372			return -1
  1373		case x.exp > y.exp:
  1374			return +1
  1375		}
  1376		// x.exp == y.exp
  1377	
  1378		// compare mantissas
  1379		i := len(x.mant)
  1380		j := len(y.mant)
  1381		for i > 0 || j > 0 {
  1382			var xm, ym Word
  1383			if i > 0 {
  1384				i--
  1385				xm = x.mant[i]
  1386			}
  1387			if j > 0 {
  1388				j--
  1389				ym = y.mant[j]
  1390			}
  1391			switch {
  1392			case xm < ym:
  1393				return -1
  1394			case xm > ym:
  1395				return +1
  1396			}
  1397		}
  1398	
  1399		return 0
  1400	}
  1401	
  1402	// Handling of sign bit as defined by IEEE 754-2008, section 6.3:
  1403	//
  1404	// When neither the inputs nor result are NaN, the sign of a product or
  1405	// quotient is the exclusive OR of the operands’ signs; the sign of a sum,
  1406	// or of a difference x−y regarded as a sum x+(−y), differs from at most
  1407	// one of the addends’ signs; and the sign of the result of conversions,
  1408	// the quantize operation, the roundToIntegral operations, and the
  1409	// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
  1410	// These rules shall apply even when operands or results are zero or infinite.
  1411	//
  1412	// When the sum of two operands with opposite signs (or the difference of
  1413	// two operands with like signs) is exactly zero, the sign of that sum (or
  1414	// difference) shall be +0 in all rounding-direction attributes except
  1415	// roundTowardNegative; under that attribute, the sign of an exact zero
  1416	// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
  1417	// sign as x even when x is zero.
  1418	//
  1419	// See also: https://play.golang.org/p/RtH3UCt5IH
  1420	
  1421	// Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
  1422	// it is changed to the larger of x's or y's precision before the operation.
  1423	// Rounding is performed according to z's precision and rounding mode; and
  1424	// z's accuracy reports the result error relative to the exact (not rounded)
  1425	// result. Add panics with ErrNaN if x and y are infinities with opposite
  1426	// signs. The value of z is undefined in that case.
  1427	//
  1428	// BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect.
  1429	func (z *Float) Add(x, y *Float) *Float {
  1430		if debugFloat {
  1431			x.validate()
  1432			y.validate()
  1433		}
  1434	
  1435		if z.prec == 0 {
  1436			z.prec = umax32(x.prec, y.prec)
  1437		}
  1438	
  1439		if x.form == finite && y.form == finite {
  1440			// x + y (common case)
  1441			z.neg = x.neg
  1442			if x.neg == y.neg {
  1443				// x + y == x + y
  1444				// (-x) + (-y) == -(x + y)
  1445				z.uadd(x, y)
  1446			} else {
  1447				// x + (-y) == x - y == -(y - x)
  1448				// (-x) + y == y - x == -(x - y)
  1449				if x.ucmp(y) > 0 {
  1450					z.usub(x, y)
  1451				} else {
  1452					z.neg = !z.neg
  1453					z.usub(y, x)
  1454				}
  1455			}
  1456			return z
  1457		}
  1458	
  1459		if x.form == inf && y.form == inf && x.neg != y.neg {
  1460			// +Inf + -Inf
  1461			// -Inf + +Inf
  1462			// value of z is undefined but make sure it's valid
  1463			z.acc = Exact
  1464			z.form = zero
  1465			z.neg = false
  1466			panic(ErrNaN{"addition of infinities with opposite signs"})
  1467		}
  1468	
  1469		if x.form == zero && y.form == zero {
  1470			// ±0 + ±0
  1471			z.acc = Exact
  1472			z.form = zero
  1473			z.neg = x.neg && y.neg // -0 + -0 == -0
  1474			return z
  1475		}
  1476	
  1477		if x.form == inf || y.form == zero {
  1478			// ±Inf + y
  1479			// x + ±0
  1480			return z.Set(x)
  1481		}
  1482	
  1483		// ±0 + y
  1484		// x + ±Inf
  1485		return z.Set(y)
  1486	}
  1487	
  1488	// Sub sets z to the rounded difference x-y and returns z.
  1489	// Precision, rounding, and accuracy reporting are as for Add.
  1490	// Sub panics with ErrNaN if x and y are infinities with equal
  1491	// signs. The value of z is undefined in that case.
  1492	func (z *Float) Sub(x, y *Float) *Float {
  1493		if debugFloat {
  1494			x.validate()
  1495			y.validate()
  1496		}
  1497	
  1498		if z.prec == 0 {
  1499			z.prec = umax32(x.prec, y.prec)
  1500		}
  1501	
  1502		if x.form == finite && y.form == finite {
  1503			// x - y (common case)
  1504			z.neg = x.neg
  1505			if x.neg != y.neg {
  1506				// x - (-y) == x + y
  1507				// (-x) - y == -(x + y)
  1508				z.uadd(x, y)
  1509			} else {
  1510				// x - y == x - y == -(y - x)
  1511				// (-x) - (-y) == y - x == -(x - y)
  1512				if x.ucmp(y) > 0 {
  1513					z.usub(x, y)
  1514				} else {
  1515					z.neg = !z.neg
  1516					z.usub(y, x)
  1517				}
  1518			}
  1519			return z
  1520		}
  1521	
  1522		if x.form == inf && y.form == inf && x.neg == y.neg {
  1523			// +Inf - +Inf
  1524			// -Inf - -Inf
  1525			// value of z is undefined but make sure it's valid
  1526			z.acc = Exact
  1527			z.form = zero
  1528			z.neg = false
  1529			panic(ErrNaN{"subtraction of infinities with equal signs"})
  1530		}
  1531	
  1532		if x.form == zero && y.form == zero {
  1533			// ±0 - ±0
  1534			z.acc = Exact
  1535			z.form = zero
  1536			z.neg = x.neg && !y.neg // -0 - +0 == -0
  1537			return z
  1538		}
  1539	
  1540		if x.form == inf || y.form == zero {
  1541			// ±Inf - y
  1542			// x - ±0
  1543			return z.Set(x)
  1544		}
  1545	
  1546		// ±0 - y
  1547		// x - ±Inf
  1548		return z.Neg(y)
  1549	}
  1550	
  1551	// Mul sets z to the rounded product x*y and returns z.
  1552	// Precision, rounding, and accuracy reporting are as for Add.
  1553	// Mul panics with ErrNaN if one operand is zero and the other
  1554	// operand an infinity. The value of z is undefined in that case.
  1555	func (z *Float) Mul(x, y *Float) *Float {
  1556		if debugFloat {
  1557			x.validate()
  1558			y.validate()
  1559		}
  1560	
  1561		if z.prec == 0 {
  1562			z.prec = umax32(x.prec, y.prec)
  1563		}
  1564	
  1565		z.neg = x.neg != y.neg
  1566	
  1567		if x.form == finite && y.form == finite {
  1568			// x * y (common case)
  1569			z.umul(x, y)
  1570			return z
  1571		}
  1572	
  1573		z.acc = Exact
  1574		if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
  1575			// ±0 * ±Inf
  1576			// ±Inf * ±0
  1577			// value of z is undefined but make sure it's valid
  1578			z.form = zero
  1579			z.neg = false
  1580			panic(ErrNaN{"multiplication of zero with infinity"})
  1581		}
  1582	
  1583		if x.form == inf || y.form == inf {
  1584			// ±Inf * y
  1585			// x * ±Inf
  1586			z.form = inf
  1587			return z
  1588		}
  1589	
  1590		// ±0 * y
  1591		// x * ±0
  1592		z.form = zero
  1593		return z
  1594	}
  1595	
  1596	// Quo sets z to the rounded quotient x/y and returns z.
  1597	// Precision, rounding, and accuracy reporting are as for Add.
  1598	// Quo panics with ErrNaN if both operands are zero or infinities.
  1599	// The value of z is undefined in that case.
  1600	func (z *Float) Quo(x, y *Float) *Float {
  1601		if debugFloat {
  1602			x.validate()
  1603			y.validate()
  1604		}
  1605	
  1606		if z.prec == 0 {
  1607			z.prec = umax32(x.prec, y.prec)
  1608		}
  1609	
  1610		z.neg = x.neg != y.neg
  1611	
  1612		if x.form == finite && y.form == finite {
  1613			// x / y (common case)
  1614			z.uquo(x, y)
  1615			return z
  1616		}
  1617	
  1618		z.acc = Exact
  1619		if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
  1620			// ±0 / ±0
  1621			// ±Inf / ±Inf
  1622			// value of z is undefined but make sure it's valid
  1623			z.form = zero
  1624			z.neg = false
  1625			panic(ErrNaN{"division of zero by zero or infinity by infinity"})
  1626		}
  1627	
  1628		if x.form == zero || y.form == inf {
  1629			// ±0 / y
  1630			// x / ±Inf
  1631			z.form = zero
  1632			return z
  1633		}
  1634	
  1635		// x / ±0
  1636		// ±Inf / y
  1637		z.form = inf
  1638		return z
  1639	}
  1640	
  1641	// Cmp compares x and y and returns:
  1642	//
  1643	//   -1 if x <  y
  1644	//    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
  1645	//   +1 if x >  y
  1646	//
  1647	func (x *Float) Cmp(y *Float) int {
  1648		if debugFloat {
  1649			x.validate()
  1650			y.validate()
  1651		}
  1652	
  1653		mx := x.ord()
  1654		my := y.ord()
  1655		switch {
  1656		case mx < my:
  1657			return -1
  1658		case mx > my:
  1659			return +1
  1660		}
  1661		// mx == my
  1662	
  1663		// only if |mx| == 1 we have to compare the mantissae
  1664		switch mx {
  1665		case -1:
  1666			return y.ucmp(x)
  1667		case +1:
  1668			return x.ucmp(y)
  1669		}
  1670	
  1671		return 0
  1672	}
  1673	
  1674	// ord classifies x and returns:
  1675	//
  1676	//	-2 if -Inf == x
  1677	//	-1 if -Inf < x < 0
  1678	//	 0 if x == 0 (signed or unsigned)
  1679	//	+1 if 0 < x < +Inf
  1680	//	+2 if x == +Inf
  1681	//
  1682	func (x *Float) ord() int {
  1683		var m int
  1684		switch x.form {
  1685		case finite:
  1686			m = 1
  1687		case zero:
  1688			return 0
  1689		case inf:
  1690			m = 2
  1691		}
  1692		if x.neg {
  1693			m = -m
  1694		}
  1695		return m
  1696	}
  1697	
  1698	func umax32(x, y uint32) uint32 {
  1699		if x > y {
  1700			return x
  1701		}
  1702		return y
  1703	}
  1704	

View as plain text