1 // Copyright 2009 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS-IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Original author: ericv@google.com (Eric Veach) 16 // Converted to D: madric@gmail.com (Vijay Nayar) 17 18 module s2.util.math.exactfloat; 19 20 import algorithm = std.algorithm; 21 import format = std.format; 22 import math = std.math; 23 import s2.util.hash.mix; 24 import std.bigint; 25 import std.range; 26 import traits = std.traits; 27 28 /** 29 * ExactFloat is a multiple-precision floating point type based on the OpenSSL 30 * Bignum library. It has the same interface as the built-in "float" and 31 * "double" types, but only supports the subset of operators and intrinsics 32 * where it is possible to compute the result exactly. So for example, 33 * ExactFloat supports addition and multiplication but not division (since in 34 * general, the quotient of two floating-point numbers cannot be represented 35 * exactly). Exact arithmetic is useful for geometric algorithms, especially 36 * for disambiguating cases where ordinary double-precision arithmetic yields 37 * an uncertain result. 38 * 39 * ExactFloat is a subset of the faster and more capable MPFloat class (which 40 * is based on the GNU MPFR library). The main reason to use this class 41 * rather than MPFloat is that it is subject to a BSD-style license rather 42 * than the much more restrictive LGPL license. 43 * 44 * It has the following features: 45 * 46 * - ExactFloat uses the same syntax as the built-in "float" and "double" 47 * types, for example: x += 4 + fabs(2*y*y - z*z). There are a few 48 * differences (see below), but the syntax is compatible enough so that 49 * ExactFloat can be used as a template argument to templatized classes 50 * such as Vector2, VectorN, Matrix3x3, etc. 51 * 52 * - Results are not rounded; instead, precision is increased so that the 53 * result can be represented exactly. An inexact result is returned only 54 * in the case of underflow or overflow (yielding signed zero or infinity 55 * respectively), or if the maximum allowed precision is exceeded (yielding 56 * NaN). ExactFloat uses IEEE 754-2008 rules for handling infinities, NaN, 57 * rounding to integers, etc. 58 * 59 * - ExactFloat only supports calculations where the result can be 60 * represented exactly. Therefore it supports intrinsics such as fabs() 61 * but not transcendentals such as sin(), sqrt(), etc. 62 * 63 * Syntax Compatibility with "float" and "double" 64 * ---------------------------------------------- 65 * 66 * ExactFloat supports a subset of the operators and intrinsics for the 67 * built-in "double" type. (Thus it supports fabs() but not fabsf(), for 68 * example.) The syntax is different only in the following cases: 69 * 70 * - Casts and implicit conversions to built-in types (including "bool") are 71 * not supported. So for example, the following will not compile: 72 * 73 * ExactFloat x = 7.5; 74 * double y = x; // ERROR: use x.ToDouble() instead 75 * long z = x; // ERROR: use x.ToDouble() or lround(trunc(x)) 76 * q = static_cast<int>(x); // ERROR: use x.ToDouble() or lround(trunc(x)) 77 * if (x) { ... } // ERROR: use (x != 0) instead 78 * 79 * - The glibc floating-point classification macros (fpclassify, isfinite, 80 * isnormal, isnan, isinf) are not supported. Instead there are 81 * zero-argument methods: 82 * 83 * ExactFloat x; 84 * if (isnan(x)) { ... } // ERROR: use (x.is_nan()) instead 85 * if (isinf(x)) { ... } // ERROR: use (x.is_inf()) instead 86 * 87 * Using ExactFloat with Vector3, etc. 88 * ----------------------------------- 89 * 90 * ExactFloat can be used with templatized classes such as Vector2 and Vector3 91 * (see "util/math/vector.h"), with the following limitations: 92 * 93 * - Cast() can be used to convert other vector types to an ExactFloat vector 94 * type, but not the other way around. This is because there are no 95 * implicit conversions from ExactFloat to built-in types. You can work 96 * around this by calling an explicit conversion method such as 97 * ToDouble(). For example: 98 * 99 * typedef Vector3<ExactFloat> Vector3_xf; 100 * Vector3_xf x; 101 * Vector3_d y; 102 * x = Vector3_xf::Cast(y); // This works. 103 * y = Vector3_d::Cast(x); // This doesn't. 104 * y = Vector3_d(x[0].ToDouble(), x[1].ToDouble(), x[2].ToDouble()); // OK 105 * 106 * - IsNaN() is not supported because it calls isnan(), which is defined as a 107 * macro in <math.h> and therefore can't easily be overrided. 108 * 109 * Precision Semantics 110 * ___________________ 111 * 112 * Unlike MPFloat, ExactFloat does not allow a maximum precision to be 113 * specified (it is always unbounded). Therefore it does not have any of the 114 * corresponding constructors. 115 * 116 * The current precision of an ExactFloat (i.e., the number of bits in its 117 * mantissa) is returned by prec(). The precision is increased as necessary 118 * so that the result of every operation can be represented exactly. 119 */ 120 struct ExactFloat { 121 public: 122 // The following limits are imposed by OpenSSL. 123 124 /// The maximum exponent supported. If a value has an exponent larger than 125 /// this, it is replaced by infinity (with the appropriate sign). 126 static immutable int MAX_EXP = 200*1000*1000; // About 10**(60 million) 127 128 /// The minimum exponent supported. If a value has an exponent less than 129 /// this, it is replaced by zero (with the appropriate sign). 130 static immutable int MIN_EXP = -MAX_EXP; // About 10**(-60 million) 131 132 /** 133 * The maximum number of mantissa bits supported. If a value has more 134 * mantissa bits than this, it is replaced with NaN. (It is expected that 135 * users of this class will never want this much precision.) 136 */ 137 static immutable int MAX_PREC = 64 << 20; // About 20 million digits 138 139 /** 140 * Rounding modes. kRoundTiesToEven and kRoundTiesAwayFromZero both round 141 * to the nearest representable value unless two values are equally close. 142 * In that case kRoundTiesToEven rounds to the nearest even value, while 143 * kRoundTiesAwayFromZero always rounds away from zero. 144 */ 145 enum RoundingMode { 146 ROUND_TIES_TO_EVEN, 147 ROUND_TIES_AWAY_FROM_ZERO, 148 ROUND_TOWARD_ZERO, 149 ROUND_AWAY_FROM_ZERO, 150 ROUND_TOWARD_POSITIVE, 151 ROUND_TOWARD_NEGATIVE 152 } 153 154 ///////////////////////////////////////////////////////////////////////////// 155 // Constructors 156 157 /// Copy constructor. 158 this(in ExactFloat b) nothrow @nogc pure { 159 _sign = b._sign; 160 _bnExp = b._bnExp; 161 _bn = b._bn; 162 } 163 164 this(T)(T v) nothrow pure { 165 this = v; 166 } 167 168 /** 169 * Construct an ExactFloat from a "double". The constructor is implicit so 170 * that this class can be used as a replacement for "float" or "double" in 171 * templatized libraries. (With an explicit constructor, code such as 172 * "ExactFloat f = 2.5;" would not compile.) All double-precision values are 173 * supported, including denormalized numbers, infinities, and NaNs. 174 */ 175 void opAssign(T)(T v) nothrow pure 176 if (traits.isFloatingPoint!T) { 177 _bn = BigInt(); 178 _sign = math.signbit(v) ? -1 : 1; 179 if (math.isNaN(v)) { 180 setNan(); 181 } else if (math.isInfinity(v)) { 182 setInf(_sign); 183 } else { 184 // The following code is much simpler than messing about with bit masks, 185 // has the advantage of handling denormalized numbers and zero correctly, 186 // and is actually quite efficient (at least compared to the rest of this 187 // code). "f" is a fraction in the range [0.5, 1), so if we shift it left 188 // by the number of mantissa bits in a double (53, including the leading 189 // "1") then the result is always an integer. 190 int exp; 191 T f = math.frexp(math.fabs(v), exp); 192 _bn = cast(ulong) math.ldexp(f, DOUBLE_MANTISSA_BITS); 193 _bnExp = exp - DOUBLE_MANTISSA_BITS; 194 canonicalize(); 195 } 196 } 197 198 /** 199 * Construct an ExactFloat from an "int". Note that in general, ints are 200 * automatically converted to doubles and so would be handled by the 201 * constructor above. However, the particular argument (0) would be 202 * ambiguous; the compiler wouldn't know whether to treat it as a "double" or 203 * "const char*" (since 0 is a valid null pointer constant). Adding an "int" 204 * constructor solves this problem. 205 * 206 * We do not provide constructors for "unsigned", "long", "unsigned long", 207 * "long long", or "unsigned long long", since these types are not typically 208 * used in floating-point calculations and it is safer to require them to be 209 * explicitly cast. 210 */ 211 void opAssign(T)(T v) nothrow pure 212 if (traits.isIntegral!T) { 213 _sign = (v >= 0) ? 1 : -1; 214 _bn = math.abs(v); 215 _bnExp = 0; 216 canonicalize(); 217 } 218 219 /** 220 * Construct an ExactFloat from a string (such as "1.2e50"). Requires that 221 * the value is exactly representable as a floating-point number (so for 222 * example, "0.125" is allowed but "0.1" is not). 223 */ 224 void opAssign(T)(T s) nothrow pure 225 if (traits.isSomeString!T) { 226 ExactFloat.unimplemented(); 227 } 228 229 ///////////////////////////////////////////////////////////////////// 230 // Constants 231 // 232 // As an alternative to the constants below, you can also just use the 233 // constants defined in <math.h>, for example: 234 // 235 // ExactFloat x = NAN, y = -INFINITY; 236 237 /// Return an ExactFloat equal to positive zero (if sign >= 0) or 238 /// negative zero (if sign < 0). 239 static ExactFloat signedZero(int sign) nothrow pure { 240 ExactFloat r; 241 r.setZero(sign); 242 return r; 243 } 244 245 /// Return an ExactFloat equal to positive infinity (if sign >= 0) or 246 /// negative infinity (if sign < 0). 247 static ExactFloat infinity(int sign) nothrow pure { 248 ExactFloat r; 249 r.setInf(sign); 250 return r; 251 } 252 253 /// Return an ExactFloat that is NaN (Not-a-Number). 254 static ExactFloat nan() nothrow pure { 255 ExactFloat r; 256 r.setNan(); 257 return r; 258 } 259 260 261 ///////////////////////////////////////////////////////////////////////////// 262 // Accessor Methods 263 264 /// Return the maximum precision of the ExactFloat. This method exists only 265 /// for compatibility with MPFloat. 266 @property 267 int maxPrec() const nothrow @nogc pure { 268 return MAX_PREC; 269 } 270 271 /// Return the actual precision of this ExactFloat (the current number of 272 /// bits in its mantissa). Returns 0 for non-normal numbers such as NaN. 273 @property 274 int prec() const nothrow @nogc pure { 275 // TODO: BUG Fix this to get the exact number of used bits. 276 int totalBits = cast(int) (_bn.uintLength() - 1) * 32; 277 uint lastDigit = _bn.getDigit!uint(_bn.uintLength() - 1); 278 while (lastDigit != 0) { 279 lastDigit >>= 1; 280 totalBits++; 281 } 282 return totalBits; 283 } 284 285 /** 286 * Return the exponent of this ExactFloat given that the mantissa is in the 287 * range \[0.5, 1\). It is an error to call this method if the value is zero, 288 * infinity, or NaN. 289 */ 290 int exp() const nothrow @nogc pure 291 in { 292 assert(isNormal()); 293 } do { 294 return _bnExp + prec(); 295 } 296 297 /// Set the value of the ExactFloat to +0 (if sign >= 0) or -0 (if sign < 0). 298 void setZero(int sign) nothrow pure { 299 _sign = sign; 300 _bnExp = EXP_ZERO; 301 if (_bn != 0) { 302 _bn = 0; 303 } 304 } 305 306 /// Set the value of the ExactFloat to positive infinity (if sign >= 0) or 307 /// negative infinity (if sign < 0). 308 void setInf(int sign) nothrow pure { 309 _sign = sign; 310 _bnExp = EXP_INFINITY; 311 if (_bn != 0) { 312 _bn = 0; 313 } 314 } 315 316 /// Set the value of the ExactFloat to NaN (Not-a-Number). 317 void setNan() nothrow pure { 318 _sign = 1; 319 _bnExp = EXP_NAN; 320 if (_bn != 0) { 321 _bn = 0; 322 } 323 } 324 325 // Unfortunately, isinf(x), isnan(x), isnormal(x), and isfinite(x) are 326 // defined as macros in <math.h>. Therefore we can't easily extend them 327 // here. Instead we provide methods with underscores in their names that do 328 // the same thing: x.is_inf(), etc. 329 // 330 // These macros are not implemented: signbit(x), fpclassify(x). 331 332 /// Return true if this value is zero (including negative zero). 333 bool isZero() const nothrow @nogc pure { 334 return _bnExp == EXP_ZERO; 335 } 336 337 /// Return true if this value is infinity (positive or negative). 338 bool isInf() const nothrow @nogc pure { 339 return _bnExp == EXP_INFINITY; 340 } 341 342 /// Return true if this value is NaN (Not-a-Number). 343 bool isNan() const nothrow @nogc pure { 344 return _bnExp == EXP_NAN; 345 } 346 347 /** 348 * Return true if this value is a normal floating-point number. Non-normal 349 * values (zero, infinity, and NaN) often need to be handled separately 350 * because they are represented using special exponent values and their 351 * mantissa is not defined. 352 */ 353 bool isNormal() const nothrow @nogc pure { 354 return _bnExp < EXP_ZERO; 355 } 356 357 358 /// Return true if this value is a normal floating-point number or zero, 359 /// i.e. it is not infinity or NaN. 360 bool isFinite() const nothrow @nogc pure { 361 return _bnExp <= EXP_ZERO; 362 } 363 364 365 /// Return true if the sign bit is set (this includes negative zero). 366 @property 367 bool signBit() const nothrow @nogc pure { 368 return _sign < 0; 369 } 370 371 /** 372 * Return +1 if this ExactFloat is positive, -1 if it is negative, and 0 373 * if it is zero or NaN. Note that unlike sign_bit(), sgn() returns 0 for 374 * both positive and negative zero. 375 */ 376 @property 377 int sign() const nothrow @nogc pure { 378 return (isNan() || isZero()) ? 0 : _sign; 379 } 380 381 382 ///////////////////////////////////////////////////////////////////////////// 383 // Conversion Methods 384 // 385 // Note that some conversions are defined as functions further below, 386 // e.g. to convert to an integer you can use lround(), llrint(), etc. 387 388 /** 389 * Round to double precision. Note that since doubles have a much smaller 390 * exponent range than ExactFloats, very small values may be rounded to 391 * (positive or negative) zero, and very large values may be rounded to 392 * infinity. 393 * 394 * It is very important to make this a named method rather than an implicit 395 * conversion, because otherwise there would be a silent loss of precision 396 * whenever some desired operator or function happens not to be implemented. 397 * For example, if fabs() were not implemented and "x" and "y" were 398 * ExactFloats, then x = fabs(y) would silently convert "y" to a "double", 399 * take its absolute value, and convert it back to an ExactFloat. 400 */ 401 double toDouble() const nothrow pure { 402 // If the mantissa has too many bits, we need to round it. 403 if (prec() <= DOUBLE_MANTISSA_BITS) { 404 return toDoubleHelper(); 405 } else { 406 ExactFloat r = roundToMaxPrec(DOUBLE_MANTISSA_BITS, RoundingMode.ROUND_TIES_TO_EVEN); 407 return r.toDoubleHelper(); 408 } 409 } 410 411 /** 412 * Return a human-readable string such that if two values with the same 413 * precision are different, then their string representations are different. 414 * The format is similar to printf("%g"), except that the number of 415 * significant digits depends on the precision (with a minimum of 10). 416 * Trailing zeros are stripped (just like "%g"). 417 * 418 * Note that if two values have different precisions, they may have the same 419 * ToString() value even though their values are slightly different. If you 420 * need to distinguish such values, use ToUniqueString() intead. 421 */ 422 string toString() const { 423 int max_digits = algorithm.max(MIN_SIGNIFICANT_DIGITS, 424 numSignificantDigitsForPrec(prec())); 425 return toStringWithMaxDigits(max_digits); 426 } 427 428 /// Return a string formatted according to printf("%Ng") where N is the given 429 /// maximum number of significant digits. 430 string toStringWithMaxDigits(int max_digits) const 431 in { 432 assert(max_digits >= 0); 433 } do { 434 if (!isNormal()) { 435 if (isNan()) return "nan"; 436 if (isZero()) return (_sign < 0) ? "-0" : "0"; 437 return (_sign < 0) ? "-inf" : "inf"; 438 } 439 char[] digits; 440 int exp10 = getDecimalDigits(max_digits, digits); 441 string str; 442 if (_sign < 0) str ~= "-"; 443 444 // We use the standard '%g' formatting rules. If the exponent is less than 445 // -4 or greater than or equal to the requested precision (i.e., max_digits) 446 // then we use exponential notation. 447 // 448 // But since "exp10" is the base-10 exponent corresponding to a mantissa in 449 // the range [0.1, 1), whereas the '%g' rules assume a mantissa in the range 450 // [1.0, 10), we need to adjust these parameters by 1. 451 if (exp10 <= -4 || exp10 > max_digits) { 452 // Use exponential format. 453 str ~= digits[0]; 454 if (digits.length > 1) { 455 str ~= "."; 456 str ~= digits[1 .. $]; 457 } 458 char[20] exp_buf; 459 format.sformat(exp_buf, "e%+02d", exp10 - 1); 460 str ~= exp_buf; 461 } else { 462 // Use fixed format. We split this into two cases depending on whether 463 // the integer portion is non-zero or not. 464 if (exp10 > 0) { 465 if (exp10 >= digits.length) { 466 str ~= digits; 467 for (ulong i = exp10 - digits.length; i > 0; --i) { 468 str ~= "0"; 469 } 470 } else { 471 str ~= digits[0 .. exp10]; 472 str ~= "."; 473 str ~= digits[exp10 .. $]; 474 } 475 } else { 476 str ~= "0."; 477 for (int i = exp10; i < 0; ++i) { 478 str ~= "0"; 479 } 480 str ~= digits; 481 } 482 } 483 return str; 484 } 485 486 /** 487 * Return a human-readable string such that if two ExactFloats have different 488 * values, then their string representations are always different. This 489 * method is useful for debugging. The string has the form "value<prec>", 490 * where "prec" is the actual precision of the ExactFloat (e.g., "0.215<50>"). 491 */ 492 string toUniqueString() const { 493 string precStr = format.format("<%d>", prec()); 494 return toString() ~ precStr; 495 } 496 497 size_t toHash() const nothrow @safe { 498 return HashMix(_bn.toHash()) 499 .mix(cast(size_t) _bnExp) 500 .mix(cast(size_t) _sign) 501 .get(); 502 } 503 504 /** 505 * Return an upper bound on the number of significant digits required to 506 * distinguish any two floating-point numbers with the given precision when 507 * they are formatted as decimal strings in exponential format. 508 */ 509 static int numSignificantDigitsForPrec(int prec) nothrow { 510 // The simplest bound is 511 // 512 // d <= 1 + ceil(prec * log10(2)) 513 // 514 // The following bound is tighter by 0.5 digits on average, but requires 515 // the exponent to be known as well: 516 // 517 // d <= ceil(exp * log10(2)) - floor((exp - prec) * log10(2)) 518 // 519 // Since either of these bounds can be too large by 0, 1, or 2 digits, we 520 // stick with the simpler first bound. 521 return cast(int) (1 + math.ceil(prec * (math.LN2 / math.LN10))); 522 } 523 524 ///////////////////////////////////////////////////////////////////////////// 525 // Other Methods 526 527 /** 528 * Round the ExactFloat so that its mantissa has at most "max_prec" bits 529 * using the given rounding mode. Requires "max_prec" to be at least 2 530 * (since kRoundTiesToEven doesn't make sense with fewer bits than this). 531 */ 532 ExactFloat roundToMaxPrec(int max_prec, RoundingMode mode) const nothrow pure 533 in { 534 // The "kRoundTiesToEven" mode requires at least 2 bits of precision 535 // (otherwise both adjacent representable values may be odd). 536 assert(max_prec >= 2); 537 assert(max_prec <= MAX_PREC); 538 } do { 539 // The following test also catches zero, infinity, and NaN. 540 int shift = prec() - max_prec; 541 if (shift <= 0) { 542 return this; 543 } 544 545 // Round by removing the appropriate number of bits from the mantissa. Note 546 // that if the value is rounded up to a power of 2, the high-order bit 547 // position may increase, but in that case Canonicalize() will remove at 548 // least one zero bit and so the output will still have prec() <= max_prec. 549 return roundToPowerOf2(_bnExp + shift, mode); 550 } 551 552 ///////////////////////////////////////////////////////////////////////////// 553 // Operators 554 555 /// Unary plus. 556 ExactFloat opUnary(string op)() const nothrow @nogc pure 557 if (op == "+") { 558 return *this; 559 } 560 561 /// Unary minus. 562 ExactFloat opUnary(string op)() const nothrow @nogc pure 563 if (op == "-") { 564 return copyWithSign(-_sign); 565 } 566 567 /// Addition. 568 ExactFloat opBinary(string op)(in ExactFloat b) const nothrow 569 if (op == "+") { 570 return signedSum(_sign, this, b._sign, b); 571 } 572 573 /// Subtraction. 574 ExactFloat opBinary(string op)(in ExactFloat b) const nothrow 575 if (op == "-") { 576 return signedSum(_sign, this, -b._sign, b); 577 } 578 579 /// Multiplication. 580 ExactFloat opBinary(string op)(in ExactFloat b) const nothrow 581 if (op == "*") { 582 int result_sign = _sign * b._sign; 583 if (!isNormal() || !b.isNormal()) { 584 // Handle zero, infinity, and NaN according to IEEE 754-2008. 585 if (isNan()) return this; 586 if (b.isNan()) return b; 587 if (isInf()) { 588 // Infinity times zero yields NaN. 589 if (b.isZero()) return ExactFloat.nan(); 590 return ExactFloat.infinity(result_sign); 591 } 592 if (b.isInf()) { 593 if (isZero()) return ExactFloat.nan(); 594 return ExactFloat.infinity(result_sign); 595 } 596 assert(isZero() || b.isZero()); 597 return ExactFloat.signedZero(result_sign); 598 } 599 ExactFloat r; 600 r._sign = result_sign; 601 r._bnExp = _bnExp + b._bnExp; 602 r._bn = _bn * b._bn; 603 r.canonicalize(); 604 return r; 605 } 606 607 /// Support operations with any convertable types. 608 ExactFloat opBinary(string op, T)(in T b) const nothrow { 609 return opBinary!op(ExactFloat(b)); 610 } 611 612 /// Support operations with any convertable types. 613 ExactFloat opBinaryRight(string op, T)(in T a) const nothrow { 614 ExactFloat axf = ExactFloat(a); 615 return axf.opBinary!op(this); 616 } 617 618 // Division is not implemented because the result cannot be represented 619 // exactly in general. Doing this properly would require extending all the 620 // operations to support rounding to a specified precision. 621 622 // Arithmetic assignment operators (+=, -=, *=). 623 ref ExactFloat opOpAssign(string op)(in ExactFloat b) nothrow @nogc pure 624 if (op == "+" || op == "-" || op == "*") { 625 return mixin("this = this " ~ op ~ " b"); 626 } 627 628 /// Comparison operators (==, !=). 629 bool opEquals(in ExactFloat b) const nothrow @nogc pure { 630 // NaN is not equal to anything, not even itself. 631 if (isNan() || b.isNan()) return false; 632 633 // Since Canonicalize() strips low-order zero bits, all other cases 634 // (including non-normal values) require bn_exp_ to be equal. 635 if (_bnExp != b._bnExp) return false; 636 637 // Positive and negative zero are equal. 638 if (isZero() && b.isZero()) return true; 639 640 // Otherwise, the signs and mantissas must match. Note that non-normal 641 // values such as infinity have a mantissa of zero. 642 return _sign == b._sign && _bn == b._bn; 643 } 644 645 /// Support operations with any convertable types. 646 bool opEquals(T)(in T b) const nothrow @nogc pure { 647 return opEquals(ExactFloat(b)); 648 } 649 650 int scaleAndCompare(in ExactFloat b) const nothrow @nogc pure 651 in { 652 assert(isNormal() && b.isNormal() && _bnExp >= b._bnExp); 653 } do { 654 ExactFloat tmp = this; 655 tmp._bnExp <<= _bnExp - b._bnExp; 656 if (tmp._bn > b._bn) return 1; 657 else if (tmp._bn < b._bn) return -1; 658 else return 0; 659 } 660 661 bool unsignedLess(in ExactFloat b) const nothrow @nogc pure { 662 // Handle the zero/infinity cases (NaN has already been done). 663 if (isInf() || b.isZero()) return false; 664 if (isZero() || b.isInf()) return true; 665 // If the high-order bit positions differ, we are done. 666 int cmp = exp() - b.exp(); 667 if (cmp != 0) return cmp < 0; 668 // Otherwise shift one of the two values so that they both have the same 669 // bn_exp_ and then compare the mantissas. 670 return (_bnExp >= b._bnExp ? 671 scaleAndCompare(b) < 0 : b.scaleAndCompare(this) > 0); 672 } 673 674 675 /// Comparison operators (<, <=, >, >=). 676 int opCmp(in ExactFloat b) const nothrow @nogc pure { 677 // NaN is unordered compared to everything, including itself. 678 if (isNan() || b.isNan()) return -1; 679 // Positive and negative zero are equal. 680 if (isZero() && b.isZero()) return 0; 681 // Otherwise, anything negative is less than anything positive. 682 if (_sign != b._sign) return _sign > b._sign ? 1 : -1; 683 // Now we just compare absolute values. 684 bool isLess = (_sign > 0) ? unsignedLess(b) : b.unsignedLess(this); 685 if (isLess) return -1; 686 else if (this == b) return 0; 687 return 1; 688 } 689 690 /// Support operations with any convertable types. 691 int opCmp(T)(in T b) const nothrow pure { 692 return opCmp(ExactFloat(b)); 693 } 694 695 //////// Miscellaneous simple arithmetic functions. 696 697 /// Absolute value. 698 ExactFloat fabs() const nothrow @nogc pure { 699 return abs(); 700 } 701 702 ExactFloat abs() const nothrow @nogc pure { 703 return copyWithSign(+1); 704 } 705 706 private: 707 // Numbers are always formatted with at least this many significant digits. 708 // This prevents small integers from being formatted in exponential notation 709 // (e.g. 1024 formatted as 1e+03), and also avoids the confusion of having 710 // supposedly "high precision" numbers formatted with just 1 or 2 digits 711 // (e.g. 1/512 == 0.001953125 formatted as 0.002). 712 static const int MIN_SIGNIFICANT_DIGITS = 10; 713 714 // Non-normal numbers are represented using special exponent values and a 715 // mantissa of zero. Do not change these values; methods such as 716 // is_normal() make assumptions about their ordering. Non-normal numbers 717 // can have either a positive or negative sign (including zero and NaN). 718 static immutable int EXP_NAN = int.max; 719 static immutable int EXP_INFINITY = int.max - 1; 720 static immutable int EXP_ZERO = int.max - 2; 721 722 // Normal numbers are represented as (sign_ * bn_ * (2 ** bn_exp_)), where: 723 // - _sign is either +1 or -1 724 // - _bn is a BIGNUM with a positive value 725 // - _bnExp is the base-2 exponent applied to _bn. 726 int _sign = 1; 727 int _bnExp = EXP_ZERO; 728 BigInt _bn = BigInt(); 729 730 /// A standard IEEE "double" has a 53-bit mantissa consisting of a 52-bit 731 /// fraction plus an implicit leading "1" bit. 732 static immutable int DOUBLE_MANTISSA_BITS = 53; 733 734 /** 735 * Convert an ExactFloat with no more than 53 bits in its mantissa to a 736 * "double". This method handles non-normal values (NaN, etc). 737 */ 738 double toDoubleHelper() const nothrow @nogc pure 739 in { 740 assert(prec() <= DOUBLE_MANTISSA_BITS); 741 } do { 742 if (!isNormal()) { 743 if (isZero()) { 744 return math.copysign(0, cast(double) _sign); 745 } 746 if (isInf()) { 747 return math.copysign(double.infinity, cast(double) _sign); 748 } 749 return math.copysign(double.nan, cast(double) _sign); 750 } 751 long d_mantissa = _bn.toLong(); 752 // We rely on ldexp() to handle overflow and underflow. (It will return a 753 // signed zero or infinity if the result is too small or too large.) 754 return _sign * math.ldexp(cast(double) d_mantissa, _bnExp); 755 } 756 757 /** 758 * Round an ExactFloat so that it is a multiple of (2 ** bit_exp), using the 759 * given rounding mode. 760 */ 761 ExactFloat roundToPowerOf2(int bit_exp, RoundingMode mode) const nothrow pure 762 in { 763 assert(bit_exp >= MIN_EXP - MAX_PREC); 764 assert(bit_exp <= MAX_EXP); 765 } do { 766 // If the exponent is already large enough, or the value is zero, infinity, 767 // or NaN, then there is nothing to do. 768 int shift = bit_exp - _bnExp; 769 if (shift <= 0) return this; 770 assert(isNormal()); 771 772 // Convert rounding up/down to toward/away from zero, so that we don't need 773 // to consider the sign of the number from this point onward. 774 if (mode == RoundingMode.ROUND_TOWARD_POSITIVE) { 775 mode = (_sign > 0) ? RoundingMode.ROUND_AWAY_FROM_ZERO : RoundingMode.ROUND_TOWARD_ZERO; 776 } else if (mode == RoundingMode.ROUND_TOWARD_NEGATIVE) { 777 mode = (_sign > 0) ? RoundingMode.ROUND_TOWARD_ZERO : RoundingMode.ROUND_AWAY_FROM_ZERO; 778 } 779 780 // Rounding consists of right-shifting the mantissa by "shift", and then 781 // possibly incrementing the result (depending on the rounding mode, the 782 // bits that were discarded, and sometimes the lowest kept bit). The 783 // following code figures out whether we need to increment. 784 ExactFloat r; 785 bool increment = false; 786 if (mode == RoundingMode.ROUND_TOWARD_ZERO) { 787 // Never increment. 788 } else if (mode == RoundingMode.ROUND_TIES_AWAY_FROM_ZERO) { 789 // Increment if the highest discarded bit is 1. 790 if (isBitSet(_bn, shift - 1)) 791 increment = true; 792 } else if (mode == RoundingMode.ROUND_AWAY_FROM_ZERO) { 793 // Increment unless all discarded bits are zero. 794 if (extCountLowZeroBits(_bn) < shift) 795 increment = true; 796 } else { 797 assert(mode == RoundingMode.ROUND_TIES_TO_EVEN); 798 // Let "w/xyz" denote a mantissa where "w" is the lowest kept bit and 799 // "xyz" are the discarded bits. Then using regexp notation: 800 // ./0.* -> Don't increment (fraction < 1/2) 801 // 0/10* -> Don't increment (fraction = 1/2, kept part even) 802 // 1/10* -> Increment (fraction = 1/2, kept part odd) 803 // ./1.*1.* -> Increment (fraction > 1/2) 804 if (isBitSet(_bn, shift - 1) 805 && ((isBitSet(_bn, shift) 806 || extCountLowZeroBits(_bn) < shift - 1))) { 807 increment = true; 808 } 809 } 810 r._bnExp = _bnExp + shift; 811 r._bn = _bn >> shift; 812 if (increment) { 813 r._bn += 1; 814 } 815 r._sign = _sign; 816 r.canonicalize(); 817 return r; 818 } 819 820 private static bool isBitSet(in BigInt bn, int bitNum) nothrow @nogc pure { 821 size_t digitNum = bitNum / (8 * ulong.sizeof); 822 size_t shift = bitNum % (8 * ulong.sizeof); 823 ulong digit = bn.getDigit!ulong(digitNum); 824 return (digit & (1uL << shift)) != 0; 825 } 826 827 /** 828 * Count the number of low-order zero bits in the given BIGNUM (ignoring its 829 * sign). Returns 0 if the argument is zero. 830 */ 831 private static int extCountLowZeroBits(in BigInt bn) nothrow @nogc pure { 832 int count = 0; 833 for (int i = 0; i < bn.ulongLength(); ++i) { 834 ulong w = bn.getDigit!ulong(i); 835 if (w == 0) { 836 count += 8 * ulong.sizeof; 837 } else { 838 for (; (w & 1) == 0; w >>= 1) { 839 ++count; 840 } 841 break; 842 } 843 } 844 return count; 845 } 846 847 /** 848 * Convert the ExactFloat to a decimal value of the form 0.ddd * (10 ** x), 849 * with at most "max_digits" significant digits (trailing zeros are removed). 850 * Set (*digits) to the ASCII digits and return the decimal exponent "x". 851 */ 852 int getDecimalDigits(int max_digits, out char[] digits) const 853 in { 854 assert(isNormal()); 855 } do { 856 // Convert the value to the form (bn * (10 ** bn_exp10)) where "bn" is a 857 // positive integer (BIGNUM). 858 BigInt bn = BigInt(); 859 int bn_exp10; 860 if (_bnExp >= 0) { 861 // The easy case: bn = bn_ * (2 ** bn_exp_)), bn_exp10 = 0. 862 bn = _bn << _bnExp; 863 bn_exp10 = 0; 864 } else { 865 // Set bn = bn_ * (5 ** -bn_exp_) and bn_exp10 = bn_exp_. This is 866 // equivalent to the original value of (bn_ * (2 ** bn_exp_)). 867 bn = 5; 868 bn ^^= -_bnExp; 869 bn *= _bn; 870 bn_exp10 = _bnExp; 871 } 872 // Now convert "bn" to a decimal string. 873 string all_digits = format.format("%d", bn); 874 assert(all_digits != null); 875 // Check whether we have too many digits and round if necessary. 876 size_t num_digits = all_digits.length; 877 if (num_digits <= max_digits) { 878 digits = all_digits.dup; 879 } else { 880 digits = all_digits[0 .. max_digits].dup; 881 // Standard "printf" formatting rounds ties to an even number. This means 882 // that we round up (away from zero) if highest discarded digit is '5' or 883 // more, unless all other discarded digits are zero in which case we round 884 // up only if the lowest kept digit is odd. 885 if (all_digits[max_digits] >= '5' && 886 ((all_digits[max_digits-1] & 1) == 1 || 887 !algorithm.findAmong(all_digits[max_digits + 1 .. $], "123456789").empty)) { 888 // This can increase the number of digits by 1, but in that case at 889 // least one trailing zero will be stripped off below. 890 incrementDecimalDigits(digits); 891 } 892 // Adjust the base-10 exponent to reflect the digits we have removed. 893 bn_exp10 += num_digits - max_digits; 894 } 895 896 // Now strip any trailing zeros. 897 assert(digits[0] != '0'); 898 size_t pos = digits.length - 1; 899 while (digits[pos] == '0') { 900 --pos; 901 } 902 if (pos < digits.length - 1) { 903 bn_exp10 += digits.length - pos; 904 digits.length = pos + 1; 905 } 906 assert(digits.length <= max_digits); 907 908 // Finally, we adjust the base-10 exponent so that the mantissa is a 909 // fraction in the range [0.1, 1) rather than an integer. 910 return bn_exp10 + cast(int) digits.length; 911 } 912 913 /// Increment an unsigned integer represented as a string of ASCII digits. 914 private static void incrementDecimalDigits(char[] digits) nothrow { 915 size_t pos = digits.length; 916 while (--pos >= 0) { 917 if (digits[pos] < '9') { 918 ++digits[pos]; 919 return; 920 } 921 digits[pos] = '0'; 922 } 923 digits = "1" ~ digits; 924 } 925 926 /// Return a_sign * fabs(a) + b_sign * fabs(b). Used to implement addition 927 /// and subtraction. 928 static ExactFloat signedSum(int a_sign, ExactFloat a, int b_sign, ExactFloat b) nothrow { 929 if (!a.isNormal() || !b.isNormal()) { 930 // Handle zero, infinity, and NaN according to IEEE 754-2008. 931 if (a.isNan()) return a; 932 if (b.isNan()) return b; 933 if (a.isInf()) { 934 // Adding two infinities with opposite sign yields NaN. 935 if (b.isInf() && a_sign != b_sign) return ExactFloat.nan(); 936 return infinity(a_sign); 937 } 938 if (b.isInf()) return infinity(b_sign); 939 if (a.isZero()) { 940 if (!b.isZero()) return b.copyWithSign(b_sign); 941 // Adding two zeros with the same sign preserves the sign. 942 if (a_sign == b_sign) return signedZero(a_sign); 943 // Adding two zeros of opposite sign produces +0. 944 return signedZero(+1); 945 } 946 assert(b.isZero()); 947 return a.copyWithSign(a_sign); 948 } 949 950 // Swap the numbers if necessary so that "a" has the larger bn_exp_. 951 if (a._bnExp < b._bnExp) { 952 algorithm.swap(a_sign, b_sign); 953 algorithm.swap(a, b); 954 } 955 // Shift "a" if necessary so that both values have the same bn_exp_. 956 ExactFloat r; 957 if (a._bnExp > b._bnExp) { 958 r._bn = a._bn << (a._bnExp - b._bnExp); 959 a = r; // The only field of "a" used below is bn_. 960 } 961 r._bnExp = b._bnExp; 962 if (a_sign == b_sign) { 963 r._bn = a._bn + b._bn; 964 r._sign = a_sign; 965 } else { 966 // Note that the BIGNUM documentation is out of date -- all methods now 967 // allow the result to be the same as any input argument, so it is okay if 968 // (a == &r) due to the shift above. 969 r._bn = a._bn - b._bn; 970 if (r._bn == 0) { 971 r._sign = +1; 972 } else if (r._bn < 0) { 973 // The magnitude of "b" was larger. 974 r._sign = b_sign; 975 r._bn = -r._bn; 976 } else { 977 // They were equal, or the magnitude of "a" was larger. 978 r._sign = a_sign; 979 } 980 } 981 r.canonicalize(); 982 return r; 983 } 984 985 /** 986 * Convert an ExactFloat to its canonical form. Underflow results in signed 987 * zero, overflow results in signed infinity, and precision overflow results 988 * in NaN. A zero mantissa is converted to the canonical zero value with 989 * the given sign; otherwise the mantissa is normalized so that its low bit 990 * is 1. Non-normal numbers are left unchanged. 991 */ 992 void canonicalize() nothrow pure { 993 if (!isNormal()) return; 994 995 // Underflow/overflow occurs if exp() is not in [kMinExp, kMaxExp]. 996 // We also convert a zero mantissa to signed zero. 997 int my_exp = exp(); 998 if (my_exp < MIN_EXP || _bn == 0) { 999 setZero(_sign); 1000 } else if (my_exp > MAX_EXP) { 1001 setInf(_sign); 1002 } else if (_bn % 2 == 1) { 1003 // Remove any low-order zero bits from the mantissa. 1004 assert(_bn != 0); 1005 int shift = countLowZeroBits(_bn); 1006 if (shift > 0) { 1007 _bn = _bn >> shift; 1008 _bnExp += shift; 1009 } 1010 } 1011 // If the mantissa has too many bits, we replace it by NaN to indicate 1012 // that an inexact calculation has occurred. 1013 if (prec() > MAX_PREC) { 1014 setNan(); 1015 } 1016 } 1017 1018 /** 1019 * Count the number of low-order zero bits in the given BIGNUM (ignoring its 1020 * sign). Returns 0 if the argument is zero. 1021 */ 1022 private static int countLowZeroBits(in BigInt bn) nothrow pure { 1023 int count = 0; 1024 for (int i = 0; i < bn.ulongLength(); ++i) { 1025 ulong w = bn.getDigit!ulong(i); 1026 if (w == 0) { 1027 count += 8 * ulong.sizeof; 1028 } else { 1029 for (; (w & 1) == 0; w >>= 1) { 1030 ++count; 1031 } 1032 break; 1033 } 1034 } 1035 return count; 1036 } 1037 1038 /** 1039 * Return an ExactFloat with the magnitude of this ExactFloat and the given 1040 * sign. (Similar to copysign, except that the sign is given explicitly 1041 * rather than being copied from another ExactFloat.) 1042 */ 1043 ExactFloat copyWithSign(int sign) const nothrow @nogc pure { 1044 ExactFloat r = this; 1045 r._sign = sign; 1046 return r; 1047 } 1048 1049 /** 1050 * Convert an ExactFloat to an integer of type "T" using the given rounding 1051 * mode. The type "T" must be signed. Returns the largest possible integer 1052 * for NaN, and clamps out of range values to the largest or smallest 1053 * possible values. 1054 */ 1055 T toInteger(T)(RoundingMode mode) const nothrow 1056 if (traits.isIntegral!T && traits.isSigned!T) { 1057 ExactFloat r = roundToPowerOf2(0, mode); 1058 if (r.isNan()) return T.min; 1059 if (r.isZero()) return 0; 1060 if (!r.isInf()) { 1061 // If the unsigned value has more than 63 bits it is always clamped. 1062 if (r.exp() < 64) { 1063 long value = r._bn.toLong() << r._bnExp; 1064 if (r._sign < 0) value = -value; 1065 return algorithm.max(T.min, algorithm.min(T.max, value)); 1066 } 1067 } 1068 return (r._sign < 0) ? T.min : T.max; 1069 } 1070 1071 // Log a fatal error message (used for unimplemented methods). 1072 static ExactFloat unimplemented() nothrow { 1073 assert(false, "Unimplemented ExactFloat method called."); 1074 } 1075 } 1076 1077 1078 ////////////////////////////////////////////////////////////////////// 1079 // Math Intrinsics 1080 // 1081 // The math intrinsics currently supported by ExactFloat are listed below. 1082 // Except as noted, they behave identically to the usual glibc intrinsics 1083 // except that they have greater precision. See the man pages for more 1084 // information. 1085 // 1086 // They can be used in a consistent manner with functions used for double 1087 // and real with Uniform Function Call Syntax. 1088 // 1089 // For example: 1090 // double dval = 5.4; 1091 // ExactFloat xfval = 5.4; // Consistent, no problem. 1092 // 1093 // import math = std.math; 1094 // math.fabs(dval); 1095 // math.fabs(xfval); // Error: Does not exist! 1096 // 1097 // import std.math; 1098 // import s2.util.math.exactfloat; 1099 // fabs(dval); // OK, but symbol fabs is in the main namespace. 1100 // dval.fabs(); // OK, uses UFCS. 1101 // fabs(xfval); // OK as well, but function comes from exactfloat. 1102 // xfval.fabs(); // OK, uses UFCS. 1103 1104 1105 //////// Miscellaneous simple arithmetic functions. 1106 1107 /// Absolute value. 1108 ExactFloat fabs(in ExactFloat a) nothrow @nogc pure { 1109 return abs(a); 1110 } 1111 1112 ExactFloat abs(in ExactFloat a) nothrow @nogc pure { 1113 return a.copyWithSign(+1); 1114 } 1115 1116 /// Maximum of two values. 1117 ExactFloat fmax(in ExactFloat a, in ExactFloat b) nothrow { 1118 // If one argument is NaN, return the other argument. 1119 if (a.isNan()) return b; 1120 if (b.isNan()) return a; 1121 // Not required by IEEE 754, but we prefer +0 over -0. 1122 if (a.sign != b.sign) { 1123 return (a.sign() < b.sign()) ? b : a; 1124 } 1125 return (a < b) ? b : a; 1126 } 1127 1128 /// Minimum of two values. 1129 ExactFloat fmin(in ExactFloat a, in ExactFloat b) nothrow { 1130 // If one argument is NaN, return the other argument. 1131 if (a.isNan()) return b; 1132 if (b.isNan()) return a; 1133 // Not required by IEEE 754, but we prefer -0 over +0. 1134 if (a.sign != b.sign) { 1135 return (a.sign < b.sign) ? a : b; 1136 } 1137 return (a < b) ? a : b; 1138 } 1139 1140 /// Positive difference: max(a - b, 0). 1141 ExactFloat fdim(in ExactFloat a, in ExactFloat b) nothrow { 1142 // This formulation has the correct behavior for NaNs. 1143 return (a <= b) ? ExactFloat(0) : (a - b); 1144 } 1145 1146 //////// Integer rounding functions that return ExactFloat values. 1147 1148 /// Round up to the nearest integer. 1149 ExactFloat ceil(in ExactFloat a) nothrow { 1150 return a.roundToPowerOf2(0, ExactFloat.RoundingMode.ROUND_TOWARD_POSITIVE); 1151 } 1152 1153 /// Round down to the nearest integer. 1154 ExactFloat floor(in ExactFloat a) nothrow { 1155 return a.roundToPowerOf2(0, ExactFloat.RoundingMode.ROUND_TOWARD_NEGATIVE); 1156 } 1157 1158 /// Round to the nearest integer not larger in absolute value. 1159 /// For example: f(-1.9) = -1, f(2.9) = 2. 1160 ExactFloat trunc(in ExactFloat a) nothrow { 1161 return a.roundToPowerOf2(0, ExactFloat.RoundingMode.ROUND_TOWARD_ZERO); 1162 } 1163 1164 /// Round to the nearest integer, rounding halfway cases away from zero. 1165 /// For example: f(-0.5) = -1, f(0.5) = 1, f(1.5) = 2, f(2.5) = 3. 1166 ExactFloat round(in ExactFloat a) nothrow { 1167 return a.roundToPowerOf2(0, ExactFloat.RoundingMode.ROUND_TIES_AWAY_FROM_ZERO); 1168 } 1169 1170 /// Round to the nearest integer, rounding halfway cases to an even integer. 1171 /// For example: f(-0.5) = 0, f(0.5) = 0, f(1.5) = 2, f(2.5) = 2. 1172 ExactFloat rint(in ExactFloat a) nothrow { 1173 return a.roundToPowerOf2(0, ExactFloat.RoundingMode.ROUND_TIES_TO_EVEN); 1174 } 1175 1176 /// A synonym for rint(). 1177 ExactFloat nearbyint(in ExactFloat a) nothrow { return rint(a); } 1178 1179 //////// Integer rounding functions that return C++ integer types. 1180 1181 /// Like rint(), but rounds to the nearest "long" value. Returns the 1182 /// minimum/maximum possible integer if the value is out of range. 1183 long lrint(in ExactFloat a) nothrow { 1184 return a.toInteger!long(ExactFloat.RoundingMode.ROUND_TIES_TO_EVEN); 1185 } 1186 1187 /// Like round(), but rounds to the nearest "long" value. Returns the 1188 /// minimum/maximum possible integer if the value is out of range. 1189 long lround(in ExactFloat a) nothrow { 1190 return a.toInteger!long(ExactFloat.RoundingMode.ROUND_TIES_AWAY_FROM_ZERO); 1191 } 1192 1193 //////// Remainder functions. 1194 1195 /// The remainder of dividing "a" by "b", where the quotient is rounded toward 1196 /// zero to the nearest integer. Similar to (a - trunc(a / b) * b). 1197 ExactFloat fmod(in ExactFloat a, in ExactFloat b) nothrow { 1198 // Note that it is possible to implement this operation exactly, it just 1199 // hasn't been done. 1200 return ExactFloat.unimplemented(); 1201 } 1202 1203 /// The remainder of dividing "a" by "b", where the quotient is rounded to the 1204 /// nearest integer, rounding halfway cases to an even integer. Similar to 1205 /// (a - rint(a / b) * b). 1206 ExactFloat remainder(in ExactFloat a, in ExactFloat b) nothrow { 1207 // Note that it is possible to implement this operation exactly, it just 1208 // hasn't been done. 1209 return ExactFloat.unimplemented(); 1210 } 1211 1212 /// A synonym for remainder(). 1213 ExactFloat drem(in ExactFloat a, in ExactFloat b) nothrow { 1214 return remainder(a, b); 1215 } 1216 1217 /** 1218 * Break the argument "a" into integer and fractional parts, each of which 1219 * has the same sign as "a". The fractional part is returned, and the 1220 * integer part is stored in the output parameter "i_ptr". Both output 1221 * values are set to have the same maximum precision as "a". 1222 */ 1223 ExactFloat modf(in ExactFloat a, out ExactFloat i_ptr) nothrow { 1224 // Note that it is possible to implement this operation exactly, it just 1225 // hasn't been done. 1226 return ExactFloat.unimplemented(); 1227 } 1228 1229 //////// Floating-point manipulation functions. 1230 1231 /** 1232 * Return an ExactFloat with the magnitude of "a" and the sign bit of "b". 1233 * (Note that an IEEE zero can be either positive or negative.) 1234 */ 1235 ExactFloat copysign(in ExactFloat a, in ExactFloat b) nothrow @nogc pure { 1236 return a.copyWithSign(b.sign); 1237 } 1238 1239 /** 1240 * Convert "a" to a normalized fraction in the range \[0.5, 1\) times a power 1241 * of two. Return the fraction and set "exp" to the exponent. If "a" is 1242 * zero, infinity, or NaN then return "a" and set "exp" to zero. 1243 */ 1244 ExactFloat frexp(in ExactFloat a, out int exp) nothrow { 1245 if (!a.isNormal()) { 1246 // If a == 0, exp should be zero. If a.is_inf() or a.is_nan(), exp is not 1247 // defined but the glibc implementation returns zero. 1248 exp = 0; 1249 return a; 1250 } 1251 exp = a.exp(); 1252 return ldexp(a, -a.exp()); 1253 } 1254 1255 /// Return "a" multiplied by 2 raised to the power "exp". 1256 ExactFloat ldexp(in ExactFloat a, int exp) nothrow { 1257 if (!a.isNormal()) return a; 1258 1259 // To prevent integer overflow, we first clamp "exp" so that 1260 // (kMinExp - 1) <= (a_exp + exp) <= (kMaxExp + 1). 1261 int a_exp = a.exp(); 1262 exp = algorithm.min(ExactFloat.MAX_EXP + 1 - a_exp, 1263 algorithm.max(ExactFloat.MIN_EXP - 1 + a_exp, exp)); 1264 1265 // Now modify the exponent and check for overflow/underflow. 1266 ExactFloat r = a; 1267 r._bnExp += exp; 1268 r.canonicalize(); 1269 return r; 1270 } 1271 1272 /// A synonym for ldexp(). 1273 ExactFloat scalbn(in ExactFloat a, int exp) nothrow { 1274 return ldexp(a, exp); 1275 } 1276 1277 /// A version of ldexp() where "exp" is a long integer. 1278 ExactFloat scalbln(in ExactFloat a, long exp) nothrow { 1279 // Clamp the exponent to the range of "int" in order to avoid truncation. 1280 exp = algorithm.max(cast(long) int.min, algorithm.min(cast(long) int.max, exp)); 1281 return ldexp(a, cast(int) exp); 1282 } 1283 1284 /** 1285 * Convert "a" to a normalized fraction in the range \[1,2\) times a power of 1286 * two, and return the exponent value as an integer. This is equivalent to 1287 * lrint(floor(log2(fabs(a)))) but it is computed more efficiently. Returns 1288 * the constants documented in the man page for zero, infinity, or NaN. 1289 */ 1290 int ilogb(in ExactFloat a) nothrow { 1291 if (a.isZero()) return math.FP_ILOGB0; 1292 if (a.isInf()) return int.max; 1293 if (a.isNan()) return math.FP_ILOGBNAN; 1294 // a.exp() assumes the significand is in the range [0.5, 1). 1295 return a.exp() - 1; 1296 } 1297 1298 /** 1299 * Convert "a" to a normalized fraction in the range \[1,2\) times a power of 1300 * two, and return the exponent value as an ExactFloat. This is equivalent to 1301 * floor(log2(fabs(a))) but it is computed more efficiently. 1302 */ 1303 ExactFloat logb(in ExactFloat a) nothrow { 1304 if (a.isZero()) return ExactFloat.infinity(-1); 1305 if (a.isInf()) return ExactFloat.infinity(+1); // Even if a < 0. 1306 if (a.isNan()) return a; 1307 // exp() assumes the significand is in the range [0.5,1). 1308 return ExactFloat(a.exp() - 1); 1309 } 1310 1311