12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547 |
- /*
- * QEMU float support
- *
- * The code in this source file is derived from release 2a of the SoftFloat
- * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
- * some later contributions) are provided under that license, as detailed below.
- * It has subsequently been modified by contributors to the QEMU Project,
- * so some portions are provided under:
- * the SoftFloat-2a license
- * the BSD license
- * GPL-v2-or-later
- *
- * Any future contributions to this file after December 1st 2014 will be
- * taken to be licensed under the Softfloat-2a license unless specifically
- * indicated otherwise.
- */
- static void partsN(return_nan)(FloatPartsN *a, float_status *s)
- {
- switch (a->cls) {
- case float_class_snan:
- float_raise(float_flag_invalid | float_flag_invalid_snan, s);
- if (s->default_nan_mode) {
- parts_default_nan(a, s);
- } else {
- parts_silence_nan(a, s);
- }
- break;
- case float_class_qnan:
- if (s->default_nan_mode) {
- parts_default_nan(a, s);
- }
- break;
- default:
- g_assert_not_reached();
- }
- }
- static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
- float_status *s)
- {
- if (is_snan(a->cls) || is_snan(b->cls)) {
- float_raise(float_flag_invalid | float_flag_invalid_snan, s);
- }
- if (s->default_nan_mode) {
- parts_default_nan(a, s);
- } else {
- int cmp = frac_cmp(a, b);
- if (cmp == 0) {
- cmp = a->sign < b->sign;
- }
- if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
- a = b;
- }
- if (is_snan(a->cls)) {
- parts_silence_nan(a, s);
- }
- }
- return a;
- }
- static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
- FloatPartsN *c, float_status *s,
- int ab_mask, int abc_mask)
- {
- int which;
- if (unlikely(abc_mask & float_cmask_snan)) {
- float_raise(float_flag_invalid | float_flag_invalid_snan, s);
- }
- which = pickNaNMulAdd(a->cls, b->cls, c->cls,
- ab_mask == float_cmask_infzero, s);
- if (s->default_nan_mode || which == 3) {
- /*
- * Note that this check is after pickNaNMulAdd so that function
- * has an opportunity to set the Invalid flag for infzero.
- */
- parts_default_nan(a, s);
- return a;
- }
- switch (which) {
- case 0:
- break;
- case 1:
- a = b;
- break;
- case 2:
- a = c;
- break;
- default:
- g_assert_not_reached();
- }
- if (is_snan(a->cls)) {
- parts_silence_nan(a, s);
- }
- return a;
- }
- /*
- * Canonicalize the FloatParts structure. Determine the class,
- * unbias the exponent, and normalize the fraction.
- */
- static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
- const FloatFmt *fmt)
- {
- if (unlikely(p->exp == 0)) {
- if (likely(frac_eqz(p))) {
- p->cls = float_class_zero;
- } else if (status->flush_inputs_to_zero) {
- float_raise(float_flag_input_denormal, status);
- p->cls = float_class_zero;
- frac_clear(p);
- } else {
- int shift = frac_normalize(p);
- p->cls = float_class_normal;
- p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
- }
- } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
- p->cls = float_class_normal;
- p->exp -= fmt->exp_bias;
- frac_shl(p, fmt->frac_shift);
- p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
- } else if (likely(frac_eqz(p))) {
- p->cls = float_class_inf;
- } else {
- frac_shl(p, fmt->frac_shift);
- p->cls = (parts_is_snan_frac(p->frac_hi, status)
- ? float_class_snan : float_class_qnan);
- }
- }
- /*
- * Round and uncanonicalize a floating-point number by parts. There
- * are FRAC_SHIFT bits that may require rounding at the bottom of the
- * fraction; these bits will be removed. The exponent will be biased
- * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
- *
- * The rounding increment is selected from s->float_rounding_mode. The
- * result is written back into *p (fraction, biased exponent and, for
- * overflow to infinity or underflow to zero, the class), and the
- * accumulated exception flags are raised once at the end.
- *
- * Four paths are taken, depending on the biased exponent:
- *  - exp > 0: round, then check for overflow. Arm alternative half
- *    precision saturates and raises invalid; other formats either
- *    re-bias the exponent (s->rebias_overflow), saturate to an all-ones
- *    fraction with exponent EXP_MAX-1, or become infinity.
- *  - exp <= 0 with s->rebias_underflow: re-bias the exponent and round.
- *  - exp <= 0 with s->flush_to_zero: return zero and raise
- *    float_flag_output_denormal.
- *  - otherwise: denormalize, recompute the rounding increment for the
- *    shifted fraction, round, and raise underflow when the result is
- *    both tiny and inexact.
- */
- static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
- const FloatFmt *fmt)
- {
- const int exp_max = fmt->exp_max;
- const int frac_shift = fmt->frac_shift;
- const uint64_t round_mask = fmt->round_mask;
- const uint64_t frac_lsb = round_mask + 1; /* lowest retained bit; wraps to 0 if round_mask is all ones */
- const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); /* top bit of round_mask, assuming a contiguous low-bit mask */
- const uint64_t roundeven_mask = round_mask | frac_lsb;
- uint64_t inc;
- bool overflow_norm = false; /* on overflow, saturate to a finite value instead of infinity */
- int exp, flags = 0;
- switch (s->float_rounding_mode) {
- case float_round_nearest_even:
- if (N > 64 && frac_lsb == 0) { /* retained lsb is bit 0 of frac_hi */
- inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
- ? frac_lsbm1 : 0);
- } else {
- inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
- ? frac_lsbm1 : 0);
- }
- break;
- case float_round_ties_away:
- inc = frac_lsbm1;
- break;
- case float_round_to_zero:
- overflow_norm = true;
- inc = 0;
- break;
- case float_round_up:
- inc = p->sign ? 0 : round_mask;
- overflow_norm = p->sign;
- break;
- case float_round_down:
- inc = p->sign ? round_mask : 0;
- overflow_norm = !p->sign;
- break;
- case float_round_to_odd:
- overflow_norm = true;
- /* fall through */
- case float_round_to_odd_inf:
- if (N > 64 && frac_lsb == 0) {
- inc = p->frac_hi & 1 ? 0 : round_mask;
- } else {
- inc = p->frac_lo & frac_lsb ? 0 : round_mask;
- }
- break;
- default:
- g_assert_not_reached();
- }
- exp = p->exp + fmt->exp_bias; /* re-apply the format's exponent bias */
- if (likely(exp > 0)) {
- if (p->frac_lo & round_mask) {
- flags |= float_flag_inexact;
- if (frac_addi(p, p, inc)) { /* nonzero return is handled as a carry out: renormalize */
- frac_shr(p, 1);
- p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
- exp++;
- }
- p->frac_lo &= ~round_mask;
- }
- if (fmt->arm_althp) {
- /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
- if (unlikely(exp > exp_max)) {
- /* Overflow. Return the maximum normal. */
- flags = float_flag_invalid; /* replaces, rather than ORs into, any inexact set above */
- exp = exp_max;
- frac_allones(p);
- p->frac_lo &= ~round_mask;
- }
- } else if (unlikely(exp >= exp_max)) {
- flags |= float_flag_overflow;
- if (s->rebias_overflow) {
- exp -= fmt->exp_re_bias;
- } else if (overflow_norm) {
- flags |= float_flag_inexact;
- exp = exp_max - 1;
- frac_allones(p);
- p->frac_lo &= ~round_mask;
- } else {
- flags |= float_flag_inexact;
- p->cls = float_class_inf;
- exp = exp_max;
- frac_clear(p);
- }
- }
- frac_shr(p, frac_shift);
- } else if (unlikely(s->rebias_underflow)) {
- flags |= float_flag_underflow;
- exp += fmt->exp_re_bias;
- if (p->frac_lo & round_mask) {
- flags |= float_flag_inexact;
- if (frac_addi(p, p, inc)) {
- frac_shr(p, 1);
- p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
- exp++;
- }
- p->frac_lo &= ~round_mask;
- }
- frac_shr(p, frac_shift);
- } else if (s->flush_to_zero) {
- flags |= float_flag_output_denormal;
- p->cls = float_class_zero;
- exp = 0;
- frac_clear(p);
- } else {
- bool is_tiny = s->tininess_before_rounding || exp < 0;
- if (!is_tiny) {
- FloatPartsN discard;
- is_tiny = !frac_addi(&discard, p, inc); /* exp == 0 here: tiny unless rounding would carry out */
- }
- frac_shrjam(p, 1 - exp); /* denormalize; NOTE(review): presumably jams shifted-out bits into a sticky bit - confirm */
- if (p->frac_lo & round_mask) {
- /* Need to recompute round-to-even/round-to-odd. */
- switch (s->float_rounding_mode) {
- case float_round_nearest_even:
- if (N > 64 && frac_lsb == 0) {
- inc = ((p->frac_hi & 1) ||
- (p->frac_lo & round_mask) != frac_lsbm1
- ? frac_lsbm1 : 0);
- } else {
- inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
- ? frac_lsbm1 : 0);
- }
- break;
- case float_round_to_odd:
- case float_round_to_odd_inf:
- if (N > 64 && frac_lsb == 0) {
- inc = p->frac_hi & 1 ? 0 : round_mask;
- } else {
- inc = p->frac_lo & frac_lsb ? 0 : round_mask;
- }
- break;
- default:
- break; /* other modes keep the increment chosen before the shift */
- }
- flags |= float_flag_inexact;
- frac_addi(p, p, inc);
- p->frac_lo &= ~round_mask;
- }
- exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0; /* 1 if rounding carried into the implicit bit, else 0 */
- frac_shr(p, frac_shift);
- if (is_tiny && (flags & float_flag_inexact)) {
- flags |= float_flag_underflow;
- }
- if (exp == 0 && frac_eqz(p)) {
- p->cls = float_class_zero;
- }
- }
- p->exp = exp;
- float_raise(flags, s);
- }
- static void partsN(uncanon)(FloatPartsN *p, float_status *s,
- const FloatFmt *fmt)
- {
- if (likely(p->cls == float_class_normal)) {
- parts_uncanon_normal(p, s, fmt);
- } else {
- switch (p->cls) {
- case float_class_zero:
- p->exp = 0;
- frac_clear(p);
- return;
- case float_class_inf:
- g_assert(!fmt->arm_althp);
- p->exp = fmt->exp_max;
- frac_clear(p);
- return;
- case float_class_qnan:
- case float_class_snan:
- g_assert(!fmt->arm_althp);
- p->exp = fmt->exp_max;
- frac_shr(p, fmt->frac_shift);
- return;
- default:
- break;
- }
- g_assert_not_reached();
- }
- }
- /*
- * Returns the result of adding or subtracting the values of the
- * floating-point values `a' and `b'. The operation is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic.
- */
- static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
- float_status *s, bool subtract)
- {
- bool b_sign = b->sign ^ subtract;
- int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
- if (a->sign != b_sign) {
- /* Subtraction */
- if (likely(ab_mask == float_cmask_normal)) {
- if (parts_sub_normal(a, b)) {
- return a;
- }
- /* Subtract was exact, fall through to set sign. */
- ab_mask = float_cmask_zero;
- }
- if (ab_mask == float_cmask_zero) {
- a->sign = s->float_rounding_mode == float_round_down;
- return a;
- }
- if (unlikely(ab_mask & float_cmask_anynan)) {
- goto p_nan;
- }
- if (ab_mask & float_cmask_inf) {
- if (a->cls != float_class_inf) {
- /* N - Inf */
- goto return_b;
- }
- if (b->cls != float_class_inf) {
- /* Inf - N */
- return a;
- }
- /* Inf - Inf */
- float_raise(float_flag_invalid | float_flag_invalid_isi, s);
- parts_default_nan(a, s);
- return a;
- }
- } else {
- /* Addition */
- if (likely(ab_mask == float_cmask_normal)) {
- parts_add_normal(a, b);
- return a;
- }
- if (ab_mask == float_cmask_zero) {
- return a;
- }
- if (unlikely(ab_mask & float_cmask_anynan)) {
- goto p_nan;
- }
- if (ab_mask & float_cmask_inf) {
- a->cls = float_class_inf;
- return a;
- }
- }
- if (b->cls == float_class_zero) {
- g_assert(a->cls == float_class_normal);
- return a;
- }
- g_assert(a->cls == float_class_zero);
- g_assert(b->cls == float_class_normal);
- return_b:
- b->sign = b_sign;
- return b;
- p_nan:
- return parts_pick_nan(a, b, s);
- }
- /*
- * Returns the result of multiplying the floating-point values `a' and
- * `b'. The operation is performed according to the IEC/IEEE Standard
- * for Binary Floating-Point Arithmetic.
- */
- static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
- float_status *s)
- {
- int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
- bool sign = a->sign ^ b->sign;
- if (likely(ab_mask == float_cmask_normal)) {
- FloatPartsW tmp;
- frac_mulw(&tmp, a, b);
- frac_truncjam(a, &tmp);
- a->exp += b->exp + 1;
- if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
- frac_add(a, a, a);
- a->exp -= 1;
- }
- a->sign = sign;
- return a;
- }
- /* Inf * Zero == NaN */
- if (unlikely(ab_mask == float_cmask_infzero)) {
- float_raise(float_flag_invalid | float_flag_invalid_imz, s);
- parts_default_nan(a, s);
- return a;
- }
- if (unlikely(ab_mask & float_cmask_anynan)) {
- return parts_pick_nan(a, b, s);
- }
- /* Multiply by 0 or Inf */
- if (ab_mask & float_cmask_inf) {
- a->cls = float_class_inf;
- a->sign = sign;
- return a;
- }
- g_assert(ab_mask & float_cmask_zero);
- a->cls = float_class_zero;
- a->sign = sign;
- return a;
- }
- /*
- * Returns the result of multiplying the floating-point values `a' and
- * `b' then adding 'c', with no intermediate rounding step after the
- * multiplication. The operation is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
- * The flags argument allows the caller to select negation of the
- * addend, the intermediate product, or the final result. (The
- * difference between this and having the caller do a separate
- * negation is that negating externally will flip the sign bit on NaNs.)
- *
- * Requires A and C extracted into a double-sized structure to provide the
- * extra space for the widening multiply.
- *
- * NaN operands are delegated to parts_pick_nan_muladd() before any sign
- * manipulation, and whatever that returns is returned. Otherwise the
- * result is built in *a and a is returned. Note that c->sign is modified
- * in place when float_muladd_negate_c is set.
- *
- * float_muladd_halve_result decrements the exponent of a normal result
- * only; float_muladd_negate_result flips the sign of every result except
- * the default NaN produced for Inf * 0 and Inf - Inf.
- */
- static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
- FloatPartsN *c, int flags, float_status *s)
- {
- int ab_mask, abc_mask;
- FloatPartsW p_widen, c_widen;
- ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
- abc_mask = float_cmask(c->cls) | ab_mask;
- /*
- * It is implementation-defined whether the cases of (0,inf,qnan)
- * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
- * they return if they do), so we have to hand this information
- * off to the target-specific pick-a-NaN routine.
- */
- if (unlikely(abc_mask & float_cmask_anynan)) {
- return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
- }
- if (flags & float_muladd_negate_c) {
- c->sign ^= 1;
- }
- /* Compute the sign of the product into A. */
- a->sign ^= b->sign;
- if (flags & float_muladd_negate_product) {
- a->sign ^= 1;
- }
- if (unlikely(ab_mask != float_cmask_normal)) {
- if (unlikely(ab_mask == float_cmask_infzero)) {
- float_raise(float_flag_invalid | float_flag_invalid_imz, s);
- goto d_nan;
- }
- if (ab_mask & float_cmask_inf) {
- if (c->cls == float_class_inf && a->sign != c->sign) { /* infinite product plus opposite-signed infinity */
- float_raise(float_flag_invalid | float_flag_invalid_isi, s);
- goto d_nan;
- }
- goto return_inf;
- }
- g_assert(ab_mask & float_cmask_zero);
- if (c->cls == float_class_normal) {
- *a = *c; /* product is zero: the result is the addend */
- goto return_normal;
- }
- if (c->cls == float_class_zero) {
- if (a->sign != c->sign) {
- goto return_sub_zero;
- }
- goto return_zero;
- }
- g_assert(c->cls == float_class_inf); /* falls into the infinite-addend check below */
- }
- if (unlikely(c->cls == float_class_inf)) {
- a->sign = c->sign;
- goto return_inf;
- }
- /* Perform the multiplication step. */
- p_widen.sign = a->sign;
- p_widen.exp = a->exp + b->exp + 1;
- frac_mulw(&p_widen, a, b);
- if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
- frac_add(&p_widen, &p_widen, &p_widen);
- p_widen.exp -= 1;
- }
- /* Perform the addition step. */
- if (c->cls != float_class_zero) {
- /* Zero-extend C to less significant bits. */
- frac_widen(&c_widen, c);
- c_widen.exp = c->exp;
- if (a->sign == c->sign) {
- parts_add_normal(&p_widen, &c_widen);
- } else if (!parts_sub_normal(&p_widen, &c_widen)) { /* false return is handled as an exact zero difference */
- goto return_sub_zero;
- }
- }
- /* Narrow with sticky bit, for proper rounding later. */
- frac_truncjam(a, &p_widen);
- a->sign = p_widen.sign;
- a->exp = p_widen.exp;
- return_normal:
- if (flags & float_muladd_halve_result) {
- a->exp -= 1;
- }
- finish_sign:
- if (flags & float_muladd_negate_result) {
- a->sign ^= 1;
- }
- return a;
- return_sub_zero:
- a->sign = s->float_rounding_mode == float_round_down; /* exact zero difference is negative only when rounding down */
- return_zero:
- a->cls = float_class_zero;
- goto finish_sign;
- return_inf:
- a->cls = float_class_inf;
- goto finish_sign;
- d_nan:
- parts_default_nan(a, s); /* float_muladd_negate_result is not applied on this path */
- return a;
- }
- /*
- * Returns the result of dividing the floating-point value `a' by the
- * corresponding value `b'. The operation is performed according to
- * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
- static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
- float_status *s)
- {
- int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
- bool sign = a->sign ^ b->sign;
- if (likely(ab_mask == float_cmask_normal)) {
- a->sign = sign;
- a->exp -= b->exp + frac_div(a, b);
- return a;
- }
- /* 0/0 or Inf/Inf => NaN */
- if (unlikely(ab_mask == float_cmask_zero)) {
- float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
- goto d_nan;
- }
- if (unlikely(ab_mask == float_cmask_inf)) {
- float_raise(float_flag_invalid | float_flag_invalid_idi, s);
- goto d_nan;
- }
- /* All the NaN cases */
- if (unlikely(ab_mask & float_cmask_anynan)) {
- return parts_pick_nan(a, b, s);
- }
- a->sign = sign;
- /* Inf / X */
- if (a->cls == float_class_inf) {
- return a;
- }
- /* 0 / X */
- if (a->cls == float_class_zero) {
- return a;
- }
- /* X / Inf */
- if (b->cls == float_class_inf) {
- a->cls = float_class_zero;
- return a;
- }
- /* X / 0 => Inf */
- g_assert(b->cls == float_class_zero);
- float_raise(float_flag_divbyzero, s);
- a->cls = float_class_inf;
- return a;
- d_nan:
- parts_default_nan(a, s);
- return a;
- }
- /*
- * Floating point remainder, per IEC/IEEE, or modulus.
- */
- static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
- uint64_t *mod_quot, float_status *s)
- {
- int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
- if (likely(ab_mask == float_cmask_normal)) {
- frac_modrem(a, b, mod_quot);
- return a;
- }
- if (mod_quot) {
- *mod_quot = 0;
- }
- /* All the NaN cases */
- if (unlikely(ab_mask & float_cmask_anynan)) {
- return parts_pick_nan(a, b, s);
- }
- /* Inf % N; N % 0 */
- if (a->cls == float_class_inf || b->cls == float_class_zero) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(a, s);
- return a;
- }
- /* N % Inf; 0 % N */
- g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
- return a;
- }
- /*
- * Square Root
- *
- * The base algorithm is lifted from
- * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
- * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
- * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
- * and is thus MIT licenced.
- *
- * The result replaces *a. NaN inputs go through parts_return_nan();
- * zero (of either sign) and +Inf are returned unchanged; -Inf and
- * negative normal inputs raise float_flag_invalid |
- * float_flag_invalid_sqrt and produce the default NaN.
- *
- * For normal inputs, three code paths are selected by N and
- * fmt->frac_size: a 32-bit path when N == 64 and frac_size <= 23, a
- * 64-bit path for the remaining N == 64 formats, and a 128-bit path
- * for wider N. No exception flags are raised for normal non-negative
- * inputs here; the final low-bit adjustment only marks the fraction
- * as inexact for later rounding.
- */
- static void partsN(sqrt)(FloatPartsN *a, float_status *status,
- const FloatFmt *fmt)
- {
- const uint32_t three32 = 3u << 30;
- const uint64_t three64 = 3ull << 62;
- uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */
- uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */
- uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
- uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
- uint64_t discard; /* scratch for the unused low halves of wide products */
- bool exp_odd;
- size_t index;
- if (unlikely(a->cls != float_class_normal)) {
- switch (a->cls) {
- case float_class_snan:
- case float_class_qnan:
- parts_return_nan(a, status);
- return;
- case float_class_zero:
- return;
- case float_class_inf:
- if (unlikely(a->sign)) {
- goto d_nan;
- }
- return;
- default:
- g_assert_not_reached();
- }
- }
- if (unlikely(a->sign)) {
- goto d_nan;
- }
- /*
- * Argument reduction.
- * x = 4^e frac; with integer e, and frac in [1, 4)
- * m = frac fixed point at bit 62, since we're in base 4.
- * If base-2 exponent is odd, exchange that for multiply by 2,
- * which results in no shift.
- */
- exp_odd = a->exp & 1;
- index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); /* 6 fraction bits plus the inverted exponent parity as bit 6 */
- if (!exp_odd) {
- frac_shr(a, 1);
- }
- /*
- * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
- *
- * Initial estimate:
- * 7-bit lookup table (1-bit exponent and 6-bit significand).
- *
- * The relative error (e = r0*sqrt(m)-1) of a linear estimate
- * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
- * a table lookup is faster and needs one less iteration.
- * The 7-bit table gives |e| < 0x1.fdp-9.
- *
- * A Newton-Raphson iteration for r is
- * s = m*r
- * d = s*r
- * u = 3 - d
- * r = r*u/2
- *
- * Fixed point representations:
- * m, s, d, u, three are all 2.30; r is 0.32
- */
- m64 = a->frac_hi;
- m32 = m64 >> 32;
- r32 = rsqrt_tab[index] << 16;
- /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
- s32 = ((uint64_t)m32 * r32) >> 32;
- d32 = ((uint64_t)s32 * r32) >> 32;
- u32 = three32 - d32;
- if (N == 64) {
- /* float64 or smaller */
- r32 = ((uint64_t)r32 * u32) >> 31;
- /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
- s32 = ((uint64_t)m32 * r32) >> 32;
- d32 = ((uint64_t)s32 * r32) >> 32;
- u32 = three32 - d32;
- if (fmt->frac_size <= 23) {
- /* float32 or smaller */
- s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */
- s32 = (s32 - 1) >> 6; /* 9.23 */
- /* s < sqrt(m) < s + 0x1.08p-23 */
- /* compute nearest rounded result to 2.23 bits */
- uint32_t d0 = (m32 << 16) - s32 * s32;
- uint32_t d1 = s32 - d0;
- uint32_t d2 = d1 + s32 + 1;
- s32 += d1 >> 31;
- a->frac_hi = (uint64_t)s32 << (64 - 25);
- /* increment or decrement for inexact */
- if (d2 != 0) {
- a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
- }
- goto done;
- }
- /* float64 */
- r64 = (uint64_t)r32 * u32 * 2;
- /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
- mul64To128(m64, r64, &s64, &discard);
- mul64To128(s64, r64, &d64, &discard);
- u64 = three64 - d64;
- mul64To128(s64, u64, &s64, &discard); /* 3.61 */
- s64 = (s64 - 2) >> 9; /* 12.52 */
- /* Compute nearest rounded result */
- uint64_t d0 = (m64 << 42) - s64 * s64;
- uint64_t d1 = s64 - d0;
- uint64_t d2 = d1 + s64 + 1;
- s64 += d1 >> 63;
- a->frac_hi = s64 << (64 - 54);
- /* increment or decrement for inexact */
- if (d2 != 0) {
- a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
- }
- goto done;
- }
- r64 = (uint64_t)r32 * u32 * 2; /* N > 64 path continues from the first 32-bit iteration */
- /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
- mul64To128(m64, r64, &s64, &discard);
- mul64To128(s64, r64, &d64, &discard);
- u64 = three64 - d64;
- mul64To128(u64, r64, &r64, &discard);
- r64 <<= 1;
- /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
- mul64To128(m64, r64, &s64, &discard);
- mul64To128(s64, r64, &d64, &discard);
- u64 = three64 - d64;
- mul64To128(u64, r64, &rh, &rl);
- add128(rh, rl, rh, rl, &rh, &rl); /* double r, matching the << 1 used in the 64-bit step */
- /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
- mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
- mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
- sub128(three64, 0, dh, dl, &uh, &ul);
- mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */
- /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
- sub128(sh, sl, 0, 4, &sh, &sl);
- shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */
- /* s < sqrt(m) < s + 1ulp */
- /* Compute nearest rounded result */
- mul64To128(sl, sl, &d0h, &d0l);
- d0h += 2 * sh * sl;
- sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
- sub128(sh, sl, d0h, d0l, &d1h, &d1l);
- add128(sh, sl, 0, 1, &d2h, &d2l);
- add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
- add128(sh, sl, 0, d1h >> 63, &sh, &sl);
- shift128Left(sh, sl, 128 - 114, &sh, &sl);
- /* increment or decrement for inexact */
- if (d2h | d2l) {
- if ((int64_t)(d1h ^ d2h) < 0) {
- sub128(sh, sl, 0, 1, &sh, &sl);
- } else {
- add128(sh, sl, 0, 1, &sh, &sl);
- }
- }
- a->frac_lo = sl;
- a->frac_hi = sh;
- done:
- /* Convert back from base 4 to base 2. */
- a->exp >>= 1;
- if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
- frac_add(a, a, a); /* implicit bit clear: double the fraction instead of bumping the exponent */
- } else {
- a->exp += 1;
- }
- return;
- d_nan:
- float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
- parts_default_nan(a, status);
- }
- /*
- * Rounds the floating-point value `a' to an integer, and returns the
- * result as a floating-point value. The operation is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic.
- *
- * parts_round_to_int_normal is an internal helper function for
- * normal numbers only, returning true for inexact but not directly
- * raising float_flag_inexact.
- *
- * `a' is updated in place. `scale' is clamped to [-0x10000, 0x10000]
- * and added to the exponent before rounding. `frac_size' is the
- * exponent at or above which the value is treated as already integral.
- *
- * A value whose scaled exponent is negative is replaced either by zero
- * (class changed to float_class_zero) or by one (exponent 0, fraction
- * equal to DECOMPOSED_IMPLICIT_BIT), and is always reported as inexact.
- * Rounding modes other than the six handled below hit
- * g_assert_not_reached().
- */
- static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
- int scale, int frac_size)
- {
- uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
- int shift_adj;
- scale = MIN(MAX(scale, -0x10000), 0x10000);
- a->exp += scale;
- if (a->exp < 0) {
- bool one; /* true if the magnitude rounds up to 1, false if it rounds to 0 */
- /* All fractional */
- switch (rmode) {
- case float_round_nearest_even:
- one = false;
- if (a->exp == -1) {
- FloatPartsN tmp;
- /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
- frac_add(&tmp, a, a);
- /* Anything remaining means frac > 0.5. */
- one = !frac_eqz(&tmp);
- }
- break;
- case float_round_ties_away:
- one = a->exp == -1;
- break;
- case float_round_to_zero:
- one = false;
- break;
- case float_round_up:
- one = !a->sign;
- break;
- case float_round_down:
- one = a->sign;
- break;
- case float_round_to_odd:
- one = true;
- break;
- default:
- g_assert_not_reached();
- }
- frac_clear(a);
- a->exp = 0;
- if (one) {
- a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
- } else {
- a->cls = float_class_zero;
- }
- return true;
- }
- if (a->exp >= frac_size) {
- /* All integral */
- return false;
- }
- if (N > 64 && a->exp < N - 64) {
- /*
- * Rounding is not in the low word -- shift lsb to bit 2,
- * which leaves room for sticky and rounding bit.
- */
- shift_adj = (N - 1) - (a->exp + 2);
- frac_shrjam(a, shift_adj);
- frac_lsb = 1 << 2;
- } else {
- shift_adj = 0;
- frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
- }
- frac_lsbm1 = frac_lsb >> 1; /* the half-unit bit just below the integer lsb */
- rnd_mask = frac_lsb - 1; /* all bits below the integer lsb */
- rnd_even_mask = rnd_mask | frac_lsb;
- if (!(a->frac_lo & rnd_mask)) {
- /* Fractional bits already clear, undo the shift above. */
- frac_shl(a, shift_adj);
- return false;
- }
- switch (rmode) {
- case float_round_nearest_even:
- inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
- break;
- case float_round_ties_away:
- inc = frac_lsbm1;
- break;
- case float_round_to_zero:
- inc = 0;
- break;
- case float_round_up:
- inc = a->sign ? 0 : rnd_mask;
- break;
- case float_round_down:
- inc = a->sign ? rnd_mask : 0;
- break;
- case float_round_to_odd:
- inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
- break;
- default:
- g_assert_not_reached();
- }
- if (shift_adj == 0) {
- if (frac_addi(a, a, inc)) { /* nonzero return is handled as a carry out: renormalize */
- frac_shr(a, 1);
- a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
- a->exp++;
- }
- a->frac_lo &= ~rnd_mask;
- } else {
- frac_addi(a, a, inc);
- a->frac_lo &= ~rnd_mask;
- /* Be careful shifting back, not to overflow */
- frac_shl(a, shift_adj - 1); /* stop one bit short so a rounding carry still fits */
- if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
- a->exp++;
- } else {
- frac_add(a, a, a); /* no carry: apply the final one-bit shift */
- }
- }
- return true;
- }
- static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
- int scale, float_status *s,
- const FloatFmt *fmt)
- {
- switch (a->cls) {
- case float_class_qnan:
- case float_class_snan:
- parts_return_nan(a, s);
- break;
- case float_class_zero:
- case float_class_inf:
- break;
- case float_class_normal:
- if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
- float_raise(float_flag_inexact, s);
- }
- break;
- default:
- g_assert_not_reached();
- }
- }
- /*
- * Returns the result of converting the floating-point value `a' to
- * the two's complement integer format. The conversion is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic---which means in particular that the conversion is
- * rounded according to the current rounding mode. If `a' is a NaN,
- * the largest positive integer is returned. Otherwise, if the
- * conversion overflows, the largest integer with the same sign as `a'
- * is returned.
- */
- static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
- int scale, int64_t min, int64_t max,
- float_status *s)
- {
- int flags = 0;
- uint64_t r;
- switch (p->cls) {
- case float_class_snan:
- flags |= float_flag_invalid_snan;
- /* fall through */
- case float_class_qnan:
- flags |= float_flag_invalid;
- r = max;
- break;
- case float_class_inf:
- flags = float_flag_invalid | float_flag_invalid_cvti;
- r = p->sign ? min : max;
- break;
- case float_class_zero:
- return 0;
- case float_class_normal:
- /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
- if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
- flags = float_flag_inexact;
- }
- if (p->exp <= DECOMPOSED_BINARY_POINT) {
- r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
- } else {
- r = UINT64_MAX;
- }
- if (p->sign) {
- if (r <= -(uint64_t)min) {
- r = -r;
- } else {
- flags = float_flag_invalid | float_flag_invalid_cvti;
- r = min;
- }
- } else if (r > max) {
- flags = float_flag_invalid | float_flag_invalid_cvti;
- r = max;
- }
- break;
- default:
- g_assert_not_reached();
- }
- float_raise(flags, s);
- return r;
- }
- /*
- * Returns the result of converting the floating-point value `a' to
- * the unsigned integer format. The conversion is performed according
- * to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic---which means in particular that the conversion is
- * rounded according to the current rounding mode. If `a' is a NaN,
- * the largest unsigned integer is returned. Otherwise, if the
- * conversion overflows, the largest unsigned integer is returned. If
- * the 'a' is negative, the result is rounded and zero is returned;
- * values that do not round to zero will raise the inexact exception
- * flag.
- */
- static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
- int scale, uint64_t max, float_status *s)
- {
- int flags = 0;
- uint64_t r;
- switch (p->cls) {
- case float_class_snan:
- flags |= float_flag_invalid_snan;
- /* fall through */
- case float_class_qnan:
- flags |= float_flag_invalid;
- r = max;
- break;
- case float_class_inf:
- flags = float_flag_invalid | float_flag_invalid_cvti;
- r = p->sign ? 0 : max;
- break;
- case float_class_zero:
- return 0;
- case float_class_normal:
- /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
- if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
- flags = float_flag_inexact;
- if (p->cls == float_class_zero) {
- r = 0;
- break;
- }
- }
- if (p->sign) {
- flags = float_flag_invalid | float_flag_invalid_cvti;
- r = 0;
- } else if (p->exp > DECOMPOSED_BINARY_POINT) {
- flags = float_flag_invalid | float_flag_invalid_cvti;
- r = max;
- } else {
- r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
- if (r > max) {
- flags = float_flag_invalid | float_flag_invalid_cvti;
- r = max;
- }
- }
- break;
- default:
- g_assert_not_reached();
- }
- float_raise(flags, s);
- return r;
- }
- /*
- * Integer to float conversions
- *
- * Returns the result of converting the two's complement integer `a'
- * to the floating-point format. The conversion is performed according
- * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
- static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
- int scale, float_status *s)
- {
- uint64_t f = a;
- int shift;
- memset(p, 0, sizeof(*p));
- if (a == 0) {
- p->cls = float_class_zero;
- return;
- }
- p->cls = float_class_normal;
- if (a < 0) {
- f = -f;
- p->sign = true;
- }
- shift = clz64(f);
- scale = MIN(MAX(scale, -0x10000), 0x10000);
- p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
- p->frac_hi = f << shift;
- }
- /*
- * Unsigned Integer to float conversions
- *
- * Returns the result of converting the unsigned integer `a' to the
- * floating-point format. The conversion is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
- static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
- int scale, float_status *status)
- {
- memset(p, 0, sizeof(*p));
- if (a == 0) {
- p->cls = float_class_zero;
- } else {
- int shift = clz64(a);
- scale = MIN(MAX(scale, -0x10000), 0x10000);
- p->cls = float_class_normal;
- p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
- p->frac_hi = a << shift;
- }
- }
- /*
- * Float min/max.
- */
- static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
- float_status *s, int flags)
- {
- int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
- int a_exp, b_exp, cmp;
- if (unlikely(ab_mask & float_cmask_anynan)) {
- /*
- * For minNum/maxNum (IEEE 754-2008)
- * or minimumNumber/maximumNumber (IEEE 754-2019),
- * if one operand is a QNaN, and the other
- * operand is numerical, then return numerical argument.
- */
- if ((flags & (minmax_isnum | minmax_isnumber))
- && !(ab_mask & float_cmask_snan)
- && (ab_mask & ~float_cmask_qnan)) {
- return is_nan(a->cls) ? b : a;
- }
- /*
- * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
- * are removed and replaced with minimum, minimumNumber, maximum
- * and maximumNumber.
- * minimumNumber/maximumNumber behavior for SNaN is changed to:
- * If both operands are NaNs, a QNaN is returned.
- * If either operand is a SNaN,
- * an invalid operation exception is signaled,
- * but unless both operands are NaNs,
- * the SNaN is otherwise ignored and not converted to a QNaN.
- */
- if ((flags & minmax_isnumber)
- && (ab_mask & float_cmask_snan)
- && (ab_mask & ~float_cmask_anynan)) {
- float_raise(float_flag_invalid, s);
- return is_nan(a->cls) ? b : a;
- }
- return parts_pick_nan(a, b, s);
- }
- a_exp = a->exp;
- b_exp = b->exp;
- if (unlikely(ab_mask != float_cmask_normal)) {
- switch (a->cls) {
- case float_class_normal:
- break;
- case float_class_inf:
- a_exp = INT16_MAX;
- break;
- case float_class_zero:
- a_exp = INT16_MIN;
- break;
- default:
- g_assert_not_reached();
- break;
- }
- switch (b->cls) {
- case float_class_normal:
- break;
- case float_class_inf:
- b_exp = INT16_MAX;
- break;
- case float_class_zero:
- b_exp = INT16_MIN;
- break;
- default:
- g_assert_not_reached();
- break;
- }
- }
- /* Compare magnitudes. */
- cmp = a_exp - b_exp;
- if (cmp == 0) {
- cmp = frac_cmp(a, b);
- }
- /*
- * Take the sign into account.
- * For ismag, only do this if the magnitudes are equal.
- */
- if (!(flags & minmax_ismag) || cmp == 0) {
- if (a->sign != b->sign) {
- /* For differing signs, the negative operand is less. */
- cmp = a->sign ? -1 : 1;
- } else if (a->sign) {
- /* For two negative operands, invert the magnitude comparison. */
- cmp = -cmp;
- }
- }
- if (flags & minmax_ismin) {
- cmp = -cmp;
- }
- return cmp < 0 ? b : a;
- }
- /*
- * Floating point compare
- */
- static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
- float_status *s, bool is_quiet)
- {
- int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
- if (likely(ab_mask == float_cmask_normal)) {
- FloatRelation cmp;
- if (a->sign != b->sign) {
- goto a_sign;
- }
- if (a->exp == b->exp) {
- cmp = frac_cmp(a, b);
- } else if (a->exp < b->exp) {
- cmp = float_relation_less;
- } else {
- cmp = float_relation_greater;
- }
- if (a->sign) {
- cmp = -cmp;
- }
- return cmp;
- }
- if (unlikely(ab_mask & float_cmask_anynan)) {
- if (ab_mask & float_cmask_snan) {
- float_raise(float_flag_invalid | float_flag_invalid_snan, s);
- } else if (!is_quiet) {
- float_raise(float_flag_invalid, s);
- }
- return float_relation_unordered;
- }
- if (ab_mask & float_cmask_zero) {
- if (ab_mask == float_cmask_zero) {
- return float_relation_equal;
- } else if (a->cls == float_class_zero) {
- goto b_sign;
- } else {
- goto a_sign;
- }
- }
- if (ab_mask == float_cmask_inf) {
- if (a->sign == b->sign) {
- return float_relation_equal;
- }
- } else if (b->cls == float_class_inf) {
- goto b_sign;
- } else {
- g_assert(a->cls == float_class_inf);
- }
- a_sign:
- return a->sign ? float_relation_less : float_relation_greater;
- b_sign:
- return b->sign ? float_relation_greater : float_relation_less;
- }
- /*
- * Multiply A by 2 raised to the power N.
- */
- static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
- {
- switch (a->cls) {
- case float_class_snan:
- case float_class_qnan:
- parts_return_nan(a, s);
- break;
- case float_class_zero:
- case float_class_inf:
- break;
- case float_class_normal:
- a->exp += MIN(MAX(n, -0x10000), 0x10000);
- break;
- default:
- g_assert_not_reached();
- }
- }
- /*
- * Return log2(A)
- *
- * The result is written back into `a'.
- *
- * Special cases, as implemented below:
- *   NaN                - handed to parts_return_nan();
- *   zero               - divbyzero is raised and the result is -inf;
- *   +inf               - returned unchanged;
- *   -inf, negative     - invalid is raised and the result is the
- *                        default NaN.
- *
- * For a positive normal input the integer part of the result is the
- * exponent of `a'.  The fraction bits are produced one per loop
- * iteration by squaring the running fraction and testing whether
- * DECOMPOSED_IMPLICIT_BIT is set in the high word of the square.
- * Only N == 64 is supported; this is asserted.
- */
- static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
- {
- uint64_t a0, a1, r, t, ign;
- FloatPartsN f;
- int i, n, a_exp, f_exp;
- if (unlikely(a->cls != float_class_normal)) {
- switch (a->cls) {
- case float_class_snan:
- case float_class_qnan:
- parts_return_nan(a, s);
- return;
- case float_class_zero:
- float_raise(float_flag_divbyzero, s);
- /* log2(0) = -inf */
- a->cls = float_class_inf;
- a->sign = 1;
- return;
- case float_class_inf:
- if (unlikely(a->sign)) {
- goto d_nan;
- }
- return; /* log2(+inf) = +inf: `a' is left as it is */
- default:
- break;
- }
- g_assert_not_reached();
- }
- if (unlikely(a->sign)) {
- goto d_nan; /* log2 of a negative value is invalid */
- }
- /* TODO: This algorithm loses bits too quickly for float128. */
- g_assert(N == 64);
- a_exp = a->exp;
- f_exp = -1;
- r = 0; /* fraction bits of the result collected so far */
- t = DECOMPOSED_IMPLICIT_BIT; /* bit of r decided by the current iteration */
- a0 = a->frac_hi; /* a0/a1: running value being squared (a1 presumably the low word -- confirm) */
- a1 = 0;
- n = fmt->frac_size + 2; /* number of loop iterations, i.e. result bits to try */
- if (unlikely(a_exp == -1)) {
- /*
- * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
- * When the value is very close to 1.0, there are lots of 1's in
- * the msb parts of the fraction. At the end, when we subtract
- * this value from -1.0, we can see a catastrophic loss of precision,
- * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
- * bits of y in the final result. To minimize this, compute as many
- * digits as we can.
- * ??? This case needs another algorithm to avoid this.
- */
- n = fmt->frac_size * 2 + 2;
- /* Don't compute a value overlapping the sticky bit */
- n = MIN(n, 62);
- }
- for (i = 0; i < n; i++) {
- if (a1) {
- mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); /* square; the last two output words are discarded */
- } else if (a0 & 0xffffffffull) {
- mul64To128(a0, a0, &a0, &a1);
- } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
- a0 >>= 32; /* low 32 bits and a1 are zero: square the upper half in 64 bits */
- a0 *= a0;
- } else {
- goto exact; /* a0 == DECOMPOSED_IMPLICIT_BIT and a1 == 0: skip the sticky bit */
- }
- if (a0 & DECOMPOSED_IMPLICIT_BIT) {
- if (unlikely(a_exp == 0 && r == 0)) {
- /*
- * When a_exp == 0, we're computing the log2 of a value
- * [1.0,2.0). When the value is very close to 1.0, there
- * are lots of 0's in the msb parts of the fraction.
- * We need to compute more digits to produce a correct
- * result -- restart at the top of the fraction.
- * ??? This is likely to lose precision quickly, as for
- * float128; we may need another method.
- *
- * Note that `t >>= 1' below and the loop's `i++' still run
- * for this iteration after the restart.
- */
- f_exp -= i;
- t = r = DECOMPOSED_IMPLICIT_BIT;
- i = 0;
- } else {
- r |= t; /* record a 1 in the current result bit */
- }
- } else {
- add128(a0, a1, a0, a1, &a0, &a1); /* top bit clear: double the running value (a + a) */
- }
- t >>= 1; /* move on to the next lower result bit */
- }
- /* Set sticky for inexact. */
- r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
- exact:
- parts_sint_to_float(a, a_exp, 0, s); /* a = a_exp as a float: the integer part */
- if (r == 0) {
- return; /* no fraction bits were produced: the result is just a_exp */
- }
- memset(&f, 0, sizeof(f));
- f.cls = float_class_normal; /* f: the collected fraction bits as a positive normal value */
- f.frac_hi = r;
- f.exp = f_exp - frac_normalize(&f);
- if (a_exp < 0) {
- parts_sub_normal(a, &f); /* presumably a magnitude subtract, |a_exp| - f -- confirm against helper */
- } else if (a_exp > 0) {
- parts_add_normal(a, &f);
- } else {
- *a = f; /* integer part is zero: the fraction is the whole result */
- }
- return;
- d_nan:
- float_raise(float_flag_invalid, s);
- parts_default_nan(a, s);
- }
|