softfloat-parts.c.inc 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547
  1. /*
  2. * QEMU float support
  3. *
  4. * The code in this source file is derived from release 2a of the SoftFloat
  5. * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
  6. * some later contributions) are provided under that license, as detailed below.
  7. * It has subsequently been modified by contributors to the QEMU Project,
  8. * so some portions are provided under:
  9. * the SoftFloat-2a license
  10. * the BSD license
  11. * GPL-v2-or-later
  12. *
  13. * Any future contributions to this file after December 1st 2014 will be
  14. * taken to be licensed under the Softfloat-2a license unless specifically
  15. * indicated otherwise.
  16. */
  17. static void partsN(return_nan)(FloatPartsN *a, float_status *s)
  18. {
  19. switch (a->cls) {
  20. case float_class_snan:
  21. float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  22. if (s->default_nan_mode) {
  23. parts_default_nan(a, s);
  24. } else {
  25. parts_silence_nan(a, s);
  26. }
  27. break;
  28. case float_class_qnan:
  29. if (s->default_nan_mode) {
  30. parts_default_nan(a, s);
  31. }
  32. break;
  33. default:
  34. g_assert_not_reached();
  35. }
  36. }
  37. static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
  38. float_status *s)
  39. {
  40. if (is_snan(a->cls) || is_snan(b->cls)) {
  41. float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  42. }
  43. if (s->default_nan_mode) {
  44. parts_default_nan(a, s);
  45. } else {
  46. int cmp = frac_cmp(a, b);
  47. if (cmp == 0) {
  48. cmp = a->sign < b->sign;
  49. }
  50. if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
  51. a = b;
  52. }
  53. if (is_snan(a->cls)) {
  54. parts_silence_nan(a, s);
  55. }
  56. }
  57. return a;
  58. }
  59. static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
  60. FloatPartsN *c, float_status *s,
  61. int ab_mask, int abc_mask)
  62. {
  63. int which;
  64. if (unlikely(abc_mask & float_cmask_snan)) {
  65. float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  66. }
  67. which = pickNaNMulAdd(a->cls, b->cls, c->cls,
  68. ab_mask == float_cmask_infzero, s);
  69. if (s->default_nan_mode || which == 3) {
  70. /*
  71. * Note that this check is after pickNaNMulAdd so that function
  72. * has an opportunity to set the Invalid flag for infzero.
  73. */
  74. parts_default_nan(a, s);
  75. return a;
  76. }
  77. switch (which) {
  78. case 0:
  79. break;
  80. case 1:
  81. a = b;
  82. break;
  83. case 2:
  84. a = c;
  85. break;
  86. default:
  87. g_assert_not_reached();
  88. }
  89. if (is_snan(a->cls)) {
  90. parts_silence_nan(a, s);
  91. }
  92. return a;
  93. }
  94. /*
  95. * Canonicalize the FloatParts structure. Determine the class,
  96. * unbias the exponent, and normalize the fraction.
  97. */
  98. static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
  99. const FloatFmt *fmt)
  100. {
  101. if (unlikely(p->exp == 0)) {
  102. if (likely(frac_eqz(p))) {
  103. p->cls = float_class_zero;
  104. } else if (status->flush_inputs_to_zero) {
  105. float_raise(float_flag_input_denormal, status);
  106. p->cls = float_class_zero;
  107. frac_clear(p);
  108. } else {
  109. int shift = frac_normalize(p);
  110. p->cls = float_class_normal;
  111. p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
  112. }
  113. } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
  114. p->cls = float_class_normal;
  115. p->exp -= fmt->exp_bias;
  116. frac_shl(p, fmt->frac_shift);
  117. p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
  118. } else if (likely(frac_eqz(p))) {
  119. p->cls = float_class_inf;
  120. } else {
  121. frac_shl(p, fmt->frac_shift);
  122. p->cls = (parts_is_snan_frac(p->frac_hi, status)
  123. ? float_class_snan : float_class_qnan);
  124. }
  125. }
  /*
   * Round and uncanonicalize a floating-point number by parts. There
   * are FRAC_SHIFT bits that may require rounding at the bottom of the
   * fraction; these bits will be removed. The exponent will be biased
   * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
   *
   * P is updated in place: on return p->exp holds the biased exponent and
   * the fraction has been shifted right by fmt->frac_shift.  p->cls may be
   * changed to float_class_inf (overflow) or float_class_zero (flush or
   * rounding to zero).  All exception flags accumulated here are raised
   * through a single float_raise() call at the end.
   */
  static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                     const FloatFmt *fmt)
  {
      const int exp_max = fmt->exp_max;
      const int frac_shift = fmt->frac_shift;
      /* Mask of the low fraction bits that are rounded away. */
      const uint64_t round_mask = fmt->round_mask;
      /*
       * Lowest bit that is kept.  This wraps to 0 when round_mask covers
       * the whole low word; the "N > 64 && frac_lsb == 0" tests below then
       * look at bit 0 of frac_hi instead.
       */
      const uint64_t frac_lsb = round_mask + 1;
      /* Most significant bit of round_mask, i.e. half of frac_lsb. */
      const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
      const uint64_t roundeven_mask = round_mask | frac_lsb;
      /* Amount added to the fraction before the round bits are cleared. */
      uint64_t inc;
      /* On overflow, produce the largest normal instead of infinity. */
      bool overflow_norm = false;
      int exp, flags = 0;

      switch (s->float_rounding_mode) {
      case float_round_nearest_even:
          if (N > 64 && frac_lsb == 0) {
              inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                     ? frac_lsbm1 : 0);
          } else {
              /* No increment for an exact tie whose kept lsb is even. */
              inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                     ? frac_lsbm1 : 0);
          }
          break;
      case float_round_ties_away:
          inc = frac_lsbm1;
          break;
      case float_round_to_zero:
          overflow_norm = true;
          inc = 0;
          break;
      case float_round_up:
          inc = p->sign ? 0 : round_mask;
          overflow_norm = p->sign;
          break;
      case float_round_down:
          inc = p->sign ? round_mask : 0;
          overflow_norm = !p->sign;
          break;
      case float_round_to_odd:
          overflow_norm = true;
          /* fall through */
      case float_round_to_odd_inf:
          /* Increment only when the kept lsb is currently even. */
          if (N > 64 && frac_lsb == 0) {
              inc = p->frac_hi & 1 ? 0 : round_mask;
          } else {
              inc = p->frac_lo & frac_lsb ? 0 : round_mask;
          }
          break;
      default:
          g_assert_not_reached();
      }

      exp = p->exp + fmt->exp_bias;
      if (likely(exp > 0)) {
          /* Biased exponent is positive: the result is normal or overflows. */
          if (p->frac_lo & round_mask) {
              flags |= float_flag_inexact;
              /*
               * A non-zero return from frac_addi is handled as a carry out
               * of the fraction: renormalize and bump the exponent.
               */
              if (frac_addi(p, p, inc)) {
                  frac_shr(p, 1);
                  p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                  exp++;
              }
              p->frac_lo &= ~round_mask;
          }

          if (fmt->arm_althp) {
              /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
              if (unlikely(exp > exp_max)) {
                  /* Overflow. Return the maximum normal. */
                  /* Note: this assignment replaces any flags set above. */
                  flags = float_flag_invalid;
                  exp = exp_max;
                  frac_allones(p);
                  p->frac_lo &= ~round_mask;
              }
          } else if (unlikely(exp >= exp_max)) {
              flags |= float_flag_overflow;
              if (s->rebias_overflow) {
                  /* Keep the rounded fraction; only re-bias the exponent. */
                  exp -= fmt->exp_re_bias;
              } else if (overflow_norm) {
                  /* Saturate to the largest finite value. */
                  flags |= float_flag_inexact;
                  exp = exp_max - 1;
                  frac_allones(p);
                  p->frac_lo &= ~round_mask;
              } else {
                  flags |= float_flag_inexact;
                  p->cls = float_class_inf;
                  exp = exp_max;
                  frac_clear(p);
              }
          }
          frac_shr(p, frac_shift);
      } else if (unlikely(s->rebias_underflow)) {
          /* Underflow with re-biasing: round as for a normal number. */
          flags |= float_flag_underflow;
          exp += fmt->exp_re_bias;
          if (p->frac_lo & round_mask) {
              flags |= float_flag_inexact;
              if (frac_addi(p, p, inc)) {
                  frac_shr(p, 1);
                  p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                  exp++;
              }
              p->frac_lo &= ~round_mask;
          }
          frac_shr(p, frac_shift);
      } else if (s->flush_to_zero) {
          /* Denormal results are replaced by zero. */
          flags |= float_flag_output_denormal;
          p->cls = float_class_zero;
          exp = 0;
          frac_clear(p);
      } else {
          /* Denormal result. */
          bool is_tiny = s->tininess_before_rounding || exp < 0;

          if (!is_tiny) {
              /*
               * Tininess is detected after rounding and exp == 0: the value
               * is tiny unless the trial addition of inc reports a carry.
               * The sum itself is thrown away.
               */
              FloatPartsN discard;
              is_tiny = !frac_addi(&discard, p, inc);
          }

          /* Denormalize: shift right by 1 - exp (the "jam" variant). */
          frac_shrjam(p, 1 - exp);

          if (p->frac_lo & round_mask) {
              /* Need to recompute round-to-even/round-to-odd. */
              switch (s->float_rounding_mode) {
              case float_round_nearest_even:
                  if (N > 64 && frac_lsb == 0) {
                      inc = ((p->frac_hi & 1) ||
                             (p->frac_lo & round_mask) != frac_lsbm1
                             ? frac_lsbm1 : 0);
                  } else {
                      inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                             ? frac_lsbm1 : 0);
                  }
                  break;
              case float_round_to_odd:
              case float_round_to_odd_inf:
                  if (N > 64 && frac_lsb == 0) {
                      inc = p->frac_hi & 1 ? 0 : round_mask;
                  } else {
                      inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                  }
                  break;
              default:
                  /* Other modes: inc does not depend on the fraction bits. */
                  break;
              }
              flags |= float_flag_inexact;
              frac_addi(p, p, inc);
              p->frac_lo &= ~round_mask;
          }

          /*
           * The biased exponent becomes 1 if the implicit bit is now set
           * in the fraction, otherwise 0.
           */
          exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
          frac_shr(p, frac_shift);

          /* Underflow is only signalled for results that are also inexact. */
          if (is_tiny && (flags & float_flag_inexact)) {
              flags |= float_flag_underflow;
          }
          if (exp == 0 && frac_eqz(p)) {
              p->cls = float_class_zero;
          }
      }
      p->exp = exp;
      float_raise(flags, s);
  }
  284. static void partsN(uncanon)(FloatPartsN *p, float_status *s,
  285. const FloatFmt *fmt)
  286. {
  287. if (likely(p->cls == float_class_normal)) {
  288. parts_uncanon_normal(p, s, fmt);
  289. } else {
  290. switch (p->cls) {
  291. case float_class_zero:
  292. p->exp = 0;
  293. frac_clear(p);
  294. return;
  295. case float_class_inf:
  296. g_assert(!fmt->arm_althp);
  297. p->exp = fmt->exp_max;
  298. frac_clear(p);
  299. return;
  300. case float_class_qnan:
  301. case float_class_snan:
  302. g_assert(!fmt->arm_althp);
  303. p->exp = fmt->exp_max;
  304. frac_shr(p, fmt->frac_shift);
  305. return;
  306. default:
  307. break;
  308. }
  309. g_assert_not_reached();
  310. }
  311. }
  312. /*
  313. * Returns the result of adding or subtracting the values of the
  314. * floating-point values `a' and `b'. The operation is performed
  315. * according to the IEC/IEEE Standard for Binary Floating-Point
  316. * Arithmetic.
  317. */
  318. static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
  319. float_status *s, bool subtract)
  320. {
  321. bool b_sign = b->sign ^ subtract;
  322. int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
  323. if (a->sign != b_sign) {
  324. /* Subtraction */
  325. if (likely(ab_mask == float_cmask_normal)) {
  326. if (parts_sub_normal(a, b)) {
  327. return a;
  328. }
  329. /* Subtract was exact, fall through to set sign. */
  330. ab_mask = float_cmask_zero;
  331. }
  332. if (ab_mask == float_cmask_zero) {
  333. a->sign = s->float_rounding_mode == float_round_down;
  334. return a;
  335. }
  336. if (unlikely(ab_mask & float_cmask_anynan)) {
  337. goto p_nan;
  338. }
  339. if (ab_mask & float_cmask_inf) {
  340. if (a->cls != float_class_inf) {
  341. /* N - Inf */
  342. goto return_b;
  343. }
  344. if (b->cls != float_class_inf) {
  345. /* Inf - N */
  346. return a;
  347. }
  348. /* Inf - Inf */
  349. float_raise(float_flag_invalid | float_flag_invalid_isi, s);
  350. parts_default_nan(a, s);
  351. return a;
  352. }
  353. } else {
  354. /* Addition */
  355. if (likely(ab_mask == float_cmask_normal)) {
  356. parts_add_normal(a, b);
  357. return a;
  358. }
  359. if (ab_mask == float_cmask_zero) {
  360. return a;
  361. }
  362. if (unlikely(ab_mask & float_cmask_anynan)) {
  363. goto p_nan;
  364. }
  365. if (ab_mask & float_cmask_inf) {
  366. a->cls = float_class_inf;
  367. return a;
  368. }
  369. }
  370. if (b->cls == float_class_zero) {
  371. g_assert(a->cls == float_class_normal);
  372. return a;
  373. }
  374. g_assert(a->cls == float_class_zero);
  375. g_assert(b->cls == float_class_normal);
  376. return_b:
  377. b->sign = b_sign;
  378. return b;
  379. p_nan:
  380. return parts_pick_nan(a, b, s);
  381. }
  382. /*
  383. * Returns the result of multiplying the floating-point values `a' and
  384. * `b'. The operation is performed according to the IEC/IEEE Standard
  385. * for Binary Floating-Point Arithmetic.
  386. */
  387. static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
  388. float_status *s)
  389. {
  390. int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
  391. bool sign = a->sign ^ b->sign;
  392. if (likely(ab_mask == float_cmask_normal)) {
  393. FloatPartsW tmp;
  394. frac_mulw(&tmp, a, b);
  395. frac_truncjam(a, &tmp);
  396. a->exp += b->exp + 1;
  397. if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
  398. frac_add(a, a, a);
  399. a->exp -= 1;
  400. }
  401. a->sign = sign;
  402. return a;
  403. }
  404. /* Inf * Zero == NaN */
  405. if (unlikely(ab_mask == float_cmask_infzero)) {
  406. float_raise(float_flag_invalid | float_flag_invalid_imz, s);
  407. parts_default_nan(a, s);
  408. return a;
  409. }
  410. if (unlikely(ab_mask & float_cmask_anynan)) {
  411. return parts_pick_nan(a, b, s);
  412. }
  413. /* Multiply by 0 or Inf */
  414. if (ab_mask & float_cmask_inf) {
  415. a->cls = float_class_inf;
  416. a->sign = sign;
  417. return a;
  418. }
  419. g_assert(ab_mask & float_cmask_zero);
  420. a->cls = float_class_zero;
  421. a->sign = sign;
  422. return a;
  423. }
  /*
   * Returns the result of multiplying the floating-point values `a' and
   * `b' then adding 'c', with no intermediate rounding step after the
   * multiplication. The operation is performed according to the
   * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
   * The flags argument allows the caller to select negation of the
   * addend, the intermediate product, or the final result. (The
   * difference between this and having the caller do a separate
   * negation is that negating externally will flip the sign bit on NaNs.)
   *
   * Requires A and C extracted into a double-sized structure to provide the
   * extra space for the widening multiply.
   * NOTE(review): the code below widens locally through p_widen/c_widen;
   * confirm whether the requirement stated above still applies.
   *
   * FLAGS bits examined here: float_muladd_negate_c,
   * float_muladd_negate_product, float_muladd_halve_result and
   * float_muladd_negate_result.  The result is built in A (or selected by
   * parts_pick_nan_muladd for NaN inputs) and is not rounded here.
   * Note that c->sign and a->sign are modified even on the special-case
   * paths.
   */
  static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                     FloatPartsN *c, int flags, float_status *s)
  {
      int ab_mask, abc_mask;
      FloatPartsW p_widen, c_widen;

      ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
      abc_mask = float_cmask(c->cls) | ab_mask;

      /*
       * It is implementation-defined whether the cases of (0,inf,qnan)
       * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
       * they return if they do), so we have to hand this information
       * off to the target-specific pick-a-NaN routine.
       */
      if (unlikely(abc_mask & float_cmask_anynan)) {
          return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
      }

      if (flags & float_muladd_negate_c) {
          c->sign ^= 1;
      }

      /* Compute the sign of the product into A. */
      a->sign ^= b->sign;
      if (flags & float_muladd_negate_product) {
          a->sign ^= 1;
      }

      /* Product operands that are not both normal: Inf and/or zero. */
      if (unlikely(ab_mask != float_cmask_normal)) {
          /* Inf * 0 is invalid. */
          if (unlikely(ab_mask == float_cmask_infzero)) {
              float_raise(float_flag_invalid | float_flag_invalid_imz, s);
              goto d_nan;
          }

          if (ab_mask & float_cmask_inf) {
              /* Infinite product plus an infinity of the opposite sign. */
              if (c->cls == float_class_inf && a->sign != c->sign) {
                  float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                  goto d_nan;
              }
              goto return_inf;
          }

          /* The product is zero. */
          g_assert(ab_mask & float_cmask_zero);
          if (c->cls == float_class_normal) {
              /* 0 + c: the result is c (subject to halve/negate below). */
              *a = *c;
              goto return_normal;
          }
          if (c->cls == float_class_zero) {
              if (a->sign != c->sign) {
                  goto return_sub_zero;
              }
              goto return_zero;
          }
          /* Zero product with infinite addend: handled just below. */
          g_assert(c->cls == float_class_inf);
      }

      /* Finite product plus infinite addend: the result is that infinity. */
      if (unlikely(c->cls == float_class_inf)) {
          a->sign = c->sign;
          goto return_inf;
      }

      /* Perform the multiplication step. */
      p_widen.sign = a->sign;
      p_widen.exp = a->exp + b->exp + 1;
      frac_mulw(&p_widen, a, b);
      if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
          /* Top bit clear: double the fraction and compensate the exponent. */
          frac_add(&p_widen, &p_widen, &p_widen);
          p_widen.exp -= 1;
      }

      /* Perform the addition step. */
      if (c->cls != float_class_zero) {
          /* Zero-extend C to less significant bits. */
          frac_widen(&c_widen, c);
          c_widen.exp = c->exp;

          if (a->sign == c->sign) {
              parts_add_normal(&p_widen, &c_widen);
          } else if (!parts_sub_normal(&p_widen, &c_widen)) {
              /* Exact cancellation. */
              goto return_sub_zero;
          }
      }

      /* Narrow with sticky bit, for proper rounding later. */
      frac_truncjam(a, &p_widen);
      a->sign = p_widen.sign;
      a->exp = p_widen.exp;

   return_normal:
      if (flags & float_muladd_halve_result) {
          a->exp -= 1;
      }
   finish_sign:
      if (flags & float_muladd_negate_result) {
          a->sign ^= 1;
      }
      return a;

   return_sub_zero:
      /* Zero from a difference: negative only when rounding down. */
      a->sign = s->float_rounding_mode == float_round_down;
   return_zero:
      a->cls = float_class_zero;
      goto finish_sign;

   return_inf:
      a->cls = float_class_inf;
      goto finish_sign;

   d_nan:
      /* The default NaN is returned without applying negate_result. */
      parts_default_nan(a, s);
      return a;
  }
  534. /*
  535. * Returns the result of dividing the floating-point value `a' by the
  536. * corresponding value `b'. The operation is performed according to
  537. * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  538. */
  539. static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
  540. float_status *s)
  541. {
  542. int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
  543. bool sign = a->sign ^ b->sign;
  544. if (likely(ab_mask == float_cmask_normal)) {
  545. a->sign = sign;
  546. a->exp -= b->exp + frac_div(a, b);
  547. return a;
  548. }
  549. /* 0/0 or Inf/Inf => NaN */
  550. if (unlikely(ab_mask == float_cmask_zero)) {
  551. float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
  552. goto d_nan;
  553. }
  554. if (unlikely(ab_mask == float_cmask_inf)) {
  555. float_raise(float_flag_invalid | float_flag_invalid_idi, s);
  556. goto d_nan;
  557. }
  558. /* All the NaN cases */
  559. if (unlikely(ab_mask & float_cmask_anynan)) {
  560. return parts_pick_nan(a, b, s);
  561. }
  562. a->sign = sign;
  563. /* Inf / X */
  564. if (a->cls == float_class_inf) {
  565. return a;
  566. }
  567. /* 0 / X */
  568. if (a->cls == float_class_zero) {
  569. return a;
  570. }
  571. /* X / Inf */
  572. if (b->cls == float_class_inf) {
  573. a->cls = float_class_zero;
  574. return a;
  575. }
  576. /* X / 0 => Inf */
  577. g_assert(b->cls == float_class_zero);
  578. float_raise(float_flag_divbyzero, s);
  579. a->cls = float_class_inf;
  580. return a;
  581. d_nan:
  582. parts_default_nan(a, s);
  583. return a;
  584. }
  585. /*
  586. * Floating point remainder, per IEC/IEEE, or modulus.
  587. */
  588. static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
  589. uint64_t *mod_quot, float_status *s)
  590. {
  591. int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
  592. if (likely(ab_mask == float_cmask_normal)) {
  593. frac_modrem(a, b, mod_quot);
  594. return a;
  595. }
  596. if (mod_quot) {
  597. *mod_quot = 0;
  598. }
  599. /* All the NaN cases */
  600. if (unlikely(ab_mask & float_cmask_anynan)) {
  601. return parts_pick_nan(a, b, s);
  602. }
  603. /* Inf % N; N % 0 */
  604. if (a->cls == float_class_inf || b->cls == float_class_zero) {
  605. float_raise(float_flag_invalid, s);
  606. parts_default_nan(a, s);
  607. return a;
  608. }
  609. /* N % Inf; 0 % N */
  610. g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
  611. return a;
  612. }
  /*
   * Square Root
   *
   * The base algorithm is lifted from
   * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
   * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
   * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
   * and is thus MIT licenced.
   *
   * A is replaced in place by its (unrounded) square root.
   * NaN inputs go through parts_return_nan(); zero and +Inf are returned
   * unchanged; -Inf and negative normal numbers raise
   * float_flag_invalid | float_flag_invalid_sqrt and yield the default NaN.
   * FMT is consulted only for frac_size, to pick the float32-or-smaller
   * path when N == 64.
   */
  static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                           const FloatFmt *fmt)
  {
      const uint32_t three32 = 3u << 30;
      const uint64_t three64 = 3ull << 62;
      uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
      uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
      uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
      uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
      uint64_t discard;
      bool exp_odd;
      size_t index;

      /* Special cases: NaN, zero and infinity. */
      if (unlikely(a->cls != float_class_normal)) {
          switch (a->cls) {
          case float_class_snan:
          case float_class_qnan:
              parts_return_nan(a, status);
              return;
          case float_class_zero:
              return;
          case float_class_inf:
              if (unlikely(a->sign)) {
                  goto d_nan;
              }
              return;
          default:
              g_assert_not_reached();
          }
      }

      /* Negative normal number. */
      if (unlikely(a->sign)) {
          goto d_nan;
      }

      /*
       * Argument reduction.
       * x = 4^e frac; with integer e, and frac in [1, 4)
       * m = frac fixed point at bit 62, since we're in base 4.
       * If base-2 exponent is odd, exchange that for multiply by 2,
       * which results in no shift.
       */
      exp_odd = a->exp & 1;
      /* Table index: 6 bits taken from frac_hi (from bit 57) plus an exponent-parity bit. */
      index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
      if (!exp_odd) {
          frac_shr(a, 1);
      }

      /*
       * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
       *
       * Initial estimate:
       * 7-bit lookup table (1-bit exponent and 6-bit significand).
       *
       * The relative error (e = r0*sqrt(m)-1) of a linear estimate
       * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
       * a table lookup is faster and needs one less iteration.
       * The 7-bit table gives |e| < 0x1.fdp-9.
       *
       * A Newton-Raphson iteration for r is
       *   s = m*r
       *   d = s*r
       *   u = 3 - d
       *   r = r*u/2
       *
       * Fixed point representations:
       *   m, s, d, u, three are all 2.30; r is 0.32
       */
      m64 = a->frac_hi;
      m32 = m64 >> 32;

      /*
       * NOTE(review): rsqrt_tab is defined outside this view.  If its
       * element type is narrower than int, the shift below is evaluated
       * in (signed) int after promotion -- confirm the table values or
       * build flags make that well defined.
       */
      r32 = rsqrt_tab[index] << 16;
      /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

      s32 = ((uint64_t)m32 * r32) >> 32;
      d32 = ((uint64_t)s32 * r32) >> 32;
      u32 = three32 - d32;

      if (N == 64) {
          /* float64 or smaller */

          r32 = ((uint64_t)r32 * u32) >> 31;
          /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

          s32 = ((uint64_t)m32 * r32) >> 32;
          d32 = ((uint64_t)s32 * r32) >> 32;
          u32 = three32 - d32;

          if (fmt->frac_size <= 23) {
              /* float32 or smaller */

              s32 = ((uint64_t)s32 * u32) >> 32;      /* 3.29 */
              s32 = (s32 - 1) >> 6;                   /* 9.23 */
              /* s < sqrt(m) < s + 0x1.08p-23 */

              /* compute nearest rounded result to 2.23 bits */
              uint32_t d0 = (m32 << 16) - s32 * s32;
              uint32_t d1 = s32 - d0;
              uint32_t d2 = d1 + s32 + 1;
              s32 += d1 >> 31;
              a->frac_hi = (uint64_t)s32 << (64 - 25);

              /* increment or decrement for inexact */
              if (d2 != 0) {
                  a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
              }
              goto done;
          }

          /* float64 */

          r64 = (uint64_t)r32 * u32 * 2;
          /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
          /* Only the high half of each 128-bit product is kept. */
          mul64To128(m64, r64, &s64, &discard);
          mul64To128(s64, r64, &d64, &discard);
          u64 = three64 - d64;

          mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
          s64 = (s64 - 2) >> 9;                  /* 12.52 */

          /* Compute nearest rounded result */
          uint64_t d0 = (m64 << 42) - s64 * s64;
          uint64_t d1 = s64 - d0;
          uint64_t d2 = d1 + s64 + 1;
          s64 += d1 >> 63;
          a->frac_hi = s64 << (64 - 54);

          /* increment or decrement for inexact */
          if (d2 != 0) {
              a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
          }
          goto done;
      }

      /* Remaining case: N != 64, using the 128-bit computation. */
      r64 = (uint64_t)r32 * u32 * 2;
      /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

      mul64To128(m64, r64, &s64, &discard);
      mul64To128(s64, r64, &d64, &discard);
      u64 = three64 - d64;
      mul64To128(u64, r64, &r64, &discard);
      r64 <<= 1;
      /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

      mul64To128(m64, r64, &s64, &discard);
      mul64To128(s64, r64, &d64, &discard);
      u64 = three64 - d64;
      mul64To128(u64, r64, &rh, &rl);
      /* Double the 128-bit value rh:rl. */
      add128(rh, rl, rh, rl, &rh, &rl);
      /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

      mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
      mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
      sub128(three64, 0, dh, dl, &uh, &ul);
      mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
      /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

      sub128(sh, sl, 0, 4, &sh, &sl);
      shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
      /* s < sqrt(m) < s + 1ulp */

      /* Compute nearest rounded result */
      mul64To128(sl, sl, &d0h, &d0l);
      d0h += 2 * sh * sl;
      sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
      sub128(sh, sl, d0h, d0l, &d1h, &d1l);
      add128(sh, sl, 0, 1, &d2h, &d2l);
      add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
      add128(sh, sl, 0, d1h >> 63, &sh, &sl);
      shift128Left(sh, sl, 128 - 114, &sh, &sl);

      /* increment or decrement for inexact */
      if (d2h | d2l) {
          if ((int64_t)(d1h ^ d2h) < 0) {
              sub128(sh, sl, 0, 1, &sh, &sl);
          } else {
              add128(sh, sl, 0, 1, &sh, &sl);
          }
      }
      a->frac_lo = sl;
      a->frac_hi = sh;

   done:
      /* Convert back from base 4 to base 2. */
      a->exp >>= 1;
      if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
          /* Top bit clear: double the fraction to set it. */
          frac_add(a, a, a);
      } else {
          a->exp += 1;
      }
      return;

   d_nan:
      float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
      parts_default_nan(a, status);
  }
  791. /*
  792. * Rounds the floating-point value `a' to an integer, and returns the
  793. * result as a floating-point value. The operation is performed
  794. * according to the IEC/IEEE Standard for Binary Floating-Point
  795. * Arithmetic.
  796. *
  797. * parts_round_to_int_normal is an internal helper function for
  798. * normal numbers only, returning true for inexact but not directly
  799. * raising float_flag_inexact.
  800. */
  801. static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
  802. int scale, int frac_size)
  803. {
  804. uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
  805. int shift_adj;
  806. scale = MIN(MAX(scale, -0x10000), 0x10000);
  807. a->exp += scale;
  808. if (a->exp < 0) {
  809. bool one;
  810. /* All fractional */
  811. switch (rmode) {
  812. case float_round_nearest_even:
  813. one = false;
  814. if (a->exp == -1) {
  815. FloatPartsN tmp;
  816. /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
  817. frac_add(&tmp, a, a);
  818. /* Anything remaining means frac > 0.5. */
  819. one = !frac_eqz(&tmp);
  820. }
  821. break;
  822. case float_round_ties_away:
  823. one = a->exp == -1;
  824. break;
  825. case float_round_to_zero:
  826. one = false;
  827. break;
  828. case float_round_up:
  829. one = !a->sign;
  830. break;
  831. case float_round_down:
  832. one = a->sign;
  833. break;
  834. case float_round_to_odd:
  835. one = true;
  836. break;
  837. default:
  838. g_assert_not_reached();
  839. }
  840. frac_clear(a);
  841. a->exp = 0;
  842. if (one) {
  843. a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
  844. } else {
  845. a->cls = float_class_zero;
  846. }
  847. return true;
  848. }
  849. if (a->exp >= frac_size) {
  850. /* All integral */
  851. return false;
  852. }
  853. if (N > 64 && a->exp < N - 64) {
  854. /*
  855. * Rounding is not in the low word -- shift lsb to bit 2,
  856. * which leaves room for sticky and rounding bit.
  857. */
  858. shift_adj = (N - 1) - (a->exp + 2);
  859. frac_shrjam(a, shift_adj);
  860. frac_lsb = 1 << 2;
  861. } else {
  862. shift_adj = 0;
  863. frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
  864. }
  865. frac_lsbm1 = frac_lsb >> 1;
  866. rnd_mask = frac_lsb - 1;
  867. rnd_even_mask = rnd_mask | frac_lsb;
  868. if (!(a->frac_lo & rnd_mask)) {
  869. /* Fractional bits already clear, undo the shift above. */
  870. frac_shl(a, shift_adj);
  871. return false;
  872. }
  873. switch (rmode) {
  874. case float_round_nearest_even:
  875. inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
  876. break;
  877. case float_round_ties_away:
  878. inc = frac_lsbm1;
  879. break;
  880. case float_round_to_zero:
  881. inc = 0;
  882. break;
  883. case float_round_up:
  884. inc = a->sign ? 0 : rnd_mask;
  885. break;
  886. case float_round_down:
  887. inc = a->sign ? rnd_mask : 0;
  888. break;
  889. case float_round_to_odd:
  890. inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
  891. break;
  892. default:
  893. g_assert_not_reached();
  894. }
  895. if (shift_adj == 0) {
  896. if (frac_addi(a, a, inc)) {
  897. frac_shr(a, 1);
  898. a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
  899. a->exp++;
  900. }
  901. a->frac_lo &= ~rnd_mask;
  902. } else {
  903. frac_addi(a, a, inc);
  904. a->frac_lo &= ~rnd_mask;
  905. /* Be careful shifting back, not to overflow */
  906. frac_shl(a, shift_adj - 1);
  907. if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
  908. a->exp++;
  909. } else {
  910. frac_add(a, a, a);
  911. }
  912. }
  913. return true;
  914. }
  915. static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
  916. int scale, float_status *s,
  917. const FloatFmt *fmt)
  918. {
  919. switch (a->cls) {
  920. case float_class_qnan:
  921. case float_class_snan:
  922. parts_return_nan(a, s);
  923. break;
  924. case float_class_zero:
  925. case float_class_inf:
  926. break;
  927. case float_class_normal:
  928. if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
  929. float_raise(float_flag_inexact, s);
  930. }
  931. break;
  932. default:
  933. g_assert_not_reached();
  934. }
  935. }
  936. /*
  937. * Returns the result of converting the floating-point value `a' to
  938. * the two's complement integer format. The conversion is performed
  939. * according to the IEC/IEEE Standard for Binary Floating-Point
  940. * Arithmetic---which means in particular that the conversion is
  941. * rounded according to the current rounding mode. If `a' is a NaN,
  942. * the largest positive integer is returned. Otherwise, if the
  943. * conversion overflows, the largest integer with the same sign as `a'
  944. * is returned.
  945. */
  946. static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
  947. int scale, int64_t min, int64_t max,
  948. float_status *s)
  949. {
  950. int flags = 0;
  951. uint64_t r;
  952. switch (p->cls) {
  953. case float_class_snan:
  954. flags |= float_flag_invalid_snan;
  955. /* fall through */
  956. case float_class_qnan:
  957. flags |= float_flag_invalid;
  958. r = max;
  959. break;
  960. case float_class_inf:
  961. flags = float_flag_invalid | float_flag_invalid_cvti;
  962. r = p->sign ? min : max;
  963. break;
  964. case float_class_zero:
  965. return 0;
  966. case float_class_normal:
  967. /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
  968. if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
  969. flags = float_flag_inexact;
  970. }
  971. if (p->exp <= DECOMPOSED_BINARY_POINT) {
  972. r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
  973. } else {
  974. r = UINT64_MAX;
  975. }
  976. if (p->sign) {
  977. if (r <= -(uint64_t)min) {
  978. r = -r;
  979. } else {
  980. flags = float_flag_invalid | float_flag_invalid_cvti;
  981. r = min;
  982. }
  983. } else if (r > max) {
  984. flags = float_flag_invalid | float_flag_invalid_cvti;
  985. r = max;
  986. }
  987. break;
  988. default:
  989. g_assert_not_reached();
  990. }
  991. float_raise(flags, s);
  992. return r;
  993. }
  994. /*
  995. * Returns the result of converting the floating-point value `a' to
  996. * the unsigned integer format. The conversion is performed according
  997. * to the IEC/IEEE Standard for Binary Floating-Point
  998. * Arithmetic---which means in particular that the conversion is
  999. * rounded according to the current rounding mode. If `a' is a NaN,
  1000. * the largest unsigned integer is returned. Otherwise, if the
  1001. * conversion overflows, the largest unsigned integer is returned. If
  1002. * the 'a' is negative, the result is rounded and zero is returned;
  1003. * values that do not round to zero will raise the inexact exception
  1004. * flag.
  1005. */
  1006. static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
  1007. int scale, uint64_t max, float_status *s)
  1008. {
  1009. int flags = 0;
  1010. uint64_t r;
  1011. switch (p->cls) {
  1012. case float_class_snan:
  1013. flags |= float_flag_invalid_snan;
  1014. /* fall through */
  1015. case float_class_qnan:
  1016. flags |= float_flag_invalid;
  1017. r = max;
  1018. break;
  1019. case float_class_inf:
  1020. flags = float_flag_invalid | float_flag_invalid_cvti;
  1021. r = p->sign ? 0 : max;
  1022. break;
  1023. case float_class_zero:
  1024. return 0;
  1025. case float_class_normal:
  1026. /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
  1027. if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
  1028. flags = float_flag_inexact;
  1029. if (p->cls == float_class_zero) {
  1030. r = 0;
  1031. break;
  1032. }
  1033. }
  1034. if (p->sign) {
  1035. flags = float_flag_invalid | float_flag_invalid_cvti;
  1036. r = 0;
  1037. } else if (p->exp > DECOMPOSED_BINARY_POINT) {
  1038. flags = float_flag_invalid | float_flag_invalid_cvti;
  1039. r = max;
  1040. } else {
  1041. r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
  1042. if (r > max) {
  1043. flags = float_flag_invalid | float_flag_invalid_cvti;
  1044. r = max;
  1045. }
  1046. }
  1047. break;
  1048. default:
  1049. g_assert_not_reached();
  1050. }
  1051. float_raise(flags, s);
  1052. return r;
  1053. }
  1054. /*
  1055. * Integer to float conversions
  1056. *
  1057. * Returns the result of converting the two's complement integer `a'
  1058. * to the floating-point format. The conversion is performed according
  1059. * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  1060. */
  1061. static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
  1062. int scale, float_status *s)
  1063. {
  1064. uint64_t f = a;
  1065. int shift;
  1066. memset(p, 0, sizeof(*p));
  1067. if (a == 0) {
  1068. p->cls = float_class_zero;
  1069. return;
  1070. }
  1071. p->cls = float_class_normal;
  1072. if (a < 0) {
  1073. f = -f;
  1074. p->sign = true;
  1075. }
  1076. shift = clz64(f);
  1077. scale = MIN(MAX(scale, -0x10000), 0x10000);
  1078. p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
  1079. p->frac_hi = f << shift;
  1080. }
  1081. /*
  1082. * Unsigned Integer to float conversions
  1083. *
  1084. * Returns the result of converting the unsigned integer `a' to the
  1085. * floating-point format. The conversion is performed according to the
  1086. * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  1087. */
  1088. static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
  1089. int scale, float_status *status)
  1090. {
  1091. memset(p, 0, sizeof(*p));
  1092. if (a == 0) {
  1093. p->cls = float_class_zero;
  1094. } else {
  1095. int shift = clz64(a);
  1096. scale = MIN(MAX(scale, -0x10000), 0x10000);
  1097. p->cls = float_class_normal;
  1098. p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
  1099. p->frac_hi = a << shift;
  1100. }
  1101. }
  1102. /*
  1103. * Float min/max.
  1104. */
  1105. static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
  1106. float_status *s, int flags)
  1107. {
  1108. int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
  1109. int a_exp, b_exp, cmp;
  1110. if (unlikely(ab_mask & float_cmask_anynan)) {
  1111. /*
  1112. * For minNum/maxNum (IEEE 754-2008)
  1113. * or minimumNumber/maximumNumber (IEEE 754-2019),
  1114. * if one operand is a QNaN, and the other
  1115. * operand is numerical, then return numerical argument.
  1116. */
  1117. if ((flags & (minmax_isnum | minmax_isnumber))
  1118. && !(ab_mask & float_cmask_snan)
  1119. && (ab_mask & ~float_cmask_qnan)) {
  1120. return is_nan(a->cls) ? b : a;
  1121. }
  1122. /*
  1123. * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
  1124. * are removed and replaced with minimum, minimumNumber, maximum
  1125. * and maximumNumber.
  1126. * minimumNumber/maximumNumber behavior for SNaN is changed to:
  1127. * If both operands are NaNs, a QNaN is returned.
  1128. * If either operand is a SNaN,
  1129. * an invalid operation exception is signaled,
  1130. * but unless both operands are NaNs,
  1131. * the SNaN is otherwise ignored and not converted to a QNaN.
  1132. */
  1133. if ((flags & minmax_isnumber)
  1134. && (ab_mask & float_cmask_snan)
  1135. && (ab_mask & ~float_cmask_anynan)) {
  1136. float_raise(float_flag_invalid, s);
  1137. return is_nan(a->cls) ? b : a;
  1138. }
  1139. return parts_pick_nan(a, b, s);
  1140. }
  1141. a_exp = a->exp;
  1142. b_exp = b->exp;
  1143. if (unlikely(ab_mask != float_cmask_normal)) {
  1144. switch (a->cls) {
  1145. case float_class_normal:
  1146. break;
  1147. case float_class_inf:
  1148. a_exp = INT16_MAX;
  1149. break;
  1150. case float_class_zero:
  1151. a_exp = INT16_MIN;
  1152. break;
  1153. default:
  1154. g_assert_not_reached();
  1155. break;
  1156. }
  1157. switch (b->cls) {
  1158. case float_class_normal:
  1159. break;
  1160. case float_class_inf:
  1161. b_exp = INT16_MAX;
  1162. break;
  1163. case float_class_zero:
  1164. b_exp = INT16_MIN;
  1165. break;
  1166. default:
  1167. g_assert_not_reached();
  1168. break;
  1169. }
  1170. }
  1171. /* Compare magnitudes. */
  1172. cmp = a_exp - b_exp;
  1173. if (cmp == 0) {
  1174. cmp = frac_cmp(a, b);
  1175. }
  1176. /*
  1177. * Take the sign into account.
  1178. * For ismag, only do this if the magnitudes are equal.
  1179. */
  1180. if (!(flags & minmax_ismag) || cmp == 0) {
  1181. if (a->sign != b->sign) {
  1182. /* For differing signs, the negative operand is less. */
  1183. cmp = a->sign ? -1 : 1;
  1184. } else if (a->sign) {
  1185. /* For two negative operands, invert the magnitude comparison. */
  1186. cmp = -cmp;
  1187. }
  1188. }
  1189. if (flags & minmax_ismin) {
  1190. cmp = -cmp;
  1191. }
  1192. return cmp < 0 ? b : a;
  1193. }
  1194. /*
  1195. * Floating point compare
  1196. */
  1197. static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
  1198. float_status *s, bool is_quiet)
  1199. {
  1200. int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
  1201. if (likely(ab_mask == float_cmask_normal)) {
  1202. FloatRelation cmp;
  1203. if (a->sign != b->sign) {
  1204. goto a_sign;
  1205. }
  1206. if (a->exp == b->exp) {
  1207. cmp = frac_cmp(a, b);
  1208. } else if (a->exp < b->exp) {
  1209. cmp = float_relation_less;
  1210. } else {
  1211. cmp = float_relation_greater;
  1212. }
  1213. if (a->sign) {
  1214. cmp = -cmp;
  1215. }
  1216. return cmp;
  1217. }
  1218. if (unlikely(ab_mask & float_cmask_anynan)) {
  1219. if (ab_mask & float_cmask_snan) {
  1220. float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  1221. } else if (!is_quiet) {
  1222. float_raise(float_flag_invalid, s);
  1223. }
  1224. return float_relation_unordered;
  1225. }
  1226. if (ab_mask & float_cmask_zero) {
  1227. if (ab_mask == float_cmask_zero) {
  1228. return float_relation_equal;
  1229. } else if (a->cls == float_class_zero) {
  1230. goto b_sign;
  1231. } else {
  1232. goto a_sign;
  1233. }
  1234. }
  1235. if (ab_mask == float_cmask_inf) {
  1236. if (a->sign == b->sign) {
  1237. return float_relation_equal;
  1238. }
  1239. } else if (b->cls == float_class_inf) {
  1240. goto b_sign;
  1241. } else {
  1242. g_assert(a->cls == float_class_inf);
  1243. }
  1244. a_sign:
  1245. return a->sign ? float_relation_less : float_relation_greater;
  1246. b_sign:
  1247. return b->sign ? float_relation_greater : float_relation_less;
  1248. }
  1249. /*
  1250. * Multiply A by 2 raised to the power N.
  1251. */
  1252. static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
  1253. {
  1254. switch (a->cls) {
  1255. case float_class_snan:
  1256. case float_class_qnan:
  1257. parts_return_nan(a, s);
  1258. break;
  1259. case float_class_zero:
  1260. case float_class_inf:
  1261. break;
  1262. case float_class_normal:
  1263. a->exp += MIN(MAX(n, -0x10000), 0x10000);
  1264. break;
  1265. default:
  1266. g_assert_not_reached();
  1267. }
  1268. }
  1269. /*
  1270. * Return log2(A)
  1271. */
  1272. static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
  1273. {
  1274. uint64_t a0, a1, r, t, ign;
  1275. FloatPartsN f;
  1276. int i, n, a_exp, f_exp;
  1277. if (unlikely(a->cls != float_class_normal)) {
  1278. switch (a->cls) {
  1279. case float_class_snan:
  1280. case float_class_qnan:
  1281. parts_return_nan(a, s);
  1282. return;
  1283. case float_class_zero:
  1284. float_raise(float_flag_divbyzero, s);
  1285. /* log2(0) = -inf */
  1286. a->cls = float_class_inf;
  1287. a->sign = 1;
  1288. return;
  1289. case float_class_inf:
  1290. if (unlikely(a->sign)) {
  1291. goto d_nan;
  1292. }
  1293. return;
  1294. default:
  1295. break;
  1296. }
  1297. g_assert_not_reached();
  1298. }
  1299. if (unlikely(a->sign)) {
  1300. goto d_nan;
  1301. }
  1302. /* TODO: This algorithm looses bits too quickly for float128. */
  1303. g_assert(N == 64);
  1304. a_exp = a->exp;
  1305. f_exp = -1;
  1306. r = 0;
  1307. t = DECOMPOSED_IMPLICIT_BIT;
  1308. a0 = a->frac_hi;
  1309. a1 = 0;
  1310. n = fmt->frac_size + 2;
  1311. if (unlikely(a_exp == -1)) {
  1312. /*
  1313. * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
  1314. * When the value is very close to 1.0, there are lots of 1's in
  1315. * the msb parts of the fraction. At the end, when we subtract
  1316. * this value from -1.0, we can see a catastrophic loss of precision,
  1317. * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
  1318. * bits of y in the final result. To minimize this, compute as many
  1319. * digits as we can.
  1320. * ??? This case needs another algorithm to avoid this.
  1321. */
  1322. n = fmt->frac_size * 2 + 2;
  1323. /* Don't compute a value overlapping the sticky bit */
  1324. n = MIN(n, 62);
  1325. }
  1326. for (i = 0; i < n; i++) {
  1327. if (a1) {
  1328. mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
  1329. } else if (a0 & 0xffffffffull) {
  1330. mul64To128(a0, a0, &a0, &a1);
  1331. } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
  1332. a0 >>= 32;
  1333. a0 *= a0;
  1334. } else {
  1335. goto exact;
  1336. }
  1337. if (a0 & DECOMPOSED_IMPLICIT_BIT) {
  1338. if (unlikely(a_exp == 0 && r == 0)) {
  1339. /*
  1340. * When a_exp == 0, we're computing the log2 of a value
  1341. * [1.0,2.0). When the value is very close to 1.0, there
  1342. * are lots of 0's in the msb parts of the fraction.
  1343. * We need to compute more digits to produce a correct
  1344. * result -- restart at the top of the fraction.
  1345. * ??? This is likely to lose precision quickly, as for
  1346. * float128; we may need another method.
  1347. */
  1348. f_exp -= i;
  1349. t = r = DECOMPOSED_IMPLICIT_BIT;
  1350. i = 0;
  1351. } else {
  1352. r |= t;
  1353. }
  1354. } else {
  1355. add128(a0, a1, a0, a1, &a0, &a1);
  1356. }
  1357. t >>= 1;
  1358. }
  1359. /* Set sticky for inexact. */
  1360. r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
  1361. exact:
  1362. parts_sint_to_float(a, a_exp, 0, s);
  1363. if (r == 0) {
  1364. return;
  1365. }
  1366. memset(&f, 0, sizeof(f));
  1367. f.cls = float_class_normal;
  1368. f.frac_hi = r;
  1369. f.exp = f_exp - frac_normalize(&f);
  1370. if (a_exp < 0) {
  1371. parts_sub_normal(a, &f);
  1372. } else if (a_exp > 0) {
  1373. parts_add_normal(a, &f);
  1374. } else {
  1375. *a = f;
  1376. }
  1377. return;
  1378. d_nan:
  1379. float_raise(float_flag_invalid, s);
  1380. parts_default_nan(a, s);
  1381. }