/* softfloat-macros.h */
  1. /*
  2. * QEMU float support macros
  3. *
  4. * The code in this source file is derived from release 2a of the SoftFloat
  5. * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
  6. * some later contributions) are provided under that license, as detailed below.
  7. * It has subsequently been modified by contributors to the QEMU Project,
  8. * so some portions are provided under:
  9. * the SoftFloat-2a license
  10. * the BSD license
  11. * GPL-v2-or-later
  12. *
  13. * Any future contributions to this file after December 1st 2014 will be
  14. * taken to be licensed under the Softfloat-2a license unless specifically
  15. * indicated otherwise.
  16. */
  17. /*
  18. ===============================================================================
  19. This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
  20. Arithmetic Package, Release 2a.
  21. Written by John R. Hauser. This work was made possible in part by the
  22. International Computer Science Institute, located at Suite 600, 1947 Center
  23. Street, Berkeley, California 94704. Funding was partially provided by the
  24. National Science Foundation under grant MIP-9311980. The original version
  25. of this code was written as part of a project to build a fixed-point vector
  26. processor in collaboration with the University of California at Berkeley,
  27. overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  28. is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  29. arithmetic/SoftFloat.html'.
  30. THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
  31. has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  32. TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
  33. PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  34. AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  35. Derivative works are acceptable, even for commercial purposes, so long as
  36. (1) they include prominent notice that the work is derivative, and (2) they
  37. include prominent notice akin to these four paragraphs for those parts of
  38. this code that are retained.
  39. ===============================================================================
  40. */
  41. /* BSD licensing:
  42. * Copyright (c) 2006, Fabrice Bellard
  43. * All rights reserved.
  44. *
  45. * Redistribution and use in source and binary forms, with or without
  46. * modification, are permitted provided that the following conditions are met:
  47. *
  48. * 1. Redistributions of source code must retain the above copyright notice,
  49. * this list of conditions and the following disclaimer.
  50. *
  51. * 2. Redistributions in binary form must reproduce the above copyright notice,
  52. * this list of conditions and the following disclaimer in the documentation
  53. * and/or other materials provided with the distribution.
  54. *
  55. * 3. Neither the name of the copyright holder nor the names of its contributors
  56. * may be used to endorse or promote products derived from this software without
  57. * specific prior written permission.
  58. *
  59. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  60. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  61. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  62. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  63. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  64. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  65. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  66. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  67. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  68. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  69. * THE POSSIBILITY OF SUCH DAMAGE.
  70. */
  71. /* Portions of this work are licensed under the terms of the GNU GPL,
  72. * version 2 or later. See the COPYING file in the top-level directory.
  73. */
/*----------------------------------------------------------------------------
| This macro tests for minimum version of the GNU C compiler.
*----------------------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
/* Packs (major, minor) into a single integer so the pair can be compared
   with one >=; evaluates to 1 when the compiler is at least maj.min. */
# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#else
/* Not GCC (or no minor version macro): report "too old" unconditionally. */
# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
#endif
  83. /*----------------------------------------------------------------------------
  84. | Shifts `a' right by the number of bits given in `count'. If any nonzero
  85. | bits are shifted off, they are ``jammed'' into the least significant bit of
  86. | the result by setting the least significant bit to 1. The value of `count'
  87. | can be arbitrarily large; in particular, if `count' is greater than 32, the
  88. | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  89. | The result is stored in the location pointed to by `zPtr'.
  90. *----------------------------------------------------------------------------*/
  91. static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
  92. {
  93. uint32_t z;
  94. if ( count == 0 ) {
  95. z = a;
  96. }
  97. else if ( count < 32 ) {
  98. z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
  99. }
  100. else {
  101. z = ( a != 0 );
  102. }
  103. *zPtr = z;
  104. }
  105. /*----------------------------------------------------------------------------
  106. | Shifts `a' right by the number of bits given in `count'. If any nonzero
  107. | bits are shifted off, they are ``jammed'' into the least significant bit of
  108. | the result by setting the least significant bit to 1. The value of `count'
  109. | can be arbitrarily large; in particular, if `count' is greater than 64, the
  110. | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  111. | The result is stored in the location pointed to by `zPtr'.
  112. *----------------------------------------------------------------------------*/
  113. static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
  114. {
  115. uint64_t z;
  116. if ( count == 0 ) {
  117. z = a;
  118. }
  119. else if ( count < 64 ) {
  120. z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
  121. }
  122. else {
  123. z = ( a != 0 );
  124. }
  125. *zPtr = z;
  126. }
  127. /*----------------------------------------------------------------------------
  128. | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
  129. | _plus_ the number of bits given in `count'. The shifted result is at most
  130. | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
  131. | bits shifted off form a second 64-bit result as follows: The _last_ bit
  132. | shifted off is the most-significant bit of the extra result, and the other
  133. | 63 bits of the extra result are all zero if and only if _all_but_the_last_
  134. | bits shifted off were all zero. This extra result is stored in the location
  135. | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
  136. | (This routine makes more sense if `a0' and `a1' are considered to form a
  137. | fixed-point value with binary point between `a0' and `a1'. This fixed-point
  138. | value is shifted right by the number of bits given in `count', and the
  139. | integer part of the result is returned at the location pointed to by
  140. | `z0Ptr'. The fractional part of the result may be slightly corrupted as
  141. | described above, and is returned at the location pointed to by `z1Ptr'.)
  142. *----------------------------------------------------------------------------*/
  143. static inline void
  144. shift64ExtraRightJamming(
  145. uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
  146. {
  147. uint64_t z0, z1;
  148. int8_t negCount = ( - count ) & 63;
  149. if ( count == 0 ) {
  150. z1 = a1;
  151. z0 = a0;
  152. }
  153. else if ( count < 64 ) {
  154. z1 = ( a0<<negCount ) | ( a1 != 0 );
  155. z0 = a0>>count;
  156. }
  157. else {
  158. if ( count == 64 ) {
  159. z1 = a0 | ( a1 != 0 );
  160. }
  161. else {
  162. z1 = ( ( a0 | a1 ) != 0 );
  163. }
  164. z0 = 0;
  165. }
  166. *z1Ptr = z1;
  167. *z0Ptr = z0;
  168. }
  169. /*----------------------------------------------------------------------------
  170. | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
  171. | number of bits given in `count'. Any bits shifted off are lost. The value
  172. | of `count' can be arbitrarily large; in particular, if `count' is greater
  173. | than 128, the result will be 0. The result is broken into two 64-bit pieces
  174. | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  175. *----------------------------------------------------------------------------*/
  176. static inline void
  177. shift128Right(
  178. uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
  179. {
  180. uint64_t z0, z1;
  181. int8_t negCount = ( - count ) & 63;
  182. if ( count == 0 ) {
  183. z1 = a1;
  184. z0 = a0;
  185. }
  186. else if ( count < 64 ) {
  187. z1 = ( a0<<negCount ) | ( a1>>count );
  188. z0 = a0>>count;
  189. }
  190. else {
  191. z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
  192. z0 = 0;
  193. }
  194. *z1Ptr = z1;
  195. *z0Ptr = z0;
  196. }
  197. /*----------------------------------------------------------------------------
  198. | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
  199. | number of bits given in `count'. If any nonzero bits are shifted off, they
  200. | are ``jammed'' into the least significant bit of the result by setting the
  201. | least significant bit to 1. The value of `count' can be arbitrarily large;
  202. | in particular, if `count' is greater than 128, the result will be either
  203. | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
  204. | nonzero. The result is broken into two 64-bit pieces which are stored at
  205. | the locations pointed to by `z0Ptr' and `z1Ptr'.
  206. *----------------------------------------------------------------------------*/
  207. static inline void
  208. shift128RightJamming(
  209. uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
  210. {
  211. uint64_t z0, z1;
  212. int8_t negCount = ( - count ) & 63;
  213. if ( count == 0 ) {
  214. z1 = a1;
  215. z0 = a0;
  216. }
  217. else if ( count < 64 ) {
  218. z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
  219. z0 = a0>>count;
  220. }
  221. else {
  222. if ( count == 64 ) {
  223. z1 = a0 | ( a1 != 0 );
  224. }
  225. else if ( count < 128 ) {
  226. z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
  227. }
  228. else {
  229. z1 = ( ( a0 | a1 ) != 0 );
  230. }
  231. z0 = 0;
  232. }
  233. *z1Ptr = z1;
  234. *z0Ptr = z0;
  235. }
/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
| the most-significant bit of the extra result, and the other 63 bits of the
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
| were all zero.  This extra result is stored in the location pointed to by
| `z2Ptr'.  The value of `count' can be arbitrarily large.
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
| to form a fixed-point value with binary point between `a1' and `a2'.  This
| fixed-point value is shifted right by the number of bits given in `count',
| and the integer part of the result is returned at the locations pointed to
| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
| corrupted as described above, and is returned at the location pointed to by
| `z2Ptr'.)
*----------------------------------------------------------------------------*/
static inline void
 shift128ExtraRightJamming(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    /* Complementary shift: number of bit positions NOT shifted out of a word. */
    int8_t negCount = ( - count ) & 63;

    if ( count == 0 ) {
        z2 = a2;
        z1 = a1;
        z0 = a0;
    }
    else {
        if ( count < 64 ) {
            /* Bits leaving a1 become the high part of the extra word. */
            z2 = a1<<negCount;
            z1 = ( a0<<negCount ) | ( a1>>count );
            z0 = a0>>count;
        }
        else {
            if ( count == 64 ) {
                /* Whole-word shift: a1 becomes the extra word verbatim. */
                z2 = a1;
                z1 = a0;
            }
            else {
                /* count > 64: all of a1 is shifted off before the last bit,
                   so fold it into a2 for the sticky computation below. */
                a2 |= a1;
                if ( count < 128 ) {
                    z2 = a0<<negCount;
                    z1 = a0>>( count & 63 );
                }
                else {
                    /* count == 128 keeps a0 as the extra word exactly;
                       beyond that a0 itself collapses to a sticky bit. */
                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
                    z1 = 0;
                }
            }
            z0 = 0;
        }
        /* Jam: any nonzero bits shifted past the extra word set its LSB. */
        z2 |= ( a2 != 0 );
    }
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;
}
  302. /*----------------------------------------------------------------------------
  303. | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
  304. | number of bits given in `count'. Any bits shifted off are lost. The value
  305. | of `count' must be less than 64. The result is broken into two 64-bit
  306. | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  307. *----------------------------------------------------------------------------*/
  308. static inline void
  309. shortShift128Left(
  310. uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
  311. {
  312. *z1Ptr = a1<<count;
  313. *z0Ptr =
  314. ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
  315. }
  316. /*----------------------------------------------------------------------------
  317. | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
  318. | by the number of bits given in `count'. Any bits shifted off are lost.
  319. | The value of `count' must be less than 64. The result is broken into three
  320. | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
  321. | `z1Ptr', and `z2Ptr'.
  322. *----------------------------------------------------------------------------*/
  323. static inline void
  324. shortShift192Left(
  325. uint64_t a0,
  326. uint64_t a1,
  327. uint64_t a2,
  328. int count,
  329. uint64_t *z0Ptr,
  330. uint64_t *z1Ptr,
  331. uint64_t *z2Ptr
  332. )
  333. {
  334. uint64_t z0, z1, z2;
  335. int8_t negCount;
  336. z2 = a2<<count;
  337. z1 = a1<<count;
  338. z0 = a0<<count;
  339. if ( 0 < count ) {
  340. negCount = ( ( - count ) & 63 );
  341. z1 |= a2>>negCount;
  342. z0 |= a1>>negCount;
  343. }
  344. *z2Ptr = z2;
  345. *z1Ptr = z1;
  346. *z0Ptr = z0;
  347. }
  348. /*----------------------------------------------------------------------------
  349. | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
  350. | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
  351. | any carry out is lost. The result is broken into two 64-bit pieces which
  352. | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  353. *----------------------------------------------------------------------------*/
  354. static inline void
  355. add128(
  356. uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
  357. {
  358. uint64_t z1;
  359. z1 = a1 + b1;
  360. *z1Ptr = z1;
  361. *z0Ptr = a0 + b0 + ( z1 < a1 );
  362. }
  363. /*----------------------------------------------------------------------------
  364. | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
  365. | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
  366. | modulo 2^192, so any carry out is lost. The result is broken into three
  367. | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
  368. | `z1Ptr', and `z2Ptr'.
  369. *----------------------------------------------------------------------------*/
  370. static inline void
  371. add192(
  372. uint64_t a0,
  373. uint64_t a1,
  374. uint64_t a2,
  375. uint64_t b0,
  376. uint64_t b1,
  377. uint64_t b2,
  378. uint64_t *z0Ptr,
  379. uint64_t *z1Ptr,
  380. uint64_t *z2Ptr
  381. )
  382. {
  383. uint64_t z0, z1, z2;
  384. int8_t carry0, carry1;
  385. z2 = a2 + b2;
  386. carry1 = ( z2 < a2 );
  387. z1 = a1 + b1;
  388. carry0 = ( z1 < a1 );
  389. z0 = a0 + b0;
  390. z1 += carry1;
  391. z0 += ( z1 < carry1 );
  392. z0 += carry0;
  393. *z2Ptr = z2;
  394. *z1Ptr = z1;
  395. *z0Ptr = z0;
  396. }
  397. /*----------------------------------------------------------------------------
  398. | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
  399. | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
  400. | 2^128, so any borrow out (carry out) is lost. The result is broken into two
  401. | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
  402. | `z1Ptr'.
  403. *----------------------------------------------------------------------------*/
  404. static inline void
  405. sub128(
  406. uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
  407. {
  408. *z1Ptr = a1 - b1;
  409. *z0Ptr = a0 - b0 - ( a1 < b1 );
  410. }
  411. /*----------------------------------------------------------------------------
  412. | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
  413. | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
  414. | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
  415. | result is broken into three 64-bit pieces which are stored at the locations
  416. | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
  417. *----------------------------------------------------------------------------*/
  418. static inline void
  419. sub192(
  420. uint64_t a0,
  421. uint64_t a1,
  422. uint64_t a2,
  423. uint64_t b0,
  424. uint64_t b1,
  425. uint64_t b2,
  426. uint64_t *z0Ptr,
  427. uint64_t *z1Ptr,
  428. uint64_t *z2Ptr
  429. )
  430. {
  431. uint64_t z0, z1, z2;
  432. int8_t borrow0, borrow1;
  433. z2 = a2 - b2;
  434. borrow1 = ( a2 < b2 );
  435. z1 = a1 - b1;
  436. borrow0 = ( a1 < b1 );
  437. z0 = a0 - b0;
  438. z0 -= ( z1 < borrow1 );
  439. z1 -= borrow1;
  440. z0 -= borrow0;
  441. *z2Ptr = z2;
  442. *z1Ptr = z1;
  443. *z0Ptr = z0;
  444. }
  445. /*----------------------------------------------------------------------------
  446. | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
  447. | into two 64-bit pieces which are stored at the locations pointed to by
  448. | `z0Ptr' and `z1Ptr'.
  449. *----------------------------------------------------------------------------*/
  450. static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
  451. {
  452. uint32_t aHigh, aLow, bHigh, bLow;
  453. uint64_t z0, zMiddleA, zMiddleB, z1;
  454. aLow = a;
  455. aHigh = a>>32;
  456. bLow = b;
  457. bHigh = b>>32;
  458. z1 = ( (uint64_t) aLow ) * bLow;
  459. zMiddleA = ( (uint64_t) aLow ) * bHigh;
  460. zMiddleB = ( (uint64_t) aHigh ) * bLow;
  461. z0 = ( (uint64_t) aHigh ) * bHigh;
  462. zMiddleA += zMiddleB;
  463. z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
  464. zMiddleA <<= 32;
  465. z1 += zMiddleA;
  466. z0 += ( z1 < zMiddleA );
  467. *z1Ptr = z1;
  468. *z0Ptr = z0;
  469. }
  470. /*----------------------------------------------------------------------------
  471. | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
  472. | `b' to obtain a 192-bit product. The product is broken into three 64-bit
  473. | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
  474. | `z2Ptr'.
  475. *----------------------------------------------------------------------------*/
  476. static inline void
  477. mul128By64To192(
  478. uint64_t a0,
  479. uint64_t a1,
  480. uint64_t b,
  481. uint64_t *z0Ptr,
  482. uint64_t *z1Ptr,
  483. uint64_t *z2Ptr
  484. )
  485. {
  486. uint64_t z0, z1, z2, more1;
  487. mul64To128( a1, b, &z1, &z2 );
  488. mul64To128( a0, b, &z0, &more1 );
  489. add128( z0, more1, 0, z1, &z0, &z1 );
  490. *z2Ptr = z2;
  491. *z1Ptr = z1;
  492. *z0Ptr = z0;
  493. }
  494. /*----------------------------------------------------------------------------
  495. | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
  496. | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
  497. | product. The product is broken into four 64-bit pieces which are stored at
  498. | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
  499. *----------------------------------------------------------------------------*/
  500. static inline void
  501. mul128To256(
  502. uint64_t a0,
  503. uint64_t a1,
  504. uint64_t b0,
  505. uint64_t b1,
  506. uint64_t *z0Ptr,
  507. uint64_t *z1Ptr,
  508. uint64_t *z2Ptr,
  509. uint64_t *z3Ptr
  510. )
  511. {
  512. uint64_t z0, z1, z2, z3;
  513. uint64_t more1, more2;
  514. mul64To128( a1, b1, &z2, &z3 );
  515. mul64To128( a1, b0, &z1, &more2 );
  516. add128( z1, more2, 0, z2, &z1, &z2 );
  517. mul64To128( a0, b0, &z0, &more1 );
  518. add128( z0, more1, 0, z1, &z0, &z1 );
  519. mul64To128( a0, b1, &more1, &more2 );
  520. add128( more1, more2, 0, z2, &more1, &z2 );
  521. add128( z0, z1, 0, more1, &z0, &z1 );
  522. *z3Ptr = z3;
  523. *z2Ptr = z2;
  524. *z1Ptr = z1;
  525. *z0Ptr = z0;
  526. }
/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient obtained by dividing
| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
| toward zero, the approximation returned lies between q and q + 2 inclusive.
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
| unsigned integer is returned.
*----------------------------------------------------------------------------*/
static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
{
    uint64_t b0, b1;
    uint64_t rem0, rem1, term0, term1;
    uint64_t z;

    /* Quotient would not fit in 64 bits: saturate. */
    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    b0 = b>>32;
    /* Estimate the high 32 quotient bits from the top halves, saturating
       when even that division would overflow 32 bits. */
    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    mul64To128( b, z, &term0, &term1 );
    sub128( a0, a1, term0, term1, &rem0, &rem1 );
    /* The estimate may exceed the true quotient; while the remainder is
       negative, step z down by one high-digit unit and add back b<<32. */
    while ( ( (int64_t) rem0 ) < 0 ) {
        z -= LIT64( 0x100000000 );
        b1 = b<<32;
        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    }
    /* Shift the remainder up and estimate the low 32 quotient bits the
       same way; no correction loop is needed for the stated q..q+2 bound. */
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    return z;
}
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
| `aExp' (the least significant bit) is 1, the integer returned approximates
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
| case, the approximation returned lies strictly within +/-2 of the exact
| value.
*----------------------------------------------------------------------------*/
static uint32_t estimateSqrt32(int aExp, uint32_t a)
{
    /* Per-interval correction constants for the initial linear estimate,
       indexed by the 4 bits below the (always-set) top bit of `a'.  One
       table is used for odd exponents, one for even. */
    static const uint16_t sqrtOddAdjustments[] = {
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
    static const uint16_t sqrtEvenAdjustments[] = {
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
    int8_t index;
    uint32_t z;

    index = ( a>>27 ) & 15;
    if ( aExp & 1 ) {
        /* Odd exponent: 16-bit seed, then one Newton-Raphson-style
           refinement producing a 32-bit estimate. */
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
        z = ( ( a / z )<<14 ) + ( z<<15 );
        a >>= 1;
    }
    else {
        /* Even exponent: analogous seed and refinement, with clamping. */
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
        z = a / z + z;
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
        /* If the estimate fell at or below a, fall back to a/2. */
        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
    }
    /* Final Newton-Raphson step at full precision: (a*2^31/z + z) / 2. */
    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
}
  589. /*----------------------------------------------------------------------------
  590. | Returns the number of leading 0 bits before the most-significant 1 bit of
  591. | `a'. If `a' is zero, 32 is returned.
  592. *----------------------------------------------------------------------------*/
  593. static int8_t countLeadingZeros32( uint32_t a )
  594. {
  595. #if SOFTFLOAT_GNUC_PREREQ(3, 4)
  596. if (a) {
  597. return __builtin_clz(a);
  598. } else {
  599. return 32;
  600. }
  601. #else
  602. static const int8_t countLeadingZerosHigh[] = {
  603. 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  604. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  605. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  606. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  607. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  608. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  609. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  610. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  611. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  612. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  613. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  614. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  615. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  616. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  617. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  618. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  619. };
  620. int8_t shiftCount;
  621. shiftCount = 0;
  622. if ( a < 0x10000 ) {
  623. shiftCount += 16;
  624. a <<= 16;
  625. }
  626. if ( a < 0x1000000 ) {
  627. shiftCount += 8;
  628. a <<= 8;
  629. }
  630. shiftCount += countLeadingZerosHigh[ a>>24 ];
  631. return shiftCount;
  632. #endif
  633. }
  634. /*----------------------------------------------------------------------------
  635. | Returns the number of leading 0 bits before the most-significant 1 bit of
  636. | `a'. If `a' is zero, 64 is returned.
  637. *----------------------------------------------------------------------------*/
  638. static int8_t countLeadingZeros64( uint64_t a )
  639. {
  640. #if SOFTFLOAT_GNUC_PREREQ(3, 4)
  641. if (a) {
  642. return __builtin_clzll(a);
  643. } else {
  644. return 64;
  645. }
  646. #else
  647. int8_t shiftCount;
  648. shiftCount = 0;
  649. if ( a < ( (uint64_t) 1 )<<32 ) {
  650. shiftCount += 32;
  651. }
  652. else {
  653. a >>= 32;
  654. }
  655. shiftCount += countLeadingZeros32( a );
  656. return shiftCount;
  657. #endif
  658. }
  659. /*----------------------------------------------------------------------------
  660. | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
  661. | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
  662. | Otherwise, returns 0.
  663. *----------------------------------------------------------------------------*/
  664. static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
  665. {
  666. return ( a0 == b0 ) && ( a1 == b1 );
  667. }
  668. /*----------------------------------------------------------------------------
  669. | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
  670. | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
  671. | Otherwise, returns 0.
  672. *----------------------------------------------------------------------------*/
  673. static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
  674. {
  675. return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
  676. }
  677. /*----------------------------------------------------------------------------
  678. | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
  679. | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
  680. | returns 0.
  681. *----------------------------------------------------------------------------*/
  682. static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
  683. {
  684. return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
  685. }
  686. /*----------------------------------------------------------------------------
  687. | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
  688. | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
  689. | Otherwise, returns 0.
  690. *----------------------------------------------------------------------------*/
  691. static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
  692. {
  693. return ( a0 != b0 ) || ( a1 != b1 );
  694. }