123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475547654775478547954805481548254835484548554865487548854895490549154925493549454955496549754985499550055015502550355045505550655075508550955105511551255135514551555165517551855195520552155225523552455255526552755285529553055315532553355345535553655375538553955405541554255435544554555465547554855495550555155525553555455555556555755585559556055615562556355645565556655675568556955705571557255735574557555765577557855795580558155825583558455855586558755885589559055915592559355945595559655975598559956005601560256035604560556065607560856095610561156125613561456155616561756185619562056215622562356245625562656275628562956305631563256335634563556365637563856395640564156425643564456455646564756485649565056515652565356545655565656575658565956605661566256635664566556665667566856695670567156725673567456755676567756785679568056815682568356845685568656875688568956905691569256935694569556965697569856995700570157025703570457055706570757085709571057115712571357145715571657175718571957205721572257235724572557265727572857295730573157325733573457355736573757385739574057415742574357445745574657475748574957505751575257535754575557565757575857595760576157625763576457655766576757685769577057715772577357745775577657775778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766777677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909691069116912691369146915691669176918691969206921692269236924692569266927692869296930693169326933693469356936693769386939694069416942694369446945694669476948694969506951695269536954695569566957695869596960696169626963696469656966696769686969697069716972697369746975697669776978697969806981698269836984698569866987698869896990699169926993699469956996699769986999700070017002700370047005700670077008700970107011701270137014701570167017701870197020702170227023702470257026702770287029703070317032703370347035703670377038703970407041704270437044704570467047704870497050705170527053705470557056705770587059706070617062706370647065706670677068706970707071707270737074707570767077707870797080708170827083708470857086708770887089709070917092709370947095709670977098709971007101710271037104710571067107710871097110711171127113711471157116711771187119712071217122712371247125712671277128712971307131713271337134713571367137713871397140714171427143714471457146714771487149715071517152715371547155715671577158715971607161716271637164716571667167716871697170717171727173717471757176717771787179718071817182718371847185718671877188718971907191719271937194719571967197719871997200720172027203720472057206720772087209721072117212721372147215721672177218721972207221722272237224722572267227722872297230723172327233 |
- /*
- * QEMU float support
- *
- * The code in this source file is derived from release 2a of the SoftFloat
- * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
- * some later contributions) are provided under that license, as detailed below.
- * It has subsequently been modified by contributors to the QEMU Project,
- * so some portions are provided under:
- * the SoftFloat-2a license
- * the BSD license
- * GPL-v2-or-later
- *
- * Any future contributions to this file after December 1st 2014 will be
- * taken to be licensed under the Softfloat-2a license unless specifically
- * indicated otherwise.
- */
- /*
- ===============================================================================
- This C source file is part of the SoftFloat IEC/IEEE Floating-point
- Arithmetic Package, Release 2a.
- Written by John R. Hauser. This work was made possible in part by the
- International Computer Science Institute, located at Suite 600, 1947 Center
- Street, Berkeley, California 94704. Funding was partially provided by the
- National Science Foundation under grant MIP-9311980. The original version
- of this code was written as part of a project to build a fixed-point vector
- processor in collaboration with the University of California at Berkeley,
- overseen by Profs. Nelson Morgan and John Wawrzynek. More information
- is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
- arithmetic/SoftFloat.html'.
- THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
- has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
- TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
- PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
- AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
- Derivative works are acceptable, even for commercial purposes, so long as
- (1) they include prominent notice that the work is derivative, and (2) they
- include prominent notice akin to these four paragraphs for those parts of
- this code that are retained.
- ===============================================================================
- */
- /* BSD licensing:
- * Copyright (c) 2006, Fabrice Bellard
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
- /* Portions of this work are licensed under the terms of the GNU GPL,
- * version 2 or later. See the COPYING file in the top-level directory.
- */
- /* softfloat (and in particular the code in softfloat-specialize.h) is
- * target-dependent and needs the TARGET_* macros.
- */
- #include "qemu/osdep.h"
- #include <math.h>
- #include "qemu/bitops.h"
- #include "fpu/softfloat.h"
- /* We only need stdlib for abort() */
- /*----------------------------------------------------------------------------
- | Primitive arithmetic functions, including multi-word arithmetic, and
- | division and square root approximations. (Can be specialized to target if
- | desired.)
- *----------------------------------------------------------------------------*/
- #include "fpu/softfloat-macros.h"
- /*
- * Hardfloat
- *
- * Fast emulation of guest FP instructions is challenging for two reasons.
- * First, FP instruction semantics are similar but not identical, particularly
- * when handling NaNs. Second, emulating at reasonable speed the guest FP
- * exception flags is not trivial: reading the host's flags register with a
- * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
- * and trapping on every FP exception is not fast nor pleasant to work with.
- *
- * We address these challenges by leveraging the host FPU for a subset of the
- * operations. To do this we expand on the idea presented in this paper:
- *
- * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
- * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
- *
- * The idea is thus to leverage the host FPU to (1) compute FP operations
- * and (2) identify whether FP exceptions occurred while avoiding
- * expensive exception flag register accesses.
- *
- * An important optimization shown in the paper is that given that exception
- * flags are rarely cleared by the guest, we can avoid recomputing some flags.
- * This is particularly useful for the inexact flag, which is very frequently
- * raised in floating-point workloads.
- *
- * We optimize the code further by deferring to soft-fp whenever FP exception
- * detection might get hairy. Two examples: (1) when at least one operand is
- * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
- * and the result is < the minimum normal.
- */
- #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
- static inline void name(soft_t *a, float_status *s) \
- { \
- if (unlikely(soft_t ## _is_denormal(*a))) { \
- *a = soft_t ## _set_sign(soft_t ## _zero, \
- soft_t ## _is_neg(*a)); \
- s->float_exception_flags |= float_flag_input_denormal; \
- } \
- }
- GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
- GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
- #undef GEN_INPUT_FLUSH__NOCHECK
- #define GEN_INPUT_FLUSH1(name, soft_t) \
- static inline void name(soft_t *a, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- }
- GEN_INPUT_FLUSH1(float32_input_flush1, float32)
- GEN_INPUT_FLUSH1(float64_input_flush1, float64)
- #undef GEN_INPUT_FLUSH1
- #define GEN_INPUT_FLUSH2(name, soft_t) \
- static inline void name(soft_t *a, soft_t *b, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- soft_t ## _input_flush__nocheck(b, s); \
- }
- GEN_INPUT_FLUSH2(float32_input_flush2, float32)
- GEN_INPUT_FLUSH2(float64_input_flush2, float64)
- #undef GEN_INPUT_FLUSH2
- #define GEN_INPUT_FLUSH3(name, soft_t) \
- static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
- { \
- if (likely(!s->flush_inputs_to_zero)) { \
- return; \
- } \
- soft_t ## _input_flush__nocheck(a, s); \
- soft_t ## _input_flush__nocheck(b, s); \
- soft_t ## _input_flush__nocheck(c, s); \
- }
- GEN_INPUT_FLUSH3(float32_input_flush3, float32)
- GEN_INPUT_FLUSH3(float64_input_flush3, float64)
- #undef GEN_INPUT_FLUSH3
- /*
- * Choose whether to use fpclassify or float32/64_* primitives in the generated
- * hardfloat functions. Each combination of number of inputs and float size
- * gets its own value.
- */
- #if defined(__x86_64__)
- # define QEMU_HARDFLOAT_1F32_USE_FP 0
- # define QEMU_HARDFLOAT_1F64_USE_FP 1
- # define QEMU_HARDFLOAT_2F32_USE_FP 0
- # define QEMU_HARDFLOAT_2F64_USE_FP 1
- # define QEMU_HARDFLOAT_3F32_USE_FP 0
- # define QEMU_HARDFLOAT_3F64_USE_FP 1
- #else
- # define QEMU_HARDFLOAT_1F32_USE_FP 0
- # define QEMU_HARDFLOAT_1F64_USE_FP 0
- # define QEMU_HARDFLOAT_2F32_USE_FP 0
- # define QEMU_HARDFLOAT_2F64_USE_FP 0
- # define QEMU_HARDFLOAT_3F32_USE_FP 0
- # define QEMU_HARDFLOAT_3F64_USE_FP 0
- #endif
- /*
- * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
- * float{32,64}_is_infinity when !USE_FP.
- * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
- * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
- */
- #if defined(__x86_64__) || defined(__aarch64__)
- # define QEMU_HARDFLOAT_USE_ISINF 1
- #else
- # define QEMU_HARDFLOAT_USE_ISINF 0
- #endif
- /*
- * Some targets clear the FP flags before most FP operations. This prevents
- * the use of hardfloat, since hardfloat relies on the inexact flag being
- * already set.
- */
- #if defined(TARGET_PPC) || defined(__FAST_MATH__)
- # if defined(__FAST_MATH__)
- # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
- IEEE implementation
- # endif
- # define QEMU_NO_HARDFLOAT 1
- # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
- #else
- # define QEMU_NO_HARDFLOAT 0
- # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
- #endif
- static inline bool can_use_fpu(const float_status *s)
- {
- if (QEMU_NO_HARDFLOAT) {
- return false;
- }
- return likely(s->float_exception_flags & float_flag_inexact &&
- s->float_rounding_mode == float_round_nearest_even);
- }
- /*
- * Hardfloat generation functions. Each operation can have two flavors:
- * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
- * most condition checks, or native ones (e.g. fpclassify).
- *
- * The flavor is chosen by the callers. Instead of using macros, we rely on the
- * compiler to propagate constants and inline everything into the callers.
- *
- * We only generate functions for operations with two inputs, since only
- * these are common enough to justify consolidating them into common code.
- */
- typedef union {
- float32 s;
- float h;
- } union_float32;
- typedef union {
- float64 s;
- double h;
- } union_float64;
- typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
- typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
- typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
- typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
- typedef float (*hard_f32_op2_fn)(float a, float b);
- typedef double (*hard_f64_op2_fn)(double a, double b);
- /* 2-input is-zero-or-normal */
- static inline bool f32_is_zon2(union_float32 a, union_float32 b)
- {
- if (QEMU_HARDFLOAT_2F32_USE_FP) {
- /*
- * Not using a temp variable for consecutive fpclassify calls ends up
- * generating faster code.
- */
- return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
- (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
- }
- return float32_is_zero_or_normal(a.s) &&
- float32_is_zero_or_normal(b.s);
- }
- static inline bool f64_is_zon2(union_float64 a, union_float64 b)
- {
- if (QEMU_HARDFLOAT_2F64_USE_FP) {
- return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
- (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
- }
- return float64_is_zero_or_normal(a.s) &&
- float64_is_zero_or_normal(b.s);
- }
- /* 3-input is-zero-or-normal */
- static inline
- bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
- {
- if (QEMU_HARDFLOAT_3F32_USE_FP) {
- return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
- (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
- (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
- }
- return float32_is_zero_or_normal(a.s) &&
- float32_is_zero_or_normal(b.s) &&
- float32_is_zero_or_normal(c.s);
- }
- static inline
- bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
- {
- if (QEMU_HARDFLOAT_3F64_USE_FP) {
- return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
- (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
- (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
- }
- return float64_is_zero_or_normal(a.s) &&
- float64_is_zero_or_normal(b.s) &&
- float64_is_zero_or_normal(c.s);
- }
- static inline bool f32_is_inf(union_float32 a)
- {
- if (QEMU_HARDFLOAT_USE_ISINF) {
- return isinf(a.h);
- }
- return float32_is_infinity(a.s);
- }
- static inline bool f64_is_inf(union_float64 a)
- {
- if (QEMU_HARDFLOAT_USE_ISINF) {
- return isinf(a.h);
- }
- return float64_is_infinity(a.s);
- }
- static inline float32
- float32_gen2(float32 xa, float32 xb, float_status *s,
- hard_f32_op2_fn hard, soft_f32_op2_fn soft,
- f32_check_fn pre, f32_check_fn post)
- {
- union_float32 ua, ub, ur;
- ua.s = xa;
- ub.s = xb;
- if (unlikely(!can_use_fpu(s))) {
- goto soft;
- }
- float32_input_flush2(&ua.s, &ub.s, s);
- if (unlikely(!pre(ua, ub))) {
- goto soft;
- }
- ur.h = hard(ua.h, ub.h);
- if (unlikely(f32_is_inf(ur))) {
- s->float_exception_flags |= float_flag_overflow;
- } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
- goto soft;
- }
- return ur.s;
- soft:
- return soft(ua.s, ub.s, s);
- }
- static inline float64
- float64_gen2(float64 xa, float64 xb, float_status *s,
- hard_f64_op2_fn hard, soft_f64_op2_fn soft,
- f64_check_fn pre, f64_check_fn post)
- {
- union_float64 ua, ub, ur;
- ua.s = xa;
- ub.s = xb;
- if (unlikely(!can_use_fpu(s))) {
- goto soft;
- }
- float64_input_flush2(&ua.s, &ub.s, s);
- if (unlikely(!pre(ua, ub))) {
- goto soft;
- }
- ur.h = hard(ua.h, ub.h);
- if (unlikely(f64_is_inf(ur))) {
- s->float_exception_flags |= float_flag_overflow;
- } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
- goto soft;
- }
- return ur.s;
- soft:
- return soft(ua.s, ub.s, s);
- }
- /*----------------------------------------------------------------------------
- | Returns the fraction bits of the single-precision floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline uint32_t extractFloat32Frac(float32 a)
- {
- return float32_val(a) & 0x007FFFFF;
- }
- /*----------------------------------------------------------------------------
- | Returns the exponent bits of the single-precision floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline int extractFloat32Exp(float32 a)
- {
- return (float32_val(a) >> 23) & 0xFF;
- }
- /*----------------------------------------------------------------------------
- | Returns the sign bit of the single-precision floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline bool extractFloat32Sign(float32 a)
- {
- return float32_val(a) >> 31;
- }
- /*----------------------------------------------------------------------------
- | Returns the fraction bits of the double-precision floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline uint64_t extractFloat64Frac(float64 a)
- {
- return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
- }
- /*----------------------------------------------------------------------------
- | Returns the exponent bits of the double-precision floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline int extractFloat64Exp(float64 a)
- {
- return (float64_val(a) >> 52) & 0x7FF;
- }
- /*----------------------------------------------------------------------------
- | Returns the sign bit of the double-precision floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline bool extractFloat64Sign(float64 a)
- {
- return float64_val(a) >> 63;
- }
- /*
- * Classify a floating point number. Everything above float_class_qnan
- * is a NaN so cls >= float_class_qnan is any NaN.
- */
- typedef enum __attribute__ ((__packed__)) {
- float_class_unclassified,
- float_class_zero,
- float_class_normal,
- float_class_inf,
- float_class_qnan, /* all NaNs from here */
- float_class_snan,
- } FloatClass;
- /* Simple helpers for checking if, or what kind of, NaN we have */
- static inline __attribute__((unused)) bool is_nan(FloatClass c)
- {
- return unlikely(c >= float_class_qnan);
- }
- static inline __attribute__((unused)) bool is_snan(FloatClass c)
- {
- return c == float_class_snan;
- }
- static inline __attribute__((unused)) bool is_qnan(FloatClass c)
- {
- return c == float_class_qnan;
- }
- /*
- * Structure holding all of the decomposed parts of a float. The
- * exponent is unbiased and the fraction is normalized. All
- * calculations are done with a 64 bit fraction and then rounded as
- * appropriate for the final format.
- *
- * Thanks to the packed FloatClass a decent compiler should be able to
- * fit the whole structure into registers and avoid using the stack
- * for parameter passing.
- */
- typedef struct {
- uint64_t frac;
- int32_t exp;
- FloatClass cls;
- bool sign;
- } FloatParts;
- #define DECOMPOSED_BINARY_POINT (64 - 2)
- #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
- #define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
- /* Structure holding all of the relevant parameters for a format.
- * exp_size: the size of the exponent field
- * exp_bias: the offset applied to the exponent field
- * exp_max: the maximum normalised exponent
- * frac_size: the size of the fraction field
- * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
- * The following are computed based the size of fraction
- * frac_lsb: least significant bit of fraction
- * frac_lsbm1: the bit below the least significant bit (for rounding)
- * round_mask/roundeven_mask: masks used for rounding
- * The following optional modifiers are available:
- * arm_althp: handle ARM Alternative Half Precision
- */
- typedef struct {
- int exp_size;
- int exp_bias;
- int exp_max;
- int frac_size;
- int frac_shift;
- uint64_t frac_lsb;
- uint64_t frac_lsbm1;
- uint64_t round_mask;
- uint64_t roundeven_mask;
- bool arm_althp;
- } FloatFmt;
- /* Expand fields based on the size of exponent and fraction */
- #define FLOAT_PARAMS(E, F) \
- .exp_size = E, \
- .exp_bias = ((1 << E) - 1) >> 1, \
- .exp_max = (1 << E) - 1, \
- .frac_size = F, \
- .frac_shift = DECOMPOSED_BINARY_POINT - F, \
- .frac_lsb = 1ull << (DECOMPOSED_BINARY_POINT - F), \
- .frac_lsbm1 = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1), \
- .round_mask = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1, \
- .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
- static const FloatFmt float16_params = {
- FLOAT_PARAMS(5, 10)
- };
- static const FloatFmt float16_params_ahp = {
- FLOAT_PARAMS(5, 10),
- .arm_althp = true
- };
- static const FloatFmt float32_params = {
- FLOAT_PARAMS(8, 23)
- };
- static const FloatFmt float64_params = {
- FLOAT_PARAMS(11, 52)
- };
- /* Unpack a float to parts, but do not canonicalize. */
- static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
- {
- const int sign_pos = fmt.frac_size + fmt.exp_size;
- return (FloatParts) {
- .cls = float_class_unclassified,
- .sign = extract64(raw, sign_pos, 1),
- .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
- .frac = extract64(raw, 0, fmt.frac_size),
- };
- }
- static inline FloatParts float16_unpack_raw(float16 f)
- {
- return unpack_raw(float16_params, f);
- }
- static inline FloatParts float32_unpack_raw(float32 f)
- {
- return unpack_raw(float32_params, f);
- }
- static inline FloatParts float64_unpack_raw(float64 f)
- {
- return unpack_raw(float64_params, f);
- }
- /* Pack a float from parts, but do not canonicalize. */
- static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
- {
- const int sign_pos = fmt.frac_size + fmt.exp_size;
- uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
- return deposit64(ret, sign_pos, 1, p.sign);
- }
- static inline float16 float16_pack_raw(FloatParts p)
- {
- return make_float16(pack_raw(float16_params, p));
- }
- static inline float32 float32_pack_raw(FloatParts p)
- {
- return make_float32(pack_raw(float32_params, p));
- }
- static inline float64 float64_pack_raw(FloatParts p)
- {
- return make_float64(pack_raw(float64_params, p));
- }
- /*----------------------------------------------------------------------------
- | Functions and definitions to determine: (1) whether tininess for underflow
- | is detected before or after rounding by default, (2) what (if anything)
- | happens when exceptions are raised, (3) how signaling NaNs are distinguished
- | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
- | are propagated from function inputs to output. These details are target-
- | specific.
- *----------------------------------------------------------------------------*/
- #include "softfloat-specialize.c.inc"
- /* Canonicalize EXP and FRAC, setting CLS. */
- static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
- float_status *status)
- {
- if (part.exp == parm->exp_max && !parm->arm_althp) {
- if (part.frac == 0) {
- part.cls = float_class_inf;
- } else {
- part.frac <<= parm->frac_shift;
- part.cls = (parts_is_snan_frac(part.frac, status)
- ? float_class_snan : float_class_qnan);
- }
- } else if (part.exp == 0) {
- if (likely(part.frac == 0)) {
- part.cls = float_class_zero;
- } else if (status->flush_inputs_to_zero) {
- float_raise(float_flag_input_denormal, status);
- part.cls = float_class_zero;
- part.frac = 0;
- } else {
- int shift = clz64(part.frac) - 1;
- part.cls = float_class_normal;
- part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
- part.frac <<= shift;
- }
- } else {
- part.cls = float_class_normal;
- part.exp -= parm->exp_bias;
- part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
- }
- return part;
- }
- /* Round and uncanonicalize a floating-point number by parts. There
- * are FRAC_SHIFT bits that may require rounding at the bottom of the
- * fraction; these bits will be removed. The exponent will be biased
- * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
- */
- static FloatParts round_canonical(FloatParts p, float_status *s,
- const FloatFmt *parm)
- {
- const uint64_t frac_lsb = parm->frac_lsb;
- const uint64_t frac_lsbm1 = parm->frac_lsbm1;
- const uint64_t round_mask = parm->round_mask;
- const uint64_t roundeven_mask = parm->roundeven_mask;
- const int exp_max = parm->exp_max;
- const int frac_shift = parm->frac_shift;
- uint64_t frac, inc;
- int exp, flags = 0;
- bool overflow_norm;
- frac = p.frac;
- exp = p.exp;
- switch (p.cls) {
- case float_class_normal:
- switch (s->float_rounding_mode) {
- case float_round_nearest_even:
- overflow_norm = false;
- inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
- break;
- case float_round_ties_away:
- overflow_norm = false;
- inc = frac_lsbm1;
- break;
- case float_round_to_zero:
- overflow_norm = true;
- inc = 0;
- break;
- case float_round_up:
- inc = p.sign ? 0 : round_mask;
- overflow_norm = p.sign;
- break;
- case float_round_down:
- inc = p.sign ? round_mask : 0;
- overflow_norm = !p.sign;
- break;
- case float_round_to_odd:
- overflow_norm = true;
- inc = frac & frac_lsb ? 0 : round_mask;
- break;
- default:
- g_assert_not_reached();
- }
- exp += parm->exp_bias;
- if (likely(exp > 0)) {
- if (frac & round_mask) {
- flags |= float_flag_inexact;
- frac += inc;
- if (frac & DECOMPOSED_OVERFLOW_BIT) {
- frac >>= 1;
- exp++;
- }
- }
- frac >>= frac_shift;
- if (parm->arm_althp) {
- /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
- if (unlikely(exp > exp_max)) {
- /* Overflow. Return the maximum normal. */
- flags = float_flag_invalid;
- exp = exp_max;
- frac = -1;
- }
- } else if (unlikely(exp >= exp_max)) {
- flags |= float_flag_overflow | float_flag_inexact;
- if (overflow_norm) {
- exp = exp_max - 1;
- frac = -1;
- } else {
- p.cls = float_class_inf;
- goto do_inf;
- }
- }
- } else if (s->flush_to_zero) {
- flags |= float_flag_output_denormal;
- p.cls = float_class_zero;
- goto do_zero;
- } else {
- bool is_tiny = s->tininess_before_rounding
- || (exp < 0)
- || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
- shift64RightJamming(frac, 1 - exp, &frac);
- if (frac & round_mask) {
- /* Need to recompute round-to-even. */
- switch (s->float_rounding_mode) {
- case float_round_nearest_even:
- inc = ((frac & roundeven_mask) != frac_lsbm1
- ? frac_lsbm1 : 0);
- break;
- case float_round_to_odd:
- inc = frac & frac_lsb ? 0 : round_mask;
- break;
- default:
- break;
- }
- flags |= float_flag_inexact;
- frac += inc;
- }
- exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
- frac >>= frac_shift;
- if (is_tiny && (flags & float_flag_inexact)) {
- flags |= float_flag_underflow;
- }
- if (exp == 0 && frac == 0) {
- p.cls = float_class_zero;
- }
- }
- break;
- case float_class_zero:
- do_zero:
- exp = 0;
- frac = 0;
- break;
- case float_class_inf:
- do_inf:
- assert(!parm->arm_althp);
- exp = exp_max;
- frac = 0;
- break;
- case float_class_qnan:
- case float_class_snan:
- assert(!parm->arm_althp);
- exp = exp_max;
- frac >>= parm->frac_shift;
- break;
- default:
- g_assert_not_reached();
- }
- float_raise(flags, s);
- p.exp = exp;
- p.frac = frac;
- return p;
- }
- /* Explicit FloatFmt version */
- static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
- const FloatFmt *params)
- {
- return sf_canonicalize(float16_unpack_raw(f), params, s);
- }
- static FloatParts float16_unpack_canonical(float16 f, float_status *s)
- {
- return float16a_unpack_canonical(f, s, &float16_params);
- }
- static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
- const FloatFmt *params)
- {
- return float16_pack_raw(round_canonical(p, s, params));
- }
- static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
- {
- return float16a_round_pack_canonical(p, s, &float16_params);
- }
- static FloatParts float32_unpack_canonical(float32 f, float_status *s)
- {
- return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
- }
- static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
- {
- return float32_pack_raw(round_canonical(p, s, &float32_params));
- }
- static FloatParts float64_unpack_canonical(float64 f, float_status *s)
- {
- return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
- }
- static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
- {
- return float64_pack_raw(round_canonical(p, s, &float64_params));
- }
- static FloatParts return_nan(FloatParts a, float_status *s)
- {
- switch (a.cls) {
- case float_class_snan:
- s->float_exception_flags |= float_flag_invalid;
- a = parts_silence_nan(a, s);
- /* fall through */
- case float_class_qnan:
- if (s->default_nan_mode) {
- return parts_default_nan(s);
- }
- break;
- default:
- g_assert_not_reached();
- }
- return a;
- }
- static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
- {
- if (is_snan(a.cls) || is_snan(b.cls)) {
- s->float_exception_flags |= float_flag_invalid;
- }
- if (s->default_nan_mode) {
- return parts_default_nan(s);
- } else {
- if (pickNaN(a.cls, b.cls,
- a.frac > b.frac ||
- (a.frac == b.frac && a.sign < b.sign))) {
- a = b;
- }
- if (is_snan(a.cls)) {
- return parts_silence_nan(a, s);
- }
- }
- return a;
- }
- static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
- bool inf_zero, float_status *s)
- {
- int which;
- if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
- s->float_exception_flags |= float_flag_invalid;
- }
- which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
- if (s->default_nan_mode) {
- /* Note that this check is after pickNaNMulAdd so that function
- * has an opportunity to set the Invalid flag.
- */
- which = 3;
- }
- switch (which) {
- case 0:
- break;
- case 1:
- a = b;
- break;
- case 2:
- a = c;
- break;
- case 3:
- return parts_default_nan(s);
- default:
- g_assert_not_reached();
- }
- if (is_snan(a.cls)) {
- return parts_silence_nan(a, s);
- }
- return a;
- }
- /*
- * Returns the result of adding or subtracting the values of the
- * floating-point values `a' and `b'. The operation is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic.
- */
- static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
- float_status *s)
- {
- bool a_sign = a.sign;
- bool b_sign = b.sign ^ subtract;
- if (a_sign != b_sign) {
- /* Subtraction */
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
- shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
- a.frac = a.frac - b.frac;
- } else {
- shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
- a.frac = b.frac - a.frac;
- a.exp = b.exp;
- a_sign ^= 1;
- }
- if (a.frac == 0) {
- a.cls = float_class_zero;
- a.sign = s->float_rounding_mode == float_round_down;
- } else {
- int shift = clz64(a.frac) - 1;
- a.frac = a.frac << shift;
- a.exp = a.exp - shift;
- a.sign = a_sign;
- }
- return a;
- }
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return pick_nan(a, b, s);
- }
- if (a.cls == float_class_inf) {
- if (b.cls == float_class_inf) {
- float_raise(float_flag_invalid, s);
- return parts_default_nan(s);
- }
- return a;
- }
- if (a.cls == float_class_zero && b.cls == float_class_zero) {
- a.sign = s->float_rounding_mode == float_round_down;
- return a;
- }
- if (a.cls == float_class_zero || b.cls == float_class_inf) {
- b.sign = a_sign ^ 1;
- return b;
- }
- if (b.cls == float_class_zero) {
- return a;
- }
- } else {
- /* Addition */
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- if (a.exp > b.exp) {
- shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
- } else if (a.exp < b.exp) {
- shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
- a.exp = b.exp;
- }
- a.frac += b.frac;
- if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
- shift64RightJamming(a.frac, 1, &a.frac);
- a.exp += 1;
- }
- return a;
- }
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return pick_nan(a, b, s);
- }
- if (a.cls == float_class_inf || b.cls == float_class_zero) {
- return a;
- }
- if (b.cls == float_class_inf || a.cls == float_class_zero) {
- b.sign = b_sign;
- return b;
- }
- }
- g_assert_not_reached();
- }
- /*
- * Returns the result of adding or subtracting the floating-point
- * values `a' and `b'. The operation is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
- float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
- {
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, false, status);
- return float16_round_pack_canonical(pr, status);
- }
- float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
- {
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, true, status);
- return float16_round_pack_canonical(pr, status);
- }
- static float32 QEMU_SOFTFLOAT_ATTR
- soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
- {
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pb = float32_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, subtract, status);
- return float32_round_pack_canonical(pr, status);
- }
- static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
- {
- return soft_f32_addsub(a, b, false, status);
- }
- static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
- {
- return soft_f32_addsub(a, b, true, status);
- }
- static float64 QEMU_SOFTFLOAT_ATTR
- soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
- {
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pb = float64_unpack_canonical(b, status);
- FloatParts pr = addsub_floats(pa, pb, subtract, status);
- return float64_round_pack_canonical(pr, status);
- }
- static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
- {
- return soft_f64_addsub(a, b, false, status);
- }
- static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
- {
- return soft_f64_addsub(a, b, true, status);
- }
- static float hard_f32_add(float a, float b)
- {
- return a + b;
- }
- static float hard_f32_sub(float a, float b)
- {
- return a - b;
- }
- static double hard_f64_add(double a, double b)
- {
- return a + b;
- }
- static double hard_f64_sub(double a, double b)
- {
- return a - b;
- }
- static bool f32_addsubmul_post(union_float32 a, union_float32 b)
- {
- if (QEMU_HARDFLOAT_2F32_USE_FP) {
- return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
- }
- return !(float32_is_zero(a.s) && float32_is_zero(b.s));
- }
- static bool f64_addsubmul_post(union_float64 a, union_float64 b)
- {
- if (QEMU_HARDFLOAT_2F64_USE_FP) {
- return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
- } else {
- return !(float64_is_zero(a.s) && float64_is_zero(b.s));
- }
- }
- static float32 float32_addsub(float32 a, float32 b, float_status *s,
- hard_f32_op2_fn hard, soft_f32_op2_fn soft)
- {
- return float32_gen2(a, b, s, hard, soft,
- f32_is_zon2, f32_addsubmul_post);
- }
- static float64 float64_addsub(float64 a, float64 b, float_status *s,
- hard_f64_op2_fn hard, soft_f64_op2_fn soft)
- {
- return float64_gen2(a, b, s, hard, soft,
- f64_is_zon2, f64_addsubmul_post);
- }
- float32 QEMU_FLATTEN
- float32_add(float32 a, float32 b, float_status *s)
- {
- return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
- }
- float32 QEMU_FLATTEN
- float32_sub(float32 a, float32 b, float_status *s)
- {
- return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
- }
- float64 QEMU_FLATTEN
- float64_add(float64 a, float64 b, float_status *s)
- {
- return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
- }
- float64 QEMU_FLATTEN
- float64_sub(float64 a, float64 b, float_status *s)
- {
- return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
- }
- /*
- * Returns the result of multiplying the floating-point values `a' and
- * `b'. The operation is performed according to the IEC/IEEE Standard
- * for Binary Floating-Point Arithmetic.
- */
- static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
- {
- bool sign = a.sign ^ b.sign;
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- uint64_t hi, lo;
- int exp = a.exp + b.exp;
- mul64To128(a.frac, b.frac, &hi, &lo);
- shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
- if (lo & DECOMPOSED_OVERFLOW_BIT) {
- shift64RightJamming(lo, 1, &lo);
- exp += 1;
- }
- /* Re-use a */
- a.exp = exp;
- a.sign = sign;
- a.frac = lo;
- return a;
- }
- /* handle all the NaN cases */
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return pick_nan(a, b, s);
- }
- /* Inf * Zero == NaN */
- if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
- (a.cls == float_class_zero && b.cls == float_class_inf)) {
- s->float_exception_flags |= float_flag_invalid;
- return parts_default_nan(s);
- }
- /* Multiply by 0 or Inf */
- if (a.cls == float_class_inf || a.cls == float_class_zero) {
- a.sign = sign;
- return a;
- }
- if (b.cls == float_class_inf || b.cls == float_class_zero) {
- b.sign = sign;
- return b;
- }
- g_assert_not_reached();
- }
- float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
- {
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pr = mul_floats(pa, pb, status);
- return float16_round_pack_canonical(pr, status);
- }
- static float32 QEMU_SOFTFLOAT_ATTR
- soft_f32_mul(float32 a, float32 b, float_status *status)
- {
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pb = float32_unpack_canonical(b, status);
- FloatParts pr = mul_floats(pa, pb, status);
- return float32_round_pack_canonical(pr, status);
- }
- static float64 QEMU_SOFTFLOAT_ATTR
- soft_f64_mul(float64 a, float64 b, float_status *status)
- {
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pb = float64_unpack_canonical(b, status);
- FloatParts pr = mul_floats(pa, pb, status);
- return float64_round_pack_canonical(pr, status);
- }
- static float hard_f32_mul(float a, float b)
- {
- return a * b;
- }
- static double hard_f64_mul(double a, double b)
- {
- return a * b;
- }
- float32 QEMU_FLATTEN
- float32_mul(float32 a, float32 b, float_status *s)
- {
- return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
- f32_is_zon2, f32_addsubmul_post);
- }
- float64 QEMU_FLATTEN
- float64_mul(float64 a, float64 b, float_status *s)
- {
- return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
- f64_is_zon2, f64_addsubmul_post);
- }
- /*
- * Returns the result of multiplying the floating-point values `a' and
- * `b' then adding 'c', with no intermediate rounding step after the
- * multiplication. The operation is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
- * The flags argument allows the caller to select negation of the
- * addend, the intermediate product, or the final result. (The
- * difference between this and having the caller do a separate
- * negation is that negating externally will flip the sign bit on
- * NaNs.)
- */
- static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
- int flags, float_status *s)
- {
- bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
- ((1 << float_class_inf) | (1 << float_class_zero));
- bool p_sign;
- bool sign_flip = flags & float_muladd_negate_result;
- FloatClass p_class;
- uint64_t hi, lo;
- int p_exp;
- /* It is implementation-defined whether the cases of (0,inf,qnan)
- * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
- * they return if they do), so we have to hand this information
- * off to the target-specific pick-a-NaN routine.
- */
- if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
- return pick_nan_muladd(a, b, c, inf_zero, s);
- }
- if (inf_zero) {
- s->float_exception_flags |= float_flag_invalid;
- return parts_default_nan(s);
- }
- if (flags & float_muladd_negate_c) {
- c.sign ^= 1;
- }
- p_sign = a.sign ^ b.sign;
- if (flags & float_muladd_negate_product) {
- p_sign ^= 1;
- }
- if (a.cls == float_class_inf || b.cls == float_class_inf) {
- p_class = float_class_inf;
- } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
- p_class = float_class_zero;
- } else {
- p_class = float_class_normal;
- }
- if (c.cls == float_class_inf) {
- if (p_class == float_class_inf && p_sign != c.sign) {
- s->float_exception_flags |= float_flag_invalid;
- return parts_default_nan(s);
- } else {
- a.cls = float_class_inf;
- a.sign = c.sign ^ sign_flip;
- return a;
- }
- }
- if (p_class == float_class_inf) {
- a.cls = float_class_inf;
- a.sign = p_sign ^ sign_flip;
- return a;
- }
- if (p_class == float_class_zero) {
- if (c.cls == float_class_zero) {
- if (p_sign != c.sign) {
- p_sign = s->float_rounding_mode == float_round_down;
- }
- c.sign = p_sign;
- } else if (flags & float_muladd_halve_result) {
- c.exp -= 1;
- }
- c.sign ^= sign_flip;
- return c;
- }
- /* a & b should be normals now... */
- assert(a.cls == float_class_normal &&
- b.cls == float_class_normal);
- p_exp = a.exp + b.exp;
- /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
- * result.
- */
- mul64To128(a.frac, b.frac, &hi, &lo);
- /* binary point now at bit 124 */
- /* check for overflow */
- if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
- shift128RightJamming(hi, lo, 1, &hi, &lo);
- p_exp += 1;
- }
- /* + add/sub */
- if (c.cls == float_class_zero) {
- /* move binary point back to 62 */
- shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
- } else {
- int exp_diff = p_exp - c.exp;
- if (p_sign == c.sign) {
- /* Addition */
- if (exp_diff <= 0) {
- shift128RightJamming(hi, lo,
- DECOMPOSED_BINARY_POINT - exp_diff,
- &hi, &lo);
- lo += c.frac;
- p_exp = c.exp;
- } else {
- uint64_t c_hi, c_lo;
- /* shift c to the same binary point as the product (124) */
- c_hi = c.frac >> 2;
- c_lo = 0;
- shift128RightJamming(c_hi, c_lo,
- exp_diff,
- &c_hi, &c_lo);
- add128(hi, lo, c_hi, c_lo, &hi, &lo);
- /* move binary point back to 62 */
- shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
- }
- if (lo & DECOMPOSED_OVERFLOW_BIT) {
- shift64RightJamming(lo, 1, &lo);
- p_exp += 1;
- }
- } else {
- /* Subtraction */
- uint64_t c_hi, c_lo;
- /* make C binary point match product at bit 124 */
- c_hi = c.frac >> 2;
- c_lo = 0;
- if (exp_diff <= 0) {
- shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
- if (exp_diff == 0
- &&
- (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
- sub128(hi, lo, c_hi, c_lo, &hi, &lo);
- } else {
- sub128(c_hi, c_lo, hi, lo, &hi, &lo);
- p_sign ^= 1;
- p_exp = c.exp;
- }
- } else {
- shift128RightJamming(c_hi, c_lo,
- exp_diff,
- &c_hi, &c_lo);
- sub128(hi, lo, c_hi, c_lo, &hi, &lo);
- }
- if (hi == 0 && lo == 0) {
- a.cls = float_class_zero;
- a.sign = s->float_rounding_mode == float_round_down;
- a.sign ^= sign_flip;
- return a;
- } else {
- int shift;
- if (hi != 0) {
- shift = clz64(hi);
- } else {
- shift = clz64(lo) + 64;
- }
- /* Normalizing to a binary point of 124 is the
- correct adjust for the exponent. However since we're
- shifting, we might as well put the binary point back
- at 62 where we really want it. Therefore shift as
- if we're leaving 1 bit at the top of the word, but
- adjust the exponent as if we're leaving 3 bits. */
- shift -= 1;
- if (shift >= 64) {
- lo = lo << (shift - 64);
- } else {
- hi = (hi << shift) | (lo >> (64 - shift));
- lo = hi | ((lo << shift) != 0);
- }
- p_exp -= shift - 2;
- }
- }
- }
- if (flags & float_muladd_halve_result) {
- p_exp -= 1;
- }
- /* finally prepare our result */
- a.cls = float_class_normal;
- a.sign = p_sign ^ sign_flip;
- a.exp = p_exp;
- a.frac = lo;
- return a;
- }
- float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
- int flags, float_status *status)
- {
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pc = float16_unpack_canonical(c, status);
- FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
- return float16_round_pack_canonical(pr, status);
- }
- static float32 QEMU_SOFTFLOAT_ATTR
- soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
- float_status *status)
- {
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pb = float32_unpack_canonical(b, status);
- FloatParts pc = float32_unpack_canonical(c, status);
- FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
- return float32_round_pack_canonical(pr, status);
- }
- static float64 QEMU_SOFTFLOAT_ATTR
- soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
- float_status *status)
- {
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pb = float64_unpack_canonical(b, status);
- FloatParts pc = float64_unpack_canonical(c, status);
- FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
- return float64_round_pack_canonical(pr, status);
- }
- static bool force_soft_fma;
- float32 QEMU_FLATTEN
- float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
- {
- union_float32 ua, ub, uc, ur;
- ua.s = xa;
- ub.s = xb;
- uc.s = xc;
- if (unlikely(!can_use_fpu(s))) {
- goto soft;
- }
- if (unlikely(flags & float_muladd_halve_result)) {
- goto soft;
- }
- float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
- if (unlikely(!f32_is_zon3(ua, ub, uc))) {
- goto soft;
- }
- if (unlikely(force_soft_fma)) {
- goto soft;
- }
- /*
- * When (a || b) == 0, there's no need to check for under/over flow,
- * since we know the addend is (normal || 0) and the product is 0.
- */
- if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
- union_float32 up;
- bool prod_sign;
- prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
- prod_sign ^= !!(flags & float_muladd_negate_product);
- up.s = float32_set_sign(float32_zero, prod_sign);
- if (flags & float_muladd_negate_c) {
- uc.h = -uc.h;
- }
- ur.h = up.h + uc.h;
- } else {
- union_float32 ua_orig = ua;
- union_float32 uc_orig = uc;
- if (flags & float_muladd_negate_product) {
- ua.h = -ua.h;
- }
- if (flags & float_muladd_negate_c) {
- uc.h = -uc.h;
- }
- ur.h = fmaf(ua.h, ub.h, uc.h);
- if (unlikely(f32_is_inf(ur))) {
- s->float_exception_flags |= float_flag_overflow;
- } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
- ua = ua_orig;
- uc = uc_orig;
- goto soft;
- }
- }
- if (flags & float_muladd_negate_result) {
- return float32_chs(ur.s);
- }
- return ur.s;
- soft:
- return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
- }
- float64 QEMU_FLATTEN
- float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
- {
- union_float64 ua, ub, uc, ur;
- ua.s = xa;
- ub.s = xb;
- uc.s = xc;
- if (unlikely(!can_use_fpu(s))) {
- goto soft;
- }
- if (unlikely(flags & float_muladd_halve_result)) {
- goto soft;
- }
- float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
- if (unlikely(!f64_is_zon3(ua, ub, uc))) {
- goto soft;
- }
- if (unlikely(force_soft_fma)) {
- goto soft;
- }
- /*
- * When (a || b) == 0, there's no need to check for under/over flow,
- * since we know the addend is (normal || 0) and the product is 0.
- */
- if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
- union_float64 up;
- bool prod_sign;
- prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
- prod_sign ^= !!(flags & float_muladd_negate_product);
- up.s = float64_set_sign(float64_zero, prod_sign);
- if (flags & float_muladd_negate_c) {
- uc.h = -uc.h;
- }
- ur.h = up.h + uc.h;
- } else {
- union_float64 ua_orig = ua;
- union_float64 uc_orig = uc;
- if (flags & float_muladd_negate_product) {
- ua.h = -ua.h;
- }
- if (flags & float_muladd_negate_c) {
- uc.h = -uc.h;
- }
- ur.h = fma(ua.h, ub.h, uc.h);
- if (unlikely(f64_is_inf(ur))) {
- s->float_exception_flags |= float_flag_overflow;
- } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
- ua = ua_orig;
- uc = uc_orig;
- goto soft;
- }
- }
- if (flags & float_muladd_negate_result) {
- return float64_chs(ur.s);
- }
- return ur.s;
- soft:
- return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
- }
- /*
- * Returns the result of dividing the floating-point value `a' by the
- * corresponding value `b'. The operation is performed according to
- * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
- static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
- {
- bool sign = a.sign ^ b.sign;
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- uint64_t n0, n1, q, r;
- int exp = a.exp - b.exp;
- /*
- * We want a 2*N / N-bit division to produce exactly an N-bit
- * result, so that we do not lose any precision and so that we
- * do not have to renormalize afterward. If A.frac < B.frac,
- * then division would produce an (N-1)-bit result; shift A left
- * by one to produce the an N-bit result, and decrement the
- * exponent to match.
- *
- * The udiv_qrnnd algorithm that we're using requires normalization,
- * i.e. the msb of the denominator must be set. Since we know that
- * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
- * by one (more), and the remainder must be shifted right by one.
- */
- if (a.frac < b.frac) {
- exp -= 1;
- shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
- } else {
- shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
- }
- q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
- /*
- * Set lsb if there is a remainder, to set inexact.
- * As mentioned above, to find the actual value of the remainder we
- * would need to shift right, but (1) we are only concerned about
- * non-zero-ness, and (2) the remainder will always be even because
- * both inputs to the division primitive are even.
- */
- a.frac = q | (r != 0);
- a.sign = sign;
- a.exp = exp;
- return a;
- }
- /* handle all the NaN cases */
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return pick_nan(a, b, s);
- }
- /* 0/0 or Inf/Inf */
- if (a.cls == b.cls
- &&
- (a.cls == float_class_inf || a.cls == float_class_zero)) {
- s->float_exception_flags |= float_flag_invalid;
- return parts_default_nan(s);
- }
- /* Inf / x or 0 / x */
- if (a.cls == float_class_inf || a.cls == float_class_zero) {
- a.sign = sign;
- return a;
- }
- /* Div 0 => Inf */
- if (b.cls == float_class_zero) {
- s->float_exception_flags |= float_flag_divbyzero;
- a.cls = float_class_inf;
- a.sign = sign;
- return a;
- }
- /* Div by Inf */
- if (b.cls == float_class_inf) {
- a.cls = float_class_zero;
- a.sign = sign;
- return a;
- }
- g_assert_not_reached();
- }
- float16 float16_div(float16 a, float16 b, float_status *status)
- {
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pb = float16_unpack_canonical(b, status);
- FloatParts pr = div_floats(pa, pb, status);
- return float16_round_pack_canonical(pr, status);
- }
- static float32 QEMU_SOFTFLOAT_ATTR
- soft_f32_div(float32 a, float32 b, float_status *status)
- {
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pb = float32_unpack_canonical(b, status);
- FloatParts pr = div_floats(pa, pb, status);
- return float32_round_pack_canonical(pr, status);
- }
- static float64 QEMU_SOFTFLOAT_ATTR
- soft_f64_div(float64 a, float64 b, float_status *status)
- {
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pb = float64_unpack_canonical(b, status);
- FloatParts pr = div_floats(pa, pb, status);
- return float64_round_pack_canonical(pr, status);
- }
- static float hard_f32_div(float a, float b)
- {
- return a / b;
- }
- static double hard_f64_div(double a, double b)
- {
- return a / b;
- }
- static bool f32_div_pre(union_float32 a, union_float32 b)
- {
- if (QEMU_HARDFLOAT_2F32_USE_FP) {
- return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
- fpclassify(b.h) == FP_NORMAL;
- }
- return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
- }
- static bool f64_div_pre(union_float64 a, union_float64 b)
- {
- if (QEMU_HARDFLOAT_2F64_USE_FP) {
- return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
- fpclassify(b.h) == FP_NORMAL;
- }
- return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
- }
- static bool f32_div_post(union_float32 a, union_float32 b)
- {
- if (QEMU_HARDFLOAT_2F32_USE_FP) {
- return fpclassify(a.h) != FP_ZERO;
- }
- return !float32_is_zero(a.s);
- }
- static bool f64_div_post(union_float64 a, union_float64 b)
- {
- if (QEMU_HARDFLOAT_2F64_USE_FP) {
- return fpclassify(a.h) != FP_ZERO;
- }
- return !float64_is_zero(a.s);
- }
- float32 QEMU_FLATTEN
- float32_div(float32 a, float32 b, float_status *s)
- {
- return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
- f32_div_pre, f32_div_post);
- }
- float64 QEMU_FLATTEN
- float64_div(float64 a, float64 b, float_status *s)
- {
- return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
- f64_div_pre, f64_div_post);
- }
- /*
- * Float to Float conversions
- *
- * Returns the result of converting one float format to another. The
- * conversion is performed according to the IEC/IEEE Standard for
- * Binary Floating-Point Arithmetic.
- *
- * The float_to_float helper only needs to take care of raising
- * invalid exceptions and handling the conversion on NaNs.
- */
- static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
- float_status *s)
- {
- if (dstf->arm_althp) {
- switch (a.cls) {
- case float_class_qnan:
- case float_class_snan:
- /* There is no NaN in the destination format. Raise Invalid
- * and return a zero with the sign of the input NaN.
- */
- s->float_exception_flags |= float_flag_invalid;
- a.cls = float_class_zero;
- a.frac = 0;
- a.exp = 0;
- break;
- case float_class_inf:
- /* There is no Inf in the destination format. Raise Invalid
- * and return the maximum normal with the correct sign.
- */
- s->float_exception_flags |= float_flag_invalid;
- a.cls = float_class_normal;
- a.exp = dstf->exp_max;
- a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
- break;
- default:
- break;
- }
- } else if (is_nan(a.cls)) {
- if (is_snan(a.cls)) {
- s->float_exception_flags |= float_flag_invalid;
- a = parts_silence_nan(a, s);
- }
- if (s->default_nan_mode) {
- return parts_default_nan(s);
- }
- }
- return a;
- }
- float32 float16_to_float32(float16 a, bool ieee, float_status *s)
- {
- const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts p = float16a_unpack_canonical(a, s, fmt16);
- FloatParts pr = float_to_float(p, &float32_params, s);
- return float32_round_pack_canonical(pr, s);
- }
- float64 float16_to_float64(float16 a, bool ieee, float_status *s)
- {
- const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts p = float16a_unpack_canonical(a, s, fmt16);
- FloatParts pr = float_to_float(p, &float64_params, s);
- return float64_round_pack_canonical(pr, s);
- }
- float16 float32_to_float16(float32 a, bool ieee, float_status *s)
- {
- const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts p = float32_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, fmt16, s);
- return float16a_round_pack_canonical(pr, s, fmt16);
- }
- static float64 QEMU_SOFTFLOAT_ATTR
- soft_float32_to_float64(float32 a, float_status *s)
- {
- FloatParts p = float32_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, &float64_params, s);
- return float64_round_pack_canonical(pr, s);
- }
- float64 float32_to_float64(float32 a, float_status *s)
- {
- if (likely(float32_is_normal(a))) {
- /* Widening conversion can never produce inexact results. */
- union_float32 uf;
- union_float64 ud;
- uf.s = a;
- ud.h = uf.h;
- return ud.s;
- } else if (float32_is_zero(a)) {
- return float64_set_sign(float64_zero, float32_is_neg(a));
- } else {
- return soft_float32_to_float64(a, s);
- }
- }
- float16 float64_to_float16(float64 a, bool ieee, float_status *s)
- {
- const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
- FloatParts p = float64_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, fmt16, s);
- return float16a_round_pack_canonical(pr, s, fmt16);
- }
- float32 float64_to_float32(float64 a, float_status *s)
- {
- FloatParts p = float64_unpack_canonical(a, s);
- FloatParts pr = float_to_float(p, &float32_params, s);
- return float32_round_pack_canonical(pr, s);
- }
- /*
- * Rounds the floating-point value `a' to an integer, and returns the
- * result as a floating-point value. The operation is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic.
- */
- static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
- int scale, float_status *s)
- {
- switch (a.cls) {
- case float_class_qnan:
- case float_class_snan:
- return return_nan(a, s);
- case float_class_zero:
- case float_class_inf:
- /* already "integral" */
- break;
- case float_class_normal:
- scale = MIN(MAX(scale, -0x10000), 0x10000);
- a.exp += scale;
- if (a.exp >= DECOMPOSED_BINARY_POINT) {
- /* already integral */
- break;
- }
- if (a.exp < 0) {
- bool one;
- /* all fractional */
- s->float_exception_flags |= float_flag_inexact;
- switch (rmode) {
- case float_round_nearest_even:
- one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
- break;
- case float_round_ties_away:
- one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
- break;
- case float_round_to_zero:
- one = false;
- break;
- case float_round_up:
- one = !a.sign;
- break;
- case float_round_down:
- one = a.sign;
- break;
- case float_round_to_odd:
- one = true;
- break;
- default:
- g_assert_not_reached();
- }
- if (one) {
- a.frac = DECOMPOSED_IMPLICIT_BIT;
- a.exp = 0;
- } else {
- a.cls = float_class_zero;
- }
- } else {
- uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
- uint64_t frac_lsbm1 = frac_lsb >> 1;
- uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
- uint64_t rnd_mask = rnd_even_mask >> 1;
- uint64_t inc;
- switch (rmode) {
- case float_round_nearest_even:
- inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
- break;
- case float_round_ties_away:
- inc = frac_lsbm1;
- break;
- case float_round_to_zero:
- inc = 0;
- break;
- case float_round_up:
- inc = a.sign ? 0 : rnd_mask;
- break;
- case float_round_down:
- inc = a.sign ? rnd_mask : 0;
- break;
- case float_round_to_odd:
- inc = a.frac & frac_lsb ? 0 : rnd_mask;
- break;
- default:
- g_assert_not_reached();
- }
- if (a.frac & rnd_mask) {
- s->float_exception_flags |= float_flag_inexact;
- a.frac += inc;
- a.frac &= ~rnd_mask;
- if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
- a.frac >>= 1;
- a.exp++;
- }
- }
- }
- break;
- default:
- g_assert_not_reached();
- }
- return a;
- }
- float16 float16_round_to_int(float16 a, float_status *s)
- {
- FloatParts pa = float16_unpack_canonical(a, s);
- FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float16_round_pack_canonical(pr, s);
- }
- float32 float32_round_to_int(float32 a, float_status *s)
- {
- FloatParts pa = float32_unpack_canonical(a, s);
- FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float32_round_pack_canonical(pr, s);
- }
- float64 float64_round_to_int(float64 a, float_status *s)
- {
- FloatParts pa = float64_unpack_canonical(a, s);
- FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
- return float64_round_pack_canonical(pr, s);
- }
- /*
- * Returns the result of converting the floating-point value `a' to
- * the two's complement integer format. The conversion is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic---which means in particular that the conversion is
- * rounded according to the current rounding mode. If `a' is a NaN,
- * the largest positive integer is returned. Otherwise, if the
- * conversion overflows, the largest integer with the same sign as `a'
- * is returned.
- */
- static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
- int scale, int64_t min, int64_t max,
- float_status *s)
- {
- uint64_t r;
- int orig_flags = get_float_exception_flags(s);
- FloatParts p = round_to_int(in, rmode, scale, s);
- switch (p.cls) {
- case float_class_snan:
- case float_class_qnan:
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
- case float_class_inf:
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return p.sign ? min : max;
- case float_class_zero:
- return 0;
- case float_class_normal:
- if (p.exp < DECOMPOSED_BINARY_POINT) {
- r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
- } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
- r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
- } else {
- r = UINT64_MAX;
- }
- if (p.sign) {
- if (r <= -(uint64_t) min) {
- return -r;
- } else {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return min;
- }
- } else {
- if (r <= max) {
- return r;
- } else {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
- }
- }
- default:
- g_assert_not_reached();
- }
- }
- int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, INT16_MIN, INT16_MAX, s);
- }
- int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, INT32_MIN, INT32_MAX, s);
- }
- int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, INT64_MIN, INT64_MAX, s);
- }
- int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, INT16_MIN, INT16_MAX, s);
- }
- int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, INT32_MIN, INT32_MAX, s);
- }
- int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, INT64_MIN, INT64_MAX, s);
- }
- int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, INT16_MIN, INT16_MAX, s);
- }
- int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, INT32_MIN, INT32_MAX, s);
- }
- int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_int_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, INT64_MIN, INT64_MAX, s);
- }
- int16_t float16_to_int16(float16 a, float_status *s)
- {
- return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int32_t float16_to_int32(float16 a, float_status *s)
- {
- return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int64_t float16_to_int64(float16 a, float_status *s)
- {
- return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int16_t float32_to_int16(float32 a, float_status *s)
- {
- return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int32_t float32_to_int32(float32 a, float_status *s)
- {
- return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int64_t float32_to_int64(float32 a, float_status *s)
- {
- return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int16_t float64_to_int16(float64 a, float_status *s)
- {
- return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int32_t float64_to_int32(float64 a, float_status *s)
- {
- return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int64_t float64_to_int64(float64 a, float_status *s)
- {
- return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
- }
- int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
- {
- return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
- }
- int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
- {
- return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
- }
- int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
- {
- return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
- }
- int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
- {
- return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
- }
- int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
- {
- return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
- }
- int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
- {
- return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
- }
- int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
- {
- return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
- }
- int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
- {
- return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
- }
- int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
- {
- return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
- }
- /*
- * Returns the result of converting the floating-point value `a' to
- * the unsigned integer format. The conversion is performed according
- * to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic---which means in particular that the conversion is
- * rounded according to the current rounding mode. If `a' is a NaN,
- * the largest unsigned integer is returned. Otherwise, if the
- * conversion overflows, the largest unsigned integer is returned. If
- * the 'a' is negative, the result is rounded and zero is returned;
- * values that do not round to zero will raise the inexact exception
- * flag.
- */
- static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
- int scale, uint64_t max,
- float_status *s)
- {
- int orig_flags = get_float_exception_flags(s);
- FloatParts p = round_to_int(in, rmode, scale, s);
- uint64_t r;
- switch (p.cls) {
- case float_class_snan:
- case float_class_qnan:
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
- case float_class_inf:
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return p.sign ? 0 : max;
- case float_class_zero:
- return 0;
- case float_class_normal:
- if (p.sign) {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return 0;
- }
- if (p.exp < DECOMPOSED_BINARY_POINT) {
- r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
- } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
- r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
- } else {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
- }
- /* For uint64 this will never trip, but if p.exp is too large
- * to shift a decomposed fraction we shall have exited via the
- * 3rd leg above.
- */
- if (r > max) {
- s->float_exception_flags = orig_flags | float_flag_invalid;
- return max;
- }
- return r;
- default:
- g_assert_not_reached();
- }
- }
- uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, UINT16_MAX, s);
- }
- uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, UINT32_MAX, s);
- }
- uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float16_unpack_canonical(a, s),
- rmode, scale, UINT64_MAX, s);
- }
- uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, UINT16_MAX, s);
- }
- uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, UINT32_MAX, s);
- }
- uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float32_unpack_canonical(a, s),
- rmode, scale, UINT64_MAX, s);
- }
- uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, UINT16_MAX, s);
- }
- uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, UINT32_MAX, s);
- }
- uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
- float_status *s)
- {
- return round_to_uint_and_pack(float64_unpack_canonical(a, s),
- rmode, scale, UINT64_MAX, s);
- }
- uint16_t float16_to_uint16(float16 a, float_status *s)
- {
- return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint32_t float16_to_uint32(float16 a, float_status *s)
- {
- return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint64_t float16_to_uint64(float16 a, float_status *s)
- {
- return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint16_t float32_to_uint16(float32 a, float_status *s)
- {
- return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint32_t float32_to_uint32(float32 a, float_status *s)
- {
- return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint64_t float32_to_uint64(float32 a, float_status *s)
- {
- return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint16_t float64_to_uint16(float64 a, float_status *s)
- {
- return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint32_t float64_to_uint32(float64 a, float_status *s)
- {
- return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint64_t float64_to_uint64(float64 a, float_status *s)
- {
- return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
- }
- uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
- {
- return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
- }
- uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
- {
- return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
- }
- uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
- {
- return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
- }
- uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
- {
- return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
- }
- uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
- {
- return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
- }
- uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
- {
- return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
- }
- uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
- {
- return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
- }
- uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
- {
- return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
- }
- uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
- {
- return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
- }
- /*
- * Integer to float conversions
- *
- * Returns the result of converting the two's complement integer `a'
- * to the floating-point format. The conversion is performed according
- * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
- static FloatParts int_to_float(int64_t a, int scale, float_status *status)
- {
- FloatParts r = { .sign = false };
- if (a == 0) {
- r.cls = float_class_zero;
- } else {
- uint64_t f = a;
- int shift;
- r.cls = float_class_normal;
- if (a < 0) {
- f = -f;
- r.sign = true;
- }
- shift = clz64(f) - 1;
- scale = MIN(MAX(scale, -0x10000), 0x10000);
- r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
- r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
- }
- return r;
- }
- float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
- {
- FloatParts pa = int_to_float(a, scale, status);
- return float16_round_pack_canonical(pa, status);
- }
- float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
- {
- return int64_to_float16_scalbn(a, scale, status);
- }
- float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
- {
- return int64_to_float16_scalbn(a, scale, status);
- }
- float16 int64_to_float16(int64_t a, float_status *status)
- {
- return int64_to_float16_scalbn(a, 0, status);
- }
- float16 int32_to_float16(int32_t a, float_status *status)
- {
- return int64_to_float16_scalbn(a, 0, status);
- }
- float16 int16_to_float16(int16_t a, float_status *status)
- {
- return int64_to_float16_scalbn(a, 0, status);
- }
- float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
- {
- FloatParts pa = int_to_float(a, scale, status);
- return float32_round_pack_canonical(pa, status);
- }
- float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
- {
- return int64_to_float32_scalbn(a, scale, status);
- }
- float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
- {
- return int64_to_float32_scalbn(a, scale, status);
- }
- float32 int64_to_float32(int64_t a, float_status *status)
- {
- return int64_to_float32_scalbn(a, 0, status);
- }
- float32 int32_to_float32(int32_t a, float_status *status)
- {
- return int64_to_float32_scalbn(a, 0, status);
- }
- float32 int16_to_float32(int16_t a, float_status *status)
- {
- return int64_to_float32_scalbn(a, 0, status);
- }
- float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
- {
- FloatParts pa = int_to_float(a, scale, status);
- return float64_round_pack_canonical(pa, status);
- }
- float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
- {
- return int64_to_float64_scalbn(a, scale, status);
- }
- float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
- {
- return int64_to_float64_scalbn(a, scale, status);
- }
- float64 int64_to_float64(int64_t a, float_status *status)
- {
- return int64_to_float64_scalbn(a, 0, status);
- }
- float64 int32_to_float64(int32_t a, float_status *status)
- {
- return int64_to_float64_scalbn(a, 0, status);
- }
- float64 int16_to_float64(int16_t a, float_status *status)
- {
- return int64_to_float64_scalbn(a, 0, status);
- }
- /*
- * Unsigned Integer to float conversions
- *
- * Returns the result of converting the unsigned integer `a' to the
- * floating-point format. The conversion is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
- static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
- {
- FloatParts r = { .sign = false };
- if (a == 0) {
- r.cls = float_class_zero;
- } else {
- scale = MIN(MAX(scale, -0x10000), 0x10000);
- r.cls = float_class_normal;
- if ((int64_t)a < 0) {
- r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
- shift64RightJamming(a, 1, &a);
- r.frac = a;
- } else {
- int shift = clz64(a) - 1;
- r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
- r.frac = a << shift;
- }
- }
- return r;
- }
- float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
- {
- FloatParts pa = uint_to_float(a, scale, status);
- return float16_round_pack_canonical(pa, status);
- }
- float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
- {
- return uint64_to_float16_scalbn(a, scale, status);
- }
- float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
- {
- return uint64_to_float16_scalbn(a, scale, status);
- }
- float16 uint64_to_float16(uint64_t a, float_status *status)
- {
- return uint64_to_float16_scalbn(a, 0, status);
- }
- float16 uint32_to_float16(uint32_t a, float_status *status)
- {
- return uint64_to_float16_scalbn(a, 0, status);
- }
- float16 uint16_to_float16(uint16_t a, float_status *status)
- {
- return uint64_to_float16_scalbn(a, 0, status);
- }
- float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
- {
- FloatParts pa = uint_to_float(a, scale, status);
- return float32_round_pack_canonical(pa, status);
- }
- float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
- {
- return uint64_to_float32_scalbn(a, scale, status);
- }
- float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
- {
- return uint64_to_float32_scalbn(a, scale, status);
- }
- float32 uint64_to_float32(uint64_t a, float_status *status)
- {
- return uint64_to_float32_scalbn(a, 0, status);
- }
- float32 uint32_to_float32(uint32_t a, float_status *status)
- {
- return uint64_to_float32_scalbn(a, 0, status);
- }
- float32 uint16_to_float32(uint16_t a, float_status *status)
- {
- return uint64_to_float32_scalbn(a, 0, status);
- }
- float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
- {
- FloatParts pa = uint_to_float(a, scale, status);
- return float64_round_pack_canonical(pa, status);
- }
- float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
- {
- return uint64_to_float64_scalbn(a, scale, status);
- }
- float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
- {
- return uint64_to_float64_scalbn(a, scale, status);
- }
- float64 uint64_to_float64(uint64_t a, float_status *status)
- {
- return uint64_to_float64_scalbn(a, 0, status);
- }
- float64 uint32_to_float64(uint32_t a, float_status *status)
- {
- return uint64_to_float64_scalbn(a, 0, status);
- }
- float64 uint16_to_float64(uint16_t a, float_status *status)
- {
- return uint64_to_float64_scalbn(a, 0, status);
- }
- /* Float Min/Max */
- /* min() and max() functions. These can't be implemented as
- * 'compare and pick one input' because that would mishandle
- * NaNs and +0 vs -0.
- *
- * minnum() and maxnum() functions. These are similar to the min()
- * and max() functions but if one of the arguments is a QNaN and
- * the other is numerical then the numerical argument is returned.
- * SNaNs will get quietened before being returned.
- * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
- * and maxNum() operations. min() and max() are the typical min/max
- * semantics provided by many CPUs which predate that specification.
- *
- * minnummag() and maxnummag() functions correspond to minNumMag()
- * and minNumMag() from the IEEE-754 2008.
- */
- static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
- bool ieee, bool ismag, float_status *s)
- {
- if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
- if (ieee) {
- /* Takes two floating-point values `a' and `b', one of
- * which is a NaN, and returns the appropriate NaN
- * result. If either `a' or `b' is a signaling NaN,
- * the invalid exception is raised.
- */
- if (is_snan(a.cls) || is_snan(b.cls)) {
- return pick_nan(a, b, s);
- } else if (is_nan(a.cls) && !is_nan(b.cls)) {
- return b;
- } else if (is_nan(b.cls) && !is_nan(a.cls)) {
- return a;
- }
- }
- return pick_nan(a, b, s);
- } else {
- int a_exp, b_exp;
- switch (a.cls) {
- case float_class_normal:
- a_exp = a.exp;
- break;
- case float_class_inf:
- a_exp = INT_MAX;
- break;
- case float_class_zero:
- a_exp = INT_MIN;
- break;
- default:
- g_assert_not_reached();
- break;
- }
- switch (b.cls) {
- case float_class_normal:
- b_exp = b.exp;
- break;
- case float_class_inf:
- b_exp = INT_MAX;
- break;
- case float_class_zero:
- b_exp = INT_MIN;
- break;
- default:
- g_assert_not_reached();
- break;
- }
- if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
- bool a_less = a_exp < b_exp;
- if (a_exp == b_exp) {
- a_less = a.frac < b.frac;
- }
- return a_less ^ ismin ? b : a;
- }
- if (a.sign == b.sign) {
- bool a_less = a_exp < b_exp;
- if (a_exp == b_exp) {
- a_less = a.frac < b.frac;
- }
- return a.sign ^ a_less ^ ismin ? b : a;
- } else {
- return a.sign ^ ismin ? b : a;
- }
- }
- }
- #define MINMAX(sz, name, ismin, isiee, ismag) \
- float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
- float_status *s) \
- { \
- FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
- FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
- FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
- \
- return float ## sz ## _round_pack_canonical(pr, s); \
- }
- MINMAX(16, min, true, false, false)
- MINMAX(16, minnum, true, true, false)
- MINMAX(16, minnummag, true, true, true)
- MINMAX(16, max, false, false, false)
- MINMAX(16, maxnum, false, true, false)
- MINMAX(16, maxnummag, false, true, true)
- MINMAX(32, min, true, false, false)
- MINMAX(32, minnum, true, true, false)
- MINMAX(32, minnummag, true, true, true)
- MINMAX(32, max, false, false, false)
- MINMAX(32, maxnum, false, true, false)
- MINMAX(32, maxnummag, false, true, true)
- MINMAX(64, min, true, false, false)
- MINMAX(64, minnum, true, true, false)
- MINMAX(64, minnummag, true, true, true)
- MINMAX(64, max, false, false, false)
- MINMAX(64, maxnum, false, true, false)
- MINMAX(64, maxnummag, false, true, true)
- #undef MINMAX
- /* Floating point compare */
- static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
- float_status *s)
- {
- if (is_nan(a.cls) || is_nan(b.cls)) {
- if (!is_quiet ||
- a.cls == float_class_snan ||
- b.cls == float_class_snan) {
- s->float_exception_flags |= float_flag_invalid;
- }
- return float_relation_unordered;
- }
- if (a.cls == float_class_zero) {
- if (b.cls == float_class_zero) {
- return float_relation_equal;
- }
- return b.sign ? float_relation_greater : float_relation_less;
- } else if (b.cls == float_class_zero) {
- return a.sign ? float_relation_less : float_relation_greater;
- }
- /* The only really important thing about infinity is its sign. If
- * both are infinities the sign marks the smallest of the two.
- */
- if (a.cls == float_class_inf) {
- if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
- return float_relation_equal;
- }
- return a.sign ? float_relation_less : float_relation_greater;
- } else if (b.cls == float_class_inf) {
- return b.sign ? float_relation_greater : float_relation_less;
- }
- if (a.sign != b.sign) {
- return a.sign ? float_relation_less : float_relation_greater;
- }
- if (a.exp == b.exp) {
- if (a.frac == b.frac) {
- return float_relation_equal;
- }
- if (a.sign) {
- return a.frac > b.frac ?
- float_relation_less : float_relation_greater;
- } else {
- return a.frac > b.frac ?
- float_relation_greater : float_relation_less;
- }
- } else {
- if (a.sign) {
- return a.exp > b.exp ? float_relation_less : float_relation_greater;
- } else {
- return a.exp > b.exp ? float_relation_greater : float_relation_less;
- }
- }
- }
- #define COMPARE(name, attr, sz) \
- static int attr \
- name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
- { \
- FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
- FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
- return compare_floats(pa, pb, is_quiet, s); \
- }
- COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
- COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
- COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
- #undef COMPARE
- FloatRelation float16_compare(float16 a, float16 b, float_status *s)
- {
- return soft_f16_compare(a, b, false, s);
- }
- FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
- {
- return soft_f16_compare(a, b, true, s);
- }
- static FloatRelation QEMU_FLATTEN
- f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
- {
- union_float32 ua, ub;
- ua.s = xa;
- ub.s = xb;
- if (QEMU_NO_HARDFLOAT) {
- goto soft;
- }
- float32_input_flush2(&ua.s, &ub.s, s);
- if (isgreaterequal(ua.h, ub.h)) {
- if (isgreater(ua.h, ub.h)) {
- return float_relation_greater;
- }
- return float_relation_equal;
- }
- if (likely(isless(ua.h, ub.h))) {
- return float_relation_less;
- }
- /* The only condition remaining is unordered.
- * Fall through to set flags.
- */
- soft:
- return soft_f32_compare(ua.s, ub.s, is_quiet, s);
- }
- FloatRelation float32_compare(float32 a, float32 b, float_status *s)
- {
- return f32_compare(a, b, false, s);
- }
- FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
- {
- return f32_compare(a, b, true, s);
- }
- static FloatRelation QEMU_FLATTEN
- f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
- {
- union_float64 ua, ub;
- ua.s = xa;
- ub.s = xb;
- if (QEMU_NO_HARDFLOAT) {
- goto soft;
- }
- float64_input_flush2(&ua.s, &ub.s, s);
- if (isgreaterequal(ua.h, ub.h)) {
- if (isgreater(ua.h, ub.h)) {
- return float_relation_greater;
- }
- return float_relation_equal;
- }
- if (likely(isless(ua.h, ub.h))) {
- return float_relation_less;
- }
- /* The only condition remaining is unordered.
- * Fall through to set flags.
- */
- soft:
- return soft_f64_compare(ua.s, ub.s, is_quiet, s);
- }
- FloatRelation float64_compare(float64 a, float64 b, float_status *s)
- {
- return f64_compare(a, b, false, s);
- }
- FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
- {
- return f64_compare(a, b, true, s);
- }
- /* Multiply A by 2 raised to the power N. */
- static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
- {
- if (unlikely(is_nan(a.cls))) {
- return return_nan(a, s);
- }
- if (a.cls == float_class_normal) {
- /* The largest float type (even though not supported by FloatParts)
- * is float128, which has a 15 bit exponent. Bounding N to 16 bits
- * still allows rounding to infinity, without allowing overflow
- * within the int32_t that backs FloatParts.exp.
- */
- n = MIN(MAX(n, -0x10000), 0x10000);
- a.exp += n;
- }
- return a;
- }
- float16 float16_scalbn(float16 a, int n, float_status *status)
- {
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pr = scalbn_decomposed(pa, n, status);
- return float16_round_pack_canonical(pr, status);
- }
- float32 float32_scalbn(float32 a, int n, float_status *status)
- {
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pr = scalbn_decomposed(pa, n, status);
- return float32_round_pack_canonical(pr, status);
- }
- float64 float64_scalbn(float64 a, int n, float_status *status)
- {
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pr = scalbn_decomposed(pa, n, status);
- return float64_round_pack_canonical(pr, status);
- }
- /*
- * Square Root
- *
- * The old softfloat code did an approximation step before zeroing in
- * on the final result. However for simpleness we just compute the
- * square root by iterating down from the implicit bit to enough extra
- * bits to ensure we get a correctly rounded result.
- *
- * This does mean however the calculation is slower than before,
- * especially for 64 bit floats.
- */
- static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
- {
- uint64_t a_frac, r_frac, s_frac;
- int bit, last_bit;
- if (is_nan(a.cls)) {
- return return_nan(a, s);
- }
- if (a.cls == float_class_zero) {
- return a; /* sqrt(+-0) = +-0 */
- }
- if (a.sign) {
- s->float_exception_flags |= float_flag_invalid;
- return parts_default_nan(s);
- }
- if (a.cls == float_class_inf) {
- return a; /* sqrt(+inf) = +inf */
- }
- assert(a.cls == float_class_normal);
- /* We need two overflow bits at the top. Adding room for that is a
- * right shift. If the exponent is odd, we can discard the low bit
- * by multiplying the fraction by 2; that's a left shift. Combine
- * those and we shift right if the exponent is even.
- */
- a_frac = a.frac;
- if (!(a.exp & 1)) {
- a_frac >>= 1;
- }
- a.exp >>= 1;
- /* Bit-by-bit computation of sqrt. */
- r_frac = 0;
- s_frac = 0;
- /* Iterate from implicit bit down to the 3 extra bits to compute a
- * properly rounded result. Remember we've inserted one more bit
- * at the top, so these positions are one less.
- */
- bit = DECOMPOSED_BINARY_POINT - 1;
- last_bit = MAX(p->frac_shift - 4, 0);
- do {
- uint64_t q = 1ULL << bit;
- uint64_t t_frac = s_frac + q;
- if (t_frac <= a_frac) {
- s_frac = t_frac + q;
- a_frac -= t_frac;
- r_frac += q;
- }
- a_frac <<= 1;
- } while (--bit >= last_bit);
- /* Undo the right shift done above. If there is any remaining
- * fraction, the result is inexact. Set the sticky bit.
- */
- a.frac = (r_frac << 1) + (a_frac != 0);
- return a;
- }
- float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
- {
- FloatParts pa = float16_unpack_canonical(a, status);
- FloatParts pr = sqrt_float(pa, status, &float16_params);
- return float16_round_pack_canonical(pr, status);
- }
- static float32 QEMU_SOFTFLOAT_ATTR
- soft_f32_sqrt(float32 a, float_status *status)
- {
- FloatParts pa = float32_unpack_canonical(a, status);
- FloatParts pr = sqrt_float(pa, status, &float32_params);
- return float32_round_pack_canonical(pr, status);
- }
- static float64 QEMU_SOFTFLOAT_ATTR
- soft_f64_sqrt(float64 a, float_status *status)
- {
- FloatParts pa = float64_unpack_canonical(a, status);
- FloatParts pr = sqrt_float(pa, status, &float64_params);
- return float64_round_pack_canonical(pr, status);
- }
- float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
- {
- union_float32 ua, ur;
- ua.s = xa;
- if (unlikely(!can_use_fpu(s))) {
- goto soft;
- }
- float32_input_flush1(&ua.s, s);
- if (QEMU_HARDFLOAT_1F32_USE_FP) {
- if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
- fpclassify(ua.h) == FP_ZERO) ||
- signbit(ua.h))) {
- goto soft;
- }
- } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
- float32_is_neg(ua.s))) {
- goto soft;
- }
- ur.h = sqrtf(ua.h);
- return ur.s;
- soft:
- return soft_f32_sqrt(ua.s, s);
- }
- float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
- {
- union_float64 ua, ur;
- ua.s = xa;
- if (unlikely(!can_use_fpu(s))) {
- goto soft;
- }
- float64_input_flush1(&ua.s, s);
- if (QEMU_HARDFLOAT_1F64_USE_FP) {
- if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
- fpclassify(ua.h) == FP_ZERO) ||
- signbit(ua.h))) {
- goto soft;
- }
- } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
- float64_is_neg(ua.s))) {
- goto soft;
- }
- ur.h = sqrt(ua.h);
- return ur.s;
- soft:
- return soft_f64_sqrt(ua.s, s);
- }
- /*----------------------------------------------------------------------------
- | The pattern for a default generated NaN.
- *----------------------------------------------------------------------------*/
- float16 float16_default_nan(float_status *status)
- {
- FloatParts p = parts_default_nan(status);
- p.frac >>= float16_params.frac_shift;
- return float16_pack_raw(p);
- }
- float32 float32_default_nan(float_status *status)
- {
- FloatParts p = parts_default_nan(status);
- p.frac >>= float32_params.frac_shift;
- return float32_pack_raw(p);
- }
- float64 float64_default_nan(float_status *status)
- {
- FloatParts p = parts_default_nan(status);
- p.frac >>= float64_params.frac_shift;
- return float64_pack_raw(p);
- }
- float128 float128_default_nan(float_status *status)
- {
- FloatParts p = parts_default_nan(status);
- float128 r;
- /* Extrapolate from the choices made by parts_default_nan to fill
- * in the quad-floating format. If the low bit is set, assume we
- * want to set all non-snan bits.
- */
- r.low = -(p.frac & 1);
- r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
- r.high |= UINT64_C(0x7FFF000000000000);
- r.high |= (uint64_t)p.sign << 63;
- return r;
- }
- /*----------------------------------------------------------------------------
- | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
- *----------------------------------------------------------------------------*/
- float16 float16_silence_nan(float16 a, float_status *status)
- {
- FloatParts p = float16_unpack_raw(a);
- p.frac <<= float16_params.frac_shift;
- p = parts_silence_nan(p, status);
- p.frac >>= float16_params.frac_shift;
- return float16_pack_raw(p);
- }
- float32 float32_silence_nan(float32 a, float_status *status)
- {
- FloatParts p = float32_unpack_raw(a);
- p.frac <<= float32_params.frac_shift;
- p = parts_silence_nan(p, status);
- p.frac >>= float32_params.frac_shift;
- return float32_pack_raw(p);
- }
- float64 float64_silence_nan(float64 a, float_status *status)
- {
- FloatParts p = float64_unpack_raw(a);
- p.frac <<= float64_params.frac_shift;
- p = parts_silence_nan(p, status);
- p.frac >>= float64_params.frac_shift;
- return float64_pack_raw(p);
- }
- /*----------------------------------------------------------------------------
- | If `a' is denormal and we are in flush-to-zero mode then set the
- | input-denormal exception and return zero. Otherwise just return the value.
- *----------------------------------------------------------------------------*/
- static bool parts_squash_denormal(FloatParts p, float_status *status)
- {
- if (p.exp == 0 && p.frac != 0) {
- float_raise(float_flag_input_denormal, status);
- return true;
- }
- return false;
- }
- float16 float16_squash_input_denormal(float16 a, float_status *status)
- {
- if (status->flush_inputs_to_zero) {
- FloatParts p = float16_unpack_raw(a);
- if (parts_squash_denormal(p, status)) {
- return float16_set_sign(float16_zero, p.sign);
- }
- }
- return a;
- }
- float32 float32_squash_input_denormal(float32 a, float_status *status)
- {
- if (status->flush_inputs_to_zero) {
- FloatParts p = float32_unpack_raw(a);
- if (parts_squash_denormal(p, status)) {
- return float32_set_sign(float32_zero, p.sign);
- }
- }
- return a;
- }
- float64 float64_squash_input_denormal(float64 a, float_status *status)
- {
- if (status->flush_inputs_to_zero) {
- FloatParts p = float64_unpack_raw(a);
- if (parts_squash_denormal(p, status)) {
- return float64_set_sign(float64_zero, p.sign);
- }
- }
- return a;
- }
- /*----------------------------------------------------------------------------
- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
- | and 7, and returns the properly rounded 32-bit integer corresponding to the
- | input. If `zSign' is 1, the input is negated before being converted to an
- | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
- | is simply rounded to an integer, with the inexact exception raised if the
- | input cannot be represented exactly as an integer. However, if the fixed-
- | point input is too large, the invalid exception is raised and the largest
- | positive or negative integer is returned.
- *----------------------------------------------------------------------------*/
- static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
- float_status *status)
- {
- int8_t roundingMode;
- bool roundNearestEven;
- int8_t roundIncrement, roundBits;
- int32_t z;
- roundingMode = status->float_rounding_mode;
- roundNearestEven = ( roundingMode == float_round_nearest_even );
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- roundIncrement = 0x40;
- break;
- case float_round_to_zero:
- roundIncrement = 0;
- break;
- case float_round_up:
- roundIncrement = zSign ? 0 : 0x7f;
- break;
- case float_round_down:
- roundIncrement = zSign ? 0x7f : 0;
- break;
- case float_round_to_odd:
- roundIncrement = absZ & 0x80 ? 0 : 0x7f;
- break;
- default:
- abort();
- }
- roundBits = absZ & 0x7F;
- absZ = ( absZ + roundIncrement )>>7;
- if (!(roundBits ^ 0x40) && roundNearestEven) {
- absZ &= ~1;
- }
- z = absZ;
- if ( zSign ) z = - z;
- if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
- float_raise(float_flag_invalid, status);
- return zSign ? INT32_MIN : INT32_MAX;
- }
- if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return z;
- }
- /*----------------------------------------------------------------------------
- | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
- | `absZ1', with binary point between bits 63 and 64 (between the input words),
- | and returns the properly rounded 64-bit integer corresponding to the input.
- | If `zSign' is 1, the input is negated before being converted to an integer.
- | Ordinarily, the fixed-point input is simply rounded to an integer, with
- | the inexact exception raised if the input cannot be represented exactly as
- | an integer. However, if the fixed-point input is too large, the invalid
- | exception is raised and the largest positive or negative integer is
- | returned.
- *----------------------------------------------------------------------------*/
- static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
- float_status *status)
- {
- int8_t roundingMode;
- bool roundNearestEven, increment;
- int64_t z;
- roundingMode = status->float_rounding_mode;
- roundNearestEven = ( roundingMode == float_round_nearest_even );
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- increment = ((int64_t) absZ1 < 0);
- break;
- case float_round_to_zero:
- increment = 0;
- break;
- case float_round_up:
- increment = !zSign && absZ1;
- break;
- case float_round_down:
- increment = zSign && absZ1;
- break;
- case float_round_to_odd:
- increment = !(absZ0 & 1) && absZ1;
- break;
- default:
- abort();
- }
- if ( increment ) {
- ++absZ0;
- if ( absZ0 == 0 ) goto overflow;
- if (!(absZ1 << 1) && roundNearestEven) {
- absZ0 &= ~1;
- }
- }
- z = absZ0;
- if ( zSign ) z = - z;
- if ( z && ( ( z < 0 ) ^ zSign ) ) {
- overflow:
- float_raise(float_flag_invalid, status);
- return zSign ? INT64_MIN : INT64_MAX;
- }
- if (absZ1) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return z;
- }
- /*----------------------------------------------------------------------------
- | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
- | `absZ1', with binary point between bits 63 and 64 (between the input words),
- | and returns the properly rounded 64-bit unsigned integer corresponding to the
- | input. Ordinarily, the fixed-point input is simply rounded to an integer,
- | with the inexact exception raised if the input cannot be represented exactly
- | as an integer. However, if the fixed-point input is too large, the invalid
- | exception is raised and the largest unsigned integer is returned.
- *----------------------------------------------------------------------------*/
- static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
- uint64_t absZ1, float_status *status)
- {
- int8_t roundingMode;
- bool roundNearestEven, increment;
- roundingMode = status->float_rounding_mode;
- roundNearestEven = (roundingMode == float_round_nearest_even);
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- increment = ((int64_t)absZ1 < 0);
- break;
- case float_round_to_zero:
- increment = 0;
- break;
- case float_round_up:
- increment = !zSign && absZ1;
- break;
- case float_round_down:
- increment = zSign && absZ1;
- break;
- case float_round_to_odd:
- increment = !(absZ0 & 1) && absZ1;
- break;
- default:
- abort();
- }
- if (increment) {
- ++absZ0;
- if (absZ0 == 0) {
- float_raise(float_flag_invalid, status);
- return UINT64_MAX;
- }
- if (!(absZ1 << 1) && roundNearestEven) {
- absZ0 &= ~1;
- }
- }
- if (zSign && absZ0) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- if (absZ1) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return absZ0;
- }
- /*----------------------------------------------------------------------------
- | Normalizes the subnormal single-precision floating-point value represented
- | by the denormalized significand `aSig'. The normalized exponent and
- | significand are stored at the locations pointed to by `zExpPtr' and
- | `zSigPtr', respectively.
- *----------------------------------------------------------------------------*/
- static void
- normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
- {
- int8_t shiftCount;
- shiftCount = clz32(aSig) - 8;
- *zSigPtr = aSig<<shiftCount;
- *zExpPtr = 1 - shiftCount;
- }
- /*----------------------------------------------------------------------------
- | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
- | and significand `zSig', and returns the proper single-precision floating-
- | point value corresponding to the abstract input. Ordinarily, the abstract
- | value is simply rounded and packed into the single-precision format, with
- | the inexact exception raised if the abstract input cannot be represented
- | exactly. However, if the abstract value is too large, the overflow and
- | inexact exceptions are raised and an infinity or maximal finite value is
- | returned. If the abstract value is too small, the input value is rounded to
- | a subnormal number, and the underflow and inexact exceptions are raised if
- | the abstract input cannot be represented exactly as a subnormal single-
- | precision floating-point number.
- | The input significand `zSig' has its binary point between bits 30
- | and 29, which is 7 bits to the left of the usual location. This shifted
- | significand must be normalized or smaller. If `zSig' is not normalized,
- | `zExp' must be 0; in that case, the result returned is a subnormal number,
- | and it must not require rounding. In the usual case that `zSig' is
- | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
- | The handling of underflow and overflow follows the IEC/IEEE Standard for
- | Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
- float_status *status)
- {
- int8_t roundingMode;
- bool roundNearestEven;
- int8_t roundIncrement, roundBits;
- bool isTiny;
- roundingMode = status->float_rounding_mode;
- roundNearestEven = ( roundingMode == float_round_nearest_even );
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- roundIncrement = 0x40;
- break;
- case float_round_to_zero:
- roundIncrement = 0;
- break;
- case float_round_up:
- roundIncrement = zSign ? 0 : 0x7f;
- break;
- case float_round_down:
- roundIncrement = zSign ? 0x7f : 0;
- break;
- case float_round_to_odd:
- roundIncrement = zSig & 0x80 ? 0 : 0x7f;
- break;
- default:
- abort();
- break;
- }
- roundBits = zSig & 0x7F;
- if ( 0xFD <= (uint16_t) zExp ) {
- if ( ( 0xFD < zExp )
- || ( ( zExp == 0xFD )
- && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
- ) {
- bool overflow_to_inf = roundingMode != float_round_to_odd &&
- roundIncrement != 0;
- float_raise(float_flag_overflow | float_flag_inexact, status);
- return packFloat32(zSign, 0xFF, -!overflow_to_inf);
- }
- if ( zExp < 0 ) {
- if (status->flush_to_zero) {
- float_raise(float_flag_output_denormal, status);
- return packFloat32(zSign, 0, 0);
- }
- isTiny = status->tininess_before_rounding
- || (zExp < -1)
- || (zSig + roundIncrement < 0x80000000);
- shift32RightJamming( zSig, - zExp, &zSig );
- zExp = 0;
- roundBits = zSig & 0x7F;
- if (isTiny && roundBits) {
- float_raise(float_flag_underflow, status);
- }
- if (roundingMode == float_round_to_odd) {
- /*
- * For round-to-odd case, the roundIncrement depends on
- * zSig which just changed.
- */
- roundIncrement = zSig & 0x80 ? 0 : 0x7f;
- }
- }
- }
- if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
- }
- zSig = ( zSig + roundIncrement )>>7;
- if (!(roundBits ^ 0x40) && roundNearestEven) {
- zSig &= ~1;
- }
- if ( zSig == 0 ) zExp = 0;
- return packFloat32( zSign, zExp, zSig );
- }
- /*----------------------------------------------------------------------------
- | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
- | and significand `zSig', and returns the proper single-precision floating-
- | point value corresponding to the abstract input. This routine is just like
- | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
- | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
- | floating-point exponent.
- *----------------------------------------------------------------------------*/
- static float32
- normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
- float_status *status)
- {
- int8_t shiftCount;
- shiftCount = clz32(zSig) - 1;
- return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
- status);
- }
- /*----------------------------------------------------------------------------
- | Normalizes the subnormal double-precision floating-point value represented
- | by the denormalized significand `aSig'. The normalized exponent and
- | significand are stored at the locations pointed to by `zExpPtr' and
- | `zSigPtr', respectively.
- *----------------------------------------------------------------------------*/
- static void
- normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
- {
- int8_t shiftCount;
- shiftCount = clz64(aSig) - 11;
- *zSigPtr = aSig<<shiftCount;
- *zExpPtr = 1 - shiftCount;
- }
- /*----------------------------------------------------------------------------
- | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
- | double-precision floating-point value, returning the result. After being
- | shifted into the proper positions, the three fields are simply added
- | together to form the result. This means that any integer portion of `zSig'
- | will be added into the exponent. Since a properly normalized significand
- | will have an integer portion equal to 1, the `zExp' input should be 1 less
- | than the desired result exponent whenever `zSig' is a complete, normalized
- | significand.
- *----------------------------------------------------------------------------*/
- static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
- {
- return make_float64(
- ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
- }
- /*----------------------------------------------------------------------------
- | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
- | and significand `zSig', and returns the proper double-precision floating-
- | point value corresponding to the abstract input. Ordinarily, the abstract
- | value is simply rounded and packed into the double-precision format, with
- | the inexact exception raised if the abstract input cannot be represented
- | exactly. However, if the abstract value is too large, the overflow and
- | inexact exceptions are raised and an infinity or maximal finite value is
- | returned. If the abstract value is too small, the input value is rounded to
- | a subnormal number, and the underflow and inexact exceptions are raised if
- | the abstract input cannot be represented exactly as a subnormal double-
- | precision floating-point number.
- | The input significand `zSig' has its binary point between bits 62
- | and 61, which is 10 bits to the left of the usual location. This shifted
- | significand must be normalized or smaller. If `zSig' is not normalized,
- | `zExp' must be 0; in that case, the result returned is a subnormal number,
- | and it must not require rounding. In the usual case that `zSig' is
- | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
- | The handling of underflow and overflow follows the IEC/IEEE Standard for
- | Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
- float_status *status)
- {
- int8_t roundingMode;
- bool roundNearestEven;
- int roundIncrement, roundBits;
- bool isTiny;
- roundingMode = status->float_rounding_mode;
- roundNearestEven = ( roundingMode == float_round_nearest_even );
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- roundIncrement = 0x200;
- break;
- case float_round_to_zero:
- roundIncrement = 0;
- break;
- case float_round_up:
- roundIncrement = zSign ? 0 : 0x3ff;
- break;
- case float_round_down:
- roundIncrement = zSign ? 0x3ff : 0;
- break;
- case float_round_to_odd:
- roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
- break;
- default:
- abort();
- }
- roundBits = zSig & 0x3FF;
- if ( 0x7FD <= (uint16_t) zExp ) {
- if ( ( 0x7FD < zExp )
- || ( ( zExp == 0x7FD )
- && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
- ) {
- bool overflow_to_inf = roundingMode != float_round_to_odd &&
- roundIncrement != 0;
- float_raise(float_flag_overflow | float_flag_inexact, status);
- return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
- }
- if ( zExp < 0 ) {
- if (status->flush_to_zero) {
- float_raise(float_flag_output_denormal, status);
- return packFloat64(zSign, 0, 0);
- }
- isTiny = status->tininess_before_rounding
- || (zExp < -1)
- || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
- shift64RightJamming( zSig, - zExp, &zSig );
- zExp = 0;
- roundBits = zSig & 0x3FF;
- if (isTiny && roundBits) {
- float_raise(float_flag_underflow, status);
- }
- if (roundingMode == float_round_to_odd) {
- /*
- * For round-to-odd case, the roundIncrement depends on
- * zSig which just changed.
- */
- roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
- }
- }
- }
- if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
- }
- zSig = ( zSig + roundIncrement )>>10;
- if (!(roundBits ^ 0x200) && roundNearestEven) {
- zSig &= ~1;
- }
- if ( zSig == 0 ) zExp = 0;
- return packFloat64( zSign, zExp, zSig );
- }
- /*----------------------------------------------------------------------------
- | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
- | and significand `zSig', and returns the proper double-precision floating-
- | point value corresponding to the abstract input. This routine is just like
- | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
- | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
- | floating-point exponent.
- *----------------------------------------------------------------------------*/
- static float64
- normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
- float_status *status)
- {
- int8_t shiftCount;
- shiftCount = clz64(zSig) - 1;
- return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
- status);
- }
- /*----------------------------------------------------------------------------
- | Normalizes the subnormal extended double-precision floating-point value
- | represented by the denormalized significand `aSig'. The normalized exponent
- | and significand are stored at the locations pointed to by `zExpPtr' and
- | `zSigPtr', respectively.
- *----------------------------------------------------------------------------*/
- void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
- uint64_t *zSigPtr)
- {
- int8_t shiftCount;
- shiftCount = clz64(aSig);
- *zSigPtr = aSig<<shiftCount;
- *zExpPtr = 1 - shiftCount;
- }
- /*----------------------------------------------------------------------------
- | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
- | and extended significand formed by the concatenation of `zSig0' and `zSig1',
- | and returns the proper extended double-precision floating-point value
- | corresponding to the abstract input. Ordinarily, the abstract value is
- | rounded and packed into the extended double-precision format, with the
- | inexact exception raised if the abstract input cannot be represented
- | exactly. However, if the abstract value is too large, the overflow and
- | inexact exceptions are raised and an infinity or maximal finite value is
- | returned. If the abstract value is too small, the input value is rounded to
- | a subnormal number, and the underflow and inexact exceptions are raised if
- | the abstract input cannot be represented exactly as a subnormal extended
- | double-precision floating-point number.
- | If `roundingPrecision' is 32 or 64, the result is rounded to the same
- | number of bits as single or double precision, respectively. Otherwise, the
- | result is rounded to the full precision of the extended double-precision
- | format.
- | The input significand must be normalized or smaller. If the input
- | significand is not normalized, `zExp' must be 0; in that case, the result
- | returned is a subnormal number, and it must not require rounding. The
- | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
- int32_t zExp, uint64_t zSig0, uint64_t zSig1,
- float_status *status)
- {
- int8_t roundingMode;
- bool roundNearestEven, increment, isTiny;
- int64_t roundIncrement, roundMask, roundBits;
- roundingMode = status->float_rounding_mode;
- roundNearestEven = ( roundingMode == float_round_nearest_even );
- if ( roundingPrecision == 80 ) goto precision80;
- if ( roundingPrecision == 64 ) {
- roundIncrement = UINT64_C(0x0000000000000400);
- roundMask = UINT64_C(0x00000000000007FF);
- }
- else if ( roundingPrecision == 32 ) {
- roundIncrement = UINT64_C(0x0000008000000000);
- roundMask = UINT64_C(0x000000FFFFFFFFFF);
- }
- else {
- goto precision80;
- }
- zSig0 |= ( zSig1 != 0 );
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- break;
- case float_round_to_zero:
- roundIncrement = 0;
- break;
- case float_round_up:
- roundIncrement = zSign ? 0 : roundMask;
- break;
- case float_round_down:
- roundIncrement = zSign ? roundMask : 0;
- break;
- default:
- abort();
- }
- roundBits = zSig0 & roundMask;
- if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
- if ( ( 0x7FFE < zExp )
- || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
- ) {
- goto overflow;
- }
- if ( zExp <= 0 ) {
- if (status->flush_to_zero) {
- float_raise(float_flag_output_denormal, status);
- return packFloatx80(zSign, 0, 0);
- }
- isTiny = status->tininess_before_rounding
- || (zExp < 0 )
- || (zSig0 <= zSig0 + roundIncrement);
- shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
- zExp = 0;
- roundBits = zSig0 & roundMask;
- if (isTiny && roundBits) {
- float_raise(float_flag_underflow, status);
- }
- if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
- }
- zSig0 += roundIncrement;
- if ( (int64_t) zSig0 < 0 ) zExp = 1;
- roundIncrement = roundMask + 1;
- if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
- roundMask |= roundIncrement;
- }
- zSig0 &= ~ roundMask;
- return packFloatx80( zSign, zExp, zSig0 );
- }
- }
- if (roundBits) {
- status->float_exception_flags |= float_flag_inexact;
- }
- zSig0 += roundIncrement;
- if ( zSig0 < roundIncrement ) {
- ++zExp;
- zSig0 = UINT64_C(0x8000000000000000);
- }
- roundIncrement = roundMask + 1;
- if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
- roundMask |= roundIncrement;
- }
- zSig0 &= ~ roundMask;
- if ( zSig0 == 0 ) zExp = 0;
- return packFloatx80( zSign, zExp, zSig0 );
- precision80:
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- increment = ((int64_t)zSig1 < 0);
- break;
- case float_round_to_zero:
- increment = 0;
- break;
- case float_round_up:
- increment = !zSign && zSig1;
- break;
- case float_round_down:
- increment = zSign && zSig1;
- break;
- default:
- abort();
- }
- if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
- if ( ( 0x7FFE < zExp )
- || ( ( zExp == 0x7FFE )
- && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
- && increment
- )
- ) {
- roundMask = 0;
- overflow:
- float_raise(float_flag_overflow | float_flag_inexact, status);
- if ( ( roundingMode == float_round_to_zero )
- || ( zSign && ( roundingMode == float_round_up ) )
- || ( ! zSign && ( roundingMode == float_round_down ) )
- ) {
- return packFloatx80( zSign, 0x7FFE, ~ roundMask );
- }
- return packFloatx80(zSign,
- floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( zExp <= 0 ) {
- isTiny = status->tininess_before_rounding
- || (zExp < 0)
- || !increment
- || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
- shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
- zExp = 0;
- if (isTiny && zSig1) {
- float_raise(float_flag_underflow, status);
- }
- if (zSig1) {
- status->float_exception_flags |= float_flag_inexact;
- }
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- increment = ((int64_t)zSig1 < 0);
- break;
- case float_round_to_zero:
- increment = 0;
- break;
- case float_round_up:
- increment = !zSign && zSig1;
- break;
- case float_round_down:
- increment = zSign && zSig1;
- break;
- default:
- abort();
- }
- if ( increment ) {
- ++zSig0;
- if (!(zSig1 << 1) && roundNearestEven) {
- zSig0 &= ~1;
- }
- if ( (int64_t) zSig0 < 0 ) zExp = 1;
- }
- return packFloatx80( zSign, zExp, zSig0 );
- }
- }
- if (zSig1) {
- status->float_exception_flags |= float_flag_inexact;
- }
- if ( increment ) {
- ++zSig0;
- if ( zSig0 == 0 ) {
- ++zExp;
- zSig0 = UINT64_C(0x8000000000000000);
- }
- else {
- if (!(zSig1 << 1) && roundNearestEven) {
- zSig0 &= ~1;
- }
- }
- }
- else {
- if ( zSig0 == 0 ) zExp = 0;
- }
- return packFloatx80( zSign, zExp, zSig0 );
- }
- /*----------------------------------------------------------------------------
- | Takes an abstract floating-point value having sign `zSign', exponent
- | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
- | and returns the proper extended double-precision floating-point value
- | corresponding to the abstract input. This routine is just like
- | `roundAndPackFloatx80' except that the input significand does not have to be
- | normalized.
- *----------------------------------------------------------------------------*/
- floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
- bool zSign, int32_t zExp,
- uint64_t zSig0, uint64_t zSig1,
- float_status *status)
- {
- int8_t shiftCount;
- if ( zSig0 == 0 ) {
- zSig0 = zSig1;
- zSig1 = 0;
- zExp -= 64;
- }
- shiftCount = clz64(zSig0);
- shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
- zExp -= shiftCount;
- return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
- zSig0, zSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the least-significant 64 fraction bits of the quadruple-precision
- | floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline uint64_t extractFloat128Frac1( float128 a )
- {
- return a.low;
- }
- /*----------------------------------------------------------------------------
- | Returns the most-significant 48 fraction bits of the quadruple-precision
- | floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline uint64_t extractFloat128Frac0( float128 a )
- {
- return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
- }
- /*----------------------------------------------------------------------------
- | Returns the exponent bits of the quadruple-precision floating-point value
- | `a'.
- *----------------------------------------------------------------------------*/
- static inline int32_t extractFloat128Exp( float128 a )
- {
- return ( a.high>>48 ) & 0x7FFF;
- }
- /*----------------------------------------------------------------------------
- | Returns the sign bit of the quadruple-precision floating-point value `a'.
- *----------------------------------------------------------------------------*/
- static inline bool extractFloat128Sign(float128 a)
- {
- return a.high >> 63;
- }
- /*----------------------------------------------------------------------------
- | Normalizes the subnormal quadruple-precision floating-point value
- | represented by the denormalized significand formed by the concatenation of
- | `aSig0' and `aSig1'. The normalized exponent is stored at the location
- | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
- | significand are stored at the location pointed to by `zSig0Ptr', and the
- | least significant 64 bits of the normalized significand are stored at the
- | location pointed to by `zSig1Ptr'.
- *----------------------------------------------------------------------------*/
- static void
- normalizeFloat128Subnormal(
- uint64_t aSig0,
- uint64_t aSig1,
- int32_t *zExpPtr,
- uint64_t *zSig0Ptr,
- uint64_t *zSig1Ptr
- )
- {
- int8_t shiftCount;
- if ( aSig0 == 0 ) {
- shiftCount = clz64(aSig1) - 15;
- if ( shiftCount < 0 ) {
- *zSig0Ptr = aSig1>>( - shiftCount );
- *zSig1Ptr = aSig1<<( shiftCount & 63 );
- }
- else {
- *zSig0Ptr = aSig1<<shiftCount;
- *zSig1Ptr = 0;
- }
- *zExpPtr = - shiftCount - 63;
- }
- else {
- shiftCount = clz64(aSig0) - 15;
- shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
- *zExpPtr = 1 - shiftCount;
- }
- }
- /*----------------------------------------------------------------------------
- | Packs the sign `zSign', the exponent `zExp', and the significand formed
- | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
- | floating-point value, returning the result. After being shifted into the
- | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
- | added together to form the most significant 32 bits of the result. This
- | means that any integer portion of `zSig0' will be added into the exponent.
- | Since a properly normalized significand will have an integer portion equal
- | to 1, the `zExp' input should be 1 less than the desired result exponent
- | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
- | significand.
- *----------------------------------------------------------------------------*/
- static inline float128
- packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
- {
- float128 z;
- z.low = zSig1;
- z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
- return z;
- }
- /*----------------------------------------------------------------------------
- | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
- | and extended significand formed by the concatenation of `zSig0', `zSig1',
- | and `zSig2', and returns the proper quadruple-precision floating-point value
- | corresponding to the abstract input. Ordinarily, the abstract value is
- | simply rounded and packed into the quadruple-precision format, with the
- | inexact exception raised if the abstract input cannot be represented
- | exactly. However, if the abstract value is too large, the overflow and
- | inexact exceptions are raised and an infinity or maximal finite value is
- | returned. If the abstract value is too small, the input value is rounded to
- | a subnormal number, and the underflow and inexact exceptions are raised if
- | the abstract input cannot be represented exactly as a subnormal quadruple-
- | precision floating-point number.
- | The input significand must be normalized or smaller. If the input
- | significand is not normalized, `zExp' must be 0; in that case, the result
- | returned is a subnormal number, and it must not require rounding. In the
- | usual case that the input significand is normalized, `zExp' must be 1 less
- | than the ``true'' floating-point exponent. The handling of underflow and
- | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
- uint64_t zSig0, uint64_t zSig1,
- uint64_t zSig2, float_status *status)
- {
- int8_t roundingMode;
- bool roundNearestEven, increment, isTiny;
- roundingMode = status->float_rounding_mode;
- roundNearestEven = ( roundingMode == float_round_nearest_even );
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- increment = ((int64_t)zSig2 < 0);
- break;
- case float_round_to_zero:
- increment = 0;
- break;
- case float_round_up:
- increment = !zSign && zSig2;
- break;
- case float_round_down:
- increment = zSign && zSig2;
- break;
- case float_round_to_odd:
- increment = !(zSig1 & 0x1) && zSig2;
- break;
- default:
- abort();
- }
- if ( 0x7FFD <= (uint32_t) zExp ) {
- if ( ( 0x7FFD < zExp )
- || ( ( zExp == 0x7FFD )
- && eq128(
- UINT64_C(0x0001FFFFFFFFFFFF),
- UINT64_C(0xFFFFFFFFFFFFFFFF),
- zSig0,
- zSig1
- )
- && increment
- )
- ) {
- float_raise(float_flag_overflow | float_flag_inexact, status);
- if ( ( roundingMode == float_round_to_zero )
- || ( zSign && ( roundingMode == float_round_up ) )
- || ( ! zSign && ( roundingMode == float_round_down ) )
- || (roundingMode == float_round_to_odd)
- ) {
- return
- packFloat128(
- zSign,
- 0x7FFE,
- UINT64_C(0x0000FFFFFFFFFFFF),
- UINT64_C(0xFFFFFFFFFFFFFFFF)
- );
- }
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( zExp < 0 ) {
- if (status->flush_to_zero) {
- float_raise(float_flag_output_denormal, status);
- return packFloat128(zSign, 0, 0, 0);
- }
- isTiny = status->tininess_before_rounding
- || (zExp < -1)
- || !increment
- || lt128(zSig0, zSig1,
- UINT64_C(0x0001FFFFFFFFFFFF),
- UINT64_C(0xFFFFFFFFFFFFFFFF));
- shift128ExtraRightJamming(
- zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
- zExp = 0;
- if (isTiny && zSig2) {
- float_raise(float_flag_underflow, status);
- }
- switch (roundingMode) {
- case float_round_nearest_even:
- case float_round_ties_away:
- increment = ((int64_t)zSig2 < 0);
- break;
- case float_round_to_zero:
- increment = 0;
- break;
- case float_round_up:
- increment = !zSign && zSig2;
- break;
- case float_round_down:
- increment = zSign && zSig2;
- break;
- case float_round_to_odd:
- increment = !(zSig1 & 0x1) && zSig2;
- break;
- default:
- abort();
- }
- }
- }
- if (zSig2) {
- status->float_exception_flags |= float_flag_inexact;
- }
- if ( increment ) {
- add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
- if ((zSig2 + zSig2 == 0) && roundNearestEven) {
- zSig1 &= ~1;
- }
- }
- else {
- if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
- }
- return packFloat128( zSign, zExp, zSig0, zSig1 );
- }
- /*----------------------------------------------------------------------------
- | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
- | and significand formed by the concatenation of `zSig0' and `zSig1', and
- | returns the proper quadruple-precision floating-point value corresponding
- | to the abstract input. This routine is just like `roundAndPackFloat128'
- | except that the input significand has fewer bits and does not have to be
- | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
- | point exponent.
- *----------------------------------------------------------------------------*/
- static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
- uint64_t zSig0, uint64_t zSig1,
- float_status *status)
- {
- int8_t shiftCount;
- uint64_t zSig2;
- if ( zSig0 == 0 ) {
- zSig0 = zSig1;
- zSig1 = 0;
- zExp -= 64;
- }
- shiftCount = clz64(zSig0) - 15;
- if ( 0 <= shiftCount ) {
- zSig2 = 0;
- shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
- }
- else {
- shift128ExtraRightJamming(
- zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
- }
- zExp -= shiftCount;
- return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the 32-bit two's complement integer `a'
- | to the extended double-precision floating-point format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 int32_to_floatx80(int32_t a, float_status *status)
- {
- bool zSign;
- uint32_t absA;
- int8_t shiftCount;
- uint64_t zSig;
- if ( a == 0 ) return packFloatx80( 0, 0, 0 );
- zSign = ( a < 0 );
- absA = zSign ? - a : a;
- shiftCount = clz32(absA) + 32;
- zSig = absA;
- return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the 32-bit two's complement integer `a' to
- | the quadruple-precision floating-point format. The conversion is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 int32_to_float128(int32_t a, float_status *status)
- {
- bool zSign;
- uint32_t absA;
- int8_t shiftCount;
- uint64_t zSig0;
- if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
- zSign = ( a < 0 );
- absA = zSign ? - a : a;
- shiftCount = clz32(absA) + 17;
- zSig0 = absA;
- return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the 64-bit two's complement integer `a'
- | to the extended double-precision floating-point format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 int64_to_floatx80(int64_t a, float_status *status)
- {
- bool zSign;
- uint64_t absA;
- int8_t shiftCount;
- if ( a == 0 ) return packFloatx80( 0, 0, 0 );
- zSign = ( a < 0 );
- absA = zSign ? - a : a;
- shiftCount = clz64(absA);
- return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the 64-bit two's complement integer `a' to
- | the quadruple-precision floating-point format. The conversion is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 int64_to_float128(int64_t a, float_status *status)
- {
- bool zSign;
- uint64_t absA;
- int8_t shiftCount;
- int32_t zExp;
- uint64_t zSig0, zSig1;
- if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
- zSign = ( a < 0 );
- absA = zSign ? - a : a;
- shiftCount = clz64(absA) + 49;
- zExp = 0x406E - shiftCount;
- if ( 64 <= shiftCount ) {
- zSig1 = 0;
- zSig0 = absA;
- shiftCount -= 64;
- }
- else {
- zSig1 = absA;
- zSig0 = 0;
- }
- shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
- return packFloat128( zSign, zExp, zSig0, zSig1 );
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the 64-bit unsigned integer `a'
- | to the quadruple-precision floating-point format. The conversion is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 uint64_to_float128(uint64_t a, float_status *status)
- {
- if (a == 0) {
- return float128_zero;
- }
- return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the single-precision floating-point value
- | `a' to the extended double-precision floating-point format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 float32_to_floatx80(float32 a, float_status *status)
- {
- bool aSign;
- int aExp;
- uint32_t aSig;
- a = float32_squash_input_denormal(a, status);
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- aSign = extractFloat32Sign( a );
- if ( aExp == 0xFF ) {
- if (aSig) {
- floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
- status);
- return floatx80_silence_nan(res, status);
- }
- return packFloatx80(aSign,
- floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
- normalizeFloat32Subnormal( aSig, &aExp, &aSig );
- }
- aSig |= 0x00800000;
- return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the single-precision floating-point value
- | `a' to the double-precision floating-point format. The conversion is
- | performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float32_to_float128(float32 a, float_status *status)
- {
- bool aSign;
- int aExp;
- uint32_t aSig;
- a = float32_squash_input_denormal(a, status);
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- aSign = extractFloat32Sign( a );
- if ( aExp == 0xFF ) {
- if (aSig) {
- return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
- }
- return packFloat128( aSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
- normalizeFloat32Subnormal( aSig, &aExp, &aSig );
- --aExp;
- }
- return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
- }
- /*----------------------------------------------------------------------------
- | Returns the remainder of the single-precision floating-point value `a'
- | with respect to the corresponding value `b'. The operation is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float32 float32_rem(float32 a, float32 b, float_status *status)
- {
- bool aSign, zSign;
- int aExp, bExp, expDiff;
- uint32_t aSig, bSig;
- uint32_t q;
- uint64_t aSig64, bSig64, q64;
- uint32_t alternateASig;
- int32_t sigMean;
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- aSign = extractFloat32Sign( a );
- bSig = extractFloat32Frac( b );
- bExp = extractFloat32Exp( b );
- if ( aExp == 0xFF ) {
- if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
- return propagateFloat32NaN(a, b, status);
- }
- float_raise(float_flag_invalid, status);
- return float32_default_nan(status);
- }
- if ( bExp == 0xFF ) {
- if (bSig) {
- return propagateFloat32NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- if ( bSig == 0 ) {
- float_raise(float_flag_invalid, status);
- return float32_default_nan(status);
- }
- normalizeFloat32Subnormal( bSig, &bExp, &bSig );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return a;
- normalizeFloat32Subnormal( aSig, &aExp, &aSig );
- }
- expDiff = aExp - bExp;
- aSig |= 0x00800000;
- bSig |= 0x00800000;
- if ( expDiff < 32 ) {
- aSig <<= 8;
- bSig <<= 8;
- if ( expDiff < 0 ) {
- if ( expDiff < -1 ) return a;
- aSig >>= 1;
- }
- q = ( bSig <= aSig );
- if ( q ) aSig -= bSig;
- if ( 0 < expDiff ) {
- q = ( ( (uint64_t) aSig )<<32 ) / bSig;
- q >>= 32 - expDiff;
- bSig >>= 2;
- aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
- }
- else {
- aSig >>= 2;
- bSig >>= 2;
- }
- }
- else {
- if ( bSig <= aSig ) aSig -= bSig;
- aSig64 = ( (uint64_t) aSig )<<40;
- bSig64 = ( (uint64_t) bSig )<<40;
- expDiff -= 64;
- while ( 0 < expDiff ) {
- q64 = estimateDiv128To64( aSig64, 0, bSig64 );
- q64 = ( 2 < q64 ) ? q64 - 2 : 0;
- aSig64 = - ( ( bSig * q64 )<<38 );
- expDiff -= 62;
- }
- expDiff += 64;
- q64 = estimateDiv128To64( aSig64, 0, bSig64 );
- q64 = ( 2 < q64 ) ? q64 - 2 : 0;
- q = q64>>( 64 - expDiff );
- bSig <<= 6;
- aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
- }
- do {
- alternateASig = aSig;
- ++q;
- aSig -= bSig;
- } while ( 0 <= (int32_t) aSig );
- sigMean = aSig + alternateASig;
- if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
- aSig = alternateASig;
- }
- zSign = ( (int32_t) aSig < 0 );
- if ( zSign ) aSig = - aSig;
- return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the binary exponential of the single-precision floating-point value
- | `a'. The operation is performed according to the IEC/IEEE Standard for
- | Binary Floating-Point Arithmetic.
- |
- | Uses the following identities:
- |
- | 1. -------------------------------------------------------------------------
- | x x*ln(2)
- | 2 = e
- |
- | 2. -------------------------------------------------------------------------
- | 2 3 4 5 n
- | x x x x x x x
- | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
- | 1! 2! 3! 4! 5! n!
- *----------------------------------------------------------------------------*/
- static const float64 float32_exp2_coefficients[15] =
- {
- const_float64( 0x3ff0000000000000ll ), /* 1 */
- const_float64( 0x3fe0000000000000ll ), /* 2 */
- const_float64( 0x3fc5555555555555ll ), /* 3 */
- const_float64( 0x3fa5555555555555ll ), /* 4 */
- const_float64( 0x3f81111111111111ll ), /* 5 */
- const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
- const_float64( 0x3f2a01a01a01a01all ), /* 7 */
- const_float64( 0x3efa01a01a01a01all ), /* 8 */
- const_float64( 0x3ec71de3a556c734ll ), /* 9 */
- const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
- const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
- const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
- const_float64( 0x3de6124613a86d09ll ), /* 13 */
- const_float64( 0x3da93974a8c07c9dll ), /* 14 */
- const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
- };
- float32 float32_exp2(float32 a, float_status *status)
- {
- bool aSign;
- int aExp;
- uint32_t aSig;
- float64 r, x, xn;
- int i;
- a = float32_squash_input_denormal(a, status);
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- aSign = extractFloat32Sign( a );
- if ( aExp == 0xFF) {
- if (aSig) {
- return propagateFloat32NaN(a, float32_zero, status);
- }
- return (aSign) ? float32_zero : a;
- }
- if (aExp == 0) {
- if (aSig == 0) return float32_one;
- }
- float_raise(float_flag_inexact, status);
- /* ******************************* */
- /* using float64 for approximation */
- /* ******************************* */
- x = float32_to_float64(a, status);
- x = float64_mul(x, float64_ln2, status);
- xn = x;
- r = float64_one;
- for (i = 0 ; i < 15 ; i++) {
- float64 f;
- f = float64_mul(xn, float32_exp2_coefficients[i], status);
- r = float64_add(r, f, status);
- xn = float64_mul(xn, x, status);
- }
- return float64_to_float32(r, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the binary log of the single-precision floating-point value `a'.
- | The operation is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float32 float32_log2(float32 a, float_status *status)
- {
- bool aSign, zSign;
- int aExp;
- uint32_t aSig, zSig, i;
- a = float32_squash_input_denormal(a, status);
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- aSign = extractFloat32Sign( a );
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
- normalizeFloat32Subnormal( aSig, &aExp, &aSig );
- }
- if ( aSign ) {
- float_raise(float_flag_invalid, status);
- return float32_default_nan(status);
- }
- if ( aExp == 0xFF ) {
- if (aSig) {
- return propagateFloat32NaN(a, float32_zero, status);
- }
- return a;
- }
- aExp -= 0x7F;
- aSig |= 0x00800000;
- zSign = aExp < 0;
- zSig = aExp << 23;
- for (i = 1 << 22; i > 0; i >>= 1) {
- aSig = ( (uint64_t)aSig * aSig ) >> 23;
- if ( aSig & 0x01000000 ) {
- aSig >>= 1;
- zSig |= i;
- }
- }
- if ( zSign )
- zSig = -zSig;
- return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the double-precision floating-point value
- | `a' to the extended double-precision floating-point format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 float64_to_floatx80(float64 a, float_status *status)
- {
- bool aSign;
- int aExp;
- uint64_t aSig;
- a = float64_squash_input_denormal(a, status);
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- aSign = extractFloat64Sign( a );
- if ( aExp == 0x7FF ) {
- if (aSig) {
- floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
- status);
- return floatx80_silence_nan(res, status);
- }
- return packFloatx80(aSign,
- floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
- normalizeFloat64Subnormal( aSig, &aExp, &aSig );
- }
- return
- packFloatx80(
- aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the double-precision floating-point value
- | `a' to the quadruple-precision floating-point format. The conversion is
- | performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float64_to_float128(float64 a, float_status *status)
- {
- bool aSign;
- int aExp;
- uint64_t aSig, zSig0, zSig1;
- a = float64_squash_input_denormal(a, status);
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- aSign = extractFloat64Sign( a );
- if ( aExp == 0x7FF ) {
- if (aSig) {
- return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
- }
- return packFloat128( aSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
- normalizeFloat64Subnormal( aSig, &aExp, &aSig );
- --aExp;
- }
- shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
- return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
- }
- /*----------------------------------------------------------------------------
- | Returns the remainder of the double-precision floating-point value `a'
- | with respect to the corresponding value `b'. The operation is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float64 float64_rem(float64 a, float64 b, float_status *status)
- {
- bool aSign, zSign;
- int aExp, bExp, expDiff;
- uint64_t aSig, bSig;
- uint64_t q, alternateASig;
- int64_t sigMean;
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- aSign = extractFloat64Sign( a );
- bSig = extractFloat64Frac( b );
- bExp = extractFloat64Exp( b );
- if ( aExp == 0x7FF ) {
- if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
- return propagateFloat64NaN(a, b, status);
- }
- float_raise(float_flag_invalid, status);
- return float64_default_nan(status);
- }
- if ( bExp == 0x7FF ) {
- if (bSig) {
- return propagateFloat64NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- if ( bSig == 0 ) {
- float_raise(float_flag_invalid, status);
- return float64_default_nan(status);
- }
- normalizeFloat64Subnormal( bSig, &bExp, &bSig );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return a;
- normalizeFloat64Subnormal( aSig, &aExp, &aSig );
- }
- expDiff = aExp - bExp;
- aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
- bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
- if ( expDiff < 0 ) {
- if ( expDiff < -1 ) return a;
- aSig >>= 1;
- }
- q = ( bSig <= aSig );
- if ( q ) aSig -= bSig;
- expDiff -= 64;
- while ( 0 < expDiff ) {
- q = estimateDiv128To64( aSig, 0, bSig );
- q = ( 2 < q ) ? q - 2 : 0;
- aSig = - ( ( bSig>>2 ) * q );
- expDiff -= 62;
- }
- expDiff += 64;
- if ( 0 < expDiff ) {
- q = estimateDiv128To64( aSig, 0, bSig );
- q = ( 2 < q ) ? q - 2 : 0;
- q >>= 64 - expDiff;
- bSig >>= 2;
- aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
- }
- else {
- aSig >>= 2;
- bSig >>= 2;
- }
- do {
- alternateASig = aSig;
- ++q;
- aSig -= bSig;
- } while ( 0 <= (int64_t) aSig );
- sigMean = aSig + alternateASig;
- if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
- aSig = alternateASig;
- }
- zSign = ( (int64_t) aSig < 0 );
- if ( zSign ) aSig = - aSig;
- return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the binary log of the double-precision floating-point value `a'.
- | The operation is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float64 float64_log2(float64 a, float_status *status)
- {
- bool aSign, zSign;
- int aExp;
- uint64_t aSig, aSig0, aSig1, zSig, i;
- a = float64_squash_input_denormal(a, status);
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- aSign = extractFloat64Sign( a );
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
- normalizeFloat64Subnormal( aSig, &aExp, &aSig );
- }
- if ( aSign ) {
- float_raise(float_flag_invalid, status);
- return float64_default_nan(status);
- }
- if ( aExp == 0x7FF ) {
- if (aSig) {
- return propagateFloat64NaN(a, float64_zero, status);
- }
- return a;
- }
- aExp -= 0x3FF;
- aSig |= UINT64_C(0x0010000000000000);
- zSign = aExp < 0;
- zSig = (uint64_t)aExp << 52;
- for (i = 1LL << 51; i > 0; i >>= 1) {
- mul64To128( aSig, aSig, &aSig0, &aSig1 );
- aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
- if ( aSig & UINT64_C(0x0020000000000000) ) {
- aSig >>= 1;
- zSig |= i;
- }
- }
- if ( zSign )
- zSig = -zSig;
- return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the extended double-precision floating-
- | point value `a' to the 32-bit two's complement integer format. The
- | conversion is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic---which means in particular that the conversion
- | is rounded according to the current rounding mode. If `a' is a NaN, the
- | largest positive integer is returned. Otherwise, if the conversion
- | overflows, the largest integer with the same sign as `a' is returned.
- *----------------------------------------------------------------------------*/
- int32_t floatx80_to_int32(floatx80 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return 1 << 31;
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
- shiftCount = 0x4037 - aExp;
- if ( shiftCount <= 0 ) shiftCount = 1;
- shift64RightJamming( aSig, shiftCount, &aSig );
- return roundAndPackInt32(aSign, aSig, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the extended double-precision floating-
- | point value `a' to the 32-bit two's complement integer format. The
- | conversion is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic, except that the conversion is always rounded
- | toward zero. If `a' is a NaN, the largest positive integer is returned.
- | Otherwise, if the conversion overflows, the largest integer with the same
- | sign as `a' is returned.
- *----------------------------------------------------------------------------*/
- int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig, savedASig;
- int32_t z;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return 1 << 31;
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- if ( 0x401E < aExp ) {
- if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
- goto invalid;
- }
- else if ( aExp < 0x3FFF ) {
- if (aExp || aSig) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return 0;
- }
- shiftCount = 0x403E - aExp;
- savedASig = aSig;
- aSig >>= shiftCount;
- z = aSig;
- if ( aSign ) z = - z;
- if ( ( z < 0 ) ^ aSign ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
- }
- if ( ( aSig<<shiftCount ) != savedASig ) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return z;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the extended double-precision floating-
- | point value `a' to the 64-bit two's complement integer format. The
- | conversion is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic---which means in particular that the conversion
- | is rounded according to the current rounding mode. If `a' is a NaN,
- | the largest positive integer is returned. Otherwise, if the conversion
- | overflows, the largest integer with the same sign as `a' is returned.
- *----------------------------------------------------------------------------*/
- int64_t floatx80_to_int64(floatx80 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig, aSigExtra;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return 1ULL << 63;
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- shiftCount = 0x403E - aExp;
- if ( shiftCount <= 0 ) {
- if ( shiftCount ) {
- float_raise(float_flag_invalid, status);
- if (!aSign || floatx80_is_any_nan(a)) {
- return INT64_MAX;
- }
- return INT64_MIN;
- }
- aSigExtra = 0;
- }
- else {
- shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
- }
- return roundAndPackInt64(aSign, aSig, aSigExtra, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the extended double-precision floating-
- | point value `a' to the 64-bit two's complement integer format. The
- | conversion is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic, except that the conversion is always rounded
- | toward zero. If `a' is a NaN, the largest positive integer is returned.
- | Otherwise, if the conversion overflows, the largest integer with the same
- | sign as `a' is returned.
- *----------------------------------------------------------------------------*/
- int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig;
- int64_t z;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return 1ULL << 63;
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- shiftCount = aExp - 0x403E;
- if ( 0 <= shiftCount ) {
- aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
- if ( ( a.high != 0xC03E ) || aSig ) {
- float_raise(float_flag_invalid, status);
- if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
- return INT64_MAX;
- }
- }
- return INT64_MIN;
- }
- else if ( aExp < 0x3FFF ) {
- if (aExp | aSig) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return 0;
- }
- z = aSig>>( - shiftCount );
- if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
- status->float_exception_flags |= float_flag_inexact;
- }
- if ( aSign ) z = - z;
- return z;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the extended double-precision floating-
- | point value `a' to the single-precision floating-point format. The
- | conversion is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float32 floatx80_to_float32(floatx80 a, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t aSig;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return float32_default_nan(status);
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( (uint64_t) ( aSig<<1 ) ) {
- float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
- status);
- return float32_silence_nan(res, status);
- }
- return packFloat32( aSign, 0xFF, 0 );
- }
- shift64RightJamming( aSig, 33, &aSig );
- if ( aExp || aSig ) aExp -= 0x3F81;
- return roundAndPackFloat32(aSign, aExp, aSig, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the extended double-precision floating-
- | point value `a' to the double-precision floating-point format. The
- | conversion is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float64 floatx80_to_float64(floatx80 a, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t aSig, zSig;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return float64_default_nan(status);
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( (uint64_t) ( aSig<<1 ) ) {
- float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
- status);
- return float64_silence_nan(res, status);
- }
- return packFloat64( aSign, 0x7FF, 0 );
- }
- shift64RightJamming( aSig, 1, &zSig );
- if ( aExp || aSig ) aExp -= 0x3C01;
- return roundAndPackFloat64(aSign, aExp, zSig, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the extended double-precision floating-
- | point value `a' to the quadruple-precision floating-point format. The
- | conversion is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 floatx80_to_float128(floatx80 a, float_status *status)
- {
- bool aSign;
- int aExp;
- uint64_t aSig, zSig0, zSig1;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
- float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
- status);
- return float128_silence_nan(res, status);
- }
- shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
- return packFloat128( aSign, aExp, zSig0, zSig1 );
- }
- /*----------------------------------------------------------------------------
- | Rounds the extended double-precision floating-point value `a'
- | to the precision provided by floatx80_rounding_precision and returns the
- | result as an extended double-precision floating-point value.
- | The operation is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_round(floatx80 a, float_status *status)
- {
- return roundAndPackFloatx80(status->floatx80_rounding_precision,
- extractFloatx80Sign(a),
- extractFloatx80Exp(a),
- extractFloatx80Frac(a), 0, status);
- }
- /*----------------------------------------------------------------------------
- | Rounds the extended double-precision floating-point value `a' to an integer,
- | and returns the result as an extended quadruple-precision floating-point
- | value. The operation is performed according to the IEC/IEEE Standard for
- | Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t lastBitMask, roundBitsMask;
- floatx80 z;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- aExp = extractFloatx80Exp( a );
- if ( 0x403E <= aExp ) {
- if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
- return propagateFloatx80NaN(a, a, status);
- }
- return a;
- }
- if ( aExp < 0x3FFF ) {
- if ( ( aExp == 0 )
- && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
- return a;
- }
- status->float_exception_flags |= float_flag_inexact;
- aSign = extractFloatx80Sign( a );
- switch (status->float_rounding_mode) {
- case float_round_nearest_even:
- if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
- ) {
- return
- packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
- }
- break;
- case float_round_ties_away:
- if (aExp == 0x3FFE) {
- return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
- }
- break;
- case float_round_down:
- return
- aSign ?
- packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
- : packFloatx80( 0, 0, 0 );
- case float_round_up:
- return
- aSign ? packFloatx80( 1, 0, 0 )
- : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
- case float_round_to_zero:
- break;
- default:
- g_assert_not_reached();
- }
- return packFloatx80( aSign, 0, 0 );
- }
- lastBitMask = 1;
- lastBitMask <<= 0x403E - aExp;
- roundBitsMask = lastBitMask - 1;
- z = a;
- switch (status->float_rounding_mode) {
- case float_round_nearest_even:
- z.low += lastBitMask>>1;
- if ((z.low & roundBitsMask) == 0) {
- z.low &= ~lastBitMask;
- }
- break;
- case float_round_ties_away:
- z.low += lastBitMask >> 1;
- break;
- case float_round_to_zero:
- break;
- case float_round_up:
- if (!extractFloatx80Sign(z)) {
- z.low += roundBitsMask;
- }
- break;
- case float_round_down:
- if (extractFloatx80Sign(z)) {
- z.low += roundBitsMask;
- }
- break;
- default:
- abort();
- }
- z.low &= ~ roundBitsMask;
- if ( z.low == 0 ) {
- ++z.high;
- z.low = UINT64_C(0x8000000000000000);
- }
- if (z.low != a.low) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return z;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of adding the absolute values of the extended double-
- | precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
- | negated before being returned. `zSign' is ignored if the result is a NaN.
- | The addition is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
- float_status *status)
- {
- int32_t aExp, bExp, zExp;
- uint64_t aSig, bSig, zSig0, zSig1;
- int32_t expDiff;
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- bSig = extractFloatx80Frac( b );
- bExp = extractFloatx80Exp( b );
- expDiff = aExp - bExp;
- if ( 0 < expDiff ) {
- if ( aExp == 0x7FFF ) {
- if ((uint64_t)(aSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) --expDiff;
- shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
- zExp = aExp;
- }
- else if ( expDiff < 0 ) {
- if ( bExp == 0x7FFF ) {
- if ((uint64_t)(bSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- return packFloatx80(zSign,
- floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( aExp == 0 ) ++expDiff;
- shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
- zExp = bExp;
- }
- else {
- if ( aExp == 0x7FFF ) {
- if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
- return propagateFloatx80NaN(a, b, status);
- }
- return a;
- }
- zSig1 = 0;
- zSig0 = aSig + bSig;
- if ( aExp == 0 ) {
- if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
- /* At least one of the values is a pseudo-denormal,
- * and there is a carry out of the result. */
- zExp = 1;
- goto shiftRight1;
- }
- if (zSig0 == 0) {
- return packFloatx80(zSign, 0, 0);
- }
- normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
- goto roundAndPack;
- }
- zExp = aExp;
- goto shiftRight1;
- }
- zSig0 = aSig + bSig;
- if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
- shiftRight1:
- shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
- zSig0 |= UINT64_C(0x8000000000000000);
- ++zExp;
- roundAndPack:
- return roundAndPackFloatx80(status->floatx80_rounding_precision,
- zSign, zExp, zSig0, zSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of subtracting the absolute values of the extended
- | double-precision floating-point values `a' and `b'. If `zSign' is 1, the
- | difference is negated before being returned. `zSign' is ignored if the
- | result is a NaN. The subtraction is performed according to the IEC/IEEE
- | Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
- float_status *status)
- {
- int32_t aExp, bExp, zExp;
- uint64_t aSig, bSig, zSig0, zSig1;
- int32_t expDiff;
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- bSig = extractFloatx80Frac( b );
- bExp = extractFloatx80Exp( b );
- expDiff = aExp - bExp;
- if ( 0 < expDiff ) goto aExpBigger;
- if ( expDiff < 0 ) goto bExpBigger;
- if ( aExp == 0x7FFF ) {
- if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
- return propagateFloatx80NaN(a, b, status);
- }
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- if ( aExp == 0 ) {
- aExp = 1;
- bExp = 1;
- }
- zSig1 = 0;
- if ( bSig < aSig ) goto aBigger;
- if ( aSig < bSig ) goto bBigger;
- return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
- bExpBigger:
- if ( bExp == 0x7FFF ) {
- if ((uint64_t)(bSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- return packFloatx80(zSign ^ 1, floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( aExp == 0 ) ++expDiff;
- shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
- bBigger:
- sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
- zExp = bExp;
- zSign ^= 1;
- goto normalizeRoundAndPack;
- aExpBigger:
- if ( aExp == 0x7FFF ) {
- if ((uint64_t)(aSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) --expDiff;
- shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
- aBigger:
- sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
- zExp = aExp;
- normalizeRoundAndPack:
- return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
- zSign, zExp, zSig0, zSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of adding the extended double-precision floating-point
- | values `a' and `b'. The operation is performed according to the IEC/IEEE
- | Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
- {
- bool aSign, bSign;
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- aSign = extractFloatx80Sign( a );
- bSign = extractFloatx80Sign( b );
- if ( aSign == bSign ) {
- return addFloatx80Sigs(a, b, aSign, status);
- }
- else {
- return subFloatx80Sigs(a, b, aSign, status);
- }
- }
- /*----------------------------------------------------------------------------
- | Returns the result of subtracting the extended double-precision floating-
- | point values `a' and `b'. The operation is performed according to the
- | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
- {
- bool aSign, bSign;
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- aSign = extractFloatx80Sign( a );
- bSign = extractFloatx80Sign( b );
- if ( aSign == bSign ) {
- return subFloatx80Sigs(a, b, aSign, status);
- }
- else {
- return addFloatx80Sigs(a, b, aSign, status);
- }
- }
- /*----------------------------------------------------------------------------
- | Returns the result of multiplying the extended double-precision floating-
- | point values `a' and `b'. The operation is performed according to the
- | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
- {
- bool aSign, bSign, zSign;
- int32_t aExp, bExp, zExp;
- uint64_t aSig, bSig, zSig0, zSig1;
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- bSig = extractFloatx80Frac( b );
- bExp = extractFloatx80Exp( b );
- bSign = extractFloatx80Sign( b );
- zSign = aSign ^ bSign;
- if ( aExp == 0x7FFF ) {
- if ( (uint64_t) ( aSig<<1 )
- || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
- return propagateFloatx80NaN(a, b, status);
- }
- if ( ( bExp | bSig ) == 0 ) goto invalid;
- return packFloatx80(zSign, floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( bExp == 0x7FFF ) {
- if ((uint64_t)(bSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- if ( ( aExp | aSig ) == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- return packFloatx80(zSign, floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
- normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
- }
- if ( bExp == 0 ) {
- if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
- normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
- }
- zExp = aExp + bExp - 0x3FFE;
- mul64To128( aSig, bSig, &zSig0, &zSig1 );
- if ( 0 < (int64_t) zSig0 ) {
- shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
- --zExp;
- }
- return roundAndPackFloatx80(status->floatx80_rounding_precision,
- zSign, zExp, zSig0, zSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of dividing the extended double-precision floating-point
- | value `a' by the corresponding value `b'. The operation is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
- {
- bool aSign, bSign, zSign;
- int32_t aExp, bExp, zExp;
- uint64_t aSig, bSig, zSig0, zSig1;
- uint64_t rem0, rem1, rem2, term0, term1, term2;
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- bSig = extractFloatx80Frac( b );
- bExp = extractFloatx80Exp( b );
- bSign = extractFloatx80Sign( b );
- zSign = aSign ^ bSign;
- if ( aExp == 0x7FFF ) {
- if ((uint64_t)(aSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- if ( bExp == 0x7FFF ) {
- if ((uint64_t)(bSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- goto invalid;
- }
- return packFloatx80(zSign, floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( bExp == 0x7FFF ) {
- if ((uint64_t)(bSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- return packFloatx80( zSign, 0, 0 );
- }
- if ( bExp == 0 ) {
- if ( bSig == 0 ) {
- if ( ( aExp | aSig ) == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- float_raise(float_flag_divbyzero, status);
- return packFloatx80(zSign, floatx80_infinity_high,
- floatx80_infinity_low);
- }
- normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
- normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
- }
- zExp = aExp - bExp + 0x3FFE;
- rem1 = 0;
- if ( bSig <= aSig ) {
- shift128Right( aSig, 0, 1, &aSig, &rem1 );
- ++zExp;
- }
- zSig0 = estimateDiv128To64( aSig, rem1, bSig );
- mul64To128( bSig, zSig0, &term0, &term1 );
- sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
- while ( (int64_t) rem0 < 0 ) {
- --zSig0;
- add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
- }
- zSig1 = estimateDiv128To64( rem1, 0, bSig );
- if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
- mul64To128( bSig, zSig1, &term1, &term2 );
- sub128( rem1, 0, term1, term2, &rem1, &rem2 );
- while ( (int64_t) rem1 < 0 ) {
- --zSig1;
- add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
- }
- zSig1 |= ( ( rem1 | rem2 ) != 0 );
- }
- return roundAndPackFloatx80(status->floatx80_rounding_precision,
- zSign, zExp, zSig0, zSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the remainder of the extended double-precision floating-point value
- | `a' with respect to the corresponding value `b'. The operation is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
- | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
- | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
- | the absolute value of the integer quotient.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
- float_status *status)
- {
- bool aSign, zSign;
- int32_t aExp, bExp, expDiff, aExpOrig;
- uint64_t aSig0, aSig1, bSig;
- uint64_t q, term0, term1, alternateASig0, alternateASig1;
- *quotient = 0;
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- aSig0 = extractFloatx80Frac( a );
- aExpOrig = aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- bSig = extractFloatx80Frac( b );
- bExp = extractFloatx80Exp( b );
- if ( aExp == 0x7FFF ) {
- if ( (uint64_t) ( aSig0<<1 )
- || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
- return propagateFloatx80NaN(a, b, status);
- }
- goto invalid;
- }
- if ( bExp == 0x7FFF ) {
- if ((uint64_t)(bSig << 1)) {
- return propagateFloatx80NaN(a, b, status);
- }
- if (aExp == 0 && aSig0 >> 63) {
- /*
- * Pseudo-denormal argument must be returned in normalized
- * form.
- */
- return packFloatx80(aSign, 1, aSig0);
- }
- return a;
- }
- if ( bExp == 0 ) {
- if ( bSig == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
- }
- if ( aExp == 0 ) {
- if ( aSig0 == 0 ) return a;
- normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
- }
- zSign = aSign;
- expDiff = aExp - bExp;
- aSig1 = 0;
- if ( expDiff < 0 ) {
- if ( mod || expDiff < -1 ) {
- if (aExp == 1 && aExpOrig == 0) {
- /*
- * Pseudo-denormal argument must be returned in
- * normalized form.
- */
- return packFloatx80(aSign, aExp, aSig0);
- }
- return a;
- }
- shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
- expDiff = 0;
- }
- *quotient = q = ( bSig <= aSig0 );
- if ( q ) aSig0 -= bSig;
- expDiff -= 64;
- while ( 0 < expDiff ) {
- q = estimateDiv128To64( aSig0, aSig1, bSig );
- q = ( 2 < q ) ? q - 2 : 0;
- mul64To128( bSig, q, &term0, &term1 );
- sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
- shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
- expDiff -= 62;
- *quotient <<= 62;
- *quotient += q;
- }
- expDiff += 64;
- if ( 0 < expDiff ) {
- q = estimateDiv128To64( aSig0, aSig1, bSig );
- q = ( 2 < q ) ? q - 2 : 0;
- q >>= 64 - expDiff;
- mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
- sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
- shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
- while ( le128( term0, term1, aSig0, aSig1 ) ) {
- ++q;
- sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
- }
- if (expDiff < 64) {
- *quotient <<= expDiff;
- } else {
- *quotient = 0;
- }
- *quotient += q;
- }
- else {
- term1 = 0;
- term0 = bSig;
- }
- if (!mod) {
- sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
- if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
- || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
- && ( q & 1 ) )
- ) {
- aSig0 = alternateASig0;
- aSig1 = alternateASig1;
- zSign = ! zSign;
- ++*quotient;
- }
- }
- return
- normalizeRoundAndPackFloatx80(
- 80, zSign, bExp + expDiff, aSig0, aSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the remainder of the extended double-precision floating-point value
- | `a' with respect to the corresponding value `b'. The operation is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
- {
- uint64_t quotient;
- return floatx80_modrem(a, b, false, "ient, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the remainder of the extended double-precision floating-point value
- | `a' with respect to the corresponding value `b', with the quotient truncated
- | toward zero.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
- {
- uint64_t quotient;
- return floatx80_modrem(a, b, true, "ient, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the square root of the extended double-precision floating-point
- | value `a'. The operation is performed according to the IEC/IEEE Standard
- | for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 floatx80_sqrt(floatx80 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, zExp;
- uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
- uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- aSig0 = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- if ( aExp == 0x7FFF ) {
- if ((uint64_t)(aSig0 << 1)) {
- return propagateFloatx80NaN(a, a, status);
- }
- if ( ! aSign ) return a;
- goto invalid;
- }
- if ( aSign ) {
- if ( ( aExp | aSig0 ) == 0 ) return a;
- invalid:
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- if ( aExp == 0 ) {
- if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
- normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
- }
- zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
- zSig0 = estimateSqrt32( aExp, aSig0>>32 );
- shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
- zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
- doubleZSig0 = zSig0<<1;
- mul64To128( zSig0, zSig0, &term0, &term1 );
- sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
- while ( (int64_t) rem0 < 0 ) {
- --zSig0;
- doubleZSig0 -= 2;
- add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
- }
- zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
- if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
- if ( zSig1 == 0 ) zSig1 = 1;
- mul64To128( doubleZSig0, zSig1, &term1, &term2 );
- sub128( rem1, 0, term1, term2, &rem1, &rem2 );
- mul64To128( zSig1, zSig1, &term2, &term3 );
- sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
- while ( (int64_t) rem1 < 0 ) {
- --zSig1;
- shortShift128Left( 0, zSig1, 1, &term2, &term3 );
- term3 |= 1;
- term2 |= doubleZSig0;
- add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
- }
- zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
- }
- shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
- zSig0 |= doubleZSig0;
- return roundAndPackFloatx80(status->floatx80_rounding_precision,
- 0, zExp, zSig0, zSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point
- | value `a' to the 32-bit two's complement integer format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic---which means in particular that the conversion is rounded
- | according to the current rounding mode. If `a' is a NaN, the largest
- | positive integer is returned. Otherwise, if the conversion overflows, the
- | largest integer with the same sign as `a' is returned.
- *----------------------------------------------------------------------------*/
- int32_t float128_to_int32(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- aSig0 |= ( aSig1 != 0 );
- shiftCount = 0x4028 - aExp;
- if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
- return roundAndPackInt32(aSign, aSig0, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point
- | value `a' to the 32-bit two's complement integer format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic, except that the conversion is always rounded toward zero. If
- | `a' is a NaN, the largest positive integer is returned. Otherwise, if the
- | conversion overflows, the largest integer with the same sign as `a' is
- | returned.
- *----------------------------------------------------------------------------*/
- int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1, savedASig;
- int32_t z;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- aSig0 |= ( aSig1 != 0 );
- if ( 0x401E < aExp ) {
- if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
- goto invalid;
- }
- else if ( aExp < 0x3FFF ) {
- if (aExp || aSig0) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return 0;
- }
- aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = 0x402F - aExp;
- savedASig = aSig0;
- aSig0 >>= shiftCount;
- z = aSig0;
- if ( aSign ) z = - z;
- if ( ( z < 0 ) ^ aSign ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return aSign ? INT32_MIN : INT32_MAX;
- }
- if ( ( aSig0<<shiftCount ) != savedASig ) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return z;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point
- | value `a' to the 64-bit two's complement integer format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic---which means in particular that the conversion is rounded
- | according to the current rounding mode. If `a' is a NaN, the largest
- | positive integer is returned. Otherwise, if the conversion overflows, the
- | largest integer with the same sign as `a' is returned.
- *----------------------------------------------------------------------------*/
- int64_t float128_to_int64(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = 0x402F - aExp;
- if ( shiftCount <= 0 ) {
- if ( 0x403E < aExp ) {
- float_raise(float_flag_invalid, status);
- if ( ! aSign
- || ( ( aExp == 0x7FFF )
- && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
- )
- ) {
- return INT64_MAX;
- }
- return INT64_MIN;
- }
- shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
- }
- else {
- shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
- }
- return roundAndPackInt64(aSign, aSig0, aSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point
- | value `a' to the 64-bit two's complement integer format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic, except that the conversion is always rounded toward zero.
- | If `a' is a NaN, the largest positive integer is returned. Otherwise, if
- | the conversion overflows, the largest integer with the same sign as `a' is
- | returned.
- *----------------------------------------------------------------------------*/
- int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
- int64_t z;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = aExp - 0x402F;
- if ( 0 < shiftCount ) {
- if ( 0x403E <= aExp ) {
- aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
- if ( ( a.high == UINT64_C(0xC03E000000000000) )
- && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
- if (aSig1) {
- status->float_exception_flags |= float_flag_inexact;
- }
- }
- else {
- float_raise(float_flag_invalid, status);
- if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
- return INT64_MAX;
- }
- }
- return INT64_MIN;
- }
- z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
- if ( (uint64_t) ( aSig1<<shiftCount ) ) {
- status->float_exception_flags |= float_flag_inexact;
- }
- }
- else {
- if ( aExp < 0x3FFF ) {
- if ( aExp | aSig0 | aSig1 ) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return 0;
- }
- z = aSig0>>( - shiftCount );
- if ( aSig1
- || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
- status->float_exception_flags |= float_flag_inexact;
- }
- }
- if ( aSign ) z = - z;
- return z;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point value
- | `a' to the 64-bit unsigned integer format. The conversion is
- | performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic---which means in particular that the conversion is rounded
- | according to the current rounding mode. If `a' is a NaN, the largest
- | positive integer is returned. If the conversion overflows, the
- | largest unsigned integer is returned. If 'a' is negative, the value is
- | rounded and zero is returned; negative values that do not round to zero
- | will raise the inexact exception.
- *----------------------------------------------------------------------------*/
- uint64_t float128_to_uint64(float128 a, float_status *status)
- {
- bool aSign;
- int aExp;
- int shiftCount;
- uint64_t aSig0, aSig1;
- aSig0 = extractFloat128Frac0(a);
- aSig1 = extractFloat128Frac1(a);
- aExp = extractFloat128Exp(a);
- aSign = extractFloat128Sign(a);
- if (aSign && (aExp > 0x3FFE)) {
- float_raise(float_flag_invalid, status);
- if (float128_is_any_nan(a)) {
- return UINT64_MAX;
- } else {
- return 0;
- }
- }
- if (aExp) {
- aSig0 |= UINT64_C(0x0001000000000000);
- }
- shiftCount = 0x402F - aExp;
- if (shiftCount <= 0) {
- if (0x403E < aExp) {
- float_raise(float_flag_invalid, status);
- return UINT64_MAX;
- }
- shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
- } else {
- shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
- }
- return roundAndPackUint64(aSign, aSig0, aSig1, status);
- }
- uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
- {
- uint64_t v;
- signed char current_rounding_mode = status->float_rounding_mode;
- set_float_rounding_mode(float_round_to_zero, status);
- v = float128_to_uint64(a, status);
- set_float_rounding_mode(current_rounding_mode, status);
- return v;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point
- | value `a' to the 32-bit unsigned integer format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic except that the conversion is always rounded toward zero.
- | If `a' is a NaN, the largest positive integer is returned. Otherwise,
- | if the conversion overflows, the largest unsigned integer is returned.
- | If 'a' is negative, the value is rounded and zero is returned; negative
- | values that do not round to zero will raise the inexact exception.
- *----------------------------------------------------------------------------*/
- uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
- {
- uint64_t v;
- uint32_t res;
- int old_exc_flags = get_float_exception_flags(status);
- v = float128_to_uint64_round_to_zero(a, status);
- if (v > 0xffffffff) {
- res = 0xffffffff;
- } else {
- return v;
- }
- set_float_exception_flags(old_exc_flags, status);
- float_raise(float_flag_invalid, status);
- return res;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point value
- | `a' to the 32-bit unsigned integer format. The conversion is
- | performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic---which means in particular that the conversion is rounded
- | according to the current rounding mode. If `a' is a NaN, the largest
- | positive integer is returned. If the conversion overflows, the
- | largest unsigned integer is returned. If 'a' is negative, the value is
- | rounded and zero is returned; negative values that do not round to zero
- | will raise the inexact exception.
- *----------------------------------------------------------------------------*/
- uint32_t float128_to_uint32(float128 a, float_status *status)
- {
- uint64_t v;
- uint32_t res;
- int old_exc_flags = get_float_exception_flags(status);
- v = float128_to_uint64(a, status);
- if (v > 0xffffffff) {
- res = 0xffffffff;
- } else {
- return v;
- }
- set_float_exception_flags(old_exc_flags, status);
- float_raise(float_flag_invalid, status);
- return res;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point
- | value `a' to the single-precision floating-point format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic.
- *----------------------------------------------------------------------------*/
- float32 float128_to_float32(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t aSig0, aSig1;
- uint32_t zSig;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 ) {
- return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
- }
- return packFloat32( aSign, 0xFF, 0 );
- }
- aSig0 |= ( aSig1 != 0 );
- shift64RightJamming( aSig0, 18, &aSig0 );
- zSig = aSig0;
- if ( aExp || zSig ) {
- zSig |= 0x40000000;
- aExp -= 0x3F81;
- }
- return roundAndPackFloat32(aSign, aExp, zSig, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point
- | value `a' to the double-precision floating-point format. The conversion
- | is performed according to the IEC/IEEE Standard for Binary Floating-Point
- | Arithmetic.
- *----------------------------------------------------------------------------*/
- float64 float128_to_float64(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t aSig0, aSig1;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 ) {
- return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
- }
- return packFloat64( aSign, 0x7FF, 0 );
- }
- shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
- aSig0 |= ( aSig1 != 0 );
- if ( aExp || aSig0 ) {
- aSig0 |= UINT64_C(0x4000000000000000);
- aExp -= 0x3C01;
- }
- return roundAndPackFloat64(aSign, aExp, aSig0, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of converting the quadruple-precision floating-point
- | value `a' to the extended double-precision floating-point format. The
- | conversion is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- floatx80 float128_to_floatx80(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t aSig0, aSig1;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 ) {
- floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
- status);
- return floatx80_silence_nan(res, status);
- }
- return packFloatx80(aSign, floatx80_infinity_high,
- floatx80_infinity_low);
- }
- if ( aExp == 0 ) {
- if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
- normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
- }
- else {
- aSig0 |= UINT64_C(0x0001000000000000);
- }
- shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
- return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
- }
- /*----------------------------------------------------------------------------
- | Rounds the quadruple-precision floating-point value `a' to an integer, and
- | returns the result as a quadruple-precision floating-point value. The
- | operation is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float128_round_to_int(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t lastBitMask, roundBitsMask;
- float128 z;
- aExp = extractFloat128Exp( a );
- if ( 0x402F <= aExp ) {
- if ( 0x406F <= aExp ) {
- if ( ( aExp == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
- ) {
- return propagateFloat128NaN(a, a, status);
- }
- return a;
- }
- lastBitMask = 1;
- lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
- roundBitsMask = lastBitMask - 1;
- z = a;
- switch (status->float_rounding_mode) {
- case float_round_nearest_even:
- if ( lastBitMask ) {
- add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
- if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
- }
- else {
- if ( (int64_t) z.low < 0 ) {
- ++z.high;
- if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
- }
- }
- break;
- case float_round_ties_away:
- if (lastBitMask) {
- add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
- } else {
- if ((int64_t) z.low < 0) {
- ++z.high;
- }
- }
- break;
- case float_round_to_zero:
- break;
- case float_round_up:
- if (!extractFloat128Sign(z)) {
- add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
- }
- break;
- case float_round_down:
- if (extractFloat128Sign(z)) {
- add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
- }
- break;
- case float_round_to_odd:
- /*
- * Note that if lastBitMask == 0, the last bit is the lsb
- * of high, and roundBitsMask == -1.
- */
- if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
- add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
- }
- break;
- default:
- abort();
- }
- z.low &= ~ roundBitsMask;
- }
- else {
- if ( aExp < 0x3FFF ) {
- if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
- status->float_exception_flags |= float_flag_inexact;
- aSign = extractFloat128Sign( a );
- switch (status->float_rounding_mode) {
- case float_round_nearest_even:
- if ( ( aExp == 0x3FFE )
- && ( extractFloat128Frac0( a )
- | extractFloat128Frac1( a ) )
- ) {
- return packFloat128( aSign, 0x3FFF, 0, 0 );
- }
- break;
- case float_round_ties_away:
- if (aExp == 0x3FFE) {
- return packFloat128(aSign, 0x3FFF, 0, 0);
- }
- break;
- case float_round_down:
- return
- aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
- : packFloat128( 0, 0, 0, 0 );
- case float_round_up:
- return
- aSign ? packFloat128( 1, 0, 0, 0 )
- : packFloat128( 0, 0x3FFF, 0, 0 );
- case float_round_to_odd:
- return packFloat128(aSign, 0x3FFF, 0, 0);
- case float_round_to_zero:
- break;
- }
- return packFloat128( aSign, 0, 0, 0 );
- }
- lastBitMask = 1;
- lastBitMask <<= 0x402F - aExp;
- roundBitsMask = lastBitMask - 1;
- z.low = 0;
- z.high = a.high;
- switch (status->float_rounding_mode) {
- case float_round_nearest_even:
- z.high += lastBitMask>>1;
- if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
- z.high &= ~ lastBitMask;
- }
- break;
- case float_round_ties_away:
- z.high += lastBitMask>>1;
- break;
- case float_round_to_zero:
- break;
- case float_round_up:
- if (!extractFloat128Sign(z)) {
- z.high |= ( a.low != 0 );
- z.high += roundBitsMask;
- }
- break;
- case float_round_down:
- if (extractFloat128Sign(z)) {
- z.high |= (a.low != 0);
- z.high += roundBitsMask;
- }
- break;
- case float_round_to_odd:
- if ((z.high & lastBitMask) == 0) {
- z.high |= (a.low != 0);
- z.high += roundBitsMask;
- }
- break;
- default:
- abort();
- }
- z.high &= ~ roundBitsMask;
- }
- if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
- status->float_exception_flags |= float_flag_inexact;
- }
- return z;
- }
- /*----------------------------------------------------------------------------
- | Returns the result of adding the absolute values of the quadruple-precision
- | floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
- | before being returned. `zSign' is ignored if the result is a NaN.
- | The addition is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
- float_status *status)
- {
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
- int32_t expDiff;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- expDiff = aExp - bExp;
- if ( 0 < expDiff ) {
- if ( aExp == 0x7FFF ) {
- if (aSig0 | aSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- --expDiff;
- }
- else {
- bSig0 |= UINT64_C(0x0001000000000000);
- }
- shift128ExtraRightJamming(
- bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
- zExp = aExp;
- }
- else if ( expDiff < 0 ) {
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- ++expDiff;
- }
- else {
- aSig0 |= UINT64_C(0x0001000000000000);
- }
- shift128ExtraRightJamming(
- aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
- zExp = bExp;
- }
- else {
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
- return propagateFloat128NaN(a, b, status);
- }
- return a;
- }
- add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
- if ( aExp == 0 ) {
- if (status->flush_to_zero) {
- if (zSig0 | zSig1) {
- float_raise(float_flag_output_denormal, status);
- }
- return packFloat128(zSign, 0, 0, 0);
- }
- return packFloat128( zSign, 0, zSig0, zSig1 );
- }
- zSig2 = 0;
- zSig0 |= UINT64_C(0x0002000000000000);
- zExp = aExp;
- goto shiftRight1;
- }
- aSig0 |= UINT64_C(0x0001000000000000);
- add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
- --zExp;
- if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
- ++zExp;
- shiftRight1:
- shift128ExtraRightJamming(
- zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
- roundAndPack:
- return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of subtracting the absolute values of the quadruple-
- | precision floating-point values `a' and `b'. If `zSign' is 1, the
- | difference is negated before being returned. `zSign' is ignored if the
- | result is a NaN. The subtraction is performed according to the IEC/IEEE
- | Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
- float_status *status)
- {
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
- int32_t expDiff;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- expDiff = aExp - bExp;
- shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
- shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
- if ( 0 < expDiff ) goto aExpBigger;
- if ( expDiff < 0 ) goto bExpBigger;
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
- return propagateFloat128NaN(a, b, status);
- }
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- if ( aExp == 0 ) {
- aExp = 1;
- bExp = 1;
- }
- if ( bSig0 < aSig0 ) goto aBigger;
- if ( aSig0 < bSig0 ) goto bBigger;
- if ( bSig1 < aSig1 ) goto aBigger;
- if ( aSig1 < bSig1 ) goto bBigger;
- return packFloat128(status->float_rounding_mode == float_round_down,
- 0, 0, 0);
- bExpBigger:
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- ++expDiff;
- }
- else {
- aSig0 |= UINT64_C(0x4000000000000000);
- }
- shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
- bSig0 |= UINT64_C(0x4000000000000000);
- bBigger:
- sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
- zExp = bExp;
- zSign ^= 1;
- goto normalizeRoundAndPack;
- aExpBigger:
- if ( aExp == 0x7FFF ) {
- if (aSig0 | aSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- --expDiff;
- }
- else {
- bSig0 |= UINT64_C(0x4000000000000000);
- }
- shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
- aSig0 |= UINT64_C(0x4000000000000000);
- aBigger:
- sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
- zExp = aExp;
- normalizeRoundAndPack:
- --zExp;
- return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
- status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of adding the quadruple-precision floating-point values
- | `a' and `b'. The operation is performed according to the IEC/IEEE Standard
- | for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float128_add(float128 a, float128 b, float_status *status)
- {
- bool aSign, bSign;
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign == bSign ) {
- return addFloat128Sigs(a, b, aSign, status);
- }
- else {
- return subFloat128Sigs(a, b, aSign, status);
- }
- }
- /*----------------------------------------------------------------------------
- | Returns the result of subtracting the quadruple-precision floating-point
- | values `a' and `b'. The operation is performed according to the IEC/IEEE
- | Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float128_sub(float128 a, float128 b, float_status *status)
- {
- bool aSign, bSign;
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign == bSign ) {
- return subFloat128Sigs(a, b, aSign, status);
- }
- else {
- return addFloat128Sigs(a, b, aSign, status);
- }
- }
- /*----------------------------------------------------------------------------
- | Returns the result of multiplying the quadruple-precision floating-point
- | values `a' and `b'. The operation is performed according to the IEC/IEEE
- | Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float128_mul(float128 a, float128 b, float_status *status)
- {
- bool aSign, bSign, zSign;
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- bSign = extractFloat128Sign( b );
- zSign = aSign ^ bSign;
- if ( aExp == 0x7FFF ) {
- if ( ( aSig0 | aSig1 )
- || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
- return propagateFloat128NaN(a, b, status);
- }
- if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
- normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
- }
- if ( bExp == 0 ) {
- if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
- normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
- }
- zExp = aExp + bExp - 0x4000;
- aSig0 |= UINT64_C(0x0001000000000000);
- shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
- mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
- add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
- zSig2 |= ( zSig3 != 0 );
- if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
- shift128ExtraRightJamming(
- zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
- ++zExp;
- }
- return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the result of dividing the quadruple-precision floating-point value
- | `a' by the corresponding value `b'. The operation is performed according to
- | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float128_div(float128 a, float128 b, float_status *status)
- {
- bool aSign, bSign, zSign;
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
- uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- bSign = extractFloat128Sign( b );
- zSign = aSign ^ bSign;
- if ( aExp == 0x7FFF ) {
- if (aSig0 | aSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- goto invalid;
- }
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return packFloat128( zSign, 0, 0, 0 );
- }
- if ( bExp == 0 ) {
- if ( ( bSig0 | bSig1 ) == 0 ) {
- if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- float_raise(float_flag_divbyzero, status);
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
- }
- if ( aExp == 0 ) {
- if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
- normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
- }
- zExp = aExp - bExp + 0x3FFD;
- shortShift128Left(
- aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
- shortShift128Left(
- bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
- if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
- shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
- ++zExp;
- }
- zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
- mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
- sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
- while ( (int64_t) rem0 < 0 ) {
- --zSig0;
- add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
- }
- zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
- if ( ( zSig1 & 0x3FFF ) <= 4 ) {
- mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
- sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
- while ( (int64_t) rem1 < 0 ) {
- --zSig1;
- add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
- }
- zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
- }
- shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
- return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
- }
- /*----------------------------------------------------------------------------
- | Returns the remainder of the quadruple-precision floating-point value `a'
- | with respect to the corresponding value `b'. The operation is performed
- | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float128_rem(float128 a, float128 b, float_status *status)
- {
- bool aSign, zSign;
- int32_t aExp, bExp, expDiff;
- uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
- uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
- int64_t sigMean0;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- if ( aExp == 0x7FFF ) {
- if ( ( aSig0 | aSig1 )
- || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
- return propagateFloat128NaN(a, b, status);
- }
- goto invalid;
- }
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- if ( ( bSig0 | bSig1 ) == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
- }
- if ( aExp == 0 ) {
- if ( ( aSig0 | aSig1 ) == 0 ) return a;
- normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
- }
- expDiff = aExp - bExp;
- if ( expDiff < -1 ) return a;
- shortShift128Left(
- aSig0 | UINT64_C(0x0001000000000000),
- aSig1,
- 15 - ( expDiff < 0 ),
- &aSig0,
- &aSig1
- );
- shortShift128Left(
- bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
- q = le128( bSig0, bSig1, aSig0, aSig1 );
- if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
- expDiff -= 64;
- while ( 0 < expDiff ) {
- q = estimateDiv128To64( aSig0, aSig1, bSig0 );
- q = ( 4 < q ) ? q - 4 : 0;
- mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
- shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
- shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
- sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
- expDiff -= 61;
- }
- if ( -64 < expDiff ) {
- q = estimateDiv128To64( aSig0, aSig1, bSig0 );
- q = ( 4 < q ) ? q - 4 : 0;
- q >>= - expDiff;
- shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
- expDiff += 52;
- if ( expDiff < 0 ) {
- shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
- }
- else {
- shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
- }
- mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
- sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
- }
- else {
- shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
- shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
- }
- do {
- alternateASig0 = aSig0;
- alternateASig1 = aSig1;
- ++q;
- sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
- } while ( 0 <= (int64_t) aSig0 );
- add128(
- aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
- if ( ( sigMean0 < 0 )
- || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
- aSig0 = alternateASig0;
- aSig1 = alternateASig1;
- }
- zSign = ( (int64_t) aSig0 < 0 );
- if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
- return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
- status);
- }
- /*----------------------------------------------------------------------------
- | Returns the square root of the quadruple-precision floating-point value `a'.
- | The operation is performed according to the IEC/IEEE Standard for Binary
- | Floating-Point Arithmetic.
- *----------------------------------------------------------------------------*/
- float128 float128_sqrt(float128 a, float_status *status)
- {
- bool aSign;
- int32_t aExp, zExp;
- uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
- uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if (aSig0 | aSig1) {
- return propagateFloat128NaN(a, a, status);
- }
- if ( ! aSign ) return a;
- goto invalid;
- }
- if ( aSign ) {
- if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
- invalid:
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- if ( aExp == 0 ) {
- if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
- normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
- }
- zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
- aSig0 |= UINT64_C(0x0001000000000000);
- zSig0 = estimateSqrt32( aExp, aSig0>>17 );
- shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
- zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
- doubleZSig0 = zSig0<<1;
- mul64To128( zSig0, zSig0, &term0, &term1 );
- sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
- while ( (int64_t) rem0 < 0 ) {
- --zSig0;
- doubleZSig0 -= 2;
- add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
- }
- zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
- if ( ( zSig1 & 0x1FFF ) <= 5 ) {
- if ( zSig1 == 0 ) zSig1 = 1;
- mul64To128( doubleZSig0, zSig1, &term1, &term2 );
- sub128( rem1, 0, term1, term2, &rem1, &rem2 );
- mul64To128( zSig1, zSig1, &term2, &term3 );
- sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
- while ( (int64_t) rem1 < 0 ) {
- --zSig1;
- shortShift128Left( 0, zSig1, 1, &term2, &term3 );
- term3 |= 1;
- term2 |= doubleZSig0;
- add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
- }
- zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
- }
- shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
- return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
- }
- static inline FloatRelation
- floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
- float_status *status)
- {
- bool aSign, bSign;
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return float_relation_unordered;
- }
- if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
- ( extractFloatx80Frac( a )<<1 ) ) ||
- ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
- ( extractFloatx80Frac( b )<<1 ) )) {
- if (!is_quiet ||
- floatx80_is_signaling_nan(a, status) ||
- floatx80_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return float_relation_unordered;
- }
- aSign = extractFloatx80Sign( a );
- bSign = extractFloatx80Sign( b );
- if ( aSign != bSign ) {
- if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
- ( ( a.low | b.low ) == 0 ) ) {
- /* zero case */
- return float_relation_equal;
- } else {
- return 1 - (2 * aSign);
- }
- } else {
- /* Normalize pseudo-denormals before comparison. */
- if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
- ++a.high;
- }
- if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
- ++b.high;
- }
- if (a.low == b.low && a.high == b.high) {
- return float_relation_equal;
- } else {
- return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
- }
- }
- }
- FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
- {
- return floatx80_compare_internal(a, b, 0, status);
- }
- FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
- float_status *status)
- {
- return floatx80_compare_internal(a, b, 1, status);
- }
- static inline FloatRelation
- float128_compare_internal(float128 a, float128 b, bool is_quiet,
- float_status *status)
- {
- bool aSign, bSign;
- if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
- ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
- ( ( extractFloat128Exp( b ) == 0x7fff ) &&
- ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
- if (!is_quiet ||
- float128_is_signaling_nan(a, status) ||
- float128_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return float_relation_unordered;
- }
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign != bSign ) {
- if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
- /* zero case */
- return float_relation_equal;
- } else {
- return 1 - (2 * aSign);
- }
- } else {
- if (a.low == b.low && a.high == b.high) {
- return float_relation_equal;
- } else {
- return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
- }
- }
- }
- FloatRelation float128_compare(float128 a, float128 b, float_status *status)
- {
- return float128_compare_internal(a, b, 0, status);
- }
- FloatRelation float128_compare_quiet(float128 a, float128 b,
- float_status *status)
- {
- return float128_compare_internal(a, b, 1, status);
- }
- floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t aSig;
- if (floatx80_invalid_encoding(a)) {
- float_raise(float_flag_invalid, status);
- return floatx80_default_nan(status);
- }
- aSig = extractFloatx80Frac( a );
- aExp = extractFloatx80Exp( a );
- aSign = extractFloatx80Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig<<1 ) {
- return propagateFloatx80NaN(a, a, status);
- }
- return a;
- }
- if (aExp == 0) {
- if (aSig == 0) {
- return a;
- }
- aExp++;
- }
- if (n > 0x10000) {
- n = 0x10000;
- } else if (n < -0x10000) {
- n = -0x10000;
- }
- aExp += n;
- return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
- aSign, aExp, aSig, 0, status);
- }
- float128 float128_scalbn(float128 a, int n, float_status *status)
- {
- bool aSign;
- int32_t aExp;
- uint64_t aSig0, aSig1;
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 ) {
- return propagateFloat128NaN(a, a, status);
- }
- return a;
- }
- if (aExp != 0) {
- aSig0 |= UINT64_C(0x0001000000000000);
- } else if (aSig0 == 0 && aSig1 == 0) {
- return a;
- } else {
- aExp++;
- }
- if (n > 0x10000) {
- n = 0x10000;
- } else if (n < -0x10000) {
- n = -0x10000;
- }
- aExp += n - 1;
- return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
- , status);
- }
- static void __attribute__((constructor)) softfloat_init(void)
- {
- union_float64 ua, ub, uc, ur;
- if (QEMU_NO_HARDFLOAT) {
- return;
- }
- /*
- * Test that the host's FMA is not obviously broken. For example,
- * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
- * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
- */
- ua.s = 0x0020000000000001ULL;
- ub.s = 0x3ca0000000000000ULL;
- uc.s = 0x0020000000000000ULL;
- ur.h = fma(ua.h, ub.h, uc.h);
- if (ur.s != 0x0020000000000001ULL) {
- force_soft_fma = true;
- }
- }
|