/*
 * QEMU NVM Express Controller
 *
 * Copyright (c) 2012, Intel Corporation
 *
 * Written by Keith Busch <keith.busch@intel.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

/**
 * Reference Specs: http://www.nvmexpress.org, 1.4, 1.3, 1.2, 1.1, 1.0e
 *
 * https://nvmexpress.org/developers/nvme-specification/
 *
 *
 * Notes on coding style
 * ---------------------
 * While QEMU coding style prefers lowercase hexadecimals in constants, the
 * NVMe subsystem uses the format from the NVMe specifications in its comments
 * (i.e. an 'h' suffix instead of a '0x' prefix).
 *
 * Usage
 * -----
 * See docs/system/nvme.rst for extensive documentation.
 *
 * Add options:
 *      -drive file=<file>,if=none,id=<drive_id>
 *      -device nvme-subsys,id=<subsys_id>,nqn=<nqn_id>
 *      -device nvme,serial=<serial>,id=<bus_name>, \
 *              cmb_size_mb=<cmb_size_mb[optional]>, \
 *              [pmrdev=<mem_backend_file_id>,] \
 *              max_ioqpairs=<N[optional]>, \
 *              aerl=<N[optional]>,aer_max_queued=<N[optional]>, \
 *              mdts=<N[optional]>,vsl=<N[optional]>, \
 *              zoned.zasl=<N[optional]>, \
 *              zoned.auto_transition=<on|off[optional]>, \
 *              sriov_max_vfs=<N[optional]> \
 *              sriov_vq_flexible=<N[optional]> \
 *              sriov_vi_flexible=<N[optional]> \
 *              sriov_max_vi_per_vf=<N[optional]> \
 *              sriov_max_vq_per_vf=<N[optional]> \
 *              atomic.dn=<on|off[optional]>, \
 *              atomic.awun=<N[optional]>, \
 *              atomic.awupf=<N[optional]>, \
 *              subsys=<subsys_id>
 *      -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
 *              zoned=<true|false[optional]>, \
 *              subsys=<subsys_id>,shared=<true|false[optional]>, \
 *              detached=<true|false[optional]>, \
 *              zoned.zone_size=<N[optional]>, \
 *              zoned.zone_capacity=<N[optional]>, \
 *              zoned.descr_ext_size=<N[optional]>, \
 *              zoned.max_active=<N[optional]>, \
 *              zoned.max_open=<N[optional]>, \
 *              zoned.cross_read=<true|false[optional]>
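 *
 * As a minimal, illustrative example (the image file, drive id, subsystem id
 * and serial shown here are made up), a single controller with one shared
 * namespace in a subsystem could be configured as:
 *      -drive file=nvm.img,if=none,id=nvm
 *      -device nvme-subsys,id=nvme-subsys-0,nqn=subsys0
 *      -device nvme,serial=deadbeef,subsys=nvme-subsys-0
 *      -device nvme-ns,drive=nvm,nsid=1,shared=true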
 *
 * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
 * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. By default, the
 * device will use the "v1.4 CMB scheme" - use the `legacy-cmb` parameter to
 * always enable the CMBLOC and CMBSZ registers (v1.3 behavior).
 *
 * Enabling pmr emulation can be achieved by pointing to memory-backend-file.
 * For example:
 *      -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \
 *              size=<size> ....  -device nvme,...,pmrdev=<mem_id>
 *
 * The PMR will use BAR 4/5 exclusively.
 *
 * To place controller(s) and namespace(s) in a subsystem, provide the
 * nvme-subsys device as above.
 *
 * nvme subsystem device parameters
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * - `nqn`
 *   This parameter provides the `<nqn_id>` part of the string
 *   `nqn.2019-08.org.qemu:<nqn_id>` which will be reported in the SUBNQN field
 *   of subsystem controllers. Note that `<nqn_id>` should be unique per
 *   subsystem, but this is not enforced by QEMU. If not specified, it will
 *   default to the value of the `id` parameter (`<subsys_id>`).
 *
 * nvme device parameters
 * ~~~~~~~~~~~~~~~~~~~~~~
 * - `subsys`
 *   Specifying this parameter attaches the controller to the subsystem and
 *   the SUBNQN field in the controller will report the NQN of the subsystem
 *   device. This also enables the multi-controller capability, reported in
 *   the CMIC field (Controller Multi-path I/O and Namespace Sharing
 *   Capabilities) of the Identify Controller data structure.
 *
 * - `aerl`
 *   The Asynchronous Event Request Limit (AERL). Indicates the maximum number
 *   of concurrently outstanding Asynchronous Event Request commands supported
 *   by the controller. This is a 0's based value.
 *
 * - `aer_max_queued`
 *   This is the maximum number of events that the device will enqueue for
 *   completion when there are no outstanding AERs. When the maximum number of
 *   enqueued events is reached, subsequent events will be dropped.
 *
 * - `mdts`
 *   Indicates the maximum data transfer size for a command that transfers data
 *   between host-accessible memory and the controller. The value is specified
 *   as a power of two (2^n) and is in units of the minimum memory page size
 *   (CAP.MPSMIN). The default value is 7 (i.e. 512 KiB).
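 *   As a worked example (assuming a minimum memory page size of 4 KiB, i.e.
 *   CAP.MPSMIN = 0), mdts=7 allows transfers of up to 2^7 * 4 KiB = 512 KiB.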
 *
 * - `vsl`
 *   Indicates the maximum data size limit for the Verify command. Like `mdts`,
 *   this value is specified as a power of two (2^n) and is in units of the
 *   minimum memory page size (CAP.MPSMIN). The default value is 7 (i.e. 512
 *   KiB).
 *
 * - `zoned.zasl`
 *   Indicates the maximum data transfer size for the Zone Append command. Like
 *   `mdts`, the value is specified as a power of two (2^n) and is in units of
 *   the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e.
 *   defaulting to the value of `mdts`).
 *
 * - `zoned.auto_transition`
 *   Indicates if zones in zone state implicitly opened can be automatically
 *   transitioned to zone state closed for resource management purposes.
 *   Defaults to 'on'.
 *
 * - `sriov_max_vfs`
 *   Indicates the maximum number of PCIe virtual functions supported
 *   by the controller. The default value is 0. Specifying a non-zero value
 *   enables reporting of both SR-IOV and ARI capabilities by the NVMe device.
 *   Virtual function controllers will not report SR-IOV capability.
 *
 *   NOTE: Single Root I/O Virtualization support is experimental.
 *   All the related parameters may be subject to change.
 *
 * - `sriov_vq_flexible`
 *   Indicates the total number of flexible queue resources assignable to all
 *   the secondary controllers. Implicitly sets the number of primary
 *   controller's private resources to `(max_ioqpairs - sriov_vq_flexible)`.
 *
 * - `sriov_vi_flexible`
 *   Indicates the total number of flexible interrupt resources assignable to
 *   all the secondary controllers. Implicitly sets the number of primary
 *   controller's private resources to `(msix_qsize - sriov_vi_flexible)`.
 *
 * - `sriov_max_vi_per_vf`
 *   Indicates the maximum number of virtual interrupt resources assignable
 *   to a secondary controller. The default 0 resolves to
 *   `(sriov_vi_flexible / sriov_max_vfs)`.
 *
 * - `sriov_max_vq_per_vf`
 *   Indicates the maximum number of virtual queue resources assignable to
 *   a secondary controller. The default 0 resolves to
 *   `(sriov_vq_flexible / sriov_max_vfs)`.
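 *
 *   As a worked example of the two formulas above (the numbers are made up):
 *   with max_ioqpairs=8, sriov_max_vfs=2 and sriov_vq_flexible=4, the primary
 *   controller keeps 8 - 4 = 4 private queue resources, and each secondary
 *   controller may be assigned up to 4 / 2 = 2 flexible queue resources by
 *   default.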
 *
 * nvme namespace device parameters
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * - `shared`
 *   When the parent nvme device (as defined explicitly by the 'bus' parameter
 *   or implicitly by the most recently defined NvmeBus) is linked to an
 *   nvme-subsys device, the namespace will be attached to all controllers in
 *   the subsystem. If set to 'off' (the default), the namespace will remain a
 *   private namespace and may only be attached to a single controller at a
 *   time.
 *
 * - `detached`
 *   This parameter is only valid together with the `subsys` parameter. If left
 *   at the default value (`false/off`), the namespace will be attached to all
 *   controllers in the NVMe subsystem at boot-up. If set to `true/on`, the
 *   namespace will be available in the subsystem but not attached to any
 *   controllers.
 *
 * Setting `zoned` to true selects the Zoned Command Set for the namespace.
 * In this case, the following namespace properties are available to configure
 * zoned operation:
 *     zoned.zone_size=<zone size in bytes, default: 128MiB>
 *         The number may be followed by K, M, G as in kilo-, mega- or giga-.
 *
 *     zoned.zone_capacity=<zone capacity in bytes, default: zone size>
 *         The value 0 (default) forces zone capacity to be the same as zone
 *         size. The value of this property may not exceed zone size.
 *
 *     zoned.descr_ext_size=<zone descriptor extension size, default 0>
 *         This value needs to be specified in 64B units. If it is zero,
 *         namespace(s) will not support zone descriptor extensions.
 *
 *     zoned.max_active=<Maximum Active Resources (zones), default: 0>
 *         The default value means there is no limit to the number of
 *         concurrently active zones.
 *
 *     zoned.max_open=<Maximum Open Resources (zones), default: 0>
 *         The default value means there is no limit to the number of
 *         concurrently open zones.
 *
 *     zoned.cross_read=<enable RAZB, default: false>
 *         Setting this property to true enables Read Across Zone Boundaries.
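 *
 * As an illustrative zoned namespace configuration (the zone size and the
 * resource limits below are arbitrary example values):
 *     -device nvme-ns,drive=<drive_id>,zoned=true,zoned.zone_size=64M, \
 *             zoned.max_open=16,zoned.max_active=32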
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/units.h"
#include "qemu/range.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "system/system.h"
#include "system/block-backend.h"
#include "system/hostmem.h"
#include "hw/pci/msix.h"
#include "hw/pci/pcie_sriov.h"
#include "system/spdm-socket.h"
#include "migration/vmstate.h"

#include "nvme.h"
#include "dif.h"
#include "trace.h"

#define NVME_MAX_IOQPAIRS 0xffff
#define NVME_DB_SIZE 4
#define NVME_SPEC_VER 0x00010400
#define NVME_CMB_BIR 2
#define NVME_PMR_BIR 4
#define NVME_TEMPERATURE 0x143
#define NVME_TEMPERATURE_WARNING 0x157
#define NVME_TEMPERATURE_CRITICAL 0x175
#define NVME_NUM_FW_SLOTS 1
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
#define NVME_VF_RES_GRANULARITY 1
#define NVME_VF_OFFSET 0x1
#define NVME_VF_STRIDE 1

#define NVME_GUEST_ERR(trace, fmt, ...) \
    do { \
        (trace_##trace)(__VA_ARGS__); \
        qemu_log_mask(LOG_GUEST_ERROR, #trace \
                      " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
    } while (0)

static const bool nvme_feature_support[NVME_FID_MAX] = {
    [NVME_ARBITRATION] = true,
    [NVME_POWER_MANAGEMENT] = true,
    [NVME_TEMPERATURE_THRESHOLD] = true,
    [NVME_ERROR_RECOVERY] = true,
    [NVME_VOLATILE_WRITE_CACHE] = true,
    [NVME_NUMBER_OF_QUEUES] = true,
    [NVME_INTERRUPT_COALESCING] = true,
    [NVME_INTERRUPT_VECTOR_CONF] = true,
    [NVME_WRITE_ATOMICITY] = true,
    [NVME_ASYNCHRONOUS_EVENT_CONF] = true,
    [NVME_TIMESTAMP] = true,
    [NVME_HOST_BEHAVIOR_SUPPORT] = true,
    [NVME_COMMAND_SET_PROFILE] = true,
    [NVME_FDP_MODE] = true,
    [NVME_FDP_EVENTS] = true,
};

static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
    [NVME_TEMPERATURE_THRESHOLD] = NVME_FEAT_CAP_CHANGE,
    [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
    [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
    [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
    [NVME_WRITE_ATOMICITY] = NVME_FEAT_CAP_CHANGE,
    [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
    [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
    [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
    [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE,
    [NVME_FDP_MODE] = NVME_FEAT_CAP_CHANGE,
    [NVME_FDP_EVENTS] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
};

static const uint32_t nvme_cse_acs_default[256] = {
    [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_ABORT] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC |
                                   NVME_CMD_EFF_CCC,
    [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_ADM_CMD_DIRECTIVE_RECV] = NVME_CMD_EFF_CSUPP,
    [NVME_ADM_CMD_DIRECTIVE_SEND] = NVME_CMD_EFF_CSUPP,
};

static const uint32_t nvme_cse_iocs_nvm_default[256] = {
    [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
};

static const uint32_t nvme_cse_iocs_zoned_default[256] = {
    [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
    [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
    [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP,
};

static void nvme_process_sq(void *opaque);
static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst);
static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n);

static uint16_t nvme_sqid(NvmeRequest *req)
{
    return le16_to_cpu(req->sq->sqid);
}
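
/*
 * FDP helpers: a placement identifier (PID) packs the reclaim group into its
 * upper `rgif` bits and the placement handle into the remaining lower bits;
 * the helpers below build, split and validate such identifiers.
 */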
static inline uint16_t nvme_make_pid(NvmeNamespace *ns, uint16_t rg,
                                     uint16_t ph)
{
    uint16_t rgif = ns->endgrp->fdp.rgif;

    if (!rgif) {
        return ph;
    }

    return (rg << (16 - rgif)) | ph;
}

static inline bool nvme_ph_valid(NvmeNamespace *ns, uint16_t ph)
{
    return ph < ns->fdp.nphs;
}

static inline bool nvme_rg_valid(NvmeEnduranceGroup *endgrp, uint16_t rg)
{
    return rg < endgrp->fdp.nrg;
}

static inline uint16_t nvme_pid2ph(NvmeNamespace *ns, uint16_t pid)
{
    uint16_t rgif = ns->endgrp->fdp.rgif;

    if (!rgif) {
        return pid;
    }

    return pid & ((1 << (15 - rgif)) - 1);
}

static inline uint16_t nvme_pid2rg(NvmeNamespace *ns, uint16_t pid)
{
    uint16_t rgif = ns->endgrp->fdp.rgif;

    if (!rgif) {
        return 0;
    }

    return pid >> (16 - rgif);
}

static inline bool nvme_parse_pid(NvmeNamespace *ns, uint16_t pid,
                                  uint16_t *ph, uint16_t *rg)
{
    *rg = nvme_pid2rg(ns, pid);
    *ph = nvme_pid2ph(ns, pid);

    return nvme_ph_valid(ns, *ph) && nvme_rg_valid(ns->endgrp, *rg);
}
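
/*
 * Move @zone to @state, updating the per-state zone lists that the namespace
 * maintains for open, closed and full zones.
 */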
static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
                                   NvmeZoneState state)
{
    if (QTAILQ_IN_USE(zone, entry)) {
        switch (nvme_get_zone_state(zone)) {
        case NVME_ZONE_STATE_EXPLICITLY_OPEN:
            QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
            break;
        case NVME_ZONE_STATE_IMPLICITLY_OPEN:
            QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
            break;
        case NVME_ZONE_STATE_CLOSED:
            QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
            break;
        case NVME_ZONE_STATE_FULL:
            QTAILQ_REMOVE(&ns->full_zones, zone, entry);
        default:
            ;
        }
    }

    nvme_set_zone_state(zone, state);

    switch (state) {
    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
        QTAILQ_INSERT_TAIL(&ns->exp_open_zones, zone, entry);
        break;
    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
        QTAILQ_INSERT_TAIL(&ns->imp_open_zones, zone, entry);
        break;
    case NVME_ZONE_STATE_CLOSED:
        QTAILQ_INSERT_TAIL(&ns->closed_zones, zone, entry);
        break;
    case NVME_ZONE_STATE_FULL:
        QTAILQ_INSERT_TAIL(&ns->full_zones, zone, entry);
    case NVME_ZONE_STATE_READ_ONLY:
        break;
    default:
        zone->d.za = 0;
    }
}

static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act,
                                         uint32_t opn, uint32_t zrwa)
{
    if (ns->params.max_active_zones != 0 &&
        ns->nr_active_zones + act > ns->params.max_active_zones) {
        trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
        return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
    }

    if (ns->params.max_open_zones != 0 &&
        ns->nr_open_zones + opn > ns->params.max_open_zones) {
        trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
        return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
    }

    if (zrwa > ns->zns.numzrwa) {
        return NVME_NOZRWA | NVME_DNR;
    }

    return NVME_SUCCESS;
}

/*
 * Check if we can open a zone without exceeding open/active limits.
 * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
 */
static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
{
    return nvme_zns_check_resources(ns, act, opn, 0);
}
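
/*
 * Reserve a slot in the circular FDP event buffer; if the buffer is already
 * full, the oldest event is overwritten.
 */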
static NvmeFdpEvent *nvme_fdp_alloc_event(NvmeCtrl *n, NvmeFdpEventBuffer *ebuf)
{
    NvmeFdpEvent *ret = NULL;
    bool is_full = ebuf->next == ebuf->start && ebuf->nelems;

    ret = &ebuf->events[ebuf->next++];
    if (unlikely(ebuf->next == NVME_FDP_MAX_EVENTS)) {
        ebuf->next = 0;
    }
    if (is_full) {
        ebuf->start = ebuf->next;
    } else {
        ebuf->nelems++;
    }

    memset(ret, 0, sizeof(NvmeFdpEvent));
    ret->timestamp = nvme_get_timestamp(n);

    return ret;
}

static inline int log_event(NvmeRuHandle *ruh, uint8_t event_type)
{
    return (ruh->event_filter >> nvme_fdp_evf_shifts[event_type]) & 0x1;
}

static bool nvme_update_ruh(NvmeCtrl *n, NvmeNamespace *ns, uint16_t pid)
{
    NvmeEnduranceGroup *endgrp = ns->endgrp;
    NvmeRuHandle *ruh;
    NvmeReclaimUnit *ru;
    NvmeFdpEvent *e = NULL;
    uint16_t ph, rg, ruhid;

    if (!nvme_parse_pid(ns, pid, &ph, &rg)) {
        return false;
    }

    ruhid = ns->fdp.phs[ph];

    ruh = &endgrp->fdp.ruhs[ruhid];
    ru = &ruh->rus[rg];

    if (ru->ruamw) {
        if (log_event(ruh, FDP_EVT_RU_NOT_FULLY_WRITTEN)) {
            e = nvme_fdp_alloc_event(n, &endgrp->fdp.host_events);
            e->type = FDP_EVT_RU_NOT_FULLY_WRITTEN;
            e->flags = FDPEF_PIV | FDPEF_NSIDV | FDPEF_LV;
            e->pid = cpu_to_le16(pid);
            e->nsid = cpu_to_le32(ns->params.nsid);
            e->rgid = cpu_to_le16(rg);
            e->ruhid = cpu_to_le16(ruhid);
        }

        /* log (eventual) GC overhead of prematurely swapping the RU */
        nvme_fdp_stat_inc(&endgrp->fdp.mbmw, nvme_l2b(ns, ru->ruamw));
    }

    ru->ruamw = ruh->ruamw;

    return true;
}

static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
{
    hwaddr hi, lo;

    if (!n->cmb.cmse) {
        return false;
    }

    lo = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba;
    hi = lo + int128_get64(n->cmb.mem.size);

    return addr >= lo && addr < hi;
}

static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
{
    hwaddr base = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba;

    return &n->cmb.buf[addr - base];
}

static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr)
{
    hwaddr hi;

    if (!n->pmr.cmse) {
        return false;
    }

    hi = n->pmr.cba + int128_get64(n->pmr.dev->mr.size);

    return addr >= n->pmr.cba && addr < hi;
}

static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr)
{
    return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba);
}

static inline bool nvme_addr_is_iomem(NvmeCtrl *n, hwaddr addr)
{
    hwaddr hi, lo;

    /*
     * The purpose of this check is to guard against invalid "local" access to
     * the iomem (i.e. controller registers). Thus, we check against the range
     * covered by the 'bar0' MemoryRegion since that is currently composed of
     * two subregions (the NVMe "MBAR" and the MSI-X table/pba). Note, however,
     * that if the device model is ever changed to allow the CMB to be located
     * in BAR0 as well, then this must be changed.
     */
    lo = n->bar0.addr;
    hi = lo + int128_get64(n->bar0.size);

    return addr >= lo && addr < hi;
}
  510. static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
  511. {
  512. hwaddr hi = addr + size - 1;
  513. if (hi < addr) {
  514. return 1;
  515. }
  516. if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
  517. memcpy(buf, nvme_addr_to_cmb(n, addr), size);
  518. return 0;
  519. }
  520. if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) {
  521. memcpy(buf, nvme_addr_to_pmr(n, addr), size);
  522. return 0;
  523. }
  524. return pci_dma_read(PCI_DEVICE(n), addr, buf, size);
  525. }
  526. static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, const void *buf, int size)
  527. {
  528. hwaddr hi = addr + size - 1;
  529. if (hi < addr) {
  530. return 1;
  531. }
  532. if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
  533. memcpy(nvme_addr_to_cmb(n, addr), buf, size);
  534. return 0;
  535. }
  536. if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) {
  537. memcpy(nvme_addr_to_pmr(n, addr), buf, size);
  538. return 0;
  539. }
  540. return pci_dma_write(PCI_DEVICE(n), addr, buf, size);
  541. }
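/*
 * nvme_addr_read() and nvme_addr_write() share the same layout: the
 * 'hi < addr' test rejects transfers whose end address would wrap around
 * the address space, accesses that lie entirely within the CMB or PMR are
 * served with a plain memcpy(), and everything else goes through the
 * regular PCI DMA helpers.
 */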
  542. static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
  543. {
  544. return nsid &&
  545. (nsid == NVME_NSID_BROADCAST || nsid <= NVME_MAX_NAMESPACES);
  546. }
  547. static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
  548. {
  549. return sqid < n->conf_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1;
  550. }
  551. static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
  552. {
  553. return cqid < n->conf_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1;
  554. }
  555. static void nvme_inc_cq_tail(NvmeCQueue *cq)
  556. {
  557. cq->tail++;
  558. if (cq->tail >= cq->size) {
  559. cq->tail = 0;
  560. cq->phase = !cq->phase;
  561. }
  562. }
  563. static void nvme_inc_sq_head(NvmeSQueue *sq)
  564. {
  565. sq->head = (sq->head + 1) % sq->size;
  566. }
  567. static uint8_t nvme_cq_full(NvmeCQueue *cq)
  568. {
  569. return (cq->tail + 1) % cq->size == cq->head;
  570. }
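/*
 * For illustration, with a CQ of size 4 and the phase tag initially 1: the
 * tail visits slots 0..3, wraps back to 0 and flips the phase to 0, which
 * is how the host tells freshly posted CQEs apart from stale ones left over
 * from the previous pass. The queue counts as full while advancing the tail
 * would make it equal to the head.
 */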
  571. static uint8_t nvme_sq_empty(NvmeSQueue *sq)
  572. {
  573. return sq->head == sq->tail;
  574. }
  575. static void nvme_irq_check(NvmeCtrl *n)
  576. {
  577. PCIDevice *pci = PCI_DEVICE(n);
  578. uint32_t intms = ldl_le_p(&n->bar.intms);
  579. if (msix_enabled(pci)) {
  580. return;
  581. }
582. /* VFs do not implement INTx */
  583. if (pci_is_vf(pci)) {
  584. return;
  585. }
  586. if (~intms & n->irq_status) {
  587. pci_irq_assert(pci);
  588. } else {
  589. pci_irq_deassert(pci);
  590. }
  591. }
  592. static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
  593. {
  594. PCIDevice *pci = PCI_DEVICE(n);
  595. if (cq->irq_enabled) {
  596. if (msix_enabled(pci)) {
  597. trace_pci_nvme_irq_msix(cq->vector);
  598. msix_notify(pci, cq->vector);
  599. } else {
  600. trace_pci_nvme_irq_pin();
  601. assert(cq->vector < 32);
  602. n->irq_status |= 1 << cq->vector;
  603. nvme_irq_check(n);
  604. }
  605. } else {
  606. trace_pci_nvme_irq_masked();
  607. }
  608. }
  609. static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
  610. {
  611. if (cq->irq_enabled) {
  612. if (msix_enabled(PCI_DEVICE(n))) {
  613. return;
  614. } else {
  615. assert(cq->vector < 32);
  616. if (!n->cq_pending) {
  617. n->irq_status &= ~(1 << cq->vector);
  618. }
  619. nvme_irq_check(n);
  620. }
  621. }
  622. }
  623. static void nvme_req_clear(NvmeRequest *req)
  624. {
  625. req->ns = NULL;
  626. req->opaque = NULL;
  627. req->aiocb = NULL;
  628. memset(&req->cqe, 0x0, sizeof(req->cqe));
  629. req->status = NVME_SUCCESS;
  630. }
  631. static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma)
  632. {
  633. if (dma) {
  634. pci_dma_sglist_init(&sg->qsg, PCI_DEVICE(n), 0);
  635. sg->flags = NVME_SG_DMA;
  636. } else {
  637. qemu_iovec_init(&sg->iov, 0);
  638. }
  639. sg->flags |= NVME_SG_ALLOC;
  640. }
  641. static inline void nvme_sg_unmap(NvmeSg *sg)
  642. {
  643. if (!(sg->flags & NVME_SG_ALLOC)) {
  644. return;
  645. }
  646. if (sg->flags & NVME_SG_DMA) {
  647. qemu_sglist_destroy(&sg->qsg);
  648. } else {
  649. qemu_iovec_destroy(&sg->iov);
  650. }
  651. memset(sg, 0x0, sizeof(*sg));
  652. }
  653. /*
  654. * When metadata is transferred as extended LBAs, the DPTR mapped into `sg`
  655. * holds both data and metadata. This function splits the data and metadata
  656. * into two separate QSG/IOVs.
  657. */
  658. static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data,
  659. NvmeSg *mdata)
  660. {
  661. NvmeSg *dst = data;
  662. uint32_t trans_len, count = ns->lbasz;
  663. uint64_t offset = 0;
  664. bool dma = sg->flags & NVME_SG_DMA;
  665. size_t sge_len;
  666. size_t sg_len = dma ? sg->qsg.size : sg->iov.size;
  667. int sg_idx = 0;
  668. assert(sg->flags & NVME_SG_ALLOC);
  669. while (sg_len) {
  670. sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len;
  671. trans_len = MIN(sg_len, count);
  672. trans_len = MIN(trans_len, sge_len - offset);
  673. if (dst) {
  674. if (dma) {
  675. qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset,
  676. trans_len);
  677. } else {
  678. qemu_iovec_add(&dst->iov,
  679. sg->iov.iov[sg_idx].iov_base + offset,
  680. trans_len);
  681. }
  682. }
  683. sg_len -= trans_len;
  684. count -= trans_len;
  685. offset += trans_len;
  686. if (count == 0) {
  687. dst = (dst == data) ? mdata : data;
  688. count = (dst == data) ? ns->lbasz : ns->lbaf.ms;
  689. }
  690. if (sge_len == offset) {
  691. offset = 0;
  692. sg_idx++;
  693. }
  694. }
  695. }
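/*
 * For illustration, assuming 512-byte LBAs with 8 bytes of metadata per LBA
 * (520-byte extended LBAs): the loop above alternates between the 'data'
 * and 'mdata' destinations every 512 and 8 bytes respectively, producing
 * two de-interleaved scatter/gather lists. Passing NULL for either
 * destination simply skips those byte ranges.
 */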
  696. static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
  697. size_t len)
  698. {
  699. if (!len) {
  700. return NVME_SUCCESS;
  701. }
  702. trace_pci_nvme_map_addr_cmb(addr, len);
  703. if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
  704. return NVME_DATA_TRAS_ERROR;
  705. }
  706. qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
  707. return NVME_SUCCESS;
  708. }
  709. static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
  710. size_t len)
  711. {
  712. if (!len) {
  713. return NVME_SUCCESS;
  714. }
  715. if (!nvme_addr_is_pmr(n, addr) || !nvme_addr_is_pmr(n, addr + len - 1)) {
  716. return NVME_DATA_TRAS_ERROR;
  717. }
  718. qemu_iovec_add(iov, nvme_addr_to_pmr(n, addr), len);
  719. return NVME_SUCCESS;
  720. }
  721. static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len)
  722. {
  723. bool cmb = false, pmr = false;
  724. if (!len) {
  725. return NVME_SUCCESS;
  726. }
  727. trace_pci_nvme_map_addr(addr, len);
  728. if (nvme_addr_is_iomem(n, addr)) {
  729. return NVME_DATA_TRAS_ERROR;
  730. }
  731. if (nvme_addr_is_cmb(n, addr)) {
  732. cmb = true;
  733. } else if (nvme_addr_is_pmr(n, addr)) {
  734. pmr = true;
  735. }
  736. if (cmb || pmr) {
  737. if (sg->flags & NVME_SG_DMA) {
  738. return NVME_INVALID_USE_OF_CMB | NVME_DNR;
  739. }
  740. if (sg->iov.niov + 1 > IOV_MAX) {
  741. goto max_mappings_exceeded;
  742. }
  743. if (cmb) {
  744. return nvme_map_addr_cmb(n, &sg->iov, addr, len);
  745. } else {
  746. return nvme_map_addr_pmr(n, &sg->iov, addr, len);
  747. }
  748. }
  749. if (!(sg->flags & NVME_SG_DMA)) {
  750. return NVME_INVALID_USE_OF_CMB | NVME_DNR;
  751. }
  752. if (sg->qsg.nsg + 1 > IOV_MAX) {
  753. goto max_mappings_exceeded;
  754. }
  755. qemu_sglist_add(&sg->qsg, addr, len);
  756. return NVME_SUCCESS;
  757. max_mappings_exceeded:
  758. NVME_GUEST_ERR(pci_nvme_ub_too_many_mappings,
  759. "number of mappings exceed 1024");
  760. return NVME_INTERNAL_DEV_ERROR | NVME_DNR;
  761. }
  762. static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr)
  763. {
  764. return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr));
  765. }
  766. static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1,
  767. uint64_t prp2, uint32_t len)
  768. {
  769. hwaddr trans_len = n->page_size - (prp1 % n->page_size);
  770. trans_len = MIN(len, trans_len);
  771. int num_prps = (len >> n->page_bits) + 1;
  772. uint16_t status;
  773. int ret;
  774. trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);
  775. nvme_sg_init(n, sg, nvme_addr_is_dma(n, prp1));
  776. status = nvme_map_addr(n, sg, prp1, trans_len);
  777. if (status) {
  778. goto unmap;
  779. }
  780. len -= trans_len;
  781. if (len) {
  782. if (len > n->page_size) {
  783. g_autofree uint64_t *prp_list = g_new(uint64_t, n->max_prp_ents);
  784. uint32_t nents, prp_trans;
  785. int i = 0;
  786. /*
787. * The first PRP list entry, pointed to by PRP2, may contain an offset.
788. * Hence, we need to calculate the number of entries based on that
789. * offset.
  790. */
  791. nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3;
  792. prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
  793. ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
  794. if (ret) {
  795. trace_pci_nvme_err_addr_read(prp2);
  796. status = NVME_DATA_TRAS_ERROR;
  797. goto unmap;
  798. }
  799. while (len != 0) {
  800. uint64_t prp_ent = le64_to_cpu(prp_list[i]);
  801. if (i == nents - 1 && len > n->page_size) {
  802. if (unlikely(prp_ent & (n->page_size - 1))) {
  803. trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
  804. status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
  805. goto unmap;
  806. }
  807. i = 0;
  808. nents = (len + n->page_size - 1) >> n->page_bits;
  809. nents = MIN(nents, n->max_prp_ents);
  810. prp_trans = nents * sizeof(uint64_t);
  811. ret = nvme_addr_read(n, prp_ent, (void *)prp_list,
  812. prp_trans);
  813. if (ret) {
  814. trace_pci_nvme_err_addr_read(prp_ent);
  815. status = NVME_DATA_TRAS_ERROR;
  816. goto unmap;
  817. }
  818. prp_ent = le64_to_cpu(prp_list[i]);
  819. }
  820. if (unlikely(prp_ent & (n->page_size - 1))) {
  821. trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
  822. status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
  823. goto unmap;
  824. }
  825. trans_len = MIN(len, n->page_size);
  826. status = nvme_map_addr(n, sg, prp_ent, trans_len);
  827. if (status) {
  828. goto unmap;
  829. }
  830. len -= trans_len;
  831. i++;
  832. }
  833. } else {
  834. if (unlikely(prp2 & (n->page_size - 1))) {
  835. trace_pci_nvme_err_invalid_prp2_align(prp2);
  836. status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
  837. goto unmap;
  838. }
  839. status = nvme_map_addr(n, sg, prp2, len);
  840. if (status) {
  841. goto unmap;
  842. }
  843. }
  844. }
  845. return NVME_SUCCESS;
  846. unmap:
  847. nvme_sg_unmap(sg);
  848. return status;
  849. }
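/*
 * For illustration, with 4 KiB controller pages: a 10 KiB transfer whose
 * PRP1 points 1 KiB into a page maps 3 KiB from PRP1; since more than one
 * page then remains, PRP2 is interpreted as a pointer to a PRP list whose
 * first two entries cover the remaining 4 KiB and 3 KiB. Had a single page
 * or less remained, PRP2 would have been used directly as the second data
 * pointer.
 */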
  850. /*
851. * Map 'nsgld' data descriptors from 'segment'. The function subtracts the
852. * number of bytes mapped from *len.
  853. */
  854. static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg,
  855. NvmeSglDescriptor *segment, uint64_t nsgld,
  856. size_t *len, NvmeCmd *cmd)
  857. {
  858. dma_addr_t addr, trans_len;
  859. uint32_t dlen;
  860. uint16_t status;
  861. for (int i = 0; i < nsgld; i++) {
  862. uint8_t type = NVME_SGL_TYPE(segment[i].type);
  863. switch (type) {
  864. case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
  865. break;
  866. case NVME_SGL_DESCR_TYPE_SEGMENT:
  867. case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
  868. return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR;
  869. default:
  870. return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR;
  871. }
  872. dlen = le32_to_cpu(segment[i].len);
  873. if (!dlen) {
  874. continue;
  875. }
  876. if (*len == 0) {
  877. /*
  878. * All data has been mapped, but the SGL contains additional
  879. * segments and/or descriptors. The controller might accept
  880. * ignoring the rest of the SGL.
  881. */
  882. uint32_t sgls = le32_to_cpu(n->id_ctrl.sgls);
  883. if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) {
  884. break;
  885. }
  886. trace_pci_nvme_err_invalid_sgl_excess_length(dlen);
  887. return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
  888. }
  889. trans_len = MIN(*len, dlen);
  890. addr = le64_to_cpu(segment[i].addr);
  891. if (UINT64_MAX - addr < dlen) {
  892. return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
  893. }
  894. status = nvme_map_addr(n, sg, addr, trans_len);
  895. if (status) {
  896. return status;
  897. }
  898. *len -= trans_len;
  899. }
  900. return NVME_SUCCESS;
  901. }
  902. static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
  903. size_t len, NvmeCmd *cmd)
  904. {
  905. /*
  906. * Read the segment in chunks of 256 descriptors (one 4k page) to avoid
  907. * dynamically allocating a potentially huge SGL. The spec allows the SGL
  908. * to be larger (as in number of bytes required to describe the SGL
  909. * descriptors and segment chain) than the command transfer size, so it is
  910. * not bounded by MDTS.
  911. */
  912. #define SEG_CHUNK_SIZE 256
  913. NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
  914. uint64_t nsgld;
  915. uint32_t seg_len;
  916. uint16_t status;
  917. hwaddr addr;
  918. int ret;
  919. sgld = &sgl;
  920. addr = le64_to_cpu(sgl.addr);
  921. trace_pci_nvme_map_sgl(NVME_SGL_TYPE(sgl.type), len);
  922. nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr));
  923. /*
  924. * If the entire transfer can be described with a single data block it can
  925. * be mapped directly.
  926. */
  927. if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
  928. status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd);
  929. if (status) {
  930. goto unmap;
  931. }
  932. goto out;
  933. }
  934. for (;;) {
  935. switch (NVME_SGL_TYPE(sgld->type)) {
  936. case NVME_SGL_DESCR_TYPE_SEGMENT:
  937. case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
  938. break;
  939. default:
  940. return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
  941. }
  942. seg_len = le32_to_cpu(sgld->len);
  943. /* check the length of the (Last) Segment descriptor */
  944. if (!seg_len || seg_len & 0xf) {
  945. return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
  946. }
  947. if (UINT64_MAX - addr < seg_len) {
  948. return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
  949. }
  950. nsgld = seg_len / sizeof(NvmeSglDescriptor);
  951. while (nsgld > SEG_CHUNK_SIZE) {
  952. if (nvme_addr_read(n, addr, segment, sizeof(segment))) {
  953. trace_pci_nvme_err_addr_read(addr);
  954. status = NVME_DATA_TRAS_ERROR;
  955. goto unmap;
  956. }
  957. status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE,
  958. &len, cmd);
  959. if (status) {
  960. goto unmap;
  961. }
  962. nsgld -= SEG_CHUNK_SIZE;
  963. addr += SEG_CHUNK_SIZE * sizeof(NvmeSglDescriptor);
  964. }
  965. ret = nvme_addr_read(n, addr, segment, nsgld *
  966. sizeof(NvmeSglDescriptor));
  967. if (ret) {
  968. trace_pci_nvme_err_addr_read(addr);
  969. status = NVME_DATA_TRAS_ERROR;
  970. goto unmap;
  971. }
  972. last_sgld = &segment[nsgld - 1];
  973. /*
  974. * If the segment ends with a Data Block, then we are done.
  975. */
  976. if (NVME_SGL_TYPE(last_sgld->type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
  977. status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd);
  978. if (status) {
  979. goto unmap;
  980. }
  981. goto out;
  982. }
  983. /*
  984. * If the last descriptor was not a Data Block, then the current
  985. * segment must not be a Last Segment.
  986. */
  987. if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) {
  988. status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
  989. goto unmap;
  990. }
  991. sgld = last_sgld;
  992. addr = le64_to_cpu(sgld->addr);
  993. /*
  994. * Do not map the last descriptor; it will be a Segment or Last Segment
  995. * descriptor and is handled by the next iteration.
  996. */
  997. status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd);
  998. if (status) {
  999. goto unmap;
  1000. }
  1001. }
  1002. out:
  1003. /* if there is any residual left in len, the SGL was too short */
  1004. if (len) {
  1005. status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
  1006. goto unmap;
  1007. }
  1008. return NVME_SUCCESS;
  1009. unmap:
  1010. nvme_sg_unmap(sg);
  1011. return status;
  1012. }
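/*
 * For illustration, a Segment descriptor covering 4096 bytes describes
 * 256 16-byte SGL descriptors, i.e. exactly one SEG_CHUNK_SIZE chunk, so it
 * is fetched with a single nvme_addr_read(). Longer segments are consumed
 * SEG_CHUNK_SIZE descriptors at a time so the on-stack 'segment' buffer
 * never has to grow.
 */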
  1013. uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
  1014. NvmeCmd *cmd)
  1015. {
  1016. uint64_t prp1, prp2;
  1017. switch (NVME_CMD_FLAGS_PSDT(cmd->flags)) {
  1018. case NVME_PSDT_PRP:
  1019. prp1 = le64_to_cpu(cmd->dptr.prp1);
  1020. prp2 = le64_to_cpu(cmd->dptr.prp2);
  1021. return nvme_map_prp(n, sg, prp1, prp2, len);
  1022. case NVME_PSDT_SGL_MPTR_CONTIGUOUS:
  1023. case NVME_PSDT_SGL_MPTR_SGL:
  1024. return nvme_map_sgl(n, sg, cmd->dptr.sgl, len, cmd);
  1025. default:
  1026. return NVME_INVALID_FIELD;
  1027. }
  1028. }
  1029. static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
  1030. NvmeCmd *cmd)
  1031. {
  1032. int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags);
  1033. hwaddr mptr = le64_to_cpu(cmd->mptr);
  1034. uint16_t status;
  1035. if (psdt == NVME_PSDT_SGL_MPTR_SGL) {
  1036. NvmeSglDescriptor sgl;
  1037. if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) {
  1038. return NVME_DATA_TRAS_ERROR;
  1039. }
  1040. status = nvme_map_sgl(n, sg, sgl, len, cmd);
  1041. if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) {
  1042. status = NVME_MD_SGL_LEN_INVALID | NVME_DNR;
  1043. }
  1044. return status;
  1045. }
  1046. nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr));
  1047. status = nvme_map_addr(n, sg, mptr, len);
  1048. if (status) {
  1049. nvme_sg_unmap(sg);
  1050. }
  1051. return status;
  1052. }
  1053. static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
  1054. {
  1055. NvmeNamespace *ns = req->ns;
  1056. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  1057. bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
  1058. bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
  1059. size_t len = nvme_l2b(ns, nlb);
  1060. uint16_t status;
  1061. if (nvme_ns_ext(ns) &&
  1062. !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
  1063. NvmeSg sg;
  1064. len += nvme_m2b(ns, nlb);
  1065. status = nvme_map_dptr(n, &sg, len, &req->cmd);
  1066. if (status) {
  1067. return status;
  1068. }
  1069. nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA);
  1070. nvme_sg_split(&sg, ns, &req->sg, NULL);
  1071. nvme_sg_unmap(&sg);
  1072. return NVME_SUCCESS;
  1073. }
  1074. return nvme_map_dptr(n, &req->sg, len, &req->cmd);
  1075. }
  1076. static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
  1077. {
  1078. NvmeNamespace *ns = req->ns;
  1079. size_t len = nvme_m2b(ns, nlb);
  1080. uint16_t status;
  1081. if (nvme_ns_ext(ns)) {
  1082. NvmeSg sg;
  1083. len += nvme_l2b(ns, nlb);
  1084. status = nvme_map_dptr(n, &sg, len, &req->cmd);
  1085. if (status) {
  1086. return status;
  1087. }
  1088. nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA);
  1089. nvme_sg_split(&sg, ns, NULL, &req->sg);
  1090. nvme_sg_unmap(&sg);
  1091. return NVME_SUCCESS;
  1092. }
  1093. return nvme_map_mptr(n, &req->sg, len, &req->cmd);
  1094. }
  1095. static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr,
  1096. uint32_t len, uint32_t bytes,
  1097. int32_t skip_bytes, int64_t offset,
  1098. NvmeTxDirection dir)
  1099. {
  1100. hwaddr addr;
  1101. uint32_t trans_len, count = bytes;
  1102. bool dma = sg->flags & NVME_SG_DMA;
  1103. int64_t sge_len;
  1104. int sg_idx = 0;
  1105. int ret;
  1106. assert(sg->flags & NVME_SG_ALLOC);
  1107. while (len) {
  1108. sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len;
  1109. if (sge_len - offset < 0) {
  1110. offset -= sge_len;
  1111. sg_idx++;
  1112. continue;
  1113. }
  1114. if (sge_len == offset) {
  1115. offset = 0;
  1116. sg_idx++;
  1117. continue;
  1118. }
  1119. trans_len = MIN(len, count);
  1120. trans_len = MIN(trans_len, sge_len - offset);
  1121. if (dma) {
  1122. addr = sg->qsg.sg[sg_idx].base + offset;
  1123. } else {
  1124. addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset;
  1125. }
  1126. if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
  1127. ret = nvme_addr_read(n, addr, ptr, trans_len);
  1128. } else {
  1129. ret = nvme_addr_write(n, addr, ptr, trans_len);
  1130. }
  1131. if (ret) {
  1132. return NVME_DATA_TRAS_ERROR;
  1133. }
  1134. ptr += trans_len;
  1135. len -= trans_len;
  1136. count -= trans_len;
  1137. offset += trans_len;
  1138. if (count == 0) {
  1139. count = bytes;
  1140. offset += skip_bytes;
  1141. }
  1142. }
  1143. return NVME_SUCCESS;
  1144. }
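/*
 * For illustration, with 512+8 extended LBAs the metadata path calls this
 * with bytes=8 and skip_bytes=512: starting past the first data block, the
 * loop copies 8 bytes of metadata, skips the following 512 bytes of data
 * and repeats until 'len' bytes have been transferred. The data path swaps
 * the roles (bytes=512, skip_bytes=8).
 */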
  1145. static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, void *ptr, uint32_t len,
  1146. NvmeTxDirection dir)
  1147. {
  1148. assert(sg->flags & NVME_SG_ALLOC);
  1149. if (sg->flags & NVME_SG_DMA) {
  1150. const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
  1151. dma_addr_t residual;
  1152. if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
  1153. dma_buf_write(ptr, len, &residual, &sg->qsg, attrs);
  1154. } else {
  1155. dma_buf_read(ptr, len, &residual, &sg->qsg, attrs);
  1156. }
  1157. if (unlikely(residual)) {
  1158. trace_pci_nvme_err_invalid_dma();
  1159. return NVME_INVALID_FIELD | NVME_DNR;
  1160. }
  1161. } else {
  1162. size_t bytes;
  1163. if (dir == NVME_TX_DIRECTION_TO_DEVICE) {
  1164. bytes = qemu_iovec_to_buf(&sg->iov, 0, ptr, len);
  1165. } else {
  1166. bytes = qemu_iovec_from_buf(&sg->iov, 0, ptr, len);
  1167. }
  1168. if (unlikely(bytes != len)) {
  1169. trace_pci_nvme_err_invalid_dma();
  1170. return NVME_INVALID_FIELD | NVME_DNR;
  1171. }
  1172. }
  1173. return NVME_SUCCESS;
  1174. }
  1175. static inline uint16_t nvme_c2h(NvmeCtrl *n, void *ptr, uint32_t len,
  1176. NvmeRequest *req)
  1177. {
  1178. uint16_t status;
  1179. status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
  1180. if (status) {
  1181. return status;
  1182. }
  1183. return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE);
  1184. }
  1185. static inline uint16_t nvme_h2c(NvmeCtrl *n, void *ptr, uint32_t len,
  1186. NvmeRequest *req)
  1187. {
  1188. uint16_t status;
  1189. status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
  1190. if (status) {
  1191. return status;
  1192. }
  1193. return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE);
  1194. }
  1195. uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
  1196. NvmeTxDirection dir, NvmeRequest *req)
  1197. {
  1198. NvmeNamespace *ns = req->ns;
  1199. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  1200. bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
  1201. bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
  1202. if (nvme_ns_ext(ns) &&
  1203. !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
  1204. return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz,
  1205. ns->lbaf.ms, 0, dir);
  1206. }
  1207. return nvme_tx(n, &req->sg, ptr, len, dir);
  1208. }
  1209. uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
  1210. NvmeTxDirection dir, NvmeRequest *req)
  1211. {
  1212. NvmeNamespace *ns = req->ns;
  1213. uint16_t status;
  1214. if (nvme_ns_ext(ns)) {
  1215. return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbaf.ms,
  1216. ns->lbasz, ns->lbasz, dir);
  1217. }
  1218. nvme_sg_unmap(&req->sg);
  1219. status = nvme_map_mptr(n, &req->sg, len, &req->cmd);
  1220. if (status) {
  1221. return status;
  1222. }
  1223. return nvme_tx(n, &req->sg, ptr, len, dir);
  1224. }
  1225. static inline void nvme_blk_read(BlockBackend *blk, int64_t offset,
  1226. uint32_t align, BlockCompletionFunc *cb,
  1227. NvmeRequest *req)
  1228. {
  1229. assert(req->sg.flags & NVME_SG_ALLOC);
  1230. if (req->sg.flags & NVME_SG_DMA) {
  1231. req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, align, cb, req);
  1232. } else {
  1233. req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req);
  1234. }
  1235. }
  1236. static inline void nvme_blk_write(BlockBackend *blk, int64_t offset,
  1237. uint32_t align, BlockCompletionFunc *cb,
  1238. NvmeRequest *req)
  1239. {
  1240. assert(req->sg.flags & NVME_SG_ALLOC);
  1241. if (req->sg.flags & NVME_SG_DMA) {
  1242. req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, align, cb, req);
  1243. } else {
  1244. req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req);
  1245. }
  1246. }
  1247. static void nvme_update_cq_eventidx(const NvmeCQueue *cq)
  1248. {
  1249. trace_pci_nvme_update_cq_eventidx(cq->cqid, cq->head);
  1250. stl_le_pci_dma(PCI_DEVICE(cq->ctrl), cq->ei_addr, cq->head,
  1251. MEMTXATTRS_UNSPECIFIED);
  1252. }
  1253. static void nvme_update_cq_head(NvmeCQueue *cq)
  1254. {
  1255. ldl_le_pci_dma(PCI_DEVICE(cq->ctrl), cq->db_addr, &cq->head,
  1256. MEMTXATTRS_UNSPECIFIED);
  1257. trace_pci_nvme_update_cq_head(cq->cqid, cq->head);
  1258. }
  1259. static void nvme_post_cqes(void *opaque)
  1260. {
  1261. NvmeCQueue *cq = opaque;
  1262. NvmeCtrl *n = cq->ctrl;
  1263. NvmeRequest *req, *next;
  1264. bool pending = cq->head != cq->tail;
  1265. int ret;
  1266. QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
  1267. NvmeSQueue *sq;
  1268. hwaddr addr;
  1269. if (n->dbbuf_enabled) {
  1270. nvme_update_cq_eventidx(cq);
  1271. nvme_update_cq_head(cq);
  1272. }
  1273. if (nvme_cq_full(cq)) {
  1274. break;
  1275. }
  1276. sq = req->sq;
  1277. req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
  1278. req->cqe.sq_id = cpu_to_le16(sq->sqid);
  1279. req->cqe.sq_head = cpu_to_le16(sq->head);
  1280. addr = cq->dma_addr + (cq->tail << NVME_CQES);
  1281. ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)&req->cqe,
  1282. sizeof(req->cqe));
  1283. if (ret) {
  1284. trace_pci_nvme_err_addr_write(addr);
  1285. trace_pci_nvme_err_cfs();
  1286. stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
  1287. break;
  1288. }
  1289. QTAILQ_REMOVE(&cq->req_list, req, entry);
  1290. nvme_inc_cq_tail(cq);
  1291. nvme_sg_unmap(&req->sg);
  1292. if (QTAILQ_EMPTY(&sq->req_list) && !nvme_sq_empty(sq)) {
  1293. qemu_bh_schedule(sq->bh);
  1294. }
  1295. QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
  1296. }
  1297. if (cq->tail != cq->head) {
  1298. if (cq->irq_enabled && !pending) {
  1299. n->cq_pending++;
  1300. }
  1301. nvme_irq_assert(n, cq);
  1302. }
  1303. }
  1304. static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
  1305. {
  1306. assert(cq->cqid == req->sq->cqid);
  1307. trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
  1308. le32_to_cpu(req->cqe.result),
  1309. le32_to_cpu(req->cqe.dw1),
  1310. req->status);
  1311. if (req->status) {
  1312. trace_pci_nvme_err_req_status(nvme_cid(req), nvme_nsid(req->ns),
  1313. req->status, req->cmd.opcode);
  1314. }
  1315. QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
  1316. QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
  1317. qemu_bh_schedule(cq->bh);
  1318. }
  1319. static void nvme_process_aers(void *opaque)
  1320. {
  1321. NvmeCtrl *n = opaque;
  1322. NvmeAsyncEvent *event, *next;
  1323. trace_pci_nvme_process_aers(n->aer_queued);
  1324. QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
  1325. NvmeRequest *req;
  1326. NvmeAerResult *result;
  1327. /* can't post cqe if there is nothing to complete */
  1328. if (!n->outstanding_aers) {
  1329. trace_pci_nvme_no_outstanding_aers();
  1330. break;
  1331. }
  1332. /* ignore if masked (cqe posted, but event not cleared) */
  1333. if (n->aer_mask & (1 << event->result.event_type)) {
  1334. trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask);
  1335. continue;
  1336. }
  1337. QTAILQ_REMOVE(&n->aer_queue, event, entry);
  1338. n->aer_queued--;
  1339. n->aer_mask |= 1 << event->result.event_type;
  1340. n->outstanding_aers--;
  1341. req = n->aer_reqs[n->outstanding_aers];
  1342. result = (NvmeAerResult *) &req->cqe.result;
  1343. result->event_type = event->result.event_type;
  1344. result->event_info = event->result.event_info;
  1345. result->log_page = event->result.log_page;
  1346. g_free(event);
  1347. trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info,
  1348. result->log_page);
  1349. nvme_enqueue_req_completion(&n->admin_cq, req);
  1350. }
  1351. }
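/*
 * Once a completion for a given event type has been posted, that type is
 * masked (see the aer_mask check above): further events of the same type
 * stay in aer_queue and are only released again by nvme_clear_events()
 * below, which clears the mask bit and drops the still-queued duplicates.
 */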
  1352. static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
  1353. uint8_t event_info, uint8_t log_page)
  1354. {
  1355. NvmeAsyncEvent *event;
  1356. trace_pci_nvme_enqueue_event(event_type, event_info, log_page);
  1357. if (n->aer_queued == n->params.aer_max_queued) {
  1358. trace_pci_nvme_enqueue_event_noqueue(n->aer_queued);
  1359. return;
  1360. }
  1361. event = g_new(NvmeAsyncEvent, 1);
  1362. event->result = (NvmeAerResult) {
  1363. .event_type = event_type,
  1364. .event_info = event_info,
  1365. .log_page = log_page,
  1366. };
  1367. QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry);
  1368. n->aer_queued++;
  1369. nvme_process_aers(n);
  1370. }
  1371. static void nvme_smart_event(NvmeCtrl *n, uint8_t event)
  1372. {
  1373. uint8_t aer_info;
1374. /* Ref SPEC <Asynchronous Event Information - SMART / Health Status> */
  1375. if (!(NVME_AEC_SMART(n->features.async_config) & event)) {
  1376. return;
  1377. }
  1378. switch (event) {
  1379. case NVME_SMART_SPARE:
  1380. aer_info = NVME_AER_INFO_SMART_SPARE_THRESH;
  1381. break;
  1382. case NVME_SMART_TEMPERATURE:
  1383. aer_info = NVME_AER_INFO_SMART_TEMP_THRESH;
  1384. break;
  1385. case NVME_SMART_RELIABILITY:
  1386. case NVME_SMART_MEDIA_READ_ONLY:
  1387. case NVME_SMART_FAILED_VOLATILE_MEDIA:
  1388. case NVME_SMART_PMR_UNRELIABLE:
  1389. aer_info = NVME_AER_INFO_SMART_RELIABILITY;
  1390. break;
  1391. default:
  1392. return;
  1393. }
  1394. nvme_enqueue_event(n, NVME_AER_TYPE_SMART, aer_info, NVME_LOG_SMART_INFO);
  1395. }
  1396. static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
  1397. {
  1398. NvmeAsyncEvent *event, *next;
  1399. n->aer_mask &= ~(1 << event_type);
  1400. QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
  1401. if (event->result.event_type == event_type) {
  1402. QTAILQ_REMOVE(&n->aer_queue, event, entry);
  1403. n->aer_queued--;
  1404. g_free(event);
  1405. }
  1406. }
  1407. }
  1408. static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len)
  1409. {
  1410. uint8_t mdts = n->params.mdts;
  1411. if (mdts && len > n->page_size << mdts) {
  1412. trace_pci_nvme_err_mdts(len);
  1413. return NVME_INVALID_FIELD | NVME_DNR;
  1414. }
  1415. return NVME_SUCCESS;
  1416. }
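/*
 * For illustration, with a 4 KiB controller page size and mdts=7 the
 * largest accepted transfer is 4 KiB << 7 = 512 KiB; mdts=0 disables the
 * check entirely.
 */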
  1417. static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba,
  1418. uint32_t nlb)
  1419. {
  1420. uint64_t nsze = le64_to_cpu(ns->id_ns.nsze);
  1421. if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) {
  1422. trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze);
  1423. return NVME_LBA_RANGE | NVME_DNR;
  1424. }
  1425. return NVME_SUCCESS;
  1426. }
  1427. static int nvme_block_status_all(NvmeNamespace *ns, uint64_t slba,
  1428. uint32_t nlb, int flags)
  1429. {
  1430. BlockDriverState *bs = blk_bs(ns->blkconf.blk);
  1431. int64_t pnum = 0, bytes = nvme_l2b(ns, nlb);
  1432. int64_t offset = nvme_l2b(ns, slba);
  1433. int ret;
  1434. /*
1435. * `pnum` holds the number of bytes after offset that share the same
  1436. * allocation status as the byte at offset. If `pnum` is different from
  1437. * `bytes`, we should check the allocation status of the next range and
  1438. * continue this until all bytes have been checked.
  1439. */
  1440. do {
  1441. bytes -= pnum;
  1442. ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
  1443. if (ret < 0) {
  1444. return ret;
  1445. }
  1446. trace_pci_nvme_block_status(offset, bytes, pnum, ret,
  1447. !!(ret & BDRV_BLOCK_ZERO));
  1448. if (!(ret & flags)) {
  1449. return 1;
  1450. }
  1451. offset += pnum;
  1452. } while (pnum != bytes);
  1453. return 0;
  1454. }
  1455. static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
  1456. uint32_t nlb)
  1457. {
  1458. int ret;
  1459. Error *err = NULL;
  1460. ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_DATA);
  1461. if (ret) {
  1462. if (ret < 0) {
  1463. error_setg_errno(&err, -ret, "unable to get block status");
  1464. error_report_err(err);
  1465. return NVME_INTERNAL_DEV_ERROR;
  1466. }
  1467. return NVME_DULB;
  1468. }
  1469. return NVME_SUCCESS;
  1470. }
  1471. static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba)
  1472. {
  1473. return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 :
  1474. slba / ns->zone_size;
  1475. }
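/*
 * For illustration, with a zone size of 0x10000 LBAs (a power of two, so
 * zone_size_log2 == 16) an slba of 0x25000 maps to zone index 2; for
 * non-power-of-two zone sizes the plain division above is used instead.
 */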
  1476. static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba)
  1477. {
  1478. uint32_t zone_idx = nvme_zone_idx(ns, slba);
  1479. if (zone_idx >= ns->num_zones) {
  1480. return NULL;
  1481. }
  1482. return &ns->zone_array[zone_idx];
  1483. }
  1484. static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
  1485. {
  1486. uint64_t zslba = zone->d.zslba;
  1487. switch (nvme_get_zone_state(zone)) {
  1488. case NVME_ZONE_STATE_EMPTY:
  1489. case NVME_ZONE_STATE_IMPLICITLY_OPEN:
  1490. case NVME_ZONE_STATE_EXPLICITLY_OPEN:
  1491. case NVME_ZONE_STATE_CLOSED:
  1492. return NVME_SUCCESS;
  1493. case NVME_ZONE_STATE_FULL:
  1494. trace_pci_nvme_err_zone_is_full(zslba);
  1495. return NVME_ZONE_FULL;
  1496. case NVME_ZONE_STATE_OFFLINE:
  1497. trace_pci_nvme_err_zone_is_offline(zslba);
  1498. return NVME_ZONE_OFFLINE;
  1499. case NVME_ZONE_STATE_READ_ONLY:
  1500. trace_pci_nvme_err_zone_is_read_only(zslba);
  1501. return NVME_ZONE_READ_ONLY;
  1502. default:
  1503. g_assert_not_reached();
  1504. }
  1505. return NVME_INTERNAL_DEV_ERROR;
  1506. }
  1507. static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone,
  1508. uint64_t slba, uint32_t nlb)
  1509. {
  1510. uint64_t zcap = nvme_zone_wr_boundary(zone);
  1511. uint16_t status;
  1512. status = nvme_check_zone_state_for_write(zone);
  1513. if (status) {
  1514. return status;
  1515. }
  1516. if (zone->d.za & NVME_ZA_ZRWA_VALID) {
  1517. uint64_t ezrwa = zone->w_ptr + 2 * ns->zns.zrwas;
  1518. if (slba < zone->w_ptr || slba + nlb > ezrwa) {
  1519. trace_pci_nvme_err_zone_invalid_write(slba, zone->w_ptr);
  1520. return NVME_ZONE_INVALID_WRITE;
  1521. }
  1522. } else {
  1523. if (unlikely(slba != zone->w_ptr)) {
  1524. trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
  1525. zone->w_ptr);
  1526. return NVME_ZONE_INVALID_WRITE;
  1527. }
  1528. }
  1529. if (unlikely((slba + nlb) > zcap)) {
  1530. trace_pci_nvme_err_zone_boundary(slba, nlb, zcap);
  1531. return NVME_ZONE_BOUNDARY_ERROR;
  1532. }
  1533. return NVME_SUCCESS;
  1534. }
  1535. static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone)
  1536. {
  1537. switch (nvme_get_zone_state(zone)) {
  1538. case NVME_ZONE_STATE_EMPTY:
  1539. case NVME_ZONE_STATE_IMPLICITLY_OPEN:
  1540. case NVME_ZONE_STATE_EXPLICITLY_OPEN:
  1541. case NVME_ZONE_STATE_FULL:
  1542. case NVME_ZONE_STATE_CLOSED:
  1543. case NVME_ZONE_STATE_READ_ONLY:
  1544. return NVME_SUCCESS;
  1545. case NVME_ZONE_STATE_OFFLINE:
  1546. trace_pci_nvme_err_zone_is_offline(zone->d.zslba);
  1547. return NVME_ZONE_OFFLINE;
  1548. default:
  1549. g_assert_not_reached();
  1550. }
  1551. return NVME_INTERNAL_DEV_ERROR;
  1552. }
  1553. static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba,
  1554. uint32_t nlb)
  1555. {
  1556. NvmeZone *zone;
  1557. uint64_t bndry, end;
  1558. uint16_t status;
  1559. zone = nvme_get_zone_by_slba(ns, slba);
  1560. assert(zone);
  1561. bndry = nvme_zone_rd_boundary(ns, zone);
  1562. end = slba + nlb;
  1563. status = nvme_check_zone_state_for_read(zone);
  1564. if (status) {
  1565. ;
  1566. } else if (unlikely(end > bndry)) {
  1567. if (!ns->params.cross_zone_read) {
  1568. status = NVME_ZONE_BOUNDARY_ERROR;
  1569. } else {
  1570. /*
  1571. * Read across zone boundary - check that all subsequent
  1572. * zones that are being read have an appropriate state.
  1573. */
  1574. do {
  1575. zone++;
  1576. status = nvme_check_zone_state_for_read(zone);
  1577. if (status) {
  1578. break;
  1579. }
  1580. } while (end > nvme_zone_rd_boundary(ns, zone));
  1581. }
  1582. }
  1583. return status;
  1584. }
  1585. static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone)
  1586. {
  1587. switch (nvme_get_zone_state(zone)) {
  1588. case NVME_ZONE_STATE_FULL:
  1589. return NVME_SUCCESS;
  1590. case NVME_ZONE_STATE_IMPLICITLY_OPEN:
  1591. case NVME_ZONE_STATE_EXPLICITLY_OPEN:
  1592. nvme_aor_dec_open(ns);
  1593. /* fallthrough */
  1594. case NVME_ZONE_STATE_CLOSED:
  1595. nvme_aor_dec_active(ns);
  1596. if (zone->d.za & NVME_ZA_ZRWA_VALID) {
  1597. zone->d.za &= ~NVME_ZA_ZRWA_VALID;
  1598. if (ns->params.numzrwa) {
  1599. ns->zns.numzrwa++;
  1600. }
  1601. }
  1602. /* fallthrough */
  1603. case NVME_ZONE_STATE_EMPTY:
  1604. nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL);
  1605. return NVME_SUCCESS;
  1606. default:
  1607. return NVME_ZONE_INVAL_TRANSITION;
  1608. }
  1609. }
  1610. static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone)
  1611. {
  1612. switch (nvme_get_zone_state(zone)) {
  1613. case NVME_ZONE_STATE_EXPLICITLY_OPEN:
  1614. case NVME_ZONE_STATE_IMPLICITLY_OPEN:
  1615. nvme_aor_dec_open(ns);
  1616. nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
  1617. /* fall through */
  1618. case NVME_ZONE_STATE_CLOSED:
  1619. return NVME_SUCCESS;
  1620. default:
  1621. return NVME_ZONE_INVAL_TRANSITION;
  1622. }
  1623. }
  1624. static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone)
  1625. {
  1626. switch (nvme_get_zone_state(zone)) {
  1627. case NVME_ZONE_STATE_EXPLICITLY_OPEN:
  1628. case NVME_ZONE_STATE_IMPLICITLY_OPEN:
  1629. nvme_aor_dec_open(ns);
  1630. /* fallthrough */
  1631. case NVME_ZONE_STATE_CLOSED:
  1632. nvme_aor_dec_active(ns);
  1633. if (zone->d.za & NVME_ZA_ZRWA_VALID) {
  1634. if (ns->params.numzrwa) {
  1635. ns->zns.numzrwa++;
  1636. }
  1637. }
  1638. /* fallthrough */
  1639. case NVME_ZONE_STATE_FULL:
  1640. zone->w_ptr = zone->d.zslba;
  1641. zone->d.wp = zone->w_ptr;
  1642. nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
  1643. /* fallthrough */
  1644. case NVME_ZONE_STATE_EMPTY:
  1645. return NVME_SUCCESS;
  1646. default:
  1647. return NVME_ZONE_INVAL_TRANSITION;
  1648. }
  1649. }
  1650. static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
  1651. {
  1652. NvmeZone *zone;
  1653. if (ns->params.max_open_zones &&
  1654. ns->nr_open_zones == ns->params.max_open_zones) {
  1655. zone = QTAILQ_FIRST(&ns->imp_open_zones);
  1656. if (zone) {
  1657. /*
  1658. * Automatically close this implicitly open zone.
  1659. */
  1660. QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
  1661. nvme_zrm_close(ns, zone);
  1662. }
  1663. }
  1664. }
  1665. enum {
  1666. NVME_ZRM_AUTO = 1 << 0,
  1667. NVME_ZRM_ZRWA = 1 << 1,
  1668. };
  1669. static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns,
  1670. NvmeZone *zone, int flags)
  1671. {
  1672. int act = 0;
  1673. uint16_t status;
  1674. switch (nvme_get_zone_state(zone)) {
  1675. case NVME_ZONE_STATE_EMPTY:
  1676. act = 1;
  1677. /* fallthrough */
  1678. case NVME_ZONE_STATE_CLOSED:
  1679. if (n->params.auto_transition_zones) {
  1680. nvme_zrm_auto_transition_zone(ns);
  1681. }
  1682. status = nvme_zns_check_resources(ns, act, 1,
  1683. (flags & NVME_ZRM_ZRWA) ? 1 : 0);
  1684. if (status) {
  1685. return status;
  1686. }
  1687. if (act) {
  1688. nvme_aor_inc_active(ns);
  1689. }
  1690. nvme_aor_inc_open(ns);
  1691. if (flags & NVME_ZRM_AUTO) {
  1692. nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
  1693. return NVME_SUCCESS;
  1694. }
  1695. /* fallthrough */
  1696. case NVME_ZONE_STATE_IMPLICITLY_OPEN:
  1697. if (flags & NVME_ZRM_AUTO) {
  1698. return NVME_SUCCESS;
  1699. }
  1700. nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
  1701. /* fallthrough */
  1702. case NVME_ZONE_STATE_EXPLICITLY_OPEN:
  1703. if (flags & NVME_ZRM_ZRWA) {
  1704. ns->zns.numzrwa--;
  1705. zone->d.za |= NVME_ZA_ZRWA_VALID;
  1706. }
  1707. return NVME_SUCCESS;
  1708. default:
  1709. return NVME_ZONE_INVAL_TRANSITION;
  1710. }
  1711. }
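/*
 * NVME_ZRM_AUTO marks implicit opens done on behalf of a write to an empty
 * or closed zone, leaving it IMPLICITLY_OPEN; explicit Open Zone requests
 * (without NVME_ZRM_AUTO) end up EXPLICITLY_OPEN and, when NVME_ZRM_ZRWA is
 * set, additionally consume a zone random write area resource.
 */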
  1712. static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns,
  1713. NvmeZone *zone)
  1714. {
  1715. return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO);
  1716. }
  1717. static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
  1718. uint32_t nlb)
  1719. {
  1720. zone->d.wp += nlb;
  1721. if (zone->d.wp == nvme_zone_wr_boundary(zone)) {
  1722. nvme_zrm_finish(ns, zone);
  1723. }
  1724. }
  1725. static void nvme_zoned_zrwa_implicit_flush(NvmeNamespace *ns, NvmeZone *zone,
  1726. uint32_t nlbc)
  1727. {
  1728. uint16_t nzrwafgs = DIV_ROUND_UP(nlbc, ns->zns.zrwafg);
  1729. nlbc = nzrwafgs * ns->zns.zrwafg;
  1730. trace_pci_nvme_zoned_zrwa_implicit_flush(zone->d.zslba, nlbc);
  1731. zone->w_ptr += nlbc;
  1732. nvme_advance_zone_wp(ns, zone, nlbc);
  1733. }
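/*
 * For illustration, with zrwafg=16 an implicit flush of 20 LBAs is rounded
 * up to two whole ZRWA flush granules (32 LBAs): both w_ptr and the
 * reported write pointer advance by 32, and the zone is finished if that
 * reaches the zone write boundary.
 */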
  1734. static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req)
  1735. {
  1736. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  1737. NvmeZone *zone;
  1738. uint64_t slba;
  1739. uint32_t nlb;
  1740. slba = le64_to_cpu(rw->slba);
  1741. nlb = le16_to_cpu(rw->nlb) + 1;
  1742. zone = nvme_get_zone_by_slba(ns, slba);
  1743. assert(zone);
  1744. if (zone->d.za & NVME_ZA_ZRWA_VALID) {
  1745. uint64_t ezrwa = zone->w_ptr + ns->zns.zrwas - 1;
  1746. uint64_t elba = slba + nlb - 1;
  1747. if (elba > ezrwa) {
  1748. nvme_zoned_zrwa_implicit_flush(ns, zone, elba - ezrwa);
  1749. }
  1750. return;
  1751. }
  1752. nvme_advance_zone_wp(ns, zone, nlb);
  1753. }
  1754. static inline bool nvme_is_write(NvmeRequest *req)
  1755. {
  1756. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  1757. return rw->opcode == NVME_CMD_WRITE ||
  1758. rw->opcode == NVME_CMD_ZONE_APPEND ||
  1759. rw->opcode == NVME_CMD_WRITE_ZEROES;
  1760. }
  1761. static void nvme_misc_cb(void *opaque, int ret)
  1762. {
  1763. NvmeRequest *req = opaque;
  1764. uint16_t cid = nvme_cid(req);
  1765. trace_pci_nvme_misc_cb(cid);
  1766. if (ret) {
  1767. if (!req->status) {
  1768. req->status = NVME_INTERNAL_DEV_ERROR;
  1769. }
  1770. trace_pci_nvme_err_aio(cid, strerror(-ret), req->status);
  1771. }
  1772. nvme_enqueue_req_completion(nvme_cq(req), req);
  1773. }
  1774. void nvme_rw_complete_cb(void *opaque, int ret)
  1775. {
  1776. NvmeRequest *req = opaque;
  1777. NvmeNamespace *ns = req->ns;
  1778. BlockBackend *blk = ns->blkconf.blk;
  1779. BlockAcctCookie *acct = &req->acct;
  1780. BlockAcctStats *stats = blk_get_stats(blk);
  1781. trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk));
  1782. if (ret) {
  1783. Error *err = NULL;
  1784. block_acct_failed(stats, acct);
  1785. switch (req->cmd.opcode) {
  1786. case NVME_CMD_READ:
  1787. req->status = NVME_UNRECOVERED_READ;
  1788. break;
  1789. case NVME_CMD_WRITE:
  1790. case NVME_CMD_WRITE_ZEROES:
  1791. case NVME_CMD_ZONE_APPEND:
  1792. req->status = NVME_WRITE_FAULT;
  1793. break;
  1794. default:
  1795. req->status = NVME_INTERNAL_DEV_ERROR;
  1796. break;
  1797. }
  1798. trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
  1799. error_setg_errno(&err, -ret, "aio failed");
  1800. error_report_err(err);
  1801. } else {
  1802. block_acct_done(stats, acct);
  1803. }
  1804. if (ns->params.zoned && nvme_is_write(req)) {
  1805. nvme_finalize_zoned_write(ns, req);
  1806. }
  1807. nvme_enqueue_req_completion(nvme_cq(req), req);
  1808. }
  1809. static void nvme_rw_cb(void *opaque, int ret)
  1810. {
  1811. NvmeRequest *req = opaque;
  1812. NvmeNamespace *ns = req->ns;
  1813. BlockBackend *blk = ns->blkconf.blk;
  1814. trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
  1815. if (ret) {
  1816. goto out;
  1817. }
  1818. if (ns->lbaf.ms) {
  1819. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  1820. uint64_t slba = le64_to_cpu(rw->slba);
  1821. uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
  1822. uint64_t offset = nvme_moff(ns, slba);
  1823. if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) {
  1824. size_t mlen = nvme_m2b(ns, nlb);
  1825. req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen,
  1826. BDRV_REQ_MAY_UNMAP,
  1827. nvme_rw_complete_cb, req);
  1828. return;
  1829. }
  1830. if (nvme_ns_ext(ns) || req->cmd.mptr) {
  1831. uint16_t status;
  1832. nvme_sg_unmap(&req->sg);
  1833. status = nvme_map_mdata(nvme_ctrl(req), nlb, req);
  1834. if (status) {
  1835. ret = -EFAULT;
  1836. goto out;
  1837. }
  1838. if (req->cmd.opcode == NVME_CMD_READ) {
  1839. return nvme_blk_read(blk, offset, 1, nvme_rw_complete_cb, req);
  1840. }
  1841. return nvme_blk_write(blk, offset, 1, nvme_rw_complete_cb, req);
  1842. }
  1843. }
  1844. out:
  1845. nvme_rw_complete_cb(req, ret);
  1846. }
  1847. static void nvme_verify_cb(void *opaque, int ret)
  1848. {
  1849. NvmeBounceContext *ctx = opaque;
  1850. NvmeRequest *req = ctx->req;
  1851. NvmeNamespace *ns = req->ns;
  1852. BlockBackend *blk = ns->blkconf.blk;
  1853. BlockAcctCookie *acct = &req->acct;
  1854. BlockAcctStats *stats = blk_get_stats(blk);
  1855. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  1856. uint64_t slba = le64_to_cpu(rw->slba);
  1857. uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
  1858. uint16_t apptag = le16_to_cpu(rw->apptag);
  1859. uint16_t appmask = le16_to_cpu(rw->appmask);
  1860. uint64_t reftag = le32_to_cpu(rw->reftag);
  1861. uint64_t cdw3 = le32_to_cpu(rw->cdw3);
  1862. uint16_t status;
  1863. reftag |= cdw3 << 32;
  1864. trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag);
  1865. if (ret) {
  1866. block_acct_failed(stats, acct);
  1867. req->status = NVME_UNRECOVERED_READ;
  1868. trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
  1869. goto out;
  1870. }
  1871. block_acct_done(stats, acct);
  1872. if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
  1873. status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce,
  1874. ctx->mdata.iov.size, slba);
  1875. if (status) {
  1876. req->status = status;
  1877. goto out;
  1878. }
  1879. req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
  1880. ctx->mdata.bounce, ctx->mdata.iov.size,
  1881. prinfo, slba, apptag, appmask, &reftag);
  1882. }
  1883. out:
  1884. qemu_iovec_destroy(&ctx->data.iov);
  1885. g_free(ctx->data.bounce);
  1886. qemu_iovec_destroy(&ctx->mdata.iov);
  1887. g_free(ctx->mdata.bounce);
  1888. g_free(ctx);
  1889. nvme_enqueue_req_completion(nvme_cq(req), req);
  1890. }
  1891. static void nvme_verify_mdata_in_cb(void *opaque, int ret)
  1892. {
  1893. NvmeBounceContext *ctx = opaque;
  1894. NvmeRequest *req = ctx->req;
  1895. NvmeNamespace *ns = req->ns;
  1896. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  1897. uint64_t slba = le64_to_cpu(rw->slba);
  1898. uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
  1899. size_t mlen = nvme_m2b(ns, nlb);
  1900. uint64_t offset = nvme_moff(ns, slba);
  1901. BlockBackend *blk = ns->blkconf.blk;
  1902. trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk));
  1903. if (ret) {
  1904. goto out;
  1905. }
  1906. ctx->mdata.bounce = g_malloc(mlen);
  1907. qemu_iovec_reset(&ctx->mdata.iov);
  1908. qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
  1909. req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
  1910. nvme_verify_cb, ctx);
  1911. return;
  1912. out:
  1913. nvme_verify_cb(ctx, ret);
  1914. }
  1915. struct nvme_compare_ctx {
  1916. struct {
  1917. QEMUIOVector iov;
  1918. uint8_t *bounce;
  1919. } data;
  1920. struct {
  1921. QEMUIOVector iov;
  1922. uint8_t *bounce;
  1923. } mdata;
  1924. };
  1925. static void nvme_compare_mdata_cb(void *opaque, int ret)
  1926. {
  1927. NvmeRequest *req = opaque;
  1928. NvmeNamespace *ns = req->ns;
  1929. NvmeCtrl *n = nvme_ctrl(req);
  1930. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  1931. uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
  1932. uint16_t apptag = le16_to_cpu(rw->apptag);
  1933. uint16_t appmask = le16_to_cpu(rw->appmask);
  1934. uint64_t reftag = le32_to_cpu(rw->reftag);
  1935. uint64_t cdw3 = le32_to_cpu(rw->cdw3);
  1936. struct nvme_compare_ctx *ctx = req->opaque;
  1937. g_autofree uint8_t *buf = NULL;
  1938. BlockBackend *blk = ns->blkconf.blk;
  1939. BlockAcctCookie *acct = &req->acct;
  1940. BlockAcctStats *stats = blk_get_stats(blk);
  1941. uint16_t status = NVME_SUCCESS;
  1942. reftag |= cdw3 << 32;
  1943. trace_pci_nvme_compare_mdata_cb(nvme_cid(req));
  1944. if (ret) {
  1945. block_acct_failed(stats, acct);
  1946. req->status = NVME_UNRECOVERED_READ;
  1947. trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
  1948. goto out;
  1949. }
  1950. buf = g_malloc(ctx->mdata.iov.size);
  1951. status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size,
  1952. NVME_TX_DIRECTION_TO_DEVICE, req);
  1953. if (status) {
  1954. req->status = status;
  1955. goto out;
  1956. }
  1957. if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
  1958. uint64_t slba = le64_to_cpu(rw->slba);
  1959. uint8_t *bufp;
  1960. uint8_t *mbufp = ctx->mdata.bounce;
  1961. uint8_t *end = mbufp + ctx->mdata.iov.size;
  1962. int16_t pil = 0;
  1963. status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
  1964. ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
  1965. slba, apptag, appmask, &reftag);
  1966. if (status) {
  1967. req->status = status;
  1968. goto out;
  1969. }
  1970. /*
  1971. * When formatted with protection information, do not compare the DIF
  1972. * tuple.
  1973. */
  1974. if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
  1975. pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
  1976. }
  1977. for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) {
  1978. if (memcmp(bufp + pil, mbufp + pil, ns->lbaf.ms - pil)) {
  1979. req->status = NVME_CMP_FAILURE | NVME_DNR;
  1980. goto out;
  1981. }
  1982. }
  1983. goto out;
  1984. }
  1985. if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) {
  1986. req->status = NVME_CMP_FAILURE | NVME_DNR;
  1987. goto out;
  1988. }
  1989. block_acct_done(stats, acct);
  1990. out:
  1991. qemu_iovec_destroy(&ctx->data.iov);
  1992. g_free(ctx->data.bounce);
  1993. qemu_iovec_destroy(&ctx->mdata.iov);
  1994. g_free(ctx->mdata.bounce);
  1995. g_free(ctx);
  1996. nvme_enqueue_req_completion(nvme_cq(req), req);
  1997. }
  1998. static void nvme_compare_data_cb(void *opaque, int ret)
  1999. {
  2000. NvmeRequest *req = opaque;
  2001. NvmeCtrl *n = nvme_ctrl(req);
  2002. NvmeNamespace *ns = req->ns;
  2003. BlockBackend *blk = ns->blkconf.blk;
  2004. BlockAcctCookie *acct = &req->acct;
  2005. BlockAcctStats *stats = blk_get_stats(blk);
  2006. struct nvme_compare_ctx *ctx = req->opaque;
  2007. g_autofree uint8_t *buf = NULL;
  2008. uint16_t status;
  2009. trace_pci_nvme_compare_data_cb(nvme_cid(req));
  2010. if (ret) {
  2011. block_acct_failed(stats, acct);
  2012. req->status = NVME_UNRECOVERED_READ;
  2013. trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), req->status);
  2014. goto out;
  2015. }
  2016. buf = g_malloc(ctx->data.iov.size);
  2017. status = nvme_bounce_data(n, buf, ctx->data.iov.size,
  2018. NVME_TX_DIRECTION_TO_DEVICE, req);
  2019. if (status) {
  2020. req->status = status;
  2021. goto out;
  2022. }
  2023. if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) {
  2024. req->status = NVME_CMP_FAILURE | NVME_DNR;
  2025. goto out;
  2026. }
  2027. if (ns->lbaf.ms) {
  2028. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  2029. uint64_t slba = le64_to_cpu(rw->slba);
  2030. uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
  2031. size_t mlen = nvme_m2b(ns, nlb);
  2032. uint64_t offset = nvme_moff(ns, slba);
  2033. ctx->mdata.bounce = g_malloc(mlen);
  2034. qemu_iovec_init(&ctx->mdata.iov, 1);
  2035. qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
  2036. req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
  2037. nvme_compare_mdata_cb, req);
  2038. return;
  2039. }
  2040. block_acct_done(stats, acct);
  2041. out:
  2042. qemu_iovec_destroy(&ctx->data.iov);
  2043. g_free(ctx->data.bounce);
  2044. g_free(ctx);
  2045. nvme_enqueue_req_completion(nvme_cq(req), req);
  2046. }
  2047. typedef struct NvmeDSMAIOCB {
  2048. BlockAIOCB common;
  2049. BlockAIOCB *aiocb;
  2050. NvmeRequest *req;
  2051. int ret;
  2052. NvmeDsmRange *range;
  2053. unsigned int nr;
  2054. unsigned int idx;
  2055. } NvmeDSMAIOCB;
  2056. static void nvme_dsm_cancel(BlockAIOCB *aiocb)
  2057. {
  2058. NvmeDSMAIOCB *iocb = container_of(aiocb, NvmeDSMAIOCB, common);
  2059. /* break nvme_dsm_cb loop */
  2060. iocb->idx = iocb->nr;
  2061. iocb->ret = -ECANCELED;
  2062. if (iocb->aiocb) {
  2063. blk_aio_cancel_async(iocb->aiocb);
  2064. iocb->aiocb = NULL;
  2065. } else {
  2066. /*
  2067. * We only reach this if nvme_dsm_cancel() has already been called or
  2068. * the command ran to completion.
  2069. */
  2070. assert(iocb->idx == iocb->nr);
  2071. }
  2072. }
  2073. static const AIOCBInfo nvme_dsm_aiocb_info = {
  2074. .aiocb_size = sizeof(NvmeDSMAIOCB),
  2075. .cancel_async = nvme_dsm_cancel,
  2076. };
  2077. static void nvme_dsm_cb(void *opaque, int ret);
  2078. static void nvme_dsm_md_cb(void *opaque, int ret)
  2079. {
  2080. NvmeDSMAIOCB *iocb = opaque;
  2081. NvmeRequest *req = iocb->req;
  2082. NvmeNamespace *ns = req->ns;
  2083. NvmeDsmRange *range;
  2084. uint64_t slba;
  2085. uint32_t nlb;
  2086. if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) {
  2087. goto done;
  2088. }
  2089. range = &iocb->range[iocb->idx - 1];
  2090. slba = le64_to_cpu(range->slba);
  2091. nlb = le32_to_cpu(range->nlb);
  2092. /*
2093. * Check that all blocks were discarded (zeroed); otherwise we do not zero
  2094. * the metadata.
  2095. */
  2096. ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_ZERO);
  2097. if (ret) {
  2098. if (ret < 0) {
  2099. goto done;
  2100. }
  2101. nvme_dsm_cb(iocb, 0);
  2102. return;
  2103. }
  2104. iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_moff(ns, slba),
  2105. nvme_m2b(ns, nlb), BDRV_REQ_MAY_UNMAP,
  2106. nvme_dsm_cb, iocb);
  2107. return;
  2108. done:
  2109. nvme_dsm_cb(iocb, ret);
  2110. }
  2111. static void nvme_dsm_cb(void *opaque, int ret)
  2112. {
  2113. NvmeDSMAIOCB *iocb = opaque;
  2114. NvmeRequest *req = iocb->req;
  2115. NvmeCtrl *n = nvme_ctrl(req);
  2116. NvmeNamespace *ns = req->ns;
  2117. NvmeDsmRange *range;
  2118. uint64_t slba;
  2119. uint32_t nlb;
  2120. if (iocb->ret < 0) {
  2121. goto done;
  2122. } else if (ret < 0) {
  2123. iocb->ret = ret;
  2124. goto done;
  2125. }
  2126. next:
  2127. if (iocb->idx == iocb->nr) {
  2128. goto done;
  2129. }
  2130. range = &iocb->range[iocb->idx++];
  2131. slba = le64_to_cpu(range->slba);
  2132. nlb = le32_to_cpu(range->nlb);
  2133. trace_pci_nvme_dsm_deallocate(slba, nlb);
  2134. if (nlb > n->dmrsl) {
  2135. trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl);
  2136. goto next;
  2137. }
  2138. if (nvme_check_bounds(ns, slba, nlb)) {
  2139. trace_pci_nvme_err_invalid_lba_range(slba, nlb,
  2140. ns->id_ns.nsze);
  2141. goto next;
  2142. }
  2143. iocb->aiocb = blk_aio_pdiscard(ns->blkconf.blk, nvme_l2b(ns, slba),
  2144. nvme_l2b(ns, nlb),
  2145. nvme_dsm_md_cb, iocb);
  2146. return;
  2147. done:
  2148. iocb->aiocb = NULL;
  2149. iocb->common.cb(iocb->common.opaque, iocb->ret);
  2150. g_free(iocb->range);
  2151. qemu_aio_unref(iocb);
  2152. }
  2153. static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
  2154. {
  2155. NvmeNamespace *ns = req->ns;
  2156. NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd;
  2157. uint32_t attr = le32_to_cpu(dsm->attributes);
  2158. uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1;
  2159. uint16_t status = NVME_SUCCESS;
  2160. trace_pci_nvme_dsm(nr, attr);
  2161. if (attr & NVME_DSMGMT_AD) {
  2162. NvmeDSMAIOCB *iocb = blk_aio_get(&nvme_dsm_aiocb_info, ns->blkconf.blk,
  2163. nvme_misc_cb, req);
  2164. iocb->req = req;
  2165. iocb->ret = 0;
  2166. iocb->range = g_new(NvmeDsmRange, nr);
  2167. iocb->nr = nr;
  2168. iocb->idx = 0;
  2169. status = nvme_h2c(n, (uint8_t *)iocb->range, sizeof(NvmeDsmRange) * nr,
  2170. req);
  2171. if (status) {
  2172. g_free(iocb->range);
  2173. qemu_aio_unref(iocb);
  2174. return status;
  2175. }
  2176. req->aiocb = &iocb->common;
  2177. nvme_dsm_cb(iocb, 0);
  2178. return NVME_NO_COMPLETE;
  2179. }
  2180. return status;
  2181. }
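/*
 * Verify reads the target range into a bounce buffer; the data and
 * end-to-end protection checks are carried out later in the
 * nvme_verify_mdata_in_cb() completion path. The transfer size is capped
 * by the verify size limit (vsl) rather than MDTS.
 */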
  2182. static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req)
  2183. {
  2184. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  2185. NvmeNamespace *ns = req->ns;
  2186. BlockBackend *blk = ns->blkconf.blk;
  2187. uint64_t slba = le64_to_cpu(rw->slba);
  2188. uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
  2189. size_t len = nvme_l2b(ns, nlb);
  2190. size_t data_len = len;
  2191. int64_t offset = nvme_l2b(ns, slba);
  2192. uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
  2193. uint32_t reftag = le32_to_cpu(rw->reftag);
  2194. NvmeBounceContext *ctx = NULL;
  2195. uint16_t status;
  2196. trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb);
  2197. if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
  2198. status = nvme_check_prinfo(ns, prinfo, slba, reftag);
  2199. if (status) {
  2200. return status;
  2201. }
  2202. if (prinfo & NVME_PRINFO_PRACT) {
  2203. return NVME_INVALID_PROT_INFO | NVME_DNR;
  2204. }
  2205. }
  2206. if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
  2207. data_len += nvme_m2b(ns, nlb);
  2208. }
  2209. if (data_len > (n->page_size << n->params.vsl)) {
  2210. return NVME_INVALID_FIELD | NVME_DNR;
  2211. }
  2212. status = nvme_check_bounds(ns, slba, nlb);
  2213. if (status) {
  2214. return status;
  2215. }
  2216. if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
  2217. status = nvme_check_dulbe(ns, slba, nlb);
  2218. if (status) {
  2219. return status;
  2220. }
  2221. }
  2222. ctx = g_new0(NvmeBounceContext, 1);
  2223. ctx->req = req;
  2224. ctx->data.bounce = g_malloc(len);
  2225. qemu_iovec_init(&ctx->data.iov, 1);
  2226. qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
  2227. block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
  2228. BLOCK_ACCT_READ);
  2229. req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
  2230. nvme_verify_mdata_in_cb, ctx);
  2231. return NVME_NO_COMPLETE;
  2232. }
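/*
 * State for the Copy command. Each source range is processed in turn by an
 * asynchronous pipeline: nvme_do_copy() reads the source data,
 * nvme_copy_in_cb() reads the source metadata, nvme_copy_in_completed_cb()
 * applies protection-information checks and writes the data to the
 * destination, nvme_copy_out_cb() writes the metadata, and
 * nvme_copy_out_completed_cb() advances slba/idx and re-enters
 * nvme_do_copy() for the next range.
 */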
  2233. typedef struct NvmeCopyAIOCB {
  2234. BlockAIOCB common;
  2235. BlockAIOCB *aiocb;
  2236. NvmeRequest *req;
  2237. NvmeCtrl *n;
  2238. int ret;
  2239. void *ranges;
  2240. unsigned int format;
  2241. int nr;
  2242. int idx;
  2243. uint8_t *bounce;
  2244. QEMUIOVector iov;
  2245. struct {
  2246. BlockAcctCookie read;
  2247. BlockAcctCookie write;
  2248. } acct;
  2249. uint64_t reftag;
  2250. uint64_t slba;
  2251. NvmeZone *zone;
  2252. NvmeNamespace *sns;
  2253. uint32_t tcl;
  2254. } NvmeCopyAIOCB;
  2255. static void nvme_copy_cancel(BlockAIOCB *aiocb)
  2256. {
  2257. NvmeCopyAIOCB *iocb = container_of(aiocb, NvmeCopyAIOCB, common);
  2258. iocb->ret = -ECANCELED;
  2259. if (iocb->aiocb) {
  2260. blk_aio_cancel_async(iocb->aiocb);
  2261. iocb->aiocb = NULL;
  2262. }
  2263. }
  2264. static const AIOCBInfo nvme_copy_aiocb_info = {
  2265. .aiocb_size = sizeof(NvmeCopyAIOCB),
  2266. .cancel_async = nvme_copy_cancel,
  2267. };
  2268. static void nvme_copy_done(NvmeCopyAIOCB *iocb)
  2269. {
  2270. NvmeRequest *req = iocb->req;
  2271. NvmeNamespace *ns = req->ns;
  2272. BlockAcctStats *stats = blk_get_stats(ns->blkconf.blk);
  2273. if (iocb->idx != iocb->nr) {
  2274. req->cqe.result = cpu_to_le32(iocb->idx);
  2275. }
  2276. qemu_iovec_destroy(&iocb->iov);
  2277. g_free(iocb->bounce);
  2278. if (iocb->ret < 0) {
  2279. block_acct_failed(stats, &iocb->acct.read);
  2280. block_acct_failed(stats, &iocb->acct.write);
  2281. } else {
  2282. block_acct_done(stats, &iocb->acct.read);
  2283. block_acct_done(stats, &iocb->acct.write);
  2284. }
  2285. iocb->common.cb(iocb->common.opaque, iocb->ret);
  2286. qemu_aio_unref(iocb);
  2287. }
  2288. static void nvme_do_copy(NvmeCopyAIOCB *iocb);
  2289. static void nvme_copy_source_range_parse_format0_2(void *ranges,
  2290. int idx, uint64_t *slba,
  2291. uint32_t *nlb,
  2292. uint32_t *snsid,
  2293. uint16_t *apptag,
  2294. uint16_t *appmask,
  2295. uint64_t *reftag)
  2296. {
  2297. NvmeCopySourceRangeFormat0_2 *_ranges = ranges;
  2298. if (snsid) {
  2299. *snsid = le32_to_cpu(_ranges[idx].sparams);
  2300. }
  2301. if (slba) {
  2302. *slba = le64_to_cpu(_ranges[idx].slba);
  2303. }
  2304. if (nlb) {
  2305. *nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
  2306. }
  2307. if (apptag) {
  2308. *apptag = le16_to_cpu(_ranges[idx].apptag);
  2309. }
  2310. if (appmask) {
  2311. *appmask = le16_to_cpu(_ranges[idx].appmask);
  2312. }
  2313. if (reftag) {
  2314. *reftag = le32_to_cpu(_ranges[idx].reftag);
  2315. }
  2316. }
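/*
 * For descriptor formats 1h and 3h the reference tag is stored big-endian
 * in bytes sr[4..9] of the source range entry; the helper below assembles
 * it into the low 48 bits of a 64-bit value:
 *
 *   reftag = (sr[4] << 40) | (sr[5] << 32) | ... | (sr[8] << 8) | sr[9]
 */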
  2317. static void nvme_copy_source_range_parse_format1_3(void *ranges, int idx,
  2318. uint64_t *slba,
  2319. uint32_t *nlb,
  2320. uint32_t *snsid,
  2321. uint16_t *apptag,
  2322. uint16_t *appmask,
  2323. uint64_t *reftag)
  2324. {
  2325. NvmeCopySourceRangeFormat1_3 *_ranges = ranges;
  2326. if (snsid) {
  2327. *snsid = le32_to_cpu(_ranges[idx].sparams);
  2328. }
  2329. if (slba) {
  2330. *slba = le64_to_cpu(_ranges[idx].slba);
  2331. }
  2332. if (nlb) {
  2333. *nlb = le16_to_cpu(_ranges[idx].nlb) + 1;
  2334. }
  2335. if (apptag) {
  2336. *apptag = le16_to_cpu(_ranges[idx].apptag);
  2337. }
  2338. if (appmask) {
  2339. *appmask = le16_to_cpu(_ranges[idx].appmask);
  2340. }
  2341. if (reftag) {
  2342. *reftag = 0;
  2343. *reftag |= (uint64_t)_ranges[idx].sr[4] << 40;
  2344. *reftag |= (uint64_t)_ranges[idx].sr[5] << 32;
  2345. *reftag |= (uint64_t)_ranges[idx].sr[6] << 24;
  2346. *reftag |= (uint64_t)_ranges[idx].sr[7] << 16;
  2347. *reftag |= (uint64_t)_ranges[idx].sr[8] << 8;
  2348. *reftag |= (uint64_t)_ranges[idx].sr[9];
  2349. }
  2350. }
  2351. static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format,
  2352. uint64_t *slba, uint32_t *nlb,
  2353. uint32_t *snsid, uint16_t *apptag,
  2354. uint16_t *appmask, uint64_t *reftag)
  2355. {
  2356. switch (format) {
  2357. case NVME_COPY_FORMAT_0:
  2358. case NVME_COPY_FORMAT_2:
  2359. nvme_copy_source_range_parse_format0_2(ranges, idx, slba, nlb, snsid,
  2360. apptag, appmask, reftag);
  2361. break;
  2362. case NVME_COPY_FORMAT_1:
  2363. case NVME_COPY_FORMAT_3:
  2364. nvme_copy_source_range_parse_format1_3(ranges, idx, slba, nlb, snsid,
  2365. apptag, appmask, reftag);
  2366. break;
  2367. default:
  2368. abort();
  2369. }
  2370. }
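/*
 * Sum the number of logical blocks across all source ranges; the total is
 * cached in iocb->tcl for the later overlap check and must not exceed the
 * namespace Maximum Copy Length (MCL).
 */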
  2371. static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns,
  2372. NvmeCopyAIOCB *iocb, uint16_t nr)
  2373. {
  2374. uint32_t copy_len = 0;
  2375. for (int idx = 0; idx < nr; idx++) {
  2376. uint32_t nlb;
  2377. nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL,
  2378. &nlb, NULL, NULL, NULL, NULL);
  2379. copy_len += nlb;
  2380. }
  2381. iocb->tcl = copy_len;
  2382. if (copy_len > ns->id_ns.mcl) {
  2383. return NVME_CMD_SIZE_LIMIT | NVME_DNR;
  2384. }
  2385. return NVME_SUCCESS;
  2386. }
  2387. static void nvme_copy_out_completed_cb(void *opaque, int ret)
  2388. {
  2389. NvmeCopyAIOCB *iocb = opaque;
  2390. NvmeRequest *req = iocb->req;
  2391. NvmeNamespace *dns = req->ns;
  2392. uint32_t nlb;
  2393. nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
  2394. &nlb, NULL, NULL, NULL, NULL);
  2395. if (ret < 0) {
  2396. iocb->ret = ret;
  2397. req->status = NVME_WRITE_FAULT;
  2398. goto out;
  2399. } else if (iocb->ret < 0) {
  2400. goto out;
  2401. }
  2402. if (dns->params.zoned) {
  2403. nvme_advance_zone_wp(dns, iocb->zone, nlb);
  2404. }
  2405. iocb->idx++;
  2406. iocb->slba += nlb;
  2407. out:
  2408. nvme_do_copy(iocb);
  2409. }
  2410. static void nvme_copy_out_cb(void *opaque, int ret)
  2411. {
  2412. NvmeCopyAIOCB *iocb = opaque;
  2413. NvmeRequest *req = iocb->req;
  2414. NvmeNamespace *dns = req->ns;
  2415. uint32_t nlb;
  2416. size_t mlen;
  2417. uint8_t *mbounce;
  2418. if (ret < 0 || iocb->ret < 0 || !dns->lbaf.ms) {
  2419. goto out;
  2420. }
  2421. nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
  2422. &nlb, NULL, NULL, NULL, NULL);
  2423. mlen = nvme_m2b(dns, nlb);
  2424. mbounce = iocb->bounce + nvme_l2b(dns, nlb);
  2425. qemu_iovec_reset(&iocb->iov);
  2426. qemu_iovec_add(&iocb->iov, mbounce, mlen);
  2427. iocb->aiocb = blk_aio_pwritev(dns->blkconf.blk, nvme_moff(dns, iocb->slba),
  2428. &iocb->iov, 0, nvme_copy_out_completed_cb,
  2429. iocb);
  2430. return;
  2431. out:
  2432. nvme_copy_out_completed_cb(iocb, ret);
  2433. }
  2434. static void nvme_copy_in_completed_cb(void *opaque, int ret)
  2435. {
  2436. NvmeCopyAIOCB *iocb = opaque;
  2437. NvmeRequest *req = iocb->req;
  2438. NvmeNamespace *sns = iocb->sns;
  2439. NvmeNamespace *dns = req->ns;
  2440. NvmeCopyCmd *copy = NULL;
  2441. uint8_t *mbounce = NULL;
  2442. uint32_t nlb;
  2443. uint64_t slba;
  2444. uint16_t apptag, appmask;
  2445. uint64_t reftag;
  2446. size_t len, mlen;
  2447. uint16_t status;
  2448. if (ret < 0) {
  2449. iocb->ret = ret;
  2450. req->status = NVME_UNRECOVERED_READ;
  2451. goto out;
  2452. } else if (iocb->ret < 0) {
  2453. goto out;
  2454. }
  2455. nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
  2456. &nlb, NULL, &apptag, &appmask, &reftag);
  2457. trace_pci_nvme_copy_out(iocb->slba, nlb);
  2458. len = nvme_l2b(sns, nlb);
  2459. if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps)) {
  2460. copy = (NvmeCopyCmd *)&req->cmd;
  2461. uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
  2462. mlen = nvme_m2b(sns, nlb);
  2463. mbounce = iocb->bounce + nvme_l2b(sns, nlb);
  2464. status = nvme_dif_mangle_mdata(sns, mbounce, mlen, slba);
  2465. if (status) {
  2466. goto invalid;
  2467. }
  2468. status = nvme_dif_check(sns, iocb->bounce, len, mbounce, mlen, prinfor,
  2469. slba, apptag, appmask, &reftag);
  2470. if (status) {
  2471. goto invalid;
  2472. }
  2473. }
  2474. if (NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
  2475. copy = (NvmeCopyCmd *)&req->cmd;
  2476. uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
  2477. mlen = nvme_m2b(dns, nlb);
  2478. mbounce = iocb->bounce + nvme_l2b(dns, nlb);
  2479. apptag = le16_to_cpu(copy->apptag);
  2480. appmask = le16_to_cpu(copy->appmask);
  2481. if (prinfow & NVME_PRINFO_PRACT) {
  2482. status = nvme_check_prinfo(dns, prinfow, iocb->slba, iocb->reftag);
  2483. if (status) {
  2484. goto invalid;
  2485. }
  2486. nvme_dif_pract_generate_dif(dns, iocb->bounce, len, mbounce, mlen,
  2487. apptag, &iocb->reftag);
  2488. } else {
  2489. status = nvme_dif_check(dns, iocb->bounce, len, mbounce, mlen,
  2490. prinfow, iocb->slba, apptag, appmask,
  2491. &iocb->reftag);
  2492. if (status) {
  2493. goto invalid;
  2494. }
  2495. }
  2496. }
  2497. status = nvme_check_bounds(dns, iocb->slba, nlb);
  2498. if (status) {
  2499. goto invalid;
  2500. }
  2501. if (dns->params.zoned) {
  2502. status = nvme_check_zone_write(dns, iocb->zone, iocb->slba, nlb);
  2503. if (status) {
  2504. goto invalid;
  2505. }
  2506. if (!(iocb->zone->d.za & NVME_ZA_ZRWA_VALID)) {
  2507. iocb->zone->w_ptr += nlb;
  2508. }
  2509. }
  2510. qemu_iovec_reset(&iocb->iov);
  2511. qemu_iovec_add(&iocb->iov, iocb->bounce, len);
  2512. block_acct_start(blk_get_stats(dns->blkconf.blk), &iocb->acct.write, 0,
  2513. BLOCK_ACCT_WRITE);
  2514. iocb->aiocb = blk_aio_pwritev(dns->blkconf.blk, nvme_l2b(dns, iocb->slba),
  2515. &iocb->iov, 0, nvme_copy_out_cb, iocb);
  2516. return;
  2517. invalid:
  2518. req->status = status;
  2519. iocb->ret = -1;
  2520. out:
  2521. nvme_do_copy(iocb);
  2522. }
  2523. static void nvme_copy_in_cb(void *opaque, int ret)
  2524. {
  2525. NvmeCopyAIOCB *iocb = opaque;
  2526. NvmeNamespace *sns = iocb->sns;
  2527. uint64_t slba;
  2528. uint32_t nlb;
  2529. if (ret < 0 || iocb->ret < 0 || !sns->lbaf.ms) {
  2530. goto out;
  2531. }
  2532. nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
  2533. &nlb, NULL, NULL, NULL, NULL);
  2534. qemu_iovec_reset(&iocb->iov);
  2535. qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(sns, nlb),
  2536. nvme_m2b(sns, nlb));
  2537. iocb->aiocb = blk_aio_preadv(sns->blkconf.blk, nvme_moff(sns, slba),
  2538. &iocb->iov, 0, nvme_copy_in_completed_cb,
  2539. iocb);
  2540. return;
  2541. out:
  2542. nvme_copy_in_completed_cb(iocb, ret);
  2543. }
  2544. static inline bool nvme_csi_supports_copy(uint8_t csi)
  2545. {
  2546. return csi == NVME_CSI_NVM || csi == NVME_CSI_ZONED;
  2547. }
  2548. static inline bool nvme_copy_ns_format_match(NvmeNamespace *sns,
  2549. NvmeNamespace *dns)
  2550. {
  2551. return sns->lbaf.ds == dns->lbaf.ds && sns->lbaf.ms == dns->lbaf.ms;
  2552. }
  2553. static bool nvme_copy_matching_ns_format(NvmeNamespace *sns, NvmeNamespace *dns,
  2554. bool pi_enable)
  2555. {
  2556. if (!nvme_csi_supports_copy(sns->csi) ||
  2557. !nvme_csi_supports_copy(dns->csi)) {
  2558. return false;
  2559. }
  2560. if (!pi_enable && !nvme_copy_ns_format_match(sns, dns)) {
  2561. return false;
  2562. }
  2563. if (pi_enable && (!nvme_copy_ns_format_match(sns, dns) ||
  2564. sns->id_ns.dps != dns->id_ns.dps)) {
  2565. return false;
  2566. }
  2567. return true;
  2568. }
  2569. static inline bool nvme_copy_corresp_pi_match(NvmeNamespace *sns,
  2570. NvmeNamespace *dns)
  2571. {
  2572. return sns->lbaf.ms == 0 &&
  2573. ((dns->lbaf.ms == 8 && dns->pif == 0) ||
  2574. (dns->lbaf.ms == 16 && dns->pif == 1));
  2575. }
  2576. static bool nvme_copy_corresp_pi_format(NvmeNamespace *sns, NvmeNamespace *dns,
  2577. bool sns_pi_en)
  2578. {
  2579. if (!nvme_csi_supports_copy(sns->csi) ||
  2580. !nvme_csi_supports_copy(dns->csi)) {
  2581. return false;
  2582. }
  2583. if (!sns_pi_en && !nvme_copy_corresp_pi_match(sns, dns)) {
  2584. return false;
  2585. }
  2586. if (sns_pi_en && !nvme_copy_corresp_pi_match(dns, sns)) {
  2587. return false;
  2588. }
  2589. return true;
  2590. }
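/*
 * Top of the per-range copy loop. For descriptor formats 2h and 3h the
 * source namespace may differ from the destination, so it is resolved here
 * and the source/destination formats and protection-information settings
 * are cross-checked before the range is bounced in. The bounce buffer is
 * sized for the worst case (MSSRL logical blocks plus metadata).
 */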
  2591. static void nvme_do_copy(NvmeCopyAIOCB *iocb)
  2592. {
  2593. NvmeRequest *req = iocb->req;
  2594. NvmeNamespace *sns;
  2595. NvmeNamespace *dns = req->ns;
  2596. NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
  2597. uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
  2598. uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
  2599. uint64_t slba;
  2600. uint32_t nlb;
  2601. size_t len;
  2602. uint16_t status;
  2603. uint32_t dnsid = le32_to_cpu(req->cmd.nsid);
  2604. uint32_t snsid = dnsid;
  2605. if (iocb->ret < 0) {
  2606. goto done;
  2607. }
  2608. if (iocb->idx == iocb->nr) {
  2609. goto done;
  2610. }
  2611. if (iocb->format == 2 || iocb->format == 3) {
  2612. nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format,
  2613. &slba, &nlb, &snsid, NULL, NULL, NULL);
  2614. if (snsid != dnsid) {
  2615. if (snsid == NVME_NSID_BROADCAST ||
  2616. !nvme_nsid_valid(iocb->n, snsid)) {
  2617. status = NVME_INVALID_NSID | NVME_DNR;
  2618. goto invalid;
  2619. }
  2620. iocb->sns = nvme_ns(iocb->n, snsid);
  2621. if (unlikely(!iocb->sns)) {
  2622. status = NVME_INVALID_FIELD | NVME_DNR;
  2623. goto invalid;
  2624. }
  2625. } else {
  2626. if (((slba + nlb) > iocb->slba) &&
  2627. ((slba + nlb) < (iocb->slba + iocb->tcl))) {
  2628. status = NVME_CMD_OVERLAP_IO_RANGE | NVME_DNR;
  2629. goto invalid;
  2630. }
  2631. }
  2632. } else {
  2633. nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format,
  2634. &slba, &nlb, NULL, NULL, NULL, NULL);
  2635. }
  2636. sns = iocb->sns;
  2637. if ((snsid == dnsid) && NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
  2638. ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) {
  2639. status = NVME_INVALID_FIELD | NVME_DNR;
  2640. goto invalid;
  2641. } else if (snsid != dnsid) {
  2642. if (!NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
  2643. !NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
  2644. if (!nvme_copy_matching_ns_format(sns, dns, false)) {
  2645. status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
  2646. goto invalid;
  2647. }
  2648. }
  2649. if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
  2650. NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
  2651. if ((prinfor & NVME_PRINFO_PRACT) !=
  2652. (prinfow & NVME_PRINFO_PRACT)) {
  2653. status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
  2654. goto invalid;
  2655. } else {
  2656. if (!nvme_copy_matching_ns_format(sns, dns, true)) {
  2657. status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
  2658. goto invalid;
  2659. }
  2660. }
  2661. }
  2662. if (!NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
  2663. NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
  2664. if (!(prinfow & NVME_PRINFO_PRACT)) {
  2665. status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
  2666. goto invalid;
  2667. } else {
  2668. if (!nvme_copy_corresp_pi_format(sns, dns, false)) {
  2669. status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
  2670. goto invalid;
  2671. }
  2672. }
  2673. }
  2674. if (NVME_ID_NS_DPS_TYPE(sns->id_ns.dps) &&
  2675. !NVME_ID_NS_DPS_TYPE(dns->id_ns.dps)) {
  2676. if (!(prinfor & NVME_PRINFO_PRACT)) {
  2677. status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
  2678. goto invalid;
  2679. } else {
  2680. if (!nvme_copy_corresp_pi_format(sns, dns, true)) {
  2681. status = NVME_CMD_INCOMP_NS_OR_FMT | NVME_DNR;
  2682. goto invalid;
  2683. }
  2684. }
  2685. }
  2686. }
  2687. len = nvme_l2b(sns, nlb);
  2688. trace_pci_nvme_copy_source_range(slba, nlb);
  2689. if (nlb > le16_to_cpu(sns->id_ns.mssrl)) {
  2690. status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
  2691. goto invalid;
  2692. }
  2693. status = nvme_check_bounds(sns, slba, nlb);
  2694. if (status) {
  2695. goto invalid;
  2696. }
  2697. if (NVME_ERR_REC_DULBE(sns->features.err_rec)) {
  2698. status = nvme_check_dulbe(sns, slba, nlb);
  2699. if (status) {
  2700. goto invalid;
  2701. }
  2702. }
  2703. if (sns->params.zoned) {
  2704. status = nvme_check_zone_read(sns, slba, nlb);
  2705. if (status) {
  2706. goto invalid;
  2707. }
  2708. }
  2709. g_free(iocb->bounce);
  2710. iocb->bounce = g_malloc_n(le16_to_cpu(sns->id_ns.mssrl),
  2711. sns->lbasz + sns->lbaf.ms);
  2712. qemu_iovec_reset(&iocb->iov);
  2713. qemu_iovec_add(&iocb->iov, iocb->bounce, len);
  2714. block_acct_start(blk_get_stats(sns->blkconf.blk), &iocb->acct.read, 0,
  2715. BLOCK_ACCT_READ);
  2716. iocb->aiocb = blk_aio_preadv(sns->blkconf.blk, nvme_l2b(sns, slba),
  2717. &iocb->iov, 0, nvme_copy_in_cb, iocb);
  2718. return;
  2719. invalid:
  2720. req->status = status;
  2721. iocb->ret = -1;
  2722. done:
  2723. nvme_copy_done(iocb);
  2724. }
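/*
 * Validate the Copy command itself: the descriptor format must be
 * advertised in OCFS (and, for formats 2h/3h, enabled via the host
 * behavior support CDFE field), the range count must not exceed MSRC, and
 * the format must match the namespace protection information format (pif).
 * The descriptor list is then copied in from the host and the per-range
 * loop above is started.
 */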
  2725. static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
  2726. {
  2727. NvmeNamespace *ns = req->ns;
  2728. NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
  2729. NvmeCopyAIOCB *iocb = blk_aio_get(&nvme_copy_aiocb_info, ns->blkconf.blk,
  2730. nvme_misc_cb, req);
  2731. uint16_t nr = copy->nr + 1;
  2732. uint8_t format = copy->control[0] & 0xf;
  2733. size_t len = sizeof(NvmeCopySourceRangeFormat0_2);
  2734. uint16_t status;
  2735. trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);
  2736. iocb->ranges = NULL;
  2737. iocb->zone = NULL;
  2738. if (!(n->id_ctrl.ocfs & (1 << format)) ||
  2739. ((format == 2 || format == 3) &&
  2740. !(n->features.hbs.cdfe & (1 << format)))) {
  2741. trace_pci_nvme_err_copy_invalid_format(format);
  2742. status = NVME_INVALID_FIELD | NVME_DNR;
  2743. goto invalid;
  2744. }
  2745. if (nr > ns->id_ns.msrc + 1) {
  2746. status = NVME_CMD_SIZE_LIMIT | NVME_DNR;
  2747. goto invalid;
  2748. }
  2749. if ((ns->pif == 0x0 && (format != 0x0 && format != 0x2)) ||
  2750. (ns->pif != 0x0 && (format != 0x1 && format != 0x3))) {
  2751. status = NVME_INVALID_FORMAT | NVME_DNR;
  2752. goto invalid;
  2753. }
  2754. if (ns->pif) {
  2755. len = sizeof(NvmeCopySourceRangeFormat1_3);
  2756. }
  2757. iocb->format = format;
  2758. iocb->ranges = g_malloc_n(nr, len);
  2759. status = nvme_h2c(n, (uint8_t *)iocb->ranges, len * nr, req);
  2760. if (status) {
  2761. goto invalid;
  2762. }
  2763. iocb->slba = le64_to_cpu(copy->sdlba);
  2764. if (ns->params.zoned) {
  2765. iocb->zone = nvme_get_zone_by_slba(ns, iocb->slba);
  2766. if (!iocb->zone) {
  2767. status = NVME_LBA_RANGE | NVME_DNR;
  2768. goto invalid;
  2769. }
  2770. status = nvme_zrm_auto(n, ns, iocb->zone);
  2771. if (status) {
  2772. goto invalid;
  2773. }
  2774. }
  2775. status = nvme_check_copy_mcl(ns, iocb, nr);
  2776. if (status) {
  2777. goto invalid;
  2778. }
  2779. iocb->req = req;
  2780. iocb->ret = 0;
  2781. iocb->nr = nr;
  2782. iocb->idx = 0;
  2783. iocb->reftag = le32_to_cpu(copy->reftag);
  2784. iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32;
  2785. qemu_iovec_init(&iocb->iov, 1);
  2786. req->aiocb = &iocb->common;
  2787. iocb->sns = req->ns;
  2788. iocb->n = n;
  2789. iocb->bounce = NULL;
  2790. nvme_do_copy(iocb);
  2791. return NVME_NO_COMPLETE;
  2792. invalid:
  2793. g_free(iocb->ranges);
  2794. qemu_aio_unref(iocb);
  2795. return status;
  2796. }
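/*
 * Compare reads the LBA range into a bounce buffer; the comparison with
 * the host-supplied data happens in nvme_compare_data_cb() once the read
 * completes.
 */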
  2797. static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
  2798. {
  2799. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  2800. NvmeNamespace *ns = req->ns;
  2801. BlockBackend *blk = ns->blkconf.blk;
  2802. uint64_t slba = le64_to_cpu(rw->slba);
  2803. uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
  2804. uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
  2805. size_t data_len = nvme_l2b(ns, nlb);
  2806. size_t len = data_len;
  2807. int64_t offset = nvme_l2b(ns, slba);
  2808. struct nvme_compare_ctx *ctx = NULL;
  2809. uint16_t status;
  2810. trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb);
  2811. if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (prinfo & NVME_PRINFO_PRACT)) {
  2812. return NVME_INVALID_PROT_INFO | NVME_DNR;
  2813. }
  2814. if (nvme_ns_ext(ns)) {
  2815. len += nvme_m2b(ns, nlb);
  2816. }
  2817. if (NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt)) {
  2818. status = nvme_check_mdts(n, data_len);
  2819. } else {
  2820. status = nvme_check_mdts(n, len);
  2821. }
  2822. if (status) {
  2823. return status;
  2824. }
  2825. status = nvme_check_bounds(ns, slba, nlb);
  2826. if (status) {
  2827. return status;
  2828. }
  2829. if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
  2830. status = nvme_check_dulbe(ns, slba, nlb);
  2831. if (status) {
  2832. return status;
  2833. }
  2834. }
  2835. status = nvme_map_dptr(n, &req->sg, len, &req->cmd);
  2836. if (status) {
  2837. return status;
  2838. }
  2839. ctx = g_new(struct nvme_compare_ctx, 1);
  2840. ctx->data.bounce = g_malloc(data_len);
  2841. req->opaque = ctx;
  2842. qemu_iovec_init(&ctx->data.iov, 1);
  2843. qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len);
  2844. block_acct_start(blk_get_stats(blk), &req->acct, data_len,
  2845. BLOCK_ACCT_READ);
  2846. req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0,
  2847. nvme_compare_data_cb, req);
  2848. return NVME_NO_COMPLETE;
  2849. }
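/*
 * Flush with NSID FFFFFFFFh is broadcast to all attached namespaces;
 * nvme_do_flush() walks the namespace slots and nvme_flush_ns_cb() flushes
 * one backing BlockBackend at a time until none are left.
 */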
  2850. typedef struct NvmeFlushAIOCB {
  2851. BlockAIOCB common;
  2852. BlockAIOCB *aiocb;
  2853. NvmeRequest *req;
  2854. int ret;
  2855. NvmeNamespace *ns;
  2856. uint32_t nsid;
  2857. bool broadcast;
  2858. } NvmeFlushAIOCB;
  2859. static void nvme_flush_cancel(BlockAIOCB *acb)
  2860. {
  2861. NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common);
  2862. iocb->ret = -ECANCELED;
  2863. if (iocb->aiocb) {
  2864. blk_aio_cancel_async(iocb->aiocb);
  2865. iocb->aiocb = NULL;
  2866. }
  2867. }
  2868. static const AIOCBInfo nvme_flush_aiocb_info = {
  2869. .aiocb_size = sizeof(NvmeFlushAIOCB),
  2870. .cancel_async = nvme_flush_cancel,
  2871. };
  2872. static void nvme_do_flush(NvmeFlushAIOCB *iocb);
  2873. static void nvme_flush_ns_cb(void *opaque, int ret)
  2874. {
  2875. NvmeFlushAIOCB *iocb = opaque;
  2876. NvmeNamespace *ns = iocb->ns;
  2877. if (ret < 0) {
  2878. iocb->ret = ret;
  2879. iocb->req->status = NVME_WRITE_FAULT;
  2880. goto out;
  2881. } else if (iocb->ret < 0) {
  2882. goto out;
  2883. }
  2884. if (ns) {
  2885. trace_pci_nvme_flush_ns(iocb->nsid);
  2886. iocb->ns = NULL;
  2887. iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb);
  2888. return;
  2889. }
  2890. out:
  2891. nvme_do_flush(iocb);
  2892. }
  2893. static void nvme_do_flush(NvmeFlushAIOCB *iocb)
  2894. {
  2895. NvmeRequest *req = iocb->req;
  2896. NvmeCtrl *n = nvme_ctrl(req);
  2897. int i;
  2898. if (iocb->ret < 0) {
  2899. goto done;
  2900. }
  2901. if (iocb->broadcast) {
  2902. for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
  2903. iocb->ns = nvme_ns(n, i);
  2904. if (iocb->ns) {
  2905. iocb->nsid = i;
  2906. break;
  2907. }
  2908. }
  2909. }
  2910. if (!iocb->ns) {
  2911. goto done;
  2912. }
  2913. nvme_flush_ns_cb(iocb, 0);
  2914. return;
  2915. done:
  2916. iocb->common.cb(iocb->common.opaque, iocb->ret);
  2917. qemu_aio_unref(iocb);
  2918. }
  2919. static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
  2920. {
  2921. NvmeFlushAIOCB *iocb;
  2922. uint32_t nsid = le32_to_cpu(req->cmd.nsid);
  2923. uint16_t status;
  2924. iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req);
  2925. iocb->req = req;
  2926. iocb->ret = 0;
  2927. iocb->ns = NULL;
  2928. iocb->nsid = 0;
  2929. iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
  2930. if (!iocb->broadcast) {
  2931. if (!nvme_nsid_valid(n, nsid)) {
  2932. status = NVME_INVALID_NSID | NVME_DNR;
  2933. goto out;
  2934. }
  2935. iocb->ns = nvme_ns(n, nsid);
  2936. if (!iocb->ns) {
  2937. status = NVME_INVALID_FIELD | NVME_DNR;
  2938. goto out;
  2939. }
  2940. iocb->nsid = nsid;
  2941. }
  2942. req->aiocb = &iocb->common;
  2943. nvme_do_flush(iocb);
  2944. return NVME_NO_COMPLETE;
  2945. out:
  2946. qemu_aio_unref(iocb);
  2947. return status;
  2948. }
  2949. static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
  2950. {
  2951. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  2952. NvmeNamespace *ns = req->ns;
  2953. uint64_t slba = le64_to_cpu(rw->slba);
  2954. uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
  2955. uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
  2956. uint64_t data_size = nvme_l2b(ns, nlb);
  2957. uint64_t mapped_size = data_size;
  2958. uint64_t data_offset;
  2959. BlockBackend *blk = ns->blkconf.blk;
  2960. uint16_t status;
  2961. if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
  2962. mapped_size += nvme_m2b(ns, nlb);
  2963. if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
  2964. bool pract = prinfo & NVME_PRINFO_PRACT;
  2965. if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
  2966. mapped_size = data_size;
  2967. }
  2968. }
  2969. }
  2970. trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba);
  2971. status = nvme_check_mdts(n, mapped_size);
  2972. if (status) {
  2973. goto invalid;
  2974. }
  2975. status = nvme_check_bounds(ns, slba, nlb);
  2976. if (status) {
  2977. goto invalid;
  2978. }
  2979. if (ns->params.zoned) {
  2980. status = nvme_check_zone_read(ns, slba, nlb);
  2981. if (status) {
  2982. trace_pci_nvme_err_zone_read_not_ok(slba, nlb, status);
  2983. goto invalid;
  2984. }
  2985. }
  2986. if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
  2987. status = nvme_check_dulbe(ns, slba, nlb);
  2988. if (status) {
  2989. goto invalid;
  2990. }
  2991. }
  2992. if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
  2993. return nvme_dif_rw(n, req);
  2994. }
  2995. status = nvme_map_data(n, nlb, req);
  2996. if (status) {
  2997. goto invalid;
  2998. }
  2999. data_offset = nvme_l2b(ns, slba);
  3000. block_acct_start(blk_get_stats(blk), &req->acct, data_size,
  3001. BLOCK_ACCT_READ);
  3002. nvme_blk_read(blk, data_offset, BDRV_SECTOR_SIZE, nvme_rw_cb, req);
  3003. return NVME_NO_COMPLETE;
  3004. invalid:
  3005. block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
  3006. return status | NVME_DNR;
  3007. }
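/*
 * Flexible Data Placement accounting: each write charges the endurance
 * group's hbmw/mbmw write statistics and consumes remaining media writes
 * (ruamw) from the targeted reclaim unit; when a reclaim unit is exhausted
 * mid-write, nvme_update_ruh() moves the placement handle on to a fresh
 * one.
 */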
  3008. static void nvme_do_write_fdp(NvmeCtrl *n, NvmeRequest *req, uint64_t slba,
  3009. uint32_t nlb)
  3010. {
  3011. NvmeNamespace *ns = req->ns;
  3012. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  3013. uint64_t data_size = nvme_l2b(ns, nlb);
  3014. uint32_t dw12 = le32_to_cpu(req->cmd.cdw12);
  3015. uint8_t dtype = (dw12 >> 20) & 0xf;
  3016. uint16_t pid = le16_to_cpu(rw->dspec);
  3017. uint16_t ph, rg, ruhid;
  3018. NvmeReclaimUnit *ru;
  3019. if (dtype != NVME_DIRECTIVE_DATA_PLACEMENT ||
  3020. !nvme_parse_pid(ns, pid, &ph, &rg)) {
  3021. ph = 0;
  3022. rg = 0;
  3023. }
  3024. ruhid = ns->fdp.phs[ph];
  3025. ru = &ns->endgrp->fdp.ruhs[ruhid].rus[rg];
  3026. nvme_fdp_stat_inc(&ns->endgrp->fdp.hbmw, data_size);
  3027. nvme_fdp_stat_inc(&ns->endgrp->fdp.mbmw, data_size);
  3028. while (nlb) {
  3029. if (nlb < ru->ruamw) {
  3030. ru->ruamw -= nlb;
  3031. break;
  3032. }
  3033. nlb -= ru->ruamw;
  3034. nvme_update_ruh(n, ns, pid);
  3035. }
  3036. }
  3037. static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
  3038. bool wrz)
  3039. {
  3040. NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
  3041. NvmeNamespace *ns = req->ns;
  3042. uint64_t slba = le64_to_cpu(rw->slba);
  3043. uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
  3044. uint16_t ctrl = le16_to_cpu(rw->control);
  3045. uint8_t prinfo = NVME_RW_PRINFO(ctrl);
  3046. uint64_t data_size = nvme_l2b(ns, nlb);
  3047. uint64_t mapped_size = data_size;
  3048. uint64_t data_offset;
  3049. NvmeZone *zone;
  3050. NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe;
  3051. BlockBackend *blk = ns->blkconf.blk;
  3052. uint16_t status;
  3053. if (nvme_ns_ext(ns) && !(NVME_ID_CTRL_CTRATT_MEM(n->id_ctrl.ctratt))) {
  3054. mapped_size += nvme_m2b(ns, nlb);
  3055. if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
  3056. bool pract = prinfo & NVME_PRINFO_PRACT;
  3057. if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
  3058. mapped_size -= nvme_m2b(ns, nlb);
  3059. }
  3060. }
  3061. }
  3062. trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
  3063. nvme_nsid(ns), nlb, mapped_size, slba);
  3064. if (!wrz) {
  3065. status = nvme_check_mdts(n, mapped_size);
  3066. if (status) {
  3067. goto invalid;
  3068. }
  3069. }
  3070. status = nvme_check_bounds(ns, slba, nlb);
  3071. if (status) {
  3072. goto invalid;
  3073. }
  3074. if (ns->params.zoned) {
  3075. zone = nvme_get_zone_by_slba(ns, slba);
  3076. assert(zone);
  3077. if (append) {
  3078. bool piremap = !!(ctrl & NVME_RW_PIREMAP);
  3079. if (unlikely(zone->d.za & NVME_ZA_ZRWA_VALID)) {
  3080. return NVME_INVALID_ZONE_OP | NVME_DNR;
  3081. }
  3082. if (unlikely(slba != zone->d.zslba)) {
  3083. trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
  3084. status = NVME_INVALID_FIELD;
  3085. goto invalid;
  3086. }
  3087. if (n->params.zasl &&
  3088. data_size > (uint64_t)n->page_size << n->params.zasl) {
  3089. trace_pci_nvme_err_zasl(data_size);
  3090. return NVME_INVALID_FIELD | NVME_DNR;
  3091. }
  3092. slba = zone->w_ptr;
  3093. rw->slba = cpu_to_le64(slba);
  3094. res->slba = cpu_to_le64(slba);
  3095. switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
  3096. case NVME_ID_NS_DPS_TYPE_1:
  3097. if (!piremap) {
  3098. return NVME_INVALID_PROT_INFO | NVME_DNR;
  3099. }
  3100. /* fallthrough */
  3101. case NVME_ID_NS_DPS_TYPE_2:
  3102. if (piremap) {
  3103. uint32_t reftag = le32_to_cpu(rw->reftag);
  3104. rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba));
  3105. }
  3106. break;
  3107. case NVME_ID_NS_DPS_TYPE_3:
  3108. if (piremap) {
  3109. return NVME_INVALID_PROT_INFO | NVME_DNR;
  3110. }
  3111. break;
  3112. }
  3113. }
  3114. status = nvme_check_zone_write(ns, zone, slba, nlb);
  3115. if (status) {
  3116. goto invalid;
  3117. }
  3118. status = nvme_zrm_auto(n, ns, zone);
  3119. if (status) {
  3120. goto invalid;
  3121. }
  3122. if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
  3123. zone->w_ptr += nlb;
  3124. }
  3125. } else if (ns->endgrp && ns->endgrp->fdp.enabled) {
  3126. nvme_do_write_fdp(n, req, slba, nlb);
  3127. }
  3128. data_offset = nvme_l2b(ns, slba);
  3129. if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
  3130. return nvme_dif_rw(n, req);
  3131. }
  3132. if (!wrz) {
  3133. status = nvme_map_data(n, nlb, req);
  3134. if (status) {
  3135. goto invalid;
  3136. }
  3137. block_acct_start(blk_get_stats(blk), &req->acct, data_size,
  3138. BLOCK_ACCT_WRITE);
  3139. nvme_blk_write(blk, data_offset, BDRV_SECTOR_SIZE, nvme_rw_cb, req);
  3140. } else {
  3141. req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size,
  3142. BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
  3143. req);
  3144. }
  3145. return NVME_NO_COMPLETE;
  3146. invalid:
  3147. block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE);
  3148. return status | NVME_DNR;
  3149. }
  3150. static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
  3151. {
  3152. return nvme_do_write(n, req, false, false);
  3153. }
  3154. static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
  3155. {
  3156. return nvme_do_write(n, req, false, true);
  3157. }
  3158. static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req)
  3159. {
  3160. return nvme_do_write(n, req, true, false);
  3161. }
  3162. static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c,
  3163. uint64_t *slba, uint32_t *zone_idx)
  3164. {
  3165. uint32_t dw10 = le32_to_cpu(c->cdw10);
  3166. uint32_t dw11 = le32_to_cpu(c->cdw11);
  3167. if (!ns->params.zoned) {
  3168. trace_pci_nvme_err_invalid_opc(c->opcode);
  3169. return NVME_INVALID_OPCODE | NVME_DNR;
  3170. }
  3171. *slba = ((uint64_t)dw11) << 32 | dw10;
  3172. if (unlikely(*slba >= ns->id_ns.nsze)) {
  3173. trace_pci_nvme_err_invalid_lba_range(*slba, 0, ns->id_ns.nsze);
  3174. *slba = 0;
  3175. return NVME_LBA_RANGE | NVME_DNR;
  3176. }
  3177. *zone_idx = nvme_zone_idx(ns, *slba);
  3178. assert(*zone_idx < ns->num_zones);
  3179. return NVME_SUCCESS;
  3180. }
  3181. typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState,
  3182. NvmeRequest *);
  3183. enum NvmeZoneProcessingMask {
  3184. NVME_PROC_CURRENT_ZONE = 0,
  3185. NVME_PROC_OPENED_ZONES = 1 << 0,
  3186. NVME_PROC_CLOSED_ZONES = 1 << 1,
  3187. NVME_PROC_READ_ONLY_ZONES = 1 << 2,
  3188. NVME_PROC_FULL_ZONES = 1 << 3,
  3189. };
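/*
 * For a Zone Management Send with "Select All" set, the processing mask
 * restricts the bulk operation to zones in the listed states; with the
 * mask left at NVME_PROC_CURRENT_ZONE only the zone addressed by the
 * command is acted on.
 */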
  3190. static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
  3191. NvmeZoneState state, NvmeRequest *req)
  3192. {
  3193. NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
  3194. int flags = 0;
  3195. if (cmd->zsflags & NVME_ZSFLAG_ZRWA_ALLOC) {
  3196. uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs);
  3197. if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) {
  3198. return NVME_INVALID_ZONE_OP | NVME_DNR;
  3199. }
  3200. if (zone->w_ptr % ns->zns.zrwafg) {
  3201. return NVME_NOZRWA | NVME_DNR;
  3202. }
  3203. flags = NVME_ZRM_ZRWA;
  3204. }
  3205. return nvme_zrm_open_flags(nvme_ctrl(req), ns, zone, flags);
  3206. }
  3207. static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone,
  3208. NvmeZoneState state, NvmeRequest *req)
  3209. {
  3210. return nvme_zrm_close(ns, zone);
  3211. }
  3212. static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
  3213. NvmeZoneState state, NvmeRequest *req)
  3214. {
  3215. return nvme_zrm_finish(ns, zone);
  3216. }
  3217. static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone,
  3218. NvmeZoneState state, NvmeRequest *req)
  3219. {
  3220. switch (state) {
  3221. case NVME_ZONE_STATE_READ_ONLY:
  3222. nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_OFFLINE);
  3223. /* fall through */
  3224. case NVME_ZONE_STATE_OFFLINE:
  3225. return NVME_SUCCESS;
  3226. default:
  3227. return NVME_ZONE_INVAL_TRANSITION;
  3228. }
  3229. }
  3230. static uint16_t nvme_set_zd_ext(NvmeNamespace *ns, NvmeZone *zone)
  3231. {
  3232. uint16_t status;
  3233. uint8_t state = nvme_get_zone_state(zone);
  3234. if (state == NVME_ZONE_STATE_EMPTY) {
  3235. status = nvme_aor_check(ns, 1, 0);
  3236. if (status) {
  3237. return status;
  3238. }
  3239. nvme_aor_inc_active(ns);
  3240. zone->d.za |= NVME_ZA_ZD_EXT_VALID;
  3241. nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED);
  3242. return NVME_SUCCESS;
  3243. }
  3244. return NVME_ZONE_INVAL_TRANSITION;
  3245. }
  3246. static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone,
  3247. enum NvmeZoneProcessingMask proc_mask,
  3248. op_handler_t op_hndlr, NvmeRequest *req)
  3249. {
  3250. uint16_t status = NVME_SUCCESS;
  3251. NvmeZoneState zs = nvme_get_zone_state(zone);
  3252. bool proc_zone;
  3253. switch (zs) {
  3254. case NVME_ZONE_STATE_IMPLICITLY_OPEN:
  3255. case NVME_ZONE_STATE_EXPLICITLY_OPEN:
  3256. proc_zone = proc_mask & NVME_PROC_OPENED_ZONES;
  3257. break;
  3258. case NVME_ZONE_STATE_CLOSED:
  3259. proc_zone = proc_mask & NVME_PROC_CLOSED_ZONES;
  3260. break;
  3261. case NVME_ZONE_STATE_READ_ONLY:
  3262. proc_zone = proc_mask & NVME_PROC_READ_ONLY_ZONES;
  3263. break;
  3264. case NVME_ZONE_STATE_FULL:
  3265. proc_zone = proc_mask & NVME_PROC_FULL_ZONES;
  3266. break;
  3267. default:
  3268. proc_zone = false;
  3269. }
  3270. if (proc_zone) {
  3271. status = op_hndlr(ns, zone, zs, req);
  3272. }
  3273. return status;
  3274. }
  3275. static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone,
  3276. enum NvmeZoneProcessingMask proc_mask,
  3277. op_handler_t op_hndlr, NvmeRequest *req)
  3278. {
  3279. NvmeZone *next;
  3280. uint16_t status = NVME_SUCCESS;
  3281. int i;
  3282. if (!proc_mask) {
  3283. status = op_hndlr(ns, zone, nvme_get_zone_state(zone), req);
  3284. } else {
  3285. if (proc_mask & NVME_PROC_CLOSED_ZONES) {
  3286. QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
  3287. status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
  3288. req);
  3289. if (status && status != NVME_NO_COMPLETE) {
  3290. goto out;
  3291. }
  3292. }
  3293. }
  3294. if (proc_mask & NVME_PROC_OPENED_ZONES) {
  3295. QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
  3296. status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
  3297. req);
  3298. if (status && status != NVME_NO_COMPLETE) {
  3299. goto out;
  3300. }
  3301. }
  3302. QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
  3303. status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
  3304. req);
  3305. if (status && status != NVME_NO_COMPLETE) {
  3306. goto out;
  3307. }
  3308. }
  3309. }
  3310. if (proc_mask & NVME_PROC_FULL_ZONES) {
  3311. QTAILQ_FOREACH_SAFE(zone, &ns->full_zones, entry, next) {
  3312. status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
  3313. req);
  3314. if (status && status != NVME_NO_COMPLETE) {
  3315. goto out;
  3316. }
  3317. }
  3318. }
  3319. if (proc_mask & NVME_PROC_READ_ONLY_ZONES) {
  3320. for (i = 0; i < ns->num_zones; i++, zone++) {
  3321. status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr,
  3322. req);
  3323. if (status && status != NVME_NO_COMPLETE) {
  3324. goto out;
  3325. }
  3326. }
  3327. }
  3328. }
  3329. out:
  3330. return status;
  3331. }
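/*
 * Zone Reset is asynchronous: nvme_zone_reset_cb() picks the next zone to
 * reset (all resettable zones when "Select All" is set), write-zeroes the
 * zone's data with BDRV_REQ_MAY_UNMAP, and nvme_zone_reset_epilogue_cb()
 * then zeroes the metadata region before the loop continues.
 */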
  3332. typedef struct NvmeZoneResetAIOCB {
  3333. BlockAIOCB common;
  3334. BlockAIOCB *aiocb;
  3335. NvmeRequest *req;
  3336. int ret;
  3337. bool all;
  3338. int idx;
  3339. NvmeZone *zone;
  3340. } NvmeZoneResetAIOCB;
  3341. static void nvme_zone_reset_cancel(BlockAIOCB *aiocb)
  3342. {
  3343. NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common);
  3344. NvmeRequest *req = iocb->req;
  3345. NvmeNamespace *ns = req->ns;
  3346. iocb->idx = ns->num_zones;
  3347. iocb->ret = -ECANCELED;
  3348. if (iocb->aiocb) {
  3349. blk_aio_cancel_async(iocb->aiocb);
  3350. iocb->aiocb = NULL;
  3351. }
  3352. }
  3353. static const AIOCBInfo nvme_zone_reset_aiocb_info = {
  3354. .aiocb_size = sizeof(NvmeZoneResetAIOCB),
  3355. .cancel_async = nvme_zone_reset_cancel,
  3356. };
  3357. static void nvme_zone_reset_cb(void *opaque, int ret);
  3358. static void nvme_zone_reset_epilogue_cb(void *opaque, int ret)
  3359. {
  3360. NvmeZoneResetAIOCB *iocb = opaque;
  3361. NvmeRequest *req = iocb->req;
  3362. NvmeNamespace *ns = req->ns;
  3363. int64_t moff;
  3364. int count;
  3365. if (ret < 0 || iocb->ret < 0 || !ns->lbaf.ms) {
  3366. goto out;
  3367. }
  3368. moff = nvme_moff(ns, iocb->zone->d.zslba);
  3369. count = nvme_m2b(ns, ns->zone_size);
  3370. iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count,
  3371. BDRV_REQ_MAY_UNMAP,
  3372. nvme_zone_reset_cb, iocb);
  3373. return;
  3374. out:
  3375. nvme_zone_reset_cb(iocb, ret);
  3376. }
  3377. static void nvme_zone_reset_cb(void *opaque, int ret)
  3378. {
  3379. NvmeZoneResetAIOCB *iocb = opaque;
  3380. NvmeRequest *req = iocb->req;
  3381. NvmeNamespace *ns = req->ns;
  3382. if (iocb->ret < 0) {
  3383. goto done;
  3384. } else if (ret < 0) {
  3385. iocb->ret = ret;
  3386. goto done;
  3387. }
  3388. if (iocb->zone) {
  3389. nvme_zrm_reset(ns, iocb->zone);
  3390. if (!iocb->all) {
  3391. goto done;
  3392. }
  3393. }
  3394. while (iocb->idx < ns->num_zones) {
  3395. NvmeZone *zone = &ns->zone_array[iocb->idx++];
  3396. switch (nvme_get_zone_state(zone)) {
  3397. case NVME_ZONE_STATE_EMPTY:
  3398. if (!iocb->all) {
  3399. goto done;
  3400. }
  3401. continue;
  3402. case NVME_ZONE_STATE_EXPLICITLY_OPEN:
  3403. case NVME_ZONE_STATE_IMPLICITLY_OPEN:
  3404. case NVME_ZONE_STATE_CLOSED:
  3405. case NVME_ZONE_STATE_FULL:
  3406. iocb->zone = zone;
  3407. break;
  3408. default:
  3409. continue;
  3410. }
  3411. trace_pci_nvme_zns_zone_reset(zone->d.zslba);
  3412. iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk,
  3413. nvme_l2b(ns, zone->d.zslba),
  3414. nvme_l2b(ns, ns->zone_size),
  3415. BDRV_REQ_MAY_UNMAP,
  3416. nvme_zone_reset_epilogue_cb,
  3417. iocb);
  3418. return;
  3419. }
  3420. done:
  3421. iocb->aiocb = NULL;
  3422. iocb->common.cb(iocb->common.opaque, iocb->ret);
  3423. qemu_aio_unref(iocb);
  3424. }
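/*
 * ZRWA (Zone Random Write Area) explicit flush: the given ending LBA must
 * lie within the zone's random write area, the flushed region must be a
 * multiple of the flush granularity (zrwafg), and the write pointer is
 * then advanced past the flushed blocks.
 */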
  3425. static uint16_t nvme_zone_mgmt_send_zrwa_flush(NvmeCtrl *n, NvmeZone *zone,
  3426. uint64_t elba, NvmeRequest *req)
  3427. {
  3428. NvmeNamespace *ns = req->ns;
  3429. uint16_t ozcs = le16_to_cpu(ns->id_ns_zoned->ozcs);
  3430. uint64_t wp = zone->d.wp;
  3431. uint32_t nlb = elba - wp + 1;
  3432. uint16_t status;
  3433. if (!(ozcs & NVME_ID_NS_ZONED_OZCS_ZRWASUP)) {
  3434. return NVME_INVALID_ZONE_OP | NVME_DNR;
  3435. }
  3436. if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) {
  3437. return NVME_INVALID_FIELD | NVME_DNR;
  3438. }
  3439. if (elba < wp || elba > wp + ns->zns.zrwas) {
  3440. return NVME_ZONE_BOUNDARY_ERROR | NVME_DNR;
  3441. }
  3442. if (nlb % ns->zns.zrwafg) {
  3443. return NVME_INVALID_FIELD | NVME_DNR;
  3444. }
  3445. status = nvme_zrm_auto(n, ns, zone);
  3446. if (status) {
  3447. return status;
  3448. }
  3449. zone->w_ptr += nlb;
  3450. nvme_advance_zone_wp(ns, zone, nlb);
  3451. return NVME_SUCCESS;
  3452. }
  3453. static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
  3454. {
  3455. NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)&req->cmd;
  3456. NvmeNamespace *ns = req->ns;
  3457. NvmeZone *zone;
  3458. NvmeZoneResetAIOCB *iocb;
  3459. uint8_t *zd_ext;
  3460. uint64_t slba = 0;
  3461. uint32_t zone_idx = 0;
  3462. uint16_t status;
  3463. uint8_t action = cmd->zsa;
  3464. bool all;
  3465. enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE;
  3466. all = cmd->zsflags & NVME_ZSFLAG_SELECT_ALL;
  3467. req->status = NVME_SUCCESS;
  3468. if (!all) {
  3469. status = nvme_get_mgmt_zone_slba_idx(ns, &req->cmd, &slba, &zone_idx);
  3470. if (status) {
  3471. return status;
  3472. }
  3473. }
  3474. zone = &ns->zone_array[zone_idx];
  3475. if (slba != zone->d.zslba && action != NVME_ZONE_ACTION_ZRWA_FLUSH) {
  3476. trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba);
  3477. return NVME_INVALID_FIELD | NVME_DNR;
  3478. }
  3479. switch (action) {
  3480. case NVME_ZONE_ACTION_OPEN:
  3481. if (all) {
  3482. proc_mask = NVME_PROC_CLOSED_ZONES;
  3483. }
  3484. trace_pci_nvme_open_zone(slba, zone_idx, all);
  3485. status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone, req);
  3486. break;
  3487. case NVME_ZONE_ACTION_CLOSE:
  3488. if (all) {
  3489. proc_mask = NVME_PROC_OPENED_ZONES;
  3490. }
  3491. trace_pci_nvme_close_zone(slba, zone_idx, all);
  3492. status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone, req);
  3493. break;
  3494. case NVME_ZONE_ACTION_FINISH:
  3495. if (all) {
  3496. proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES;
  3497. }
  3498. trace_pci_nvme_finish_zone(slba, zone_idx, all);
  3499. status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone, req);
  3500. break;
  3501. case NVME_ZONE_ACTION_RESET:
  3502. trace_pci_nvme_reset_zone(slba, zone_idx, all);
  3503. iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk,
  3504. nvme_misc_cb, req);
  3505. iocb->req = req;
  3506. iocb->ret = 0;
  3507. iocb->all = all;
  3508. iocb->idx = zone_idx;
  3509. iocb->zone = NULL;
  3510. req->aiocb = &iocb->common;
  3511. nvme_zone_reset_cb(iocb, 0);
  3512. return NVME_NO_COMPLETE;
  3513. case NVME_ZONE_ACTION_OFFLINE:
  3514. if (all) {
  3515. proc_mask = NVME_PROC_READ_ONLY_ZONES;
  3516. }
  3517. trace_pci_nvme_offline_zone(slba, zone_idx, all);
  3518. status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone, req);
  3519. break;
  3520. case NVME_ZONE_ACTION_SET_ZD_EXT:
  3521. trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
  3522. if (all || !ns->params.zd_extension_size) {
  3523. return NVME_INVALID_FIELD | NVME_DNR;
  3524. }
  3525. zd_ext = nvme_get_zd_extension(ns, zone_idx);
  3526. status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req);
  3527. if (status) {
  3528. trace_pci_nvme_err_zd_extension_map_error(zone_idx);
  3529. return status;
  3530. }
  3531. status = nvme_set_zd_ext(ns, zone);
  3532. if (status == NVME_SUCCESS) {
  3533. trace_pci_nvme_zd_extension_set(zone_idx);
  3534. return status;
  3535. }
  3536. break;
  3537. case NVME_ZONE_ACTION_ZRWA_FLUSH:
  3538. if (all) {
  3539. return NVME_INVALID_FIELD | NVME_DNR;
  3540. }
  3541. return nvme_zone_mgmt_send_zrwa_flush(n, zone, slba, req);
  3542. default:
  3543. trace_pci_nvme_err_invalid_mgmt_action(action);
  3544. status = NVME_INVALID_FIELD;
  3545. }
  3546. if (status == NVME_ZONE_INVAL_TRANSITION) {
  3547. trace_pci_nvme_err_invalid_zone_state_transition(action, slba,
  3548. zone->d.za);
  3549. }
  3550. if (status) {
  3551. status |= NVME_DNR;
  3552. }
  3553. return status;
  3554. }
  3555. static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl)
  3556. {
  3557. NvmeZoneState zs = nvme_get_zone_state(zl);
  3558. switch (zafs) {
  3559. case NVME_ZONE_REPORT_ALL:
  3560. return true;
  3561. case NVME_ZONE_REPORT_EMPTY:
  3562. return zs == NVME_ZONE_STATE_EMPTY;
  3563. case NVME_ZONE_REPORT_IMPLICITLY_OPEN:
  3564. return zs == NVME_ZONE_STATE_IMPLICITLY_OPEN;
  3565. case NVME_ZONE_REPORT_EXPLICITLY_OPEN:
  3566. return zs == NVME_ZONE_STATE_EXPLICITLY_OPEN;
  3567. case NVME_ZONE_REPORT_CLOSED:
  3568. return zs == NVME_ZONE_STATE_CLOSED;
  3569. case NVME_ZONE_REPORT_FULL:
  3570. return zs == NVME_ZONE_STATE_FULL;
  3571. case NVME_ZONE_REPORT_READ_ONLY:
  3572. return zs == NVME_ZONE_STATE_READ_ONLY;
  3573. case NVME_ZONE_REPORT_OFFLINE:
  3574. return zs == NVME_ZONE_STATE_OFFLINE;
  3575. default:
  3576. return false;
  3577. }
  3578. }
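/*
 * Zone Management Receive (Report Zones). The returned buffer is a report
 * header followed by fixed-size zone descriptors (plus the zone descriptor
 * extension for the extended report):
 *
 *   entry size = sizeof(NvmeZoneDescr) [+ zd_extension_size]
 *   max_zones  = (data_size - sizeof(NvmeZoneReportHeader)) / entry size
 *
 * With the Partial Report bit set, the header's zone count only reflects
 * zones that fit in the buffer; otherwise it counts every matching zone.
 */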
  3579. static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
  3580. {
  3581. NvmeCmd *cmd = &req->cmd;
  3582. NvmeNamespace *ns = req->ns;
3583. /* cdw12 is the zero-based number of dwords to return; convert to bytes */
  3584. uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2;
  3585. uint32_t dw13 = le32_to_cpu(cmd->cdw13);
  3586. uint32_t zone_idx, zra, zrasf, partial;
  3587. uint64_t max_zones, nr_zones = 0;
  3588. uint16_t status;
  3589. uint64_t slba;
  3590. NvmeZoneDescr *z;
  3591. NvmeZone *zone;
  3592. NvmeZoneReportHeader *header;
  3593. void *buf, *buf_p;
  3594. size_t zone_entry_sz;
  3595. int i;
  3596. req->status = NVME_SUCCESS;
  3597. status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx);
  3598. if (status) {
  3599. return status;
  3600. }
  3601. zra = dw13 & 0xff;
  3602. if (zra != NVME_ZONE_REPORT && zra != NVME_ZONE_REPORT_EXTENDED) {
  3603. return NVME_INVALID_FIELD | NVME_DNR;
  3604. }
  3605. if (zra == NVME_ZONE_REPORT_EXTENDED && !ns->params.zd_extension_size) {
  3606. return NVME_INVALID_FIELD | NVME_DNR;
  3607. }
  3608. zrasf = (dw13 >> 8) & 0xff;
  3609. if (zrasf > NVME_ZONE_REPORT_OFFLINE) {
  3610. return NVME_INVALID_FIELD | NVME_DNR;
  3611. }
  3612. if (data_size < sizeof(NvmeZoneReportHeader)) {
  3613. return NVME_INVALID_FIELD | NVME_DNR;
  3614. }
  3615. status = nvme_check_mdts(n, data_size);
  3616. if (status) {
  3617. return status;
  3618. }
  3619. partial = (dw13 >> 16) & 0x01;
  3620. zone_entry_sz = sizeof(NvmeZoneDescr);
  3621. if (zra == NVME_ZONE_REPORT_EXTENDED) {
  3622. zone_entry_sz += ns->params.zd_extension_size;
  3623. }
  3624. max_zones = (data_size - sizeof(NvmeZoneReportHeader)) / zone_entry_sz;
  3625. buf = g_malloc0(data_size);
  3626. zone = &ns->zone_array[zone_idx];
  3627. for (i = zone_idx; i < ns->num_zones; i++) {
  3628. if (partial && nr_zones >= max_zones) {
  3629. break;
  3630. }
  3631. if (nvme_zone_matches_filter(zrasf, zone++)) {
  3632. nr_zones++;
  3633. }
  3634. }
  3635. header = buf;
  3636. header->nr_zones = cpu_to_le64(nr_zones);
  3637. buf_p = buf + sizeof(NvmeZoneReportHeader);
  3638. for (; zone_idx < ns->num_zones && max_zones > 0; zone_idx++) {
  3639. zone = &ns->zone_array[zone_idx];
  3640. if (nvme_zone_matches_filter(zrasf, zone)) {
  3641. z = buf_p;
  3642. buf_p += sizeof(NvmeZoneDescr);
  3643. z->zt = zone->d.zt;
  3644. z->zs = zone->d.zs;
  3645. z->zcap = cpu_to_le64(zone->d.zcap);
  3646. z->zslba = cpu_to_le64(zone->d.zslba);
  3647. z->za = zone->d.za;
  3648. if (nvme_wp_is_valid(zone)) {
  3649. z->wp = cpu_to_le64(zone->d.wp);
  3650. } else {
  3651. z->wp = cpu_to_le64(~0ULL);
  3652. }
  3653. if (zra == NVME_ZONE_REPORT_EXTENDED) {
  3654. if (zone->d.za & NVME_ZA_ZD_EXT_VALID) {
  3655. memcpy(buf_p, nvme_get_zd_extension(ns, zone_idx),
  3656. ns->params.zd_extension_size);
  3657. }
  3658. buf_p += ns->params.zd_extension_size;
  3659. }
  3660. max_zones--;
  3661. }
  3662. }
  3663. status = nvme_c2h(n, (uint8_t *)buf, data_size, req);
  3664. g_free(buf);
  3665. return status;
  3666. }
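/*
 * I/O Management Receive, Reclaim Unit Handle Status: one descriptor is
 * returned per (placement handle, reclaim group) pair, so the buffer holds
 * ns->fdp.nphs * endgrp->fdp.nrg entries, truncated to the host's
 * requested length.
 */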
  3667. static uint16_t nvme_io_mgmt_recv_ruhs(NvmeCtrl *n, NvmeRequest *req,
  3668. size_t len)
  3669. {
  3670. NvmeNamespace *ns = req->ns;
  3671. NvmeEnduranceGroup *endgrp;
  3672. NvmeRuhStatus *hdr;
  3673. NvmeRuhStatusDescr *ruhsd;
  3674. unsigned int nruhsd;
  3675. uint16_t rg, ph, *ruhid;
  3676. size_t trans_len;
  3677. g_autofree uint8_t *buf = NULL;
  3678. if (!n->subsys) {
  3679. return NVME_INVALID_FIELD | NVME_DNR;
  3680. }
  3681. if (ns->params.nsid == 0 || ns->params.nsid == 0xffffffff) {
  3682. return NVME_INVALID_NSID | NVME_DNR;
  3683. }
  3684. if (!n->subsys->endgrp.fdp.enabled) {
  3685. return NVME_FDP_DISABLED | NVME_DNR;
  3686. }
  3687. endgrp = ns->endgrp;
  3688. nruhsd = ns->fdp.nphs * endgrp->fdp.nrg;
  3689. trans_len = sizeof(NvmeRuhStatus) + nruhsd * sizeof(NvmeRuhStatusDescr);
  3690. buf = g_malloc0(trans_len);
  3691. trans_len = MIN(trans_len, len);
  3692. hdr = (NvmeRuhStatus *)buf;
  3693. ruhsd = (NvmeRuhStatusDescr *)(buf + sizeof(NvmeRuhStatus));
  3694. hdr->nruhsd = cpu_to_le16(nruhsd);
  3695. ruhid = ns->fdp.phs;
  3696. for (ph = 0; ph < ns->fdp.nphs; ph++, ruhid++) {
  3697. NvmeRuHandle *ruh = &endgrp->fdp.ruhs[*ruhid];
  3698. for (rg = 0; rg < endgrp->fdp.nrg; rg++, ruhsd++) {
  3699. uint16_t pid = nvme_make_pid(ns, rg, ph);
  3700. ruhsd->pid = cpu_to_le16(pid);
  3701. ruhsd->ruhid = *ruhid;
  3702. ruhsd->earutr = 0;
  3703. ruhsd->ruamw = cpu_to_le64(ruh->rus[rg].ruamw);
  3704. }
  3705. }
  3706. return nvme_c2h(n, buf, trans_len, req);
  3707. }
  3708. static uint16_t nvme_io_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
  3709. {
  3710. NvmeCmd *cmd = &req->cmd;
  3711. uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
  3712. uint32_t numd = le32_to_cpu(cmd->cdw11);
  3713. uint8_t mo = (cdw10 & 0xff);
  3714. size_t len = (numd + 1) << 2;
  3715. switch (mo) {
  3716. case NVME_IOMR_MO_NOP:
  3717. return 0;
  3718. case NVME_IOMR_MO_RUH_STATUS:
  3719. return nvme_io_mgmt_recv_ruhs(n, req, len);
  3720. default:
  3721. return NVME_INVALID_FIELD | NVME_DNR;
  3722. };
  3723. }
  3724. static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl *n, NvmeRequest *req)
  3725. {
  3726. NvmeCmd *cmd = &req->cmd;
  3727. NvmeNamespace *ns = req->ns;
  3728. uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
  3729. uint16_t ret = NVME_SUCCESS;
  3730. uint32_t npid = (cdw10 >> 16) + 1;
  3731. unsigned int i = 0;
  3732. g_autofree uint16_t *pids = NULL;
  3733. uint32_t maxnpid;
  3734. if (!ns->endgrp || !ns->endgrp->fdp.enabled) {
  3735. return NVME_FDP_DISABLED | NVME_DNR;
  3736. }
  3737. maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh;
  3738. if (unlikely(npid >= MIN(NVME_FDP_MAXPIDS, maxnpid))) {
  3739. return NVME_INVALID_FIELD | NVME_DNR;
  3740. }
  3741. pids = g_new(uint16_t, npid);
  3742. ret = nvme_h2c(n, pids, npid * sizeof(uint16_t), req);
  3743. if (ret) {
  3744. return ret;
  3745. }
  3746. for (; i < npid; i++) {
  3747. if (!nvme_update_ruh(n, ns, pids[i])) {
  3748. return NVME_INVALID_FIELD | NVME_DNR;
  3749. }
  3750. }
  3751. return ret;
  3752. }
  3753. static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
  3754. {
  3755. NvmeCmd *cmd = &req->cmd;
  3756. uint32_t cdw10 = le32_to_cpu(cmd->cdw10);
  3757. uint8_t mo = (cdw10 & 0xff);
  3758. switch (mo) {
  3759. case NVME_IOMS_MO_NOP:
  3760. return 0;
  3761. case NVME_IOMS_MO_RUH_UPDATE:
  3762. return nvme_io_mgmt_send_ruh_update(n, req);
  3763. default:
  3764. return NVME_INVALID_FIELD | NVME_DNR;
  3765. };
  3766. }
  3767. static uint16_t __nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req)
  3768. {
  3769. switch (req->cmd.opcode) {
  3770. case NVME_CMD_WRITE:
  3771. return nvme_write(n, req);
  3772. case NVME_CMD_READ:
  3773. return nvme_read(n, req);
  3774. case NVME_CMD_COMPARE:
  3775. return nvme_compare(n, req);
  3776. case NVME_CMD_WRITE_ZEROES:
  3777. return nvme_write_zeroes(n, req);
  3778. case NVME_CMD_DSM:
  3779. return nvme_dsm(n, req);
  3780. case NVME_CMD_VERIFY:
  3781. return nvme_verify(n, req);
  3782. case NVME_CMD_COPY:
  3783. return nvme_copy(n, req);
  3784. case NVME_CMD_IO_MGMT_RECV:
  3785. return nvme_io_mgmt_recv(n, req);
  3786. case NVME_CMD_IO_MGMT_SEND:
  3787. return nvme_io_mgmt_send(n, req);
  3788. }
  3789. g_assert_not_reached();
  3790. }
  3791. static uint16_t nvme_io_cmd_nvm(NvmeCtrl *n, NvmeRequest *req)
  3792. {
  3793. if (!(n->cse.iocs.nvm[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
  3794. trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
  3795. return NVME_INVALID_OPCODE | NVME_DNR;
  3796. }
  3797. return __nvme_io_cmd_nvm(n, req);
  3798. }
  3799. static uint16_t nvme_io_cmd_zoned(NvmeCtrl *n, NvmeRequest *req)
  3800. {
  3801. if (!(n->cse.iocs.zoned[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
  3802. trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
  3803. return NVME_INVALID_OPCODE | NVME_DNR;
  3804. }
  3805. switch (req->cmd.opcode) {
  3806. case NVME_CMD_ZONE_APPEND:
  3807. return nvme_zone_append(n, req);
  3808. case NVME_CMD_ZONE_MGMT_SEND:
  3809. return nvme_zone_mgmt_send(n, req);
  3810. case NVME_CMD_ZONE_MGMT_RECV:
  3811. return nvme_zone_mgmt_recv(n, req);
  3812. }
  3813. return __nvme_io_cmd_nvm(n, req);
  3814. }
  3815. static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
  3816. {
  3817. NvmeNamespace *ns;
  3818. uint32_t nsid = le32_to_cpu(req->cmd.nsid);
  3819. trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
  3820. req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode));
/*
 * In the base NVM command set, Flush may apply to all namespaces (indicated
 * by NSID being set to FFFFFFFFh). But when that feature is used together
 * with TP 4056 (Namespace Types), things become ambiguous.
 *
 * If NSID is indeed set to FFFFFFFFh, we simply cannot associate the opcode
 * with a specific command, since we cannot determine a unique I/O command
 * set. Opcode 0h might have completely different semantics in another
 * command set - does an NSID of FFFFFFFFh then mean "for all namespaces,
 * apply whatever command set specific command uses the 0h opcode", or does
 * it mean "for all namespaces, apply whatever command uses the 0h opcode
 * if, and only if, it allows NSID to be FFFFFFFFh"?
 *
 * Luckily, for now, we do not need to care: the device only supports
 * namespace types that include the NVM Flush command (NVM and Zoned), so
 * always do an NVM Flush.
 */
  3840. if (req->cmd.opcode == NVME_CMD_FLUSH) {
  3841. return nvme_flush(n, req);
  3842. }
  3843. if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
  3844. return NVME_INVALID_NSID | NVME_DNR;
  3845. }
  3846. ns = nvme_ns(n, nsid);
  3847. if (unlikely(!ns)) {
  3848. return NVME_INVALID_FIELD | NVME_DNR;
  3849. }
  3850. if (ns->status) {
  3851. return ns->status;
  3852. }
  3853. if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) {
  3854. return NVME_INVALID_FIELD;
  3855. }
  3856. req->ns = ns;
  3857. switch (ns->csi) {
  3858. case NVME_CSI_NVM:
  3859. return nvme_io_cmd_nvm(n, req);
  3860. case NVME_CSI_ZONED:
  3861. return nvme_io_cmd_zoned(n, req);
  3862. }
  3863. g_assert_not_reached();
  3864. }
  3865. static void nvme_cq_notifier(EventNotifier *e)
  3866. {
  3867. NvmeCQueue *cq = container_of(e, NvmeCQueue, notifier);
  3868. NvmeCtrl *n = cq->ctrl;
  3869. if (!event_notifier_test_and_clear(e)) {
  3870. return;
  3871. }
  3872. nvme_update_cq_head(cq);
  3873. if (cq->tail == cq->head) {
  3874. if (cq->irq_enabled) {
  3875. n->cq_pending--;
  3876. }
  3877. nvme_irq_deassert(n, cq);
  3878. }
  3879. qemu_bh_schedule(cq->bh);
  3880. }
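/*
 * Hook the CQ head doorbell up to an ioeventfd. Assuming the 4-byte
 * doorbell stride (CAP.DSTRD = 0) that this controller reports, the SQ
 * tail doorbell for queue qid sits at 0x1000 + (qid << 3) and the matching
 * CQ head doorbell 4 bytes after it, which is where the
 * (cqid << 3) + (1 << 2) offset below comes from.
 */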
  3881. static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
  3882. {
  3883. NvmeCtrl *n = cq->ctrl;
  3884. uint16_t offset = (cq->cqid << 3) + (1 << 2);
  3885. int ret;
  3886. ret = event_notifier_init(&cq->notifier, 0);
  3887. if (ret < 0) {
  3888. return ret;
  3889. }
  3890. event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
  3891. memory_region_add_eventfd(&n->iomem,
  3892. 0x1000 + offset, 4, false, 0, &cq->notifier);
  3893. return 0;
  3894. }
  3895. static void nvme_sq_notifier(EventNotifier *e)
  3896. {
  3897. NvmeSQueue *sq = container_of(e, NvmeSQueue, notifier);
  3898. if (!event_notifier_test_and_clear(e)) {
  3899. return;
  3900. }
  3901. nvme_process_sq(sq);
  3902. }
  3903. static int nvme_init_sq_ioeventfd(NvmeSQueue *sq)
  3904. {
  3905. NvmeCtrl *n = sq->ctrl;
  3906. uint16_t offset = sq->sqid << 3;
  3907. int ret;
  3908. ret = event_notifier_init(&sq->notifier, 0);
  3909. if (ret < 0) {
  3910. return ret;
  3911. }
  3912. event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
  3913. memory_region_add_eventfd(&n->iomem,
  3914. 0x1000 + offset, 4, false, 0, &sq->notifier);
  3915. return 0;
  3916. }
  3917. static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
  3918. {
  3919. uint16_t offset = sq->sqid << 3;
  3920. n->sq[sq->sqid] = NULL;
  3921. qemu_bh_delete(sq->bh);
  3922. if (sq->ioeventfd_enabled) {
  3923. memory_region_del_eventfd(&n->iomem,
  3924. 0x1000 + offset, 4, false, 0, &sq->notifier);
  3925. event_notifier_set_handler(&sq->notifier, NULL);
  3926. event_notifier_cleanup(&sq->notifier);
  3927. }
  3928. g_free(sq->io_req);
  3929. if (sq->sqid) {
  3930. g_free(sq);
  3931. }
  3932. }
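/*
 * Delete Submission Queue: cancel all outstanding AIOs (their completions
 * are posted with "Command Aborted due to SQ Deletion"), flush pending
 * CQEs, return any of this SQ's requests still queued on the CQ back to
 * the SQ's free list, and finally free the queue.
 */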
  3933. static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
  3934. {
  3935. NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
  3936. NvmeRequest *r, *next;
  3937. NvmeSQueue *sq;
  3938. NvmeCQueue *cq;
  3939. uint16_t qid = le16_to_cpu(c->qid);
  3940. if (unlikely(!qid || nvme_check_sqid(n, qid))) {
  3941. trace_pci_nvme_err_invalid_del_sq(qid);
  3942. return NVME_INVALID_QID | NVME_DNR;
  3943. }
  3944. trace_pci_nvme_del_sq(qid);
  3945. sq = n->sq[qid];
  3946. while (!QTAILQ_EMPTY(&sq->out_req_list)) {
  3947. r = QTAILQ_FIRST(&sq->out_req_list);
  3948. assert(r->aiocb);
  3949. r->status = NVME_CMD_ABORT_SQ_DEL;
  3950. blk_aio_cancel(r->aiocb);
  3951. }
  3952. assert(QTAILQ_EMPTY(&sq->out_req_list));
  3953. if (!nvme_check_cqid(n, sq->cqid)) {
  3954. cq = n->cq[sq->cqid];
  3955. QTAILQ_REMOVE(&cq->sq_list, sq, entry);
  3956. nvme_post_cqes(cq);
  3957. QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) {
  3958. if (r->sq == sq) {
  3959. QTAILQ_REMOVE(&cq->req_list, r, entry);
  3960. QTAILQ_INSERT_TAIL(&sq->req_list, r, entry);
  3961. }
  3962. }
  3963. }
  3964. nvme_free_sq(sq, n);
  3965. return NVME_SUCCESS;
  3966. }
  3967. static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
  3968. uint16_t sqid, uint16_t cqid, uint16_t size)
  3969. {
  3970. int i;
  3971. NvmeCQueue *cq;
  3972. sq->ctrl = n;
  3973. sq->dma_addr = dma_addr;
  3974. sq->sqid = sqid;
  3975. sq->size = size;
  3976. sq->cqid = cqid;
  3977. sq->head = sq->tail = 0;
  3978. sq->io_req = g_new0(NvmeRequest, sq->size);
  3979. QTAILQ_INIT(&sq->req_list);
  3980. QTAILQ_INIT(&sq->out_req_list);
  3981. for (i = 0; i < sq->size; i++) {
  3982. sq->io_req[i].sq = sq;
  3983. QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
  3984. }
  3985. sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq,
  3986. &DEVICE(sq->ctrl)->mem_reentrancy_guard);
  3987. if (n->dbbuf_enabled) {
  3988. sq->db_addr = n->dbbuf_dbs + (sqid << 3);
  3989. sq->ei_addr = n->dbbuf_eis + (sqid << 3);
  3990. if (n->params.ioeventfd && sq->sqid != 0) {
  3991. if (!nvme_init_sq_ioeventfd(sq)) {
  3992. sq->ioeventfd_enabled = true;
  3993. }
  3994. }
  3995. }
  3996. assert(n->cq[cqid]);
  3997. cq = n->cq[cqid];
  3998. QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
  3999. n->sq[sqid] = sq;
  4000. }
  4001. static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
  4002. {
  4003. NvmeSQueue *sq;
  4004. NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd;
  4005. uint16_t cqid = le16_to_cpu(c->cqid);
  4006. uint16_t sqid = le16_to_cpu(c->sqid);
  4007. uint16_t qsize = le16_to_cpu(c->qsize);
  4008. uint16_t qflags = le16_to_cpu(c->sq_flags);
  4009. uint64_t prp1 = le64_to_cpu(c->prp1);
  4010. trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
  4011. if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
  4012. trace_pci_nvme_err_invalid_create_sq_cqid(cqid);
  4013. return NVME_INVALID_CQID | NVME_DNR;
  4014. }
  4015. if (unlikely(!sqid || sqid > n->conf_ioqpairs || n->sq[sqid] != NULL)) {
  4016. trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
  4017. return NVME_INVALID_QID | NVME_DNR;
  4018. }
  4019. if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
  4020. trace_pci_nvme_err_invalid_create_sq_size(qsize);
  4021. return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
  4022. }
  4023. if (unlikely(prp1 & (n->page_size - 1))) {
  4024. trace_pci_nvme_err_invalid_create_sq_addr(prp1);
  4025. return NVME_INVALID_PRP_OFFSET | NVME_DNR;
  4026. }
  4027. if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
  4028. trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
  4029. return NVME_INVALID_FIELD | NVME_DNR;
  4030. }
  4031. sq = g_malloc0(sizeof(*sq));
  4032. nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1);
  4033. return NVME_SUCCESS;
  4034. }
  4035. struct nvme_stats {
  4036. uint64_t units_read;
  4037. uint64_t units_written;
  4038. uint64_t read_commands;
  4039. uint64_t write_commands;
  4040. };
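/*
 * Accumulate block layer accounting counters for a namespace. Byte and
 * command counts are summed here; callers convert the byte counts into
 * whatever units a given log page expects (e.g. thousands of 512-byte
 * units for the SMART log).
 */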
  4041. static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats)
  4042. {
  4043. BlockAcctStats *s = blk_get_stats(ns->blkconf.blk);
  4044. stats->units_read += s->nr_bytes[BLOCK_ACCT_READ];
  4045. stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE];
  4046. stats->read_commands += s->nr_ops[BLOCK_ACCT_READ];
  4047. stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
  4048. }
  4049. static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae,
  4050. uint32_t buf_len, uint64_t off,
  4051. NvmeRequest *req)
  4052. {
  4053. NvmeNamespace *ns = NULL;
  4054. NvmeSmartLogExtended smart_l = { 0 };
  4055. struct nvme_stats stats = { 0 };
  4056. uint32_t trans_len;
  4057. if (off >= sizeof(smart_l)) {
  4058. return NVME_INVALID_FIELD | NVME_DNR;
  4059. }
  4060. /* accumulate all stats from all namespaces */
  4061. for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  4062. ns = nvme_ns(n, i);
  4063. if (ns) {
  4064. nvme_set_blk_stats(ns, &stats);
  4065. }
  4066. }
  4067. smart_l.physical_media_units_written[0] = cpu_to_le64(stats.units_written);
  4068. smart_l.physical_media_units_read[0] = cpu_to_le64(stats.units_read);
  4069. smart_l.log_page_version = 0x0005;
  4070. static const uint8_t guid[16] = {
  4071. 0xC5, 0xAF, 0x10, 0x28, 0xEA, 0xBF, 0xF2, 0xA4,
  4072. 0x9C, 0x4F, 0x6F, 0x7C, 0xC9, 0x14, 0xD5, 0xAF
  4073. };
  4074. memcpy(smart_l.log_page_guid, guid, sizeof(smart_l.log_page_guid));
  4075. if (!rae) {
  4076. nvme_clear_events(n, NVME_AER_TYPE_SMART);
  4077. }
  4078. trans_len = MIN(sizeof(smart_l) - off, buf_len);
  4079. return nvme_c2h(n, (uint8_t *) &smart_l + off, trans_len, req);
  4080. }
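/*
 * SMART / Health Information log page. With NSID FFFFFFFFh the statistics
 * are aggregated over all attached namespaces; data units are reported in
 * thousands of 512-byte units, rounded up.
 */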
  4081. static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
  4082. uint64_t off, NvmeRequest *req)
  4083. {
  4084. uint32_t nsid = le32_to_cpu(req->cmd.nsid);
  4085. struct nvme_stats stats = { 0 };
  4086. NvmeSmartLog smart = { 0 };
  4087. uint32_t trans_len;
  4088. NvmeNamespace *ns;
  4089. time_t current_ms;
  4090. uint64_t u_read, u_written;
  4091. if (off >= sizeof(smart)) {
  4092. return NVME_INVALID_FIELD | NVME_DNR;
  4093. }
  4094. if (nsid != 0xffffffff) {
  4095. ns = nvme_ns(n, nsid);
  4096. if (!ns) {
  4097. return NVME_INVALID_NSID | NVME_DNR;
  4098. }
  4099. nvme_set_blk_stats(ns, &stats);
  4100. } else {
  4101. int i;
  4102. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  4103. ns = nvme_ns(n, i);
  4104. if (!ns) {
  4105. continue;
  4106. }
  4107. nvme_set_blk_stats(ns, &stats);
  4108. }
  4109. }
  4110. trans_len = MIN(sizeof(smart) - off, buf_len);
  4111. smart.critical_warning = n->smart_critical_warning;
  4112. u_read = DIV_ROUND_UP(stats.units_read >> BDRV_SECTOR_BITS, 1000);
  4113. u_written = DIV_ROUND_UP(stats.units_written >> BDRV_SECTOR_BITS, 1000);
  4114. smart.data_units_read[0] = cpu_to_le64(u_read);
  4115. smart.data_units_written[0] = cpu_to_le64(u_written);
  4116. smart.host_read_commands[0] = cpu_to_le64(stats.read_commands);
  4117. smart.host_write_commands[0] = cpu_to_le64(stats.write_commands);
  4118. smart.temperature = cpu_to_le16(n->temperature);
  4119. if ((n->temperature >= n->features.temp_thresh_hi) ||
  4120. (n->temperature <= n->features.temp_thresh_low)) {
  4121. smart.critical_warning |= NVME_SMART_TEMPERATURE;
  4122. }
  4123. current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
  4124. smart.power_on_hours[0] =
  4125. cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);
  4126. if (!rae) {
  4127. nvme_clear_events(n, NVME_AER_TYPE_SMART);
  4128. }
  4129. return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req);
  4130. }
  4131. static uint16_t nvme_endgrp_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
  4132. uint64_t off, NvmeRequest *req)
  4133. {
  4134. uint32_t dw11 = le32_to_cpu(req->cmd.cdw11);
  4135. uint16_t endgrpid = (dw11 >> 16) & 0xffff;
  4136. struct nvme_stats stats = {};
  4137. NvmeEndGrpLog info = {};
  4138. int i;
  4139. if (!n->subsys || endgrpid != 0x1) {
  4140. return NVME_INVALID_FIELD | NVME_DNR;
  4141. }
  4142. if (off >= sizeof(info)) {
  4143. return NVME_INVALID_FIELD | NVME_DNR;
  4144. }
  4145. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  4146. NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
  4147. if (!ns) {
  4148. continue;
  4149. }
  4150. nvme_set_blk_stats(ns, &stats);
  4151. }
  4152. info.data_units_read[0] =
  4153. cpu_to_le64(DIV_ROUND_UP(stats.units_read / 1000000000, 1000000000));
  4154. info.data_units_written[0] =
  4155. cpu_to_le64(DIV_ROUND_UP(stats.units_written / 1000000000, 1000000000));
  4156. info.media_units_written[0] =
  4157. cpu_to_le64(DIV_ROUND_UP(stats.units_written / 1000000000, 1000000000));
  4158. info.host_read_commands[0] = cpu_to_le64(stats.read_commands);
  4159. info.host_write_commands[0] = cpu_to_le64(stats.write_commands);
  4160. buf_len = MIN(sizeof(info) - off, buf_len);
  4161. return nvme_c2h(n, (uint8_t *)&info + off, buf_len, req);
  4162. }
  4163. static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
  4164. NvmeRequest *req)
  4165. {
  4166. uint32_t trans_len;
  4167. NvmeFwSlotInfoLog fw_log = {
  4168. .afi = 0x1,
  4169. };
  4170. if (off >= sizeof(fw_log)) {
  4171. return NVME_INVALID_FIELD | NVME_DNR;
  4172. }
  4173. strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
  4174. trans_len = MIN(sizeof(fw_log) - off, buf_len);
  4175. return nvme_c2h(n, (uint8_t *) &fw_log + off, trans_len, req);
  4176. }
  4177. static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
  4178. uint64_t off, NvmeRequest *req)
  4179. {
  4180. uint32_t trans_len;
  4181. NvmeErrorLog errlog;
  4182. if (off >= sizeof(errlog)) {
  4183. return NVME_INVALID_FIELD | NVME_DNR;
  4184. }
  4185. if (!rae) {
  4186. nvme_clear_events(n, NVME_AER_TYPE_ERROR);
  4187. }
  4188. memset(&errlog, 0x0, sizeof(errlog));
  4189. trans_len = MIN(sizeof(errlog) - off, buf_len);
  4190. return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req);
  4191. }
  4192. static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
  4193. uint64_t off, NvmeRequest *req)
  4194. {
  4195. uint32_t nslist[1024];
  4196. uint32_t trans_len;
  4197. int i = 0;
  4198. uint32_t nsid;
  4199. if (off >= sizeof(nslist)) {
  4200. trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist));
  4201. return NVME_INVALID_FIELD | NVME_DNR;
  4202. }
  4203. memset(nslist, 0x0, sizeof(nslist));
  4204. trans_len = MIN(sizeof(nslist) - off, buf_len);
  4205. while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) !=
  4206. NVME_CHANGED_NSID_SIZE) {
/*
 * If more than 1024 namespaces have changed, the first entry in the log
 * page is set to FFFFFFFFh and the remaining entries to 0, as per the spec.
 */
  4211. if (i == ARRAY_SIZE(nslist)) {
  4212. memset(nslist, 0x0, sizeof(nslist));
  4213. nslist[0] = 0xffffffff;
  4214. break;
  4215. }
  4216. nslist[i++] = nsid;
  4217. clear_bit(nsid, n->changed_nsids);
  4218. }
/*
 * Clear all remaining changed-NSID bits if we broke out of the loop above
 * because more than 1024 namespaces changed.
 */
  4223. if (nslist[0] == 0xffffffff) {
  4224. bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE);
  4225. }
  4226. if (!rae) {
  4227. nvme_clear_events(n, NVME_AER_TYPE_NOTICE);
  4228. }
  4229. return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req);
  4230. }
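/*
 * Commands Supported and Effects log page. Admin command entries are
 * always reported; which I/O command set column is returned depends on
 * CC.CSS and, when all command sets are enabled, on the CSI in CDW14.
 */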
  4231. static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
  4232. uint64_t off, NvmeRequest *req)
  4233. {
  4234. NvmeEffectsLog log = {};
  4235. const uint32_t *iocs = NULL;
  4236. uint32_t trans_len;
  4237. if (off >= sizeof(log)) {
  4238. trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(log));
  4239. return NVME_INVALID_FIELD | NVME_DNR;
  4240. }
  4241. switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
  4242. case NVME_CC_CSS_NVM:
  4243. iocs = n->cse.iocs.nvm;
  4244. break;
  4245. case NVME_CC_CSS_ALL:
  4246. switch (csi) {
  4247. case NVME_CSI_NVM:
  4248. iocs = n->cse.iocs.nvm;
  4249. break;
  4250. case NVME_CSI_ZONED:
  4251. iocs = n->cse.iocs.zoned;
  4252. break;
  4253. }
  4254. break;
  4255. }
  4256. memcpy(log.acs, n->cse.acs, sizeof(log.acs));
  4257. if (iocs) {
  4258. memcpy(log.iocs, iocs, sizeof(log.iocs));
  4259. }
  4260. trans_len = MIN(sizeof(log) - off, buf_len);
  4261. return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
  4262. }
  4263. static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae,
  4264. uint32_t buf_len, uint64_t off,
  4265. NvmeRequest *req, uint8_t lid)
  4266. {
  4267. switch (lid) {
  4268. case NVME_OCP_EXTENDED_SMART_INFO:
  4269. if (n->params.ocp) {
  4270. return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req);
  4271. }
  4272. break;
  4273. /* add a case for each additional vendor specific log id */
  4274. }
  4275. trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
  4276. return NVME_INVALID_FIELD | NVME_DNR;
  4277. }
  4278. static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss)
  4279. {
  4280. size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr)
  4281. + vss;
  4282. return ROUND_UP(entry_siz, 8);
  4283. }
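/*
 * FDP Configurations log page: a small header followed by a single
 * configuration descriptor (descriptor header plus one reclaim unit handle
 * descriptor per RUH and the vendor specific bytes), rounded up to an
 * 8-byte multiple by sizeof_fdp_conf_descr() above.
 */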
  4284. static uint16_t nvme_fdp_confs(NvmeCtrl *n, uint32_t endgrpid, uint32_t buf_len,
  4285. uint64_t off, NvmeRequest *req)
  4286. {
  4287. uint32_t log_size, trans_len;
  4288. g_autofree uint8_t *buf = NULL;
  4289. NvmeFdpDescrHdr *hdr;
  4290. NvmeRuhDescr *ruhd;
  4291. NvmeEnduranceGroup *endgrp;
  4292. NvmeFdpConfsHdr *log;
  4293. size_t nruh, fdp_descr_size;
  4294. int i;
  4295. if (endgrpid != 1 || !n->subsys) {
  4296. return NVME_INVALID_FIELD | NVME_DNR;
  4297. }
  4298. endgrp = &n->subsys->endgrp;
  4299. if (endgrp->fdp.enabled) {
  4300. nruh = endgrp->fdp.nruh;
  4301. } else {
  4302. nruh = 1;
  4303. }
  4304. fdp_descr_size = sizeof_fdp_conf_descr(nruh, FDPVSS);
  4305. log_size = sizeof(NvmeFdpConfsHdr) + fdp_descr_size;
  4306. if (off >= log_size) {
  4307. return NVME_INVALID_FIELD | NVME_DNR;
  4308. }
  4309. trans_len = MIN(log_size - off, buf_len);
  4310. buf = g_malloc0(log_size);
  4311. log = (NvmeFdpConfsHdr *)buf;
  4312. hdr = (NvmeFdpDescrHdr *)(log + 1);
  4313. ruhd = (NvmeRuhDescr *)(buf + sizeof(*log) + sizeof(*hdr));
  4314. log->num_confs = cpu_to_le16(0);
  4315. log->size = cpu_to_le32(log_size);
  4316. hdr->descr_size = cpu_to_le16(fdp_descr_size);
  4317. if (endgrp->fdp.enabled) {
  4318. hdr->fdpa = FIELD_DP8(hdr->fdpa, FDPA, VALID, 1);
  4319. hdr->fdpa = FIELD_DP8(hdr->fdpa, FDPA, RGIF, endgrp->fdp.rgif);
  4320. hdr->nrg = cpu_to_le16(endgrp->fdp.nrg);
  4321. hdr->nruh = cpu_to_le16(endgrp->fdp.nruh);
  4322. hdr->maxpids = cpu_to_le16(NVME_FDP_MAXPIDS - 1);
  4323. hdr->nnss = cpu_to_le32(NVME_MAX_NAMESPACES);
  4324. hdr->runs = cpu_to_le64(endgrp->fdp.runs);
  4325. for (i = 0; i < nruh; i++) {
  4326. ruhd->ruht = NVME_RUHT_INITIALLY_ISOLATED;
  4327. ruhd++;
  4328. }
  4329. } else {
  4330. /* 1 bit for RUH in PIF -> 2 RUHs max. */
  4331. hdr->nrg = cpu_to_le16(1);
  4332. hdr->nruh = cpu_to_le16(1);
  4333. hdr->maxpids = cpu_to_le16(NVME_FDP_MAXPIDS - 1);
  4334. hdr->nnss = cpu_to_le32(1);
  4335. hdr->runs = cpu_to_le64(96 * MiB);
  4336. ruhd->ruht = NVME_RUHT_INITIALLY_ISOLATED;
  4337. }
  4338. return nvme_c2h(n, (uint8_t *)buf + off, trans_len, req);
  4339. }
  4340. static uint16_t nvme_fdp_ruh_usage(NvmeCtrl *n, uint32_t endgrpid,
  4341. uint32_t dw10, uint32_t dw12,
  4342. uint32_t buf_len, uint64_t off,
  4343. NvmeRequest *req)
  4344. {
  4345. NvmeRuHandle *ruh;
  4346. NvmeRuhuLog *hdr;
  4347. NvmeRuhuDescr *ruhud;
  4348. NvmeEnduranceGroup *endgrp;
  4349. g_autofree uint8_t *buf = NULL;
  4350. uint32_t log_size, trans_len;
  4351. uint16_t i;
  4352. if (endgrpid != 1 || !n->subsys) {
  4353. return NVME_INVALID_FIELD | NVME_DNR;
  4354. }
  4355. endgrp = &n->subsys->endgrp;
  4356. if (!endgrp->fdp.enabled) {
  4357. return NVME_FDP_DISABLED | NVME_DNR;
  4358. }
  4359. log_size = sizeof(NvmeRuhuLog) + endgrp->fdp.nruh * sizeof(NvmeRuhuDescr);
  4360. if (off >= log_size) {
  4361. return NVME_INVALID_FIELD | NVME_DNR;
  4362. }
  4363. trans_len = MIN(log_size - off, buf_len);
  4364. buf = g_malloc0(log_size);
  4365. hdr = (NvmeRuhuLog *)buf;
  4366. ruhud = (NvmeRuhuDescr *)(hdr + 1);
  4367. ruh = endgrp->fdp.ruhs;
  4368. hdr->nruh = cpu_to_le16(endgrp->fdp.nruh);
  4369. for (i = 0; i < endgrp->fdp.nruh; i++, ruhud++, ruh++) {
  4370. ruhud->ruha = ruh->ruha;
  4371. }
  4372. return nvme_c2h(n, (uint8_t *)buf + off, trans_len, req);
  4373. }
  4374. static uint16_t nvme_fdp_stats(NvmeCtrl *n, uint32_t endgrpid, uint32_t buf_len,
  4375. uint64_t off, NvmeRequest *req)
  4376. {
  4377. NvmeEnduranceGroup *endgrp;
  4378. NvmeFdpStatsLog log = {};
  4379. uint32_t trans_len;
  4380. if (off >= sizeof(NvmeFdpStatsLog)) {
  4381. return NVME_INVALID_FIELD | NVME_DNR;
  4382. }
  4383. if (endgrpid != 1 || !n->subsys) {
  4384. return NVME_INVALID_FIELD | NVME_DNR;
  4385. }
  4386. if (!n->subsys->endgrp.fdp.enabled) {
  4387. return NVME_FDP_DISABLED | NVME_DNR;
  4388. }
  4389. endgrp = &n->subsys->endgrp;
  4390. trans_len = MIN(sizeof(log) - off, buf_len);
/* the spec defines these as 128-bit values; we only use the lower 64 bits */
  4392. log.hbmw[0] = cpu_to_le64(endgrp->fdp.hbmw);
  4393. log.mbmw[0] = cpu_to_le64(endgrp->fdp.mbmw);
  4394. log.mbe[0] = cpu_to_le64(endgrp->fdp.mbe);
  4395. return nvme_c2h(n, (uint8_t *)&log + off, trans_len, req);
  4396. }
  4397. static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t endgrpid,
  4398. uint32_t buf_len, uint64_t off,
  4399. NvmeRequest *req)
  4400. {
  4401. NvmeEnduranceGroup *endgrp;
  4402. NvmeCmd *cmd = &req->cmd;
  4403. bool host_events = (cmd->cdw10 >> 8) & 0x1;
  4404. uint32_t log_size, trans_len;
  4405. NvmeFdpEventBuffer *ebuf;
  4406. g_autofree NvmeFdpEventsLog *elog = NULL;
  4407. NvmeFdpEvent *event;
  4408. if (endgrpid != 1 || !n->subsys) {
  4409. return NVME_INVALID_FIELD | NVME_DNR;
  4410. }
  4411. endgrp = &n->subsys->endgrp;
  4412. if (!endgrp->fdp.enabled) {
  4413. return NVME_FDP_DISABLED | NVME_DNR;
  4414. }
  4415. if (host_events) {
  4416. ebuf = &endgrp->fdp.host_events;
  4417. } else {
  4418. ebuf = &endgrp->fdp.ctrl_events;
  4419. }
  4420. log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * sizeof(NvmeFdpEvent);
  4421. if (off >= log_size) {
  4422. return NVME_INVALID_FIELD | NVME_DNR;
  4423. }
  4424. trans_len = MIN(log_size - off, buf_len);
  4425. elog = g_malloc0(log_size);
  4426. elog->num_events = cpu_to_le32(ebuf->nelems);
  4427. event = (NvmeFdpEvent *)(elog + 1);
  4428. if (ebuf->nelems && ebuf->start == ebuf->next) {
  4429. unsigned int nelems = (NVME_FDP_MAX_EVENTS - ebuf->start);
/* wrap around: copy [start, NVME_FDP_MAX_EVENTS) followed by [0, next) */
  4431. memcpy(event, &ebuf->events[ebuf->start],
  4432. sizeof(NvmeFdpEvent) * nelems);
  4433. memcpy(event + nelems, ebuf->events,
  4434. sizeof(NvmeFdpEvent) * ebuf->next);
  4435. } else if (ebuf->start < ebuf->next) {
  4436. memcpy(event, &ebuf->events[ebuf->start],
  4437. sizeof(NvmeFdpEvent) * (ebuf->next - ebuf->start));
  4438. }
  4439. return nvme_c2h(n, (uint8_t *)elog + off, trans_len, req);
  4440. }
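/*
 * Get Log Page: the transfer length is (NUMDU:NUMDL + 1) dwords and the
 * offset is LPOU:LPOL, which must be dword aligned; the length is checked
 * against MDTS before dispatching on the log identifier.
 */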
  4441. static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
  4442. {
  4443. NvmeCmd *cmd = &req->cmd;
  4444. uint32_t dw10 = le32_to_cpu(cmd->cdw10);
  4445. uint32_t dw11 = le32_to_cpu(cmd->cdw11);
  4446. uint32_t dw12 = le32_to_cpu(cmd->cdw12);
  4447. uint32_t dw13 = le32_to_cpu(cmd->cdw13);
  4448. uint8_t lid = dw10 & 0xff;
  4449. uint8_t lsp = (dw10 >> 8) & 0xf;
  4450. uint8_t rae = (dw10 >> 15) & 0x1;
  4451. uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24;
  4452. uint32_t numdl, numdu, lspi;
  4453. uint64_t off, lpol, lpou;
  4454. size_t len;
  4455. uint16_t status;
  4456. numdl = (dw10 >> 16);
  4457. numdu = (dw11 & 0xffff);
  4458. lspi = (dw11 >> 16);
  4459. lpol = dw12;
  4460. lpou = dw13;
  4461. len = (((numdu << 16) | numdl) + 1) << 2;
  4462. off = (lpou << 32ULL) | lpol;
  4463. if (off & 0x3) {
  4464. return NVME_INVALID_FIELD | NVME_DNR;
  4465. }
  4466. trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
  4467. status = nvme_check_mdts(n, len);
  4468. if (status) {
  4469. return status;
  4470. }
  4471. switch (lid) {
  4472. case NVME_LOG_ERROR_INFO:
  4473. return nvme_error_info(n, rae, len, off, req);
  4474. case NVME_LOG_SMART_INFO:
  4475. return nvme_smart_info(n, rae, len, off, req);
  4476. case NVME_LOG_FW_SLOT_INFO:
  4477. return nvme_fw_log_info(n, len, off, req);
  4478. case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END:
  4479. return nvme_vendor_specific_log(n, rae, len, off, req, lid);
  4480. case NVME_LOG_CHANGED_NSLIST:
  4481. return nvme_changed_nslist(n, rae, len, off, req);
  4482. case NVME_LOG_CMD_EFFECTS:
  4483. return nvme_cmd_effects(n, csi, len, off, req);
  4484. case NVME_LOG_ENDGRP:
  4485. return nvme_endgrp_info(n, rae, len, off, req);
  4486. case NVME_LOG_FDP_CONFS:
  4487. return nvme_fdp_confs(n, lspi, len, off, req);
  4488. case NVME_LOG_FDP_RUH_USAGE:
  4489. return nvme_fdp_ruh_usage(n, lspi, dw10, dw12, len, off, req);
  4490. case NVME_LOG_FDP_STATS:
  4491. return nvme_fdp_stats(n, lspi, len, off, req);
  4492. case NVME_LOG_FDP_EVENTS:
  4493. return nvme_fdp_events(n, lspi, len, off, req);
  4494. default:
  4495. trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
  4496. return NVME_INVALID_FIELD | NVME_DNR;
  4497. }
  4498. }
  4499. static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
  4500. {
  4501. PCIDevice *pci = PCI_DEVICE(n);
  4502. uint16_t offset = (cq->cqid << 3) + (1 << 2);
  4503. n->cq[cq->cqid] = NULL;
  4504. qemu_bh_delete(cq->bh);
  4505. if (cq->ioeventfd_enabled) {
  4506. memory_region_del_eventfd(&n->iomem,
  4507. 0x1000 + offset, 4, false, 0, &cq->notifier);
  4508. event_notifier_set_handler(&cq->notifier, NULL);
  4509. event_notifier_cleanup(&cq->notifier);
  4510. }
  4511. if (msix_enabled(pci) && cq->irq_enabled) {
  4512. msix_vector_unuse(pci, cq->vector);
  4513. }
  4514. if (cq->cqid) {
  4515. g_free(cq);
  4516. }
  4517. }
  4518. static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
  4519. {
  4520. NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
  4521. NvmeCQueue *cq;
  4522. uint16_t qid = le16_to_cpu(c->qid);
  4523. if (unlikely(!qid || nvme_check_cqid(n, qid))) {
  4524. trace_pci_nvme_err_invalid_del_cq_cqid(qid);
  4525. return NVME_INVALID_CQID | NVME_DNR;
  4526. }
  4527. cq = n->cq[qid];
  4528. if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
  4529. trace_pci_nvme_err_invalid_del_cq_notempty(qid);
  4530. return NVME_INVALID_QUEUE_DEL;
  4531. }
  4532. if (cq->irq_enabled && cq->tail != cq->head) {
  4533. n->cq_pending--;
  4534. }
  4535. nvme_irq_deassert(n, cq);
  4536. trace_pci_nvme_del_cq(qid);
  4537. nvme_free_cq(cq, n);
  4538. return NVME_SUCCESS;
  4539. }
  4540. static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
  4541. uint16_t cqid, uint16_t vector, uint16_t size,
  4542. uint16_t irq_enabled)
  4543. {
  4544. PCIDevice *pci = PCI_DEVICE(n);
  4545. if (msix_enabled(pci) && irq_enabled) {
  4546. msix_vector_use(pci, vector);
  4547. }
  4548. cq->ctrl = n;
  4549. cq->cqid = cqid;
  4550. cq->size = size;
  4551. cq->dma_addr = dma_addr;
  4552. cq->phase = 1;
  4553. cq->irq_enabled = irq_enabled;
  4554. cq->vector = vector;
  4555. cq->head = cq->tail = 0;
  4556. QTAILQ_INIT(&cq->req_list);
  4557. QTAILQ_INIT(&cq->sq_list);
  4558. if (n->dbbuf_enabled) {
  4559. cq->db_addr = n->dbbuf_dbs + (cqid << 3) + (1 << 2);
  4560. cq->ei_addr = n->dbbuf_eis + (cqid << 3) + (1 << 2);
  4561. if (n->params.ioeventfd && cqid != 0) {
  4562. if (!nvme_init_cq_ioeventfd(cq)) {
  4563. cq->ioeventfd_enabled = true;
  4564. }
  4565. }
  4566. }
  4567. n->cq[cqid] = cq;
  4568. cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
  4569. &DEVICE(cq->ctrl)->mem_reentrancy_guard);
  4570. }
  4571. static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
  4572. {
  4573. NvmeCQueue *cq;
  4574. NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd;
  4575. uint16_t cqid = le16_to_cpu(c->cqid);
  4576. uint16_t vector = le16_to_cpu(c->irq_vector);
  4577. uint16_t qsize = le16_to_cpu(c->qsize);
  4578. uint16_t qflags = le16_to_cpu(c->cq_flags);
  4579. uint64_t prp1 = le64_to_cpu(c->prp1);
uint32_t cc = ldl_le_p(&n->bar.cc);
  4581. uint8_t iocqes = NVME_CC_IOCQES(cc);
  4582. uint8_t iosqes = NVME_CC_IOSQES(cc);
  4583. trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
  4584. NVME_CQ_FLAGS_IEN(qflags) != 0);
  4585. if (iosqes != NVME_SQES || iocqes != NVME_CQES) {
  4586. trace_pci_nvme_err_invalid_create_cq_entry_size(iosqes, iocqes);
  4587. return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
  4588. }
  4589. if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) {
  4590. trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
  4591. return NVME_INVALID_QID | NVME_DNR;
  4592. }
  4593. if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
  4594. trace_pci_nvme_err_invalid_create_cq_size(qsize);
  4595. return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
  4596. }
  4597. if (unlikely(prp1 & (n->page_size - 1))) {
  4598. trace_pci_nvme_err_invalid_create_cq_addr(prp1);
  4599. return NVME_INVALID_PRP_OFFSET | NVME_DNR;
  4600. }
  4601. if (unlikely(!msix_enabled(PCI_DEVICE(n)) && vector)) {
  4602. trace_pci_nvme_err_invalid_create_cq_vector(vector);
  4603. return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
  4604. }
  4605. if (unlikely(vector >= n->conf_msix_qsize)) {
  4606. trace_pci_nvme_err_invalid_create_cq_vector(vector);
  4607. return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
  4608. }
  4609. if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
  4610. trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
  4611. return NVME_INVALID_FIELD | NVME_DNR;
  4612. }
  4613. cq = g_malloc0(sizeof(*cq));
  4614. nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
  4615. NVME_CQ_FLAGS_IEN(qflags));
  4616. /*
  4617. * It is only required to set qs_created when creating a completion queue;
  4618. * creating a submission queue without a matching completion queue will
  4619. * fail.
  4620. */
  4621. n->qs_created = true;
  4622. return NVME_SUCCESS;
  4623. }
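/* Return an all-zero Identify data structure (4KiB). */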
  4624. static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req)
  4625. {
  4626. uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
  4627. return nvme_c2h(n, id, sizeof(id), req);
  4628. }
  4629. static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
  4630. {
  4631. trace_pci_nvme_identify_ctrl();
  4632. return nvme_c2h(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), req);
  4633. }
  4634. static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
  4635. {
  4636. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4637. uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {};
  4638. NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id;
  4639. trace_pci_nvme_identify_ctrl_csi(c->csi);
  4640. switch (c->csi) {
  4641. case NVME_CSI_NVM:
  4642. id_nvm->vsl = n->params.vsl;
  4643. id_nvm->dmrl = NVME_ID_CTRL_NVM_DMRL_MAX;
  4644. id_nvm->dmrsl = cpu_to_le32(n->dmrsl);
  4645. id_nvm->dmsl = NVME_ID_CTRL_NVM_DMRL_MAX * n->dmrsl;
  4646. break;
  4647. case NVME_CSI_ZONED:
  4648. ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl;
  4649. break;
  4650. default:
  4651. return NVME_INVALID_FIELD | NVME_DNR;
  4652. }
  4653. return nvme_c2h(n, id, sizeof(id), req);
  4654. }
  4655. static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
  4656. {
  4657. NvmeNamespace *ns;
  4658. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4659. uint32_t nsid = le32_to_cpu(c->nsid);
  4660. trace_pci_nvme_identify_ns(nsid);
  4661. if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
  4662. return NVME_INVALID_NSID | NVME_DNR;
  4663. }
  4664. ns = nvme_ns(n, nsid);
  4665. if (unlikely(!ns)) {
  4666. if (!active) {
  4667. ns = nvme_subsys_ns(n->subsys, nsid);
  4668. if (!ns) {
  4669. return nvme_rpt_empty_id_struct(n, req);
  4670. }
  4671. } else {
  4672. return nvme_rpt_empty_id_struct(n, req);
  4673. }
  4674. }
  4675. if (active || ns->csi == NVME_CSI_NVM) {
  4676. return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req);
  4677. }
  4678. return NVME_INVALID_IOCS | NVME_DNR;
  4679. }
  4680. static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
  4681. bool attached)
  4682. {
  4683. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4684. uint32_t nsid = le32_to_cpu(c->nsid);
  4685. uint16_t min_id = le16_to_cpu(c->ctrlid);
  4686. uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
  4687. uint16_t *ids = &list[1];
  4688. NvmeNamespace *ns;
  4689. NvmeCtrl *ctrl;
  4690. int cntlid, nr_ids = 0;
  4691. trace_pci_nvme_identify_ctrl_list(c->cns, min_id);
  4692. if (!n->subsys) {
  4693. return NVME_INVALID_FIELD | NVME_DNR;
  4694. }
  4695. if (attached) {
  4696. if (nsid == NVME_NSID_BROADCAST) {
  4697. return NVME_INVALID_FIELD | NVME_DNR;
  4698. }
  4699. ns = nvme_subsys_ns(n->subsys, nsid);
  4700. if (!ns) {
  4701. return NVME_INVALID_FIELD | NVME_DNR;
  4702. }
  4703. }
  4704. for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) {
  4705. ctrl = nvme_subsys_ctrl(n->subsys, cntlid);
  4706. if (!ctrl) {
  4707. continue;
  4708. }
  4709. if (attached && !nvme_ns(ctrl, nsid)) {
  4710. continue;
  4711. }
  4712. ids[nr_ids++] = cntlid;
  4713. }
  4714. list[0] = nr_ids;
  4715. return nvme_c2h(n, (uint8_t *)list, sizeof(list), req);
  4716. }
  4717. static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req)
  4718. {
  4719. trace_pci_nvme_identify_pri_ctrl_cap(le16_to_cpu(n->pri_ctrl_cap.cntlid));
  4720. return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap,
  4721. sizeof(NvmePriCtrlCap), req);
  4722. }
  4723. static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req)
  4724. {
  4725. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4726. uint16_t pri_ctrl_id = le16_to_cpu(n->pri_ctrl_cap.cntlid);
  4727. uint16_t min_id = le16_to_cpu(c->ctrlid);
  4728. uint8_t num_sec_ctrl = n->nr_sec_ctrls;
  4729. NvmeSecCtrlList list = {0};
  4730. uint8_t i;
  4731. for (i = 0; i < num_sec_ctrl; i++) {
  4732. if (n->sec_ctrl_list[i].scid >= min_id) {
  4733. list.numcntl = MIN(num_sec_ctrl - i, 127);
  4734. memcpy(&list.sec, n->sec_ctrl_list + i,
  4735. list.numcntl * sizeof(NvmeSecCtrlEntry));
  4736. break;
  4737. }
  4738. }
  4739. trace_pci_nvme_identify_sec_ctrl_list(pri_ctrl_id, list.numcntl);
  4740. return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req);
  4741. }
  4742. static uint16_t nvme_identify_ns_ind(NvmeCtrl *n, NvmeRequest *req, bool alloc)
  4743. {
  4744. NvmeNamespace *ns;
  4745. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4746. uint32_t nsid = le32_to_cpu(c->nsid);
  4747. trace_pci_nvme_identify_ns_ind(nsid);
  4748. if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
  4749. return NVME_INVALID_NSID | NVME_DNR;
  4750. }
  4751. ns = nvme_ns(n, nsid);
  4752. if (unlikely(!ns)) {
  4753. if (alloc) {
  4754. ns = nvme_subsys_ns(n->subsys, nsid);
  4755. if (!ns) {
  4756. return nvme_rpt_empty_id_struct(n, req);
  4757. }
  4758. } else {
  4759. return nvme_rpt_empty_id_struct(n, req);
  4760. }
  4761. }
  4762. return nvme_c2h(n, (uint8_t *)&ns->id_ns_ind, sizeof(NvmeIdNsInd), req);
  4763. }
  4764. static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
  4765. bool active)
  4766. {
  4767. NvmeNamespace *ns;
  4768. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4769. uint32_t nsid = le32_to_cpu(c->nsid);
  4770. trace_pci_nvme_identify_ns_csi(nsid, c->csi);
  4771. if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
  4772. return NVME_INVALID_NSID | NVME_DNR;
  4773. }
  4774. ns = nvme_ns(n, nsid);
  4775. if (unlikely(!ns)) {
  4776. if (!active) {
  4777. ns = nvme_subsys_ns(n->subsys, nsid);
  4778. if (!ns) {
  4779. return nvme_rpt_empty_id_struct(n, req);
  4780. }
  4781. } else {
  4782. return nvme_rpt_empty_id_struct(n, req);
  4783. }
  4784. }
  4785. if (c->csi == NVME_CSI_NVM) {
  4786. return nvme_c2h(n, (uint8_t *)&ns->id_ns_nvm, sizeof(NvmeIdNsNvm),
  4787. req);
  4788. } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) {
  4789. return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned),
  4790. req);
  4791. }
  4792. return NVME_INVALID_FIELD | NVME_DNR;
  4793. }
  4794. static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
  4795. bool active)
  4796. {
  4797. NvmeNamespace *ns;
  4798. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4799. uint32_t min_nsid = le32_to_cpu(c->nsid);
  4800. uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
  4801. static const int data_len = sizeof(list);
  4802. uint32_t *list_ptr = (uint32_t *)list;
  4803. int i, j = 0;
  4804. trace_pci_nvme_identify_nslist(min_nsid);
/*
 * Both FFFFFFFFh (NVME_NSID_BROADCAST) and FFFFFFFEh are invalid values,
 * since the Active Namespace ID List must only return namespaces with IDs
 * *higher* than the NSID specified in the command. This is also specified
 * in the spec (NVM Express v1.3d, Section 5.15.4).
 */
  4811. if (min_nsid >= NVME_NSID_BROADCAST - 1) {
  4812. return NVME_INVALID_NSID | NVME_DNR;
  4813. }
  4814. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  4815. ns = nvme_ns(n, i);
  4816. if (!ns) {
  4817. if (!active) {
  4818. ns = nvme_subsys_ns(n->subsys, i);
  4819. if (!ns) {
  4820. continue;
  4821. }
  4822. } else {
  4823. continue;
  4824. }
  4825. }
  4826. if (ns->params.nsid <= min_nsid) {
  4827. continue;
  4828. }
  4829. list_ptr[j++] = cpu_to_le32(ns->params.nsid);
  4830. if (j == data_len / sizeof(uint32_t)) {
  4831. break;
  4832. }
  4833. }
  4834. return nvme_c2h(n, list, data_len, req);
  4835. }
  4836. static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
  4837. bool active)
  4838. {
  4839. NvmeNamespace *ns;
  4840. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4841. uint32_t min_nsid = le32_to_cpu(c->nsid);
  4842. uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
  4843. static const int data_len = sizeof(list);
  4844. uint32_t *list_ptr = (uint32_t *)list;
  4845. int i, j = 0;
  4846. trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi);
/*
 * As in nvme_identify_nslist(), FFFFFFFFh/FFFFFFFEh are invalid.
 */
  4850. if (min_nsid >= NVME_NSID_BROADCAST - 1) {
  4851. return NVME_INVALID_NSID | NVME_DNR;
  4852. }
  4853. if (c->csi != NVME_CSI_NVM && c->csi != NVME_CSI_ZONED) {
  4854. return NVME_INVALID_FIELD | NVME_DNR;
  4855. }
  4856. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  4857. ns = nvme_ns(n, i);
  4858. if (!ns) {
  4859. if (!active) {
  4860. ns = nvme_subsys_ns(n->subsys, i);
  4861. if (!ns) {
  4862. continue;
  4863. }
  4864. } else {
  4865. continue;
  4866. }
  4867. }
  4868. if (ns->params.nsid <= min_nsid || c->csi != ns->csi) {
  4869. continue;
  4870. }
  4871. list_ptr[j++] = cpu_to_le32(ns->params.nsid);
  4872. if (j == data_len / sizeof(uint32_t)) {
  4873. break;
  4874. }
  4875. }
  4876. return nvme_c2h(n, list, data_len, req);
  4877. }
  4878. static uint16_t nvme_endurance_group_list(NvmeCtrl *n, NvmeRequest *req)
  4879. {
  4880. uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
  4881. uint16_t *nr_ids = &list[0];
  4882. uint16_t *ids = &list[1];
  4883. uint16_t endgid = le32_to_cpu(req->cmd.cdw11) & 0xffff;
  4884. /*
  4885. * The current nvme-subsys only supports Endurance Group #1.
  4886. */
  4887. if (!endgid) {
  4888. *nr_ids = 1;
  4889. ids[0] = 1;
  4890. } else {
  4891. *nr_ids = 0;
  4892. }
  4893. return nvme_c2h(n, list, sizeof(list), req);
  4894. }
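/*
 * Namespace Identification Descriptor list: emit UUID, NGUID and EUI-64
 * descriptors when the corresponding namespace parameters are set, and
 * always include a CSI descriptor.
 */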
  4895. static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
  4896. {
  4897. NvmeNamespace *ns;
  4898. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4899. uint32_t nsid = le32_to_cpu(c->nsid);
  4900. uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
  4901. uint8_t *pos = list;
  4902. struct {
  4903. NvmeIdNsDescr hdr;
  4904. uint8_t v[NVME_NIDL_UUID];
  4905. } QEMU_PACKED uuid = {};
  4906. struct {
  4907. NvmeIdNsDescr hdr;
  4908. uint8_t v[NVME_NIDL_NGUID];
  4909. } QEMU_PACKED nguid = {};
  4910. struct {
  4911. NvmeIdNsDescr hdr;
  4912. uint64_t v;
  4913. } QEMU_PACKED eui64 = {};
  4914. struct {
  4915. NvmeIdNsDescr hdr;
  4916. uint8_t v;
  4917. } QEMU_PACKED csi = {};
  4918. trace_pci_nvme_identify_ns_descr_list(nsid);
  4919. if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
  4920. return NVME_INVALID_NSID | NVME_DNR;
  4921. }
  4922. ns = nvme_ns(n, nsid);
  4923. if (unlikely(!ns)) {
  4924. return NVME_INVALID_FIELD | NVME_DNR;
  4925. }
  4926. if (!qemu_uuid_is_null(&ns->params.uuid)) {
  4927. uuid.hdr.nidt = NVME_NIDT_UUID;
  4928. uuid.hdr.nidl = NVME_NIDL_UUID;
  4929. memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
  4930. memcpy(pos, &uuid, sizeof(uuid));
  4931. pos += sizeof(uuid);
  4932. }
  4933. if (!nvme_nguid_is_null(&ns->params.nguid)) {
  4934. nguid.hdr.nidt = NVME_NIDT_NGUID;
  4935. nguid.hdr.nidl = NVME_NIDL_NGUID;
  4936. memcpy(nguid.v, ns->params.nguid.data, NVME_NIDL_NGUID);
  4937. memcpy(pos, &nguid, sizeof(nguid));
  4938. pos += sizeof(nguid);
  4939. }
  4940. if (ns->params.eui64) {
  4941. eui64.hdr.nidt = NVME_NIDT_EUI64;
  4942. eui64.hdr.nidl = NVME_NIDL_EUI64;
  4943. eui64.v = cpu_to_be64(ns->params.eui64);
  4944. memcpy(pos, &eui64, sizeof(eui64));
  4945. pos += sizeof(eui64);
  4946. }
  4947. csi.hdr.nidt = NVME_NIDT_CSI;
  4948. csi.hdr.nidl = NVME_NIDL_CSI;
  4949. csi.v = ns->csi;
  4950. memcpy(pos, &csi, sizeof(csi));
  4951. pos += sizeof(csi);
  4952. return nvme_c2h(n, list, sizeof(list), req);
  4953. }
  4954. static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req)
  4955. {
  4956. uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
  4957. static const int data_len = sizeof(list);
  4958. trace_pci_nvme_identify_cmd_set();
  4959. NVME_SET_CSI(*list, NVME_CSI_NVM);
  4960. NVME_SET_CSI(*list, NVME_CSI_ZONED);
  4961. return nvme_c2h(n, list, data_len, req);
  4962. }
  4963. static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
  4964. {
  4965. NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
  4966. trace_pci_nvme_identify(nvme_cid(req), c->cns, le16_to_cpu(c->ctrlid),
  4967. c->csi);
  4968. switch (c->cns) {
  4969. case NVME_ID_CNS_NS:
  4970. return nvme_identify_ns(n, req, true);
  4971. case NVME_ID_CNS_NS_PRESENT:
  4972. return nvme_identify_ns(n, req, false);
  4973. case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST:
  4974. return nvme_identify_ctrl_list(n, req, true);
  4975. case NVME_ID_CNS_CTRL_LIST:
  4976. return nvme_identify_ctrl_list(n, req, false);
  4977. case NVME_ID_CNS_PRIMARY_CTRL_CAP:
  4978. return nvme_identify_pri_ctrl_cap(n, req);
  4979. case NVME_ID_CNS_SECONDARY_CTRL_LIST:
  4980. return nvme_identify_sec_ctrl_list(n, req);
  4981. case NVME_ID_CNS_CS_NS:
  4982. return nvme_identify_ns_csi(n, req, true);
  4983. case NVME_ID_CNS_CS_IND_NS:
  4984. return nvme_identify_ns_ind(n, req, false);
  4985. case NVME_ID_CNS_CS_IND_NS_ALLOCATED:
  4986. return nvme_identify_ns_ind(n, req, true);
  4987. case NVME_ID_CNS_CS_NS_PRESENT:
  4988. return nvme_identify_ns_csi(n, req, false);
  4989. case NVME_ID_CNS_CTRL:
  4990. return nvme_identify_ctrl(n, req);
  4991. case NVME_ID_CNS_CS_CTRL:
  4992. return nvme_identify_ctrl_csi(n, req);
  4993. case NVME_ID_CNS_NS_ACTIVE_LIST:
  4994. return nvme_identify_nslist(n, req, true);
  4995. case NVME_ID_CNS_NS_PRESENT_LIST:
  4996. return nvme_identify_nslist(n, req, false);
  4997. case NVME_ID_CNS_CS_NS_ACTIVE_LIST:
  4998. return nvme_identify_nslist_csi(n, req, true);
  4999. case NVME_ID_CNS_ENDURANCE_GROUP_LIST:
  5000. return nvme_endurance_group_list(n, req);
  5001. case NVME_ID_CNS_CS_NS_PRESENT_LIST:
  5002. return nvme_identify_nslist_csi(n, req, false);
  5003. case NVME_ID_CNS_NS_DESCR_LIST:
  5004. return nvme_identify_ns_descr_list(n, req);
  5005. case NVME_ID_CNS_IO_COMMAND_SET:
  5006. return nvme_identify_cmd_set(n, req);
  5007. default:
  5008. trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
  5009. return NVME_INVALID_FIELD | NVME_DNR;
  5010. }
  5011. }
  5012. static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req)
  5013. {
  5014. uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff;
  5015. uint16_t cid = (le32_to_cpu(req->cmd.cdw10) >> 16) & 0xffff;
  5016. NvmeSQueue *sq = n->sq[sqid];
  5017. NvmeRequest *r, *next;
  5018. int i;
  5019. req->cqe.result = 1;
  5020. if (nvme_check_sqid(n, sqid)) {
  5021. return NVME_INVALID_FIELD | NVME_DNR;
  5022. }
  5023. if (sqid == 0) {
  5024. for (i = 0; i < n->outstanding_aers; i++) {
  5025. NvmeRequest *re = n->aer_reqs[i];
  5026. if (re->cqe.cid == cid) {
  5027. memmove(n->aer_reqs + i, n->aer_reqs + i + 1,
  5028. (n->outstanding_aers - i - 1) * sizeof(NvmeRequest *));
  5029. n->outstanding_aers--;
  5030. re->status = NVME_CMD_ABORT_REQ;
  5031. req->cqe.result = 0;
  5032. nvme_enqueue_req_completion(&n->admin_cq, re);
  5033. return NVME_SUCCESS;
  5034. }
  5035. }
  5036. }
  5037. QTAILQ_FOREACH_SAFE(r, &sq->out_req_list, entry, next) {
  5038. if (r->cqe.cid == cid) {
  5039. if (r->aiocb) {
  5040. r->status = NVME_CMD_ABORT_REQ;
  5041. blk_aio_cancel_async(r->aiocb);
  5042. }
  5043. break;
  5044. }
  5045. }
  5046. return NVME_SUCCESS;
  5047. }
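/*
 * Timestamp feature bookkeeping: remember the host supplied value and the
 * virtual clock at the time it was set; nvme_get_timestamp() reports the
 * host value plus the milliseconds elapsed since then.
 */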
  5048. static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
  5049. {
  5050. trace_pci_nvme_setfeat_timestamp(ts);
  5051. n->host_timestamp = le64_to_cpu(ts);
  5052. n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
  5053. }
  5054. static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
  5055. {
  5056. uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
  5057. uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms;
  5058. union nvme_timestamp {
  5059. struct {
  5060. uint64_t timestamp:48;
  5061. uint64_t sync:1;
  5062. uint64_t origin:3;
  5063. uint64_t rsvd1:12;
  5064. };
  5065. uint64_t all;
  5066. };
  5067. union nvme_timestamp ts;
  5068. ts.all = 0;
  5069. ts.timestamp = n->host_timestamp + elapsed_time;
  5070. /* If the host timestamp is non-zero, set the timestamp origin */
  5071. ts.origin = n->host_timestamp ? 0x01 : 0x00;
  5072. trace_pci_nvme_getfeat_timestamp(ts.all);
  5073. return cpu_to_le64(ts.all);
  5074. }
  5075. static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
  5076. {
  5077. uint64_t timestamp = nvme_get_timestamp(n);
  5078. return nvme_c2h(n, (uint8_t *)&timestamp, sizeof(timestamp), req);
  5079. }
static uint16_t nvme_get_feature_fdp(NvmeCtrl *n, uint32_t endgrpid,
  5081. uint32_t *result)
  5082. {
  5083. *result = 0;
  5084. if (!n->subsys || !n->subsys->endgrp.fdp.enabled) {
  5085. return NVME_INVALID_FIELD | NVME_DNR;
  5086. }
  5087. *result = FIELD_DP16(0, FEAT_FDP, FDPE, 1);
  5088. *result = FIELD_DP16(*result, FEAT_FDP, CONF_NDX, 0);
  5089. return NVME_SUCCESS;
  5090. }
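/*
 * Get Features, FDP Events: return one event descriptor per supported
 * event type for the given placement handle, up to the NOET limit supplied
 * by the host; the completion result carries the number of descriptors
 * actually populated.
 */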
  5091. static uint16_t nvme_get_feature_fdp_events(NvmeCtrl *n, NvmeNamespace *ns,
  5092. NvmeRequest *req, uint32_t *result)
  5093. {
  5094. NvmeCmd *cmd = &req->cmd;
  5095. uint32_t cdw11 = le32_to_cpu(cmd->cdw11);
  5096. uint16_t ph = cdw11 & 0xffff;
  5097. uint8_t noet = (cdw11 >> 16) & 0xff;
  5098. uint16_t ruhid, ret;
  5099. uint32_t nentries = 0;
  5100. uint8_t s_events_ndx = 0;
  5101. size_t s_events_siz = sizeof(NvmeFdpEventDescr) * noet;
  5102. g_autofree NvmeFdpEventDescr *s_events = g_malloc0(s_events_siz);
  5103. NvmeRuHandle *ruh;
  5104. NvmeFdpEventDescr *s_event;
  5105. if (!n->subsys || !n->subsys->endgrp.fdp.enabled) {
  5106. return NVME_FDP_DISABLED | NVME_DNR;
  5107. }
  5108. if (!nvme_ph_valid(ns, ph)) {
  5109. return NVME_INVALID_FIELD | NVME_DNR;
  5110. }
  5111. ruhid = ns->fdp.phs[ph];
  5112. ruh = &n->subsys->endgrp.fdp.ruhs[ruhid];
  5113. assert(ruh);
  5114. if (unlikely(noet == 0)) {
  5115. return NVME_INVALID_FIELD | NVME_DNR;
  5116. }
  5117. for (uint8_t event_type = 0; event_type < FDP_EVT_MAX; event_type++) {
  5118. uint8_t shift = nvme_fdp_evf_shifts[event_type];
  5119. if (!shift && event_type) {
/*
 * Only the first entry (event_type == 0) legitimately has a shift value of
 * 0; for any other event type a zero shift means the entry is simply
 * unpopulated, so skip it.
 */
  5124. continue;
  5125. }
  5126. nentries++;
  5127. s_event = &s_events[s_events_ndx];
  5128. s_event->evt = event_type;
  5129. s_event->evta = (ruh->event_filter >> shift) & 0x1;
  5130. /* break if all `noet` entries are filled */
  5131. if ((++s_events_ndx) == noet) {
  5132. break;
  5133. }
  5134. }
  5135. ret = nvme_c2h(n, s_events, s_events_siz, req);
  5136. if (ret) {
  5137. return ret;
  5138. }
  5139. *result = nentries;
  5140. return NVME_SUCCESS;
  5141. }
  5142. static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
  5143. {
  5144. NvmeCmd *cmd = &req->cmd;
  5145. uint32_t dw10 = le32_to_cpu(cmd->cdw10);
  5146. uint32_t dw11 = le32_to_cpu(cmd->cdw11);
  5147. uint32_t nsid = le32_to_cpu(cmd->nsid);
  5148. uint32_t result = 0;
  5149. uint8_t fid = NVME_GETSETFEAT_FID(dw10);
  5150. NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10);
  5151. uint16_t iv;
  5152. NvmeNamespace *ns;
  5153. int i;
  5154. uint16_t endgrpid = 0, ret = NVME_SUCCESS;
  5155. static const uint32_t nvme_feature_default[NVME_FID_MAX] = {
  5156. [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT,
  5157. };
  5158. trace_pci_nvme_getfeat(nvme_cid(req), nsid, fid, sel, dw11);
  5159. if (!nvme_feature_support[fid]) {
  5160. return NVME_INVALID_FIELD | NVME_DNR;
  5161. }
  5162. if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) {
  5163. if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
/*
 * The Reservation Notification Mask and Reservation Persistence features
 * require a status code of Invalid Field in Command when NSID is
 * FFFFFFFFh. Since the device does not support those features, we can
 * always return Invalid Namespace or Format, as we do for all other
 * features.
 */
  5171. return NVME_INVALID_NSID | NVME_DNR;
  5172. }
  5173. if (!nvme_ns(n, nsid)) {
  5174. return NVME_INVALID_FIELD | NVME_DNR;
  5175. }
  5176. }
  5177. switch (sel) {
  5178. case NVME_GETFEAT_SELECT_CURRENT:
  5179. break;
  5180. case NVME_GETFEAT_SELECT_SAVED:
  5181. /* no features are saveable by the controller; fallthrough */
  5182. case NVME_GETFEAT_SELECT_DEFAULT:
  5183. goto defaults;
  5184. case NVME_GETFEAT_SELECT_CAP:
  5185. result = nvme_feature_cap[fid];
  5186. goto out;
  5187. }
  5188. switch (fid) {
  5189. case NVME_TEMPERATURE_THRESHOLD:
  5190. result = 0;
  5191. /*
  5192. * The controller only implements the Composite Temperature sensor, so
  5193. * return 0 for all other sensors.
  5194. */
  5195. if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
  5196. goto out;
  5197. }
  5198. switch (NVME_TEMP_THSEL(dw11)) {
  5199. case NVME_TEMP_THSEL_OVER:
  5200. result = n->features.temp_thresh_hi;
  5201. goto out;
  5202. case NVME_TEMP_THSEL_UNDER:
  5203. result = n->features.temp_thresh_low;
  5204. goto out;
  5205. }
  5206. return NVME_INVALID_FIELD | NVME_DNR;
  5207. case NVME_ERROR_RECOVERY:
  5208. if (!nvme_nsid_valid(n, nsid)) {
  5209. return NVME_INVALID_NSID | NVME_DNR;
  5210. }
  5211. ns = nvme_ns(n, nsid);
  5212. if (unlikely(!ns)) {
  5213. return NVME_INVALID_FIELD | NVME_DNR;
  5214. }
  5215. result = ns->features.err_rec;
  5216. goto out;
  5217. case NVME_VOLATILE_WRITE_CACHE:
  5218. result = 0;
  5219. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  5220. ns = nvme_ns(n, i);
  5221. if (!ns) {
  5222. continue;
  5223. }
  5224. result = blk_enable_write_cache(ns->blkconf.blk);
  5225. if (result) {
  5226. break;
  5227. }
  5228. }
  5229. trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
  5230. goto out;
  5231. case NVME_ASYNCHRONOUS_EVENT_CONF:
  5232. result = n->features.async_config;
  5233. goto out;
  5234. case NVME_TIMESTAMP:
  5235. return nvme_get_feature_timestamp(n, req);
  5236. case NVME_HOST_BEHAVIOR_SUPPORT:
  5237. return nvme_c2h(n, (uint8_t *)&n->features.hbs,
  5238. sizeof(n->features.hbs), req);
  5239. case NVME_FDP_MODE:
  5240. endgrpid = dw11 & 0xff;
  5241. if (endgrpid != 0x1) {
  5242. return NVME_INVALID_FIELD | NVME_DNR;
  5243. }
  5244. ret = nvme_get_feature_fdp(n, endgrpid, &result);
  5245. if (ret) {
  5246. return ret;
  5247. }
  5248. goto out;
  5249. case NVME_FDP_EVENTS:
  5250. if (!nvme_nsid_valid(n, nsid)) {
  5251. return NVME_INVALID_NSID | NVME_DNR;
  5252. }
  5253. ns = nvme_ns(n, nsid);
  5254. if (unlikely(!ns)) {
  5255. return NVME_INVALID_FIELD | NVME_DNR;
  5256. }
  5257. ret = nvme_get_feature_fdp_events(n, ns, req, &result);
  5258. if (ret) {
  5259. return ret;
  5260. }
  5261. goto out;
  5262. default:
  5263. break;
  5264. }
  5265. defaults:
  5266. switch (fid) {
  5267. case NVME_TEMPERATURE_THRESHOLD:
  5268. result = 0;
  5269. if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
  5270. break;
  5271. }
  5272. if (NVME_TEMP_THSEL(dw11) == NVME_TEMP_THSEL_OVER) {
  5273. result = NVME_TEMPERATURE_WARNING;
  5274. }
  5275. break;
  5276. case NVME_NUMBER_OF_QUEUES:
  5277. result = (n->conf_ioqpairs - 1) | ((n->conf_ioqpairs - 1) << 16);
  5278. trace_pci_nvme_getfeat_numq(result);
  5279. break;
  5280. case NVME_INTERRUPT_VECTOR_CONF:
  5281. iv = dw11 & 0xffff;
  5282. if (iv >= n->conf_ioqpairs + 1) {
  5283. return NVME_INVALID_FIELD | NVME_DNR;
  5284. }
  5285. result = iv;
  5286. if (iv == n->admin_cq.vector) {
  5287. result |= NVME_INTVC_NOCOALESCING;
  5288. }
  5289. break;
  5290. case NVME_FDP_MODE:
  5291. endgrpid = dw11 & 0xff;
  5292. if (endgrpid != 0x1) {
  5293. return NVME_INVALID_FIELD | NVME_DNR;
  5294. }
  5295. ret = nvme_get_feature_fdp(n, endgrpid, &result);
  5296. if (ret) {
  5297. return ret;
  5298. }
  5299. break;
  5300. case NVME_WRITE_ATOMICITY:
  5301. result = n->dn;
  5302. break;
  5303. default:
  5304. result = nvme_feature_default[fid];
  5305. break;
  5306. }
  5307. out:
  5308. req->cqe.result = cpu_to_le32(result);
  5309. return ret;
  5310. }
  5311. static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
  5312. {
  5313. uint16_t ret;
  5314. uint64_t timestamp;
  5315. ret = nvme_h2c(n, (uint8_t *)&timestamp, sizeof(timestamp), req);
  5316. if (ret) {
  5317. return ret;
  5318. }
  5319. nvme_set_timestamp(n, timestamp);
  5320. return NVME_SUCCESS;
  5321. }
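/*
 * Set Features, FDP Events: read NOET event type bytes from the host and
 * set or clear the corresponding bits in the placement handle's reclaim
 * unit handle event filter, depending on bit 0 of CDW12.
 */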
  5322. static uint16_t nvme_set_feature_fdp_events(NvmeCtrl *n, NvmeNamespace *ns,
  5323. NvmeRequest *req)
  5324. {
  5325. NvmeCmd *cmd = &req->cmd;
  5326. uint32_t cdw11 = le32_to_cpu(cmd->cdw11);
  5327. uint16_t ph = cdw11 & 0xffff;
  5328. uint8_t noet = (cdw11 >> 16) & 0xff;
  5329. uint16_t ret, ruhid;
  5330. uint8_t enable = le32_to_cpu(cmd->cdw12) & 0x1;
  5331. uint8_t event_mask = 0;
  5332. unsigned int i;
  5333. g_autofree uint8_t *events = g_malloc0(noet);
  5334. NvmeRuHandle *ruh = NULL;
  5335. assert(ns);
  5336. if (!n->subsys || !n->subsys->endgrp.fdp.enabled) {
  5337. return NVME_FDP_DISABLED | NVME_DNR;
  5338. }
  5339. if (!nvme_ph_valid(ns, ph)) {
  5340. return NVME_INVALID_FIELD | NVME_DNR;
  5341. }
  5342. ruhid = ns->fdp.phs[ph];
  5343. ruh = &n->subsys->endgrp.fdp.ruhs[ruhid];
  5344. ret = nvme_h2c(n, events, noet, req);
  5345. if (ret) {
  5346. return ret;
  5347. }
  5348. for (i = 0; i < noet; i++) {
  5349. event_mask |= (1 << nvme_fdp_evf_shifts[events[i]]);
  5350. }
  5351. if (enable) {
  5352. ruh->event_filter |= event_mask;
  5353. } else {
  5354. ruh->event_filter = ruh->event_filter & ~event_mask;
  5355. }
  5356. return NVME_SUCCESS;
  5357. }
  5358. static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
  5359. {
  5360. NvmeNamespace *ns = NULL;
  5361. NvmeCmd *cmd = &req->cmd;
  5362. uint32_t dw10 = le32_to_cpu(cmd->cdw10);
  5363. uint32_t dw11 = le32_to_cpu(cmd->cdw11);
  5364. uint32_t nsid = le32_to_cpu(cmd->nsid);
  5365. uint8_t fid = NVME_GETSETFEAT_FID(dw10);
  5366. uint8_t save = NVME_SETFEAT_SAVE(dw10);
  5367. uint16_t status;
  5368. int i;
  5369. NvmeIdCtrl *id = &n->id_ctrl;
  5370. NvmeAtomic *atomic = &n->atomic;
  5371. trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
  5372. if (save && !(nvme_feature_cap[fid] & NVME_FEAT_CAP_SAVE)) {
  5373. return NVME_FID_NOT_SAVEABLE | NVME_DNR;
  5374. }
  5375. if (!nvme_feature_support[fid]) {
  5376. return NVME_INVALID_FIELD | NVME_DNR;
  5377. }
  5378. if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) {
  5379. if (nsid != NVME_NSID_BROADCAST) {
  5380. if (!nvme_nsid_valid(n, nsid)) {
  5381. return NVME_INVALID_NSID | NVME_DNR;
  5382. }
  5383. ns = nvme_ns(n, nsid);
  5384. if (unlikely(!ns)) {
  5385. return NVME_INVALID_FIELD | NVME_DNR;
  5386. }
  5387. }
  5388. } else if (nsid && nsid != NVME_NSID_BROADCAST) {
  5389. if (!nvme_nsid_valid(n, nsid)) {
  5390. return NVME_INVALID_NSID | NVME_DNR;
  5391. }
  5392. return NVME_FEAT_NOT_NS_SPEC | NVME_DNR;
  5393. }
  5394. if (!(nvme_feature_cap[fid] & NVME_FEAT_CAP_CHANGE)) {
  5395. return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
  5396. }
  5397. switch (fid) {
  5398. case NVME_TEMPERATURE_THRESHOLD:
  5399. if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
  5400. break;
  5401. }
  5402. switch (NVME_TEMP_THSEL(dw11)) {
  5403. case NVME_TEMP_THSEL_OVER:
  5404. n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11);
  5405. break;
  5406. case NVME_TEMP_THSEL_UNDER:
  5407. n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11);
  5408. break;
  5409. default:
  5410. return NVME_INVALID_FIELD | NVME_DNR;
  5411. }
  5412. if ((n->temperature >= n->features.temp_thresh_hi) ||
  5413. (n->temperature <= n->features.temp_thresh_low)) {
  5414. nvme_smart_event(n, NVME_SMART_TEMPERATURE);
  5415. }
  5416. break;
  5417. case NVME_ERROR_RECOVERY:
  5418. if (nsid == NVME_NSID_BROADCAST) {
  5419. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  5420. ns = nvme_ns(n, i);
  5421. if (!ns) {
  5422. continue;
  5423. }
  5424. if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) {
  5425. ns->features.err_rec = dw11;
  5426. }
  5427. }
  5428. break;
  5429. }
  5430. assert(ns);
  5431. if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) {
  5432. ns->features.err_rec = dw11;
  5433. }
  5434. break;
  5435. case NVME_VOLATILE_WRITE_CACHE:
  5436. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  5437. ns = nvme_ns(n, i);
  5438. if (!ns) {
  5439. continue;
  5440. }
  5441. if (!(dw11 & 0x1) && blk_enable_write_cache(ns->blkconf.blk)) {
  5442. blk_flush(ns->blkconf.blk);
  5443. }
  5444. blk_set_enable_write_cache(ns->blkconf.blk, dw11 & 1);
  5445. }
  5446. break;
  5447. case NVME_NUMBER_OF_QUEUES:
  5448. if (n->qs_created) {
  5449. return NVME_CMD_SEQ_ERROR | NVME_DNR;
  5450. }
  5451. /*
  5452. * NVMe v1.3, Section 5.21.1.7: FFFFh is not an allowed value for NCQR
  5453. * and NSQR.
  5454. */
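/*
 * NCQR and NSQR are 0's based values; e.g. (illustrative) a host asking for
 * eight I/O submission and completion queues writes 0x0007 in each half of
 * cdw11, and the completion below reports the allocated counts the same
 * way, as (conf_ioqpairs - 1) in both halves of the result dword.
 */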
  5455. if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
  5456. return NVME_INVALID_FIELD | NVME_DNR;
  5457. }
  5458. trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1,
  5459. ((dw11 >> 16) & 0xffff) + 1,
  5460. n->conf_ioqpairs,
  5461. n->conf_ioqpairs);
  5462. req->cqe.result = cpu_to_le32((n->conf_ioqpairs - 1) |
  5463. ((n->conf_ioqpairs - 1) << 16));
  5464. break;
  5465. case NVME_ASYNCHRONOUS_EVENT_CONF:
  5466. n->features.async_config = dw11;
  5467. break;
  5468. case NVME_TIMESTAMP:
  5469. return nvme_set_feature_timestamp(n, req);
  5470. case NVME_HOST_BEHAVIOR_SUPPORT:
  5471. status = nvme_h2c(n, (uint8_t *)&n->features.hbs,
  5472. sizeof(n->features.hbs), req);
  5473. if (status) {
  5474. return status;
  5475. }
  5476. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  5477. ns = nvme_ns(n, i);
  5478. if (!ns) {
  5479. continue;
  5480. }
  5481. ns->id_ns.nlbaf = ns->nlbaf - 1;
  5482. if (!n->features.hbs.lbafee) {
  5483. ns->id_ns.nlbaf = MIN(ns->id_ns.nlbaf, 15);
  5484. }
  5485. }
  5486. return status;
  5487. case NVME_COMMAND_SET_PROFILE:
  5488. if (dw11 & 0x1ff) {
  5489. trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
  5490. return NVME_IOCS_COMBINATION_REJECTED | NVME_DNR;
  5491. }
  5492. break;
  5493. case NVME_FDP_MODE:
5494. /* spec: abort with cmd seq err if there are one or more namespaces in the endgrp */
  5495. return NVME_CMD_SEQ_ERROR | NVME_DNR;
  5496. case NVME_FDP_EVENTS:
  5497. return nvme_set_feature_fdp_events(n, ns, req);
  5498. case NVME_WRITE_ATOMICITY:
  5499. n->dn = 0x1 & dw11;
  5500. if (n->dn) {
  5501. atomic->atomic_max_write_size = le16_to_cpu(id->awupf) + 1;
  5502. } else {
  5503. atomic->atomic_max_write_size = le16_to_cpu(id->awun) + 1;
  5504. }
  5505. if (atomic->atomic_max_write_size == 1) {
  5506. atomic->atomic_writes = 0;
  5507. } else {
  5508. atomic->atomic_writes = 1;
  5509. }
  5510. break;
  5511. default:
  5512. return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
  5513. }
  5514. return NVME_SUCCESS;
  5515. }
  5516. static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
  5517. {
  5518. trace_pci_nvme_aer(nvme_cid(req));
  5519. if (n->outstanding_aers > n->params.aerl) {
  5520. trace_pci_nvme_aer_aerl_exceeded();
  5521. return NVME_AER_LIMIT_EXCEEDED;
  5522. }
  5523. n->aer_reqs[n->outstanding_aers] = req;
  5524. n->outstanding_aers++;
  5525. if (!QTAILQ_EMPTY(&n->aer_queue)) {
  5526. nvme_process_aers(n);
  5527. }
  5528. return NVME_NO_COMPLETE;
  5529. }
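/*
 * Track the smallest per-namespace limit on how many logical blocks a
 * single DSM range may cover, derived from the block layer request limit.
 * Rough example (assuming BDRV_REQUEST_MAX_BYTES is on the order of 2 GiB):
 * a namespace with 4 KiB logical blocks caps dmrsl at roughly 512k blocks.
 */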
  5530. static void nvme_update_dsm_limits(NvmeCtrl *n, NvmeNamespace *ns)
  5531. {
  5532. if (ns) {
  5533. n->dmrsl =
  5534. MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
  5535. return;
  5536. }
  5537. for (uint32_t nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
  5538. ns = nvme_ns(n, nsid);
  5539. if (!ns) {
  5540. continue;
  5541. }
  5542. n->dmrsl =
  5543. MIN_NON_ZERO(n->dmrsl, BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1));
  5544. }
  5545. }
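/*
 * The NVM command set is always usable; the zoned command set is only
 * reported as supported when CC.CSS selects all supported I/O command sets.
 */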
  5546. static bool nvme_csi_supported(NvmeCtrl *n, uint8_t csi)
  5547. {
  5548. uint32_t cc;
  5549. switch (csi) {
  5550. case NVME_CSI_NVM:
  5551. return true;
  5552. case NVME_CSI_ZONED:
  5553. cc = ldl_le_p(&n->bar.cc);
  5554. return NVME_CC_CSS(cc) == NVME_CC_CSS_ALL;
  5555. }
  5556. g_assert_not_reached();
  5557. }
  5558. static void nvme_detach_ns(NvmeCtrl *n, NvmeNamespace *ns)
  5559. {
  5560. assert(ns->attached > 0);
  5561. n->namespaces[ns->params.nsid] = NULL;
  5562. ns->attached--;
  5563. }
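/*
 * Namespace Attachment: the host transfers a controller list in which entry
 * 0 holds the number of identifiers that follow. An illustrative minimal
 * attach request therefore carries { 1, <cntlid> } with the attach select
 * value in cdw10.
 */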
  5564. static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
  5565. {
  5566. NvmeNamespace *ns;
  5567. NvmeCtrl *ctrl;
  5568. uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
  5569. uint32_t nsid = le32_to_cpu(req->cmd.nsid);
  5570. uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
  5571. uint8_t sel = dw10 & 0xf;
  5572. uint16_t *nr_ids = &list[0];
  5573. uint16_t *ids = &list[1];
  5574. uint16_t ret;
  5575. int i;
  5576. trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf);
  5577. if (!nvme_nsid_valid(n, nsid)) {
  5578. return NVME_INVALID_NSID | NVME_DNR;
  5579. }
  5580. ns = nvme_subsys_ns(n->subsys, nsid);
  5581. if (!ns) {
  5582. return NVME_INVALID_FIELD | NVME_DNR;
  5583. }
  5584. ret = nvme_h2c(n, (uint8_t *)list, 4096, req);
  5585. if (ret) {
  5586. return ret;
  5587. }
  5588. if (!*nr_ids) {
  5589. return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
  5590. }
  5591. *nr_ids = MIN(*nr_ids, NVME_CONTROLLER_LIST_SIZE - 1);
  5592. for (i = 0; i < *nr_ids; i++) {
  5593. ctrl = nvme_subsys_ctrl(n->subsys, ids[i]);
  5594. if (!ctrl) {
  5595. return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
  5596. }
  5597. switch (sel) {
  5598. case NVME_NS_ATTACHMENT_ATTACH:
  5599. if (nvme_ns(n, nsid)) {
  5600. return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
  5601. }
  5602. if (ns->attached && !ns->params.shared) {
  5603. return NVME_NS_PRIVATE | NVME_DNR;
  5604. }
  5605. if (!nvme_csi_supported(n, ns->csi)) {
  5606. return NVME_IOCS_NOT_SUPPORTED | NVME_DNR;
  5607. }
  5608. nvme_attach_ns(ctrl, ns);
  5609. nvme_update_dsm_limits(ctrl, ns);
  5610. break;
  5611. case NVME_NS_ATTACHMENT_DETACH:
  5612. nvme_detach_ns(ctrl, ns);
  5613. nvme_update_dsm_limits(ctrl, NULL);
  5614. break;
  5615. default:
  5616. return NVME_INVALID_FIELD | NVME_DNR;
  5617. }
  5618. /*
  5619. * Add namespace id to the changed namespace id list for event clearing
  5620. * via Get Log Page command.
  5621. */
  5622. if (!test_and_set_bit(nsid, ctrl->changed_nsids)) {
  5623. nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE,
  5624. NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
  5625. NVME_LOG_CHANGED_NSLIST);
  5626. }
  5627. }
  5628. return NVME_SUCCESS;
  5629. }
  5630. typedef struct NvmeFormatAIOCB {
  5631. BlockAIOCB common;
  5632. BlockAIOCB *aiocb;
  5633. NvmeRequest *req;
  5634. int ret;
  5635. NvmeNamespace *ns;
  5636. uint32_t nsid;
  5637. bool broadcast;
  5638. int64_t offset;
  5639. uint8_t lbaf;
  5640. uint8_t mset;
  5641. uint8_t pi;
  5642. uint8_t pil;
  5643. } NvmeFormatAIOCB;
  5644. static void nvme_format_cancel(BlockAIOCB *aiocb)
  5645. {
  5646. NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common);
  5647. iocb->ret = -ECANCELED;
  5648. if (iocb->aiocb) {
  5649. blk_aio_cancel_async(iocb->aiocb);
  5650. iocb->aiocb = NULL;
  5651. }
  5652. }
  5653. static const AIOCBInfo nvme_format_aiocb_info = {
  5654. .aiocb_size = sizeof(NvmeFormatAIOCB),
  5655. .cancel_async = nvme_format_cancel,
  5656. };
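/*
 * Apply the new format to the identify namespace structure. For example
 * (illustrative), lbaf == 0x12 splits into lbafu == 1 and lbafl == 2,
 * giving FLBAS = (1 << 5) | (mset << 4) | 2, while DPS packs the PI
 * location bit above the 3-bit protection information type.
 */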
  5657. static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
  5658. uint8_t pi, uint8_t pil)
  5659. {
  5660. uint8_t lbafl = lbaf & 0xf;
  5661. uint8_t lbafu = lbaf >> 4;
  5662. trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
  5663. ns->id_ns.dps = (pil << 3) | pi;
  5664. ns->id_ns.flbas = (lbafu << 5) | (mset << 4) | lbafl;
  5665. nvme_ns_init_format(ns);
  5666. }
  5667. static void nvme_do_format(NvmeFormatAIOCB *iocb);
  5668. static void nvme_format_ns_cb(void *opaque, int ret)
  5669. {
  5670. NvmeFormatAIOCB *iocb = opaque;
  5671. NvmeNamespace *ns = iocb->ns;
  5672. int bytes;
  5673. if (iocb->ret < 0) {
  5674. goto done;
  5675. } else if (ret < 0) {
  5676. iocb->ret = ret;
  5677. goto done;
  5678. }
  5679. assert(ns);
  5680. if (iocb->offset < ns->size) {
  5681. bytes = MIN(BDRV_REQUEST_MAX_BYTES, ns->size - iocb->offset);
  5682. iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, iocb->offset,
  5683. bytes, BDRV_REQ_MAY_UNMAP,
  5684. nvme_format_ns_cb, iocb);
  5685. iocb->offset += bytes;
  5686. return;
  5687. }
  5688. nvme_format_set(ns, iocb->lbaf, iocb->mset, iocb->pi, iocb->pil);
  5689. ns->status = 0x0;
  5690. iocb->ns = NULL;
  5691. iocb->offset = 0;
  5692. done:
  5693. nvme_do_format(iocb);
  5694. }
  5695. static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi)
  5696. {
  5697. if (ns->params.zoned) {
  5698. return NVME_INVALID_FORMAT | NVME_DNR;
  5699. }
  5700. if (lbaf > ns->id_ns.nlbaf) {
  5701. return NVME_INVALID_FORMAT | NVME_DNR;
  5702. }
  5703. if (pi && (ns->id_ns.lbaf[lbaf].ms < nvme_pi_tuple_size(ns))) {
  5704. return NVME_INVALID_FORMAT | NVME_DNR;
  5705. }
  5706. if (pi && pi > NVME_ID_NS_DPS_TYPE_3) {
  5707. return NVME_INVALID_FIELD | NVME_DNR;
  5708. }
  5709. return NVME_SUCCESS;
  5710. }
  5711. static void nvme_do_format(NvmeFormatAIOCB *iocb)
  5712. {
  5713. NvmeRequest *req = iocb->req;
  5714. NvmeCtrl *n = nvme_ctrl(req);
  5715. uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
  5716. uint8_t lbaf = dw10 & 0xf;
  5717. uint8_t pi = (dw10 >> 5) & 0x7;
  5718. uint16_t status;
  5719. int i;
  5720. if (iocb->ret < 0) {
  5721. goto done;
  5722. }
  5723. if (iocb->broadcast) {
  5724. for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) {
  5725. iocb->ns = nvme_ns(n, i);
  5726. if (iocb->ns) {
  5727. iocb->nsid = i;
  5728. break;
  5729. }
  5730. }
  5731. }
  5732. if (!iocb->ns) {
  5733. goto done;
  5734. }
  5735. status = nvme_format_check(iocb->ns, lbaf, pi);
  5736. if (status) {
  5737. req->status = status;
  5738. goto done;
  5739. }
  5740. iocb->ns->status = NVME_FORMAT_IN_PROGRESS;
  5741. nvme_format_ns_cb(iocb, 0);
  5742. return;
  5743. done:
  5744. iocb->common.cb(iocb->common.opaque, iocb->ret);
  5745. qemu_aio_unref(iocb);
  5746. }
  5747. static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
  5748. {
  5749. NvmeFormatAIOCB *iocb;
  5750. uint32_t nsid = le32_to_cpu(req->cmd.nsid);
  5751. uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
  5752. uint8_t lbaf = dw10 & 0xf;
  5753. uint8_t mset = (dw10 >> 4) & 0x1;
  5754. uint8_t pi = (dw10 >> 5) & 0x7;
  5755. uint8_t pil = (dw10 >> 8) & 0x1;
  5756. uint8_t lbafu = (dw10 >> 12) & 0x3;
  5757. uint16_t status;
  5758. iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req);
  5759. iocb->req = req;
  5760. iocb->ret = 0;
  5761. iocb->ns = NULL;
  5762. iocb->nsid = 0;
  5763. iocb->lbaf = lbaf;
  5764. iocb->mset = mset;
  5765. iocb->pi = pi;
  5766. iocb->pil = pil;
  5767. iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
  5768. iocb->offset = 0;
  5769. if (n->features.hbs.lbafee) {
  5770. iocb->lbaf |= lbafu << 4;
  5771. }
  5772. if (!iocb->broadcast) {
  5773. if (!nvme_nsid_valid(n, nsid)) {
  5774. status = NVME_INVALID_NSID | NVME_DNR;
  5775. goto out;
  5776. }
  5777. iocb->ns = nvme_ns(n, nsid);
  5778. if (!iocb->ns) {
  5779. status = NVME_INVALID_FIELD | NVME_DNR;
  5780. goto out;
  5781. }
  5782. }
  5783. req->aiocb = &iocb->common;
  5784. nvme_do_format(iocb);
  5785. return NVME_NO_COMPLETE;
  5786. out:
  5787. qemu_aio_unref(iocb);
  5788. return status;
  5789. }
  5790. static void nvme_get_virt_res_num(NvmeCtrl *n, uint8_t rt, int *num_total,
  5791. int *num_prim, int *num_sec)
  5792. {
  5793. *num_total = le32_to_cpu(rt ?
  5794. n->pri_ctrl_cap.vifrt : n->pri_ctrl_cap.vqfrt);
  5795. *num_prim = le16_to_cpu(rt ?
  5796. n->pri_ctrl_cap.virfap : n->pri_ctrl_cap.vqrfap);
  5797. *num_sec = le16_to_cpu(rt ? n->pri_ctrl_cap.virfa : n->pri_ctrl_cap.vqrfa);
  5798. }
  5799. static uint16_t nvme_assign_virt_res_to_prim(NvmeCtrl *n, NvmeRequest *req,
  5800. uint16_t cntlid, uint8_t rt,
  5801. int nr)
  5802. {
  5803. int num_total, num_prim, num_sec;
  5804. if (cntlid != n->cntlid) {
  5805. return NVME_INVALID_CTRL_ID | NVME_DNR;
  5806. }
  5807. nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec);
  5808. if (nr > num_total) {
  5809. return NVME_INVALID_NUM_RESOURCES | NVME_DNR;
  5810. }
  5811. if (nr > num_total - num_sec) {
  5812. return NVME_INVALID_RESOURCE_ID | NVME_DNR;
  5813. }
  5814. if (rt) {
  5815. n->next_pri_ctrl_cap.virfap = cpu_to_le16(nr);
  5816. } else {
  5817. n->next_pri_ctrl_cap.vqrfap = cpu_to_le16(nr);
  5818. }
  5819. req->cqe.result = cpu_to_le32(nr);
  5820. return req->status;
  5821. }
  5822. static void nvme_update_virt_res(NvmeCtrl *n, NvmeSecCtrlEntry *sctrl,
  5823. uint8_t rt, int nr)
  5824. {
  5825. int prev_nr, prev_total;
  5826. if (rt) {
  5827. prev_nr = le16_to_cpu(sctrl->nvi);
  5828. prev_total = le32_to_cpu(n->pri_ctrl_cap.virfa);
  5829. sctrl->nvi = cpu_to_le16(nr);
  5830. n->pri_ctrl_cap.virfa = cpu_to_le32(prev_total + nr - prev_nr);
  5831. } else {
  5832. prev_nr = le16_to_cpu(sctrl->nvq);
  5833. prev_total = le32_to_cpu(n->pri_ctrl_cap.vqrfa);
  5834. sctrl->nvq = cpu_to_le16(nr);
  5835. n->pri_ctrl_cap.vqrfa = cpu_to_le32(prev_total + nr - prev_nr);
  5836. }
  5837. }
  5838. static uint16_t nvme_assign_virt_res_to_sec(NvmeCtrl *n, NvmeRequest *req,
  5839. uint16_t cntlid, uint8_t rt, int nr)
  5840. {
  5841. int num_total, num_prim, num_sec, num_free, diff, limit;
  5842. NvmeSecCtrlEntry *sctrl;
  5843. sctrl = nvme_sctrl_for_cntlid(n, cntlid);
  5844. if (!sctrl) {
  5845. return NVME_INVALID_CTRL_ID | NVME_DNR;
  5846. }
  5847. if (sctrl->scs) {
  5848. return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR;
  5849. }
  5850. limit = le16_to_cpu(rt ? n->pri_ctrl_cap.vifrsm : n->pri_ctrl_cap.vqfrsm);
  5851. if (nr > limit) {
  5852. return NVME_INVALID_NUM_RESOURCES | NVME_DNR;
  5853. }
  5854. nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec);
  5855. num_free = num_total - num_prim - num_sec;
  5856. diff = nr - le16_to_cpu(rt ? sctrl->nvi : sctrl->nvq);
  5857. if (diff > num_free) {
  5858. return NVME_INVALID_RESOURCE_ID | NVME_DNR;
  5859. }
  5860. nvme_update_virt_res(n, sctrl, rt, nr);
  5861. req->cqe.result = cpu_to_le32(nr);
  5862. return req->status;
  5863. }
  5864. static uint16_t nvme_virt_set_state(NvmeCtrl *n, uint16_t cntlid, bool online)
  5865. {
  5866. PCIDevice *pci = PCI_DEVICE(n);
  5867. NvmeCtrl *sn = NULL;
  5868. NvmeSecCtrlEntry *sctrl;
  5869. int vf_index;
  5870. sctrl = nvme_sctrl_for_cntlid(n, cntlid);
  5871. if (!sctrl) {
  5872. return NVME_INVALID_CTRL_ID | NVME_DNR;
  5873. }
  5874. if (!pci_is_vf(pci)) {
  5875. vf_index = le16_to_cpu(sctrl->vfn) - 1;
  5876. sn = NVME(pcie_sriov_get_vf_at_index(pci, vf_index));
  5877. }
  5878. if (online) {
  5879. if (!sctrl->nvi || (le16_to_cpu(sctrl->nvq) < 2) || !sn) {
  5880. return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR;
  5881. }
  5882. if (!sctrl->scs) {
  5883. sctrl->scs = 0x1;
  5884. nvme_ctrl_reset(sn, NVME_RESET_FUNCTION);
  5885. }
  5886. } else {
  5887. nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_INTERRUPT, 0);
  5888. nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_QUEUE, 0);
  5889. if (sctrl->scs) {
  5890. sctrl->scs = 0x0;
  5891. if (sn) {
  5892. nvme_ctrl_reset(sn, NVME_RESET_FUNCTION);
  5893. }
  5894. }
  5895. }
  5896. return NVME_SUCCESS;
  5897. }
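/*
 * Virtualization Management: cdw10 carries the action (bits 3:0), the
 * resource type (bits 10:8, VQ or VI) and the target controller id
 * (bits 31:16); cdw11 carries the resource count. E.g. (illustrative)
 * assigning four flexible queue resources to secondary controller 2 sets
 * rt to VQ, cntlid to 2 and nr to 4.
 */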
  5898. static uint16_t nvme_virt_mngmt(NvmeCtrl *n, NvmeRequest *req)
  5899. {
  5900. uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
  5901. uint32_t dw11 = le32_to_cpu(req->cmd.cdw11);
  5902. uint8_t act = dw10 & 0xf;
  5903. uint8_t rt = (dw10 >> 8) & 0x7;
  5904. uint16_t cntlid = (dw10 >> 16) & 0xffff;
  5905. int nr = dw11 & 0xffff;
  5906. trace_pci_nvme_virt_mngmt(nvme_cid(req), act, cntlid, rt ? "VI" : "VQ", nr);
  5907. if (rt != NVME_VIRT_RES_QUEUE && rt != NVME_VIRT_RES_INTERRUPT) {
  5908. return NVME_INVALID_RESOURCE_ID | NVME_DNR;
  5909. }
  5910. switch (act) {
  5911. case NVME_VIRT_MNGMT_ACTION_SEC_ASSIGN:
  5912. return nvme_assign_virt_res_to_sec(n, req, cntlid, rt, nr);
  5913. case NVME_VIRT_MNGMT_ACTION_PRM_ALLOC:
  5914. return nvme_assign_virt_res_to_prim(n, req, cntlid, rt, nr);
  5915. case NVME_VIRT_MNGMT_ACTION_SEC_ONLINE:
  5916. return nvme_virt_set_state(n, cntlid, true);
  5917. case NVME_VIRT_MNGMT_ACTION_SEC_OFFLINE:
  5918. return nvme_virt_set_state(n, cntlid, false);
  5919. default:
  5920. return NVME_INVALID_FIELD | NVME_DNR;
  5921. }
  5922. }
  5923. static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
  5924. {
  5925. PCIDevice *pci = PCI_DEVICE(n);
  5926. uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
  5927. uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
  5928. int i;
  5929. /* Address should be page aligned */
  5930. if (dbs_addr & (n->page_size - 1) || eis_addr & (n->page_size - 1)) {
  5931. return NVME_INVALID_FIELD | NVME_DNR;
  5932. }
  5933. /* Save shadow buffer base addr for use during queue creation */
  5934. n->dbbuf_dbs = dbs_addr;
  5935. n->dbbuf_eis = eis_addr;
  5936. n->dbbuf_enabled = true;
  5937. for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
  5938. NvmeSQueue *sq = n->sq[i];
  5939. NvmeCQueue *cq = n->cq[i];
  5940. if (sq) {
  5941. /*
5942. * CAP.DSTRD is 0, so the offset of the ith sq db_addr is (i<<3);
  5943. * nvme_process_db() uses this hard-coded way to calculate
  5944. * doorbell offsets. Be consistent with that here.
  5945. */
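/*
 * Illustrative layout: for queue pair i == 1 the SQ tail shadow sits at
 * dbs_addr + 8 and, below, the CQ head shadow at dbs_addr + 12 (the extra
 * (1 << 2) selects the CQ slot), mirroring the BAR doorbell stride.
 */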
  5946. sq->db_addr = dbs_addr + (i << 3);
  5947. sq->ei_addr = eis_addr + (i << 3);
  5948. stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED);
  5949. if (n->params.ioeventfd && sq->sqid != 0) {
  5950. if (!nvme_init_sq_ioeventfd(sq)) {
  5951. sq->ioeventfd_enabled = true;
  5952. }
  5953. }
  5954. }
  5955. if (cq) {
  5956. /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */
  5957. cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
  5958. cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
  5959. stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED);
  5960. if (n->params.ioeventfd && cq->cqid != 0) {
  5961. if (!nvme_init_cq_ioeventfd(cq)) {
  5962. cq->ioeventfd_enabled = true;
  5963. }
  5964. }
  5965. }
  5966. }
  5967. trace_pci_nvme_dbbuf_config(dbs_addr, eis_addr);
  5968. return NVME_SUCCESS;
  5969. }
  5970. static uint16_t nvme_directive_send(NvmeCtrl *n, NvmeRequest *req)
  5971. {
  5972. return NVME_INVALID_FIELD | NVME_DNR;
  5973. }
  5974. static uint16_t nvme_directive_receive(NvmeCtrl *n, NvmeRequest *req)
  5975. {
  5976. NvmeNamespace *ns;
  5977. uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
  5978. uint32_t dw11 = le32_to_cpu(req->cmd.cdw11);
  5979. uint32_t nsid = le32_to_cpu(req->cmd.nsid);
  5980. uint8_t doper, dtype;
  5981. uint32_t numd, trans_len;
  5982. NvmeDirectiveIdentify id = {
  5983. .supported = 1 << NVME_DIRECTIVE_IDENTIFY,
  5984. .enabled = 1 << NVME_DIRECTIVE_IDENTIFY,
  5985. };
  5986. numd = dw10 + 1;
  5987. doper = dw11 & 0xff;
  5988. dtype = (dw11 >> 8) & 0xff;
  5989. trans_len = MIN(sizeof(NvmeDirectiveIdentify), numd << 2);
  5990. if (nsid == NVME_NSID_BROADCAST || dtype != NVME_DIRECTIVE_IDENTIFY ||
  5991. doper != NVME_DIRECTIVE_RETURN_PARAMS) {
  5992. return NVME_INVALID_FIELD | NVME_DNR;
  5993. }
  5994. ns = nvme_ns(n, nsid);
  5995. if (!ns) {
  5996. return NVME_INVALID_FIELD | NVME_DNR;
  5997. }
  5998. switch (dtype) {
  5999. case NVME_DIRECTIVE_IDENTIFY:
  6000. switch (doper) {
  6001. case NVME_DIRECTIVE_RETURN_PARAMS:
  6002. if (ns->endgrp && ns->endgrp->fdp.enabled) {
  6003. id.supported |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
  6004. id.enabled |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
  6005. id.persistent |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
  6006. }
  6007. return nvme_c2h(n, (uint8_t *)&id, trans_len, req);
  6008. default:
  6009. return NVME_INVALID_FIELD | NVME_DNR;
  6010. }
  6011. default:
  6012. return NVME_INVALID_FIELD;
  6013. }
  6014. }
  6015. static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
  6016. {
  6017. trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
  6018. nvme_adm_opc_str(req->cmd.opcode));
  6019. if (!(n->cse.acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
  6020. trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode);
  6021. return NVME_INVALID_OPCODE | NVME_DNR;
  6022. }
  6023. /* SGLs shall not be used for Admin commands in NVMe over PCIe */
  6024. if (NVME_CMD_FLAGS_PSDT(req->cmd.flags) != NVME_PSDT_PRP) {
  6025. return NVME_INVALID_FIELD | NVME_DNR;
  6026. }
  6027. if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) {
  6028. return NVME_INVALID_FIELD;
  6029. }
  6030. switch (req->cmd.opcode) {
  6031. case NVME_ADM_CMD_DELETE_SQ:
  6032. return nvme_del_sq(n, req);
  6033. case NVME_ADM_CMD_CREATE_SQ:
  6034. return nvme_create_sq(n, req);
  6035. case NVME_ADM_CMD_GET_LOG_PAGE:
  6036. return nvme_get_log(n, req);
  6037. case NVME_ADM_CMD_DELETE_CQ:
  6038. return nvme_del_cq(n, req);
  6039. case NVME_ADM_CMD_CREATE_CQ:
  6040. return nvme_create_cq(n, req);
  6041. case NVME_ADM_CMD_IDENTIFY:
  6042. return nvme_identify(n, req);
  6043. case NVME_ADM_CMD_ABORT:
  6044. return nvme_abort(n, req);
  6045. case NVME_ADM_CMD_SET_FEATURES:
  6046. return nvme_set_feature(n, req);
  6047. case NVME_ADM_CMD_GET_FEATURES:
  6048. return nvme_get_feature(n, req);
  6049. case NVME_ADM_CMD_ASYNC_EV_REQ:
  6050. return nvme_aer(n, req);
  6051. case NVME_ADM_CMD_NS_ATTACHMENT:
  6052. return nvme_ns_attachment(n, req);
  6053. case NVME_ADM_CMD_VIRT_MNGMT:
  6054. return nvme_virt_mngmt(n, req);
  6055. case NVME_ADM_CMD_DBBUF_CONFIG:
  6056. return nvme_dbbuf_config(n, req);
  6057. case NVME_ADM_CMD_FORMAT_NVM:
  6058. return nvme_format(n, req);
  6059. case NVME_ADM_CMD_DIRECTIVE_SEND:
  6060. return nvme_directive_send(n, req);
  6061. case NVME_ADM_CMD_DIRECTIVE_RECV:
  6062. return nvme_directive_receive(n, req);
  6063. default:
  6064. g_assert_not_reached();
  6065. }
  6066. return NVME_INVALID_OPCODE | NVME_DNR;
  6067. }
  6068. static void nvme_update_sq_eventidx(const NvmeSQueue *sq)
  6069. {
  6070. trace_pci_nvme_update_sq_eventidx(sq->sqid, sq->tail);
  6071. stl_le_pci_dma(PCI_DEVICE(sq->ctrl), sq->ei_addr, sq->tail,
  6072. MEMTXATTRS_UNSPECIFIED);
  6073. }
  6074. static void nvme_update_sq_tail(NvmeSQueue *sq)
  6075. {
  6076. ldl_le_pci_dma(PCI_DEVICE(sq->ctrl), sq->db_addr, &sq->tail,
  6077. MEMTXATTRS_UNSPECIFIED);
  6078. trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
  6079. }
  6080. #define NVME_ATOMIC_NO_START 0
  6081. #define NVME_ATOMIC_START_ATOMIC 1
  6082. #define NVME_ATOMIC_START_NONATOMIC 2
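/*
 * Decide whether a command may start while atomic writes are armed. The
 * comparison is over inclusive LBA ranges; e.g. (illustrative) an atomic
 * write covering LBAs 100..107 is deferred while any outstanding read or
 * write to an overlapping range such as 104..110 is in flight, and a
 * non-atomic command is only deferred when it overlaps an in-flight atomic
 * write.
 */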
  6083. static int nvme_atomic_write_check(NvmeCtrl *n, NvmeCmd *cmd,
  6084. NvmeAtomic *atomic)
  6085. {
  6086. NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
  6087. uint64_t slba = le64_to_cpu(rw->slba);
  6088. uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb);
  6089. uint64_t elba = slba + nlb;
  6090. bool cmd_atomic_wr = true;
  6091. int i;
  6092. if ((cmd->opcode == NVME_CMD_READ) || ((cmd->opcode == NVME_CMD_WRITE) &&
  6093. ((rw->nlb + 1) > atomic->atomic_max_write_size))) {
  6094. cmd_atomic_wr = false;
  6095. }
  6096. /*
  6097. * Walk the queues to see if there are any atomic conflicts.
  6098. */
  6099. for (i = 1; i < n->params.max_ioqpairs + 1; i++) {
  6100. NvmeSQueue *sq;
  6101. NvmeRequest *req;
  6102. NvmeRwCmd *req_rw;
  6103. uint64_t req_slba;
  6104. uint32_t req_nlb;
  6105. uint64_t req_elba;
  6106. sq = n->sq[i];
  6107. if (!sq) {
  6108. continue;
  6109. }
  6110. /*
  6111. * Walk all the requests on a given queue.
  6112. */
  6113. QTAILQ_FOREACH(req, &sq->out_req_list, entry) {
  6114. req_rw = (NvmeRwCmd *)&req->cmd;
  6115. if (((req_rw->opcode == NVME_CMD_WRITE) ||
  6116. (req_rw->opcode == NVME_CMD_READ)) &&
  6117. (cmd->nsid == req->ns->params.nsid)) {
  6118. req_slba = le64_to_cpu(req_rw->slba);
  6119. req_nlb = (uint32_t)le16_to_cpu(req_rw->nlb);
  6120. req_elba = req_slba + req_nlb;
  6121. if (cmd_atomic_wr) {
  6122. if ((elba >= req_slba) && (slba <= req_elba)) {
  6123. return NVME_ATOMIC_NO_START;
  6124. }
  6125. } else {
  6126. if (req->atomic_write && ((elba >= req_slba) &&
  6127. (slba <= req_elba))) {
  6128. return NVME_ATOMIC_NO_START;
  6129. }
  6130. }
  6131. }
  6132. }
  6133. }
  6134. if (cmd_atomic_wr) {
  6135. return NVME_ATOMIC_START_ATOMIC;
  6136. }
  6137. return NVME_ATOMIC_START_NONATOMIC;
  6138. }
  6139. static NvmeAtomic *nvme_get_atomic(NvmeCtrl *n, NvmeCmd *cmd)
  6140. {
  6141. if (n->atomic.atomic_writes) {
  6142. return &n->atomic;
  6143. }
  6144. return NULL;
  6145. }
  6146. static void nvme_process_sq(void *opaque)
  6147. {
  6148. NvmeSQueue *sq = opaque;
  6149. NvmeCtrl *n = sq->ctrl;
  6150. NvmeCQueue *cq = n->cq[sq->cqid];
  6151. uint16_t status;
  6152. hwaddr addr;
  6153. NvmeCmd cmd;
  6154. NvmeRequest *req;
  6155. if (n->dbbuf_enabled) {
  6156. nvme_update_sq_tail(sq);
  6157. }
  6158. while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
  6159. NvmeAtomic *atomic;
  6160. bool cmd_is_atomic;
  6161. addr = sq->dma_addr + (sq->head << NVME_SQES);
  6162. if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
  6163. trace_pci_nvme_err_addr_read(addr);
  6164. trace_pci_nvme_err_cfs();
  6165. stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
  6166. break;
  6167. }
  6168. atomic = nvme_get_atomic(n, &cmd);
  6169. cmd_is_atomic = false;
  6170. if (sq->sqid && atomic) {
  6171. int ret;
  6172. ret = nvme_atomic_write_check(n, &cmd, atomic);
  6173. switch (ret) {
  6174. case NVME_ATOMIC_NO_START:
  6175. qemu_bh_schedule(sq->bh);
  6176. return;
  6177. case NVME_ATOMIC_START_ATOMIC:
  6178. cmd_is_atomic = true;
  6179. break;
  6180. case NVME_ATOMIC_START_NONATOMIC:
  6181. default:
  6182. break;
  6183. }
  6184. }
  6185. nvme_inc_sq_head(sq);
  6186. req = QTAILQ_FIRST(&sq->req_list);
  6187. QTAILQ_REMOVE(&sq->req_list, req, entry);
  6188. QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
  6189. nvme_req_clear(req);
  6190. req->cqe.cid = cmd.cid;
  6191. memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
  6192. if (sq->sqid && atomic) {
  6193. req->atomic_write = cmd_is_atomic;
  6194. }
  6195. status = sq->sqid ? nvme_io_cmd(n, req) :
  6196. nvme_admin_cmd(n, req);
  6197. if (status != NVME_NO_COMPLETE) {
  6198. req->status = status;
  6199. nvme_enqueue_req_completion(cq, req);
  6200. }
  6201. if (n->dbbuf_enabled) {
  6202. nvme_update_sq_eventidx(sq);
  6203. nvme_update_sq_tail(sq);
  6204. }
  6205. }
  6206. }
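/*
 * Update the MSI-X capability Table Size field, which is encoded as N - 1;
 * e.g. (illustrative) a table of 32 vectors is written as 31.
 */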
  6207. static void nvme_update_msixcap_ts(PCIDevice *pci_dev, uint32_t table_size)
  6208. {
  6209. uint8_t *config;
  6210. if (!msix_present(pci_dev)) {
  6211. return;
  6212. }
  6213. assert(table_size > 0 && table_size <= pci_dev->msix_entries_nr);
  6214. config = pci_dev->config + pci_dev->msix_cap;
  6215. pci_set_word_by_mask(config + PCI_MSIX_FLAGS, PCI_MSIX_FLAGS_QSIZE,
  6216. table_size - 1);
  6217. }
  6218. static void nvme_activate_virt_res(NvmeCtrl *n)
  6219. {
  6220. PCIDevice *pci_dev = PCI_DEVICE(n);
  6221. NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
  6222. NvmeSecCtrlEntry *sctrl;
  6223. /* -1 to account for the admin queue */
  6224. if (pci_is_vf(pci_dev)) {
  6225. sctrl = nvme_sctrl(n);
  6226. cap->vqprt = sctrl->nvq;
  6227. cap->viprt = sctrl->nvi;
  6228. n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
  6229. n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
  6230. } else {
  6231. cap->vqrfap = n->next_pri_ctrl_cap.vqrfap;
  6232. cap->virfap = n->next_pri_ctrl_cap.virfap;
  6233. n->conf_ioqpairs = le16_to_cpu(cap->vqprt) +
  6234. le16_to_cpu(cap->vqrfap) - 1;
  6235. n->conf_msix_qsize = le16_to_cpu(cap->viprt) +
  6236. le16_to_cpu(cap->virfap);
  6237. }
  6238. }
  6239. static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
  6240. {
  6241. PCIDevice *pci_dev = PCI_DEVICE(n);
  6242. NvmeSecCtrlEntry *sctrl;
  6243. NvmeNamespace *ns;
  6244. int i;
  6245. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  6246. ns = nvme_ns(n, i);
  6247. if (!ns) {
  6248. continue;
  6249. }
  6250. nvme_ns_drain(ns);
  6251. }
  6252. for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
  6253. if (n->sq[i] != NULL) {
  6254. nvme_free_sq(n->sq[i], n);
  6255. }
  6256. }
  6257. for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
  6258. if (n->cq[i] != NULL) {
  6259. nvme_free_cq(n->cq[i], n);
  6260. }
  6261. }
  6262. while (!QTAILQ_EMPTY(&n->aer_queue)) {
  6263. NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue);
  6264. QTAILQ_REMOVE(&n->aer_queue, event, entry);
  6265. g_free(event);
  6266. }
  6267. if (n->params.sriov_max_vfs) {
  6268. if (!pci_is_vf(pci_dev)) {
  6269. for (i = 0; i < n->nr_sec_ctrls; i++) {
  6270. sctrl = &n->sec_ctrl_list[i];
  6271. nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
  6272. }
  6273. }
  6274. if (rst != NVME_RESET_CONTROLLER) {
  6275. nvme_activate_virt_res(n);
  6276. }
  6277. }
  6278. n->aer_queued = 0;
  6279. n->aer_mask = 0;
  6280. n->outstanding_aers = 0;
  6281. n->qs_created = false;
  6282. n->dn = n->params.atomic_dn; /* Set Disable Normal */
  6283. nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
  6284. if (pci_is_vf(pci_dev)) {
  6285. sctrl = nvme_sctrl(n);
  6286. stl_le_p(&n->bar.csts, sctrl->scs ? 0 : NVME_CSTS_FAILED);
  6287. } else {
  6288. stl_le_p(&n->bar.csts, 0);
  6289. }
  6290. stl_le_p(&n->bar.intms, 0);
  6291. stl_le_p(&n->bar.intmc, 0);
  6292. stl_le_p(&n->bar.cc, 0);
  6293. n->dbbuf_dbs = 0;
  6294. n->dbbuf_eis = 0;
  6295. n->dbbuf_enabled = false;
  6296. }
  6297. static void nvme_ctrl_shutdown(NvmeCtrl *n)
  6298. {
  6299. NvmeNamespace *ns;
  6300. int i;
  6301. if (n->pmr.dev) {
  6302. memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
  6303. }
  6304. for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  6305. ns = nvme_ns(n, i);
  6306. if (!ns) {
  6307. continue;
  6308. }
  6309. nvme_ns_shutdown(ns);
  6310. }
  6311. }
  6312. static int nvme_start_ctrl(NvmeCtrl *n)
  6313. {
  6314. uint64_t cap = ldq_le_p(&n->bar.cap);
  6315. uint32_t cc = ldl_le_p(&n->bar.cc);
  6316. uint32_t aqa = ldl_le_p(&n->bar.aqa);
  6317. uint64_t asq = ldq_le_p(&n->bar.asq);
  6318. uint64_t acq = ldq_le_p(&n->bar.acq);
  6319. uint32_t page_bits = NVME_CC_MPS(cc) + 12;
  6320. uint32_t page_size = 1 << page_bits;
  6321. NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
  6322. if (pci_is_vf(PCI_DEVICE(n)) && !sctrl->scs) {
  6323. trace_pci_nvme_err_startfail_virt_state(le16_to_cpu(sctrl->nvi),
  6324. le16_to_cpu(sctrl->nvq));
  6325. return -1;
  6326. }
  6327. if (unlikely(n->cq[0])) {
  6328. trace_pci_nvme_err_startfail_cq();
  6329. return -1;
  6330. }
  6331. if (unlikely(n->sq[0])) {
  6332. trace_pci_nvme_err_startfail_sq();
  6333. return -1;
  6334. }
  6335. if (unlikely(asq & (page_size - 1))) {
  6336. trace_pci_nvme_err_startfail_asq_misaligned(asq);
  6337. return -1;
  6338. }
  6339. if (unlikely(acq & (page_size - 1))) {
  6340. trace_pci_nvme_err_startfail_acq_misaligned(acq);
  6341. return -1;
  6342. }
  6343. if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) {
  6344. trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc));
  6345. return -1;
  6346. }
  6347. if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) {
  6348. trace_pci_nvme_err_startfail_page_too_small(
  6349. NVME_CC_MPS(cc),
  6350. NVME_CAP_MPSMIN(cap));
  6351. return -1;
  6352. }
  6353. if (unlikely(NVME_CC_MPS(cc) >
  6354. NVME_CAP_MPSMAX(cap))) {
  6355. trace_pci_nvme_err_startfail_page_too_large(
  6356. NVME_CC_MPS(cc),
  6357. NVME_CAP_MPSMAX(cap));
  6358. return -1;
  6359. }
  6360. if (unlikely(!NVME_AQA_ASQS(aqa))) {
  6361. trace_pci_nvme_err_startfail_asqent_sz_zero();
  6362. return -1;
  6363. }
  6364. if (unlikely(!NVME_AQA_ACQS(aqa))) {
  6365. trace_pci_nvme_err_startfail_acqent_sz_zero();
  6366. return -1;
  6367. }
  6368. n->page_bits = page_bits;
  6369. n->page_size = page_size;
  6370. n->max_prp_ents = n->page_size / sizeof(uint64_t);
  6371. nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1);
  6372. nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1);
  6373. nvme_set_timestamp(n, 0ULL);
  6374. /* verify that the command sets of attached namespaces are supported */
  6375. for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
  6376. NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
  6377. if (!ns || (!ns->params.shared && ns->ctrl != n)) {
  6378. continue;
  6379. }
  6380. if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
  6381. if (!ns->attached || ns->params.shared) {
  6382. nvme_attach_ns(n, ns);
  6383. }
  6384. }
  6385. }
  6386. nvme_update_dsm_limits(n, NULL);
  6387. return 0;
  6388. }
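/*
 * Program the CMB location and size registers. CMBSZ.SZU == 2 selects
 * 1 MiB granularity, so SZ is the configured cmb_size_mb directly; e.g.
 * (illustrative) cmb_size_mb == 64 advertises a 64 MiB buffer.
 */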
  6389. static void nvme_cmb_enable_regs(NvmeCtrl *n)
  6390. {
  6391. uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc);
  6392. uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz);
  6393. NVME_CMBLOC_SET_CDPCILS(cmbloc, 1);
  6394. NVME_CMBLOC_SET_CDPMLS(cmbloc, 1);
  6395. NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR);
  6396. stl_le_p(&n->bar.cmbloc, cmbloc);
  6397. NVME_CMBSZ_SET_SQS(cmbsz, 1);
  6398. NVME_CMBSZ_SET_CQS(cmbsz, 0);
  6399. NVME_CMBSZ_SET_LISTS(cmbsz, 1);
  6400. NVME_CMBSZ_SET_RDS(cmbsz, 1);
  6401. NVME_CMBSZ_SET_WDS(cmbsz, 1);
  6402. NVME_CMBSZ_SET_SZU(cmbsz, 2); /* MBs */
  6403. NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb);
  6404. stl_le_p(&n->bar.cmbsz, cmbsz);
  6405. }
  6406. static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
  6407. unsigned size)
  6408. {
  6409. PCIDevice *pci = PCI_DEVICE(n);
  6410. uint64_t cap = ldq_le_p(&n->bar.cap);
  6411. uint32_t cc = ldl_le_p(&n->bar.cc);
  6412. uint32_t intms = ldl_le_p(&n->bar.intms);
  6413. uint32_t csts = ldl_le_p(&n->bar.csts);
  6414. uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts);
  6415. if (unlikely(offset & (sizeof(uint32_t) - 1))) {
  6416. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32,
  6417. "MMIO write not 32-bit aligned,"
  6418. " offset=0x%"PRIx64"", offset);
  6419. /* should be ignored, fall through for now */
  6420. }
  6421. if (unlikely(size < sizeof(uint32_t))) {
  6422. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall,
  6423. "MMIO write smaller than 32-bits,"
  6424. " offset=0x%"PRIx64", size=%u",
  6425. offset, size);
  6426. /* should be ignored, fall through for now */
  6427. }
  6428. switch (offset) {
  6429. case NVME_REG_INTMS:
  6430. if (unlikely(msix_enabled(pci))) {
  6431. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
  6432. "undefined access to interrupt mask set"
  6433. " when MSI-X is enabled");
  6434. /* should be ignored, fall through for now */
  6435. }
  6436. intms |= data;
  6437. stl_le_p(&n->bar.intms, intms);
  6438. n->bar.intmc = n->bar.intms;
  6439. trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms);
  6440. nvme_irq_check(n);
  6441. break;
  6442. case NVME_REG_INTMC:
  6443. if (unlikely(msix_enabled(pci))) {
  6444. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
  6445. "undefined access to interrupt mask clr"
  6446. " when MSI-X is enabled");
  6447. /* should be ignored, fall through for now */
  6448. }
  6449. intms &= ~data;
  6450. stl_le_p(&n->bar.intms, intms);
  6451. n->bar.intmc = n->bar.intms;
  6452. trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms);
  6453. nvme_irq_check(n);
  6454. break;
  6455. case NVME_REG_CC:
  6456. stl_le_p(&n->bar.cc, data);
  6457. trace_pci_nvme_mmio_cfg(data & 0xffffffff);
  6458. if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) {
  6459. trace_pci_nvme_mmio_shutdown_set();
  6460. nvme_ctrl_shutdown(n);
  6461. csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT);
  6462. csts |= NVME_CSTS_SHST_COMPLETE;
  6463. } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) {
  6464. trace_pci_nvme_mmio_shutdown_cleared();
  6465. csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT);
  6466. }
  6467. if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) {
  6468. if (unlikely(nvme_start_ctrl(n))) {
  6469. trace_pci_nvme_err_startfail();
  6470. csts = NVME_CSTS_FAILED;
  6471. } else {
  6472. trace_pci_nvme_mmio_start_success();
  6473. csts = NVME_CSTS_READY;
  6474. }
  6475. } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) {
  6476. trace_pci_nvme_mmio_stopped();
  6477. nvme_ctrl_reset(n, NVME_RESET_CONTROLLER);
  6478. break;
  6479. }
  6480. stl_le_p(&n->bar.csts, csts);
  6481. break;
  6482. case NVME_REG_CSTS:
  6483. if (data & (1 << 4)) {
  6484. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported,
  6485. "attempted to W1C CSTS.NSSRO"
  6486. " but CAP.NSSRS is zero (not supported)");
  6487. } else if (data != 0) {
  6488. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts,
  6489. "attempted to set a read only bit"
  6490. " of controller status");
  6491. }
  6492. break;
  6493. case NVME_REG_NSSR:
  6494. if (data == 0x4e564d65) {
  6495. trace_pci_nvme_ub_mmiowr_ssreset_unsupported();
  6496. } else {
  6497. /* The spec says that writes of other values have no effect */
  6498. return;
  6499. }
  6500. break;
  6501. case NVME_REG_AQA:
  6502. stl_le_p(&n->bar.aqa, data);
  6503. trace_pci_nvme_mmio_aqattr(data & 0xffffffff);
  6504. break;
  6505. case NVME_REG_ASQ:
  6506. stn_le_p(&n->bar.asq, size, data);
  6507. trace_pci_nvme_mmio_asqaddr(data);
  6508. break;
  6509. case NVME_REG_ASQ + 4:
  6510. stl_le_p((uint8_t *)&n->bar.asq + 4, data);
  6511. trace_pci_nvme_mmio_asqaddr_hi(data, ldq_le_p(&n->bar.asq));
  6512. break;
  6513. case NVME_REG_ACQ:
  6514. trace_pci_nvme_mmio_acqaddr(data);
  6515. stn_le_p(&n->bar.acq, size, data);
  6516. break;
  6517. case NVME_REG_ACQ + 4:
  6518. stl_le_p((uint8_t *)&n->bar.acq + 4, data);
  6519. trace_pci_nvme_mmio_acqaddr_hi(data, ldq_le_p(&n->bar.acq));
  6520. break;
  6521. case NVME_REG_CMBLOC:
  6522. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved,
  6523. "invalid write to reserved CMBLOC"
  6524. " when CMBSZ is zero, ignored");
  6525. return;
  6526. case NVME_REG_CMBSZ:
  6527. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly,
  6528. "invalid write to read only CMBSZ, ignored");
  6529. return;
  6530. case NVME_REG_CMBMSC:
  6531. if (!NVME_CAP_CMBS(cap)) {
  6532. return;
  6533. }
  6534. stn_le_p(&n->bar.cmbmsc, size, data);
  6535. n->cmb.cmse = false;
  6536. if (NVME_CMBMSC_CRE(data)) {
  6537. nvme_cmb_enable_regs(n);
  6538. if (NVME_CMBMSC_CMSE(data)) {
  6539. uint64_t cmbmsc = ldq_le_p(&n->bar.cmbmsc);
  6540. hwaddr cba = NVME_CMBMSC_CBA(cmbmsc) << CMBMSC_CBA_SHIFT;
  6541. if (cba + int128_get64(n->cmb.mem.size) < cba) {
  6542. uint32_t cmbsts = ldl_le_p(&n->bar.cmbsts);
  6543. NVME_CMBSTS_SET_CBAI(cmbsts, 1);
  6544. stl_le_p(&n->bar.cmbsts, cmbsts);
  6545. return;
  6546. }
  6547. n->cmb.cba = cba;
  6548. n->cmb.cmse = true;
  6549. }
  6550. } else {
  6551. n->bar.cmbsz = 0;
  6552. n->bar.cmbloc = 0;
  6553. }
  6554. return;
  6555. case NVME_REG_CMBMSC + 4:
  6556. stl_le_p((uint8_t *)&n->bar.cmbmsc + 4, data);
  6557. return;
  6558. case NVME_REG_PMRCAP:
  6559. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly,
  6560. "invalid write to PMRCAP register, ignored");
  6561. return;
  6562. case NVME_REG_PMRCTL:
  6563. if (!NVME_CAP_PMRS(cap)) {
  6564. return;
  6565. }
  6566. stl_le_p(&n->bar.pmrctl, data);
  6567. if (NVME_PMRCTL_EN(data)) {
  6568. memory_region_set_enabled(&n->pmr.dev->mr, true);
  6569. pmrsts = 0;
  6570. } else {
  6571. memory_region_set_enabled(&n->pmr.dev->mr, false);
  6572. NVME_PMRSTS_SET_NRDY(pmrsts, 1);
  6573. n->pmr.cmse = false;
  6574. }
  6575. stl_le_p(&n->bar.pmrsts, pmrsts);
  6576. return;
  6577. case NVME_REG_PMRSTS:
  6578. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly,
  6579. "invalid write to PMRSTS register, ignored");
  6580. return;
  6581. case NVME_REG_PMREBS:
  6582. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly,
  6583. "invalid write to PMREBS register, ignored");
  6584. return;
  6585. case NVME_REG_PMRSWTP:
  6586. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly,
  6587. "invalid write to PMRSWTP register, ignored");
  6588. return;
  6589. case NVME_REG_PMRMSCL:
  6590. if (!NVME_CAP_PMRS(cap)) {
  6591. return;
  6592. }
  6593. stl_le_p(&n->bar.pmrmscl, data);
  6594. n->pmr.cmse = false;
  6595. if (NVME_PMRMSCL_CMSE(data)) {
  6596. uint64_t pmrmscu = ldl_le_p(&n->bar.pmrmscu);
  6597. hwaddr cba = pmrmscu << 32 |
  6598. (NVME_PMRMSCL_CBA(data) << PMRMSCL_CBA_SHIFT);
  6599. if (cba + int128_get64(n->pmr.dev->mr.size) < cba) {
  6600. NVME_PMRSTS_SET_CBAI(pmrsts, 1);
  6601. stl_le_p(&n->bar.pmrsts, pmrsts);
  6602. return;
  6603. }
  6604. n->pmr.cmse = true;
  6605. n->pmr.cba = cba;
  6606. }
  6607. return;
  6608. case NVME_REG_PMRMSCU:
  6609. if (!NVME_CAP_PMRS(cap)) {
  6610. return;
  6611. }
  6612. stl_le_p(&n->bar.pmrmscu, data);
  6613. return;
  6614. default:
  6615. NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid,
  6616. "invalid MMIO write,"
  6617. " offset=0x%"PRIx64", data=%"PRIx64"",
  6618. offset, data);
  6619. break;
  6620. }
  6621. }
  6622. static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
  6623. {
  6624. NvmeCtrl *n = (NvmeCtrl *)opaque;
  6625. uint8_t *ptr = (uint8_t *)&n->bar;
  6626. trace_pci_nvme_mmio_read(addr, size);
  6627. if (unlikely(addr & (sizeof(uint32_t) - 1))) {
  6628. NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32,
  6629. "MMIO read not 32-bit aligned,"
  6630. " offset=0x%"PRIx64"", addr);
  6631. /* should RAZ, fall through for now */
  6632. } else if (unlikely(size < sizeof(uint32_t))) {
  6633. NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall,
  6634. "MMIO read smaller than 32-bits,"
  6635. " offset=0x%"PRIx64"", addr);
  6636. /* should RAZ, fall through for now */
  6637. }
  6638. if (addr > sizeof(n->bar) - size) {
  6639. NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs,
  6640. "MMIO read beyond last register,"
  6641. " offset=0x%"PRIx64", returning 0", addr);
  6642. return 0;
  6643. }
  6644. if (pci_is_vf(PCI_DEVICE(n)) && !nvme_sctrl(n)->scs &&
  6645. addr != NVME_REG_CSTS) {
  6646. trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size);
  6647. return 0;
  6648. }
  6649. /*
6650. * When PMRWBM bit 1 is set, a read from
6651. * PMRSTS should ensure that prior writes
6652. * have made it to persistent media
  6653. */
  6654. if (addr == NVME_REG_PMRSTS &&
  6655. (NVME_PMRCAP_PMRWBM(ldl_le_p(&n->bar.pmrcap)) & 0x02)) {
  6656. memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
  6657. }
  6658. return ldn_le_p(ptr + addr, size);
  6659. }
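/*
 * Doorbell writes land past the register file at offset 0x1000, packed as
 * SQ tail then CQ head per queue pair (CAP.DSTRD == 0). E.g. (illustrative)
 * offset 0x1000 is the admin SQ tail doorbell, 0x1004 the admin CQ head
 * doorbell, and 0x1008/0x100c the pair for queue id 1.
 */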
  6660. static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
  6661. {
  6662. PCIDevice *pci = PCI_DEVICE(n);
  6663. uint32_t qid;
  6664. if (unlikely(addr & ((1 << 2) - 1))) {
  6665. NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned,
  6666. "doorbell write not 32-bit aligned,"
  6667. " offset=0x%"PRIx64", ignoring", addr);
  6668. return;
  6669. }
  6670. if (((addr - 0x1000) >> 2) & 1) {
  6671. /* Completion queue doorbell write */
  6672. uint16_t new_head = val & 0xffff;
  6673. NvmeCQueue *cq;
  6674. qid = (addr - (0x1000 + (1 << 2))) >> 3;
  6675. if (unlikely(nvme_check_cqid(n, qid))) {
  6676. NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq,
  6677. "completion queue doorbell write"
  6678. " for nonexistent queue,"
  6679. " sqid=%"PRIu32", ignoring", qid);
  6680. /*
6681. * NVM Express v1.3d, Section 4.1 states: "If host software writes
  6682. * an invalid value to the Submission Queue Tail Doorbell or
  6683. * Completion Queue Head Doorbell register and an Asynchronous Event
  6684. * Request command is outstanding, then an asynchronous event is
  6685. * posted to the Admin Completion Queue with a status code of
  6686. * Invalid Doorbell Write Value."
  6687. *
  6688. * Also note that the spec includes the "Invalid Doorbell Register"
  6689. * status code, but nowhere does it specify when to use it.
  6690. * However, it seems reasonable to use it here in a similar
  6691. * fashion.
  6692. */
  6693. if (n->outstanding_aers) {
  6694. nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
  6695. NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
  6696. NVME_LOG_ERROR_INFO);
  6697. }
  6698. return;
  6699. }
  6700. cq = n->cq[qid];
  6701. if (unlikely(new_head >= cq->size)) {
  6702. NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead,
  6703. "completion queue doorbell write value"
  6704. " beyond queue size, sqid=%"PRIu32","
  6705. " new_head=%"PRIu16", ignoring",
  6706. qid, new_head);
  6707. if (n->outstanding_aers) {
  6708. nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
  6709. NVME_AER_INFO_ERR_INVALID_DB_VALUE,
  6710. NVME_LOG_ERROR_INFO);
  6711. }
  6712. return;
  6713. }
  6714. trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head);
6715. /* schedule deferred cqe posting if queue was previously full */
  6716. if (nvme_cq_full(cq)) {
  6717. qemu_bh_schedule(cq->bh);
  6718. }
  6719. cq->head = new_head;
  6720. if (!qid && n->dbbuf_enabled) {
  6721. stl_le_pci_dma(pci, cq->db_addr, cq->head, MEMTXATTRS_UNSPECIFIED);
  6722. }
  6723. if (cq->tail == cq->head) {
  6724. if (cq->irq_enabled) {
  6725. n->cq_pending--;
  6726. }
  6727. nvme_irq_deassert(n, cq);
  6728. }
  6729. } else {
  6730. /* Submission queue doorbell write */
  6731. uint16_t new_tail = val & 0xffff;
  6732. NvmeSQueue *sq;
  6733. qid = (addr - 0x1000) >> 3;
  6734. if (unlikely(nvme_check_sqid(n, qid))) {
  6735. NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq,
  6736. "submission queue doorbell write"
  6737. " for nonexistent queue,"
  6738. " sqid=%"PRIu32", ignoring", qid);
  6739. if (n->outstanding_aers) {
  6740. nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
  6741. NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
  6742. NVME_LOG_ERROR_INFO);
  6743. }
  6744. return;
  6745. }
  6746. sq = n->sq[qid];
  6747. if (unlikely(new_tail >= sq->size)) {
  6748. NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail,
  6749. "submission queue doorbell write value"
  6750. " beyond queue size, sqid=%"PRIu32","
  6751. " new_tail=%"PRIu16", ignoring",
  6752. qid, new_tail);
  6753. if (n->outstanding_aers) {
  6754. nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
  6755. NVME_AER_INFO_ERR_INVALID_DB_VALUE,
  6756. NVME_LOG_ERROR_INFO);
  6757. }
  6758. return;
  6759. }
  6760. trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail);
  6761. sq->tail = new_tail;
  6762. if (!qid && n->dbbuf_enabled) {
  6763. /*
  6764. * The spec states "the host shall also update the controller's
  6765. * corresponding doorbell property to match the value of that entry
  6766. * in the Shadow Doorbell buffer."
  6767. *
  6768. * Since this context is currently a VM trap, we can safely enforce
  6769. * the requirement from the device side in case the host is
  6770. * misbehaving.
  6771. *
6772. * Note, we shouldn't have to do this, but various drivers,
  6773. * including ones that run on Linux, are not updating Admin Queues,
  6774. * so we can't trust reading it for an appropriate sq tail.
  6775. */
  6776. stl_le_pci_dma(pci, sq->db_addr, sq->tail, MEMTXATTRS_UNSPECIFIED);
  6777. }
  6778. qemu_bh_schedule(sq->bh);
  6779. }
  6780. }
  6781. static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data,
  6782. unsigned size)
  6783. {
  6784. NvmeCtrl *n = (NvmeCtrl *)opaque;
  6785. trace_pci_nvme_mmio_write(addr, data, size);
  6786. if (pci_is_vf(PCI_DEVICE(n)) && !nvme_sctrl(n)->scs &&
  6787. addr != NVME_REG_CSTS) {
  6788. trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size);
  6789. return;
  6790. }
  6791. if (addr < sizeof(n->bar)) {
  6792. nvme_write_bar(n, addr, data, size);
  6793. } else {
  6794. nvme_process_db(n, addr, data);
  6795. }
  6796. }
  6797. static const MemoryRegionOps nvme_mmio_ops = {
  6798. .read = nvme_mmio_read,
  6799. .write = nvme_mmio_write,
  6800. .endianness = DEVICE_LITTLE_ENDIAN,
  6801. .impl = {
  6802. .min_access_size = 2,
  6803. .max_access_size = 8,
  6804. },
  6805. };
  6806. static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data,
  6807. unsigned size)
  6808. {
  6809. NvmeCtrl *n = (NvmeCtrl *)opaque;
  6810. stn_le_p(&n->cmb.buf[addr], size, data);
  6811. }
  6812. static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size)
  6813. {
  6814. NvmeCtrl *n = (NvmeCtrl *)opaque;
  6815. return ldn_le_p(&n->cmb.buf[addr], size);
  6816. }
  6817. static const MemoryRegionOps nvme_cmb_ops = {
  6818. .read = nvme_cmb_read,
  6819. .write = nvme_cmb_write,
  6820. .endianness = DEVICE_LITTLE_ENDIAN,
  6821. .impl = {
  6822. .min_access_size = 1,
  6823. .max_access_size = 8,
  6824. },
  6825. };
  6826. static bool nvme_check_params(NvmeCtrl *n, Error **errp)
  6827. {
  6828. NvmeParams *params = &n->params;
  6829. if (params->num_queues) {
  6830. warn_report("num_queues is deprecated; please use max_ioqpairs "
  6831. "instead");
  6832. params->max_ioqpairs = params->num_queues - 1;
  6833. }
  6834. if (n->namespace.blkconf.blk && n->subsys) {
  6835. error_setg(errp, "subsystem support is unavailable with legacy "
  6836. "namespace ('drive' property)");
  6837. return false;
  6838. }
  6839. if (params->max_ioqpairs < 1 ||
  6840. params->max_ioqpairs > NVME_MAX_IOQPAIRS) {
  6841. error_setg(errp, "max_ioqpairs must be between 1 and %d",
  6842. NVME_MAX_IOQPAIRS);
  6843. return false;
  6844. }
  6845. if (params->msix_qsize < 1 ||
  6846. params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) {
  6847. error_setg(errp, "msix_qsize must be between 1 and %d",
  6848. PCI_MSIX_FLAGS_QSIZE + 1);
  6849. return false;
  6850. }
  6851. if (!params->serial) {
  6852. error_setg(errp, "serial property not set");
  6853. return false;
  6854. }
  6855. if (params->mqes < 1) {
  6856. error_setg(errp, "mqes property cannot be less than 1");
  6857. return false;
  6858. }
  6859. if (n->pmr.dev) {
  6860. if (params->msix_exclusive_bar) {
  6861. error_setg(errp, "not enough BARs available to enable PMR");
  6862. return false;
  6863. }
  6864. if (host_memory_backend_is_mapped(n->pmr.dev)) {
  6865. error_setg(errp, "can't use already busy memdev: %s",
  6866. object_get_canonical_path_component(OBJECT(n->pmr.dev)));
  6867. return false;
  6868. }
  6869. if (!is_power_of_2(n->pmr.dev->size)) {
  6870. error_setg(errp, "pmr backend size needs to be power of 2 in size");
  6871. return false;
  6872. }
  6873. host_memory_backend_set_mapped(n->pmr.dev, true);
  6874. }
  6875. if (n->params.zasl > n->params.mdts) {
  6876. error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less "
  6877. "than or equal to mdts (Maximum Data Transfer Size)");
  6878. return false;
  6879. }
  6880. if (!n->params.vsl) {
  6881. error_setg(errp, "vsl must be non-zero");
  6882. return false;
  6883. }
  6884. if (params->sriov_max_vfs) {
  6885. if (!n->subsys) {
  6886. error_setg(errp, "subsystem is required for the use of SR-IOV");
  6887. return false;
  6888. }
  6889. if (params->cmb_size_mb) {
  6890. error_setg(errp, "CMB is not supported with SR-IOV");
  6891. return false;
  6892. }
  6893. if (n->pmr.dev) {
  6894. error_setg(errp, "PMR is not supported with SR-IOV");
  6895. return false;
  6896. }
  6897. if (!params->sriov_vq_flexible || !params->sriov_vi_flexible) {
  6898. error_setg(errp, "both sriov_vq_flexible and sriov_vi_flexible"
  6899. " must be set for the use of SR-IOV");
  6900. return false;
  6901. }
  6902. if (params->sriov_vq_flexible < params->sriov_max_vfs * 2) {
  6903. error_setg(errp, "sriov_vq_flexible must be greater than or equal"
  6904. " to %d (sriov_max_vfs * 2)", params->sriov_max_vfs * 2);
  6905. return false;
  6906. }
  6907. if (params->max_ioqpairs < params->sriov_vq_flexible + 2) {
  6908. error_setg(errp, "(max_ioqpairs - sriov_vq_flexible) must be"
  6909. " greater than or equal to 2");
  6910. return false;
  6911. }
  6912. if (params->sriov_vi_flexible < params->sriov_max_vfs) {
  6913. error_setg(errp, "sriov_vi_flexible must be greater than or equal"
  6914. " to %d (sriov_max_vfs)", params->sriov_max_vfs);
  6915. return false;
  6916. }
  6917. if (params->msix_qsize < params->sriov_vi_flexible + 1) {
  6918. error_setg(errp, "(msix_qsize - sriov_vi_flexible) must be"
  6919. " greater than or equal to 1");
  6920. return false;
  6921. }
  6922. if (params->sriov_max_vi_per_vf &&
  6923. (params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) {
  6924. error_setg(errp, "sriov_max_vi_per_vf must meet:"
  6925. " (sriov_max_vi_per_vf - 1) %% %d == 0 and"
  6926. " sriov_max_vi_per_vf >= 1", NVME_VF_RES_GRANULARITY);
  6927. return false;
  6928. }
  6929. if (params->sriov_max_vq_per_vf &&
  6930. (params->sriov_max_vq_per_vf < 2 ||
  6931. (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY)) {
  6932. error_setg(errp, "sriov_max_vq_per_vf must meet:"
  6933. " (sriov_max_vq_per_vf - 1) %% %d == 0 and"
  6934. " sriov_max_vq_per_vf >= 2", NVME_VF_RES_GRANULARITY);
  6935. return false;
  6936. }
  6937. }
  6938. return true;
  6939. }
static void nvme_init_state(NvmeCtrl *n)
{
    NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
    NvmeSecCtrlEntry *list = n->sec_ctrl_list;
    NvmeSecCtrlEntry *sctrl;
    PCIDevice *pci = PCI_DEVICE(n);
    NvmeAtomic *atomic = &n->atomic;
    NvmeIdCtrl *id = &n->id_ctrl;
    uint8_t max_vfs;
    int i;

    if (pci_is_vf(pci)) {
        sctrl = nvme_sctrl(n);
        max_vfs = 0;
        n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
        n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
    } else {
        max_vfs = n->params.sriov_max_vfs;
        n->conf_ioqpairs = n->params.max_ioqpairs;
        n->conf_msix_qsize = n->params.msix_qsize;
    }

    n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
    n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
    n->temperature = NVME_TEMPERATURE;
    n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
    n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
    n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
    QTAILQ_INIT(&n->aer_queue);

    n->nr_sec_ctrls = max_vfs;
    for (i = 0; i < max_vfs; i++) {
        sctrl = &list[i];
        sctrl->pcid = cpu_to_le16(n->cntlid);
        sctrl->vfn = cpu_to_le16(i + 1);
    }

    cap->cntlid = cpu_to_le16(n->cntlid);
    cap->crt = NVME_CRT_VQ | NVME_CRT_VI;

    if (pci_is_vf(pci)) {
        cap->vqprt = cpu_to_le16(1 + n->conf_ioqpairs);
    } else {
        cap->vqprt = cpu_to_le16(1 + n->params.max_ioqpairs -
                                 n->params.sriov_vq_flexible);
        cap->vqfrt = cpu_to_le32(n->params.sriov_vq_flexible);
        cap->vqrfap = cap->vqfrt;
        cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
        cap->vqfrsm = n->params.sriov_max_vq_per_vf ?
                      cpu_to_le16(n->params.sriov_max_vq_per_vf) :
                      cap->vqfrt / MAX(max_vfs, 1);
    }

    if (pci_is_vf(pci)) {
        cap->viprt = cpu_to_le16(n->conf_msix_qsize);
    } else {
        cap->viprt = cpu_to_le16(n->params.msix_qsize -
                                 n->params.sriov_vi_flexible);
        cap->vifrt = cpu_to_le32(n->params.sriov_vi_flexible);
        cap->virfap = cap->vifrt;
        cap->vigran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
        cap->vifrsm = n->params.sriov_max_vi_per_vf ?
                      cpu_to_le16(n->params.sriov_max_vi_per_vf) :
                      cap->vifrt / MAX(max_vfs, 1);
    }

    /* Atomic Write */
    id->awun = cpu_to_le16(n->params.atomic_awun);
    id->awupf = cpu_to_le16(n->params.atomic_awupf);
    n->dn = n->params.atomic_dn;

    if (id->awun || id->awupf) {
        if (id->awupf > id->awun) {
            id->awupf = 0;
        }

        if (n->dn) {
            atomic->atomic_max_write_size = id->awupf + 1;
        } else {
            atomic->atomic_max_write_size = id->awun + 1;
        }

        if (atomic->atomic_max_write_size == 1) {
            atomic->atomic_writes = 0;
        } else {
            atomic->atomic_writes = 1;
        }
    }
}

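/*
 * Map the Controller Memory Buffer backing storage as a 64-bit
 * prefetchable BAR and advertise CMBS in CAP.  With the legacy-cmb
 * parameter the CMB registers are enabled right away (pre-v1.4 style).
 */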
static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
{
    uint64_t cmb_size = n->params.cmb_size_mb * MiB;
    uint64_t cap = ldq_le_p(&n->bar.cap);

    n->cmb.buf = g_malloc0(cmb_size);
    memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n,
                          "nvme-cmb", cmb_size);
    pci_register_bar(pci_dev, NVME_CMB_BIR,
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_TYPE_64 |
                     PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem);

    NVME_CAP_SET_CMBS(cap, 1);
    stq_le_p(&n->bar.cap, cap);

    if (n->params.legacy_cmb) {
        nvme_cmb_enable_regs(n);
        n->cmb.cmse = true;
    }
}

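/*
 * Register the Persistent Memory Region backend as its own BAR.  The
 * region starts out disabled here; it is enabled later via the PMRCTL
 * register write path.
 */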
static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
{
    uint32_t pmrcap = ldl_le_p(&n->bar.pmrcap);

    NVME_PMRCAP_SET_RDS(pmrcap, 1);
    NVME_PMRCAP_SET_WDS(pmrcap, 1);
    NVME_PMRCAP_SET_BIR(pmrcap, NVME_PMR_BIR);
    /* Turn on bit 1 support */
    NVME_PMRCAP_SET_PMRWBM(pmrcap, 0x02);
    NVME_PMRCAP_SET_CMSS(pmrcap, 1);
    stl_le_p(&n->bar.pmrcap, pmrcap);

    pci_register_bar(pci_dev, NVME_PMR_BIR,
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_TYPE_64 |
                     PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr);

    memory_region_set_enabled(&n->pmr.dev->mr, false);
}

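/*
 * Compute the size of the memory BAR: the register file plus two
 * doorbells per queue, optionally followed by 4K-aligned MSI-X table
 * and PBA regions.  The result is rounded up to a power of two.
 */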
static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs,
                               unsigned *msix_table_offset,
                               unsigned *msix_pba_offset)
{
    uint64_t bar_size, msix_table_size;

    bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE;

    if (total_irqs == 0) {
        goto out;
    }

    bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);

    if (msix_table_offset) {
        *msix_table_offset = bar_size;
    }

    msix_table_size = PCI_MSIX_ENTRY_SIZE * total_irqs;
    bar_size += msix_table_size;

    bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB);

    if (msix_pba_offset) {
        *msix_pba_offset = bar_size;
    }

    bar_size += QEMU_ALIGN_UP(total_irqs, 64) / 8;

out:
    return pow2ceil(bar_size);
}

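/*
 * Set up the SR-IOV capability on the physical function.  The VF BAR is
 * sized for the per-VF resource maximums (vqfrsm/vifrsm) reported in
 * the primary controller capabilities structure.
 */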
static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
                            Error **errp)
{
    uint16_t vf_dev_id = n->params.use_intel_id ?
                         PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
    NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
    uint64_t bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm),
                                       le16_to_cpu(cap->vifrsm),
                                       NULL, NULL);

    if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
                            n->params.sriov_max_vfs, n->params.sriov_max_vfs,
                            NVME_VF_OFFSET, NVME_VF_STRIDE, errp)) {
        return false;
    }

    pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
                              PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);

    return true;
}

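/*
 * Add the PCI power management capability and advertise that no soft
 * reset is performed on the D3hot-to-D0 transition.
 */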
static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
{
    Error *err = NULL;
    int ret;

    ret = pci_pm_init(pci_dev, offset, &err);
    if (err) {
        error_report_err(err);
        return ret;
    }

    pci_set_word(pci_dev->config + offset + PCI_PM_PMC,
                 PCI_PM_CAP_VER_1_2);
    pci_set_word(pci_dev->config + offset + PCI_PM_CTRL,
                 PCI_PM_CTRL_NO_SOFT_RESET);
    pci_set_word(pci_dev->wmask + offset + PCI_PM_CTRL,
                 PCI_PM_CTRL_STATE_MASK);

    return 0;
}

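/*
 * DOE mailbox handler for SPDM (CMA and secured CMA): forward the
 * request to the external SPDM responder over the spdm socket and copy
 * the response back into the read mailbox.
 */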
static bool pcie_doe_spdm_rsp(DOECap *doe_cap)
{
    void *req = pcie_doe_get_write_mbox_ptr(doe_cap);
    uint32_t req_len = pcie_doe_get_obj_len(req) * 4;
    void *rsp = doe_cap->read_mbox;
    uint32_t rsp_len = SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE;

    uint32_t recvd = spdm_socket_rsp(doe_cap->spdm_socket,
                                     SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE,
                                     req, req_len, rsp, rsp_len);
    doe_cap->read_mbox_len += DIV_ROUND_UP(recvd, 4);

    return recvd != 0;
}

static DOEProtocol doe_spdm_prot[] = {
    { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_CMA, pcie_doe_spdm_rsp },
    { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_SECURED_CMA, pcie_doe_spdm_rsp },
    { }
};

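/*
 * Program PCI config space (IDs and capabilities), size and register
 * BAR 0 (registers, doorbells and, unless msix-exclusive-bar is used,
 * the MSI-X structures), and optionally initialize SR-IOV,
 * SPDM-over-DOE, CMB and PMR.
 */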
static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
{
    ERRP_GUARD();
    uint8_t *pci_conf = pci_dev->config;
    uint64_t bar_size;
    unsigned msix_table_offset = 0, msix_pba_offset = 0;
    unsigned nr_vectors;
    int ret;

    pci_conf[PCI_INTERRUPT_PIN] = pci_is_vf(pci_dev) ? 0 : 1;
    pci_config_set_prog_interface(pci_conf, 0x2);

    if (n->params.use_intel_id) {
        pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
        pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_NVME);
    } else {
        pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
        pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
    }

    pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
    nvme_add_pm_capability(pci_dev, 0x60);
    pcie_endpoint_cap_init(pci_dev, 0x80);
    pcie_cap_flr_init(pci_dev);
    if (n->params.sriov_max_vfs) {
        pcie_ari_init(pci_dev, 0x100);
    }

    if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {
        bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL);
        memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
                              bar_size);
        pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
                         PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem);
        ret = msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp);
    } else {
        assert(n->params.msix_qsize >= 1);

        /* add one to max_ioqpairs to account for the admin queue pair */
        if (!pci_is_vf(pci_dev)) {
            nr_vectors = n->params.msix_qsize;
            bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1,
                                      nr_vectors, &msix_table_offset,
                                      &msix_pba_offset);
        } else {
            NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev));
            NvmePriCtrlCap *cap = &pn->pri_ctrl_cap;

            nr_vectors = le16_to_cpu(cap->vifrsm);
            bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm), nr_vectors,
                                      &msix_table_offset, &msix_pba_offset);
        }

        memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size);
        memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
                              msix_table_offset);
        memory_region_add_subregion(&n->bar0, 0, &n->iomem);

        if (pci_is_vf(pci_dev)) {
            pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
        } else {
            pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
                             PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
        }

        ret = msix_init(pci_dev, nr_vectors,
                        &n->bar0, 0, msix_table_offset,
                        &n->bar0, 0, msix_pba_offset, 0, errp);
    }

    if (ret == -ENOTSUP) {
        /* report that msix is not supported, but do not error out */
        warn_report_err(*errp);
        *errp = NULL;
    } else if (ret < 0) {
        /* propagate error to caller */
        return false;
    }

    if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs &&
        !nvme_init_sriov(n, pci_dev, 0x120, errp)) {
        return false;
    }

    nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);

    pcie_cap_deverr_init(pci_dev);

    /* DOE Initialisation */
    if (pci_dev->spdm_port) {
        uint16_t doe_offset = n->params.sriov_max_vfs ?
                                  PCI_CONFIG_SPACE_SIZE + PCI_ARI_SIZEOF
                                  : PCI_CONFIG_SPACE_SIZE;

        pcie_doe_init(pci_dev, &pci_dev->doe_spdm, doe_offset,
                      doe_spdm_prot, true, 0);

        pci_dev->doe_spdm.spdm_socket = spdm_socket_connect(pci_dev->spdm_port,
                                                            errp);

        if (pci_dev->doe_spdm.spdm_socket < 0) {
            return false;
        }
    }

    if (n->params.cmb_size_mb) {
        nvme_init_cmb(n, pci_dev);
    }

    if (n->pmr.dev) {
        nvme_init_pmr(n, pci_dev);
    }

    return true;
}

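/*
 * Derive the subsystem NQN: copy it from the subsystem when one is
 * attached, otherwise synthesize one from the serial number.
 */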
static void nvme_init_subnqn(NvmeCtrl *n)
{
    NvmeSubsystem *subsys = n->subsys;
    NvmeIdCtrl *id = &n->id_ctrl;

    if (!subsys) {
        snprintf((char *)id->subnqn, sizeof(id->subnqn),
                 "nqn.2019-08.org.qemu:%s", n->params.serial);
    } else {
        pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char *)subsys->subnqn);
    }
}

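/*
 * Populate the Identify Controller data structure and the CAP/VS
 * registers.  A VF whose secondary controller state is offline (!scs)
 * has CSTS set to FAILED.
 */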
static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
{
    NvmeIdCtrl *id = &n->id_ctrl;
    uint8_t *pci_conf = pci_dev->config;
    uint64_t cap = ldq_le_p(&n->bar.cap);
    NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
    uint32_t ctratt;
    uint16_t oacs;

    memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs));
    memcpy(n->cse.iocs.nvm, nvme_cse_iocs_nvm_default, sizeof(n->cse.iocs.nvm));
    memcpy(n->cse.iocs.zoned, nvme_cse_iocs_zoned_default,
           sizeof(n->cse.iocs.zoned));

    id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
    id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
    strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
    strpadcpy((char *)id->fr, sizeof(id->fr), QEMU_VERSION, ' ');
    strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');

    id->cntlid = cpu_to_le16(n->cntlid);

    id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);

    ctratt = NVME_CTRATT_ELBAS;
    if (n->params.ctratt.mem) {
        ctratt |= NVME_CTRATT_MEM;
    }

    id->rab = 6;

    if (n->params.use_intel_id) {
        id->ieee[0] = 0xb3;
        id->ieee[1] = 0x02;
        id->ieee[2] = 0x00;
    } else {
        id->ieee[0] = 0x00;
        id->ieee[1] = 0x54;
        id->ieee[2] = 0x52;
    }

    id->mdts = n->params.mdts;
    id->ver = cpu_to_le32(NVME_SPEC_VER);

    oacs = NVME_OACS_NMS | NVME_OACS_FORMAT | NVME_OACS_DIRECTIVES;

    if (n->params.dbcs) {
        oacs |= NVME_OACS_DBCS;

        n->cse.acs[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP;
    }

    if (n->params.sriov_max_vfs) {
        oacs |= NVME_OACS_VMS;

        n->cse.acs[NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP;
    }

    id->oacs = cpu_to_le16(oacs);

    id->cntrltype = 0x1;

    /*
     * Because the controller always completes the Abort command immediately,
     * there can never be more than one concurrently executing Abort command,
     * so this value is never used for anything. Note that there can easily be
     * many Abort commands in the queues, but they are not considered
     * "executing" until processed by nvme_abort.
     *
     * The specification recommends a value of 3 for Abort Command Limit (four
     * concurrently outstanding Abort commands), so let's use that, though it
     * is inconsequential.
     */
    id->acl = 3;
    id->aerl = n->params.aerl;
    id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
    id->lpa = NVME_LPA_NS_SMART | NVME_LPA_CSE | NVME_LPA_EXTENDED;

    /* recommended default value (~70 C) */
    id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
    id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL);

    id->sqes = (NVME_SQES << 4) | NVME_SQES;
    id->cqes = (NVME_CQES << 4) | NVME_CQES;
    id->nn = cpu_to_le32(NVME_MAX_NAMESPACES);
    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
                           NVME_ONCS_FEATURES | NVME_ONCS_DSM |
                           NVME_ONCS_COMPARE | NVME_ONCS_COPY |
                           NVME_ONCS_NVMCSA | NVME_ONCS_NVMAFC);

    /*
     * NOTE: If this device ever supports a command set that does NOT use 0x0
     * as a Flush-equivalent operation, support for the broadcast NSID in Flush
     * should probably be removed.
     *
     * See comment in nvme_io_cmd.
     */
    id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;

    id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1 |
                           NVME_OCFS_COPY_FORMAT_2 | NVME_OCFS_COPY_FORMAT_3);
    id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
                           NVME_CTRL_SGLS_MPTR_SGL);

    nvme_init_subnqn(n);

    id->psd[0].mp = cpu_to_le16(0x9c4);
    id->psd[0].enlat = cpu_to_le32(0x10);
    id->psd[0].exlat = cpu_to_le32(0x4);

    id->cmic |= NVME_CMIC_MULTI_CTRL;
    ctratt |= NVME_CTRATT_ENDGRPS;

    id->endgidmax = cpu_to_le16(0x1);

    if (n->subsys->endgrp.fdp.enabled) {
        ctratt |= NVME_CTRATT_FDPS;
    }

    id->ctratt = cpu_to_le32(ctratt);

    NVME_CAP_SET_MQES(cap, n->params.mqes);
    NVME_CAP_SET_CQR(cap, 1);
    NVME_CAP_SET_TO(cap, 0xf);
    NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NCSS);
    NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_IOCSS);
    NVME_CAP_SET_MPSMAX(cap, 4);
    NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0);
    NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0);
    stq_le_p(&n->bar.cap, cap);

    stl_le_p(&n->bar.vs, NVME_SPEC_VER);
    n->bar.intmc = n->bar.intms = 0;

    if (pci_is_vf(pci_dev) && !sctrl->scs) {
        stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
    }
}

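/*
 * Ensure the controller is bound to a subsystem: create an implicit one
 * named after the serial if no subsys link was given, then register
 * this controller with it to obtain a controller id.
 */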
static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
{
    int cntlid;

    if (!n->subsys) {
        DeviceState *dev = qdev_new(TYPE_NVME_SUBSYS);

        qdev_prop_set_string(dev, "nqn", n->params.serial);

        if (!qdev_realize(dev, NULL, errp)) {
            return -1;
        }

        n->subsys = NVME_SUBSYS(dev);
    }

    cntlid = nvme_subsys_register_ctrl(n, errp);
    if (cntlid < 0) {
        return -1;
    }

    n->cntlid = cntlid;

    return 0;
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns)
{
    uint32_t nsid = ns->params.nsid;
    assert(nsid && nsid <= NVME_MAX_NAMESPACES);

    n->namespaces[nsid] = ns;
    ns->attached++;
}

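/*
 * Device realization: VFs inherit their parameters from the parent PF,
 * then parameters are validated and the subsystem, internal state, PCI
 * resources and identify data are initialized in turn.  A namespace is
 * created implicitly if a drive was attached directly to the controller.
 */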
static void nvme_realize(PCIDevice *pci_dev, Error **errp)
{
    NvmeCtrl *n = NVME(pci_dev);
    DeviceState *dev = DEVICE(pci_dev);
    NvmeNamespace *ns;
    NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev));

    if (pci_is_vf(pci_dev)) {
        /*
         * VFs derive settings from the parent. The PF's lifespan exceeds
         * that of the VFs.
         */
        memcpy(&n->params, &pn->params, sizeof(NvmeParams));

        /*
         * Duplicate the PF's serial string so that releasing the 'serial'
         * property when a VF is removed from the system does not free the
         * PF's copy.
         */
        n->params.serial = g_strdup(pn->params.serial);
        n->subsys = pn->subsys;

        /*
         * Assigning this link (strong link) causes an `object_unref` later in
         * `object_release_link_property`. Increment the refcount to balance
         * this out.
         */
        object_ref(OBJECT(pn->subsys));
    }

    if (!nvme_check_params(n, errp)) {
        return;
    }

    qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id);

    if (nvme_init_subsys(n, errp)) {
        return;
    }
    nvme_init_state(n);
    if (!nvme_init_pci(n, pci_dev, errp)) {
        return;
    }
    nvme_init_ctrl(n, pci_dev);

    /* setup a namespace if the controller drive property was given */
    if (n->namespace.blkconf.blk) {
        ns = &n->namespace;
        ns->params.nsid = 1;
        ns->ctrl = n;

        if (nvme_ns_setup(ns, errp)) {
            return;
        }

        n->subsys->namespaces[ns->params.nsid] = ns;
    }
}

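/*
 * Tear down in roughly the reverse order of realization: reset the
 * controller, detach namespaces, unregister from the subsystem and
 * release queues, AER requests, CMB, SPDM socket, PMR mapping, SR-IOV
 * and MSI-X.
 */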
static void nvme_exit(PCIDevice *pci_dev)
{
    NvmeCtrl *n = NVME(pci_dev);
    NvmeNamespace *ns;
    int i;

    nvme_ctrl_reset(n, NVME_RESET_FUNCTION);

    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
        ns = nvme_ns(n, i);
        if (ns) {
            ns->attached--;
        }
    }

    nvme_subsys_unregister_ctrl(n->subsys, n);

    g_free(n->cq);
    g_free(n->sq);
    g_free(n->aer_reqs);

    if (n->params.cmb_size_mb) {
        g_free(n->cmb.buf);
    }

    if (pci_dev->doe_spdm.spdm_socket > 0) {
        spdm_socket_close(pci_dev->doe_spdm.spdm_socket,
                          SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE);
    }

    if (n->pmr.dev) {
        host_memory_backend_set_mapped(n->pmr.dev, false);
    }

    if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
        pcie_sriov_pf_exit(pci_dev);
    }

    if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) {
        msix_uninit_exclusive_bar(pci_dev);
    } else {
        msix_uninit(pci_dev, &n->bar0, &n->bar0);
    }

    memory_region_del_subregion(&n->bar0, &n->iomem);
}

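/* User-configurable qdev properties for the nvme controller device. */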
static const Property nvme_props[] = {
    DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
    DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND,
                     HostMemoryBackend *),
    DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
                     NvmeSubsystem *),
    DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
    DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
    DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
    DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64),
    DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
    DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
    DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
    DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
    DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7),
    DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
    DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
    DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
    DEFINE_PROP_BOOL("dbcs", NvmeCtrl, params.dbcs, true),
    DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
    DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
                     params.auto_transition_zones, true),
    DEFINE_PROP_UINT16("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
    DEFINE_PROP_UINT16("sriov_vq_flexible", NvmeCtrl,
                       params.sriov_vq_flexible, 0),
    DEFINE_PROP_UINT16("sriov_vi_flexible", NvmeCtrl,
                       params.sriov_vi_flexible, 0),
    DEFINE_PROP_UINT32("sriov_max_vi_per_vf", NvmeCtrl,
                       params.sriov_max_vi_per_vf, 0),
    DEFINE_PROP_UINT32("sriov_max_vq_per_vf", NvmeCtrl,
                       params.sriov_max_vq_per_vf, 0),
    DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar,
                     false),
    DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff),
    DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0),
    DEFINE_PROP_BOOL("ctratt.mem", NvmeCtrl, params.ctratt.mem, false),
    DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, false),
    DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0),
    DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0),
    DEFINE_PROP_BOOL("ocp", NvmeCtrl, params.ocp, false),
};

static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
    NvmeCtrl *n = NVME(obj);
    uint8_t value = n->smart_critical_warning;

    visit_type_uint8(v, name, &value, errp);
}

static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
    NvmeCtrl *n = NVME(obj);
    uint8_t value, old_value, cap = 0, index, event;

    if (!visit_type_uint8(v, name, &value, errp)) {
        return;
    }

    cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY
          | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA;
    if (NVME_CAP_PMRS(ldq_le_p(&n->bar.cap))) {
        cap |= NVME_SMART_PMR_UNRELIABLE;
    }

    if ((value & cap) != value) {
        error_setg(errp, "unsupported smart critical warning bits: 0x%x",
                   value & ~cap);
        return;
    }

    old_value = n->smart_critical_warning;
    n->smart_critical_warning = value;

    /* only inject new bits of smart critical warning */
    for (index = 0; index < NVME_SMART_WARN_MAX; index++) {
        event = 1 << index;
        if (value & ~old_value & event) {
            nvme_smart_event(n, event);
        }
    }
}

static void nvme_pci_reset(DeviceState *qdev)
{
    PCIDevice *pci_dev = PCI_DEVICE(qdev);
    NvmeCtrl *n = NVME(pci_dev);

    trace_pci_nvme_pci_reset();
    nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
}

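/*
 * After a config space write that reduces the number of enabled VFs,
 * transition the secondary controllers of the removed VFs to the
 * offline state.
 */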
static void nvme_sriov_post_write_config(PCIDevice *dev, uint16_t old_num_vfs)
{
    NvmeCtrl *n = NVME(dev);
    NvmeSecCtrlEntry *sctrl;
    int i;

    for (i = pcie_sriov_num_vfs(dev); i < old_num_vfs; i++) {
        sctrl = &n->sec_ctrl_list[i];
        nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
    }
}

static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
                                  uint32_t val, int len)
{
    uint16_t old_num_vfs = pcie_sriov_num_vfs(dev);

    if (pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) {
        pcie_doe_write_config(&dev->doe_spdm, address, val, len);
    }
    pci_default_write_config(dev, address, val, len);
    pcie_cap_flr_write_config(dev, address, val, len);
    nvme_sriov_post_write_config(dev, old_num_vfs);
}

static uint32_t nvme_pci_read_config(PCIDevice *dev, uint32_t address, int len)
{
    uint32_t val;

    if (dev->spdm_port && pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) {
        if (pcie_doe_read_config(&dev->doe_spdm, address, len, &val)) {
            return val;
        }
    }
    return pci_default_read_config(dev, address, len);
}

static const VMStateDescription nvme_vmstate = {
    .name = "nvme",
    .unmigratable = 1,
};

static void nvme_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);

    pc->realize = nvme_realize;
    pc->config_write = nvme_pci_write_config;
    pc->config_read = nvme_pci_read_config;
    pc->exit = nvme_exit;
    pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
    pc->revision = 2;

    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    dc->desc = "Non-Volatile Memory Express";
    device_class_set_props(dc, nvme_props);
    dc->vmsd = &nvme_vmstate;
    device_class_set_legacy_reset(dc, nvme_pci_reset);
}

static void nvme_instance_init(Object *obj)
{
    NvmeCtrl *n = NVME(obj);

    device_add_bootindex_property(obj, &n->namespace.blkconf.bootindex,
                                  "bootindex", "/namespace@1,0",
                                  DEVICE(obj));

    object_property_add(obj, "smart_critical_warning", "uint8",
                        nvme_get_smart_warning,
                        nvme_set_smart_warning, NULL, NULL);
}

static const TypeInfo nvme_info = {
    .name = TYPE_NVME,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(NvmeCtrl),
    .instance_init = nvme_instance_init,
    .class_init = nvme_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_PCIE_DEVICE },
        { }
    },
};

static const TypeInfo nvme_bus_info = {
    .name = TYPE_NVME_BUS,
    .parent = TYPE_BUS,
    .instance_size = sizeof(NvmeBus),
};

static void nvme_register_types(void)
{
    type_register_static(&nvme_info);
    type_register_static(&nvme_bus_info);
}

type_init(nvme_register_types)