/*
 * QEMU NVM Express
 *
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch <kbusch@kernel.org>
 *   Klaus Jensen <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev <dmitry.fomichev@wdc.com>
 *   Minwoo Im <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_NVME_H
#define HW_NVME_NVME_H

#include "qemu/uuid.h"
#include "hw/pci/pci_device.h"
#include "hw/block/block.h"
#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES  256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)

QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
#define SUBSYS_SLOT_RSVD (void *)0xFFFF

typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus     bus;
    uint8_t     subnqn[256];
    char        *serial;

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn;
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);

static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}
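
/*
 * NSIDs are 1-based, which is why the namespaces[] arrays are sized
 * NVME_MAX_NAMESPACES + 1 and slot 0 is never used. An illustrative lookup,
 * assuming "subsys" points at a configured subsystem:
 *
 *     NvmeNamespace *ns = nvme_subsys_ns(subsys, 1);
 *     if (!ns) {
 *         ... namespace 1 does not exist in this subsystem ...
 *     }
 */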

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

typedef struct NvmeZone {
    NvmeZoneDescr   d;
    uint64_t        w_ptr;
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

typedef struct NvmeNamespaceParams {
    bool     detached;
    bool     shared;
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default;

    uint16_t ms;
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;
    uint8_t  pif;

    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;

    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
} NvmeNamespaceParams;
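
/*
 * These parameters are exposed as properties of the nvme-ns device. An
 * illustrative zoned configuration (property names as documented for the
 * hw/nvme device; values are arbitrary):
 *
 *     -device nvme-ns,drive=nvm0,zoned=true,zoned.zone_size=128M, \
 *             zoned.zone_capacity=96M,zoned.max_open=16,zoned.max_active=32
 */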

typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;
    int64_t      moff;
    NvmeIdNs     id_ns;
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;
    unsigned int nlbaf;
    size_t       lbasz;
    const uint32_t *iocs;
    uint8_t      csi;
    uint16_t     status;
    int          attached;
    uint8_t      pif;

    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;
    uint64_t        zone_capacity;
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;
    } features;
} NvmeNamespace;

static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}
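
/*
 * Example of the layout math above, assuming a format with 4096-byte
 * logical blocks (lbaf.ds == 12) and 16 bytes of separate metadata per
 * block (lbaf.ms == 16):
 *
 *     nvme_l2b(ns, 8)  == 8 << 12 == 32768     (bytes of data)
 *     nvme_m2b(ns, 8)  == 16 * 8  == 128       (bytes of metadata)
 *     nvme_moff(ns, 8) == ns->moff + 128       (offset of LBA 8's metadata)
 *
 * With separate (non-extended) metadata, the data blocks are laid out
 * first and the metadata region begins at ns->moff.
 */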

static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}
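
/*
 * The zone state occupies the upper nibble of the Zone State (ZS) field of
 * the zone descriptor, per the NVMe Zoned Namespaces specification; the
 * shifts above pack and unpack it. For example, storing
 * NVME_ZONE_STATE_FULL (0xe) yields d.zs == 0xe0.
 */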

static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}
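
/*
 * The aor helpers above track the Active and Open Resources of a zoned
 * namespace. The counters are only enforced when the corresponding limit is
 * non-zero (zero means unlimited), and since every open zone is also
 * active, the invariant maintained is:
 *
 *     nr_open_zones <= nr_active_zones <= max_active_zones
 *
 * A caller transitioning a zone from EMPTY to EXPLICITLY OPEN would do
 * roughly the following (illustrative sketch, not a function from this
 * header):
 *
 *     nvme_aor_inc_active(ns);
 *     nvme_aor_inc_open(ns);
 *     nvme_set_zone_state(zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
 */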

void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;
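
/*
 * Which union member is live is encoded in flags: when NVME_SG_DMA is set,
 * the scatter/gather list (qsg) holds guest physical addresses for DMA;
 * otherwise the iovec (iov) holds directly addressable host pointers (e.g.
 * into the CMB). NVME_SG_ALLOC records that one of the two has been
 * initialized and must be freed.
 */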

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

typedef struct NvmeRequest {
    struct NvmeSQueue    *sq;
    struct NvmeNamespace *ns;
    BlockAIOCB           *aiocb;
    uint16_t             status;
    void                 *opaque;
    NvmeCqe              cqe;
    NvmeCmd              cmd;
    BlockAcctCookie      acct;
    NvmeSg               sg;
    QTAILQ_ENTRY(NvmeRequest) entry;
} NvmeRequest;

typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t      *bounce;
    } data, mdata;
} NvmeBounceContext;

static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    NvmeRequest *io_req;
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;
    uint32_t    size;
    uint64_t    dma_addr;
    uint64_t    db_addr;
    uint64_t    ei_addr;
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;
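
/*
 * In both queue structures, db_addr and ei_addr hold the guest addresses of
 * the queue's shadow doorbell and event index entries; they remain zero
 * until the host issues NVME_ADM_CMD_DBBUF_CONFIG (see dbbuf_dbs/dbbuf_eis
 * in NvmeCtrl below).
 */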

#define TYPE_NVME "nvme"
#define NVME(obj) \
    OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t  aerl;
    uint32_t aer_max_queued;
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;
    uint8_t  zasl;
    bool     auto_transition_zones;
    bool     legacy_cmb;
    bool     ioeventfd;
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;
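
/*
 * As with the namespace parameters, these map to properties of the nvme
 * device. An illustrative invocation (values are arbitrary):
 *
 *     -device nvme,serial=deadbeef,max_ioqpairs=8,cmb_size_mb=64
 */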

typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;
    NvmeParams   params;
    NvmeBus      bus;

    uint16_t    cntlid;
    bool        qs_created;
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;
    uint32_t    conf_msix_qsize;
    uint32_t    conf_ioqpairs;
    uint64_t    dbbuf_dbs;
    uint64_t    dbbuf_eis;
    bool        dbbuf_enabled;

    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;
        hwaddr       cba;
    } cmb;

    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;

    /* Namespace IDs start at 1, so the bitmap is 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };
        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
    } features;

    NvmePriCtrlCap  pri_ctrl_cap;
    NvmeSecCtrlList sec_ctrl_list;
    struct {
        uint16_t    vqrfap;
        uint16_t    virfap;
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
} NvmeCtrl;

typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;

static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    return sq->ctrl;
}

static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}
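
/*
 * Every request is reached through its submission queue, so the owning
 * controller and completion queue can always be recovered from the request
 * itself, e.g. in a completion path (illustrative; trace_something is a
 * hypothetical trace call, not part of this header):
 *
 *     NvmeCtrl   *n  = nvme_ctrl(req);
 *     NvmeCQueue *cq = nvme_cq(req);
 *     trace_something(nvme_cid(req), cq->cqid);
 */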

static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
{
    PCIDevice *pci_dev = &n->parent_obj;
    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));

    if (pci_is_vf(pci_dev)) {
        return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
    }

    return NULL;
}

static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
                                                      uint16_t cntlid)
{
    NvmeSecCtrlList *list = &n->sec_ctrl_list;
    uint8_t i;

    for (i = 0; i < list->numcntl; i++) {
        if (le16_to_cpu(list->sec[i].scid) == cntlid) {
            return &list->sec[i];
        }
    }

    return NULL;
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);

#endif /* HW_NVME_NVME_H */