xen_disk.c
/*
 * xen paravirt block device backend
 *
 * (c) Gerd Hoffmann <kraxel@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; under version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <inttypes.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <xs.h>
#include <xenctrl.h>
#include <xen/io/xenbus.h>

#include "hw.h"
#include "block_int.h"
#include "qemu-char.h"
#include "xen_blkif.h"
#include "xen_backend.h"
#include "blockdev.h"
/* ------------------------------------------------------------- */
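
/*
 * Tunables:
 *   syncwrite    - force a flush after each write request
 *   batch_maps   - map all grant refs of a request with a single
 *                  xc_gnttab_map_grant_refs() call instead of one
 *                  xc_gnttab_map_grant_ref() per segment
 *   max_requests - upper bound on ioreq structs allocated per device
 *   use_aio      - submit I/O with bdrv_aio_readv/writev rather than
 *                  the synchronous bdrv_read/bdrv_write path
 */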
static int syncwrite    = 0;
static int batch_maps   = 0;
static int max_requests = 32;
static int use_aio      = 1;

/* ------------------------------------------------------------- */

#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

struct ioreq {
    blkif_request_t     req;
    int16_t             status;

    /* parsed request */
    off_t               start;
    QEMUIOVector        v;
    int                 presync;
    int                 postsync;

    /* grant mapping */
    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int                 prot;
    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void                *pages;

    /* aio status */
    int                 aio_inflight;
    int                 aio_errors;

    struct XenBlkDev    *blkdev;
    QLIST_ENTRY(ioreq)  list;
};

struct XenBlkDev {
    struct XenDevice    xendev;  /* must be first */
    char                *params;
    char                *mode;
    char                *type;
    char                *dev;
    char                *devtype;
    const char          *fileproto;
    const char          *filename;
    int                 ring_ref;
    void                *sring;
    int64_t             file_blk;
    int64_t             file_size;
    int                 protocol;
    blkif_back_rings_t  rings;
    int                 more_work;
    int                 cnt_map;

    /* request lists */
    QLIST_HEAD(inflight_head, ioreq) inflight;
    QLIST_HEAD(finished_head, ioreq) finished;
    QLIST_HEAD(freelist_head, ioreq) freelist;
    int                 requests_total;
    int                 requests_inflight;
    int                 requests_finished;

    /* qemu block driver */
    DriveInfo           *dinfo;
    BlockDriverState    *bs;
    QEMUBH              *bh;
};

/* ------------------------------------------------------------- */
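
/*
 * Request lifecycle: an ioreq is taken from (or allocated into) the
 * freelist, moved to the inflight list while its I/O runs, then to the
 * finished list until the response has been pushed onto the shared ring,
 * at which point it is recycled back onto the freelist.
 */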
static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq = NULL;

    if (QLIST_EMPTY(&blkdev->freelist)) {
        if (blkdev->requests_total >= max_requests) {
            goto out;
        }
        /* allocate new struct */
        ioreq = qemu_mallocz(sizeof(*ioreq));
        ioreq->blkdev = blkdev;
        blkdev->requests_total++;
        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
    } else {
        /* get one from freelist */
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_reset(&ioreq->v);
    }
    QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
    blkdev->requests_inflight++;

out:
    return ioreq;
}

static void ioreq_finish(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
    blkdev->requests_inflight--;
    blkdev->requests_finished++;
}

static void ioreq_release(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    QEMUIOVector v = ioreq->v;

    QLIST_REMOVE(ioreq, list);
    memset(ioreq, 0, sizeof(*ioreq));
    ioreq->blkdev = blkdev;
    /* restore the iovec so the buffer allocated by qemu_iovec_init()
     * is not leaked by the memset() above */
    ioreq->v = v;
    QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
    blkdev->requests_finished--;
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int ioreq_parse(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    uintptr_t mem;
    size_t len;
    int i;

    xen_be_printf(&blkdev->xendev, 3,
                  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
                  ioreq->req.operation, ioreq->req.nr_segments,
                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->prot = PROT_WRITE; /* to memory */
        break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            ioreq->presync = 1;
            return 0;
        }
        if (!syncwrite) {
            ioreq->presync = ioreq->postsync = 1;
        }
        /* fall through */
    case BLKIF_OP_WRITE:
        ioreq->prot = PROT_READ; /* from memory */
        if (syncwrite) {
            ioreq->postsync = 1;
        }
        break;
    default:
        xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
                      ioreq->req.operation);
        goto err;
    }

    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
        goto err;
    }

    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
    for (i = 0; i < ioreq->req.nr_segments; i++) {
        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
            xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
            goto err;
        }
        if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
            xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
            goto err;
        }
        if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
            xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
            goto err;
        }

        ioreq->domids[i] = blkdev->xendev.dom;
        ioreq->refs[i]   = ioreq->req.seg[i].gref;

        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
        qemu_iovec_add(&ioreq->v, (void*)mem, len);
    }
    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
        xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
        goto err;
    }
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
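
/*
 * Grant mapping: the frontend passes grant references rather than
 * addresses.  Each segment is mapped into our address space through the
 * grant table device; with batch_maps set, all segments of a request are
 * mapped with a single xc_gnttab_map_grant_refs() call into a contiguous
 * region, otherwise one xc_gnttab_map_grant_ref() call per segment.
 * The iov_base fields initially hold the in-page offset (set up by
 * ioreq_parse) and are fixed up to real pointers in ioreq_map().
 */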
static void ioreq_unmap(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0) {
        return;
    }
    if (batch_maps) {
        if (!ioreq->pages) {
            return;
        }
        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) {
            xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                          strerror(errno));
        }
        ioreq->blkdev->cnt_map -= ioreq->v.niov;
        ioreq->pages = NULL;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            if (!ioreq->page[i]) {
                continue;
            }
            if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) {
                xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                              strerror(errno));
            }
            ioreq->blkdev->cnt_map--;
            ioreq->page[i] = NULL;
        }
    }
}

static int ioreq_map(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0) {
        return 0;
    }
    if (batch_maps) {
        ioreq->pages = xc_gnttab_map_grant_refs
            (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
        if (ioreq->pages == NULL) {
            xen_be_printf(&ioreq->blkdev->xendev, 0,
                          "can't map %d grant refs (%s, %d maps)\n",
                          ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
            return -1;
        }
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
                (uintptr_t)ioreq->v.iov[i].iov_base;
        }
        ioreq->blkdev->cnt_map += ioreq->v.niov;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->page[i] = xc_gnttab_map_grant_ref
                (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
            if (ioreq->page[i] == NULL) {
                xen_be_printf(&ioreq->blkdev->xendev, 0,
                              "can't map grant ref %d (%s, %d maps)\n",
                              ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
                ioreq_unmap(ioreq);
                return -1;
            }
            ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
            ioreq->blkdev->cnt_map++;
        }
    }
    return 0;
}
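
/*
 * Synchronous I/O path (use_aio == 0): execute the request with blocking
 * bdrv_read/bdrv_write calls, one iovec element at a time, issuing
 * bdrv_flush() for pre/post barrier semantics.
 */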
static int ioreq_runio_qemu_sync(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    int i, rc;
    off_t pos;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
        goto err_no_map;
    }
    if (ioreq->presync) {
        bdrv_flush(blkdev->bs);
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        pos = ioreq->start;
        for (i = 0; i < ioreq->v.niov; i++) {
            rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE,
                           ioreq->v.iov[i].iov_base,
                           ioreq->v.iov[i].iov_len / BLOCK_SIZE);
            if (rc != 0) {
                xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n",
                              ioreq->v.iov[i].iov_base,
                              ioreq->v.iov[i].iov_len);
                goto err;
            }
            pos += ioreq->v.iov[i].iov_len;
        }
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            break;
        }
        pos = ioreq->start;
        for (i = 0; i < ioreq->v.niov; i++) {
            rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE,
                            ioreq->v.iov[i].iov_base,
                            ioreq->v.iov[i].iov_len / BLOCK_SIZE);
            if (rc != 0) {
                xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n",
                              ioreq->v.iov[i].iov_base,
                              ioreq->v.iov[i].iov_len);
                goto err;
            }
            pos += ioreq->v.iov[i].iov_len;
        }
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync) {
        bdrv_flush(blkdev->bs);
    }
    ioreq->status = BLKIF_RSP_OKAY;

    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    return 0;

err:
    ioreq_unmap(ioreq);
err_no_map:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
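
/*
 * Asynchronous I/O path: aio_inflight counts outstanding work on the
 * ioreq.  It starts at 1 (dropped by the final qemu_aio_complete(ioreq, 0)
 * in ioreq_runio_qemu_aio), so the request only completes once every
 * submitted bdrv_aio_* operation has called back.
 */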
static void qemu_aio_complete(void *opaque, int ret)
{
    struct ioreq *ioreq = opaque;

    if (ret != 0) {
        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
        ioreq->aio_errors++;
    }

    ioreq->aio_inflight--;
    if (ioreq->aio_inflight > 0) {
        return;
    }

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    qemu_bh_schedule(ioreq->blkdev->bh);
}

static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
        goto err_no_map;
    }

    ioreq->aio_inflight++;
    if (ioreq->presync) {
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            break;
        }
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync) {
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
    }
    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq_unmap(ioreq);
err_no_map:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}
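
/*
 * Build the response for one finished ioreq and place it on the shared
 * ring, using the layout matching the negotiated protocol.  Returns
 * whether the frontend needs to be notified.
 */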
static int blk_send_response_one(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    int send_notify = 0;
    int have_requests = 0;
    blkif_response_t resp;
    void *dst;

    resp.id        = ioreq->req.id;
    resp.operation = ioreq->req.operation;
    resp.status    = ioreq->status;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
                                blkdev->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
                                blkdev->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
        /* unknown protocol: leave the ring untouched instead of
         * writing through a NULL pointer */
        return 0;
    }
    memcpy(dst, &resp, sizeof(resp));
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
        /*
         * Tail check for pending requests. Allows frontend to avoid
         * notifications if requests are already in flight (lower
         * overheads and promotes batching).
         */
        RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
        have_requests = 1;
    }

    if (have_requests) {
        blkdev->more_work++;
    }
    return send_notify;
}

/* walk finished list, send outstanding responses, free requests */
static void blk_send_response_all(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq;
    int send_notify = 0;

    while (!QLIST_EMPTY(&blkdev->finished)) {
        ioreq = QLIST_FIRST(&blkdev->finished);
        send_notify += blk_send_response_one(ioreq);
        ioreq_release(ioreq);
    }
    if (send_notify) {
        xen_be_send_notify(&blkdev->xendev);
    }
}

static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
{
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
               sizeof(ioreq->req));
        break;
    case BLKIF_PROTOCOL_X86_32:
        blkif_get_x86_32_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc));
        break;
    case BLKIF_PROTOCOL_X86_64:
        blkif_get_x86_64_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc));
        break;
    }
    return 0;
}
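
/*
 * Main request loop: consume requests from the shared ring until it is
 * empty or we run out of ioreq slots, parse and submit each one, and
 * reschedule ourselves via the bottom half if more work is pending.
 */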
static void blk_handle_requests(struct XenBlkDev *blkdev)
{
    RING_IDX rc, rp;
    struct ioreq *ioreq;

    blkdev->more_work = 0;

    rc = blkdev->rings.common.req_cons;
    rp = blkdev->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    if (use_aio) {
        blk_send_response_all(blkdev);
    }
    while (rc != rp) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
            break;
        }
        ioreq = ioreq_start(blkdev);
        if (ioreq == NULL) {
            blkdev->more_work++;
            break;
        }
        blk_get_request(blkdev, ioreq, rc);
        blkdev->rings.common.req_cons = ++rc;

        /* parse them */
        if (ioreq_parse(ioreq) != 0) {
            if (blk_send_response_one(ioreq)) {
                xen_be_send_notify(&blkdev->xendev);
            }
            ioreq_release(ioreq);
            continue;
        }

        if (use_aio) {
            /* run i/o in aio mode */
            ioreq_runio_qemu_aio(ioreq);
        } else {
            /* run i/o in sync mode */
            ioreq_runio_qemu_sync(ioreq);
        }
    }
    if (!use_aio) {
        blk_send_response_all(blkdev);
    }

    if (blkdev->more_work && blkdev->requests_inflight < max_requests) {
        qemu_bh_schedule(blkdev->bh);
    }
}

/* ------------------------------------------------------------- */

static void blk_bh(void *opaque)
{
    struct XenBlkDev *blkdev = opaque;
    blk_handle_requests(blkdev);
}

static void blk_alloc(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    QLIST_INIT(&blkdev->inflight);
    QLIST_INIT(&blkdev->finished);
    QLIST_INIT(&blkdev->freelist);
    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
    if (xen_mode != XEN_EMULATE) {
        batch_maps = 1;
    }
}
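
/*
 * blk_init: read the backend configuration from xenstore, open (or look
 * up) the block device, and publish disk size and features
 * (feature-barrier, info, sector-size, sectors) to the frontend.
 */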
static int blk_init(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    int index, qflags, have_barriers, info = 0;

    /* read xenstore entries */
    if (blkdev->params == NULL) {
        char *h = NULL;
        blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
        if (blkdev->params != NULL) {
            h = strchr(blkdev->params, ':');
        }
        if (h != NULL) {
            blkdev->fileproto = blkdev->params;
            blkdev->filename  = h+1;
            *h = 0;
        } else {
            blkdev->fileproto = "<unset>";
            blkdev->filename  = blkdev->params;
        }
    }
    if (!strcmp("aio", blkdev->fileproto)) {
        blkdev->fileproto = "raw";
    }
    if (blkdev->mode == NULL) {
        blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
    }
    if (blkdev->type == NULL) {
        blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
    }
    if (blkdev->dev == NULL) {
        blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
    }
    if (blkdev->devtype == NULL) {
        blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");
    }

    /* do we have all we need? */
    if (blkdev->params == NULL ||
        blkdev->mode == NULL   ||
        blkdev->type == NULL   ||
        blkdev->dev == NULL) {
        goto out_error;
    }

    /* read-only ? */
    if (strcmp(blkdev->mode, "w") == 0) {
        qflags = BDRV_O_RDWR;
    } else {
        qflags = 0;
        info  |= VDISK_READONLY;
    }

    /* cdrom ? */
    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) {
        info |= VDISK_CDROM;
    }

    /* init qemu block driver */
    index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->dinfo = drive_get(IF_XEN, 0, index);
    if (!blkdev->dinfo) {
        /* setup via xenbus -> create new block driver instance */
        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
        blkdev->bs = bdrv_new(blkdev->dev);
        if (blkdev->bs) {
            if (bdrv_open(blkdev->bs, blkdev->filename, qflags,
                          bdrv_find_whitelisted_format(blkdev->fileproto)) != 0) {
                bdrv_delete(blkdev->bs);
                blkdev->bs = NULL;
            }
        }
        if (!blkdev->bs) {
            goto out_error;
        }
    } else {
        /* setup via qemu cmdline -> already setup for us */
        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
        blkdev->bs = blkdev->dinfo->bdrv;
    }
    blkdev->file_blk  = BLOCK_SIZE;
    blkdev->file_size = bdrv_getlength(blkdev->bs);
    if (blkdev->file_size < 0) {
        xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
                      (int)blkdev->file_size, strerror(-blkdev->file_size),
                      blkdev->bs->drv ? blkdev->bs->drv->format_name : "-");
        blkdev->file_size = 0;
    }
    have_barriers = blkdev->bs->drv && blkdev->bs->drv->bdrv_flush ? 1 : 0;

    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
                  " size %" PRId64 " (%" PRId64 " MB)\n",
                  blkdev->type, blkdev->fileproto, blkdev->filename,
                  blkdev->file_size, blkdev->file_size >> 20);

    /* fill info */
    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", have_barriers);
    xenstore_write_be_int(&blkdev->xendev, "info", info);
    xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk);
    xenstore_write_be_int(&blkdev->xendev, "sectors",
                          blkdev->file_size / blkdev->file_blk);
    return 0;

out_error:
    qemu_free(blkdev->params);
    blkdev->params = NULL;
    qemu_free(blkdev->mode);
    blkdev->mode = NULL;
    qemu_free(blkdev->type);
    blkdev->type = NULL;
    qemu_free(blkdev->dev);
    blkdev->dev = NULL;
    qemu_free(blkdev->devtype);
    blkdev->devtype = NULL;
    return -1;
}
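
/*
 * blk_connect: map the shared ring page granted by the frontend,
 * initialize the back ring for the negotiated protocol and bind the
 * event channel.
 */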
static int blk_connect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
        return -1;
    }
    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
                             &blkdev->xendev.remote_port) == -1) {
        return -1;
    }

    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    if (blkdev->xendev.protocol) {
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
        }
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
        }
    }

    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
                                            blkdev->xendev.dom,
                                            blkdev->ring_ref,
                                            PROT_READ | PROT_WRITE);
    if (!blkdev->sring) {
        return -1;
    }
    blkdev->cnt_map++;

    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = blkdev->sring;
        BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;
        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;
        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE);
        break;
    }
    }

    xen_be_bind_evtchn(&blkdev->xendev);

    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                  "remote port %d, local port %d\n",
                  blkdev->xendev.protocol, blkdev->ring_ref,
                  blkdev->xendev.remote_port, blkdev->xendev.local_port);
    return 0;
}

static void blk_disconnect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (blkdev->bs) {
        if (!blkdev->dinfo) {
            /* close/delete only if we created it ourself */
            bdrv_close(blkdev->bs);
            bdrv_delete(blkdev->bs);
        }
        blkdev->bs = NULL;
    }
    xen_be_unbind_evtchn(&blkdev->xendev);

    if (blkdev->sring) {
        xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
        blkdev->cnt_map--;
        blkdev->sring = NULL;
    }
}

static int blk_free(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    struct ioreq *ioreq;

    while (!QLIST_EMPTY(&blkdev->freelist)) {
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_destroy(&ioreq->v);
        qemu_free(ioreq);
    }

    qemu_free(blkdev->params);
    qemu_free(blkdev->mode);
    qemu_free(blkdev->type);
    qemu_free(blkdev->dev);
    qemu_free(blkdev->devtype);
    qemu_bh_delete(blkdev->bh);
    return 0;
}

static void blk_event(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    qemu_bh_schedule(blkdev->bh);
}

struct XenDevOps xen_blkdev_ops = {
    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = blk_alloc,
    .init       = blk_init,
    .connect    = blk_connect,
    .disconnect = blk_disconnect,
    .event      = blk_event,
    .free       = blk_free,
};