|
@@ -160,6 +160,7 @@ typedef struct BDRVRawState {
|
|
bool has_write_zeroes:1;
|
|
bool has_write_zeroes:1;
|
|
bool use_linux_aio:1;
|
|
bool use_linux_aio:1;
|
|
bool use_linux_io_uring:1;
|
|
bool use_linux_io_uring:1;
|
|
|
|
+ int64_t *offset; /* offset of zone append operation */
|
|
int page_cache_inconsistent; /* errno from fdatasync failure */
|
|
int page_cache_inconsistent; /* errno from fdatasync failure */
|
|
bool has_fallocate;
|
|
bool has_fallocate;
|
|
bool needs_alignment;
|
|
bool needs_alignment;
|
|
@@ -1698,7 +1699,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
|
|
ssize_t len;
|
|
ssize_t len;
|
|
|
|
|
|
len = RETRY_ON_EINTR(
|
|
len = RETRY_ON_EINTR(
|
|
- (aiocb->aio_type & QEMU_AIO_WRITE) ?
|
|
|
|
|
|
+ (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ?
|
|
qemu_pwritev(aiocb->aio_fildes,
|
|
qemu_pwritev(aiocb->aio_fildes,
|
|
aiocb->io.iov,
|
|
aiocb->io.iov,
|
|
aiocb->io.niov,
|
|
aiocb->io.niov,
|
|
@@ -1727,7 +1728,7 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
|
|
ssize_t len;
|
|
ssize_t len;
|
|
|
|
|
|
while (offset < aiocb->aio_nbytes) {
|
|
while (offset < aiocb->aio_nbytes) {
|
|
- if (aiocb->aio_type & QEMU_AIO_WRITE) {
|
|
|
|
|
|
+ if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
|
|
len = pwrite(aiocb->aio_fildes,
|
|
len = pwrite(aiocb->aio_fildes,
|
|
(const char *)buf + offset,
|
|
(const char *)buf + offset,
|
|
aiocb->aio_nbytes - offset,
|
|
aiocb->aio_nbytes - offset,
|
|
@@ -1820,7 +1821,7 @@ static int handle_aiocb_rw(void *opaque)
|
|
}
|
|
}
|
|
|
|
|
|
nbytes = handle_aiocb_rw_linear(aiocb, buf);
|
|
nbytes = handle_aiocb_rw_linear(aiocb, buf);
|
|
- if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
|
|
|
|
|
|
+ if (!(aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))) {
|
|
char *p = buf;
|
|
char *p = buf;
|
|
size_t count = aiocb->aio_nbytes, copy;
|
|
size_t count = aiocb->aio_nbytes, copy;
|
|
int i;
|
|
int i;
|
|
@@ -2453,8 +2454,12 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
|
|
if (fd_open(bs) < 0)
|
|
if (fd_open(bs) < 0)
|
|
return -EIO;
|
|
return -EIO;
|
|
#if defined(CONFIG_BLKZONED)
|
|
#if defined(CONFIG_BLKZONED)
|
|
- if (type & QEMU_AIO_WRITE && bs->wps) {
|
|
|
|
|
|
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) {
|
|
qemu_co_mutex_lock(&bs->wps->colock);
|
|
qemu_co_mutex_lock(&bs->wps->colock);
|
|
|
|
+ if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) {
|
|
|
|
+ int index = offset / bs->bl.zone_size;
|
|
|
|
+ offset = bs->wps->wp[index];
|
|
|
|
+ }
|
|
}
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
|
|
@@ -2502,9 +2507,13 @@ out:
|
|
{
|
|
{
|
|
BlockZoneWps *wps = bs->wps;
|
|
BlockZoneWps *wps = bs->wps;
|
|
if (ret == 0) {
|
|
if (ret == 0) {
|
|
- if (type & QEMU_AIO_WRITE && wps && bs->bl.zone_size) {
|
|
|
|
|
|
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
|
|
|
|
+ && wps && bs->bl.zone_size) {
|
|
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
|
|
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
|
|
if (!BDRV_ZT_IS_CONV(*wp)) {
|
|
if (!BDRV_ZT_IS_CONV(*wp)) {
|
|
|
|
+ if (type & QEMU_AIO_ZONE_APPEND) {
|
|
|
|
+ *s->offset = *wp;
|
|
|
|
+ }
|
|
/* Advance the wp if needed */
|
|
/* Advance the wp if needed */
|
|
if (offset + bytes > *wp) {
|
|
if (offset + bytes > *wp) {
|
|
*wp = offset + bytes;
|
|
*wp = offset + bytes;
|
|
@@ -2512,12 +2521,12 @@ out:
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- if (type & QEMU_AIO_WRITE) {
|
|
|
|
|
|
+ if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
|
|
update_zones_wp(bs, s->fd, 0, 1);
|
|
update_zones_wp(bs, s->fd, 0, 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- if (type & QEMU_AIO_WRITE && wps) {
|
|
|
|
|
|
+ if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) {
|
|
qemu_co_mutex_unlock(&wps->colock);
|
|
qemu_co_mutex_unlock(&wps->colock);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -3515,6 +3524,40 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
|
|
}
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
+#if defined(CONFIG_BLKZONED)
|
|
|
|
+static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
|
|
|
|
+ int64_t *offset,
|
|
|
|
+ QEMUIOVector *qiov,
|
|
|
|
+ BdrvRequestFlags flags) {
|
|
|
|
+ assert(flags == 0);
|
|
|
|
+ int64_t zone_size_mask = bs->bl.zone_size - 1;
|
|
|
|
+ int64_t iov_len = 0;
|
|
|
|
+ int64_t len = 0;
|
|
|
|
+ BDRVRawState *s = bs->opaque;
|
|
|
|
+ s->offset = offset;
|
|
|
|
+
|
|
|
|
+ if (*offset & zone_size_mask) {
|
|
|
|
+ error_report("sector offset %" PRId64 " is not aligned to zone size "
|
|
|
|
+ "%" PRId32 "", *offset / 512, bs->bl.zone_size / 512);
|
|
|
|
+ return -EINVAL;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ int64_t wg = bs->bl.write_granularity;
|
|
|
|
+ int64_t wg_mask = wg - 1;
|
|
|
|
+ for (int i = 0; i < qiov->niov; i++) {
|
|
|
|
+ iov_len = qiov->iov[i].iov_len;
|
|
|
|
+ if (iov_len & wg_mask) {
|
|
|
|
+ error_report("len of IOVector[%d] %" PRId64 " is not aligned to "
|
|
|
|
+ "block size %" PRId64 "", i, iov_len, wg);
|
|
|
|
+ return -EINVAL;
|
|
|
|
+ }
|
|
|
|
+ len += iov_len;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND);
|
|
|
|
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
static coroutine_fn int
|
|
static coroutine_fn int
|
|
raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
|
raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
|
|
bool blkdev)
|
|
bool blkdev)
|
|
@@ -4276,6 +4319,7 @@ static BlockDriver bdrv_host_device = {
|
|
/* zone management operations */
|
|
/* zone management operations */
|
|
.bdrv_co_zone_report = raw_co_zone_report,
|
|
.bdrv_co_zone_report = raw_co_zone_report,
|
|
.bdrv_co_zone_mgmt = raw_co_zone_mgmt,
|
|
.bdrv_co_zone_mgmt = raw_co_zone_mgmt,
|
|
|
|
+ .bdrv_co_zone_append = raw_co_zone_append,
|
|
#endif
|
|
#endif
|
|
};
|
|
};
|
|
|
|
|