diff --git a/fs/aio.c b/fs/aio.c new file mode 100644 index 0000000000..cfdd3acc61 --- /dev/null +++ b/fs/aio.c @@ -0,0 +1,307 @@ +#include "fs/aio.h" +#include "kernel/errno.h" +#include +#include + +// Ensure a minimum capacity in the AIOCTX table. +// +// AIOCTX must be locked before resizing the table. The lock can be elided in +// contexts where you know the table is not shared yet. +// +// Attempts to shrink the table will be rejected silently. +// May return _ENOMEM if memory for the new table could not be allocated. +static int _aioctx_table_ensure(struct aioctx_table *tbl, unsigned int newcap) { + if (tbl == NULL) return 0; + if (tbl->capacity >= newcap) return 0; + if ((INT_MAX / sizeof(struct aioctx*)) < newcap) return _ENOMEM; + + struct aioctx **new_contexts = malloc(sizeof(struct aioctx*) * newcap); + if (new_contexts == NULL) return _ENOMEM; + + memset(new_contexts, 0, sizeof(struct aioctx*) * newcap); + if (tbl->contexts) { + memcpy(new_contexts, tbl->contexts, sizeof(struct aioctx*) * tbl->capacity); + free(tbl->contexts); + } + + tbl->contexts = new_contexts; + tbl->capacity = newcap; + + return 0; +} + +struct aioctx *aioctx_new(int events_capacity, pid_t pid) { + if ((INT_MAX / sizeof(struct aioctx_event)) < events_capacity) return NULL; + + struct aioctx *aioctx = malloc(sizeof(struct aioctx)); + if (aioctx == NULL) return NULL; + + struct aioctx_event *aioctx_events = malloc(sizeof(struct aioctx_event) * events_capacity); + if (aioctx_events == NULL) { + free(aioctx); + return NULL; + } + + memset(aioctx_events, 0, sizeof(struct aioctx_event) * events_capacity); + + lock_init(&aioctx->lock); + cond_init(&aioctx->cond); + + aioctx->refcount = 1; + aioctx->events_capacity = events_capacity; + aioctx->events = aioctx_events; + aioctx->is_owned_by_task = true; + aioctx->pid = pid; + + return aioctx; +} + +void aioctx_retain(struct aioctx *ctx) { + if (ctx == NULL) return; + + lock(&ctx->lock); + ctx->refcount++; + unlock(&ctx->lock); +} + +static 
void _aioctx_decrement_ref(struct aioctx *ctx) {
+    // Caller holds ctx->lock. When the last reference is dropped the context
+    // (including its lock) is freed; otherwise the lock is released here.
+    if (--ctx->refcount == 0) {
+        cond_destroy(&ctx->cond);
+        free(ctx->events);
+        free(ctx);
+    } else {
+        unlock(&ctx->lock);
+    }
+}
+
+// Drop one reference to the context. NULL is ignored.
+void aioctx_release(struct aioctx *ctx) {
+    if (ctx == NULL) return;
+
+    lock(&ctx->lock);
+    _aioctx_decrement_ref(ctx);
+}
+
+// Drop the owning task's reference and clear is_owned_by_task. NULL is ignored.
+void aioctx_release_from_task(struct aioctx *ctx) {
+    if (ctx == NULL) return;
+
+    lock(&ctx->lock);
+    ctx->is_owned_by_task = false;
+    _aioctx_decrement_ref(ctx);
+}
+
+// Claim the first free (AIOCTX_NONE) event slot and fill it as PENDING.
+//
+// Returns the slot index (>= 0), _EINVAL for a NULL context, or _EAGAIN when
+// every slot is occupied.
+signed int aioctx_submit_pending_event(struct aioctx *ctx, uint64_t user_data, addr_t iocbp, struct aioctx_event_pending pending_data) {
+    if (ctx == NULL) return _EINVAL;
+
+    lock(&ctx->lock);
+
+    signed int index = _EAGAIN;
+
+    for (int i = 0; i < ctx->events_capacity; i += 1) {
+        if (ctx->events[i].tag == AIOCTX_NONE) {
+            index = i;
+
+            ctx->events[i].tag = AIOCTX_PENDING;
+            ctx->events[i].user_data = user_data;
+            ctx->events[i].iocb_obj = iocbp;
+            ctx->events[i].data.as_pending = pending_data;
+
+            break;
+        }
+    }
+
+    unlock(&ctx->lock);
+
+    return index;
+}
+
+// Return a PENDING slot to the free state. Out-of-range indices and slots
+// that are not pending are left untouched.
+void aioctx_cancel_event(struct aioctx *ctx, unsigned int index) {
+    if (ctx == NULL) return;
+
+    lock(&ctx->lock);
+
+    // Release the lock on the early-out path too; returning with it held
+    // would block every later user of this context.
+    if (index >= ctx->events_capacity) {
+        unlock(&ctx->lock);
+        return;
+    }
+
+    if (ctx->events[index].tag == AIOCTX_PENDING)
+        ctx->events[index].tag = AIOCTX_NONE;
+
+    unlock(&ctx->lock);
+}
+
+// Mark a PENDING slot as COMPLETE with the two result values, then notify
+// the context's condition variable.
+void aioctx_complete_event(struct aioctx *ctx, unsigned int index, int64_t result0, int64_t result1) {
+    if (ctx == NULL) return;
+
+    lock(&ctx->lock);
+
+    // Same as aioctx_cancel_event: never leave with the lock held.
+    if (index >= ctx->events_capacity) {
+        unlock(&ctx->lock);
+        return;
+    }
+
+    if (ctx->events[index].tag == AIOCTX_PENDING) {
+        ctx->events[index].tag = AIOCTX_COMPLETE;
+
+        struct aioctx_event_complete data;
+
+        data.result[0] = result0;
+        data.result[1] = result1;
+
+        ctx->events[index].data.as_complete = data;
+    }
+
+    notify_once(&ctx->cond);
+    unlock(&ctx->lock);
+}
+
+bool aioctx_consume_completed_event(struct aioctx *ctx, uint64_t *user_data, addr_t *iocbp, struct aioctx_event_complete *completed_data) {
+    if (ctx == NULL) 
return false; + + bool result = false; + + lock(&ctx->lock); + + for (int i = 0; i < ctx->events_capacity; i += 1) { + if (ctx->events[i].tag == AIOCTX_COMPLETE) { + *user_data = ctx->events[i].user_data; + *iocbp = ctx->events[i].iocb_obj; + *completed_data = ctx->events[i].data.as_complete; + + ctx->events[i].tag = AIOCTX_NONE; + result = true; + + break; + } + } + + unlock(&ctx->lock); + + return result; +} + +int aioctx_wait_for_completion(struct aioctx *ctx, struct timespec *timeout) { + if (ctx == NULL) return _EINVAL; + + lock(&ctx->lock); + int err = wait_for(&ctx->cond, &ctx->lock, timeout); + unlock(&ctx->lock); + + return err; +} + +void aioctx_lock(struct aioctx *ctx) { + if (ctx == NULL) return; + + lock(&ctx->lock); +} + +void aioctx_unlock(struct aioctx *ctx) { + if (ctx == NULL) return; + + unlock(&ctx->lock); +} + +signed int aioctx_get_pending_event(struct aioctx *ctx, unsigned int index, struct aioctx_event_pending **event) { + if (ctx == NULL) return _EINVAL; + if (!ctx->is_owned_by_task) return _EINVAL; + if (index >= ctx->events_capacity) return _EINVAL; + if (ctx->events[index].tag != AIOCTX_PENDING) return _EINVAL; + + if (event != NULL) *event = &ctx->events[index].data.as_pending; + + return 0; +} + +signed int aioctx_table_new(struct aioctx_table *tbl, unsigned int capacity) { + if (tbl == NULL) return _EINVAL; + + tbl->capacity = 0; + tbl->contexts = NULL; + lock_init(&tbl->lock); + + int err = _aioctx_table_ensure(tbl, capacity); + if (err < 0) return err; + + return 0; +} + +void aioctx_table_delete(struct aioctx_table *tbl) { + if (tbl == NULL) return; + + lock(&tbl->lock); + for (int i = 0; i < tbl->capacity; i += 1) { + if (tbl->contexts[i] != NULL) { + aioctx_release_from_task(tbl->contexts[i]); + } + } + free(tbl->contexts); +} + +signed int aioctx_table_insert(struct aioctx_table *tbl, struct aioctx *ctx) { + if (tbl == NULL) return _EINVAL; + if (ctx == NULL) return _EINVAL; + + lock(&tbl->lock); + + for (int i = 0; i < 
tbl->capacity; i += 1) { + if (tbl->contexts[i] == NULL) { + tbl->contexts[i] = ctx; + aioctx_retain(ctx); + unlock(&tbl->lock); + return i; + } + } + + //At this point, we've scanned the entire table and every entry is full. + int old_capacity = tbl->capacity; + if (((INT_MAX - 1) / 2) <= old_capacity) return _ENOMEM; + + int err = _aioctx_table_ensure(tbl, (tbl->capacity * 2) + 1); + if (err < 0) return err; + + tbl->contexts[old_capacity] = ctx; + + aioctx_retain(ctx); + unlock(&tbl->lock); + + return old_capacity; +} + +signed int aioctx_table_remove(struct aioctx_table *tbl, unsigned int ctx_id) { + if (tbl == NULL) return _EINVAL; + + lock(&tbl->lock); + + if (ctx_id >= tbl->capacity) { + unlock(&tbl->lock); + return _EINVAL; + } + + struct aioctx *ctx = tbl->contexts[ctx_id]; + if (ctx == NULL) { + unlock(&tbl->lock); + return _EINVAL; + } + + aioctx_release_from_task(ctx); + tbl->contexts[ctx_id] = NULL; + + unlock(&tbl->lock); + + return 0; +} + +struct aioctx *aioctx_table_get_and_retain(struct aioctx_table *tbl, unsigned int ctx_id) { + if (tbl == NULL) return NULL; + + lock(&tbl->lock); + + if (ctx_id >= tbl->capacity) { + unlock(&tbl->lock); + return NULL; + } + + struct aioctx *ctx = tbl->contexts[ctx_id]; + if (ctx != NULL) { + aioctx_retain(ctx); + } + + unlock(&tbl->lock); + + return ctx; +} \ No newline at end of file diff --git a/fs/aio.h b/fs/aio.h new file mode 100644 index 0000000000..f4dc9f2b26 --- /dev/null +++ b/fs/aio.h @@ -0,0 +1,251 @@ +#ifndef FS_AIO_H +#define FS_AIO_H + +#include "util/sync.h" +#include "fs/fd.h" +#include "misc.h" + +typedef dword_t aio_context_t; + +enum aioctx_event_tag { + // The event slot is empty and should be initialized before being used. + // + // Conveniently, this matches the zero value, so initializing the entire + // event will also make it None. + AIOCTX_NONE = 0, + + // The event slot is occupied with a pending I/O request. + // + // This corresponds to the as_pending variant of the data union. 
+ AIOCTX_PENDING = 1, + + // The event slot is occupied with a completed I/O request. + // + // This corresponds to the as_complete variant of the data union. + AIOCTX_COMPLETE = 2, +}; + +enum aioctx_op { + AIOCTX_PREAD = 0, + AIOCTX_PWRITE = 1, + AIOCTX_FSYNC = 2, + AIOCTX_FDSYNC = 3, + AIOCTX_POLL = 5, + AIOCTX_NOOP = 6, + AIOCTX_PREADV = 7, + AIOCTX_PWRITEV = 8, +}; + +// A pending I/O event's information. +struct aioctx_event_pending { + // The operation to perform. + enum aioctx_op op; + + // The open file to perform it on. + fd_t fd; + + // A guest memory buffer to read to or write from. + uint64_t buf; + + // The bounds of the guest memory buffer. + uint64_t nbytes; + + // The file offset to perform the I/O operation. + int64_t offset; +}; + +// A completed I/O event's information. +struct aioctx_event_complete { + // Result values for the event. + int64_t result[2]; +}; + +// A single AIO completion event. +// +// This structure is nullable, aioctx_event->tag == AIOCTX_NONE means that the +// rest of the fields have yet to be initialized. +struct aioctx_event { + enum aioctx_event_tag tag; + + // A data value provided by the user to identify in-flight requests from + // the kernel. + uint64_t user_data; + + // Guest address of the IOCB structure that initiated this event + addr_t iocb_obj; + + union { + // Tag: AIOCTX_PENDING + struct aioctx_event_pending as_pending; + + // Tag: AIOCTX_COMPLETE + struct aioctx_event_complete as_complete; + } data; +}; + +// An AIO context. +// +// Individual AIO contexts are refcounted and locked independently from the +// tables that hold them. +struct aioctx { + atomic_uint refcount; + lock_t lock; + cond_t cond; + + // Indicates if this context is owned by a task. + // + // If true, then the `pid` field is guaranteed to be valid, and correspond + // to the task that made the request. If false, then the `pid` is invalid, + // and any pending or completed events should be treated as cancelled. 
+ bool is_owned_by_task; + + // The process that currently owns the context. + pid_t pid; + + // The capacity of the events structure. + // + // This is specified by `io_setup`; requests that would potentially + // overflow the events table should be rejected with `_EAGAIN`. + dword_t events_capacity; + + // The current table of pending and completed events. + struct aioctx_event *events; +}; + +// The table of AIO contexts for a given process. +// +// The context table may be locked, but it is not refcounted and cannot be +// shared across tasks. +struct aioctx_table { + lock_t lock; + + // The capacity of the contexts table. + unsigned capacity; + + // Storage for the contexts table. + // + // This is an array-of-pointers to allow efficient reallocation. Individual + // entries are nullable. + struct aioctx **contexts; +}; + +// In-place construct an AIO context table. +// +// Returns an error value if internal table buffers could not be allocated. +signed int aioctx_table_new(struct aioctx_table *tbl, unsigned int capacity); + +// In-place destroy an AIO context table. +void aioctx_table_delete(struct aioctx_table *tbl); + +// Insert an AIO context into a given table. +// +// The return value will be a positive index into the context table if the +// context was successfully inserted, or an error code otherwise. +// +// The context must be non-null. There is no provision for inserting a null +// context into the table. +signed int aioctx_table_insert(struct aioctx_table *tbl, struct aioctx *ctx); + +// Remove an AIO context from the table by it's position (context ID). +// +// This returns an error code if the context ID is not valid for this table. +// +// All pending I/O requests on the given context will retain the context until +// they resolve. The context will also be flagged as having been released by +// the task, which is treated as an implicit cancellation of any pending +// requests. 
+signed int aioctx_table_remove(struct aioctx_table *tbl, unsigned int ctx_id); + +// Retrieve a pointer to a given AIO context by its ID. +// +// This returns NULL if no such context exists. Otherwise, it will +// automatically retain the context before unlocking its owning table. +struct aioctx *aioctx_table_get_and_retain(struct aioctx_table *tbl, unsigned int ctx_id); + +// Create a new AIO context. +// +// The context is returned in a retained, unlocked state. You should only +// release it after inserting it into some other structure that retains the +// context. +struct aioctx *aioctx_new(int events_capacity, pid_t pid); +void aioctx_retain(struct aioctx *ctx); +void aioctx_release(struct aioctx *ctx); + +// Release the AIO context and flag it as no longer being owned by a valid +// task. +// +// All pending I/O requests on the given context will retain the context until +// they resolve. The context will also be flagged as having been released by +// the task, which is treated as an implicit cancellation of any pending +// requests. +void aioctx_release_from_task(struct aioctx *ctx); + +// Submit a pending I/O event to the AIO context. +// +// This returns a non-negative integer corresponding to the event index within the +// context, or a negative error code. This index remains stable and can be used to access the pending +// event data up until the event is resolved. +signed int aioctx_submit_pending_event(struct aioctx *ctx, uint64_t user_data, addr_t iocbp, struct aioctx_event_pending pending_data); + +// Cancel a pending I/O event, freeing the event index for reuse. +// +// This should only be used if the submitted FD has signalled a synchronous +// error (e.g. EINVAL) which indicates that it does not plan to complete the +// event later. +void aioctx_cancel_event(struct aioctx *ctx, unsigned int index); + +// Complete a pending I/O event. +// +// This accepts two result parameters, whose meaning is determined solely by +// the event opcode. 
+// +// This also signals any threads waiting on the context that an event has been +// completed. +void aioctx_complete_event(struct aioctx *ctx, unsigned int index, int64_t result0, int64_t result1); + +// Consume a completed I/O event. +// +// This returns true if and only if there was a completed event in the queue, +// and we were able to remove it from the queue. The user_data, iocbp, and +// completed_data parameters will be populated with the values from the removed +// event, and it's event ID will be permitted to be reused. +// +// If this function returns false, there were no completed events to remove +// from the queue, and the passed-in parameters should not be used. +bool aioctx_consume_completed_event(struct aioctx *ctx, uint64_t *user_data, addr_t *iocbp, struct aioctx_event_complete *completed_data); + +// Wait for an event to complete. +// +// This function blocks the current thread until an event completion is posted +// to the context, or the timeout expires. When new events are completed, this +// function will return 0. If the timeout expired, this function will return +// _ETIMEDOUT. Any other error codes should be sent to client code. +// +// Please note that this function returning with 0 is not a guarantee that +// `aioctx_consume_completed_event` will yield data. This function may +// spuriously return 0 or some other thread may have claimed the event in +// between this function returning and the other function being called. +int aioctx_wait_for_completion(struct aioctx *ctx, struct timespec *timeout); + +void aioctx_lock(struct aioctx *ctx); +void aioctx_unlock(struct aioctx *ctx); + +// Get a pending event from the AIOCTX. +// +// The event structure will be returned by writing it's pointer to the **event +// parameter. +// +// This function returns _EINVAL if the given index is not a valid event, not a +// pending event, or if the context has been released by it's supporting task. 
+// In the event that this function returns an error, the event should be +// considered cancelled. Any resources related to the event should be disposed +// of. +// +// This function is not synchronized and returns pointers to the context's +// internal structures. As such, you must retain and lock the table before +// calling this function, and drop all internal pointers before unlocking or +// releasing the context. Do not hold the lock for longer than necessary as you +// may serialize or deadlock other I/O requests. +signed int aioctx_get_pending_event(struct aioctx *ctx, unsigned int index, struct aioctx_event_pending **event); + +#endif \ No newline at end of file diff --git a/fs/fd.h b/fs/fd.h index 7336417215..90a0feb184 100644 --- a/fs/fd.h +++ b/fs/fd.h @@ -9,6 +9,10 @@ #include "fs/proc.h" #include "fs/sockrestart.h" +typedef sdword_t fd_t; + +#include "fs/aio.h" + // FIXME almost everything that uses the structs in this file does so without any kind of sane locking struct fd { @@ -108,7 +112,6 @@ struct fd { cond_t cond; }; -typedef sdword_t fd_t; #define AT_FDCWD_ -100 struct fd *fd_create(const struct fd_ops *ops); @@ -166,6 +169,22 @@ struct fd_ops { int (*getflags)(struct fd *fd); // handle F_SETFL, i.e. set O_NONBLOCK int (*setflags)(struct fd *fd, dword_t arg); + + // Submit an AIO event to the FD. + // + // The AIO context is provided to the FD in an unlocked, unretained state. + // Asynchronous usage of the AIO context must first retain it; though + // synchronous usage (i.e. before this function returns) is permitted to + // omit the retain/release step. + // + // This may return a synchronous error code (e.g. _EINVAL) for operations + // that cannot be performed on this FD. If this operation returns an error + // code, then the event ID given will be cancelled, and the FD should not + // attempt to complete it. + // + // Asynchronous error codes must be instead returned by completing the + // event with the error code as it's result. 
+ int (*io_submit)(struct fd *fd, struct aioctx *ctx, unsigned int event_id); }; struct fdtable { diff --git a/kernel/aio.c b/kernel/aio.c new file mode 100644 index 0000000000..3546b03368 --- /dev/null +++ b/kernel/aio.c @@ -0,0 +1,468 @@ +#include "debug.h" +#include "kernel/calls.h" +#include "kernel/task.h" +#include "kernel/aio.h" +#include "kernel/fs.h" +#include "kernel/time.h" +#include "fs/aio.h" +#include "fs/fd.h" + +struct _guest_iocb { + uint64_t data; + uint32_t key; + uint32_t rw_flags; + uint16_t lio_opcode; + int16_t reqprio; + uint32_t fildes; + uint64_t buf; + uint64_t nbytes; + int64_t offset; + uint64_t reserved2; + uint32_t flags; + uint32_t resfd; +}; + +// Guest memory offsets for the IOCB structure. +// Calculated by a test program compiled and ran in iSH itself. +static_assert(offsetof(struct _guest_iocb, data) == 0, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, key) == 8, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, rw_flags) == 12, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, lio_opcode) == 16, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, reqprio) == 18, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, fildes) == 20, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, buf) == 24, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, nbytes) == 32, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, offset) == 40, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, reserved2) == 48, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, flags) == 56, "IOCB order"); +static_assert(offsetof(struct _guest_iocb, resfd) == 60, "IOCB order"); + +struct _guest_ioevent { + uint64_t data; + uint64_t obj; + int64_t res; + int64_t res2; +}; + +// Guest memory offsets for the IO_EVENT structure. +// Also confirmed by test program. 
+static_assert(offsetof(struct _guest_ioevent, data) == 0, "IOEVENT order");
+static_assert(offsetof(struct _guest_ioevent, obj) == 8, "IOEVENT order");
+static_assert(offsetof(struct _guest_ioevent, res) == 16, "IOEVENT order");
+static_assert(offsetof(struct _guest_ioevent, res2) == 24, "IOEVENT order");
+static_assert(sizeof(struct _guest_ioevent) == 32, "IOEVENT size");
+
+// io_setup: create a context with nr_events slots, register it in the
+// current task's table and write its table index to *ctx_idp.
+dword_t sys_io_setup(dword_t nr_events, addr_t ctx_idp) {
+    STRACE("io_setup(%d, 0x%x)", nr_events, ctx_idp);
+
+    struct aioctx *ctx = aioctx_new(nr_events, current->pid);
+    if (ctx == NULL) return _ENOMEM;
+    if (IS_ERR(ctx)) return PTR_ERR(ctx);
+
+    // The table takes its own reference; drop the one aioctx_new gave us.
+    int ctx_id = aioctx_table_insert(&current->aioctx, ctx);
+    aioctx_release(ctx);
+    if (ctx_id < 0) {
+        return ctx_id;
+    }
+
+    dword_t ctx_id_guest = (dword_t)ctx_id;
+    if (ctx_idp && user_write(ctx_idp, (char*)&ctx_id_guest, sizeof(dword_t))) {
+        // The guest never learned the id, so it could never destroy this
+        // context; take it back out of the table instead of leaking it.
+        aioctx_table_remove(&current->aioctx, ctx_id);
+        return _EFAULT;
+    }
+
+    return 0;
+}
+
+// io_destroy: remove the context from the current task's table.
+dword_t sys_io_destroy(dword_t ctx_id) {
+    STRACE("io_destroy(%d)", ctx_id);
+
+    // Keep the actual error code (not the result of a `< 0` comparison) so
+    // that an invalid ctx_id is reported to the guest.
+    int err = aioctx_table_remove(&current->aioctx, ctx_id);
+    if (err < 0) {
+        return err;
+    }
+
+    return 0;
+}
+
+// io_getevents: copy up to nr completed events to the guest array `events`,
+// waiting (bounded by the optional timeout) until at least min_nr have been
+// delivered. Returns the number of events written or a negative error code.
+dword_t sys_io_getevents(dword_t ctx_id, dword_t min_nr, dword_t nr, addr_t events, addr_t timeout_addr) {
+    STRACE("io_getevents(0x%x, %d, %d, 0x%x, 0x%x)", ctx_id, min_nr, nr, events, timeout_addr);
+
+    struct aioctx *ctx = aioctx_table_get_and_retain(&current->aioctx, ctx_id);
+    if (ctx == NULL) return _EINVAL;
+
+    struct timespec_ guest_timeout;
+    struct timespec host_timeout;
+    struct timespec *timeout = NULL;
+    dword_t i = 0;
+    dword_t res;
+
+    // Every exit below goes through `out` so the reference taken above is
+    // always released.
+    if (events == 0) {
+        res = _EFAULT;
+        goto out;
+    }
+
+    if (timeout_addr != 0) {
+        if (user_get(timeout_addr, guest_timeout)) {
+            res = _EFAULT;
+            goto out;
+        }
+        host_timeout.tv_sec = guest_timeout.sec;
+        host_timeout.tv_nsec = guest_timeout.nsec;
+        timeout = &host_timeout;
+    }
+
+    // `i` counts delivered events only; it must not advance after a wait.
+    while (i < nr) {
+        uint64_t user_data;
+        addr_t iocbp;
+        struct aioctx_event_complete cdata;
+
+        if (!aioctx_consume_completed_event(ctx, &user_data, &iocbp, &cdata)) {
+            if (i >= min_nr) break;
+
+            int err = aioctx_wait_for_completion(ctx, timeout);
+
+            if (err == _ETIMEDOUT) break;
+            if (err < 0) {
+                res = err;
+                goto out;
+            }
+            continue;
+        }
+
+        struct _guest_ioevent gevent = {0};
+        gevent.data = user_data;
+        gevent.obj = (uint64_t)iocbp;
+        gevent.res = cdata.result[0];
+        gevent.res2 = cdata.result[1];
+
+        if (user_put(events, gevent)) {
+            res = _EFAULT;
+            goto out;
+        }
+
+        events += sizeof(struct _guest_ioevent);
+        i += 1;
+    }
+
+    res = i;
+
+out:
+    aioctx_release(ctx);
+    return res;
+}
+
+// io_submit: read nr IOCB pointers from the guest array iocbpp, register each
+// as a pending event and hand it to the FD (or the synchronous fallback).
+// Returns the number of IOCBs accepted, or an error if the first one failed.
+dword_t sys_io_submit(dword_t ctx_id, dword_t u_nr, addr_t iocbpp) {
+    sdword_t nr = (sdword_t)u_nr;
+    STRACE("io_submit(0x%x, %d, 0x%x)", ctx_id, nr, iocbpp);
+
+    if (nr < 0) return _EINVAL;
+
+    struct aioctx *ctx = aioctx_table_get_and_retain(&current->aioctx, ctx_id);
+    if (ctx == NULL) return _EINVAL;
+
+    sdword_t i;
+    signed int err;
+    for (i = 0; i < nr; i += 1) {
+        addr_t iocbp = 0;
+        if (user_get(iocbpp + i * sizeof(addr_t), iocbp)) goto fault;
+
+        struct _guest_iocb giocb = {0};
+        if (user_get(iocbp, giocb)) goto fault;
+
+        struct aioctx_event_pending host_iocb;
+
+        host_iocb.op = (enum aioctx_op)giocb.lio_opcode;
+        host_iocb.fd = giocb.fildes;
+        host_iocb.buf = giocb.buf;
+        host_iocb.nbytes = giocb.nbytes;
+        host_iocb.offset = giocb.offset;
+
+        lock(&current->files->lock);
+
+        struct fd *fdp = fdtable_get(current->files, host_iocb.fd);
+        if (fdp == NULL) {
+            unlock(&current->files->lock);
+
+            // Linux man pages mention only the FIRST FD is checked (why?).
+            // TODO: It also doesn't say what happens if further IOCBs are
+            // unchecked, so I'm assuming it halts IOCB processing at this
+            // point.
+            if (i == 0) goto badf;
+            break;
+        }
+
+        err = aioctx_submit_pending_event(ctx, giocb.data, iocbp, host_iocb);
+        if (err < 0) {
+            // TODO: This assumes the usual pattern of "first IOCB errors, all
+            // others stop processing without erroring"
+            unlock(&current->files->lock);
+
+            if (i == 0) goto err;
+            break;
+        }
+
+        unsigned int event_id = (unsigned int)err;
+        if (fdp->ops->io_submit) {
+            err = fdp->ops->io_submit(fdp, ctx, event_id);
+        } else {
+            err = aio_fallback_submit(fdp, ctx, event_id);
+        }
+
+        unlock(&current->files->lock);
+
+        if (err < 0) {
+            aioctx_cancel_event(ctx, event_id);
+
+            if (i == 0) goto err;
+            break;
+        }
+    }
+
+    aioctx_release(ctx);
+    return i;
+
+fault:
+    aioctx_release(ctx);
+    return _EFAULT;
+
+badf:
+    aioctx_release(ctx);
+    return _EBADF;
+
+err:
+    aioctx_release(ctx);
+    return err;
+}
+
+// io_cancel: not implemented; always returns _ENOSYS.
+dword_t sys_io_cancel(dword_t ctx_id, addr_t iocb, addr_t result) {
+    STRACE("io_cancel(0x%x, 0x%x, 0x%x)", ctx_id, iocb, result);
+
+    return _ENOSYS;
+}
+
+/**
+ * Do a single PREAD operation, falling back to seek-and-read if necessary.
+ *
+ * The return code corresponds to the 'sync error' concept of fallback_submit,
+ * while async errors should be flagged by writing to `*err`.
+ *
+ * `*err` also is used to return the total number of bytes read.
+ */
+static signed int __aio_fallback_pread(
+    struct fd *fd,
+    addr_t guest_buf,
+    uint64_t nbytes,
+    int64_t offset,
+    signed int *err) {
+
+    if (nbytes > 0xFFFFFFFE) return _ENOMEM;
+
+    // Don't ask me why, but the sync I/O code null-terminates it's
+    // buffers, so I'm doing it here too.
+ char *buf = malloc(nbytes + 1); + if (buf == NULL) return _ENOMEM; + + if (fd->ops->pread) { + *err = fd->ops->pread(fd, buf, nbytes, offset); + } else if (fd->ops->read && fd->ops->lseek) { + off_t_ saved_off = fd->ops->lseek(fd, 0, LSEEK_CUR); + if (saved_off < 0) { + *err = saved_off; + goto fail_async; + } + + off_t_ seek_result = fd->ops->lseek(fd, offset, LSEEK_SET); + if (seek_result < 0) { + *err = seek_result; + goto fail_async; + } + + ssize_t read_bytes = fd->ops->read(fd, buf, nbytes); + + seek_result = fd->ops->lseek(fd, saved_off, LSEEK_SET); + if (seek_result < 0) { + *err = seek_result; + goto fail_async; + } + + *err = read_bytes; + } else { + goto fail_einval; + } + + if (*err < 0) goto fail_async; + + buf[*err] = '\0'; + if (user_write(guest_buf, buf, *err)) goto fail_efault; + +fail_async: + free(buf); + return 0; + +fail_einval: + free(buf); + return _EINVAL; + +fail_efault: + free(buf); + return _EFAULT; +} + +/** + * Do a single PWRITE operation, falling back to seek-and-write if necessary. + * + * The return code corresponds to the 'sync error' concept of fallback_submit, + * while async errors should be flagged by writing to `*err`. + * + * `*err` also is used to return the total number of bytes written. 
+ */ +static signed int __aio_fallback_pwrite( + struct fd *fd, + addr_t guest_buf, + uint64_t nbytes, + int64_t offset, + signed int *err) { + + if (nbytes > 0xFFFFFFFE) return _ENOMEM; + + char *buf = malloc(nbytes); + if (buf == NULL) return _ENOMEM; + + if (user_read(guest_buf, buf, nbytes)) { + free(buf); + return _EFAULT; + } + + ssize_t written_bytes; + if (fd->ops->pwrite) { + written_bytes = fd->ops->pwrite(fd, buf, nbytes, offset); + } else if (fd->ops->write && fd->ops->lseek) { + off_t_ saved_off = fd->ops->lseek(fd, 0, LSEEK_CUR); + if (saved_off < 0) { + *err = saved_off; + goto fail_async; + } + + off_t_ seek_result = fd->ops->lseek(fd, offset, LSEEK_SET); + if (seek_result < 0) { + *err = seek_result; + goto fail_async; + } + + written_bytes = fd->ops->write(fd, buf, nbytes); + + seek_result = fd->ops->lseek(fd, saved_off, LSEEK_SET); + if (seek_result < 0) { + *err = seek_result; + goto fail_async; + } + } else { + free(buf); + return _EINVAL; + } + + *err = (signed int)written_bytes; + +fail_async: + free(buf); + return 0; +} + +int aio_fallback_submit(struct fd *fd, struct aioctx *ctx, unsigned int event_id) { + aioctx_lock(ctx); + + struct aioctx_event_pending *evt = NULL; + + // General structure of the fallback: + // + // Some errors we're going to treat as fatal and return immediately. These + // should trigger event cancellation. We call these "sync errors". + // + // Other errors will be returned as the result of event completion. We call + // these "async errors". These do NOT cancel the AIO event, but are treated + // as the event's completion. + // + // Why do it this way? Because the manpages for io_submit say so - only a + // certain subset of errors are returned by it, and only for the first + // IOCB submitted. 
+ signed int sync_err = aioctx_get_pending_event(ctx, event_id, &evt); + signed int async_result0 = 0; + if (sync_err < 0 || evt == NULL) { + aioctx_unlock(ctx); + + if (sync_err == 0) return _EINVAL; + return sync_err; + } + + struct iovec_ *iov_list = NULL; + switch (evt->op) { + case AIOCTX_PREAD: + sync_err = __aio_fallback_pread(fd, (addr_t)evt->buf, evt->nbytes, evt->offset, &async_result0); + break; + case AIOCTX_PWRITE: + sync_err = __aio_fallback_pwrite(fd, (addr_t)evt->buf, evt->nbytes, evt->offset, &async_result0); + break; + case AIOCTX_FSYNC: + case AIOCTX_FDSYNC: + if (fd->ops->fsync) { + async_result0 = fd->ops->fsync(fd); + } else { + sync_err = _EINVAL; + } + + break; + case AIOCTX_NOOP: + async_result0 = 0; + sync_err = 0; + break; + case AIOCTX_PREADV: + iov_list = read_iovec((addr_t)evt->buf, evt->nbytes); + if (IS_ERR(iov_list)) { + sync_err = PTR_ERR(iov_list); + break; + } + + ssize_t total_read = 0; + + for (unsigned int i = 0; i < evt->nbytes; i += 1) { + signed int cur_read = 0; + sync_err = __aio_fallback_pread(fd, iov_list[i].base, iov_list[i].len, evt->offset + total_read, &cur_read); + if (sync_err < 0 || cur_read < 0) { + async_result0 = cur_read; + break; + } + + total_read += cur_read; + + if ((uint_t)cur_read < iov_list[i].len) break; + } + + free(iov_list); + + if (sync_err < 0 || async_result0 < 0) break; + + async_result0 = total_read; + break; + case AIOCTX_PWRITEV: + iov_list = read_iovec((addr_t)evt->buf, evt->nbytes); + if (IS_ERR(iov_list)) { + sync_err = PTR_ERR(iov_list); + break; + } + + ssize_t total_write = 0; + + for (unsigned int i = 0; i < evt->nbytes; i += 1) { + signed int cur_write = 0; + sync_err = __aio_fallback_pwrite(fd, iov_list[i].base, iov_list[i].len, evt->offset + total_write, &cur_write); + if (sync_err < 0 || cur_write < 0) { + async_result0 = cur_write; + break; + } + + total_write += cur_write; + + if ((uint_t)cur_write < iov_list[i].len) break; + } + + free(iov_list); + + if (sync_err < 0 || 
async_result0 < 0) break; + + async_result0 = total_write; + break; + //TODO: AIOCTX_POLL + default: + sync_err = _EINVAL; + break; + } + + aioctx_unlock(ctx); + + if (sync_err == 0) { + aioctx_complete_event(ctx, event_id, async_result0, 0); + } + + return sync_err; +} diff --git a/kernel/aio.h b/kernel/aio.h new file mode 100644 index 0000000000..b21e4f5c74 --- /dev/null +++ b/kernel/aio.h @@ -0,0 +1,13 @@ +#ifndef KERNEL_AIO_H +#define KERNEL_AIO_H + +#include "fs/fd.h" +#include "fs/aio.h" + +// Synchronous fallback for non-async files. +// +// This is equivalent to the `io_submit` field on `fd_ops`, but is intended to +// be called if that field is NULL. +int aio_fallback_submit(struct fd *fd, struct aioctx *ctx, unsigned int event_id); + +#endif \ No newline at end of file diff --git a/kernel/calls.c b/kernel/calls.c index 8ce40a5a14..1f1a396a77 100644 --- a/kernel/calls.c +++ b/kernel/calls.c @@ -165,7 +165,11 @@ syscall_t syscall_table[] = { [241] = (syscall_t) sys_sched_setaffinity, [242] = (syscall_t) sys_sched_getaffinity, [243] = (syscall_t) sys_set_thread_area, - [245] = (syscall_t) syscall_stub, // io_setup + [245] = (syscall_t) sys_io_setup, + [246] = (syscall_t) sys_io_destroy, + [247] = (syscall_t) sys_io_getevents, + [248] = (syscall_t) sys_io_submit, + [249] = (syscall_t) sys_io_cancel, [252] = (syscall_t) sys_exit_group, [254] = (syscall_t) sys_epoll_create0, [255] = (syscall_t) sys_epoll_ctl, diff --git a/kernel/calls.h b/kernel/calls.h index e054825478..503719d493 100644 --- a/kernel/calls.h +++ b/kernel/calls.h @@ -168,6 +168,13 @@ dword_t sys_umount2(addr_t target_addr, dword_t flags); dword_t sys_xattr_stub(addr_t path_addr, addr_t name_addr, addr_t value_addr, dword_t size, dword_t flags); +// async i/o +dword_t sys_io_setup(dword_t nr_events, addr_t ctx_idp); +dword_t sys_io_destroy(addr_t ctx_id); +dword_t sys_io_getevents(addr_t ctx_id, dword_t min_nr, dword_t nr, addr_t events, addr_t timeout); +dword_t sys_io_submit(addr_t ctx_id, 
dword_t nr, addr_t iocbpp); +dword_t sys_io_cancel(addr_t ctx_id, addr_t iocb, addr_t result); + // process information pid_t_ sys_getpid(void); pid_t_ sys_gettid(void); diff --git a/kernel/fork.c b/kernel/fork.c index 307205333b..a6008e53c5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1,6 +1,7 @@ #include "debug.h" #include "kernel/task.h" #include "fs/fd.h" +#include "fs/aio.h" #include "kernel/calls.h" #include "fs/tty.h" #include "kernel/mm.h" @@ -122,6 +123,11 @@ static int copy_task(struct task *task, dword_t flags, addr_t stack, addr_t ptid task->clear_tid = ctid_addr; task->exit_signal = flags & CSIGNAL_; + err = aioctx_table_new(&task->aioctx, 0); + if (err < 0) { + goto fail_free_sighand; + } + // remember to do CLONE_SYSVSEM return 0; diff --git a/kernel/fs.c b/kernel/fs.c index df1d2aaaa2..af74a57536 100644 --- a/kernel/fs.c +++ b/kernel/fs.c @@ -295,7 +295,7 @@ dword_t sys_write(fd_t fd_no, addr_t buf_addr, dword_t size) { // that yet because it's more work and the efficiency gain from that is dwarfed // by the inefficiency of the emulator. 
-static struct iovec_ *read_iovec(addr_t iovec_addr, unsigned iovec_count) { +struct iovec_ *read_iovec(addr_t iovec_addr, unsigned iovec_count) { dword_t iovec_size = sizeof(struct iovec_) * iovec_count; struct iovec_ *iovec = malloc(iovec_size); if (iovec == NULL) diff --git a/kernel/fs.h b/kernel/fs.h index 5a8154ccb4..4bbce6fa3d 100644 --- a/kernel/fs.h +++ b/kernel/fs.h @@ -1,5 +1,5 @@ -#ifndef FS_H -#define FS_H +#ifndef KERNEL_FS_H +#define KERNEL_FS_H #include "misc.h" #include "util/list.h" @@ -8,6 +8,7 @@ #include "fs/fake-db.h" #include "fs/fix_path.h" #include "emu/memory.h" +#include "kernel/calls.h" #include #include @@ -175,4 +176,6 @@ extern const struct fs_ops devptsfs; extern const struct fs_ops tmpfs; void fs_register(const struct fs_ops *fs); +struct iovec_ *read_iovec(addr_t iovec_addr, unsigned iovec_count); + #endif diff --git a/kernel/init.c b/kernel/init.c index 769d769959..815f6fe5df 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -5,6 +5,7 @@ #include "fs/fd.h" #include "fs/real.h" #include "fs/tty.h" +#include "fs/aio.h" #include "kernel/calls.h" #include "kernel/init.h" #include "kernel/personality.h" @@ -71,6 +72,9 @@ static struct task *construct_task(struct task *parent) { task_set_mm(task, mm_new()); task->sighand = sighand_new(); task->files = fdtable_new(3); // why is there a 3 here + + signed int err = aioctx_table_new(&task->aioctx, 0); + if (err < 0) return ERR_PTR(err); task->fs = fs_info_new(); task->fs->umask = 0022; diff --git a/kernel/task.c b/kernel/task.c index dd02d4b77e..e9d6da2b32 100644 --- a/kernel/task.c +++ b/kernel/task.c @@ -6,6 +6,7 @@ #include "kernel/task.h" #include "emu/memory.h" #include "emu/tlb.h" +#include "fs/aio.h" __thread struct task *current; @@ -92,6 +93,7 @@ struct task *task_create_(struct task *parent) { void task_destroy(struct task *task) { list_remove(&task->siblings); pid_get(task->pid)->task = NULL; + aioctx_table_delete(&task->aioctx); free(task); } diff --git a/kernel/task.h 
b/kernel/task.h index c430e07e02..abb7f2ef2e 100644 --- a/kernel/task.h +++ b/kernel/task.h @@ -5,6 +5,7 @@ #include "emu/cpu.h" #include "kernel/mm.h" #include "kernel/fs.h" +#include "kernel/aio.h" #include "kernel/signal.h" #include "kernel/resource.h" #include "fs/sockrestart.h" @@ -36,6 +37,9 @@ struct task { struct fdtable *files; struct fs_info *fs; + // Currently active AIO contexts. Contains internal lock. + struct aioctx_table aioctx; + // locked by sighand->lock struct sighand *sighand; sigset_t_ blocked; diff --git a/meson.build b/meson.build index 9328dcb522..196a83f390 100644 --- a/meson.build +++ b/meson.build @@ -110,6 +110,7 @@ if get_option('kernel') == 'ish' 'kernel/fs.c', 'kernel/fs_info.c', + 'kernel/aio.c', 'fs/mount.c', 'fs/fd.c', 'fs/inode.c', @@ -141,6 +142,7 @@ if get_option('kernel') == 'ish' 'fs/tmp.c', 'fs/poll.c', + 'fs/aio.c', 'kernel/poll.c', 'kernel/epoll.c',
diff --git a/tests/e2e/aio_rw/aio_rw.c b/tests/e2e/aio_rw/aio_rw.c
new file mode 100644
index 0000000000..28d8d92d3c
--- /dev/null
+++ b/tests/e2e/aio_rw/aio_rw.c
@@ -0,0 +1,190 @@
+// End-to-end test for the Linux AIO syscalls: writes a string to test.txt with
+// IOCB_CMD_PWRITE, reads it back with IOCB_CMD_PREAD, and prints each result so
+// the output can be compared against expected.txt.
+//
+// NOTE(review): the header names below were reconstructed from the identifiers
+// this file uses - confirm them against the original commit.
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+// The kernel ABI (linux/aio_abi.h) declares the context handle as an unsigned
+// long. A fixed 64-bit type would take two variadic slots in syscall() on a
+// 32-bit target and shift every argument that follows it.
+typedef unsigned long aio_context_t;
+
+enum {
+    IOCB_CMD_PREAD = 0,
+    IOCB_CMD_PWRITE = 1,
+    IOCB_CMD_FSYNC = 2,
+    IOCB_CMD_FDSYNC = 3,
+    IOCB_CMD_NOOP = 6,
+    IOCB_CMD_PREADV = 7,
+    IOCB_CMD_PWRITEV = 8,
+};
+
+struct iocb {
+    uint64_t aio_data;
+    uint32_t aio_key;
+    uint32_t aio_rw_flags;
+    uint16_t aio_lio_opcode;
+    int16_t aio_reqprio;
+    uint32_t aio_fildes;
+    uint64_t aio_buf;
+    uint64_t aio_nbytes;
+    int64_t aio_offset;
+    uint64_t aio_reserved2;
+    uint32_t aio_flags;
+    uint32_t aio_resfd;
+};
+
+struct io_event {
+    uint64_t data;
+    uint64_t obj;
+    int64_t res;
+    int64_t res2;
+};
+
+const char* info = "Welcome to ASYNC world!";
+
+int main(void) {
+    aio_context_t ctx = 0;
+
+    int err = syscall(__NR_io_setup, 10, &ctx);
+    if (err < 0) {
+        printf("io_setup err: %d\n", errno);
+        return 1;
+    }
+
+    printf("io_setup success\n");
+
+    err = open("test.txt", O_CREAT | O_RDWR, 0777);
+    if (err < 0) {
+        printf("open err: %d\n", errno);
+        return 1;
+    }
+
+    printf("open success\n");
+
+    int fd = err;
+    struct iocb req;
+    memset(&req, 0, sizeof(req));
+
+    req.aio_data = 0xDEADBEEF;
+    req.aio_lio_opcode = IOCB_CMD_PWRITE;
+    req.aio_fildes = fd;
+    req.aio_buf = (uintptr_t)info;
+    req.aio_nbytes = strlen(info);
+    req.aio_offset = 0;
+
+    struct iocb* reqs[1];
+    reqs[0] = &req;
+
+    err = syscall(__NR_io_submit, ctx, 1, &reqs);
+    if (err < 0) {
+        printf("io_submit err: %d\n", errno);
+        return 1;
+    } else if (err == 0) {
+        // Nothing was queued, so the io_getevents loop below could never finish.
+        printf("io_submit reports 0 successful IOCBs, but no error code. errno is %d\n", errno);
+        return 1;
+    }
+
+    printf("io_submit success: %d\n", err);
+
+    struct io_event evt;
+
+    // min_nr is 0, so io_getevents may return with no events; poll until one arrives.
+    do {
+        err = syscall(__NR_io_getevents, ctx, 0, 1, &evt, NULL);
+        if (err < 0) {
+            printf("io_getevents err: %d\n", errno);
+            return 1;
+        }
+    } while (err < 1);
+
+    printf("io_getevents success: %d\n", err);
+    printf("evt.data: 0x%llX\n", evt.data);
+
+    if (evt.obj == (uintptr_t)&req) {
+        printf("evt.obj matches &req\n");
+    } else {
+        printf("evt.obj does NOT match &req, 0x%llX given\n", evt.obj);
+        printf("(&req is 0x%llX)\n", (uint64_t)(uintptr_t)&req);
+    }
+
+    printf("evt.res: %lld\n", evt.res);
+    printf("evt.res2: %lld\n", evt.res2);
+
+    memset(&req, 0, sizeof(req));
+    char rbuf[513];
+
+    req.aio_data = 0xCAFEBABE;
+    req.aio_lio_opcode = IOCB_CMD_PREAD;
+    req.aio_fildes = fd;
+    req.aio_buf = (uintptr_t)rbuf;
+    req.aio_nbytes = 512;
+    req.aio_offset = 0;
+
+    reqs[0] = &req;
+
+    err = syscall(__NR_io_submit, ctx, 1, &reqs);
+    if (err < 0) {
+        printf("io_submit err: %d\n", errno);
+        return 1;
+    } else if (err == 0) {
+        // Nothing was queued, so the io_getevents loop below could never finish.
+        printf("io_submit reports 0 successful IOCBs, but no error code. errno is %d\n", errno);
+        return 1;
+    }
+
+    do {
+        err = syscall(__NR_io_getevents, ctx, 0, 1, &evt, NULL);
+        if (err < 0) {
+            printf("io_getevents err: %d\n", errno);
+            return 1;
+        }
+    } while (err < 1);
+
+    printf("io_getevents success: %d\n", err);
+    printf("evt.data: 0x%llX\n", evt.data);
+
+    if (evt.obj == (uintptr_t)&req) {
+        printf("evt.obj matches &req\n");
+    } else {
+        printf("evt.obj does NOT match &req, 0x%llX given\n", evt.obj);
+        printf("(&req is 0x%llX)\n", (uint64_t)(uintptr_t)&req);
+    }
+
+    printf("evt.res: %lld\n", evt.res);
+    printf("evt.res2: %lld\n", evt.res2);
+
+    // A negative res is an error code and must not be used as an index into rbuf.
+    if (evt.res >= 0 && evt.res < 513) {
+        rbuf[evt.res] = 0;
+        printf("rbuf: %s\n", rbuf);
+    } else {
+        printf("rbuf: \n");
+    }
+
+    err = close(fd);
+    if (err < 0) {
+        printf("close err: %d\n", errno);
+        return 1;
+    }
+
+    printf("close success\n");
+
+    err = syscall(__NR_io_destroy, ctx);
+    if (err < 0) {
+        printf("io_destroy err: %d\n", errno);
+        return 1;
+    }
+
+    printf("io_destroy success\n");
+    return 0;
+}
diff --git a/tests/e2e/aio_rw/expected.txt b/tests/e2e/aio_rw/expected.txt
new file mode 100644
index 0000000000..6ac3a9284c
--- /dev/null
+++ b/tests/e2e/aio_rw/expected.txt
@@ -0,0 +1,16 @@
+io_setup success
+open success
+io_submit success: 1
+io_getevents success: 1
+evt.data: 0xDEADBEEF
+evt.obj matches &req
+evt.res: 23
+evt.res2: 0
+io_getevents success: 1
+evt.data: 0xCAFEBABE
+evt.obj matches &req
+evt.res: 23
+evt.res2: 0
+rbuf: Welcome to ASYNC world!
+close success
+io_destroy success
diff --git a/tests/e2e/aio_rw/test.sh b/tests/e2e/aio_rw/test.sh
new file mode 100755
index 0000000000..7ae99e7191
--- /dev/null
+++ b/tests/e2e/aio_rw/test.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+gcc aio_rw.c -o ./aio_rw
+./aio_rw
+rm test.txt
\ No newline at end of file
diff --git a/tests/e2e/aio_rw_vectored/aio_rw_vectored.c b/tests/e2e/aio_rw_vectored/aio_rw_vectored.c
new file mode 100644
index 0000000000..f78c43fb6a
--- /dev/null
+++ b/tests/e2e/aio_rw_vectored/aio_rw_vectored.c
@@ -0,0 +1,205 @@
+// End-to-end test for vectored Linux AIO: writes two strings to test.txt with
+// IOCB_CMD_PWRITEV, reads them back into two buffers with IOCB_CMD_PREADV, and
+// prints each result so the output can be compared against expected.txt.
+//
+// NOTE(review): the header names below were reconstructed from the identifiers
+// this file uses - confirm them against the original commit.
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+// The kernel ABI (linux/aio_abi.h) declares the context handle as an unsigned
+// long. A fixed 64-bit type would take two variadic slots in syscall() on a
+// 32-bit target and shift every argument that follows it.
+typedef unsigned long aio_context_t;
+
+enum {
+    IOCB_CMD_PREAD = 0,
+    IOCB_CMD_PWRITE = 1,
+    IOCB_CMD_FSYNC = 2,
+    IOCB_CMD_FDSYNC = 3,
+    IOCB_CMD_NOOP = 6,
+    IOCB_CMD_PREADV = 7,
+    IOCB_CMD_PWRITEV = 8,
+};
+
+struct iocb {
+    uint64_t aio_data;
+    uint32_t aio_key;
+    uint32_t aio_rw_flags;
+    uint16_t aio_lio_opcode;
+    int16_t aio_reqprio;
+    uint32_t aio_fildes;
+    uint64_t aio_buf;
+    uint64_t aio_nbytes;
+    int64_t aio_offset;
+    uint64_t aio_reserved2;
+    uint32_t aio_flags;
+    uint32_t aio_resfd;
+};
+
+struct io_event {
+    uint64_t data;
+    uint64_t obj;
+    int64_t res;
+    int64_t res2;
+};
+
+const char* info0 = "Welcome to ASYNC world - ";
+const char* info1 = "now you're writing with vectors";
+
+int main(void) {
+    aio_context_t ctx = 0;
+
+    int err = syscall(__NR_io_setup, 10, &ctx);
+    if (err < 0) {
+        printf("io_setup err: %d\n", errno);
+        return 1;
+    }
+
+    printf("io_setup success\n");
+
+    err = open("test.txt", O_CREAT | O_RDWR, 0777);
+    if (err < 0) {
+        printf("open err: %d\n", errno);
+        return 1;
+    }
+
+    printf("open success\n");
+
+    int fd = err;
+    // iov_base is a plain void *, so the const string pointers need an explicit cast.
+    struct iovec req_vecs[2];
+    req_vecs[0].iov_base = (void *)info0;
+    req_vecs[0].iov_len = strlen(info0);
+    req_vecs[1].iov_base = (void *)info1;
+    req_vecs[1].iov_len = strlen(info1);
+
+    struct iocb req;
+    memset(&req, 0, sizeof(req));
+
+    req.aio_data = 0x31337;
+    req.aio_lio_opcode = IOCB_CMD_PWRITEV;
+    req.aio_fildes = fd;
+    req.aio_buf = (uintptr_t)&req_vecs;
+    req.aio_nbytes = 2; // number of entries in req_vecs, not a byte count
+    req.aio_offset = 0;
+
+    struct iocb* reqs[1];
+    reqs[0] = &req;
+
+    err = syscall(__NR_io_submit, ctx, 1, &reqs);
+    if (err < 0) {
+        printf("io_submit err: %d\n", errno);
+        return 1;
+    } else if (err == 0) {
+        // Nothing was queued, so the io_getevents loop below could never finish.
+        printf("io_submit reports 0 successful IOCBs, but no error code. errno is %d\n", errno);
+        return 1;
+    }
+
+    printf("io_submit success: %d\n", err);
+
+    struct io_event evt;
+
+    // min_nr is 0, so io_getevents may return with no events; poll until one arrives.
+    do {
+        err = syscall(__NR_io_getevents, ctx, 0, 1, &evt, NULL);
+        if (err < 0) {
+            printf("io_getevents err: %d\n", errno);
+            return 1;
+        }
+    } while (err < 1);
+
+    printf("io_getevents success: %d\n", err);
+    printf("evt.data: 0x%llX\n", evt.data);
+
+    if (evt.obj == (uintptr_t)&req) {
+        printf("evt.obj matches &req\n");
+    } else {
+        printf("evt.obj does NOT match &req, 0x%llX given\n", evt.obj);
+        printf("(&req is 0x%llX)\n", (uint64_t)(uintptr_t)&req);
+    }
+
+    printf("evt.res: %lld\n", evt.res);
+    printf("evt.res2: %lld\n", evt.res2);
+
+    memset(&req, 0, sizeof(req));
+    // Zero-filled so each 24-byte read target stays NUL-terminated and a short
+    // or failed read never prints uninitialized bytes.
+    char rbuf[2][25] = {{0}};
+
+    req_vecs[0].iov_base = rbuf[0];
+    req_vecs[0].iov_len = 24;
+    req_vecs[1].iov_base = rbuf[1];
+    req_vecs[1].iov_len = 24;
+
+    req.aio_data = 0xCAFEBABE;
+    req.aio_lio_opcode = IOCB_CMD_PREADV;
+    req.aio_fildes = fd;
+    req.aio_buf = (uintptr_t)req_vecs;
+    req.aio_nbytes = 2;
+    req.aio_offset = 0;
+
+    reqs[0] = &req;
+
+    err = syscall(__NR_io_submit, ctx, 1, &reqs);
+    if (err < 0) {
+        printf("io_submit err: %d\n", errno);
+        return 1;
+    } else if (err == 0) {
+        // Nothing was queued, so the io_getevents loop below could never finish.
+        printf("io_submit reports 0 successful IOCBs, but no error code. errno is %d\n", errno);
+        return 1;
+    }
+
+    do {
+        err = syscall(__NR_io_getevents, ctx, 0, 1, &evt, NULL);
+        if (err < 0) {
+            printf("io_getevents err: %d\n", errno);
+            return 1;
+        }
+    } while (err < 1);
+
+    printf("io_getevents success: %d\n", err);
+    printf("evt.data: 0x%llX\n", evt.data);
+
+    if (evt.obj == (uintptr_t)&req) {
+        printf("evt.obj matches &req\n");
+    } else {
+        printf("evt.obj does NOT match &req, 0x%llX given\n", evt.obj);
+        printf("(&req is 0x%llX)\n", (uint64_t)(uintptr_t)&req);
+    }
+
+    printf("evt.res: %lld\n", evt.res);
+    printf("evt.res2: %lld\n", evt.res2);
+
+    if (evt.res <= 48) {
+        printf("rbuf[0]: %s\n", rbuf[0]);
+        printf("rbuf[1]: %s\n", rbuf[1]);
+    } else {
+        printf("rbuf: \n");
+    }
+
+    err = close(fd);
+    if (err < 0) {
+        printf("close err: %d\n", errno);
+        return 1;
+    }
+
+    printf("close success\n");
+
+    err = syscall(__NR_io_destroy, ctx);
+    if (err < 0) {
+        printf("io_destroy err: %d\n", errno);
+        return 1;
+    }
+
+    printf("io_destroy success\n");
+    return 0;
+}
diff --git a/tests/e2e/aio_rw_vectored/expected.txt b/tests/e2e/aio_rw_vectored/expected.txt
new file mode 100644
index 0000000000..1b9db82f20
--- /dev/null
+++ b/tests/e2e/aio_rw_vectored/expected.txt
@@ -0,0 +1,17 @@
+io_setup success
+open success
+io_submit success: 1
+io_getevents success: 1
+evt.data: 0x31337
+evt.obj matches &req
+evt.res: 56
+evt.res2: 0
+io_getevents success: 1
+evt.data: 0xCAFEBABE
+evt.obj matches &req
+evt.res: 48
+evt.res2: 0
+rbuf[0]: Welcome to ASYNC world -
+rbuf[1]:  now you're writing with
+close success
+io_destroy success
diff --git a/tests/e2e/aio_rw_vectored/test.sh b/tests/e2e/aio_rw_vectored/test.sh
new file mode 100755
index 0000000000..688ada405b
--- /dev/null
+++ b/tests/e2e/aio_rw_vectored/test.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+gcc aio_rw_vectored.c -o ./aio_rw_vectored
+./aio_rw_vectored
+rm test.txt