123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511 |
- /*
- * ioapic.c IOAPIC emulation logic
- *
- * Copyright (c) 2004-2005 Fabrice Bellard
- *
- * Split the ioapic logic from apic.c
- * Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
- #include "qemu/osdep.h"
- #include "qapi/error.h"
- #include "monitor/monitor.h"
- #include "hw/i386/apic.h"
- #include "hw/i386/x86.h"
- #include "hw/intc/i8259.h"
- #include "hw/intc/ioapic.h"
- #include "hw/intc/ioapic_internal.h"
- #include "hw/pci/msi.h"
- #include "hw/qdev-properties.h"
- #include "system/kvm.h"
- #include "system/system.h"
- #include "hw/i386/apic-msidef.h"
- #include "hw/i386/x86-iommu.h"
- #include "trace.h"
- #define APIC_DELIVERY_MODE_SHIFT 8
- #define APIC_POLARITY_SHIFT 14
- #define APIC_TRIG_MODE_SHIFT 15
- static IOAPICCommonState *ioapics[MAX_IOAPICS];
- /* global variable from ioapic_common.c */
- extern int ioapic_no;
/*
 * Decoded form of one 64-bit redirection table entry, plus the MSI
 * address/data pair derived from it.  Filled in by ioapic_entry_parse().
 */
struct ioapic_entry_info {
    /* Fields extracted from the redirection table entry. */
    uint8_t masked;         /* mask bit (non-zero: pin is masked) */
    uint8_t trig_mode;      /* trigger mode bit */
    uint16_t dest_idx;      /* 16-bit destination / index field */
    uint8_t dest_mode;      /* destination mode bit */
    uint8_t delivery_mode;  /* delivery mode field */
    uint8_t vector;         /* vector to deliver */

    /* MSI message built from the fields above. */
    uint32_t addr;
    uint32_t data;
};
- static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
- {
- memset(info, 0, sizeof(*info));
- info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
- info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
- /*
- * By default, this would be dest_id[8] + reserved[8]. When IR
- * is enabled, this would be interrupt_index[15] +
- * interrupt_format[1]. This field never means anything, but
- * only used to generate corresponding MSI.
- */
- info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
- info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
- info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
- & IOAPIC_DM_MASK;
- if (info->delivery_mode == IOAPIC_DM_EXTINT) {
- info->vector = pic_read_irq(isa_pic);
- } else {
- info->vector = entry & IOAPIC_VECTOR_MASK;
- }
- info->addr = APIC_DEFAULT_ADDRESS | \
- (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
- (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
- info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
- (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
- (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
- }
- static void ioapic_service(IOAPICCommonState *s)
- {
- AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as;
- struct ioapic_entry_info info;
- uint8_t i;
- uint32_t mask;
- uint64_t entry;
- for (i = 0; i < IOAPIC_NUM_PINS; i++) {
- mask = 1 << i;
- if (s->irr & mask) {
- int coalesce = 0;
- entry = s->ioredtbl[i];
- ioapic_entry_parse(entry, &info);
- if (!info.masked) {
- if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
- s->irr &= ~mask;
- } else {
- coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
- trace_ioapic_set_remote_irr(i);
- s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
- }
- if (coalesce) {
- /* We are level triggered interrupts, and the
- * guest should be still working on previous one,
- * so skip it. */
- continue;
- }
- #ifdef CONFIG_KVM
- if (kvm_irqchip_is_split()) {
- if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
- kvm_set_irq(kvm_state, i, 1);
- kvm_set_irq(kvm_state, i, 0);
- } else {
- kvm_set_irq(kvm_state, i, 1);
- }
- continue;
- }
- #endif
- /* No matter whether IR is enabled, we translate
- * the IOAPIC message into a MSI one, and its
- * address space will decide whether we need a
- * translation. */
- stl_le_phys(ioapic_as, info.addr, info.data);
- }
- }
- }
- }
- #define SUCCESSIVE_IRQ_MAX_COUNT 10000
- static void delayed_ioapic_service_cb(void *opaque)
- {
- IOAPICCommonState *s = opaque;
- ioapic_service(s);
- }
- static void ioapic_set_irq(void *opaque, int vector, int level)
- {
- IOAPICCommonState *s = opaque;
- /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
- * to GSI 2. GSI maps to ioapic 1-1. This is not
- * the cleanest way of doing it but it should work. */
- trace_ioapic_set_irq(vector, level);
- ioapic_stat_update_irq(s, vector, level);
- if (vector == 0) {
- vector = 2;
- }
- if (vector < IOAPIC_NUM_PINS) {
- uint32_t mask = 1 << vector;
- uint64_t entry = s->ioredtbl[vector];
- if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) ==
- IOAPIC_TRIGGER_LEVEL) {
- /* level triggered */
- if (level) {
- s->irr |= mask;
- if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
- ioapic_service(s);
- }
- } else {
- s->irr &= ~mask;
- }
- } else {
- /* According to the 82093AA manual, we must ignore edge requests
- * if the input pin is masked. */
- if (level && !(entry & IOAPIC_LVT_MASKED)) {
- s->irr |= mask;
- ioapic_service(s);
- }
- }
- }
- }
- static void ioapic_update_kvm_routes(IOAPICCommonState *s)
- {
- #ifdef CONFIG_KVM
- int i;
- if (kvm_irqchip_is_split()) {
- for (i = 0; i < IOAPIC_NUM_PINS; i++) {
- MSIMessage msg;
- struct ioapic_entry_info info;
- ioapic_entry_parse(s->ioredtbl[i], &info);
- if (!info.masked) {
- msg.address = info.addr;
- msg.data = info.data;
- kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
- }
- }
- kvm_irqchip_commit_routes(kvm_state);
- }
- #endif
- }
#ifdef CONFIG_KVM
/*
 * IEC notifier callback registered with the IOMMU by
 * ioapic_machine_done_notify().
 *
 * @private: the IOAPICCommonState given at registration time
 * @global, @index, @mask: ignored
 */
static void ioapic_iec_notifier(void *private, bool global,
                                uint32_t index, uint32_t mask)
{
    /* Keep it simple: refresh every route rather than only affected ones. */
    ioapic_update_kvm_routes((IOAPICCommonState *)private);
}
#endif
- void ioapic_eoi_broadcast(int vector)
- {
- IOAPICCommonState *s;
- uint64_t entry;
- int i, n;
- trace_ioapic_eoi_broadcast(vector);
- for (i = 0; i < MAX_IOAPICS; i++) {
- s = ioapics[i];
- if (!s) {
- continue;
- }
- for (n = 0; n < IOAPIC_NUM_PINS; n++) {
- entry = s->ioredtbl[n];
- if ((entry & IOAPIC_VECTOR_MASK) != vector ||
- ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
- continue;
- }
- #ifdef CONFIG_KVM
- /*
- * When IOAPIC is in the userspace while APIC is still in
- * the kernel (i.e., split irqchip), we have a trick to
- * kick the resamplefd logic for registered irqfds from
- * userspace to deactivate the IRQ. When that happens, it
- * means the irq bypassed userspace IOAPIC (so the irr and
- * remote-irr of the table entry should be bypassed too
- * even if interrupt come). Still kick the resamplefds if
- * they're bound to the IRQ, to make sure to EOI the
- * interrupt for the hardware correctly.
- *
- * Note: We still need to go through the irr & remote-irr
- * operations below because we don't know whether there're
- * emulated devices that are using/sharing the same IRQ.
- */
- kvm_resample_fd_notify(n);
- #endif
- if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
- continue;
- }
- trace_ioapic_clear_remote_irr(n, vector);
- s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
- if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
- ++s->irq_eoi[n];
- if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) {
- /*
- * Real hardware does not deliver the interrupt immediately
- * during eoi broadcast, and this lets a buggy guest make
- * slow progress even if it does not correctly handle a
- * level-triggered interrupt. Emulate this behavior if we
- * detect an interrupt storm.
- */
- s->irq_eoi[n] = 0;
- timer_mod_anticipate(s->delayed_ioapic_service_timer,
- qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- NANOSECONDS_PER_SECOND / 100);
- trace_ioapic_eoi_delayed_reassert(n);
- } else {
- ioapic_service(s);
- }
- } else {
- s->irq_eoi[n] = 0;
- }
- }
- }
- }
- static uint64_t
- ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size)
- {
- IOAPICCommonState *s = opaque;
- int index;
- uint32_t val = 0;
- addr &= 0xff;
- switch (addr) {
- case IOAPIC_IOREGSEL:
- val = s->ioregsel;
- break;
- case IOAPIC_IOWIN:
- if (size != 4) {
- break;
- }
- switch (s->ioregsel) {
- case IOAPIC_REG_ID:
- case IOAPIC_REG_ARB:
- val = s->id << IOAPIC_ID_SHIFT;
- break;
- case IOAPIC_REG_VER:
- val = s->version |
- ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT);
- break;
- default:
- index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
- if (index >= 0 && index < IOAPIC_NUM_PINS) {
- if (s->ioregsel & 1) {
- val = s->ioredtbl[index] >> 32;
- } else {
- val = s->ioredtbl[index] & 0xffffffff;
- }
- }
- }
- break;
- }
- trace_ioapic_mem_read(addr, s->ioregsel, size, val);
- return val;
- }
- /*
- * This is to satisfy the hack in Linux kernel. One hack of it is to
- * simulate clearing the Remote IRR bit of IOAPIC entry using the
- * following:
- *
- * "For IO-APIC's with EOI register, we use that to do an explicit EOI.
- * Otherwise, we simulate the EOI message manually by changing the trigger
- * mode to edge and then back to level, with RTE being masked during
- * this."
- *
- * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701)
- *
- * This is based on the assumption that, Remote IRR bit will be
- * cleared by IOAPIC hardware when configured as edge-triggered
- * interrupts.
- *
- * Without this, level-triggered interrupts in IR mode might fail to
- * work correctly.
- */
- static inline void
- ioapic_fix_edge_remote_irr(uint64_t *entry)
- {
- if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) {
- /* Edge-triggered interrupts, make sure remote IRR is zero */
- *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR);
- }
- }
- static void
- ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
- unsigned int size)
- {
- IOAPICCommonState *s = opaque;
- int index;
- addr &= 0xff;
- trace_ioapic_mem_write(addr, s->ioregsel, size, val);
- switch (addr) {
- case IOAPIC_IOREGSEL:
- s->ioregsel = val;
- break;
- case IOAPIC_IOWIN:
- if (size != 4) {
- break;
- }
- switch (s->ioregsel) {
- case IOAPIC_REG_ID:
- s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK;
- break;
- case IOAPIC_REG_VER:
- case IOAPIC_REG_ARB:
- break;
- default:
- index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
- if (index >= 0 && index < IOAPIC_NUM_PINS) {
- uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS;
- if (s->ioregsel & 1) {
- s->ioredtbl[index] &= 0xffffffff;
- s->ioredtbl[index] |= (uint64_t)val << 32;
- } else {
- s->ioredtbl[index] &= ~0xffffffffULL;
- s->ioredtbl[index] |= val;
- }
- /* restore RO bits */
- s->ioredtbl[index] &= IOAPIC_RW_BITS;
- s->ioredtbl[index] |= ro_bits;
- s->irq_eoi[index] = 0;
- ioapic_fix_edge_remote_irr(&s->ioredtbl[index]);
- ioapic_update_kvm_routes(s);
- ioapic_service(s);
- }
- }
- break;
- case IOAPIC_EOI:
- /* Explicit EOI is only supported for IOAPIC version 0x20 */
- if (size != 4 || s->version != 0x20) {
- break;
- }
- ioapic_eoi_broadcast(val);
- break;
- }
- }
- static const MemoryRegionOps ioapic_io_ops = {
- .read = ioapic_mem_read,
- .write = ioapic_mem_write,
- .endianness = DEVICE_NATIVE_ENDIAN,
- };
- static void ioapic_machine_done_notify(Notifier *notifier, void *data)
- {
- #ifdef CONFIG_KVM
- IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
- machine_done);
- if (kvm_irqchip_is_split()) {
- X86IOMMUState *iommu = x86_iommu_get_default();
- if (iommu) {
- /* Register this IOAPIC with IOMMU IEC notifier, so that
- * when there are IR invalidates, we can be notified to
- * update kernel IR cache. */
- x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
- }
- }
- #endif
- }
- #define IOAPIC_VER_DEF 0x20
- static void ioapic_realize(DeviceState *dev, Error **errp)
- {
- IOAPICCommonState *s = IOAPIC_COMMON(dev);
- if (s->version != 0x11 && s->version != 0x20) {
- error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 "
- "(default: 0x%x).", IOAPIC_VER_DEF);
- return;
- }
- memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
- "ioapic", 0x1000);
- s->delayed_ioapic_service_timer =
- timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s);
- qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
- ioapics[ioapic_no] = s;
- s->machine_done.notify = ioapic_machine_done_notify;
- qemu_add_machine_init_done_notifier(&s->machine_done);
- }
- static void ioapic_unrealize(DeviceState *dev)
- {
- IOAPICCommonState *s = IOAPIC_COMMON(dev);
- timer_free(s->delayed_ioapic_service_timer);
- }
- static const Property ioapic_properties[] = {
- DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
- };
- static void ioapic_class_init(ObjectClass *klass, void *data)
- {
- IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
- DeviceClass *dc = DEVICE_CLASS(klass);
- k->realize = ioapic_realize;
- k->unrealize = ioapic_unrealize;
- /*
- * If APIC is in kernel, we need to update the kernel cache after
- * migration, otherwise first 24 gsi routes will be invalid.
- */
- k->post_load = ioapic_update_kvm_routes;
- device_class_set_legacy_reset(dc, ioapic_reset_common);
- device_class_set_props(dc, ioapic_properties);
- }
- static const TypeInfo ioapic_info = {
- .name = TYPE_IOAPIC,
- .parent = TYPE_IOAPIC_COMMON,
- .instance_size = sizeof(IOAPICCommonState),
- .class_init = ioapic_class_init,
- };
- static void ioapic_register_types(void)
- {
- type_register_static(&ioapic_info);
- }
- type_init(ioapic_register_types)
|