ioapic.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /*
  2. * ioapic.c IOAPIC emulation logic
  3. *
  4. * Copyright (c) 2004-2005 Fabrice Bellard
  5. *
  6. * Split the ioapic logic from apic.c
  7. * Xiantao Zhang <xiantao.zhang@intel.com>
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2 of the License, or (at your option) any later version.
  13. *
  14. * This library is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21. */
  22. #include "qemu/osdep.h"
  23. #include "qapi/error.h"
  24. #include "monitor/monitor.h"
  25. #include "hw/i386/apic.h"
  26. #include "hw/i386/ioapic.h"
  27. #include "hw/i386/ioapic_internal.h"
  28. #include "hw/i386/x86.h"
  29. #include "hw/intc/i8259.h"
  30. #include "hw/pci/msi.h"
  31. #include "hw/qdev-properties.h"
  32. #include "sysemu/kvm.h"
  33. #include "sysemu/sysemu.h"
  34. #include "hw/i386/apic-msidef.h"
  35. #include "hw/i386/x86-iommu.h"
  36. #include "trace.h"
  37. #define APIC_DELIVERY_MODE_SHIFT 8
  38. #define APIC_POLARITY_SHIFT 14
  39. #define APIC_TRIG_MODE_SHIFT 15
  40. static IOAPICCommonState *ioapics[MAX_IOAPICS];
  41. /* global variable from ioapic_common.c */
  42. extern int ioapic_no;
  43. struct ioapic_entry_info {
  44. /* fields parsed from IOAPIC entries */
  45. uint8_t masked;
  46. uint8_t trig_mode;
  47. uint16_t dest_idx;
  48. uint8_t dest_mode;
  49. uint8_t delivery_mode;
  50. uint8_t vector;
  51. /* MSI message generated from above parsed fields */
  52. uint32_t addr;
  53. uint32_t data;
  54. };
  55. static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
  56. {
  57. memset(info, 0, sizeof(*info));
  58. info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
  59. info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
  60. /*
  61. * By default, this would be dest_id[8] + reserved[8]. When IR
  62. * is enabled, this would be interrupt_index[15] +
  63. * interrupt_format[1]. This field never means anything, but
  64. * only used to generate corresponding MSI.
  65. */
  66. info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
  67. info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
  68. info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
  69. & IOAPIC_DM_MASK;
  70. if (info->delivery_mode == IOAPIC_DM_EXTINT) {
  71. info->vector = pic_read_irq(isa_pic);
  72. } else {
  73. info->vector = entry & IOAPIC_VECTOR_MASK;
  74. }
  75. info->addr = APIC_DEFAULT_ADDRESS | \
  76. (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
  77. (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
  78. info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
  79. (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
  80. (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
  81. }
  82. static void ioapic_service(IOAPICCommonState *s)
  83. {
  84. AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as;
  85. struct ioapic_entry_info info;
  86. uint8_t i;
  87. uint32_t mask;
  88. uint64_t entry;
  89. for (i = 0; i < IOAPIC_NUM_PINS; i++) {
  90. mask = 1 << i;
  91. if (s->irr & mask) {
  92. int coalesce = 0;
  93. entry = s->ioredtbl[i];
  94. ioapic_entry_parse(entry, &info);
  95. if (!info.masked) {
  96. if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
  97. s->irr &= ~mask;
  98. } else {
  99. coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
  100. trace_ioapic_set_remote_irr(i);
  101. s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
  102. }
  103. if (coalesce) {
  104. /* We are level triggered interrupts, and the
  105. * guest should be still working on previous one,
  106. * so skip it. */
  107. continue;
  108. }
  109. #ifdef CONFIG_KVM
  110. if (kvm_irqchip_is_split()) {
  111. if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
  112. kvm_set_irq(kvm_state, i, 1);
  113. kvm_set_irq(kvm_state, i, 0);
  114. } else {
  115. kvm_set_irq(kvm_state, i, 1);
  116. }
  117. continue;
  118. }
  119. #endif
  120. /* No matter whether IR is enabled, we translate
  121. * the IOAPIC message into a MSI one, and its
  122. * address space will decide whether we need a
  123. * translation. */
  124. stl_le_phys(ioapic_as, info.addr, info.data);
  125. }
  126. }
  127. }
  128. }
  129. #define SUCCESSIVE_IRQ_MAX_COUNT 10000
  130. static void delayed_ioapic_service_cb(void *opaque)
  131. {
  132. IOAPICCommonState *s = opaque;
  133. ioapic_service(s);
  134. }
  135. static void ioapic_set_irq(void *opaque, int vector, int level)
  136. {
  137. IOAPICCommonState *s = opaque;
  138. /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
  139. * to GSI 2. GSI maps to ioapic 1-1. This is not
  140. * the cleanest way of doing it but it should work. */
  141. trace_ioapic_set_irq(vector, level);
  142. ioapic_stat_update_irq(s, vector, level);
  143. if (vector == 0) {
  144. vector = 2;
  145. }
  146. if (vector < IOAPIC_NUM_PINS) {
  147. uint32_t mask = 1 << vector;
  148. uint64_t entry = s->ioredtbl[vector];
  149. if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) ==
  150. IOAPIC_TRIGGER_LEVEL) {
  151. /* level triggered */
  152. if (level) {
  153. s->irr |= mask;
  154. if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
  155. ioapic_service(s);
  156. }
  157. } else {
  158. s->irr &= ~mask;
  159. }
  160. } else {
  161. /* According to the 82093AA manual, we must ignore edge requests
  162. * if the input pin is masked. */
  163. if (level && !(entry & IOAPIC_LVT_MASKED)) {
  164. s->irr |= mask;
  165. ioapic_service(s);
  166. }
  167. }
  168. }
  169. }
  170. static void ioapic_update_kvm_routes(IOAPICCommonState *s)
  171. {
  172. #ifdef CONFIG_KVM
  173. int i;
  174. if (kvm_irqchip_is_split()) {
  175. for (i = 0; i < IOAPIC_NUM_PINS; i++) {
  176. MSIMessage msg;
  177. struct ioapic_entry_info info;
  178. ioapic_entry_parse(s->ioredtbl[i], &info);
  179. if (!info.masked) {
  180. msg.address = info.addr;
  181. msg.data = info.data;
  182. kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
  183. }
  184. }
  185. kvm_irqchip_commit_routes(kvm_state);
  186. }
  187. #endif
  188. }
  189. #ifdef CONFIG_KVM
  190. static void ioapic_iec_notifier(void *private, bool global,
  191. uint32_t index, uint32_t mask)
  192. {
  193. IOAPICCommonState *s = (IOAPICCommonState *)private;
  194. /* For simplicity, we just update all the routes */
  195. ioapic_update_kvm_routes(s);
  196. }
  197. #endif
  198. void ioapic_eoi_broadcast(int vector)
  199. {
  200. IOAPICCommonState *s;
  201. uint64_t entry;
  202. int i, n;
  203. trace_ioapic_eoi_broadcast(vector);
  204. for (i = 0; i < MAX_IOAPICS; i++) {
  205. s = ioapics[i];
  206. if (!s) {
  207. continue;
  208. }
  209. for (n = 0; n < IOAPIC_NUM_PINS; n++) {
  210. entry = s->ioredtbl[n];
  211. if ((entry & IOAPIC_VECTOR_MASK) != vector ||
  212. ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
  213. continue;
  214. }
  215. #ifdef CONFIG_KVM
  216. /*
  217. * When IOAPIC is in the userspace while APIC is still in
  218. * the kernel (i.e., split irqchip), we have a trick to
  219. * kick the resamplefd logic for registered irqfds from
  220. * userspace to deactivate the IRQ. When that happens, it
  221. * means the irq bypassed userspace IOAPIC (so the irr and
  222. * remote-irr of the table entry should be bypassed too
  223. * even if interrupt come). Still kick the resamplefds if
  224. * they're bound to the IRQ, to make sure to EOI the
  225. * interrupt for the hardware correctly.
  226. *
  227. * Note: We still need to go through the irr & remote-irr
  228. * operations below because we don't know whether there're
  229. * emulated devices that are using/sharing the same IRQ.
  230. */
  231. kvm_resample_fd_notify(n);
  232. #endif
  233. if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
  234. continue;
  235. }
  236. trace_ioapic_clear_remote_irr(n, vector);
  237. s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
  238. if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
  239. ++s->irq_eoi[n];
  240. if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) {
  241. /*
  242. * Real hardware does not deliver the interrupt immediately
  243. * during eoi broadcast, and this lets a buggy guest make
  244. * slow progress even if it does not correctly handle a
  245. * level-triggered interrupt. Emulate this behavior if we
  246. * detect an interrupt storm.
  247. */
  248. s->irq_eoi[n] = 0;
  249. timer_mod_anticipate(s->delayed_ioapic_service_timer,
  250. qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
  251. NANOSECONDS_PER_SECOND / 100);
  252. trace_ioapic_eoi_delayed_reassert(n);
  253. } else {
  254. ioapic_service(s);
  255. }
  256. } else {
  257. s->irq_eoi[n] = 0;
  258. }
  259. }
  260. }
  261. }
  262. static uint64_t
  263. ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size)
  264. {
  265. IOAPICCommonState *s = opaque;
  266. int index;
  267. uint32_t val = 0;
  268. addr &= 0xff;
  269. switch (addr) {
  270. case IOAPIC_IOREGSEL:
  271. val = s->ioregsel;
  272. break;
  273. case IOAPIC_IOWIN:
  274. if (size != 4) {
  275. break;
  276. }
  277. switch (s->ioregsel) {
  278. case IOAPIC_REG_ID:
  279. case IOAPIC_REG_ARB:
  280. val = s->id << IOAPIC_ID_SHIFT;
  281. break;
  282. case IOAPIC_REG_VER:
  283. val = s->version |
  284. ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT);
  285. break;
  286. default:
  287. index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
  288. if (index >= 0 && index < IOAPIC_NUM_PINS) {
  289. if (s->ioregsel & 1) {
  290. val = s->ioredtbl[index] >> 32;
  291. } else {
  292. val = s->ioredtbl[index] & 0xffffffff;
  293. }
  294. }
  295. }
  296. break;
  297. }
  298. trace_ioapic_mem_read(addr, s->ioregsel, size, val);
  299. return val;
  300. }
  301. /*
  302. * This is to satisfy the hack in Linux kernel. One hack of it is to
  303. * simulate clearing the Remote IRR bit of IOAPIC entry using the
  304. * following:
  305. *
  306. * "For IO-APIC's with EOI register, we use that to do an explicit EOI.
  307. * Otherwise, we simulate the EOI message manually by changing the trigger
  308. * mode to edge and then back to level, with RTE being masked during
  309. * this."
  310. *
  311. * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701)
  312. *
  313. * This is based on the assumption that, Remote IRR bit will be
  314. * cleared by IOAPIC hardware when configured as edge-triggered
  315. * interrupts.
  316. *
  317. * Without this, level-triggered interrupts in IR mode might fail to
  318. * work correctly.
  319. */
  320. static inline void
  321. ioapic_fix_edge_remote_irr(uint64_t *entry)
  322. {
  323. if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) {
  324. /* Edge-triggered interrupts, make sure remote IRR is zero */
  325. *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR);
  326. }
  327. }
  328. static void
  329. ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
  330. unsigned int size)
  331. {
  332. IOAPICCommonState *s = opaque;
  333. int index;
  334. addr &= 0xff;
  335. trace_ioapic_mem_write(addr, s->ioregsel, size, val);
  336. switch (addr) {
  337. case IOAPIC_IOREGSEL:
  338. s->ioregsel = val;
  339. break;
  340. case IOAPIC_IOWIN:
  341. if (size != 4) {
  342. break;
  343. }
  344. switch (s->ioregsel) {
  345. case IOAPIC_REG_ID:
  346. s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK;
  347. break;
  348. case IOAPIC_REG_VER:
  349. case IOAPIC_REG_ARB:
  350. break;
  351. default:
  352. index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
  353. if (index >= 0 && index < IOAPIC_NUM_PINS) {
  354. uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS;
  355. if (s->ioregsel & 1) {
  356. s->ioredtbl[index] &= 0xffffffff;
  357. s->ioredtbl[index] |= (uint64_t)val << 32;
  358. } else {
  359. s->ioredtbl[index] &= ~0xffffffffULL;
  360. s->ioredtbl[index] |= val;
  361. }
  362. /* restore RO bits */
  363. s->ioredtbl[index] &= IOAPIC_RW_BITS;
  364. s->ioredtbl[index] |= ro_bits;
  365. s->irq_eoi[index] = 0;
  366. ioapic_fix_edge_remote_irr(&s->ioredtbl[index]);
  367. ioapic_service(s);
  368. }
  369. }
  370. break;
  371. case IOAPIC_EOI:
  372. /* Explicit EOI is only supported for IOAPIC version 0x20 */
  373. if (size != 4 || s->version != 0x20) {
  374. break;
  375. }
  376. ioapic_eoi_broadcast(val);
  377. break;
  378. }
  379. ioapic_update_kvm_routes(s);
  380. }
  381. static const MemoryRegionOps ioapic_io_ops = {
  382. .read = ioapic_mem_read,
  383. .write = ioapic_mem_write,
  384. .endianness = DEVICE_NATIVE_ENDIAN,
  385. };
  386. static void ioapic_machine_done_notify(Notifier *notifier, void *data)
  387. {
  388. #ifdef CONFIG_KVM
  389. IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
  390. machine_done);
  391. if (kvm_irqchip_is_split()) {
  392. X86IOMMUState *iommu = x86_iommu_get_default();
  393. if (iommu) {
  394. /* Register this IOAPIC with IOMMU IEC notifier, so that
  395. * when there are IR invalidates, we can be notified to
  396. * update kernel IR cache. */
  397. x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
  398. }
  399. }
  400. #endif
  401. }
  402. #define IOAPIC_VER_DEF 0x20
  403. static void ioapic_realize(DeviceState *dev, Error **errp)
  404. {
  405. IOAPICCommonState *s = IOAPIC_COMMON(dev);
  406. if (s->version != 0x11 && s->version != 0x20) {
  407. error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 "
  408. "(default: 0x%x).", IOAPIC_VER_DEF);
  409. return;
  410. }
  411. memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
  412. "ioapic", 0x1000);
  413. s->delayed_ioapic_service_timer =
  414. timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s);
  415. qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
  416. ioapics[ioapic_no] = s;
  417. s->machine_done.notify = ioapic_machine_done_notify;
  418. qemu_add_machine_init_done_notifier(&s->machine_done);
  419. }
  420. static void ioapic_unrealize(DeviceState *dev)
  421. {
  422. IOAPICCommonState *s = IOAPIC_COMMON(dev);
  423. timer_del(s->delayed_ioapic_service_timer);
  424. timer_free(s->delayed_ioapic_service_timer);
  425. }
  426. static Property ioapic_properties[] = {
  427. DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
  428. DEFINE_PROP_END_OF_LIST(),
  429. };
  430. static void ioapic_class_init(ObjectClass *klass, void *data)
  431. {
  432. IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
  433. DeviceClass *dc = DEVICE_CLASS(klass);
  434. k->realize = ioapic_realize;
  435. k->unrealize = ioapic_unrealize;
  436. /*
  437. * If APIC is in kernel, we need to update the kernel cache after
  438. * migration, otherwise first 24 gsi routes will be invalid.
  439. */
  440. k->post_load = ioapic_update_kvm_routes;
  441. dc->reset = ioapic_reset_common;
  442. device_class_set_props(dc, ioapic_properties);
  443. }
  444. static const TypeInfo ioapic_info = {
  445. .name = TYPE_IOAPIC,
  446. .parent = TYPE_IOAPIC_COMMON,
  447. .instance_size = sizeof(IOAPICCommonState),
  448. .class_init = ioapic_class_init,
  449. };
  450. static void ioapic_register_types(void)
  451. {
  452. type_register_static(&ioapic_info);
  453. }
  454. type_init(ioapic_register_types)