ioapic.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /*
  2. * ioapic.c IOAPIC emulation logic
  3. *
  4. * Copyright (c) 2004-2005 Fabrice Bellard
  5. *
  6. * Split the ioapic logic from apic.c
  7. * Xiantao Zhang <xiantao.zhang@intel.com>
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2 of the License, or (at your option) any later version.
  13. *
  14. * This library is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21. */
  22. #include "qemu/osdep.h"
  23. #include "qapi/error.h"
  24. #include "monitor/monitor.h"
  25. #include "hw/i386/pc.h"
  26. #include "hw/i386/apic.h"
  27. #include "hw/i386/ioapic.h"
  28. #include "hw/i386/ioapic_internal.h"
  29. #include "hw/pci/msi.h"
  30. #include "hw/qdev-properties.h"
  31. #include "sysemu/kvm.h"
  32. #include "sysemu/sysemu.h"
  33. #include "hw/i386/apic-msidef.h"
  34. #include "hw/i386/x86-iommu.h"
  35. #include "trace.h"
  36. #define APIC_DELIVERY_MODE_SHIFT 8
  37. #define APIC_POLARITY_SHIFT 14
  38. #define APIC_TRIG_MODE_SHIFT 15
  39. static IOAPICCommonState *ioapics[MAX_IOAPICS];
  40. /* global variable from ioapic_common.c */
  41. extern int ioapic_no;
  /*
   * Decoded form of one redirection table entry together with the MSI
   * message that ioapic_entry_parse() derives from it.
   */
  struct ioapic_entry_info {
      /* Fields extracted from the IOAPIC entry. */
      uint8_t  masked;
      uint8_t  trig_mode;
      uint16_t dest_idx;
      uint8_t  dest_mode;
      uint8_t  delivery_mode;
      uint8_t  vector;

      /* MSI address/data pair built from the fields above. */
      uint32_t addr;
      uint32_t data;
  };
  54. static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
  55. {
  56. memset(info, 0, sizeof(*info));
  57. info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
  58. info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
  59. /*
  60. * By default, this would be dest_id[8] + reserved[8]. When IR
  61. * is enabled, this would be interrupt_index[15] +
  62. * interrupt_format[1]. This field never means anything, but
  63. * only used to generate corresponding MSI.
  64. */
  65. info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
  66. info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
  67. info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
  68. & IOAPIC_DM_MASK;
  69. if (info->delivery_mode == IOAPIC_DM_EXTINT) {
  70. info->vector = pic_read_irq(isa_pic);
  71. } else {
  72. info->vector = entry & IOAPIC_VECTOR_MASK;
  73. }
  74. info->addr = APIC_DEFAULT_ADDRESS | \
  75. (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
  76. (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
  77. info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
  78. (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
  79. (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
  80. }
  81. static void ioapic_service(IOAPICCommonState *s)
  82. {
  83. AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as;
  84. struct ioapic_entry_info info;
  85. uint8_t i;
  86. uint32_t mask;
  87. uint64_t entry;
  88. for (i = 0; i < IOAPIC_NUM_PINS; i++) {
  89. mask = 1 << i;
  90. if (s->irr & mask) {
  91. int coalesce = 0;
  92. entry = s->ioredtbl[i];
  93. ioapic_entry_parse(entry, &info);
  94. if (!info.masked) {
  95. if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
  96. s->irr &= ~mask;
  97. } else {
  98. coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
  99. trace_ioapic_set_remote_irr(i);
  100. s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
  101. }
  102. if (coalesce) {
  103. /* We are level triggered interrupts, and the
  104. * guest should be still working on previous one,
  105. * so skip it. */
  106. continue;
  107. }
  108. #ifdef CONFIG_KVM
  109. if (kvm_irqchip_is_split()) {
  110. if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
  111. kvm_set_irq(kvm_state, i, 1);
  112. kvm_set_irq(kvm_state, i, 0);
  113. } else {
  114. kvm_set_irq(kvm_state, i, 1);
  115. }
  116. continue;
  117. }
  118. #endif
  119. /* No matter whether IR is enabled, we translate
  120. * the IOAPIC message into a MSI one, and its
  121. * address space will decide whether we need a
  122. * translation. */
  123. stl_le_phys(ioapic_as, info.addr, info.data);
  124. }
  125. }
  126. }
  127. }
  128. #define SUCCESSIVE_IRQ_MAX_COUNT 10000
  129. static void delayed_ioapic_service_cb(void *opaque)
  130. {
  131. IOAPICCommonState *s = opaque;
  132. ioapic_service(s);
  133. }
  134. static void ioapic_set_irq(void *opaque, int vector, int level)
  135. {
  136. IOAPICCommonState *s = opaque;
  137. /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
  138. * to GSI 2. GSI maps to ioapic 1-1. This is not
  139. * the cleanest way of doing it but it should work. */
  140. trace_ioapic_set_irq(vector, level);
  141. ioapic_stat_update_irq(s, vector, level);
  142. if (vector == 0) {
  143. vector = 2;
  144. }
  145. if (vector < IOAPIC_NUM_PINS) {
  146. uint32_t mask = 1 << vector;
  147. uint64_t entry = s->ioredtbl[vector];
  148. if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) ==
  149. IOAPIC_TRIGGER_LEVEL) {
  150. /* level triggered */
  151. if (level) {
  152. s->irr |= mask;
  153. if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
  154. ioapic_service(s);
  155. }
  156. } else {
  157. s->irr &= ~mask;
  158. }
  159. } else {
  160. /* According to the 82093AA manual, we must ignore edge requests
  161. * if the input pin is masked. */
  162. if (level && !(entry & IOAPIC_LVT_MASKED)) {
  163. s->irr |= mask;
  164. ioapic_service(s);
  165. }
  166. }
  167. }
  168. }
  169. static void ioapic_update_kvm_routes(IOAPICCommonState *s)
  170. {
  171. #ifdef CONFIG_KVM
  172. int i;
  173. if (kvm_irqchip_is_split()) {
  174. for (i = 0; i < IOAPIC_NUM_PINS; i++) {
  175. MSIMessage msg;
  176. struct ioapic_entry_info info;
  177. ioapic_entry_parse(s->ioredtbl[i], &info);
  178. if (!info.masked) {
  179. msg.address = info.addr;
  180. msg.data = info.data;
  181. kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
  182. }
  183. }
  184. kvm_irqchip_commit_routes(kvm_state);
  185. }
  186. #endif
  187. }
  #ifdef CONFIG_KVM
  /*
   * IEC notifier callback; @private is the IOAPIC that was registered.
   * @global, @index and @mask are not examined: for simplicity all routes
   * are refreshed.
   */
  static void ioapic_iec_notifier(void *private, bool global,
                                  uint32_t index, uint32_t mask)
  {
      IOAPICCommonState *ioapic = private;

      ioapic_update_kvm_routes(ioapic);
  }
  #endif
  197. void ioapic_eoi_broadcast(int vector)
  198. {
  199. IOAPICCommonState *s;
  200. uint64_t entry;
  201. int i, n;
  202. trace_ioapic_eoi_broadcast(vector);
  203. for (i = 0; i < MAX_IOAPICS; i++) {
  204. s = ioapics[i];
  205. if (!s) {
  206. continue;
  207. }
  208. for (n = 0; n < IOAPIC_NUM_PINS; n++) {
  209. entry = s->ioredtbl[n];
  210. if ((entry & IOAPIC_VECTOR_MASK) != vector ||
  211. ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
  212. continue;
  213. }
  214. if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
  215. continue;
  216. }
  217. trace_ioapic_clear_remote_irr(n, vector);
  218. s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
  219. if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
  220. ++s->irq_eoi[n];
  221. if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) {
  222. /*
  223. * Real hardware does not deliver the interrupt immediately
  224. * during eoi broadcast, and this lets a buggy guest make
  225. * slow progress even if it does not correctly handle a
  226. * level-triggered interrupt. Emulate this behavior if we
  227. * detect an interrupt storm.
  228. */
  229. s->irq_eoi[n] = 0;
  230. timer_mod_anticipate(s->delayed_ioapic_service_timer,
  231. qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
  232. NANOSECONDS_PER_SECOND / 100);
  233. trace_ioapic_eoi_delayed_reassert(n);
  234. } else {
  235. ioapic_service(s);
  236. }
  237. } else {
  238. s->irq_eoi[n] = 0;
  239. }
  240. }
  241. }
  242. }
  243. static uint64_t
  244. ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size)
  245. {
  246. IOAPICCommonState *s = opaque;
  247. int index;
  248. uint32_t val = 0;
  249. addr &= 0xff;
  250. switch (addr) {
  251. case IOAPIC_IOREGSEL:
  252. val = s->ioregsel;
  253. break;
  254. case IOAPIC_IOWIN:
  255. if (size != 4) {
  256. break;
  257. }
  258. switch (s->ioregsel) {
  259. case IOAPIC_REG_ID:
  260. case IOAPIC_REG_ARB:
  261. val = s->id << IOAPIC_ID_SHIFT;
  262. break;
  263. case IOAPIC_REG_VER:
  264. val = s->version |
  265. ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT);
  266. break;
  267. default:
  268. index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
  269. if (index >= 0 && index < IOAPIC_NUM_PINS) {
  270. if (s->ioregsel & 1) {
  271. val = s->ioredtbl[index] >> 32;
  272. } else {
  273. val = s->ioredtbl[index] & 0xffffffff;
  274. }
  275. }
  276. }
  277. break;
  278. }
  279. trace_ioapic_mem_read(addr, s->ioregsel, size, val);
  280. return val;
  281. }
  282. /*
  283. * This is to satisfy the hack in Linux kernel. One hack of it is to
  284. * simulate clearing the Remote IRR bit of IOAPIC entry using the
  285. * following:
  286. *
  287. * "For IO-APIC's with EOI register, we use that to do an explicit EOI.
  288. * Otherwise, we simulate the EOI message manually by changing the trigger
  289. * mode to edge and then back to level, with RTE being masked during
  290. * this."
  291. *
  292. * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701)
  293. *
  294. * This is based on the assumption that, Remote IRR bit will be
  295. * cleared by IOAPIC hardware when configured as edge-triggered
  296. * interrupts.
  297. *
  298. * Without this, level-triggered interrupts in IR mode might fail to
  299. * work correctly.
  300. */
  301. static inline void
  302. ioapic_fix_edge_remote_irr(uint64_t *entry)
  303. {
  304. if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) {
  305. /* Edge-triggered interrupts, make sure remote IRR is zero */
  306. *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR);
  307. }
  308. }
  309. static void
  310. ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
  311. unsigned int size)
  312. {
  313. IOAPICCommonState *s = opaque;
  314. int index;
  315. addr &= 0xff;
  316. trace_ioapic_mem_write(addr, s->ioregsel, size, val);
  317. switch (addr) {
  318. case IOAPIC_IOREGSEL:
  319. s->ioregsel = val;
  320. break;
  321. case IOAPIC_IOWIN:
  322. if (size != 4) {
  323. break;
  324. }
  325. switch (s->ioregsel) {
  326. case IOAPIC_REG_ID:
  327. s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK;
  328. break;
  329. case IOAPIC_REG_VER:
  330. case IOAPIC_REG_ARB:
  331. break;
  332. default:
  333. index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
  334. if (index >= 0 && index < IOAPIC_NUM_PINS) {
  335. uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS;
  336. if (s->ioregsel & 1) {
  337. s->ioredtbl[index] &= 0xffffffff;
  338. s->ioredtbl[index] |= (uint64_t)val << 32;
  339. } else {
  340. s->ioredtbl[index] &= ~0xffffffffULL;
  341. s->ioredtbl[index] |= val;
  342. }
  343. /* restore RO bits */
  344. s->ioredtbl[index] &= IOAPIC_RW_BITS;
  345. s->ioredtbl[index] |= ro_bits;
  346. s->irq_eoi[index] = 0;
  347. ioapic_fix_edge_remote_irr(&s->ioredtbl[index]);
  348. ioapic_service(s);
  349. }
  350. }
  351. break;
  352. case IOAPIC_EOI:
  353. /* Explicit EOI is only supported for IOAPIC version 0x20 */
  354. if (size != 4 || s->version != 0x20) {
  355. break;
  356. }
  357. ioapic_eoi_broadcast(val);
  358. break;
  359. }
  360. ioapic_update_kvm_routes(s);
  361. }
  362. static const MemoryRegionOps ioapic_io_ops = {
  363. .read = ioapic_mem_read,
  364. .write = ioapic_mem_write,
  365. .endianness = DEVICE_NATIVE_ENDIAN,
  366. };
  367. static void ioapic_machine_done_notify(Notifier *notifier, void *data)
  368. {
  369. #ifdef CONFIG_KVM
  370. IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
  371. machine_done);
  372. if (kvm_irqchip_is_split()) {
  373. X86IOMMUState *iommu = x86_iommu_get_default();
  374. if (iommu) {
  375. /* Register this IOAPIC with IOMMU IEC notifier, so that
  376. * when there are IR invalidates, we can be notified to
  377. * update kernel IR cache. */
  378. x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
  379. }
  380. }
  381. #endif
  382. }
  383. #define IOAPIC_VER_DEF 0x20
  384. static void ioapic_realize(DeviceState *dev, Error **errp)
  385. {
  386. IOAPICCommonState *s = IOAPIC_COMMON(dev);
  387. if (s->version != 0x11 && s->version != 0x20) {
  388. error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 "
  389. "(default: 0x%x).", IOAPIC_VER_DEF);
  390. return;
  391. }
  392. memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
  393. "ioapic", 0x1000);
  394. s->delayed_ioapic_service_timer =
  395. timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s);
  396. qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
  397. ioapics[ioapic_no] = s;
  398. s->machine_done.notify = ioapic_machine_done_notify;
  399. qemu_add_machine_init_done_notifier(&s->machine_done);
  400. }
  401. static void ioapic_unrealize(DeviceState *dev, Error **errp)
  402. {
  403. IOAPICCommonState *s = IOAPIC_COMMON(dev);
  404. timer_del(s->delayed_ioapic_service_timer);
  405. timer_free(s->delayed_ioapic_service_timer);
  406. }
  407. static Property ioapic_properties[] = {
  408. DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
  409. DEFINE_PROP_END_OF_LIST(),
  410. };
  411. static void ioapic_class_init(ObjectClass *klass, void *data)
  412. {
  413. IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
  414. DeviceClass *dc = DEVICE_CLASS(klass);
  415. k->realize = ioapic_realize;
  416. k->unrealize = ioapic_unrealize;
  417. /*
  418. * If APIC is in kernel, we need to update the kernel cache after
  419. * migration, otherwise first 24 gsi routes will be invalid.
  420. */
  421. k->post_load = ioapic_update_kvm_routes;
  422. dc->reset = ioapic_reset_common;
  423. dc->props = ioapic_properties;
  424. }
  425. static const TypeInfo ioapic_info = {
  426. .name = TYPE_IOAPIC,
  427. .parent = TYPE_IOAPIC_COMMON,
  428. .instance_size = sizeof(IOAPICCommonState),
  429. .class_init = ioapic_class_init,
  430. };
  431. static void ioapic_register_types(void)
  432. {
  433. type_register_static(&ioapic_info);
  434. }
  435. type_init(ioapic_register_types)