spapr_irq.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601
  1. /*
  2. * QEMU PowerPC sPAPR IRQ interface
  3. *
  4. * Copyright (c) 2018, IBM Corporation.
  5. *
  6. * This code is licensed under the GPL version 2 or later. See the
  7. * COPYING file in the top-level directory.
  8. */
  9. #include "qemu/osdep.h"
  10. #include "qemu/log.h"
  11. #include "qemu/error-report.h"
  12. #include "qapi/error.h"
  13. #include "hw/irq.h"
  14. #include "hw/ppc/spapr.h"
  15. #include "hw/ppc/spapr_cpu_core.h"
  16. #include "hw/ppc/spapr_xive.h"
  17. #include "hw/ppc/xics.h"
  18. #include "hw/ppc/xics_spapr.h"
  19. #include "hw/qdev-properties.h"
  20. #include "cpu-models.h"
  21. #include "system/kvm.h"
  22. #include "trace.h"
  23. QEMU_BUILD_BUG_ON(SPAPR_IRQ_NR_IPIS > SPAPR_XIRQ_BASE);
  24. static const TypeInfo spapr_intc_info = {
  25. .name = TYPE_SPAPR_INTC,
  26. .parent = TYPE_INTERFACE,
  27. .class_size = sizeof(SpaprInterruptControllerClass),
  28. };
  29. static void spapr_irq_msi_init(SpaprMachineState *spapr)
  30. {
  31. if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
  32. /* Legacy mode doesn't use this allocator */
  33. return;
  34. }
  35. spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
  36. spapr->irq_map = bitmap_new(spapr->irq_map_nr);
  37. }
  38. int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
  39. Error **errp)
  40. {
  41. int irq;
  42. /*
  43. * The 'align_mask' parameter of bitmap_find_next_zero_area()
  44. * should be one less than a power of 2; 0 means no
  45. * alignment. Adapt the 'align' value of the former allocator
  46. * to fit the requirements of bitmap_find_next_zero_area()
  47. */
  48. align -= 1;
  49. irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
  50. align);
  51. if (irq == spapr->irq_map_nr) {
  52. error_setg(errp, "can't find a free %d-IRQ block", num);
  53. return -1;
  54. }
  55. bitmap_set(spapr->irq_map, irq, num);
  56. return irq + SPAPR_IRQ_MSI;
  57. }
  58. void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
  59. {
  60. bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
  61. }
  62. int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
  63. SpaprInterruptController *intc,
  64. uint32_t nr_servers,
  65. Error **errp)
  66. {
  67. Error *local_err = NULL;
  68. if (kvm_enabled() && kvm_kernel_irqchip_allowed()) {
  69. if (fn(intc, nr_servers, &local_err) < 0) {
  70. if (kvm_kernel_irqchip_required()) {
  71. error_prepend(&local_err,
  72. "kernel_irqchip requested but unavailable: ");
  73. error_propagate(errp, local_err);
  74. return -1;
  75. }
  76. /*
  77. * We failed to initialize the KVM device, fallback to
  78. * emulated mode
  79. */
  80. error_prepend(&local_err,
  81. "kernel_irqchip allowed but unavailable: ");
  82. error_append_hint(&local_err,
  83. "Falling back to kernel-irqchip=off\n");
  84. warn_report_err(local_err);
  85. }
  86. }
  87. return 0;
  88. }
  89. /*
  90. * XICS IRQ backend.
  91. */
  92. SpaprIrq spapr_irq_xics = {
  93. .xics = true,
  94. .xive = false,
  95. };
  96. /*
  97. * XIVE IRQ backend.
  98. */
  99. SpaprIrq spapr_irq_xive = {
  100. .xics = false,
  101. .xive = true,
  102. };
  103. /*
  104. * Dual XIVE and XICS IRQ backend.
  105. *
  106. * Both interrupt mode, XIVE and XICS, objects are created but the
  107. * machine starts in legacy interrupt mode (XICS). It can be changed
  108. * by the CAS negotiation process and, in that case, the new mode is
  109. * activated after an extra machine reset.
  110. */
  111. /*
  112. * Define values in sync with the XIVE and XICS backend
  113. */
  114. SpaprIrq spapr_irq_dual = {
  115. .xics = true,
  116. .xive = true,
  117. };
  118. static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
  119. {
  120. ERRP_GUARD();
  121. MachineState *machine = MACHINE(spapr);
  122. /*
  123. * Sanity checks on non-P9 machines. On these, XIVE is not
  124. * advertised, see spapr_dt_ov5_platform_support()
  125. */
  126. if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
  127. 0, spapr->max_compat_pvr)) {
  128. /*
  129. * If the 'dual' interrupt mode is selected, force XICS as CAS
  130. * negotiation is useless.
  131. */
  132. if (spapr->irq == &spapr_irq_dual) {
  133. spapr->irq = &spapr_irq_xics;
  134. return 0;
  135. }
  136. /*
  137. * Non-P9 machines using only XIVE is a bogus setup. We have two
  138. * scenarios to take into account because of the compat mode:
  139. *
  140. * 1. POWER7/8 machines should fail to init later on when creating
  141. * the XIVE interrupt presenters because a POWER9 exception
  142. * model is required.
  143. * 2. POWER9 machines using the POWER8 compat mode won't fail and
  144. * will let the OS boot with a partial XIVE setup : DT
  145. * properties but no hcalls.
  146. *
  147. * To cover both and not confuse the OS, add an early failure in
  148. * QEMU.
  149. */
  150. if (!spapr->irq->xics) {
  151. error_setg(errp, "XIVE-only machines require a POWER9 CPU");
  152. return -1;
  153. }
  154. }
  155. /*
  156. * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
  157. * re-created. Same happens with KVM nested guests. Detect that early to
  158. * avoid QEMU to exit later when the guest reboots.
  159. */
  160. if (kvm_enabled() &&
  161. spapr->irq == &spapr_irq_dual &&
  162. kvm_kernel_irqchip_required() &&
  163. xics_kvm_has_broken_disconnect()) {
  164. error_setg(errp,
  165. "KVM is incompatible with ic-mode=dual,kernel-irqchip=on");
  166. error_append_hint(errp,
  167. "This can happen with an old KVM or in a KVM nested guest.\n");
  168. error_append_hint(errp,
  169. "Try without kernel-irqchip or with kernel-irqchip=off.\n");
  170. return -1;
  171. }
  172. return 0;
  173. }
  174. /*
  175. * sPAPR IRQ frontend routines for devices
  176. */
  /*
   * Array initializer listing the machine's interrupt controllers, XICS
   * first and XIVE second. Users of the resulting array skip NULL entries.
   */
  #define ALL_INTCS(spapr_)               \
      {                                   \
          SPAPR_INTC((spapr_)->ics),      \
          SPAPR_INTC((spapr_)->xive),     \
      }
  179. int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
  180. PowerPCCPU *cpu, Error **errp)
  181. {
  182. SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
  183. int i;
  184. int rc;
  185. for (i = 0; i < ARRAY_SIZE(intcs); i++) {
  186. SpaprInterruptController *intc = intcs[i];
  187. if (intc) {
  188. SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
  189. rc = sicc->cpu_intc_create(intc, cpu, errp);
  190. if (rc < 0) {
  191. return rc;
  192. }
  193. }
  194. }
  195. return 0;
  196. }
  197. void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu)
  198. {
  199. SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
  200. int i;
  201. for (i = 0; i < ARRAY_SIZE(intcs); i++) {
  202. SpaprInterruptController *intc = intcs[i];
  203. if (intc) {
  204. SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
  205. sicc->cpu_intc_reset(intc, cpu);
  206. }
  207. }
  208. }
  209. void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu)
  210. {
  211. SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
  212. int i;
  213. for (i = 0; i < ARRAY_SIZE(intcs); i++) {
  214. SpaprInterruptController *intc = intcs[i];
  215. if (intc) {
  216. SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
  217. sicc->cpu_intc_destroy(intc, cpu);
  218. }
  219. }
  220. }
  221. static void spapr_set_irq(void *opaque, int irq, int level)
  222. {
  223. SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
  224. SpaprInterruptControllerClass *sicc
  225. = SPAPR_INTC_GET_CLASS(spapr->active_intc);
  226. sicc->set_irq(spapr->active_intc, irq, level);
  227. }
  228. void spapr_irq_print_info(SpaprMachineState *spapr, GString *buf)
  229. {
  230. SpaprInterruptControllerClass *sicc
  231. = SPAPR_INTC_GET_CLASS(spapr->active_intc);
  232. sicc->print_info(spapr->active_intc, buf);
  233. }
  234. void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
  235. void *fdt, uint32_t phandle)
  236. {
  237. SpaprInterruptControllerClass *sicc
  238. = SPAPR_INTC_GET_CLASS(spapr->active_intc);
  239. sicc->dt(spapr->active_intc, nr_servers, fdt, phandle);
  240. }
  241. uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
  242. {
  243. SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
  244. if (smc->legacy_irq_allocation) {
  245. return smc->nr_xirqs;
  246. } else {
  247. return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
  248. }
  249. }
  250. void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
  251. {
  252. SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
  253. if (kvm_enabled() && kvm_kernel_irqchip_split()) {
  254. error_setg(errp, "kernel_irqchip split mode not supported on pseries");
  255. return;
  256. }
  257. if (spapr_irq_check(spapr, errp) < 0) {
  258. return;
  259. }
  260. /* Initialize the MSI IRQ allocator. */
  261. spapr_irq_msi_init(spapr);
  262. if (spapr->irq->xics) {
  263. Object *obj;
  264. obj = object_new(TYPE_ICS_SPAPR);
  265. object_property_add_child(OBJECT(spapr), "ics", obj);
  266. object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr),
  267. &error_abort);
  268. object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort);
  269. if (!qdev_realize(DEVICE(obj), NULL, errp)) {
  270. return;
  271. }
  272. spapr->ics = ICS_SPAPR(obj);
  273. }
  274. if (spapr->irq->xive) {
  275. uint32_t nr_servers = spapr_max_server_number(spapr);
  276. DeviceState *dev;
  277. int i;
  278. dev = qdev_new(TYPE_SPAPR_XIVE);
  279. qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
  280. /*
  281. * 8 XIVE END structures per CPU. One for each available
  282. * priority
  283. */
  284. qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
  285. object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
  286. &error_abort);
  287. sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
  288. spapr->xive = SPAPR_XIVE(dev);
  289. /* Enable the CPU IPIs */
  290. for (i = 0; i < nr_servers; ++i) {
  291. SpaprInterruptControllerClass *sicc
  292. = SPAPR_INTC_GET_CLASS(spapr->xive);
  293. if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
  294. false, errp) < 0) {
  295. return;
  296. }
  297. }
  298. spapr_xive_hcall_init(spapr);
  299. }
  300. spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
  301. smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
  302. /*
  303. * Mostly we don't actually need this until reset, except that not
  304. * having this set up can cause VFIO devices to issue a
  305. * false-positive warning during realize(), because they don't yet
  306. * have an in-kernel irq chip.
  307. */
  308. spapr_irq_update_active_intc(spapr);
  309. }
  310. int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
  311. {
  312. SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
  313. int i;
  314. SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
  315. int rc;
  316. assert(irq >= SPAPR_XIRQ_BASE);
  317. assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
  318. for (i = 0; i < ARRAY_SIZE(intcs); i++) {
  319. SpaprInterruptController *intc = intcs[i];
  320. if (intc) {
  321. SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
  322. rc = sicc->claim_irq(intc, irq, lsi, errp);
  323. if (rc < 0) {
  324. return rc;
  325. }
  326. }
  327. }
  328. return 0;
  329. }
  330. void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
  331. {
  332. SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
  333. int i, j;
  334. SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
  335. assert(irq >= SPAPR_XIRQ_BASE);
  336. assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
  337. for (i = irq; i < (irq + num); i++) {
  338. for (j = 0; j < ARRAY_SIZE(intcs); j++) {
  339. SpaprInterruptController *intc = intcs[j];
  340. if (intc) {
  341. SpaprInterruptControllerClass *sicc
  342. = SPAPR_INTC_GET_CLASS(intc);
  343. sicc->free_irq(intc, i);
  344. }
  345. }
  346. }
  347. }
  348. qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
  349. {
  350. SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
  351. /*
  352. * This interface is basically for VIO and PHB devices to find the
  353. * right qemu_irq to manipulate, so we only allow access to the
  354. * external irqs for now. Currently anything which needs to
  355. * access the IPIs most naturally gets there via the guest side
  356. * interfaces, we can change this if we need to in future.
  357. */
  358. assert(irq >= SPAPR_XIRQ_BASE);
  359. assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
  360. if (spapr->ics) {
  361. assert(ics_valid_irq(spapr->ics, irq));
  362. }
  363. if (spapr->xive) {
  364. assert(irq < spapr->xive->nr_irqs);
  365. assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
  366. }
  367. return spapr->qirqs[irq];
  368. }
  369. int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
  370. {
  371. SpaprInterruptControllerClass *sicc;
  372. spapr_irq_update_active_intc(spapr);
  373. sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
  374. return sicc->post_load(spapr->active_intc, version_id);
  375. }
  376. void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
  377. {
  378. assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
  379. spapr_irq_update_active_intc(spapr);
  380. }
  381. int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
  382. {
  383. const char *nodename = "interrupt-controller";
  384. int offset, phandle;
  385. offset = fdt_subnode_offset(fdt, 0, nodename);
  386. if (offset < 0) {
  387. error_setg(errp, "Can't find node \"%s\": %s",
  388. nodename, fdt_strerror(offset));
  389. return -1;
  390. }
  391. phandle = fdt_get_phandle(fdt, offset);
  392. if (!phandle) {
  393. error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
  394. return -1;
  395. }
  396. return phandle;
  397. }
  398. static void set_active_intc(SpaprMachineState *spapr,
  399. SpaprInterruptController *new_intc)
  400. {
  401. SpaprInterruptControllerClass *sicc;
  402. uint32_t nr_servers = spapr_max_server_number(spapr);
  403. assert(new_intc);
  404. if (new_intc == spapr->active_intc) {
  405. /* Nothing to do */
  406. return;
  407. }
  408. if (spapr->active_intc) {
  409. sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
  410. if (sicc->deactivate) {
  411. sicc->deactivate(spapr->active_intc);
  412. }
  413. }
  414. sicc = SPAPR_INTC_GET_CLASS(new_intc);
  415. if (sicc->activate) {
  416. sicc->activate(new_intc, nr_servers, &error_fatal);
  417. }
  418. spapr->active_intc = new_intc;
  419. /*
  420. * We've changed the kernel irqchip, let VFIO devices know they
  421. * need to readjust.
  422. */
  423. kvm_irqchip_change_notify();
  424. }
  425. void spapr_irq_update_active_intc(SpaprMachineState *spapr)
  426. {
  427. SpaprInterruptController *new_intc;
  428. if (!spapr->ics) {
  429. /*
  430. * XXX before we run CAS, ov5_cas is initialized empty, which
  431. * indicates XICS, even if we have ic-mode=xive. TODO: clean
  432. * up the CAS path so that we have a clearer way of handling
  433. * this.
  434. */
  435. new_intc = SPAPR_INTC(spapr->xive);
  436. } else if (spapr->ov5_cas
  437. && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
  438. new_intc = SPAPR_INTC(spapr->xive);
  439. } else {
  440. new_intc = SPAPR_INTC(spapr->ics);
  441. }
  442. set_active_intc(spapr, new_intc);
  443. }
  444. /*
  445. * XICS legacy routines - to deprecate one day
  446. */
  447. static int ics_find_free_block(ICSState *ics, int num, int alignnum)
  448. {
  449. int first, i;
  450. for (first = 0; first < ics->nr_irqs; first += alignnum) {
  451. if (num > (ics->nr_irqs - first)) {
  452. return -1;
  453. }
  454. for (i = first; i < first + num; ++i) {
  455. if (!ics_irq_free(ics, i)) {
  456. break;
  457. }
  458. }
  459. if (i == (first + num)) {
  460. return first;
  461. }
  462. }
  463. return -1;
  464. }
  465. int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
  466. {
  467. ICSState *ics = spapr->ics;
  468. int first = -1;
  469. assert(ics);
  470. /*
  471. * MSIMesage::data is used for storing VIRQ so
  472. * it has to be aligned to num to support multiple
  473. * MSI vectors. MSI-X is not affected by this.
  474. * The hint is used for the first IRQ, the rest should
  475. * be allocated continuously.
  476. */
  477. if (align) {
  478. assert((num == 1) || (num == 2) || (num == 4) ||
  479. (num == 8) || (num == 16) || (num == 32));
  480. first = ics_find_free_block(ics, num, num);
  481. } else {
  482. first = ics_find_free_block(ics, num, 1);
  483. }
  484. if (first < 0) {
  485. error_setg(errp, "can't find a free %d-IRQ block", num);
  486. return -1;
  487. }
  488. return first + ics->offset;
  489. }
  490. SpaprIrq spapr_irq_xics_legacy = {
  491. .xics = true,
  492. .xive = false,
  493. };
  494. static void spapr_irq_register_types(void)
  495. {
  496. type_register_static(&spapr_intc_info);
  497. }
  498. type_init(spapr_irq_register_types)