xics_kvm.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510
  1. /*
  2. * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
  3. *
  4. * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics, in-kernel emulation
  5. *
  6. * Copyright (c) 2013 David Gibson, IBM Corporation.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. *
  26. */
  27. #include "qemu/osdep.h"
  28. #include "qapi/error.h"
  29. #include "qemu-common.h"
  30. #include "cpu.h"
  31. #include "trace.h"
  32. #include "sysemu/kvm.h"
  33. #include "hw/ppc/spapr.h"
  34. #include "hw/ppc/spapr_cpu_core.h"
  35. #include "hw/ppc/xics.h"
  36. #include "hw/ppc/xics_spapr.h"
  37. #include "kvm_ppc.h"
  38. #include "qemu/config-file.h"
  39. #include "qemu/error-report.h"
  40. #include <sys/ioctl.h>
  41. static int kernel_xics_fd = -1;
  42. typedef struct KVMEnabledICP {
  43. unsigned long vcpu_id;
  44. QLIST_ENTRY(KVMEnabledICP) node;
  45. } KVMEnabledICP;
  46. static QLIST_HEAD(, KVMEnabledICP)
  47. kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps);
  48. static void kvm_disable_icps(void)
  49. {
  50. KVMEnabledICP *enabled_icp, *next;
  51. QLIST_FOREACH_SAFE(enabled_icp, &kvm_enabled_icps, node, next) {
  52. QLIST_REMOVE(enabled_icp, node);
  53. g_free(enabled_icp);
  54. }
  55. }
  56. /*
  57. * ICP-KVM
  58. */
  59. void icp_get_kvm_state(ICPState *icp)
  60. {
  61. uint64_t state;
  62. int ret;
  63. /* The KVM XICS device is not in use */
  64. if (kernel_xics_fd == -1) {
  65. return;
  66. }
  67. /* ICP for this CPU thread is not in use, exiting */
  68. if (!icp->cs) {
  69. return;
  70. }
  71. ret = kvm_get_one_reg(icp->cs, KVM_REG_PPC_ICP_STATE, &state);
  72. if (ret != 0) {
  73. error_report("Unable to retrieve KVM interrupt controller state"
  74. " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno));
  75. exit(1);
  76. }
  77. icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
  78. icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
  79. & KVM_REG_PPC_ICP_MFRR_MASK;
  80. icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
  81. & KVM_REG_PPC_ICP_PPRI_MASK;
  82. }
  83. static void do_icp_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
  84. {
  85. icp_get_kvm_state(arg.host_ptr);
  86. }
  87. void icp_synchronize_state(ICPState *icp)
  88. {
  89. if (icp->cs) {
  90. run_on_cpu(icp->cs, do_icp_synchronize_state, RUN_ON_CPU_HOST_PTR(icp));
  91. }
  92. }
  93. int icp_set_kvm_state(ICPState *icp, Error **errp)
  94. {
  95. uint64_t state;
  96. int ret;
  97. /* The KVM XICS device is not in use */
  98. if (kernel_xics_fd == -1) {
  99. return 0;
  100. }
  101. /* ICP for this CPU thread is not in use, exiting */
  102. if (!icp->cs) {
  103. return 0;
  104. }
  105. state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
  106. | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
  107. | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
  108. ret = kvm_set_one_reg(icp->cs, KVM_REG_PPC_ICP_STATE, &state);
  109. if (ret < 0) {
  110. error_setg_errno(errp, -ret,
  111. "Unable to restore KVM interrupt controller state (0x%"
  112. PRIx64 ") for CPU %ld", state,
  113. kvm_arch_vcpu_id(icp->cs));
  114. return ret;
  115. }
  116. return 0;
  117. }
  118. void icp_kvm_realize(DeviceState *dev, Error **errp)
  119. {
  120. ICPState *icp = ICP(dev);
  121. CPUState *cs;
  122. KVMEnabledICP *enabled_icp;
  123. unsigned long vcpu_id;
  124. int ret;
  125. /* The KVM XICS device is not in use */
  126. if (kernel_xics_fd == -1) {
  127. return;
  128. }
  129. cs = icp->cs;
  130. vcpu_id = kvm_arch_vcpu_id(cs);
  131. /*
  132. * If we are reusing a parked vCPU fd corresponding to the CPU
  133. * which was hot-removed earlier we don't have to renable
  134. * KVM_CAP_IRQ_XICS capability again.
  135. */
  136. QLIST_FOREACH(enabled_icp, &kvm_enabled_icps, node) {
  137. if (enabled_icp->vcpu_id == vcpu_id) {
  138. return;
  139. }
  140. }
  141. ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, vcpu_id);
  142. if (ret < 0) {
  143. Error *local_err = NULL;
  144. error_setg(&local_err, "Unable to connect CPU%ld to kernel XICS: %s",
  145. vcpu_id, strerror(errno));
  146. if (errno == ENOSPC) {
  147. error_append_hint(&local_err, "Try -smp maxcpus=N with N < %u\n",
  148. MACHINE(qdev_get_machine())->smp.max_cpus);
  149. }
  150. error_propagate(errp, local_err);
  151. return;
  152. }
  153. enabled_icp = g_malloc(sizeof(*enabled_icp));
  154. enabled_icp->vcpu_id = vcpu_id;
  155. QLIST_INSERT_HEAD(&kvm_enabled_icps, enabled_icp, node);
  156. }
  157. /*
  158. * ICS-KVM
  159. */
  160. void ics_get_kvm_state(ICSState *ics)
  161. {
  162. uint64_t state;
  163. int i;
  164. /* The KVM XICS device is not in use */
  165. if (kernel_xics_fd == -1) {
  166. return;
  167. }
  168. for (i = 0; i < ics->nr_irqs; i++) {
  169. ICSIRQState *irq = &ics->irqs[i];
  170. if (ics_irq_free(ics, i)) {
  171. continue;
  172. }
  173. kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES,
  174. i + ics->offset, &state, false, &error_fatal);
  175. irq->server = state & KVM_XICS_DESTINATION_MASK;
  176. irq->saved_priority = (state >> KVM_XICS_PRIORITY_SHIFT)
  177. & KVM_XICS_PRIORITY_MASK;
  178. /*
  179. * To be consistent with the software emulation in xics.c, we
  180. * split out the masked state + priority that we get from the
  181. * kernel into 'current priority' (0xff if masked) and
  182. * 'saved priority' (if masked, this is the priority the
  183. * interrupt had before it was masked). Masking and unmasking
  184. * are done with the ibm,int-off and ibm,int-on RTAS calls.
  185. */
  186. if (state & KVM_XICS_MASKED) {
  187. irq->priority = 0xff;
  188. } else {
  189. irq->priority = irq->saved_priority;
  190. }
  191. irq->status = 0;
  192. if (state & KVM_XICS_PENDING) {
  193. if (state & KVM_XICS_LEVEL_SENSITIVE) {
  194. irq->status |= XICS_STATUS_ASSERTED;
  195. } else {
  196. /*
  197. * A pending edge-triggered interrupt (or MSI)
  198. * must have been rejected previously when we
  199. * first detected it and tried to deliver it,
  200. * so mark it as pending and previously rejected
  201. * for consistency with how xics.c works.
  202. */
  203. irq->status |= XICS_STATUS_MASKED_PENDING
  204. | XICS_STATUS_REJECTED;
  205. }
  206. }
  207. if (state & KVM_XICS_PRESENTED) {
  208. irq->status |= XICS_STATUS_PRESENTED;
  209. }
  210. if (state & KVM_XICS_QUEUED) {
  211. irq->status |= XICS_STATUS_QUEUED;
  212. }
  213. }
  214. }
  215. void ics_synchronize_state(ICSState *ics)
  216. {
  217. ics_get_kvm_state(ics);
  218. }
  219. int ics_set_kvm_state_one(ICSState *ics, int srcno, Error **errp)
  220. {
  221. uint64_t state;
  222. ICSIRQState *irq = &ics->irqs[srcno];
  223. int ret;
  224. /* The KVM XICS device is not in use */
  225. if (kernel_xics_fd == -1) {
  226. return 0;
  227. }
  228. state = irq->server;
  229. state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK)
  230. << KVM_XICS_PRIORITY_SHIFT;
  231. if (irq->priority != irq->saved_priority) {
  232. assert(irq->priority == 0xff);
  233. }
  234. if (irq->priority == 0xff) {
  235. state |= KVM_XICS_MASKED;
  236. }
  237. if (irq->flags & XICS_FLAGS_IRQ_LSI) {
  238. state |= KVM_XICS_LEVEL_SENSITIVE;
  239. if (irq->status & XICS_STATUS_ASSERTED) {
  240. state |= KVM_XICS_PENDING;
  241. }
  242. } else {
  243. if (irq->status & XICS_STATUS_MASKED_PENDING) {
  244. state |= KVM_XICS_PENDING;
  245. }
  246. }
  247. if (irq->status & XICS_STATUS_PRESENTED) {
  248. state |= KVM_XICS_PRESENTED;
  249. }
  250. if (irq->status & XICS_STATUS_QUEUED) {
  251. state |= KVM_XICS_QUEUED;
  252. }
  253. ret = kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES,
  254. srcno + ics->offset, &state, true, errp);
  255. if (ret < 0) {
  256. return ret;
  257. }
  258. return 0;
  259. }
  260. int ics_set_kvm_state(ICSState *ics, Error **errp)
  261. {
  262. int i;
  263. /* The KVM XICS device is not in use */
  264. if (kernel_xics_fd == -1) {
  265. return 0;
  266. }
  267. for (i = 0; i < ics->nr_irqs; i++) {
  268. int ret;
  269. if (ics_irq_free(ics, i)) {
  270. continue;
  271. }
  272. ret = ics_set_kvm_state_one(ics, i, errp);
  273. if (ret < 0) {
  274. return ret;
  275. }
  276. }
  277. return 0;
  278. }
  279. void ics_kvm_set_irq(ICSState *ics, int srcno, int val)
  280. {
  281. struct kvm_irq_level args;
  282. int rc;
  283. /* The KVM XICS device should be in use */
  284. assert(kernel_xics_fd != -1);
  285. args.irq = srcno + ics->offset;
  286. if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
  287. if (!val) {
  288. return;
  289. }
  290. args.level = KVM_INTERRUPT_SET;
  291. } else {
  292. args.level = val ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
  293. }
  294. rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
  295. if (rc < 0) {
  296. perror("kvm_irq_line");
  297. }
  298. }
  299. int xics_kvm_connect(SpaprInterruptController *intc, uint32_t nr_servers,
  300. Error **errp)
  301. {
  302. ICSState *ics = ICS_SPAPR(intc);
  303. int rc;
  304. CPUState *cs;
  305. Error *local_err = NULL;
  306. /*
  307. * The KVM XICS device already in use. This is the case when
  308. * rebooting under the XICS-only interrupt mode.
  309. */
  310. if (kernel_xics_fd != -1) {
  311. return 0;
  312. }
  313. if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
  314. error_setg(errp,
  315. "KVM and IRQ_XICS capability must be present for in-kernel XICS");
  316. return -1;
  317. }
  318. rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive");
  319. if (rc < 0) {
  320. error_setg_errno(&local_err, -rc,
  321. "kvmppc_define_rtas_kernel_token: ibm,set-xive");
  322. goto fail;
  323. }
  324. rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive");
  325. if (rc < 0) {
  326. error_setg_errno(&local_err, -rc,
  327. "kvmppc_define_rtas_kernel_token: ibm,get-xive");
  328. goto fail;
  329. }
  330. rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on");
  331. if (rc < 0) {
  332. error_setg_errno(&local_err, -rc,
  333. "kvmppc_define_rtas_kernel_token: ibm,int-on");
  334. goto fail;
  335. }
  336. rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off");
  337. if (rc < 0) {
  338. error_setg_errno(&local_err, -rc,
  339. "kvmppc_define_rtas_kernel_token: ibm,int-off");
  340. goto fail;
  341. }
  342. /* Create the KVM XICS device */
  343. rc = kvm_create_device(kvm_state, KVM_DEV_TYPE_XICS, false);
  344. if (rc < 0) {
  345. error_setg_errno(&local_err, -rc, "Error on KVM_CREATE_DEVICE for XICS");
  346. goto fail;
  347. }
  348. /* Tell KVM about the # of VCPUs we may have (POWER9 and newer only) */
  349. if (kvm_device_check_attr(rc, KVM_DEV_XICS_GRP_CTRL,
  350. KVM_DEV_XICS_NR_SERVERS)) {
  351. if (kvm_device_access(rc, KVM_DEV_XICS_GRP_CTRL,
  352. KVM_DEV_XICS_NR_SERVERS, &nr_servers, true,
  353. &local_err)) {
  354. goto fail;
  355. }
  356. }
  357. kernel_xics_fd = rc;
  358. kvm_kernel_irqchip = true;
  359. kvm_msi_via_irqfd_allowed = true;
  360. kvm_gsi_direct_mapping = true;
  361. /* Create the presenters */
  362. CPU_FOREACH(cs) {
  363. PowerPCCPU *cpu = POWERPC_CPU(cs);
  364. icp_kvm_realize(DEVICE(spapr_cpu_state(cpu)->icp), &local_err);
  365. if (local_err) {
  366. goto fail;
  367. }
  368. }
  369. /* Update the KVM sources */
  370. ics_set_kvm_state(ics, &local_err);
  371. if (local_err) {
  372. goto fail;
  373. }
  374. /* Connect the presenters to the initial VCPUs of the machine */
  375. CPU_FOREACH(cs) {
  376. PowerPCCPU *cpu = POWERPC_CPU(cs);
  377. icp_set_kvm_state(spapr_cpu_state(cpu)->icp, &local_err);
  378. if (local_err) {
  379. goto fail;
  380. }
  381. }
  382. return 0;
  383. fail:
  384. error_propagate(errp, local_err);
  385. xics_kvm_disconnect(intc);
  386. return -1;
  387. }
  388. void xics_kvm_disconnect(SpaprInterruptController *intc)
  389. {
  390. /*
  391. * Only on P9 using the XICS-on XIVE KVM device:
  392. *
  393. * When the KVM device fd is closed, the device is destroyed and
  394. * removed from the list of devices of the VM. The VCPU presenters
  395. * are also detached from the device.
  396. */
  397. if (kernel_xics_fd != -1) {
  398. close(kernel_xics_fd);
  399. kernel_xics_fd = -1;
  400. }
  401. kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
  402. kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
  403. kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
  404. kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
  405. kvm_kernel_irqchip = false;
  406. kvm_msi_via_irqfd_allowed = false;
  407. kvm_gsi_direct_mapping = false;
  408. /* Clear the presenter from the VCPUs */
  409. kvm_disable_icps();
  410. }
  411. /*
  412. * This is a heuristic to detect older KVMs on POWER9 hosts that don't
  413. * support destruction of a KVM XICS device while the VM is running.
  414. * Required to start a spapr machine with ic-mode=dual,kernel-irqchip=on.
  415. */
  416. bool xics_kvm_has_broken_disconnect(SpaprMachineState *spapr)
  417. {
  418. int rc;
  419. rc = kvm_create_device(kvm_state, KVM_DEV_TYPE_XICS, false);
  420. if (rc < 0) {
  421. /*
  422. * The error is ignored on purpose. The KVM XICS setup code
  423. * will catch it again anyway. The goal here is to see if
  424. * close() actually destroys the device or not.
  425. */
  426. return false;
  427. }
  428. close(rc);
  429. rc = kvm_create_device(kvm_state, KVM_DEV_TYPE_XICS, false);
  430. if (rc >= 0) {
  431. close(rc);
  432. return false;
  433. }
  434. return errno == EEXIST;
  435. }