/*
 * pcie_aer.c
 *
 * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "sysemu/sysemu.h"
#include "qapi/qmp/qdict.h"
#include "migration/vmstate.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pcie.h"
#include "hw/pci/msix.h"
#include "hw/pci/msi.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pcie_regs.h"
#include "qapi/error.h"

//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#else
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#endif
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2

typedef struct PCIEErrorDetails {
    const char *id;
    const char *root_bus;
    int bus;
    int devfn;
} PCIEErrorDetails;

/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
{
    switch (status) {
    case PCI_ERR_UNC_INTN:
    case PCI_ERR_UNC_DLP:
    case PCI_ERR_UNC_SDN:
    case PCI_ERR_UNC_RX_OVER:
    case PCI_ERR_UNC_FCP:
    case PCI_ERR_UNC_MALF_TLP:
        return PCI_ERR_ROOT_CMD_FATAL_EN;
    case PCI_ERR_UNC_POISON_TLP:
    case PCI_ERR_UNC_ECRC:
    case PCI_ERR_UNC_UNSUP:
    case PCI_ERR_UNC_COMP_TIME:
    case PCI_ERR_UNC_COMP_ABORT:
    case PCI_ERR_UNC_UNX_COMP:
    case PCI_ERR_UNC_ACSV:
    case PCI_ERR_UNC_MCBTLP:
    case PCI_ERR_UNC_ATOP_EBLOCKED:
    case PCI_ERR_UNC_TLP_PRF_BLOCKED:
        return PCI_ERR_ROOT_CMD_NONFATAL_EN;
    default:
        abort();
        break;
    }
    return PCI_ERR_ROOT_CMD_FATAL_EN;
}

static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
{
    if (aer_log->log_num == aer_log->log_max) {
        return -1;
    }
    memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
    aer_log->log_num++;
    return 0;
}

static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
{
    assert(aer_log->log_num);
    *err = aer_log->log[0];
    aer_log->log_num--;
    memmove(&aer_log->log[0], &aer_log->log[1],
            aer_log->log_num * sizeof *err);
}

static void aer_log_clear_all_err(PCIEAERLog *aer_log)
{
    aer_log->log_num = 0;
}

int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
                  uint16_t size, Error **errp)
{
    pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, cap_ver,
                        offset, size);
    dev->exp.aer_cap = offset;

    /* reject unreasonably large values to bound memory usage */
    if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
        error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
                   "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
        return -EINVAL;
    }
    dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
                                     dev->exp.aer_log.log_max);

    pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
                 PCI_ERR_UNC_SUPPORTED);

    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
                 PCI_ERR_UNC_SEVERITY_DEFAULT);
    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
                 PCI_ERR_UNC_SUPPORTED);

    pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
                               PCI_ERR_COR_SUPPORTED);

    pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
                 PCI_ERR_COR_MASK_DEFAULT);
    pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
                 PCI_ERR_COR_SUPPORTED);

    /* capabilities and control. multiple header logging is supported */
    if (dev->exp.aer_log.log_max > 0) {
        pci_set_long(dev->config + offset + PCI_ERR_CAP,
                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
                     PCI_ERR_CAP_MHRC);
        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
                     PCI_ERR_CAP_MHRE);
    } else {
        pci_set_long(dev->config + offset + PCI_ERR_CAP,
                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
    }

    switch (pcie_cap_get_type(dev)) {
    case PCI_EXP_TYPE_ROOT_PORT:
        /* this case will be set by pcie_aer_root_init() */
        /* fallthrough */
    case PCI_EXP_TYPE_DOWNSTREAM:
    case PCI_EXP_TYPE_UPSTREAM:
        pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
                                   PCI_BRIDGE_CTL_SERR);
        pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
        break;
    default:
        /* nothing */
        break;
    }

    return 0;
}

void pcie_aer_exit(PCIDevice *dev)
{
    g_free(dev->exp.aer_log.log);
}
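
/*
 * Usage sketch (illustrative only; the device name, config-space offset and
 * size below are assumptions, not taken from this file): a PCIe device model
 * typically calls pcie_aer_init() from its realize hook, after the PCI
 * Express capability itself has been set up, and pcie_aer_exit() from its
 * exit hook:
 *
 *     static void foo_pcie_realize(PCIDevice *d, Error **errp)
 *     {
 *         ...
 *         if (pcie_aer_init(d, PCI_ERR_VER, 0x100, PCI_ERR_SIZEOF, errp) < 0) {
 *             return;
 *         }
 *     }
 *
 *     static void foo_pcie_exit(PCIDevice *d)
 *     {
 *         pcie_aer_exit(d);
 *         ...
 *     }
 */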

static void pcie_aer_update_uncor_status(PCIDevice *dev)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    PCIEAERLog *aer_log = &dev->exp.aer_log;

    uint16_t i;
    for (i = 0; i < aer_log->log_num; i++) {
        pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
                                   dev->exp.aer_log.log[i].status);
    }
}

/*
 * return value:
 * true: error message needs to be sent up
 * false: error message is masked
 *
 * 6.2.6 Error Message Control
 * Figure 6-3
 * all pci express devices part
 */
static bool
pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
{
    if (!(pcie_aer_msg_is_uncor(msg) &&
          (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
        return false;
    }

    /* Signaled System Error
     *
     * 7.5.1.1 Command register
     * Bit 8 SERR# Enable
     *
     * When Set, this bit enables reporting of Non-fatal and Fatal
     * errors detected by the Function to the Root Complex. Note that
     * errors are reported if enabled either through this bit or through
     * the PCI Express specific bits in the Device Control register (see
     * Section 7.8.4).
     */
    pci_word_test_and_set_mask(dev->config + PCI_STATUS,
                               PCI_STATUS_SIG_SYSTEM_ERROR);

    if (!(msg->severity &
          pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
        return false;
    }

    /* send up error message */
    return true;
}

/*
 * return value:
 * true: error message is sent up
 * false: error message is masked
 *
 * 6.2.6 Error Message Control
 * Figure 6-3
 * virtual pci bridge part
 */
static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
{
    uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);

    if (pcie_aer_msg_is_uncor(msg)) {
        /* Received System Error */
        pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
    }

    if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
        return false;
    }
    return true;
}

void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    assert(vector < PCI_ERR_ROOT_IRQ_MAX);
    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
                                 PCI_ERR_ROOT_IRQ);
    pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
                               vector << PCI_ERR_ROOT_IRQ_SHIFT);
}

static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
    return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
}

/* Given a status register, get corresponding bits in the command register */
static uint32_t pcie_aer_status_to_cmd(uint32_t status)
{
    uint32_t cmd = 0;
    if (status & PCI_ERR_ROOT_COR_RCV) {
        cmd |= PCI_ERR_ROOT_CMD_COR_EN;
    }
    if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
        cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
    }
    if (status & PCI_ERR_ROOT_FATAL_RCV) {
        cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
    }
    return cmd;
}

static void pcie_aer_root_notify(PCIDevice *dev)
{
    if (msix_enabled(dev)) {
        msix_notify(dev, pcie_aer_root_get_vector(dev));
    } else if (msi_enabled(dev)) {
        msi_notify(dev, pcie_aer_root_get_vector(dev));
    } else {
        pci_irq_assert(dev);
    }
}

/*
 * 6.2.6 Error Message Control
 * Figure 6-3
 * root port part
 */
static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
{
    uint16_t cmd;
    uint8_t *aer_cap;
    uint32_t root_cmd;
    uint32_t root_status, prev_status;

    cmd = pci_get_word(dev->config + PCI_COMMAND);
    aer_cap = dev->config + dev->exp.aer_cap;
    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
    prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);

    if (cmd & PCI_COMMAND_SERR) {
        /* System Error.
         *
         * The way a System Error is reported is platform specific and
         * isn't implemented in QEMU right now, so the error is simply
         * discarded here. An OS that cares about AER receives errors
         * through the native AER mechanism, so this doesn't matter much.
         */
    }

    /* Error Message Received: Root Error Status register */
    switch (msg->severity) {
    case PCI_ERR_ROOT_CMD_COR_EN:
        if (root_status & PCI_ERR_ROOT_COR_RCV) {
            root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
        } else {
            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
                         msg->source_id);
        }
        root_status |= PCI_ERR_ROOT_COR_RCV;
        break;
    case PCI_ERR_ROOT_CMD_NONFATAL_EN:
        root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
        break;
    case PCI_ERR_ROOT_CMD_FATAL_EN:
        if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
            root_status |= PCI_ERR_ROOT_FIRST_FATAL;
        }
        root_status |= PCI_ERR_ROOT_FATAL_RCV;
        break;
    default:
        abort();
        break;
    }
    if (pcie_aer_msg_is_uncor(msg)) {
        if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
            root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
        } else {
            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
                         PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
        }
        root_status |= PCI_ERR_ROOT_UNCOR_RCV;
    }
    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);

    /* 6.2.4.1.2 Interrupt Generation */
    /* All the above did was set some bits in the status register,
     * specifically those that match the message severity.
     * The code below relies on this fact. */
    if (!(root_cmd & msg->severity) ||
        (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
        /* Condition is not being set or was already true so nothing to do. */
        return;
    }

    pcie_aer_root_notify(dev);
}

/*
 * 6.2.6 Error Message Control Figure 6-3
 *
 * Walk up the bus tree from the device, propagate the error message.
 */
static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
{
    uint8_t type;

    while (dev) {
        if (!pci_is_express(dev)) {
            /* just ignore it */
            /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
             * Consider e.g. a PCI bridge above a PCI Express device. */
            return;
        }

        type = pcie_cap_get_type(dev);
        if ((type == PCI_EXP_TYPE_ROOT_PORT ||
             type == PCI_EXP_TYPE_UPSTREAM ||
             type == PCI_EXP_TYPE_DOWNSTREAM) &&
            !pcie_aer_msg_vbridge(dev, msg)) {
            return;
        }
        if (!pcie_aer_msg_alldev(dev, msg)) {
            return;
        }
        if (type == PCI_EXP_TYPE_ROOT_PORT) {
            pcie_aer_msg_root_port(dev, msg);
            /* Root port can notify system itself,
               or send the error message to root complex event collector. */
            /*
             * if root port is associated with an event collector,
             * return the root complex event collector here.
             * For now root complex event collector isn't supported.
             */
            return;
        }
        dev = pci_bridge_get_device(pci_get_bus(dev));
    }
}

static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    uint8_t first_bit = ctz32(err->status);
    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
    int i;

    assert(err->status);
    assert(!(err->status & (err->status - 1)));

    errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
    errcap |= PCI_ERR_CAP_FEP(first_bit);

    if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
        for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
            /* 7.10.8 Header Log Register */
            uint8_t *header_log =
                aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
            stl_be_p(header_log, err->header[i]);
        }
    } else {
        assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
        memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
    }

    if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
        (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
         PCI_EXP_DEVCAP2_EETLPP)) {
        for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
            /* 7.10.12 TLP Prefix Log Register */
            uint8_t *prefix_log =
                aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
            stl_be_p(prefix_log, err->prefix[i]);
        }
        errcap |= PCI_ERR_CAP_TLP;
    } else {
        memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
               PCI_ERR_TLP_PREFIX_LOG_SIZE);
    }
    pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
}

static void pcie_aer_clear_log(PCIDevice *dev)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;

    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
                                 PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);

    memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
    memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
}

static void pcie_aer_clear_error(PCIDevice *dev)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
    PCIEAERLog *aer_log = &dev->exp.aer_log;
    PCIEAERErr err;

    if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
        pcie_aer_clear_log(dev);
        return;
    }

    /*
     * If more errors are queued, set the corresponding bits in the
     * uncorrectable error status register.
     * We emulate the uncorrectable error status register as W1CS, so the
     * bits are set here again to support multiple error recording.
     *
     * 6.2.4.2 Multiple Error Handling (Advanced Error Reporting Capability)
     */
    pcie_aer_update_uncor_status(dev);

    aer_log_del_err(aer_log, &err);
    pcie_aer_update_log(dev, &err);
}

static int pcie_aer_record_error(PCIDevice *dev,
                                 const PCIEAERErr *err)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
    int fep = PCI_ERR_CAP_FEP(errcap);

    assert(err->status);
    assert(!(err->status & (err->status - 1)));

    if (errcap & PCI_ERR_CAP_MHRE &&
        (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
        /* Not first error. queue error */
        if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
            /* overflow */
            return -1;
        }
        return 0;
    }

    pcie_aer_update_log(dev, err);
    return 0;
}

typedef struct PCIEAERInject {
    PCIDevice *dev;
    uint8_t *aer_cap;
    const PCIEAERErr *err;
    uint16_t devctl;
    uint16_t devsta;
    uint32_t error_status;
    bool unsupported_request;
    bool log_overflow;
    PCIEAERMsg msg;
} PCIEAERInject;

static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
                                      uint32_t uncor_status,
                                      bool is_advisory_nonfatal)
{
    PCIDevice *dev = inj->dev;

    inj->devsta |= PCI_EXP_DEVSTA_CED;
    if (inj->unsupported_request) {
        inj->devsta |= PCI_EXP_DEVSTA_URD;
    }
    pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);

    if (inj->aer_cap) {
        uint32_t mask;
        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
                                   inj->error_status);
        mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
        if (mask & inj->error_status) {
            return false;
        }
        if (is_advisory_nonfatal) {
            uint32_t uncor_mask =
                pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
            if (!(uncor_mask & uncor_status)) {
                inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
            }
            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
                                       uncor_status);
        }
    }

    if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
        return false;
    }
    if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
        return false;
    }

    inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
    return true;
}

static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
{
    PCIDevice *dev = inj->dev;
    uint16_t cmd;

    if (is_fatal) {
        inj->devsta |= PCI_EXP_DEVSTA_FED;
    } else {
        inj->devsta |= PCI_EXP_DEVSTA_NFED;
    }
    if (inj->unsupported_request) {
        inj->devsta |= PCI_EXP_DEVSTA_URD;
    }
    pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);

    if (inj->aer_cap) {
        uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
        if (mask & inj->error_status) {
            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
                                       inj->error_status);
            return false;
        }

        inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
                                   inj->error_status);
    }

    cmd = pci_get_word(dev->config + PCI_COMMAND);
    if (inj->unsupported_request &&
        !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
        return false;
    }

    if (is_fatal) {
        if (!((cmd & PCI_COMMAND_SERR) ||
              (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
            return false;
        }
        inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
    } else {
        if (!((cmd & PCI_COMMAND_SERR) ||
              (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
            return false;
        }
        inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
    }
    return true;
}

/*
 * A non-function-specific error must be recorded in all functions.
 * That is the responsibility of the caller of this function, as is
 * determining which function should report the error.
 *
 * 6.2.4 Error Logging
 * 6.2.5 Sequence of Device Error Signaling and Logging Operations
 * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
 *             Operations
 */
static int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
{
    uint8_t *aer_cap = NULL;
    uint16_t devctl = 0;
    uint16_t devsta = 0;
    uint32_t error_status = err->status;
    PCIEAERInject inj;

    if (!pci_is_express(dev)) {
        return -ENOSYS;
    }

    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
        error_status &= PCI_ERR_COR_SUPPORTED;
    } else {
        error_status &= PCI_ERR_UNC_SUPPORTED;
    }

    /* invalid status bit. one and only one bit must be set */
    if (!error_status || (error_status & (error_status - 1))) {
        return -EINVAL;
    }

    if (dev->exp.aer_cap) {
        uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
        aer_cap = dev->config + dev->exp.aer_cap;
        devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
        devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
    }

    inj.dev = dev;
    inj.aer_cap = aer_cap;
    inj.err = err;
    inj.devctl = devctl;
    inj.devsta = devsta;
    inj.error_status = error_status;
    inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
        err->status == PCI_ERR_UNC_UNSUP;
    inj.log_overflow = false;

    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
        if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
            return 0;
        }
    } else {
        bool is_fatal =
            pcie_aer_uncor_default_severity(error_status) ==
            PCI_ERR_ROOT_CMD_FATAL_EN;
        if (aer_cap) {
            is_fatal =
                error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
        }
        if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
            inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
            if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
                return 0;
            }
        } else {
            if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
                return 0;
            }
        }
    }

    /* send up error message */
    inj.msg.source_id = err->source_id;
    pcie_aer_msg(dev, &inj.msg);

    if (inj.log_overflow) {
        PCIEAERErr header_log_overflow = {
            .status = PCI_ERR_COR_HL_OVERFLOW,
            .flags = PCIE_AER_ERR_IS_CORRECTABLE,
        };
        int ret = pcie_aer_inject_error(dev, &header_log_overflow);
        assert(!ret);
    }
    return 0;
}
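
/*
 * Illustrative sketch (not code from this file): injecting a single
 * uncorrectable error programmatically amounts to filling in a PCIEAERErr
 * with exactly one supported status bit set and calling
 * pcie_aer_inject_error(), much like the monitor path below does:
 *
 *     PCIEAERErr err = {
 *         .status = PCI_ERR_UNC_UNSUP,           // exactly one bit set
 *         .source_id = pci_requester_id(dev),    // requester of the message
 *         .flags = 0,                            // uncorrectable, no header
 *     };
 *     if (pcie_aer_inject_error(dev, &err) < 0) {
 *         // invalid status bit or not a PCI Express device
 *     }
 */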

void pcie_aer_write_config(PCIDevice *dev,
                           uint32_t addr, uint32_t val, int len)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
    uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
    uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);

    /* uncorrectable error */
    if (!(uncorsta & first_error)) {
        /* the bit that corresponds to the first error is cleared */
        pcie_aer_clear_error(dev);
    } else if (errcap & PCI_ERR_CAP_MHRE) {
        /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared,
         * nothing should happen, so we have to revert the modification to
         * the register.
         */
        pcie_aer_update_uncor_status(dev);
    } else {
        /* capability & control:
         * PCI_ERR_CAP_MHRE might have been cleared, so drop the queued errors.
         */
        aer_log_clear_all_err(&dev->exp.aer_log);
    }
}

void pcie_aer_root_init(PCIDevice *dev)
{
    uint16_t pos = dev->exp.aer_cap;

    pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
                 PCI_ERR_ROOT_CMD_EN_MASK);
    pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
                 PCI_ERR_ROOT_STATUS_REPORT_MASK);
    /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
     * device-specific method.
     */
    pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
                 ~PCI_ERR_ROOT_IRQ);
}

void pcie_aer_root_reset(PCIDevice *dev)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;

    pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);

    /*
     * Advanced Error Interrupt Message Number in Root Error Status Register
     * must be updated by chip dependent code because it's chip dependent
     * which number is used.
     */
}

void pcie_aer_root_write_config(PCIDevice *dev,
                                uint32_t addr, uint32_t val, int len,
                                uint32_t root_cmd_prev)
{
    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
    uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
    uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);

    /* 6.2.4.1.2 Interrupt Generation */
    if (!msix_enabled(dev) && !msi_enabled(dev)) {
        pci_set_irq(dev, !!(root_cmd & enabled_cmd));
        return;
    }

    if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
        /* Send MSI on transition from false to true. */
        return;
    }

    pcie_aer_root_notify(dev);
}
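
/*
 * Caller-side sketch (an assumption modelled on how root port models use
 * this helper, not code from this file): the device's config_write callback
 * snapshots the Root Error Command register before applying the write, then
 * lets pcie_aer_root_write_config() decide whether an interrupt-raising
 * transition happened:
 *
 *     static void foo_rp_write_config(PCIDevice *d, uint32_t addr,
 *                                     uint32_t val, int len)
 *     {
 *         uint32_t root_cmd =
 *             pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND);
 *         pci_bridge_write_config(d, addr, val, len);
 *         pcie_aer_root_write_config(d, addr, val, len, root_cmd);
 *     }
 */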

static const VMStateDescription vmstate_pcie_aer_err = {
    .name = "PCIE_AER_ERROR",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(status, PCIEAERErr),
        VMSTATE_UINT16(source_id, PCIEAERErr),
        VMSTATE_UINT16(flags, PCIEAERErr),
        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
        VMSTATE_END_OF_LIST()
    }
};

static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
{
    PCIEAERLog *s = opaque;

    return s->log_num <= s->log_max;
}

const VMStateDescription vmstate_pcie_aer_log = {
    .name = "PCIE_AER_ERROR_LOG",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(log_num, PCIEAERLog),
        VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL),
        VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
                                             vmstate_pcie_aer_err, PCIEAERErr),
        VMSTATE_END_OF_LIST()
    }
};

typedef struct PCIEAERErrorName {
    const char *name;
    uint32_t val;
    bool correctable;
} PCIEAERErrorName;

/*
 * AER error name -> value conversion table
 * The naming scheme is the same as the one used by the Linux aer-inject tool.
 */
static const struct PCIEAERErrorName pcie_aer_error_list[] = {
    {
        .name = "DLP",
        .val = PCI_ERR_UNC_DLP,
        .correctable = false,
    }, {
        .name = "SDN",
        .val = PCI_ERR_UNC_SDN,
        .correctable = false,
    }, {
        .name = "POISON_TLP",
        .val = PCI_ERR_UNC_POISON_TLP,
        .correctable = false,
    }, {
        .name = "FCP",
        .val = PCI_ERR_UNC_FCP,
        .correctable = false,
    }, {
        .name = "COMP_TIME",
        .val = PCI_ERR_UNC_COMP_TIME,
        .correctable = false,
    }, {
        .name = "COMP_ABORT",
        .val = PCI_ERR_UNC_COMP_ABORT,
        .correctable = false,
    }, {
        .name = "UNX_COMP",
        .val = PCI_ERR_UNC_UNX_COMP,
        .correctable = false,
    }, {
        .name = "RX_OVER",
        .val = PCI_ERR_UNC_RX_OVER,
        .correctable = false,
    }, {
        .name = "MALF_TLP",
        .val = PCI_ERR_UNC_MALF_TLP,
        .correctable = false,
    }, {
        .name = "ECRC",
        .val = PCI_ERR_UNC_ECRC,
        .correctable = false,
    }, {
        .name = "UNSUP",
        .val = PCI_ERR_UNC_UNSUP,
        .correctable = false,
    }, {
        .name = "ACSV",
        .val = PCI_ERR_UNC_ACSV,
        .correctable = false,
    }, {
        .name = "INTN",
        .val = PCI_ERR_UNC_INTN,
        .correctable = false,
    }, {
        .name = "MCBTLP",
        .val = PCI_ERR_UNC_MCBTLP,
        .correctable = false,
    }, {
        .name = "ATOP_EBLOCKED",
        .val = PCI_ERR_UNC_ATOP_EBLOCKED,
        .correctable = false,
    }, {
        .name = "TLP_PRF_BLOCKED",
        .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
        .correctable = false,
    }, {
        .name = "RCVR",
        .val = PCI_ERR_COR_RCVR,
        .correctable = true,
    }, {
        .name = "BAD_TLP",
        .val = PCI_ERR_COR_BAD_TLP,
        .correctable = true,
    }, {
        .name = "BAD_DLLP",
        .val = PCI_ERR_COR_BAD_DLLP,
        .correctable = true,
    }, {
        .name = "REP_ROLL",
        .val = PCI_ERR_COR_REP_ROLL,
        .correctable = true,
    }, {
        .name = "REP_TIMER",
        .val = PCI_ERR_COR_REP_TIMER,
        .correctable = true,
    }, {
        .name = "ADV_NONFATAL",
        .val = PCI_ERR_COR_ADV_NONFATAL,
        .correctable = true,
    }, {
        .name = "INTERNAL",
        .val = PCI_ERR_COR_INTERNAL,
        .correctable = true,
    }, {
        .name = "HL_OVERFLOW",
        .val = PCI_ERR_COR_HL_OVERFLOW,
        .correctable = true,
    },
};

static int pcie_aer_parse_error_string(const char *error_name,
                                       uint32_t *status, bool *correctable)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
        const PCIEAERErrorName *e = &pcie_aer_error_list[i];
        if (strcmp(error_name, e->name)) {
            continue;
        }

        *status = e->val;
        *correctable = e->correctable;
        return 0;
    }
    return -EINVAL;
}

/*
 * Inject an error described by @qdict.
 * On success, set @details to show where error was sent.
 * Return negative errno if injection failed and a message was emitted.
 */
static int do_pcie_aer_inject_error(Monitor *mon,
                                    const QDict *qdict,
                                    PCIEErrorDetails *details)
{
    const char *id = qdict_get_str(qdict, "id");
    const char *error_name;
    uint32_t error_status;
    bool correctable;
    PCIDevice *dev;
    PCIEAERErr err;
    int ret;

    ret = pci_qdev_find_device(id, &dev);
    if (ret < 0) {
        monitor_printf(mon,
                       "id or pci device path is invalid or device not "
                       "found. %s\n", id);
        return ret;
    }
    if (!pci_is_express(dev)) {
        monitor_printf(mon, "the device doesn't support pci express. %s\n",
                       id);
        return -ENOSYS;
    }

    error_name = qdict_get_str(qdict, "error_status");
    if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
        char *e = NULL;
        error_status = strtoul(error_name, &e, 0);
        correctable = qdict_get_try_bool(qdict, "correctable", false);
        if (!e || *e != '\0') {
            monitor_printf(mon, "invalid error status value. \"%s\"",
                           error_name);
            return -EINVAL;
        }
    }
    err.status = error_status;
    err.source_id = pci_requester_id(dev);

    err.flags = 0;
    if (correctable) {
        err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
    }
    if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
        err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
    }
    if (qdict_haskey(qdict, "header0")) {
        err.flags |= PCIE_AER_ERR_HEADER_VALID;
    }
    if (qdict_haskey(qdict, "prefix0")) {
        err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
    }

    err.header[0] = qdict_get_try_int(qdict, "header0", 0);
    err.header[1] = qdict_get_try_int(qdict, "header1", 0);
    err.header[2] = qdict_get_try_int(qdict, "header2", 0);
    err.header[3] = qdict_get_try_int(qdict, "header3", 0);

    err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
    err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
    err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
    err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);

    ret = pcie_aer_inject_error(dev, &err);
    if (ret < 0) {
        monitor_printf(mon, "failed to inject error: %s\n",
                       strerror(-ret));
        return ret;
    }
    details->id = id;
    details->root_bus = pci_root_bus_path(dev);
    details->bus = pci_dev_bus_num(dev);
    details->devfn = dev->devfn;

    return 0;
}

void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
{
    PCIEErrorDetails data;

    if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
        return;
    }

    monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
                   data.id, data.root_bus, data.bus,
                   PCI_SLOT(data.devfn), PCI_FUNC(data.devfn));
}
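
/*
 * HMP usage sketch (illustrative; check hmp-commands.hx for the exact
 * option syntax): from the monitor, an error can be injected by device id
 * and error name or raw status value, e.g.
 *
 *     (qemu) pcie_aer_inject_error <id> UNSUP
 *
 * which, on success, prints the "OK id: ... root bus: ..." line emitted by
 * hmp_pcie_aer_inject_error() above.
 */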