spapr_numa.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. /*
  2. * QEMU PowerPC pSeries Logical Partition NUMA associativity handling
  3. *
  4. * Copyright IBM Corp. 2020
  5. *
  6. * Authors:
  7. * Daniel Henrique Barboza <danielhb413@gmail.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "hw/ppc/spapr_numa.h"
  14. #include "hw/pci-host/spapr.h"
  15. #include "hw/ppc/fdt.h"
  16. /* Moved from hw/ppc/spapr_pci_nvlink2.c */
  17. #define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
  18. /*
  19. * Retrieves max_dist_ref_points of the current NUMA affinity.
  20. */
  21. static int get_max_dist_ref_points(SpaprMachineState *spapr)
  22. {
  23. if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
  24. return FORM2_DIST_REF_POINTS;
  25. }
  26. return FORM1_DIST_REF_POINTS;
  27. }
  28. /*
  29. * Retrieves numa_assoc_size of the current NUMA affinity.
  30. */
  31. static int get_numa_assoc_size(SpaprMachineState *spapr)
  32. {
  33. if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
  34. return FORM2_NUMA_ASSOC_SIZE;
  35. }
  36. return FORM1_NUMA_ASSOC_SIZE;
  37. }
  38. /*
  39. * Retrieves vcpu_assoc_size of the current NUMA affinity.
  40. *
  41. * vcpu_assoc_size is the size of ibm,associativity array
  42. * for CPUs, which has an extra element (vcpu_id) in the end.
  43. */
  44. static int get_vcpu_assoc_size(SpaprMachineState *spapr)
  45. {
  46. return get_numa_assoc_size(spapr) + 1;
  47. }
  48. /*
  49. * Retrieves the ibm,associativity array of NUMA node 'node_id'
  50. * for the current NUMA affinity.
  51. */
  52. static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id)
  53. {
  54. if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
  55. return spapr->FORM2_assoc_array[node_id];
  56. }
  57. return spapr->FORM1_assoc_array[node_id];
  58. }
  59. /*
  60. * Wrapper that returns node distance from ms->numa_state->nodes
  61. * after handling edge cases where the distance might be absent.
  62. */
  63. static int get_numa_distance(MachineState *ms, int src, int dst)
  64. {
  65. NodeInfo *numa_info = ms->numa_state->nodes;
  66. int ret = numa_info[src].distance[dst];
  67. if (ret != 0) {
  68. return ret;
  69. }
  70. /*
  71. * In case QEMU adds a default NUMA single node when the user
  72. * did not add any, or where the user did not supply distances,
  73. * the distance will be absent (zero). Return local/remote
  74. * distance in this case.
  75. */
  76. if (src == dst) {
  77. return NUMA_DISTANCE_MIN;
  78. }
  79. return NUMA_DISTANCE_DEFAULT;
  80. }
  81. static bool spapr_numa_is_symmetrical(MachineState *ms)
  82. {
  83. int nb_numa_nodes = ms->numa_state->num_nodes;
  84. int src, dst;
  85. for (src = 0; src < nb_numa_nodes; src++) {
  86. for (dst = src; dst < nb_numa_nodes; dst++) {
  87. if (get_numa_distance(ms, src, dst) !=
  88. get_numa_distance(ms, dst, src)) {
  89. return false;
  90. }
  91. }
  92. }
  93. return true;
  94. }
  /*
   * This function translates the user distances into what the kernel
   * understands as possible values: 10 (local distance), 20, 40, 80
   * and 160, and returns the equivalent NUMA level for each. Current
   * heuristic is:
   *
   *  - local distance (10) returns numa_level = 0x4, meaning there is
   *    no rounding for local distance
   *  - distances between 11 and 30 inclusive -> rounded to 20,
   *    numa_level = 0x3
   *  - distances between 31 and 60 inclusive -> rounded to 40,
   *    numa_level = 0x2
   *  - distances between 61 and 120 inclusive -> rounded to 80,
   *    numa_level = 0x1
   *  - everything above 120 returns numa_level = 0 to indicate that
   *    there is no match. This will be calculated as distance = 160
   *    by the kernel (as of v5.9)
   *
   * Distances below the local distance (10) also return 0.
   *
   * The lower bound of every range is inclusive: 11, 31 and 61 belong
   * to levels 0x3, 0x2 and 0x1 respectively instead of falling through
   * to the "no match" level.
   */
  static uint8_t spapr_numa_get_numa_level(uint8_t distance)
  {
      if (distance < 10 || distance > 120) {
          return 0;
      }

      if (distance == 10) {
          return 0x4;
      }

      if (distance <= 30) {
          return 0x3;
      }

      if (distance <= 60) {
          return 0x2;
      }

      return 0x1;
  }
  125. static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
  126. {
  127. MachineState *ms = MACHINE(spapr);
  128. int nb_numa_nodes = ms->numa_state->num_nodes;
  129. int src, dst, i, j;
  130. /*
  131. * Fill all associativity domains of non-zero NUMA nodes with
  132. * node_id. This is required because the default value (0) is
  133. * considered a match with associativity domains of node 0.
  134. */
  135. for (i = 1; i < nb_numa_nodes; i++) {
  136. for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
  137. spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i);
  138. }
  139. }
  140. for (src = 0; src < nb_numa_nodes; src++) {
  141. for (dst = src; dst < nb_numa_nodes; dst++) {
  142. /*
  143. * This is how the associativity domain between A and B
  144. * is calculated:
  145. *
  146. * - get the distance D between them
  147. * - get the correspondent NUMA level 'n_level' for D
  148. * - all associativity arrays were initialized with their own
  149. * numa_ids, and we're calculating the distance in node_id
  150. * ascending order, starting from node id 0 (the first node
  151. * retrieved by numa_state). This will have a cascade effect in
  152. * the algorithm because the associativity domains that node 0
  153. * defines will be carried over to other nodes, and node 1
  154. * associativities will be carried over after taking node 0
  155. * associativities into account, and so on. This happens because
  156. * we'll assign assoc_src as the associativity domain of dst
  157. * as well, for all NUMA levels beyond and including n_level.
  158. *
  159. * The PPC kernel expects the associativity domains of node 0 to
  160. * be always 0, and this algorithm will grant that by default.
  161. */
  162. uint8_t distance = get_numa_distance(ms, src, dst);
  163. uint8_t n_level = spapr_numa_get_numa_level(distance);
  164. uint32_t assoc_src;
  165. /*
  166. * n_level = 0 means that the distance is greater than our last
  167. * rounded value (120). In this case there is no NUMA level match
  168. * between src and dst and we can skip the remaining of the loop.
  169. *
  170. * The Linux kernel will assume that the distance between src and
  171. * dst, in this case of no match, is 10 (local distance) doubled
  172. * for each NUMA it didn't match. We have FORM1_DIST_REF_POINTS
  173. * levels (4), so this gives us 10*2*2*2*2 = 160.
  174. *
  175. * This logic can be seen in the Linux kernel source code, as of
  176. * v5.9, in arch/powerpc/mm/numa.c, function __node_distance().
  177. */
  178. if (n_level == 0) {
  179. continue;
  180. }
  181. /*
  182. * We must assign all assoc_src to dst, starting from n_level
  183. * and going up to 0x1.
  184. */
  185. for (i = n_level; i > 0; i--) {
  186. assoc_src = spapr->FORM1_assoc_array[src][i];
  187. spapr->FORM1_assoc_array[dst][i] = assoc_src;
  188. }
  189. }
  190. }
  191. }
  192. static void spapr_numa_FORM1_affinity_check(MachineState *machine)
  193. {
  194. int i;
  195. /*
  196. * Check we don't have a memory-less/cpu-less NUMA node
  197. * Firmware relies on the existing memory/cpu topology to provide the
  198. * NUMA topology to the kernel.
  199. * And the linux kernel needs to know the NUMA topology at start
  200. * to be able to hotplug CPUs later.
  201. */
  202. if (machine->numa_state->num_nodes) {
  203. for (i = 0; i < machine->numa_state->num_nodes; ++i) {
  204. /* check for memory-less node */
  205. if (machine->numa_state->nodes[i].node_mem == 0) {
  206. CPUState *cs;
  207. int found = 0;
  208. /* check for cpu-less node */
  209. CPU_FOREACH(cs) {
  210. PowerPCCPU *cpu = POWERPC_CPU(cs);
  211. if (cpu->node_id == i) {
  212. found = 1;
  213. break;
  214. }
  215. }
  216. /* memory-less and cpu-less node */
  217. if (!found) {
  218. error_report(
  219. "Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i);
  220. exit(EXIT_FAILURE);
  221. }
  222. }
  223. }
  224. }
  225. if (!spapr_numa_is_symmetrical(machine)) {
  226. error_report(
  227. "Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA");
  228. exit(EXIT_FAILURE);
  229. }
  230. }
  231. /*
  232. * Set NUMA machine state data based on FORM1 affinity semantics.
  233. */
  234. static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
  235. MachineState *machine)
  236. {
  237. SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
  238. int nb_numa_nodes = machine->numa_state->num_nodes;
  239. int i, j;
  240. /*
  241. * For all associativity arrays: first position is the size,
  242. * position FORM1_DIST_REF_POINTS is always the numa_id,
  243. * represented by the index 'i'.
  244. *
  245. * This will break on sparse NUMA setups, when/if QEMU starts
  246. * to support it, because there will be no more guarantee that
  247. * 'i' will be a valid node_id set by the user.
  248. */
  249. for (i = 0; i < nb_numa_nodes; i++) {
  250. spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
  251. spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
  252. }
  253. for (i = nb_numa_nodes; i < nb_numa_nodes; i++) {
  254. spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
  255. for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
  256. uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
  257. SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
  258. spapr->FORM1_assoc_array[i][j] = gpu_assoc;
  259. }
  260. spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
  261. }
  262. /*
  263. * Guests pseries-5.1 and older uses zeroed associativity domains,
  264. * i.e. no domain definition based on NUMA distance input.
  265. *
  266. * Same thing with guests that have only one NUMA node.
  267. */
  268. if (smc->pre_5_2_numa_associativity ||
  269. machine->numa_state->num_nodes <= 1) {
  270. return;
  271. }
  272. spapr_numa_define_FORM1_domains(spapr);
  273. }
  274. /*
  275. * Init NUMA FORM2 machine state data
  276. */
  277. static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr)
  278. {
  279. int i;
  280. /*
  281. * For all resources but CPUs, FORM2 associativity arrays will
  282. * be a size 2 array with the following format:
  283. *
  284. * ibm,associativity = {1, numa_id}
  285. *
  286. * CPUs will write an additional 'vcpu_id' on top of the arrays
  287. * being initialized here. 'numa_id' is represented by the
  288. * index 'i' of the loop.
  289. */
  290. for (i = 0; i < NUMA_NODES_MAX_NUM; i++) {
  291. spapr->FORM2_assoc_array[i][0] = cpu_to_be32(1);
  292. spapr->FORM2_assoc_array[i][1] = cpu_to_be32(i);
  293. }
  294. }
  295. void spapr_numa_associativity_init(SpaprMachineState *spapr,
  296. MachineState *machine)
  297. {
  298. spapr_numa_FORM1_affinity_init(spapr, machine);
  299. spapr_numa_FORM2_affinity_init(spapr);
  300. }
  301. void spapr_numa_associativity_check(SpaprMachineState *spapr)
  302. {
  303. /*
  304. * FORM2 does not have any restrictions we need to handle
  305. * at CAS time, for now.
  306. */
  307. if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
  308. return;
  309. }
  310. spapr_numa_FORM1_affinity_check(MACHINE(spapr));
  311. }
  312. void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
  313. int offset, int nodeid)
  314. {
  315. const uint32_t *associativity = get_associativity(spapr, nodeid);
  316. _FDT((fdt_setprop(fdt, offset, "ibm,associativity",
  317. associativity,
  318. get_numa_assoc_size(spapr) * sizeof(uint32_t))));
  319. }
  320. static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr,
  321. PowerPCCPU *cpu)
  322. {
  323. const uint32_t *associativity = get_associativity(spapr, cpu->node_id);
  324. int max_distance_ref_points = get_max_dist_ref_points(spapr);
  325. int vcpu_assoc_size = get_vcpu_assoc_size(spapr);
  326. uint32_t *vcpu_assoc = g_new(uint32_t, vcpu_assoc_size);
  327. int index = spapr_get_vcpu_id(cpu);
  328. /*
  329. * VCPUs have an extra 'cpu_id' value in ibm,associativity
  330. * compared to other resources. Increment the size at index
  331. * 0, put cpu_id last, then copy the remaining associativity
  332. * domains.
  333. */
  334. vcpu_assoc[0] = cpu_to_be32(max_distance_ref_points + 1);
  335. vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index);
  336. memcpy(vcpu_assoc + 1, associativity + 1,
  337. (vcpu_assoc_size - 2) * sizeof(uint32_t));
  338. return vcpu_assoc;
  339. }
  340. int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt,
  341. int offset, PowerPCCPU *cpu)
  342. {
  343. g_autofree uint32_t *vcpu_assoc = NULL;
  344. int vcpu_assoc_size = get_vcpu_assoc_size(spapr);
  345. vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu);
  346. /* Advertise NUMA via ibm,associativity */
  347. return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc,
  348. vcpu_assoc_size * sizeof(uint32_t));
  349. }
  350. int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
  351. int offset)
  352. {
  353. MachineState *machine = MACHINE(spapr);
  354. int max_distance_ref_points = get_max_dist_ref_points(spapr);
  355. int nb_numa_nodes = machine->numa_state->num_nodes;
  356. int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
  357. g_autofree uint32_t *int_buf = NULL;
  358. uint32_t *cur_index;
  359. int i;
  360. /* ibm,associativity-lookup-arrays */
  361. int_buf = g_new0(uint32_t, nr_nodes * max_distance_ref_points + 2);
  362. cur_index = int_buf;
  363. int_buf[0] = cpu_to_be32(nr_nodes);
  364. /* Number of entries per associativity list */
  365. int_buf[1] = cpu_to_be32(max_distance_ref_points);
  366. cur_index += 2;
  367. for (i = 0; i < nr_nodes; i++) {
  368. /*
  369. * For the lookup-array we use the ibm,associativity array of the
  370. * current NUMA affinity, without the first element (size).
  371. */
  372. const uint32_t *associativity = get_associativity(spapr, i);
  373. memcpy(cur_index, ++associativity,
  374. sizeof(uint32_t) * max_distance_ref_points);
  375. cur_index += max_distance_ref_points;
  376. }
  377. return fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays",
  378. int_buf, (cur_index - int_buf) * sizeof(uint32_t));
  379. }
  380. static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr,
  381. void *fdt, int rtas)
  382. {
  383. MachineState *ms = MACHINE(spapr);
  384. SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
  385. uint32_t refpoints[] = {
  386. cpu_to_be32(0x4),
  387. cpu_to_be32(0x3),
  388. cpu_to_be32(0x2),
  389. cpu_to_be32(0x1),
  390. };
  391. uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
  392. uint32_t maxdomain = ms->numa_state->num_nodes;
  393. uint32_t maxdomains[] = {
  394. cpu_to_be32(4),
  395. cpu_to_be32(maxdomain),
  396. cpu_to_be32(maxdomain),
  397. cpu_to_be32(maxdomain),
  398. cpu_to_be32(maxdomain)
  399. };
  400. if (smc->pre_5_2_numa_associativity ||
  401. ms->numa_state->num_nodes <= 1) {
  402. uint32_t legacy_refpoints[] = {
  403. cpu_to_be32(0x4),
  404. cpu_to_be32(0x4),
  405. cpu_to_be32(0x2),
  406. };
  407. uint32_t legacy_maxdomains[] = {
  408. cpu_to_be32(4),
  409. cpu_to_be32(0),
  410. cpu_to_be32(0),
  411. cpu_to_be32(0),
  412. cpu_to_be32(maxdomain ? maxdomain : 1),
  413. };
  414. G_STATIC_ASSERT(sizeof(legacy_refpoints) <= sizeof(refpoints));
  415. G_STATIC_ASSERT(sizeof(legacy_maxdomains) <= sizeof(maxdomains));
  416. nr_refpoints = 3;
  417. memcpy(refpoints, legacy_refpoints, sizeof(legacy_refpoints));
  418. memcpy(maxdomains, legacy_maxdomains, sizeof(legacy_maxdomains));
  419. /* pseries-5.0 and older reference-points array is {0x4, 0x4} */
  420. if (smc->pre_5_1_assoc_refpoints) {
  421. nr_refpoints = 2;
  422. }
  423. }
  424. _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
  425. refpoints, nr_refpoints * sizeof(refpoints[0])));
  426. _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
  427. maxdomains, sizeof(maxdomains)));
  428. }
  429. static void spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
  430. void *fdt, int rtas)
  431. {
  432. MachineState *ms = MACHINE(spapr);
  433. int nb_numa_nodes = ms->numa_state->num_nodes;
  434. int distance_table_entries = nb_numa_nodes * nb_numa_nodes;
  435. g_autofree uint32_t *lookup_index_table = NULL;
  436. g_autofree uint8_t *distance_table = NULL;
  437. int src, dst, i, distance_table_size;
  438. /*
  439. * ibm,numa-lookup-index-table: array with length and a
  440. * list of NUMA ids present in the guest.
  441. */
  442. lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1);
  443. lookup_index_table[0] = cpu_to_be32(nb_numa_nodes);
  444. for (i = 0; i < nb_numa_nodes; i++) {
  445. lookup_index_table[i + 1] = cpu_to_be32(i);
  446. }
  447. _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table",
  448. lookup_index_table,
  449. (nb_numa_nodes + 1) * sizeof(uint32_t)));
  450. /*
  451. * ibm,numa-distance-table: contains all node distances. First
  452. * element is the size of the table as uint32, followed up
  453. * by all the uint8 distances from the first NUMA node, then all
  454. * distances from the second NUMA node and so on.
  455. *
  456. * ibm,numa-lookup-index-table is used by guest to navigate this
  457. * array because NUMA ids can be sparse (node 0 is the first,
  458. * node 8 is the second ...).
  459. */
  460. distance_table_size = distance_table_entries * sizeof(uint8_t) +
  461. sizeof(uint32_t);
  462. distance_table = g_new0(uint8_t, distance_table_size);
  463. stl_be_p(distance_table, distance_table_entries);
  464. /* Skip the uint32_t array length at the start */
  465. i = sizeof(uint32_t);
  466. for (src = 0; src < nb_numa_nodes; src++) {
  467. for (dst = 0; dst < nb_numa_nodes; dst++) {
  468. distance_table[i++] = get_numa_distance(ms, src, dst);
  469. }
  470. }
  471. _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table",
  472. distance_table, distance_table_size));
  473. }
  474. /*
  475. * This helper could be compressed in a single function with
  476. * FORM1 logic since we're setting the same DT values, with the
  477. * difference being a call to spapr_numa_FORM2_write_rtas_tables()
  478. * in the end. The separation was made to avoid clogging FORM1 code
  479. * which already has to deal with compat modes from previous
  480. * QEMU machine types.
  481. */
  482. static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr,
  483. void *fdt, int rtas)
  484. {
  485. MachineState *ms = MACHINE(spapr);
  486. /*
  487. * In FORM2, ibm,associativity-reference-points will point to
  488. * the element in the ibm,associativity array that contains the
  489. * primary domain index (for FORM2, the first element).
  490. *
  491. * This value (in our case, the numa-id) is then used as an index
  492. * to retrieve all other attributes of the node (distance,
  493. * bandwidth, latency) via ibm,numa-lookup-index-table and other
  494. * ibm,numa-*-table properties.
  495. */
  496. uint32_t refpoints[] = { cpu_to_be32(1) };
  497. uint32_t maxdomain = ms->numa_state->num_nodes;
  498. uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) };
  499. _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
  500. refpoints, sizeof(refpoints)));
  501. _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
  502. maxdomains, sizeof(maxdomains)));
  503. spapr_numa_FORM2_write_rtas_tables(spapr, fdt, rtas);
  504. }
  505. /*
  506. * Helper that writes ibm,associativity-reference-points and
  507. * max-associativity-domains in the RTAS pointed by @rtas
  508. * in the DT @fdt.
  509. */
  510. void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
  511. {
  512. if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
  513. spapr_numa_FORM2_write_rtas_dt(spapr, fdt, rtas);
  514. return;
  515. }
  516. spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas);
  517. }
  518. static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
  519. SpaprMachineState *spapr,
  520. target_ulong opcode,
  521. target_ulong *args)
  522. {
  523. g_autofree uint32_t *vcpu_assoc = NULL;
  524. target_ulong flags = args[0];
  525. target_ulong procno = args[1];
  526. PowerPCCPU *tcpu;
  527. int idx, assoc_idx;
  528. int vcpu_assoc_size = get_vcpu_assoc_size(spapr);
  529. /* only support procno from H_REGISTER_VPA */
  530. if (flags != 0x1) {
  531. return H_FUNCTION;
  532. }
  533. tcpu = spapr_find_cpu(procno);
  534. if (tcpu == NULL) {
  535. return H_P2;
  536. }
  537. /*
  538. * Given that we want to be flexible with the sizes and indexes,
  539. * we must consider that there is a hard limit of how many
  540. * associativities domain we can fit in R4 up to R9, which would be
  541. * 12 associativity domains for vcpus. Assert and bail if that's
  542. * not the case.
  543. */
  544. g_assert((vcpu_assoc_size - 1) <= 12);
  545. vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
  546. /* assoc_idx starts at 1 to skip associativity size */
  547. assoc_idx = 1;
  548. #define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \
  549. ((uint64_t)(b) & 0xffffffff))
  550. for (idx = 0; idx < 6; idx++) {
  551. int32_t a, b;
  552. /*
  553. * vcpu_assoc[] will contain the associativity domains for tcpu,
  554. * including tcpu->node_id and procno, meaning that we don't
  555. * need to use these variables here.
  556. *
  557. * We'll read 2 values at a time to fill up the ASSOCIATIVITY()
  558. * macro. The ternary will fill the remaining registers with -1
  559. * after we went through vcpu_assoc[].
  560. */
  561. a = assoc_idx < vcpu_assoc_size ?
  562. be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
  563. b = assoc_idx < vcpu_assoc_size ?
  564. be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
  565. args[idx] = ASSOCIATIVITY(a, b);
  566. }
  567. #undef ASSOCIATIVITY
  568. return H_SUCCESS;
  569. }
  570. static void spapr_numa_register_types(void)
  571. {
  572. /* Virtual Processor Home Node */
  573. spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
  574. h_home_node_associativity);
  575. }
  576. type_init(spapr_numa_register_types)