x86.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796
  1. /*
  2. * Copyright (c) 2003-2004 Fabrice Bellard
  3. * Copyright (c) 2019 Red Hat, Inc.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a copy
  6. * of this software and associated documentation files (the "Software"), to deal
  7. * in the Software without restriction, including without limitation the rights
  8. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. * copies of the Software, and to permit persons to whom the Software is
  10. * furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice shall be included in
  13. * all copies or substantial portions of the Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. * THE SOFTWARE.
  22. */
  23. #include "qemu/osdep.h"
  24. #include "qemu/error-report.h"
  25. #include "qemu/option.h"
  26. #include "qemu/cutils.h"
  27. #include "qemu/units.h"
  28. #include "qemu-common.h"
  29. #include "qapi/error.h"
  30. #include "qapi/qmp/qerror.h"
  31. #include "qapi/qapi-visit-common.h"
  32. #include "qapi/visitor.h"
  33. #include "sysemu/qtest.h"
  34. #include "sysemu/numa.h"
  35. #include "sysemu/replay.h"
  36. #include "sysemu/sysemu.h"
  37. #include "hw/i386/x86.h"
  38. #include "target/i386/cpu.h"
  39. #include "hw/i386/topology.h"
  40. #include "hw/i386/fw_cfg.h"
  41. #include "hw/acpi/cpu_hotplug.h"
  42. #include "hw/nmi.h"
  43. #include "hw/loader.h"
  44. #include "multiboot.h"
  45. #include "elf.h"
  46. #include "standard-headers/asm-x86/bootparam.h"
  /* Firmware image used when no BIOS file name has been selected. */
  #define BIOS_FILENAME "bios.bin"

  /*
   * Physical address of the PVH entry point, as read from the kernel's ELF
   * note by read_pvh_start_addr(). Zero until such a note has been parsed.
   */
  static size_t pvh_start_addr;
  50. /*
  51. * Calculates initial APIC ID for a specific CPU index
  52. *
  53. * Currently we need to be able to calculate the APIC ID from the CPU index
  54. * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have
  55. * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of
  56. * all CPUs up to max_cpus.
  57. */
  58. uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms,
  59. unsigned int cpu_index)
  60. {
  61. MachineState *ms = MACHINE(x86ms);
  62. X86MachineClass *x86mc = X86_MACHINE_GET_CLASS(x86ms);
  63. uint32_t correct_id;
  64. static bool warned;
  65. correct_id = x86_apicid_from_cpu_idx(x86ms->smp_dies, ms->smp.cores,
  66. ms->smp.threads, cpu_index);
  67. if (x86mc->compat_apic_id_mode) {
  68. if (cpu_index != correct_id && !warned && !qtest_enabled()) {
  69. error_report("APIC IDs set in compatibility mode, "
  70. "CPU topology won't match the configuration");
  71. warned = true;
  72. }
  73. return cpu_index;
  74. } else {
  75. return correct_id;
  76. }
  77. }
  78. void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp)
  79. {
  80. Object *cpu = NULL;
  81. Error *local_err = NULL;
  82. CPUX86State *env = NULL;
  83. cpu = object_new(MACHINE(x86ms)->cpu_type);
  84. env = &X86_CPU(cpu)->env;
  85. env->nr_dies = x86ms->smp_dies;
  86. object_property_set_uint(cpu, apic_id, "apic-id", &local_err);
  87. object_property_set_bool(cpu, true, "realized", &local_err);
  88. object_unref(cpu);
  89. error_propagate(errp, local_err);
  90. }
  91. void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
  92. {
  93. int i;
  94. const CPUArchIdList *possible_cpus;
  95. MachineState *ms = MACHINE(x86ms);
  96. MachineClass *mc = MACHINE_GET_CLASS(x86ms);
  97. x86_cpu_set_default_version(default_cpu_version);
  98. /*
  99. * Calculates the limit to CPU APIC ID values
  100. *
  101. * Limit for the APIC ID value, so that all
  102. * CPU APIC IDs are < x86ms->apic_id_limit.
  103. *
  104. * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create().
  105. */
  106. x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
  107. ms->smp.max_cpus - 1) + 1;
  108. possible_cpus = mc->possible_cpu_arch_ids(ms);
  109. for (i = 0; i < ms->smp.cpus; i++) {
  110. x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
  111. }
  112. }
  113. CpuInstanceProperties
  114. x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
  115. {
  116. MachineClass *mc = MACHINE_GET_CLASS(ms);
  117. const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
  118. assert(cpu_index < possible_cpus->len);
  119. return possible_cpus->cpus[cpu_index].props;
  120. }
  121. int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
  122. {
  123. X86CPUTopoInfo topo;
  124. X86MachineState *x86ms = X86_MACHINE(ms);
  125. assert(idx < ms->possible_cpus->len);
  126. x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id,
  127. x86ms->smp_dies, ms->smp.cores,
  128. ms->smp.threads, &topo);
  129. return topo.pkg_id % ms->numa_state->num_nodes;
  130. }
  131. const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
  132. {
  133. X86MachineState *x86ms = X86_MACHINE(ms);
  134. int i;
  135. unsigned int max_cpus = ms->smp.max_cpus;
  136. if (ms->possible_cpus) {
  137. /*
  138. * make sure that max_cpus hasn't changed since the first use, i.e.
  139. * -smp hasn't been parsed after it
  140. */
  141. assert(ms->possible_cpus->len == max_cpus);
  142. return ms->possible_cpus;
  143. }
  144. ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
  145. sizeof(CPUArchId) * max_cpus);
  146. ms->possible_cpus->len = max_cpus;
  147. for (i = 0; i < ms->possible_cpus->len; i++) {
  148. X86CPUTopoInfo topo;
  149. ms->possible_cpus->cpus[i].type = ms->cpu_type;
  150. ms->possible_cpus->cpus[i].vcpus_count = 1;
  151. ms->possible_cpus->cpus[i].arch_id =
  152. x86_cpu_apic_id_from_index(x86ms, i);
  153. x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id,
  154. x86ms->smp_dies, ms->smp.cores,
  155. ms->smp.threads, &topo);
  156. ms->possible_cpus->cpus[i].props.has_socket_id = true;
  157. ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id;
  158. if (x86ms->smp_dies > 1) {
  159. ms->possible_cpus->cpus[i].props.has_die_id = true;
  160. ms->possible_cpus->cpus[i].props.die_id = topo.die_id;
  161. }
  162. ms->possible_cpus->cpus[i].props.has_core_id = true;
  163. ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
  164. ms->possible_cpus->cpus[i].props.has_thread_id = true;
  165. ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id;
  166. }
  167. return ms->possible_cpus;
  168. }
  169. static void x86_nmi(NMIState *n, int cpu_index, Error **errp)
  170. {
  171. /* cpu index isn't used */
  172. CPUState *cs;
  173. CPU_FOREACH(cs) {
  174. X86CPU *cpu = X86_CPU(cs);
  175. if (!cpu->apic_state) {
  176. cpu_interrupt(cs, CPU_INTERRUPT_NMI);
  177. } else {
  178. apic_deliver_nmi(cpu->apic_state);
  179. }
  180. }
  181. }
  /*
   * Return the size in bytes of the open stream @f, leaving the stream
   * position where it was on entry.
   *
   * Returns -1 if the position cannot be queried or changed (for example on
   * a non-seekable stream).
   */
  static long get_file_size(FILE *f)
  {
      long where, size;

      /* XXX: on Unix systems, using fstat() probably makes more sense */
      where = ftell(f);
      if (where < 0) {
          return -1;
      }
      if (fseek(f, 0, SEEK_END) != 0) {
          return -1;
      }

      size = ftell(f);

      /* Always try to restore the caller's position, even if ftell failed */
      if (fseek(f, where, SEEK_SET) != 0) {
          return -1;
      }
      return size;
  }
  /*
   * Header of one setup_data entry appended to the kernel image, followed
   * directly by @len bytes of payload. The layout is packed: 16 header bytes
   * with the payload starting at offset 16.
   */
  struct setup_data {
      uint64_t next;   /* written as 0 by this file's only user */
      uint32_t type;   /* entry type, e.g. SETUP_DTB */
      uint32_t len;    /* payload length in bytes */
      uint8_t data[];  /* payload (flexible array member) */
  } __attribute__((packed));
  198. /*
  199. * The entry point into the kernel for PVH boot is different from
  200. * the native entry point. The PVH entry is defined by the x86/HVM
  201. * direct boot ABI and is available in an ELFNOTE in the kernel binary.
  202. *
  203. * This function is passed to load_elf() when it is called from
  204. * load_elfboot() which then additionally checks for an ELF Note of
  205. * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to
  206. * parse the PVH entry address from the ELF Note.
  207. *
  208. * Due to trickery in elf_opts.h, load_elf() is actually available as
  209. * load_elf32() or load_elf64() and this routine needs to be able
  210. * to deal with being called as 32 or 64 bit.
  211. *
  212. * The address of the PVH entry point is saved to the 'pvh_start_addr'
  213. * global variable. (although the entry point is 32-bit, the kernel
  214. * binary can be either 32-bit or 64-bit).
  215. */
  216. static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64)
  217. {
  218. size_t *elf_note_data_addr;
  219. /* Check if ELF Note header passed in is valid */
  220. if (arg1 == NULL) {
  221. return 0;
  222. }
  223. if (is64) {
  224. struct elf64_note *nhdr64 = (struct elf64_note *)arg1;
  225. uint64_t nhdr_size64 = sizeof(struct elf64_note);
  226. uint64_t phdr_align = *(uint64_t *)arg2;
  227. uint64_t nhdr_namesz = nhdr64->n_namesz;
  228. elf_note_data_addr =
  229. ((void *)nhdr64) + nhdr_size64 +
  230. QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
  231. } else {
  232. struct elf32_note *nhdr32 = (struct elf32_note *)arg1;
  233. uint32_t nhdr_size32 = sizeof(struct elf32_note);
  234. uint32_t phdr_align = *(uint32_t *)arg2;
  235. uint32_t nhdr_namesz = nhdr32->n_namesz;
  236. elf_note_data_addr =
  237. ((void *)nhdr32) + nhdr_size32 +
  238. QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
  239. }
  240. pvh_start_addr = *elf_note_data_addr;
  241. return pvh_start_addr;
  242. }
  243. static bool load_elfboot(const char *kernel_filename,
  244. int kernel_file_size,
  245. uint8_t *header,
  246. size_t pvh_xen_start_addr,
  247. FWCfgState *fw_cfg)
  248. {
  249. uint32_t flags = 0;
  250. uint32_t mh_load_addr = 0;
  251. uint32_t elf_kernel_size = 0;
  252. uint64_t elf_entry;
  253. uint64_t elf_low, elf_high;
  254. int kernel_size;
  255. if (ldl_p(header) != 0x464c457f) {
  256. return false; /* no elfboot */
  257. }
  258. bool elf_is64 = header[EI_CLASS] == ELFCLASS64;
  259. flags = elf_is64 ?
  260. ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags;
  261. if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */
  262. error_report("elfboot unsupported flags = %x", flags);
  263. exit(1);
  264. }
  265. uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY;
  266. kernel_size = load_elf(kernel_filename, read_pvh_start_addr,
  267. NULL, &elf_note_type, &elf_entry,
  268. &elf_low, &elf_high, 0, I386_ELF_MACHINE,
  269. 0, 0);
  270. if (kernel_size < 0) {
  271. error_report("Error while loading elf kernel");
  272. exit(1);
  273. }
  274. mh_load_addr = elf_low;
  275. elf_kernel_size = elf_high - elf_low;
  276. if (pvh_start_addr == 0) {
  277. error_report("Error loading uncompressed kernel without PVH ELF Note");
  278. exit(1);
  279. }
  280. fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr);
  281. fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr);
  282. fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size);
  283. return true;
  284. }
  285. void x86_load_linux(X86MachineState *x86ms,
  286. FWCfgState *fw_cfg,
  287. int acpi_data_size,
  288. bool pvh_enabled,
  289. bool linuxboot_dma_enabled)
  290. {
  291. uint16_t protocol;
  292. int setup_size, kernel_size, cmdline_size;
  293. int dtb_size, setup_data_offset;
  294. uint32_t initrd_max;
  295. uint8_t header[8192], *setup, *kernel;
  296. hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
  297. FILE *f;
  298. char *vmode;
  299. MachineState *machine = MACHINE(x86ms);
  300. struct setup_data *setup_data;
  301. const char *kernel_filename = machine->kernel_filename;
  302. const char *initrd_filename = machine->initrd_filename;
  303. const char *dtb_filename = machine->dtb;
  304. const char *kernel_cmdline = machine->kernel_cmdline;
  305. /* Align to 16 bytes as a paranoia measure */
  306. cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
  307. /* load the kernel header */
  308. f = fopen(kernel_filename, "rb");
  309. if (!f) {
  310. fprintf(stderr, "qemu: could not open kernel file '%s': %s\n",
  311. kernel_filename, strerror(errno));
  312. exit(1);
  313. }
  314. kernel_size = get_file_size(f);
  315. if (!kernel_size ||
  316. fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
  317. MIN(ARRAY_SIZE(header), kernel_size)) {
  318. fprintf(stderr, "qemu: could not load kernel '%s': %s\n",
  319. kernel_filename, strerror(errno));
  320. exit(1);
  321. }
  322. /* kernel protocol version */
  323. if (ldl_p(header + 0x202) == 0x53726448) {
  324. protocol = lduw_p(header + 0x206);
  325. } else {
  326. /*
  327. * This could be a multiboot kernel. If it is, let's stop treating it
  328. * like a Linux kernel.
  329. * Note: some multiboot images could be in the ELF format (the same of
  330. * PVH), so we try multiboot first since we check the multiboot magic
  331. * header before to load it.
  332. */
  333. if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
  334. kernel_cmdline, kernel_size, header)) {
  335. return;
  336. }
  337. /*
  338. * Check if the file is an uncompressed kernel file (ELF) and load it,
  339. * saving the PVH entry point used by the x86/HVM direct boot ABI.
  340. * If load_elfboot() is successful, populate the fw_cfg info.
  341. */
  342. if (pvh_enabled &&
  343. load_elfboot(kernel_filename, kernel_size,
  344. header, pvh_start_addr, fw_cfg)) {
  345. fclose(f);
  346. fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
  347. strlen(kernel_cmdline) + 1);
  348. fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
  349. fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header));
  350. fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA,
  351. header, sizeof(header));
  352. /* load initrd */
  353. if (initrd_filename) {
  354. GMappedFile *mapped_file;
  355. gsize initrd_size;
  356. gchar *initrd_data;
  357. GError *gerr = NULL;
  358. mapped_file = g_mapped_file_new(initrd_filename, false, &gerr);
  359. if (!mapped_file) {
  360. fprintf(stderr, "qemu: error reading initrd %s: %s\n",
  361. initrd_filename, gerr->message);
  362. exit(1);
  363. }
  364. x86ms->initrd_mapped_file = mapped_file;
  365. initrd_data = g_mapped_file_get_contents(mapped_file);
  366. initrd_size = g_mapped_file_get_length(mapped_file);
  367. initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
  368. if (initrd_size >= initrd_max) {
  369. fprintf(stderr, "qemu: initrd is too large, cannot support."
  370. "(max: %"PRIu32", need %"PRId64")\n",
  371. initrd_max, (uint64_t)initrd_size);
  372. exit(1);
  373. }
  374. initrd_addr = (initrd_max - initrd_size) & ~4095;
  375. fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
  376. fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
  377. fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data,
  378. initrd_size);
  379. }
  380. option_rom[nb_option_roms].bootindex = 0;
  381. option_rom[nb_option_roms].name = "pvh.bin";
  382. nb_option_roms++;
  383. return;
  384. }
  385. protocol = 0;
  386. }
  387. if (protocol < 0x200 || !(header[0x211] & 0x01)) {
  388. /* Low kernel */
  389. real_addr = 0x90000;
  390. cmdline_addr = 0x9a000 - cmdline_size;
  391. prot_addr = 0x10000;
  392. } else if (protocol < 0x202) {
  393. /* High but ancient kernel */
  394. real_addr = 0x90000;
  395. cmdline_addr = 0x9a000 - cmdline_size;
  396. prot_addr = 0x100000;
  397. } else {
  398. /* High and recent kernel */
  399. real_addr = 0x10000;
  400. cmdline_addr = 0x20000;
  401. prot_addr = 0x100000;
  402. }
  403. /* highest address for loading the initrd */
  404. if (protocol >= 0x20c &&
  405. lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) {
  406. /*
  407. * Linux has supported initrd up to 4 GB for a very long time (2007,
  408. * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013),
  409. * though it only sets initrd_max to 2 GB to "work around bootloader
  410. * bugs". Luckily, QEMU firmware(which does something like bootloader)
  411. * has supported this.
  412. *
  413. * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can
  414. * be loaded into any address.
  415. *
  416. * In addition, initrd_max is uint32_t simply because QEMU doesn't
  417. * support the 64-bit boot protocol (specifically the ext_ramdisk_image
  418. * field).
  419. *
  420. * Therefore here just limit initrd_max to UINT32_MAX simply as well.
  421. */
  422. initrd_max = UINT32_MAX;
  423. } else if (protocol >= 0x203) {
  424. initrd_max = ldl_p(header + 0x22c);
  425. } else {
  426. initrd_max = 0x37ffffff;
  427. }
  428. if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) {
  429. initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
  430. }
  431. fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
  432. fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
  433. fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
  434. if (protocol >= 0x202) {
  435. stl_p(header + 0x228, cmdline_addr);
  436. } else {
  437. stw_p(header + 0x20, 0xA33F);
  438. stw_p(header + 0x22, cmdline_addr - real_addr);
  439. }
  440. /* handle vga= parameter */
  441. vmode = strstr(kernel_cmdline, "vga=");
  442. if (vmode) {
  443. unsigned int video_mode;
  444. int ret;
  445. /* skip "vga=" */
  446. vmode += 4;
  447. if (!strncmp(vmode, "normal", 6)) {
  448. video_mode = 0xffff;
  449. } else if (!strncmp(vmode, "ext", 3)) {
  450. video_mode = 0xfffe;
  451. } else if (!strncmp(vmode, "ask", 3)) {
  452. video_mode = 0xfffd;
  453. } else {
  454. ret = qemu_strtoui(vmode, NULL, 0, &video_mode);
  455. if (ret != 0) {
  456. fprintf(stderr, "qemu: can't parse 'vga' parameter: %s\n",
  457. strerror(-ret));
  458. exit(1);
  459. }
  460. }
  461. stw_p(header + 0x1fa, video_mode);
  462. }
  463. /* loader type */
  464. /*
  465. * High nybble = B reserved for QEMU; low nybble is revision number.
  466. * If this code is substantially changed, you may want to consider
  467. * incrementing the revision.
  468. */
  469. if (protocol >= 0x200) {
  470. header[0x210] = 0xB0;
  471. }
  472. /* heap */
  473. if (protocol >= 0x201) {
  474. header[0x211] |= 0x80; /* CAN_USE_HEAP */
  475. stw_p(header + 0x224, cmdline_addr - real_addr - 0x200);
  476. }
  477. /* load initrd */
  478. if (initrd_filename) {
  479. GMappedFile *mapped_file;
  480. gsize initrd_size;
  481. gchar *initrd_data;
  482. GError *gerr = NULL;
  483. if (protocol < 0x200) {
  484. fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
  485. exit(1);
  486. }
  487. mapped_file = g_mapped_file_new(initrd_filename, false, &gerr);
  488. if (!mapped_file) {
  489. fprintf(stderr, "qemu: error reading initrd %s: %s\n",
  490. initrd_filename, gerr->message);
  491. exit(1);
  492. }
  493. x86ms->initrd_mapped_file = mapped_file;
  494. initrd_data = g_mapped_file_get_contents(mapped_file);
  495. initrd_size = g_mapped_file_get_length(mapped_file);
  496. if (initrd_size >= initrd_max) {
  497. fprintf(stderr, "qemu: initrd is too large, cannot support."
  498. "(max: %"PRIu32", need %"PRId64")\n",
  499. initrd_max, (uint64_t)initrd_size);
  500. exit(1);
  501. }
  502. initrd_addr = (initrd_max - initrd_size) & ~4095;
  503. fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
  504. fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
  505. fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
  506. stl_p(header + 0x218, initrd_addr);
  507. stl_p(header + 0x21c, initrd_size);
  508. }
  509. /* load kernel and setup */
  510. setup_size = header[0x1f1];
  511. if (setup_size == 0) {
  512. setup_size = 4;
  513. }
  514. setup_size = (setup_size + 1) * 512;
  515. if (setup_size > kernel_size) {
  516. fprintf(stderr, "qemu: invalid kernel header\n");
  517. exit(1);
  518. }
  519. kernel_size -= setup_size;
  520. setup = g_malloc(setup_size);
  521. kernel = g_malloc(kernel_size);
  522. fseek(f, 0, SEEK_SET);
  523. if (fread(setup, 1, setup_size, f) != setup_size) {
  524. fprintf(stderr, "fread() failed\n");
  525. exit(1);
  526. }
  527. if (fread(kernel, 1, kernel_size, f) != kernel_size) {
  528. fprintf(stderr, "fread() failed\n");
  529. exit(1);
  530. }
  531. fclose(f);
  532. /* append dtb to kernel */
  533. if (dtb_filename) {
  534. if (protocol < 0x209) {
  535. fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
  536. exit(1);
  537. }
  538. dtb_size = get_image_size(dtb_filename);
  539. if (dtb_size <= 0) {
  540. fprintf(stderr, "qemu: error reading dtb %s: %s\n",
  541. dtb_filename, strerror(errno));
  542. exit(1);
  543. }
  544. setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
  545. kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
  546. kernel = g_realloc(kernel, kernel_size);
  547. stq_p(header + 0x250, prot_addr + setup_data_offset);
  548. setup_data = (struct setup_data *)(kernel + setup_data_offset);
  549. setup_data->next = 0;
  550. setup_data->type = cpu_to_le32(SETUP_DTB);
  551. setup_data->len = cpu_to_le32(dtb_size);
  552. load_image_size(dtb_filename, setup_data->data, dtb_size);
  553. }
  554. memcpy(setup, header, MIN(sizeof(header), setup_size));
  555. fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
  556. fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
  557. fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
  558. fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
  559. fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
  560. fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
  561. option_rom[nb_option_roms].bootindex = 0;
  562. option_rom[nb_option_roms].name = "linuxboot.bin";
  563. if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) {
  564. option_rom[nb_option_roms].name = "linuxboot_dma.bin";
  565. }
  566. nb_option_roms++;
  567. }
  568. void x86_bios_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw)
  569. {
  570. char *filename;
  571. MemoryRegion *bios, *isa_bios;
  572. int bios_size, isa_bios_size;
  573. int ret;
  574. /* BIOS load */
  575. if (bios_name == NULL) {
  576. bios_name = BIOS_FILENAME;
  577. }
  578. filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
  579. if (filename) {
  580. bios_size = get_image_size(filename);
  581. } else {
  582. bios_size = -1;
  583. }
  584. if (bios_size <= 0 ||
  585. (bios_size % 65536) != 0) {
  586. goto bios_error;
  587. }
  588. bios = g_malloc(sizeof(*bios));
  589. memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
  590. if (!isapc_ram_fw) {
  591. memory_region_set_readonly(bios, true);
  592. }
  593. ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
  594. if (ret != 0) {
  595. bios_error:
  596. fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name);
  597. exit(1);
  598. }
  599. g_free(filename);
  600. /* map the last 128KB of the BIOS in ISA space */
  601. isa_bios_size = MIN(bios_size, 128 * KiB);
  602. isa_bios = g_malloc(sizeof(*isa_bios));
  603. memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
  604. bios_size - isa_bios_size, isa_bios_size);
  605. memory_region_add_subregion_overlap(rom_memory,
  606. 0x100000 - isa_bios_size,
  607. isa_bios,
  608. 1);
  609. if (!isapc_ram_fw) {
  610. memory_region_set_readonly(isa_bios, true);
  611. }
  612. /* map all the bios at the top of memory */
  613. memory_region_add_subregion(rom_memory,
  614. (uint32_t)(-bios_size),
  615. bios);
  616. }
  617. static void x86_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
  618. const char *name, void *opaque,
  619. Error **errp)
  620. {
  621. X86MachineState *x86ms = X86_MACHINE(obj);
  622. uint64_t value = x86ms->max_ram_below_4g;
  623. visit_type_size(v, name, &value, errp);
  624. }
  625. static void x86_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
  626. const char *name, void *opaque,
  627. Error **errp)
  628. {
  629. X86MachineState *x86ms = X86_MACHINE(obj);
  630. Error *error = NULL;
  631. uint64_t value;
  632. visit_type_size(v, name, &value, &error);
  633. if (error) {
  634. error_propagate(errp, error);
  635. return;
  636. }
  637. if (value > 4 * GiB) {
  638. error_setg(&error,
  639. "Machine option 'max-ram-below-4g=%"PRIu64
  640. "' expects size less than or equal to 4G", value);
  641. error_propagate(errp, error);
  642. return;
  643. }
  644. if (value < 1 * MiB) {
  645. warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary,"
  646. "BIOS may not work with less than 1MiB", value);
  647. }
  648. x86ms->max_ram_below_4g = value;
  649. }
  650. static void x86_machine_initfn(Object *obj)
  651. {
  652. X86MachineState *x86ms = X86_MACHINE(obj);
  653. x86ms->max_ram_below_4g = 0; /* use default */
  654. x86ms->smp_dies = 1;
  655. }
  656. static void x86_machine_class_init(ObjectClass *oc, void *data)
  657. {
  658. MachineClass *mc = MACHINE_CLASS(oc);
  659. X86MachineClass *x86mc = X86_MACHINE_CLASS(oc);
  660. NMIClass *nc = NMI_CLASS(oc);
  661. mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
  662. mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
  663. mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
  664. x86mc->compat_apic_id_mode = false;
  665. x86mc->save_tsc_khz = true;
  666. nc->nmi_monitor_handler = x86_nmi;
  667. object_class_property_add(oc, X86_MACHINE_MAX_RAM_BELOW_4G, "size",
  668. x86_machine_get_max_ram_below_4g, x86_machine_set_max_ram_below_4g,
  669. NULL, NULL, &error_abort);
  670. object_class_property_set_description(oc, X86_MACHINE_MAX_RAM_BELOW_4G,
  671. "Maximum ram below the 4G boundary (32bit boundary)", &error_abort);
  672. }
  673. static const TypeInfo x86_machine_info = {
  674. .name = TYPE_X86_MACHINE,
  675. .parent = TYPE_MACHINE,
  676. .abstract = true,
  677. .instance_size = sizeof(X86MachineState),
  678. .instance_init = x86_machine_initfn,
  679. .class_size = sizeof(X86MachineClass),
  680. .class_init = x86_machine_class_init,
  681. .interfaces = (InterfaceInfo[]) {
  682. { TYPE_NMI },
  683. { }
  684. },
  685. };
  686. static void x86_machine_register_types(void)
  687. {
  688. type_register_static(&x86_machine_info);
  689. }
  690. type_init(x86_machine_register_types)