2
0

qemu-vmsr-helper.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. /*
  2. * Privileged RAPL MSR helper commands for QEMU
  3. *
  4. * Copyright (C) 2024 Red Hat, Inc. <aharivel@redhat.com>
  5. *
  6. * Author: Anthony Harivel <aharivel@redhat.com>
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; under version 2 of the License.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, see <http://www.gnu.org/licenses/>.
  19. */
  20. #include "qemu/osdep.h"
  21. #include <getopt.h>
  22. #include <stdbool.h>
  23. #include <sys/ioctl.h>
  24. #ifdef CONFIG_LIBCAP_NG
  25. #include <cap-ng.h>
  26. #endif
  27. #include <pwd.h>
  28. #include <grp.h>
  29. #include "qemu/help-texts.h"
  30. #include "qapi/error.h"
  31. #include "qemu/cutils.h"
  32. #include "qemu/main-loop.h"
  33. #include "qemu/module.h"
  34. #include "qemu/error-report.h"
  35. #include "qemu/config-file.h"
  36. #include "qemu-version.h"
  37. #include "qapi/error.h"
  38. #include "qemu/error-report.h"
  39. #include "qemu/log.h"
  40. #include "qemu/systemd.h"
  41. #include "io/channel.h"
  42. #include "io/channel-socket.h"
  43. #include "trace/control.h"
  44. #include "qemu-version.h"
  45. #include "rapl-msr-index.h"
  46. #define MSR_PATH_TEMPLATE "/dev/cpu/%u/msr"
  47. static char *socket_path;
  48. static char *pidfile;
  49. static enum { RUNNING, TERMINATE, TERMINATING } state;
  50. static QIOChannelSocket *server_ioc;
  51. static int server_watch;
  52. static int num_active_sockets = 1;
  53. static bool verbose;
  54. #ifdef CONFIG_LIBCAP_NG
  55. static int uid = -1;
  56. static int gid = -1;
  57. #endif
  58. static void compute_default_paths(void)
  59. {
  60. g_autofree char *state = qemu_get_local_state_dir();
  61. socket_path = g_build_filename(state, "run", "qemu-vmsr-helper.sock", NULL);
  62. pidfile = g_build_filename(state, "run", "qemu-vmsr-helper.pid", NULL);
  63. }
  /*
   * Report whether the host CPU identifies itself as "GenuineIntel".
   *
   * Executes CPUID leaf 0 and compares the vendor string returned in
   * EBX:EDX:ECX.
   *
   * Returns 1 when the vendor string matches, 0 otherwise.
   */
  static int is_intel_processor(void)
  {
      uint32_t eax = 0;   /* leaf 0: basic identification */
      uint32_t ebx, ecx, edx;

      /*
       * CPUID overwrites EAX as well as EBX/ECX/EDX, so EAX must be a
       * read-write operand; declaring it input-only would let the compiler
       * assume the register still holds 0 afterwards.
       */
      __asm__ __volatile__ (
          "cpuid"
          : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
      );

      /*
       * Check if processor is "GenuineIntel"
       * 0x756e6547 = "Genu"
       * 0x49656e69 = "ineI"
       * 0x6c65746e = "ntel"
       */
      return ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e;
  }
  /*
   * Read the integer stored in /sys/class/powercap/intel-rapl/enabled.
   *
   * Returns the parsed value, or 0 when the file cannot be opened or does
   * not start with an integer.  An error is reported in both failure cases.
   */
  static int is_rapl_enabled(void)
  {
      const char *path = "/sys/class/powercap/intel-rapl/enabled";
      int enabled = 0;
      FILE *fp = fopen(path, "r");

      if (fp == NULL) {
          error_report("Error opening %s", path);
          return enabled;
      }

      if (fscanf(fp, "%d", &enabled) != 1) {
          error_report("INTEL RAPL not enabled");
      }

      fclose(fp);
      return enabled;
  }
  98. /*
  99. * Check if the TID that request the MSR read
  100. * belongs to the peer. It be should a TID of a vCPU.
  101. */
  102. static bool is_tid_present(pid_t pid, pid_t tid)
  103. {
  104. g_autofree char *tidPath = g_strdup_printf("/proc/%d/task/%d", pid, tid);
  105. /* Check if the TID directory exists within the PID directory */
  106. if (access(tidPath, F_OK) == 0) {
  107. return true;
  108. }
  109. error_report("Failed to open /proc at %s", tidPath);
  110. return false;
  111. }
  112. /*
  113. * Only the RAPL MSR in target/i386/cpu.h are allowed
  114. */
  115. static bool is_msr_allowed(uint32_t reg)
  116. {
  117. switch (reg) {
  118. case MSR_RAPL_POWER_UNIT:
  119. case MSR_PKG_POWER_LIMIT:
  120. case MSR_PKG_ENERGY_STATUS:
  121. case MSR_PKG_POWER_INFO:
  122. return true;
  123. default:
  124. return false;
  125. }
  126. }
  127. static uint64_t vmsr_read_msr(uint32_t msr_register, unsigned int cpu_id)
  128. {
  129. int fd;
  130. uint64_t result = 0;
  131. g_autofree char *path = g_strdup_printf(MSR_PATH_TEMPLATE, cpu_id);
  132. fd = open(path, O_RDONLY);
  133. if (fd < 0) {
  134. error_report("Failed to open MSR file at %s", path);
  135. return result;
  136. }
  137. if (pread(fd, &result, sizeof(result), msr_register) != sizeof(result)) {
  138. error_report("Failed to read MSR");
  139. result = 0;
  140. }
  141. close(fd);
  142. return result;
  143. }
  144. static void usage(const char *name)
  145. {
  146. (printf) (
  147. "Usage: %s [OPTIONS] FILE\n"
  148. "Virtual RAPL MSR helper program for QEMU\n"
  149. "\n"
  150. " -h, --help display this help and exit\n"
  151. " -V, --version output version information and exit\n"
  152. "\n"
  153. " -d, --daemon run in the background\n"
  154. " -f, --pidfile=PATH PID file when running as a daemon\n"
  155. " (default '%s')\n"
  156. " -k, --socket=PATH path to the unix socket\n"
  157. " (default '%s')\n"
  158. " -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
  159. " specify tracing options\n"
  160. #ifdef CONFIG_LIBCAP_NG
  161. " -u, --user=USER user to drop privileges to\n"
  162. " -g, --group=GROUP group to drop privileges to\n"
  163. #endif
  164. "\n"
  165. QEMU_HELP_BOTTOM "\n"
  166. , name, pidfile, socket_path);
  167. }
  168. static void version(const char *name)
  169. {
  170. printf(
  171. "%s " QEMU_FULL_VERSION "\n"
  172. "Written by Anthony Harivel.\n"
  173. "\n"
  174. QEMU_COPYRIGHT "\n"
  175. "This is free software; see the source for copying conditions. There is NO\n"
  176. "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
  177. , name);
  178. }
  179. typedef struct VMSRHelperClient {
  180. QIOChannelSocket *ioc;
  181. Coroutine *co;
  182. } VMSRHelperClient;
  183. static void coroutine_fn vh_co_entry(void *opaque)
  184. {
  185. VMSRHelperClient *client = opaque;
  186. Error *local_err = NULL;
  187. unsigned int peer_pid;
  188. uint32_t request[3];
  189. uint64_t vmsr;
  190. int r;
  191. qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
  192. false, NULL);
  193. qio_channel_set_follow_coroutine_ctx(QIO_CHANNEL(client->ioc), true);
  194. /*
  195. * Check peer credentials
  196. */
  197. r = qio_channel_get_peerpid(QIO_CHANNEL(client->ioc),
  198. &peer_pid,
  199. &local_err);
  200. if (r < 0) {
  201. goto out;
  202. }
  203. for (;;) {
  204. /*
  205. * Read the requested MSR
  206. * Only RAPL MSR in rapl-msr-index.h is allowed
  207. */
  208. r = qio_channel_read_all_eof(QIO_CHANNEL(client->ioc),
  209. (char *) &request, sizeof(request), &local_err);
  210. if (r <= 0) {
  211. break;
  212. }
  213. if (!is_msr_allowed(request[0])) {
  214. error_report("Requested unallowed msr: %d", request[0]);
  215. break;
  216. }
  217. vmsr = vmsr_read_msr(request[0], request[1]);
  218. if (!is_tid_present(peer_pid, request[2])) {
  219. error_report("Requested TID not in peer PID: %d %d",
  220. peer_pid, request[2]);
  221. vmsr = 0;
  222. }
  223. r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
  224. (char *) &vmsr,
  225. sizeof(vmsr),
  226. &local_err);
  227. if (r < 0) {
  228. break;
  229. }
  230. }
  231. out:
  232. if (local_err) {
  233. if (!verbose) {
  234. error_free(local_err);
  235. } else {
  236. error_report_err(local_err);
  237. }
  238. }
  239. object_unref(OBJECT(client->ioc));
  240. g_free(client);
  241. }
  242. static gboolean accept_client(QIOChannel *ioc,
  243. GIOCondition cond,
  244. gpointer opaque)
  245. {
  246. QIOChannelSocket *cioc;
  247. VMSRHelperClient *vmsrh;
  248. cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
  249. NULL);
  250. if (!cioc) {
  251. return TRUE;
  252. }
  253. vmsrh = g_new(VMSRHelperClient, 1);
  254. vmsrh->ioc = cioc;
  255. vmsrh->co = qemu_coroutine_create(vh_co_entry, vmsrh);
  256. qemu_coroutine_enter(vmsrh->co);
  257. return TRUE;
  258. }
  259. static void termsig_handler(int signum)
  260. {
  261. qatomic_cmpxchg(&state, RUNNING, TERMINATE);
  262. qemu_notify_event();
  263. }
  264. static void close_server_socket(void)
  265. {
  266. assert(server_ioc);
  267. g_source_remove(server_watch);
  268. server_watch = -1;
  269. object_unref(OBJECT(server_ioc));
  270. num_active_sockets--;
  271. }
  #ifdef CONFIG_LIBCAP_NG
  /*
   * Reduce the process to the single capability it needs, CAP_SYS_RAWIO, and
   * switch to the user and group requested with -u / -g.
   *
   * When -u or -g was not given (value -1), the current uid or gid is kept.
   * Supplementary groups are dropped and the bounding set is cleared.
   *
   * Returns 0 on success, -1 when the capability set cannot be built or
   * applied.
   *
   * NOTE(review): after switching to a non-root user, opening
   * /dev/cpu/<n>/msr still depends on that device node's file permissions
   * -- confirm the deployment grants access.
   */
  static int drop_privileges(void)
  {
      /* clear all capabilities */
      capng_clear(CAPNG_SELECT_BOTH);

      if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
                       CAP_SYS_RAWIO) < 0) {
          return -1;
      }

      /*
       * capng_clear()/capng_update() only edit libcap-ng's in-memory state.
       * capng_change_id() is what applies it to the process, changing the
       * ids while keeping the selected capability.
       */
      if (capng_change_id(uid != -1 ? uid : getuid(),
                          gid != -1 ? gid : getgid(),
                          CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING)) {
          return -1;
      }

      return 0;
  }
  #endif
  284. int main(int argc, char **argv)
  285. {
  286. const char *sopt = "hVk:f:dT:u:g:vq";
  287. struct option lopt[] = {
  288. { "help", no_argument, NULL, 'h' },
  289. { "version", no_argument, NULL, 'V' },
  290. { "socket", required_argument, NULL, 'k' },
  291. { "pidfile", required_argument, NULL, 'f' },
  292. { "daemon", no_argument, NULL, 'd' },
  293. { "trace", required_argument, NULL, 'T' },
  294. { "verbose", no_argument, NULL, 'v' },
  295. { NULL, 0, NULL, 0 }
  296. };
  297. int opt_ind = 0;
  298. int ch;
  299. Error *local_err = NULL;
  300. bool daemonize = false;
  301. bool pidfile_specified = false;
  302. bool socket_path_specified = false;
  303. unsigned socket_activation;
  304. struct sigaction sa_sigterm;
  305. memset(&sa_sigterm, 0, sizeof(sa_sigterm));
  306. sa_sigterm.sa_handler = termsig_handler;
  307. sigaction(SIGTERM, &sa_sigterm, NULL);
  308. sigaction(SIGINT, &sa_sigterm, NULL);
  309. sigaction(SIGHUP, &sa_sigterm, NULL);
  310. signal(SIGPIPE, SIG_IGN);
  311. error_init(argv[0]);
  312. module_call_init(MODULE_INIT_TRACE);
  313. module_call_init(MODULE_INIT_QOM);
  314. qemu_add_opts(&qemu_trace_opts);
  315. qemu_init_exec_dir(argv[0]);
  316. compute_default_paths();
  317. /*
  318. * Sanity check
  319. * 1. cpu must be Intel cpu
  320. * 2. RAPL must be enabled
  321. */
  322. if (!is_intel_processor()) {
  323. error_report("error: CPU is not INTEL cpu");
  324. exit(EXIT_FAILURE);
  325. }
  326. if (!is_rapl_enabled()) {
  327. error_report("error: RAPL driver not enable");
  328. exit(EXIT_FAILURE);
  329. }
  330. while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
  331. switch (ch) {
  332. case 'k':
  333. g_free(socket_path);
  334. socket_path = g_strdup(optarg);
  335. socket_path_specified = true;
  336. if (socket_path[0] != '/') {
  337. error_report("socket path must be absolute");
  338. exit(EXIT_FAILURE);
  339. }
  340. break;
  341. case 'f':
  342. g_free(pidfile);
  343. pidfile = g_strdup(optarg);
  344. pidfile_specified = true;
  345. break;
  346. #ifdef CONFIG_LIBCAP_NG
  347. case 'u': {
  348. unsigned long res;
  349. struct passwd *userinfo = getpwnam(optarg);
  350. if (userinfo) {
  351. uid = userinfo->pw_uid;
  352. } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
  353. (uid_t)res == res) {
  354. uid = res;
  355. } else {
  356. error_report("invalid user '%s'", optarg);
  357. exit(EXIT_FAILURE);
  358. }
  359. break;
  360. }
  361. case 'g': {
  362. unsigned long res;
  363. struct group *groupinfo = getgrnam(optarg);
  364. if (groupinfo) {
  365. gid = groupinfo->gr_gid;
  366. } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
  367. (gid_t)res == res) {
  368. gid = res;
  369. } else {
  370. error_report("invalid group '%s'", optarg);
  371. exit(EXIT_FAILURE);
  372. }
  373. break;
  374. }
  375. #else
  376. case 'u':
  377. case 'g':
  378. error_report("-%c not supported by this %s", ch, argv[0]);
  379. exit(1);
  380. #endif
  381. case 'd':
  382. daemonize = true;
  383. break;
  384. case 'v':
  385. verbose = true;
  386. break;
  387. case 'T':
  388. trace_opt_parse(optarg);
  389. break;
  390. case 'V':
  391. version(argv[0]);
  392. exit(EXIT_SUCCESS);
  393. break;
  394. case 'h':
  395. usage(argv[0]);
  396. exit(EXIT_SUCCESS);
  397. break;
  398. case '?':
  399. error_report("Try `%s --help' for more information.", argv[0]);
  400. exit(EXIT_FAILURE);
  401. }
  402. }
  403. if (!trace_init_backends()) {
  404. exit(EXIT_FAILURE);
  405. }
  406. trace_init_file();
  407. qemu_set_log(LOG_TRACE, &error_fatal);
  408. socket_activation = check_socket_activation();
  409. if (socket_activation == 0) {
  410. SocketAddress saddr;
  411. saddr = (SocketAddress){
  412. .type = SOCKET_ADDRESS_TYPE_UNIX,
  413. .u.q_unix.path = socket_path,
  414. };
  415. server_ioc = qio_channel_socket_new();
  416. if (qio_channel_socket_listen_sync(server_ioc, &saddr,
  417. 1, &local_err) < 0) {
  418. object_unref(OBJECT(server_ioc));
  419. error_report_err(local_err);
  420. return 1;
  421. }
  422. } else {
  423. /* Using socket activation - check user didn't use -p etc. */
  424. if (socket_path_specified) {
  425. error_report("Unix socket can't be set when"
  426. "using socket activation");
  427. exit(EXIT_FAILURE);
  428. }
  429. /* Can only listen on a single socket. */
  430. if (socket_activation > 1) {
  431. error_report("%s does not support socket activation"
  432. "with LISTEN_FDS > 1",
  433. argv[0]);
  434. exit(EXIT_FAILURE);
  435. }
  436. server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD,
  437. &local_err);
  438. if (server_ioc == NULL) {
  439. error_reportf_err(local_err,
  440. "Failed to use socket activation: ");
  441. exit(EXIT_FAILURE);
  442. }
  443. }
  444. qemu_init_main_loop(&error_fatal);
  445. server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
  446. G_IO_IN,
  447. accept_client,
  448. NULL, NULL);
  449. if (daemonize) {
  450. if (daemon(0, 0) < 0) {
  451. error_report("Failed to daemonize: %s", strerror(errno));
  452. exit(EXIT_FAILURE);
  453. }
  454. }
  455. if (daemonize || pidfile_specified) {
  456. qemu_write_pidfile(pidfile, &error_fatal);
  457. }
  458. #ifdef CONFIG_LIBCAP_NG
  459. if (drop_privileges() < 0) {
  460. error_report("Failed to drop privileges: %s", strerror(errno));
  461. exit(EXIT_FAILURE);
  462. }
  463. #endif
  464. info_report("Listening on %s", socket_path);
  465. state = RUNNING;
  466. do {
  467. main_loop_wait(false);
  468. if (state == TERMINATE) {
  469. state = TERMINATING;
  470. close_server_socket();
  471. }
  472. } while (num_active_sockets > 0);
  473. exit(EXIT_SUCCESS);
  474. }