qemu-vmsr-helper.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. /*
  2. * Privileged RAPL MSR helper commands for QEMU
  3. *
  4. * Copyright (C) 2024 Red Hat, Inc. <aharivel@redhat.com>
  5. *
  6. * Author: Anthony Harivel <aharivel@redhat.com>
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; under version 2 of the License.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, see <http://www.gnu.org/licenses/>.
  19. */
  20. #include "qemu/osdep.h"
  21. #include <getopt.h>
  22. #include <stdbool.h>
  23. #include <sys/ioctl.h>
  24. #ifdef CONFIG_LIBCAP_NG
  25. #include <cap-ng.h>
  26. #endif
  27. #include <pwd.h>
  28. #include <grp.h>
  29. #include "qemu/help-texts.h"
  30. #include "qapi/error.h"
  31. #include "qemu/cutils.h"
  32. #include "qemu/main-loop.h"
  33. #include "qemu/module.h"
  34. #include "qemu/error-report.h"
  35. #include "qemu/config-file.h"
  36. #include "qemu-version.h"
  37. #include "qapi/error.h"
  38. #include "qemu/error-report.h"
  39. #include "qemu/log.h"
  40. #include "qemu/systemd.h"
  41. #include "io/channel.h"
  42. #include "io/channel-socket.h"
  43. #include "trace/control.h"
  44. #include "qemu-version.h"
  45. #include "rapl-msr-index.h"
  46. #define MSR_PATH_TEMPLATE "/dev/cpu/%u/msr"
  47. static char *socket_path;
  48. static char *pidfile;
  49. static enum { RUNNING, TERMINATE, TERMINATING } state;
  50. static QIOChannelSocket *server_ioc;
  51. static int server_watch;
  52. static int num_active_sockets = 1;
  53. #ifdef CONFIG_LIBCAP_NG
  54. static int uid = -1;
  55. static int gid = -1;
  56. #endif
  57. static void compute_default_paths(void)
  58. {
  59. g_autofree char *state = qemu_get_local_state_dir();
  60. socket_path = g_build_filename(state, "run", "qemu-vmsr-helper.sock", NULL);
  61. pidfile = g_build_filename(state, "run", "qemu-vmsr-helper.pid", NULL);
  62. }
  63. static int is_intel_processor(void)
  64. {
  65. int result;
  66. int ebx, ecx, edx;
  67. /* Execute CPUID instruction with eax=0 (basic identification) */
  68. asm volatile (
  69. "cpuid"
  70. : "=b" (ebx), "=c" (ecx), "=d" (edx)
  71. : "a" (0)
  72. );
  73. /*
  74. * Check if processor is "GenuineIntel"
  75. * 0x756e6547 = "Genu"
  76. * 0x49656e69 = "ineI"
  77. * 0x6c65746e = "ntel"
  78. */
  79. result = (ebx == 0x756e6547) && (edx == 0x49656e69) && (ecx == 0x6c65746e);
  80. return result;
  81. }
  82. static int is_rapl_enabled(void)
  83. {
  84. const char *path = "/sys/class/powercap/intel-rapl/enabled";
  85. FILE *file = fopen(path, "r");
  86. int value = 0;
  87. if (file != NULL) {
  88. if (fscanf(file, "%d", &value) != 1) {
  89. error_report("INTEL RAPL not enabled");
  90. }
  91. fclose(file);
  92. } else {
  93. error_report("Error opening %s", path);
  94. }
  95. return value;
  96. }
  97. /*
  98. * Check if the TID that request the MSR read
  99. * belongs to the peer. It be should a TID of a vCPU.
  100. */
  101. static bool is_tid_present(pid_t pid, pid_t tid)
  102. {
  103. g_autofree char *tidPath = g_strdup_printf("/proc/%d/task/%d", pid, tid);
  104. /* Check if the TID directory exists within the PID directory */
  105. if (access(tidPath, F_OK) == 0) {
  106. return true;
  107. }
  108. error_report("Failed to open /proc at %s", tidPath);
  109. return false;
  110. }
  111. /*
  112. * Only the RAPL MSR in target/i386/cpu.h are allowed
  113. */
  114. static bool is_msr_allowed(uint32_t reg)
  115. {
  116. switch (reg) {
  117. case MSR_RAPL_POWER_UNIT:
  118. case MSR_PKG_POWER_LIMIT:
  119. case MSR_PKG_ENERGY_STATUS:
  120. case MSR_PKG_POWER_INFO:
  121. return true;
  122. default:
  123. return false;
  124. }
  125. }
  126. static uint64_t vmsr_read_msr(uint32_t msr_register, unsigned int cpu_id)
  127. {
  128. int fd;
  129. uint64_t result = 0;
  130. g_autofree char *path = g_strdup_printf(MSR_PATH_TEMPLATE, cpu_id);
  131. fd = open(path, O_RDONLY);
  132. if (fd < 0) {
  133. error_report("Failed to open MSR file at %s", path);
  134. return result;
  135. }
  136. if (pread(fd, &result, sizeof(result), msr_register) != sizeof(result)) {
  137. error_report("Failed to read MSR");
  138. result = 0;
  139. }
  140. close(fd);
  141. return result;
  142. }
  143. static void usage(const char *name)
  144. {
  145. (printf) (
  146. "Usage: %s [OPTIONS] FILE\n"
  147. "Virtual RAPL MSR helper program for QEMU\n"
  148. "\n"
  149. " -h, --help display this help and exit\n"
  150. " -V, --version output version information and exit\n"
  151. "\n"
  152. " -d, --daemon run in the background\n"
  153. " -f, --pidfile=PATH PID file when running as a daemon\n"
  154. " (default '%s')\n"
  155. " -k, --socket=PATH path to the unix socket\n"
  156. " (default '%s')\n"
  157. " -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
  158. " specify tracing options\n"
  159. #ifdef CONFIG_LIBCAP_NG
  160. " -u, --user=USER user to drop privileges to\n"
  161. " -g, --group=GROUP group to drop privileges to\n"
  162. #endif
  163. "\n"
  164. QEMU_HELP_BOTTOM "\n"
  165. , name, pidfile, socket_path);
  166. }
  167. static void version(const char *name)
  168. {
  169. printf(
  170. "%s " QEMU_FULL_VERSION "\n"
  171. "Written by Anthony Harivel.\n"
  172. "\n"
  173. QEMU_COPYRIGHT "\n"
  174. "This is free software; see the source for copying conditions. There is NO\n"
  175. "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
  176. , name);
  177. }
  178. typedef struct VMSRHelperClient {
  179. QIOChannelSocket *ioc;
  180. Coroutine *co;
  181. } VMSRHelperClient;
  182. static void coroutine_fn vh_co_entry(void *opaque)
  183. {
  184. VMSRHelperClient *client = opaque;
  185. Error *local_err = NULL;
  186. unsigned int peer_pid;
  187. uint32_t request[3];
  188. uint64_t vmsr;
  189. int r;
  190. qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
  191. false, NULL);
  192. qio_channel_set_follow_coroutine_ctx(QIO_CHANNEL(client->ioc), true);
  193. /*
  194. * Check peer credentials
  195. */
  196. r = qio_channel_get_peerpid(QIO_CHANNEL(client->ioc),
  197. &peer_pid,
  198. &local_err);
  199. if (r < 0) {
  200. error_report_err(local_err);
  201. goto out;
  202. }
  203. while (r < 0) {
  204. /*
  205. * Read the requested MSR
  206. * Only RAPL MSR in rapl-msr-index.h is allowed
  207. */
  208. r = qio_channel_read_all(QIO_CHANNEL(client->ioc),
  209. (char *) &request, sizeof(request), &local_err);
  210. if (r < 0) {
  211. error_report_err(local_err);
  212. break;
  213. }
  214. if (!is_msr_allowed(request[0])) {
  215. error_report("Requested unallowed msr: %d", request[0]);
  216. break;
  217. }
  218. vmsr = vmsr_read_msr(request[0], request[1]);
  219. if (!is_tid_present(peer_pid, request[2])) {
  220. error_report("Requested TID not in peer PID: %d %d",
  221. peer_pid, request[2]);
  222. vmsr = 0;
  223. }
  224. r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
  225. (char *) &vmsr,
  226. sizeof(vmsr),
  227. &local_err);
  228. if (r < 0) {
  229. error_report_err(local_err);
  230. break;
  231. }
  232. }
  233. out:
  234. object_unref(OBJECT(client->ioc));
  235. g_free(client);
  236. }
  237. static gboolean accept_client(QIOChannel *ioc,
  238. GIOCondition cond,
  239. gpointer opaque)
  240. {
  241. QIOChannelSocket *cioc;
  242. VMSRHelperClient *vmsrh;
  243. cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
  244. NULL);
  245. if (!cioc) {
  246. return TRUE;
  247. }
  248. vmsrh = g_new(VMSRHelperClient, 1);
  249. vmsrh->ioc = cioc;
  250. vmsrh->co = qemu_coroutine_create(vh_co_entry, vmsrh);
  251. qemu_coroutine_enter(vmsrh->co);
  252. return TRUE;
  253. }
  254. static void termsig_handler(int signum)
  255. {
  256. qatomic_cmpxchg(&state, RUNNING, TERMINATE);
  257. qemu_notify_event();
  258. }
  259. static void close_server_socket(void)
  260. {
  261. assert(server_ioc);
  262. g_source_remove(server_watch);
  263. server_watch = -1;
  264. object_unref(OBJECT(server_ioc));
  265. num_active_sockets--;
  266. }
  267. #ifdef CONFIG_LIBCAP_NG
  268. static int drop_privileges(void)
  269. {
  270. /* clear all capabilities */
  271. capng_clear(CAPNG_SELECT_BOTH);
  272. if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
  273. CAP_SYS_RAWIO) < 0) {
  274. return -1;
  275. }
  276. return 0;
  277. }
  278. #endif
  279. int main(int argc, char **argv)
  280. {
  281. const char *sopt = "hVk:f:dT:u:g:vq";
  282. struct option lopt[] = {
  283. { "help", no_argument, NULL, 'h' },
  284. { "version", no_argument, NULL, 'V' },
  285. { "socket", required_argument, NULL, 'k' },
  286. { "pidfile", required_argument, NULL, 'f' },
  287. { "daemon", no_argument, NULL, 'd' },
  288. { "trace", required_argument, NULL, 'T' },
  289. { "verbose", no_argument, NULL, 'v' },
  290. { NULL, 0, NULL, 0 }
  291. };
  292. int opt_ind = 0;
  293. int ch;
  294. Error *local_err = NULL;
  295. bool daemonize = false;
  296. bool pidfile_specified = false;
  297. bool socket_path_specified = false;
  298. unsigned socket_activation;
  299. struct sigaction sa_sigterm;
  300. memset(&sa_sigterm, 0, sizeof(sa_sigterm));
  301. sa_sigterm.sa_handler = termsig_handler;
  302. sigaction(SIGTERM, &sa_sigterm, NULL);
  303. sigaction(SIGINT, &sa_sigterm, NULL);
  304. sigaction(SIGHUP, &sa_sigterm, NULL);
  305. signal(SIGPIPE, SIG_IGN);
  306. error_init(argv[0]);
  307. module_call_init(MODULE_INIT_TRACE);
  308. module_call_init(MODULE_INIT_QOM);
  309. qemu_add_opts(&qemu_trace_opts);
  310. qemu_init_exec_dir(argv[0]);
  311. compute_default_paths();
  312. /*
  313. * Sanity check
  314. * 1. cpu must be Intel cpu
  315. * 2. RAPL must be enabled
  316. */
  317. if (!is_intel_processor()) {
  318. error_report("error: CPU is not INTEL cpu");
  319. exit(EXIT_FAILURE);
  320. }
  321. if (!is_rapl_enabled()) {
  322. error_report("error: RAPL driver not enable");
  323. exit(EXIT_FAILURE);
  324. }
  325. while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
  326. switch (ch) {
  327. case 'k':
  328. g_free(socket_path);
  329. socket_path = g_strdup(optarg);
  330. socket_path_specified = true;
  331. if (socket_path[0] != '/') {
  332. error_report("socket path must be absolute");
  333. exit(EXIT_FAILURE);
  334. }
  335. break;
  336. case 'f':
  337. g_free(pidfile);
  338. pidfile = g_strdup(optarg);
  339. pidfile_specified = true;
  340. break;
  341. #ifdef CONFIG_LIBCAP_NG
  342. case 'u': {
  343. unsigned long res;
  344. struct passwd *userinfo = getpwnam(optarg);
  345. if (userinfo) {
  346. uid = userinfo->pw_uid;
  347. } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
  348. (uid_t)res == res) {
  349. uid = res;
  350. } else {
  351. error_report("invalid user '%s'", optarg);
  352. exit(EXIT_FAILURE);
  353. }
  354. break;
  355. }
  356. case 'g': {
  357. unsigned long res;
  358. struct group *groupinfo = getgrnam(optarg);
  359. if (groupinfo) {
  360. gid = groupinfo->gr_gid;
  361. } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
  362. (gid_t)res == res) {
  363. gid = res;
  364. } else {
  365. error_report("invalid group '%s'", optarg);
  366. exit(EXIT_FAILURE);
  367. }
  368. break;
  369. }
  370. #else
  371. case 'u':
  372. case 'g':
  373. error_report("-%c not supported by this %s", ch, argv[0]);
  374. exit(1);
  375. #endif
  376. case 'd':
  377. daemonize = true;
  378. break;
  379. case 'T':
  380. trace_opt_parse(optarg);
  381. break;
  382. case 'V':
  383. version(argv[0]);
  384. exit(EXIT_SUCCESS);
  385. break;
  386. case 'h':
  387. usage(argv[0]);
  388. exit(EXIT_SUCCESS);
  389. break;
  390. case '?':
  391. error_report("Try `%s --help' for more information.", argv[0]);
  392. exit(EXIT_FAILURE);
  393. }
  394. }
  395. if (!trace_init_backends()) {
  396. exit(EXIT_FAILURE);
  397. }
  398. trace_init_file();
  399. qemu_set_log(LOG_TRACE, &error_fatal);
  400. socket_activation = check_socket_activation();
  401. if (socket_activation == 0) {
  402. SocketAddress saddr;
  403. saddr = (SocketAddress){
  404. .type = SOCKET_ADDRESS_TYPE_UNIX,
  405. .u.q_unix.path = socket_path,
  406. };
  407. server_ioc = qio_channel_socket_new();
  408. if (qio_channel_socket_listen_sync(server_ioc, &saddr,
  409. 1, &local_err) < 0) {
  410. object_unref(OBJECT(server_ioc));
  411. error_report_err(local_err);
  412. return 1;
  413. }
  414. } else {
  415. /* Using socket activation - check user didn't use -p etc. */
  416. if (socket_path_specified) {
  417. error_report("Unix socket can't be set when"
  418. "using socket activation");
  419. exit(EXIT_FAILURE);
  420. }
  421. /* Can only listen on a single socket. */
  422. if (socket_activation > 1) {
  423. error_report("%s does not support socket activation"
  424. "with LISTEN_FDS > 1",
  425. argv[0]);
  426. exit(EXIT_FAILURE);
  427. }
  428. server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD,
  429. &local_err);
  430. if (server_ioc == NULL) {
  431. error_reportf_err(local_err,
  432. "Failed to use socket activation: ");
  433. exit(EXIT_FAILURE);
  434. }
  435. }
  436. qemu_init_main_loop(&error_fatal);
  437. server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
  438. G_IO_IN,
  439. accept_client,
  440. NULL, NULL);
  441. if (daemonize) {
  442. if (daemon(0, 0) < 0) {
  443. error_report("Failed to daemonize: %s", strerror(errno));
  444. exit(EXIT_FAILURE);
  445. }
  446. }
  447. if (daemonize || pidfile_specified) {
  448. qemu_write_pidfile(pidfile, &error_fatal);
  449. }
  450. #ifdef CONFIG_LIBCAP_NG
  451. if (drop_privileges() < 0) {
  452. error_report("Failed to drop privileges: %s", strerror(errno));
  453. exit(EXIT_FAILURE);
  454. }
  455. #endif
  456. info_report("Listening on %s", socket_path);
  457. state = RUNNING;
  458. do {
  459. main_loop_wait(false);
  460. if (state == TERMINATE) {
  461. state = TERMINATING;
  462. close_server_socket();
  463. }
  464. } while (num_active_sockets > 0);
  465. exit(EXIT_SUCCESS);
  466. }