vmclock-abi.h 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
  2. /*
  3. * This structure provides a vDSO-style clock to VM guests, exposing the
  4. * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch
  5. * counter, etc.) and real time. It is designed to address the problem of
  6. * live migration, which other clock enlightenments do not.
  7. *
  8. * When a guest is live migrated, this affects the clock in two ways.
  9. *
  10. * First, even between identical hosts the actual frequency of the underlying
  11. * counter will change within the tolerances of its specification (typically
  12. * ±50PPM, or 4 seconds a day). This frequency also varies over time on the
  13. * same host, but can be tracked by NTP as it generally varies slowly. With
  14. * live migration there is a step change in the frequency, with no warning.
  15. *
  16. * Second, there may be a step change in the value of the counter itself, as
  17. * its accuracy is limited by the precision of the NTP synchronization on the
  18. * source and destination hosts.
  19. *
  20. * So any calibration (NTP, PTP, etc.) which the guest has done on the source
  21. * host before migration is invalid, and needs to be redone on the new host.
  22. *
  23. * In its most basic mode, this structure provides only an indication to the
  24. * guest that live migration has occurred. This allows the guest to know that
  25. * its clock is invalid and take remedial action. For applications that need
  26. * reliable accurate timestamps (e.g. distributed databases), the structure
  27. * can be mapped all the way to userspace. This allows the application to see
  28. * directly for itself that the clock is disrupted and take appropriate
  29. * action, even when using a vDSO-style method to get the time instead of a
  30. * system call.
  31. *
  32. * In its more advanced mode, this structure can also be used to expose the
  33. * precise relationship of the CPU counter to real time, as calibrated by the
  34. * host. This means that userspace applications can have accurate time
  35. * immediately after live migration, rather than having to pause operations
  36. * and wait for NTP to recover. This mode does, of course, rely on the
  37. * counter being reliable and consistent across CPUs.
  38. *
  39. * Note that this must be true UTC, never with smeared leap seconds. If a
  40. * guest wishes to construct a smeared clock, it can do so. Presenting a
  41. * smeared clock through this interface would be problematic because it
  42. * actually messes with the apparent counter *period*. A linear smearing
  43. * of 1 ms per second would effectively tweak the counter period by 1000PPM
  44. * at the start/end of the smearing period, while a sinusoidal smear would
  45. * basically be impossible to represent.
  46. *
  47. * This structure is offered with the intent that it be adopted into the
  48. * nascent virtio-rtc standard, as a virtio-rtc that does not address the live
  49. * migration problem seems a little less than fit for purpose. For that
  50. * reason, certain fields use precisely the same numeric definitions as in
  51. * the virtio-rtc proposal. The structure can also be exposed through an ACPI
  52. * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for
  53. * the fact that it uses a real _CRS to convey the address of the structure
  54. * (which should be a full page, to allow for mapping directly to userspace).
  55. */
  56. #ifndef __VMCLOCK_ABI_H__
  57. #define __VMCLOCK_ABI_H__
  58. #include "standard-headers/linux/types.h"
  /*
   * Clock region shared with the guest. The leading fields are constant;
   * the fields from seq_count onwards may change and are protected by the
   * seq_count sequence counter, whose low bit is set while an update is
   * in progress.
   *
   * This is an ABI structure: member order and widths must not change.
   */
  struct vmclock_abi {
      /* CONSTANT FIELDS */
      uint32_t magic;
  #define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */
      uint32_t size;      /* Size of region containing this structure */
      uint16_t version;   /* 1 */
      uint8_t counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */
  #define VMCLOCK_COUNTER_ARM_VCNT 0
  #define VMCLOCK_COUNTER_X86_TSC  1
  #define VMCLOCK_COUNTER_INVALID  0xff
      uint8_t time_type;  /* Matches VIRTIO_RTC_TYPE_xxx */
  #define VMCLOCK_TIME_UTC                   0 /* Since 1970-01-01 00:00:00z */
  #define VMCLOCK_TIME_TAI                   1 /* Since 1970-01-01 00:00:00z */
  #define VMCLOCK_TIME_MONOTONIC             2 /* Since undefined epoch */
  #define VMCLOCK_TIME_INVALID_SMEARED       3 /* Not supported */
  #define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */

      /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */
      uint32_t seq_count; /* Low bit means an update is in progress */

      /*
       * This field changes to another non-repeating value when the CPU
       * counter is disrupted, for example on live migration. This lets
       * the guest know that it should discard any calibration it has
       * performed of the counter against external sources (NTP/PTP/etc.).
       */
      uint64_t disruption_marker;

      uint64_t flags;
      /* Indicates that the tai_offset_sec field is valid */
  #define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0)
      /*
       * Optionally used to notify guests of pending maintenance events.
       * A guest which provides latency-sensitive services may wish to
       * remove itself from service if an event is coming up. Two flags
       * indicate the approximate imminence of the event.
       */
  #define VMCLOCK_FLAG_DISRUPTION_SOON       (1 << 1) /* About a day */
  #define VMCLOCK_FLAG_DISRUPTION_IMMINENT   (1 << 2) /* About an hour */
  #define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3)
  #define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4)
  #define VMCLOCK_FLAG_TIME_ESTERROR_VALID   (1 << 5)
  #define VMCLOCK_FLAG_TIME_MAXERROR_VALID   (1 << 6)
      /*
       * If the MONOTONIC flag is set then (other than leap seconds) it is
       * guaranteed that the time calculated according to this structure at
       * any given moment shall never appear to be later than the time
       * calculated via the structure at any *later* moment.
       *
       * In particular, a timestamp based on a counter reading taken
       * immediately after setting the low bit of seq_count (and the
       * associated memory barrier), using the previously-valid time and
       * period fields, shall never be later than a timestamp based on
       * a counter reading taken immediately before *clearing* the low
       * bit again after the update, using the about-to-be-valid fields.
       */
  #define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7)

      uint8_t pad[2];

      uint8_t clock_status;
  #define VMCLOCK_STATUS_UNKNOWN      0
  #define VMCLOCK_STATUS_INITIALIZING 1
  #define VMCLOCK_STATUS_SYNCHRONIZED 2
  #define VMCLOCK_STATUS_FREERUNNING  3
  #define VMCLOCK_STATUS_UNRELIABLE   4

      /*
       * The time exposed through this device is never smeared. This field
       * corresponds to the 'subtype' field in virtio-rtc, which indicates
       * the smearing method. However in this case it provides a *hint* to
       * the guest operating system, such that *if* the guest OS wants to
       * provide its users with an alternative clock which does not follow
       * UTC, it may do so in a fashion consistent with the other systems
       * in the nearby environment.
       */
      uint8_t leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */
  #define VMCLOCK_SMEARING_STRICT      0
  #define VMCLOCK_SMEARING_NOON_LINEAR 1
  #define VMCLOCK_SMEARING_UTC_SLS     2

      uint16_t tai_offset_sec; /* Actually two's complement signed */

      uint8_t leap_indicator;
      /*
       * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined
       * in the current draft of virtio-rtc, but since smearing cannot be
       * used with the shared memory device, some values are not used.
       *
       * The _POST_POS and _POST_NEG values allow the guest to perform
       * its own smearing during the day or so after a leap second when
       * such smearing may need to continue being applied for a leap
       * second which is now theoretically "historical".
       */
  #define VMCLOCK_LEAP_NONE     0x00 /* No known nearby leap second */
  #define VMCLOCK_LEAP_PRE_POS  0x01 /* Positive leap second at EOM */
  #define VMCLOCK_LEAP_PRE_NEG  0x02 /* Negative leap second at EOM */
  #define VMCLOCK_LEAP_POS      0x03 /* Set during 23:59:60 second */
  #define VMCLOCK_LEAP_POST_POS 0x04
  #define VMCLOCK_LEAP_POST_NEG 0x05

      /* Bit shift for counter_period_frac_sec and its error rate */
      uint8_t counter_period_shift;

      /*
       * Paired values of counter and UTC at a given point in time.
       */
      uint64_t counter_value;

      /*
       * Counter period, and error margin of same. The unit of these
       * fields is 1/2^(64 + counter_period_shift) of a second.
       */
      uint64_t counter_period_frac_sec;
      uint64_t counter_period_esterror_rate_frac_sec;
      uint64_t counter_period_maxerror_rate_frac_sec;

      /*
       * Time according to time_type field above.
       */
      uint64_t time_sec;      /* Seconds since time_type epoch */
      uint64_t time_frac_sec; /* Units of 1/2^64 of a second */
      uint64_t time_esterror_nanosec;
      uint64_t time_maxerror_nanosec;
  };
  172. #endif /* __VMCLOCK_ABI_H__ */