asset.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. # Test utilities for fetching & caching assets
  2. #
  3. # Copyright 2024 Red Hat, Inc.
  4. #
  5. # This work is licensed under the terms of the GNU GPL, version 2 or
  6. # later. See the COPYING file in the top-level directory.
  7. import hashlib
  8. import logging
  9. import os
  10. import stat
  11. import sys
  12. import unittest
  13. import urllib.request
  14. from time import sleep
  15. from pathlib import Path
  16. from shutil import copyfileobj
  17. from urllib.error import HTTPError
  18. class AssetError(Exception):
  19. def __init__(self, asset, msg, transient=False):
  20. self.url = asset.url
  21. self.msg = msg
  22. self.transient = transient
  23. def __str__(self):
  24. return "%s: %s" % (self.url, self.msg)
  25. # Instances of this class must be declared as class level variables
  26. # starting with a name "ASSET_". This enables the pre-caching logic
  27. # to easily find all referenced assets and download them prior to
  28. # execution of the tests.
  29. class Asset:
  30. def __init__(self, url, hashsum):
  31. self.url = url
  32. self.hash = hashsum
  33. cache_dir_env = os.getenv('QEMU_TEST_CACHE_DIR')
  34. if cache_dir_env:
  35. self.cache_dir = Path(cache_dir_env, "download")
  36. else:
  37. self.cache_dir = Path(Path("~").expanduser(),
  38. ".cache", "qemu", "download")
  39. self.cache_file = Path(self.cache_dir, hashsum)
  40. self.log = logging.getLogger('qemu-test')
  41. def __repr__(self):
  42. return "Asset: url=%s hash=%s cache=%s" % (
  43. self.url, self.hash, self.cache_file)
  44. def __str__(self):
  45. return str(self.cache_file)
  46. def _check(self, cache_file):
  47. if self.hash is None:
  48. return True
  49. if len(self.hash) == 64:
  50. hl = hashlib.sha256()
  51. elif len(self.hash) == 128:
  52. hl = hashlib.sha512()
  53. else:
  54. raise AssetError(self, "unknown hash type")
  55. # Calculate the hash of the file:
  56. with open(cache_file, 'rb') as file:
  57. while True:
  58. chunk = file.read(1 << 20)
  59. if not chunk:
  60. break
  61. hl.update(chunk)
  62. return self.hash == hl.hexdigest()
  63. def valid(self):
  64. return self.cache_file.exists() and self._check(self.cache_file)
  65. def fetchable(self):
  66. return not os.environ.get("QEMU_TEST_NO_DOWNLOAD", False)
  67. def available(self):
  68. return self.valid() or self.fetchable()
  69. def _wait_for_other_download(self, tmp_cache_file):
  70. # Another thread already seems to download the asset, so wait until
  71. # it is done, while also checking the size to see whether it is stuck
  72. try:
  73. current_size = tmp_cache_file.stat().st_size
  74. new_size = current_size
  75. except:
  76. if os.path.exists(self.cache_file):
  77. return True
  78. raise
  79. waittime = lastchange = 600
  80. while waittime > 0:
  81. sleep(1)
  82. waittime -= 1
  83. try:
  84. new_size = tmp_cache_file.stat().st_size
  85. except:
  86. if os.path.exists(self.cache_file):
  87. return True
  88. raise
  89. if new_size != current_size:
  90. lastchange = waittime
  91. current_size = new_size
  92. elif lastchange - waittime > 90:
  93. return False
  94. self.log.debug("Time out while waiting for %s!", tmp_cache_file)
  95. raise
  96. def fetch(self):
  97. if not self.cache_dir.exists():
  98. self.cache_dir.mkdir(parents=True, exist_ok=True)
  99. if self.valid():
  100. self.log.debug("Using cached asset %s for %s",
  101. self.cache_file, self.url)
  102. return str(self.cache_file)
  103. if not self.fetchable():
  104. raise AssetError(self,
  105. "Asset cache is invalid and downloads disabled")
  106. self.log.info("Downloading %s to %s...", self.url, self.cache_file)
  107. tmp_cache_file = self.cache_file.with_suffix(".download")
  108. for retries in range(3):
  109. try:
  110. with tmp_cache_file.open("xb") as dst:
  111. with urllib.request.urlopen(self.url) as resp:
  112. copyfileobj(resp, dst)
  113. length_hdr = resp.getheader("Content-Length")
  114. # Verify downloaded file size against length metadata, if
  115. # available.
  116. if length_hdr is not None:
  117. length = int(length_hdr)
  118. fsize = tmp_cache_file.stat().st_size
  119. if fsize != length:
  120. self.log.error("Unable to download %s: "
  121. "connection closed before "
  122. "transfer complete (%d/%d)",
  123. self.url, fsize, length)
  124. tmp_cache_file.unlink()
  125. continue
  126. break
  127. except FileExistsError:
  128. self.log.debug("%s already exists, "
  129. "waiting for other thread to finish...",
  130. tmp_cache_file)
  131. if self._wait_for_other_download(tmp_cache_file):
  132. return str(self.cache_file)
  133. self.log.debug("%s seems to be stale, "
  134. "deleting and retrying download...",
  135. tmp_cache_file)
  136. tmp_cache_file.unlink()
  137. continue
  138. except HTTPError as e:
  139. tmp_cache_file.unlink()
  140. self.log.error("Unable to download %s: HTTP error %d",
  141. self.url, e.code)
  142. # Treat 404 as fatal, since it is highly likely to
  143. # indicate a broken test rather than a transient
  144. # server or networking problem
  145. if e.code == 404:
  146. raise AssetError(self, "Unable to download: "
  147. "HTTP error %d" % e.code)
  148. continue
  149. except Exception as e:
  150. tmp_cache_file.unlink()
  151. raise AssetError(self, "Unable to download: " % e)
  152. if not os.path.exists(tmp_cache_file):
  153. raise AssetError(self, "Download retries exceeded", transient=True)
  154. try:
  155. # Set these just for informational purposes
  156. os.setxattr(str(tmp_cache_file), "user.qemu-asset-url",
  157. self.url.encode('utf8'))
  158. os.setxattr(str(tmp_cache_file), "user.qemu-asset-hash",
  159. self.hash.encode('utf8'))
  160. except Exception as e:
  161. self.log.debug("Unable to set xattr on %s: %s", tmp_cache_file, e)
  162. pass
  163. if not self._check(tmp_cache_file):
  164. tmp_cache_file.unlink()
  165. raise AssetError(self, "Hash does not match %s" % self.hash)
  166. tmp_cache_file.replace(self.cache_file)
  167. # Remove write perms to stop tests accidentally modifying them
  168. os.chmod(self.cache_file, stat.S_IRUSR | stat.S_IRGRP)
  169. self.log.info("Cached %s at %s" % (self.url, self.cache_file))
  170. return str(self.cache_file)
  171. def precache_test(test):
  172. log = logging.getLogger('qemu-test')
  173. log.setLevel(logging.DEBUG)
  174. handler = logging.StreamHandler(sys.stdout)
  175. handler.setLevel(logging.DEBUG)
  176. formatter = logging.Formatter(
  177. '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  178. handler.setFormatter(formatter)
  179. log.addHandler(handler)
  180. for name, asset in vars(test.__class__).items():
  181. if name.startswith("ASSET_") and type(asset) == Asset:
  182. log.info("Attempting to cache '%s'" % asset)
  183. try:
  184. asset.fetch()
  185. except AssetError as e:
  186. if not e.transient:
  187. raise
  188. log.error("%s: skipping asset precache" % e)
  189. log.removeHandler(handler)
  190. def precache_suite(suite):
  191. for test in suite:
  192. if isinstance(test, unittest.TestSuite):
  193. Asset.precache_suite(test)
  194. elif isinstance(test, unittest.TestCase):
  195. Asset.precache_test(test)
  196. def precache_suites(path, cacheTstamp):
  197. loader = unittest.loader.defaultTestLoader
  198. tests = loader.loadTestsFromNames([path], None)
  199. with open(cacheTstamp, "w") as fh:
  200. Asset.precache_suite(tests)