download_from_google_storage_unittest.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. #!/usr/bin/env vpython3
  2. # Copyright (c) 2012 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. # pylint: disable=protected-access
  6. """Unit tests for download_from_google_storage.py."""
  7. import optparse
  8. import os
  9. import queue
  10. import shutil
  11. import sys
  12. import tarfile
  13. import tempfile
  14. import threading
  15. import unittest
  16. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  17. import upload_to_google_storage
  18. import download_from_google_storage
  19. # ../third_party/gsutil/gsutil
  20. GSUTIL_DEFAULT_PATH = os.path.join(
  21. os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'gsutil.py')
  22. TEST_DIR = os.path.dirname(os.path.abspath(__file__))
  23. class GsutilMock(object):
  24. def __init__(self, path, boto_path, timeout=None):
  25. self.path = path
  26. self.timeout = timeout
  27. self.boto_path = boto_path
  28. self.expected = []
  29. self.history = []
  30. self.lock = threading.Lock()
  31. def add_expected(self, return_code, out, err, fn=None):
  32. self.expected.append((return_code, out, err, fn))
  33. def append_history(self, method, args):
  34. self.history.append((method, args))
  35. def call(self, *args):
  36. with self.lock:
  37. self.append_history('call', args)
  38. if self.expected:
  39. code, _out, _err, fn = self.expected.pop(0)
  40. if fn:
  41. fn()
  42. return code
  43. return 0
  44. def check_call(self, *args):
  45. with self.lock:
  46. self.append_history('check_call', args)
  47. if self.expected:
  48. code, out, err, fn = self.expected.pop(0)
  49. if fn:
  50. fn()
  51. return code, out, err
  52. return (0, '', '')
  53. def check_call_with_retries(self, *args):
  54. return self.check_call(*args)
  55. class ChangedWorkingDirectory(object):
  56. def __init__(self, working_directory):
  57. self._old_cwd = ''
  58. self._working_directory = working_directory
  59. def __enter__(self):
  60. self._old_cwd = os.getcwd()
  61. print("Enter directory = ", self._working_directory)
  62. os.chdir(self._working_directory)
  63. def __exit__(self, *_):
  64. print("Enter directory = ", self._old_cwd)
  65. os.chdir(self._old_cwd)
  66. class GstoolsUnitTests(unittest.TestCase):
  67. def setUp(self):
  68. self.temp_dir = tempfile.mkdtemp(prefix='gstools_test')
  69. self.base_path = os.path.join(self.temp_dir, 'test_files')
  70. shutil.copytree(os.path.join(TEST_DIR, 'gstools'), self.base_path)
  71. def tearDown(self):
  72. shutil.rmtree(self.temp_dir)
  73. def test_validate_tar_file(self):
  74. lorem_ipsum = os.path.join(self.base_path, 'lorem_ipsum.txt')
  75. with ChangedWorkingDirectory(self.base_path):
  76. # Sanity ok check.
  77. tar_dir = 'ok_dir'
  78. os.makedirs(os.path.join(self.base_path, tar_dir))
  79. tar = 'good.tar.gz'
  80. lorem_ipsum_copy = os.path.join(tar_dir, 'lorem_ipsum.txt')
  81. shutil.copyfile(lorem_ipsum, lorem_ipsum_copy)
  82. with tarfile.open(tar, 'w:gz') as tar:
  83. tar.add(lorem_ipsum_copy)
  84. self.assertTrue(
  85. download_from_google_storage._validate_tar_file(
  86. tar, tar_dir))
  87. # os.symlink doesn't exist on Windows.
  88. if sys.platform != 'win32':
  89. # Test no links.
  90. tar_dir_link = 'for_tar_link'
  91. os.makedirs(tar_dir_link)
  92. link = os.path.join(tar_dir_link, 'link')
  93. os.symlink(lorem_ipsum, link)
  94. tar_with_links = 'with_links.tar.gz'
  95. with tarfile.open(tar_with_links, 'w:gz') as tar:
  96. tar.add(link)
  97. self.assertFalse(
  98. download_from_google_storage._validate_tar_file(
  99. tar, tar_dir_link))
  100. # Test not outside.
  101. tar_dir_outside = 'outside_tar'
  102. os.makedirs(tar_dir_outside)
  103. tar_with_outside = 'with_outside.tar.gz'
  104. with tarfile.open(tar_with_outside, 'w:gz') as tar:
  105. tar.add(lorem_ipsum)
  106. self.assertFalse(
  107. download_from_google_storage._validate_tar_file(
  108. tar, tar_dir_outside))
  109. # Test no ../
  110. tar_with_dotdot = 'with_dotdot.tar.gz'
  111. dotdot_file = os.path.join(tar_dir, '..', tar_dir,
  112. 'lorem_ipsum.txt')
  113. with tarfile.open(tar_with_dotdot, 'w:gz') as tar:
  114. tar.add(dotdot_file)
  115. self.assertFalse(
  116. download_from_google_storage._validate_tar_file(
  117. tar, tar_dir))
  118. # Test normal file with .. in name okay
  119. tar_with_hidden = 'with_normal_dotdot.tar.gz'
  120. hidden_file = os.path.join(tar_dir, '..hidden_file.txt')
  121. shutil.copyfile(lorem_ipsum, hidden_file)
  122. with tarfile.open(tar_with_hidden, 'w:gz') as tar:
  123. tar.add(hidden_file)
  124. self.assertTrue(
  125. download_from_google_storage._validate_tar_file(
  126. tar, tar_dir))
  127. def test_gsutil(self):
  128. # This will download a real gsutil package from Google Storage.
  129. gsutil = download_from_google_storage.Gsutil(GSUTIL_DEFAULT_PATH, None)
  130. self.assertEqual(gsutil.path, GSUTIL_DEFAULT_PATH)
  131. code, _, err = gsutil.check_call()
  132. self.assertEqual(code, 0, err)
  133. self.assertEqual(err, '')
  134. def test_get_sha1(self):
  135. lorem_ipsum = os.path.join(self.base_path, 'lorem_ipsum.txt')
  136. self.assertEqual(download_from_google_storage.get_sha1(lorem_ipsum),
  137. '7871c8e24da15bad8b0be2c36edc9dc77e37727f')
  138. def test_get_md5(self):
  139. lorem_ipsum = os.path.join(self.base_path, 'lorem_ipsum.txt')
  140. self.assertEqual(upload_to_google_storage.get_md5(lorem_ipsum),
  141. '634d7c1ed3545383837428f031840a1e')
  142. def test_get_md5_cached_read(self):
  143. lorem_ipsum = os.path.join(self.base_path, 'lorem_ipsum.txt')
  144. # Use a fake 'stale' MD5 sum. Expected behavior is to return stale sum.
  145. self.assertEqual(upload_to_google_storage.get_md5_cached(lorem_ipsum),
  146. '734d7c1ed3545383837428f031840a1e')
  147. def test_get_md5_cached_write(self):
  148. lorem_ipsum2 = os.path.join(self.base_path, 'lorem_ipsum2.txt')
  149. lorem_ipsum2_md5 = os.path.join(self.base_path, 'lorem_ipsum2.txt.md5')
  150. if os.path.exists(lorem_ipsum2_md5):
  151. os.remove(lorem_ipsum2_md5)
  152. # Use a fake 'stale' MD5 sum. Expected behavior is to return stale sum.
  153. self.assertEqual(upload_to_google_storage.get_md5_cached(lorem_ipsum2),
  154. '4c02d1eb455a0f22c575265d17b84b6d')
  155. self.assertTrue(os.path.exists(lorem_ipsum2_md5))
  156. self.assertEqual(
  157. open(lorem_ipsum2_md5, 'rb').read().decode(),
  158. '4c02d1eb455a0f22c575265d17b84b6d')
  159. os.remove(lorem_ipsum2_md5) # Clean up.
  160. self.assertFalse(os.path.exists(lorem_ipsum2_md5))
  161. class DownloadTests(unittest.TestCase):
  162. def setUp(self):
  163. self.gsutil = GsutilMock(GSUTIL_DEFAULT_PATH, None)
  164. self.temp_dir = tempfile.mkdtemp(prefix='gstools_test')
  165. self.checkout_test_files = os.path.join(TEST_DIR, 'gstools',
  166. 'download_test_data')
  167. self.base_path = os.path.join(self.temp_dir, 'download_test_data')
  168. shutil.copytree(self.checkout_test_files, self.base_path)
  169. self.base_url = 'gs://sometesturl'
  170. self.parser = optparse.OptionParser()
  171. self.queue = queue.Queue()
  172. self.ret_codes = queue.Queue()
  173. self.lorem_ipsum = os.path.join(TEST_DIR, 'gstools', 'lorem_ipsum.txt')
  174. self.lorem_ipsum_sha1 = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
  175. self.maxDiff = None
  176. def tearDown(self):
  177. shutil.rmtree(self.temp_dir)
  178. def test_enumerate_files_non_recursive(self):
  179. for item in download_from_google_storage.enumerate_input(
  180. self.base_path, True, False, False, None, False, False):
  181. self.queue.put(item)
  182. expected_queue = [('e6c4fbd4fe7607f3e6ebf68b2ea4ef694da7b4fe',
  183. os.path.join(self.base_path, 'rootfolder_text.txt')),
  184. ('7871c8e24da15bad8b0be2c36edc9dc77e37727f',
  185. os.path.join(self.base_path,
  186. 'uploaded_lorem_ipsum.txt'))]
  187. self.assertEqual(sorted(expected_queue), sorted(self.queue.queue))
  188. def test_enumerate_files_recursive(self):
  189. for item in download_from_google_storage.enumerate_input(
  190. self.base_path, True, True, False, None, False, False):
  191. self.queue.put(item)
  192. expected_queue = [('e6c4fbd4fe7607f3e6ebf68b2ea4ef694da7b4fe',
  193. os.path.join(self.base_path, 'rootfolder_text.txt')),
  194. ('7871c8e24da15bad8b0be2c36edc9dc77e37727f',
  195. os.path.join(self.base_path,
  196. 'uploaded_lorem_ipsum.txt')),
  197. ('b5415aa0b64006a95c0c409182e628881d6d6463',
  198. os.path.join(self.base_path, 'subfolder',
  199. 'subfolder_text.txt'))]
  200. self.assertEqual(sorted(expected_queue), sorted(self.queue.queue))
  201. def test_download_worker_single_file(self):
  202. sha1_hash = self.lorem_ipsum_sha1
  203. input_filename = '%s/%s' % (self.base_url, sha1_hash)
  204. output_filename = os.path.join(self.base_path,
  205. 'uploaded_lorem_ipsum.txt')
  206. self.gsutil.add_expected(
  207. 0, '', '',
  208. lambda: shutil.copyfile(self.lorem_ipsum, output_filename)) # cp
  209. self.queue.put((sha1_hash, output_filename))
  210. self.queue.put((None, None))
  211. stdout_queue = queue.Queue()
  212. download_from_google_storage._downloader_worker_thread(
  213. 0, self.queue, False, self.base_url, self.gsutil, stdout_queue,
  214. self.ret_codes, True, False)
  215. expected_calls = [('check_call', ('cp', input_filename,
  216. output_filename))]
  217. sha1_hash = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
  218. if sys.platform != 'win32':
  219. expected_calls.append(
  220. ('check_call', ('stat', 'gs://sometesturl/' + sha1_hash)))
  221. expected_output = [
  222. '0> Downloading %s@%s...' % (output_filename, sha1_hash)
  223. ]
  224. expected_ret_codes = []
  225. self.assertEqual(list(stdout_queue.queue), expected_output)
  226. self.assertEqual(self.gsutil.history, expected_calls)
  227. self.assertEqual(list(self.ret_codes.queue), expected_ret_codes)
  228. def test_download_worker_skips_file(self):
  229. sha1_hash = 'e6c4fbd4fe7607f3e6ebf68b2ea4ef694da7b4fe'
  230. output_filename = os.path.join(self.base_path, 'rootfolder_text.txt')
  231. self.queue.put((sha1_hash, output_filename))
  232. self.queue.put((None, None))
  233. stdout_queue = queue.Queue()
  234. download_from_google_storage._downloader_worker_thread(
  235. 0, self.queue, False, self.base_url, self.gsutil, stdout_queue,
  236. self.ret_codes, True, False)
  237. # dfgs does not output anything in the no-op case.
  238. self.assertEqual(list(stdout_queue.queue), [])
  239. self.assertEqual(self.gsutil.history, [])
  240. def test_download_extract_archive(self):
  241. # Generate a gzipped tarfile
  242. output_filename = os.path.join(self.base_path, 'subfolder.tar.gz')
  243. output_dirname = os.path.join(self.base_path, 'subfolder')
  244. extracted_filename = os.path.join(output_dirname, 'subfolder_text.txt')
  245. with tarfile.open(output_filename, 'w:gz') as tar:
  246. tar.add(output_dirname, arcname='subfolder')
  247. shutil.rmtree(output_dirname)
  248. sha1_hash = download_from_google_storage.get_sha1(output_filename)
  249. input_filename = '%s/%s' % (self.base_url, sha1_hash)
  250. # Initial download
  251. self.queue.put((sha1_hash, output_filename))
  252. self.queue.put((None, None))
  253. stdout_queue = queue.Queue()
  254. download_from_google_storage._downloader_worker_thread(0,
  255. self.queue,
  256. True,
  257. self.base_url,
  258. self.gsutil,
  259. stdout_queue,
  260. self.ret_codes,
  261. True,
  262. True,
  263. delete=False)
  264. expected_calls = [('check_call', ('cp', input_filename,
  265. output_filename))]
  266. if sys.platform != 'win32':
  267. expected_calls.append(
  268. ('check_call', ('stat', 'gs://sometesturl/%s' % sha1_hash)))
  269. expected_output = [
  270. '0> Downloading %s@%s...' % (output_filename, sha1_hash)
  271. ]
  272. expected_output.extend([
  273. '0> Extracting 3 entries from %s to %s' %
  274. (output_filename, output_dirname)
  275. ])
  276. expected_ret_codes = []
  277. self.assertEqual(list(stdout_queue.queue), expected_output)
  278. self.assertEqual(self.gsutil.history, expected_calls)
  279. self.assertEqual(list(self.ret_codes.queue), expected_ret_codes)
  280. self.assertTrue(os.path.exists(output_dirname))
  281. self.assertTrue(os.path.exists(extracted_filename))
  282. # Test noop download
  283. self.queue.put((sha1_hash, output_filename))
  284. self.queue.put((None, None))
  285. stdout_queue = queue.Queue()
  286. download_from_google_storage._downloader_worker_thread(0,
  287. self.queue,
  288. False,
  289. self.base_url,
  290. self.gsutil,
  291. stdout_queue,
  292. self.ret_codes,
  293. True,
  294. True,
  295. delete=False)
  296. self.assertEqual(list(stdout_queue.queue), [])
  297. self.assertEqual(self.gsutil.history, expected_calls)
  298. self.assertEqual(list(self.ret_codes.queue), [])
  299. self.assertTrue(os.path.exists(output_dirname))
  300. self.assertTrue(os.path.exists(extracted_filename))
  301. # With dirty flag file, previous extraction wasn't complete
  302. with open(os.path.join(self.base_path, 'subfolder.tmp'), 'a'):
  303. pass
  304. self.queue.put((sha1_hash, output_filename))
  305. self.queue.put((None, None))
  306. stdout_queue = queue.Queue()
  307. download_from_google_storage._downloader_worker_thread(0,
  308. self.queue,
  309. False,
  310. self.base_url,
  311. self.gsutil,
  312. stdout_queue,
  313. self.ret_codes,
  314. True,
  315. True,
  316. delete=False)
  317. expected_calls += [('check_call', ('cp', input_filename,
  318. output_filename))]
  319. if sys.platform != 'win32':
  320. expected_calls.append(
  321. ('check_call', ('stat', 'gs://sometesturl/%s' % sha1_hash)))
  322. expected_output = [
  323. '0> Detected tmp flag file for %s, re-downloading...' %
  324. (output_filename),
  325. '0> Downloading %s@%s...' % (output_filename, sha1_hash),
  326. '0> Removed %s...' % (output_dirname),
  327. '0> Extracting 3 entries from %s to %s' %
  328. (output_filename, output_dirname),
  329. ]
  330. expected_ret_codes = []
  331. self.assertEqual(list(stdout_queue.queue), expected_output)
  332. self.assertEqual(self.gsutil.history, expected_calls)
  333. self.assertEqual(list(self.ret_codes.queue), expected_ret_codes)
  334. self.assertTrue(os.path.exists(output_dirname))
  335. self.assertTrue(os.path.exists(extracted_filename))
  336. def test_download_worker_skips_not_found_file(self):
  337. sha1_hash = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
  338. input_filename = '%s/%s' % (self.base_url, sha1_hash)
  339. output_filename = os.path.join(self.base_path,
  340. 'uploaded_lorem_ipsum.txt')
  341. self.queue.put((sha1_hash, output_filename))
  342. self.queue.put((None, None))
  343. stdout_queue = queue.Queue()
  344. self.gsutil.add_expected(1, '', '') # Return error when 'cp' is called.
  345. download_from_google_storage._downloader_worker_thread(
  346. 0, self.queue, False, self.base_url, self.gsutil, stdout_queue,
  347. self.ret_codes, True, False)
  348. expected_output = [
  349. '0> Downloading %s@%s...' % (output_filename, sha1_hash),
  350. '0> Failed to fetch file %s for %s, skipping. [Err: ]' %
  351. (input_filename, output_filename),
  352. ]
  353. expected_calls = [('check_call', ('cp', input_filename,
  354. output_filename))]
  355. expected_ret_codes = [(1, 'Failed to fetch file %s for %s. [Err: ]' %
  356. (input_filename, output_filename))]
  357. self.assertEqual(list(stdout_queue.queue), expected_output)
  358. self.assertEqual(self.gsutil.history, expected_calls)
  359. self.assertEqual(list(self.ret_codes.queue), expected_ret_codes)
  360. def test_download_cp_fails(self):
  361. sha1_hash = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
  362. input_filename = '%s/%s' % (self.base_url, sha1_hash)
  363. output_filename = os.path.join(self.base_path,
  364. 'uploaded_lorem_ipsum.txt')
  365. self.gsutil.add_expected(101, '', 'Test error message.') # cp
  366. code = download_from_google_storage.download_from_google_storage(
  367. input_filename=sha1_hash,
  368. base_url=self.base_url,
  369. gsutil=self.gsutil,
  370. num_threads=1,
  371. directory=False,
  372. recursive=False,
  373. force=True,
  374. output=output_filename,
  375. ignore_errors=False,
  376. sha1_file=False,
  377. verbose=True,
  378. auto_platform=False,
  379. extract=False)
  380. expected_calls = [('check_call', ('cp', input_filename,
  381. output_filename))]
  382. self.assertEqual(self.gsutil.history, expected_calls)
  383. self.assertEqual(code, 101)
  384. def test_corrupt_download(self):
  385. q = queue.Queue()
  386. out_q = queue.Queue()
  387. ret_codes = queue.Queue()
  388. tmp_dir = tempfile.mkdtemp()
  389. sha1_hash = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
  390. output_filename = os.path.join(tmp_dir, 'lorem_ipsum.txt')
  391. q.put(('7871c8e24da15bad8b0be2c36edc9dc77e37727f', output_filename))
  392. q.put((None, None))
  393. def _write_bad_file():
  394. with open(output_filename, 'w') as f:
  395. f.write('foobar')
  396. self.gsutil.add_expected(0, '', '', _write_bad_file) # cp
  397. download_from_google_storage._downloader_worker_thread(
  398. 1, q, True, self.base_url, self.gsutil, out_q, ret_codes, True,
  399. False)
  400. self.assertTrue(q.empty())
  401. msg = ('1> ERROR remote sha1 (%s) does not match expected sha1 (%s).' %
  402. ('8843d7f92416211de9ebb963ff4ce28125932878', sha1_hash))
  403. self.assertEqual(
  404. out_q.get(),
  405. '1> Downloading %s@%s...' % (output_filename, sha1_hash))
  406. self.assertEqual(out_q.get(), msg)
  407. self.assertEqual(ret_codes.get(), (20, msg))
  408. self.assertTrue(out_q.empty())
  409. self.assertTrue(ret_codes.empty())
  410. def test_download_directory_no_recursive_non_force(self):
  411. sha1_hash = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
  412. input_filename = '%s/%s' % (self.base_url, sha1_hash)
  413. output_filename = os.path.join(self.base_path,
  414. 'uploaded_lorem_ipsum.txt')
  415. self.gsutil.add_expected(0, '', '') # version
  416. self.gsutil.add_expected(
  417. 0, '', '',
  418. lambda: shutil.copyfile(self.lorem_ipsum, output_filename)) # cp
  419. code = download_from_google_storage.download_from_google_storage(
  420. input_filename=self.base_path,
  421. base_url=self.base_url,
  422. gsutil=self.gsutil,
  423. num_threads=1,
  424. directory=True,
  425. recursive=False,
  426. force=False,
  427. output=None,
  428. ignore_errors=False,
  429. sha1_file=False,
  430. verbose=True,
  431. auto_platform=False,
  432. extract=False)
  433. expected_calls = [('check_call', ('version', )),
  434. ('check_call', ('cp', input_filename,
  435. output_filename))]
  436. if sys.platform != 'win32':
  437. expected_calls.append(
  438. ('check_call',
  439. ('stat',
  440. 'gs://sometesturl/7871c8e24da15bad8b0be2c36edc9dc77e37727f')))
  441. self.assertEqual(self.gsutil.history, expected_calls)
  442. self.assertEqual(code, 0)
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()