llvm_checksum.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. #!/usr/bin/python
  2. """ A small program to compute checksums of LLVM checkout.
  3. """
  4. from __future__ import absolute_import
  5. from __future__ import division
  6. from __future__ import print_function
  7. import hashlib
  8. import logging
  9. import re
  10. import sys
  11. from argparse import ArgumentParser
  12. from project_tree import *
# Matches an expanded svn date keyword such as "$Date: ... $" or
# "$LastChangedDate: ... $": a "$", the keyword name (captured as group 1),
# one or more characters other than "$", and a closing "$". A bare, unexpanded
# "$Date$" does not match, because at least one character is required between
# the keyword name and the closing "$".
SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$")
  14. def main():
  15. parser = ArgumentParser()
  16. parser.add_argument(
  17. "-v", "--verbose", action="store_true", help="enable debug logging")
  18. parser.add_argument(
  19. "-c",
  20. "--check",
  21. metavar="reference_file",
  22. help="read checksums from reference_file and " +
  23. "check they match checksums of llvm_path.")
  24. parser.add_argument(
  25. "--partial",
  26. action="store_true",
  27. help="ignore projects from reference_file " +
  28. "that are not checked out in llvm_path.")
  29. parser.add_argument(
  30. "--multi_dir",
  31. action="store_true",
  32. help="indicates llvm_path contains llvm, checked out " +
  33. "into multiple directories, as opposed to a " +
  34. "typical single source tree checkout.")
  35. parser.add_argument("llvm_path")
  36. args = parser.parse_args()
  37. if args.check is not None:
  38. with open(args.check, "r") as f:
  39. reference_checksums = ReadLLVMChecksums(f)
  40. else:
  41. reference_checksums = None
  42. if args.verbose:
  43. logging.basicConfig(level=logging.DEBUG)
  44. llvm_projects = CreateLLVMProjects(not args.multi_dir)
  45. checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects)
  46. if reference_checksums is None:
  47. WriteLLVMChecksums(checksums, sys.stdout)
  48. sys.exit(0)
  49. if not ValidateChecksums(reference_checksums, checksums, args.partial):
  50. sys.stdout.write("Checksums differ.\nNew checksums:\n")
  51. WriteLLVMChecksums(checksums, sys.stdout)
  52. sys.stdout.write("Reference checksums:\n")
  53. WriteLLVMChecksums(reference_checksums, sys.stdout)
  54. sys.exit(1)
  55. else:
  56. sys.stdout.write("Checksums match.")
  57. def ComputeLLVMChecksums(root_path, projects):
  58. """Compute checksums for LLVM sources checked out using svn.
  59. Args:
  60. root_path: a directory of llvm checkout.
  61. projects: a list of LLVMProject instances, which describe checkout paths,
  62. relative to root_path.
  63. Returns:
  64. A dict mapping from project name to project checksum.
  65. """
  66. hash_algo = hashlib.sha256
  67. def collapse_svn_substitutions(contents):
  68. # Replace svn substitutions for $Date$ and $LastChangedDate$.
  69. # Unfortunately, these are locale-specific.
  70. return SVN_DATES_REGEX.sub("$\1$", contents)
  71. def read_and_collapse_svn_subsitutions(file_path):
  72. with open(file_path, "rb") as f:
  73. contents = f.read()
  74. new_contents = collapse_svn_substitutions(contents)
  75. if contents != new_contents:
  76. logging.debug("Replaced svn keyword substitutions in %s", file_path)
  77. logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
  78. return new_contents
  79. project_checksums = dict()
  80. # Hash each project.
  81. for proj in projects:
  82. project_root = os.path.join(root_path, proj.relpath)
  83. if not os.path.exists(project_root):
  84. logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath,
  85. proj.name)
  86. continue
  87. files = list()
  88. def add_file_hash(file_path):
  89. if os.path.islink(file_path) and not os.path.exists(file_path):
  90. content = os.readlink(file_path)
  91. else:
  92. content = read_and_collapse_svn_subsitutions(file_path)
  93. hasher = hash_algo()
  94. hasher.update(content)
  95. file_digest = hasher.hexdigest()
  96. logging.debug("Checksum %s for file %s", file_digest, file_path)
  97. files.append((file_path, file_digest))
  98. logging.info("Computing checksum for %s", proj.name)
  99. WalkProjectFiles(root_path, projects, proj, add_file_hash)
  100. # Compute final checksum.
  101. files.sort(key=lambda x: x[0])
  102. hasher = hash_algo()
  103. for file_path, file_digest in files:
  104. file_path = os.path.relpath(file_path, project_root)
  105. hasher.update(file_path)
  106. hasher.update(file_digest)
  107. project_checksums[proj.name] = hasher.hexdigest()
  108. return project_checksums
  109. def WriteLLVMChecksums(checksums, f):
  110. """Writes checksums to a text file.
  111. Args:
  112. checksums: a dict mapping from project name to project checksum (result of
  113. ComputeLLVMChecksums).
  114. f: a file object to write into.
  115. """
  116. for proj in sorted(checksums.keys()):
  117. f.write("{} {}\n".format(checksums[proj], proj))
  118. def ReadLLVMChecksums(f):
  119. """Reads checksums from a text file, produced by WriteLLVMChecksums.
  120. Returns:
  121. A dict, mapping from project name to project checksum.
  122. """
  123. checksums = {}
  124. while True:
  125. line = f.readline()
  126. if line == "":
  127. break
  128. checksum, proj = line.split()
  129. checksums[proj] = checksum
  130. return checksums
  131. def ValidateChecksums(reference_checksums,
  132. new_checksums,
  133. allow_missing_projects=False):
  134. """Validates that reference_checksums and new_checksums match.
  135. Args:
  136. reference_checksums: a dict of reference checksums, mapping from a project
  137. name to a project checksum.
  138. new_checksums: a dict of checksums to be checked, mapping from a project
  139. name to a project checksum.
  140. allow_missing_projects:
  141. When True, reference_checksums may contain more projects than
  142. new_checksums. Projects missing from new_checksums are ignored.
  143. When False, new_checksums and reference_checksums must contain checksums
  144. for the same set of projects. If there is a project in
  145. reference_checksums, missing from new_checksums, ValidateChecksums
  146. will return False.
  147. Returns:
  148. True, if checksums match with regards to allow_missing_projects flag value.
  149. False, otherwise.
  150. """
  151. if not allow_missing_projects:
  152. if len(new_checksums) != len(reference_checksums):
  153. return False
  154. for proj, checksum in new_checksums.items():
  155. # We never computed a checksum for this project.
  156. if proj not in reference_checksums:
  157. return False
  158. # Checksum did not match.
  159. if reference_checksums[proj] != checksum:
  160. return False
  161. return True
# Run the command-line tool only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()