123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198 |
- #!/usr/bin/python
- """ A small program to compute checksums of LLVM checkout.
- """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import hashlib
import logging
import os
import re
import sys
from argparse import ArgumentParser

from project_tree import *
- SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$")
def main():
    """Command-line entry point: print or verify checksums of an LLVM checkout.

    Without --check, the computed checksums are written to stdout and the
    process exits with status 0. With --check, they are compared against the
    checksums read from the reference file; on a mismatch both sets are
    printed and the process exits with status 1.
    """
    cli = ArgumentParser()
    cli.add_argument(
        "-v", "--verbose", action="store_true", help="enable debug logging")
    cli.add_argument(
        "-c",
        "--check",
        metavar="reference_file",
        help="read checksums from reference_file and "
             "check they match checksums of llvm_path.")
    cli.add_argument(
        "--partial",
        action="store_true",
        help="ignore projects from reference_file "
             "that are not checked out in llvm_path.")
    cli.add_argument(
        "--multi_dir",
        action="store_true",
        help="indicates llvm_path contains llvm, checked out "
             "into multiple directories, as opposed to a "
             "typical single source tree checkout.")
    cli.add_argument("llvm_path")
    options = cli.parse_args()

    # The reference file is read before any hashing starts, so an unreadable
    # file fails before the checkout is walked.
    expected = None
    if options.check is not None:
        with open(options.check, "r") as reference_file:
            expected = ReadLLVMChecksums(reference_file)

    if options.verbose:
        logging.basicConfig(level=logging.DEBUG)

    projects = CreateLLVMProjects(not options.multi_dir)
    actual = ComputeLLVMChecksums(options.llvm_path, projects)

    # No reference given: just print what was computed.
    if expected is None:
        WriteLLVMChecksums(actual, sys.stdout)
        sys.exit(0)

    if ValidateChecksums(expected, actual, options.partial):
        sys.stdout.write("Checksums match.")
        return

    sys.stdout.write("Checksums differ.\nNew checksums:\n")
    WriteLLVMChecksums(actual, sys.stdout)
    sys.stdout.write("Reference checksums:\n")
    WriteLLVMChecksums(expected, sys.stdout)
    sys.exit(1)
def ComputeLLVMChecksums(root_path, projects):
    """Compute checksums for LLVM sources checked out using svn.

    For each project, every path that WalkProjectFiles passes to the callback
    is hashed with SHA-256 (after svn date keyword expansions are collapsed).
    The project checksum is the SHA-256 over the path-sorted sequence of
    (path relative to the project root, file hex digest) pairs.

    Projects whose directory does not exist under root_path are skipped and
    do not appear in the result.

    Args:
      root_path: a directory of llvm checkout.
      projects: a list of LLVMProject instances, which describe checkout paths,
        relative to root_path.

    Returns:
      A dict mapping from project name to project checksum.
    """
    hash_algo = hashlib.sha256

    def to_bytes(value):
        # hashlib only accepts bytes. On Python 2 str already is bytes; on
        # Python 3 paths, symlink targets and hex digests are text and have to
        # be encoded before hashing. surrogateescape keeps file names that
        # are not valid UTF-8 encodable on Python 3.
        if isinstance(value, bytes):
            return value
        return value.encode("utf-8", "surrogateescape")

    def collapse_svn_substitutions(contents):
        # Replace svn substitutions for $Date$ and $LastChangedDate$.
        # Unfortunately, these are locale-specific.
        #
        # Files are read as bytes, so the pattern must be bytes as well; a
        # text pattern cannot be applied to bytes on Python 3.
        pattern = SVN_DATES_REGEX.pattern
        if not isinstance(pattern, bytes):
            pattern = pattern.encode("ascii")
        # NOTE: the replacement is the literal bytes "$", 0x01, "$". The
        # original non-raw "$\1$" literal produced exactly this sequence (an
        # octal escape, not a group backreference); it is kept so digests stay
        # identical to previously generated reference files.
        # re.compile() is served from the re module's cache after the first
        # call, so compiling here does not repeat the work per file.
        return re.compile(pattern).sub(b"$\x01$", contents)

    def read_and_collapse_svn_substitutions(file_path):
        with open(file_path, "rb") as f:
            contents = f.read()
        new_contents = collapse_svn_substitutions(contents)
        if contents != new_contents:
            logging.debug("Replaced svn keyword substitutions in %s",
                          file_path)
            logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents,
                          new_contents)
        return new_contents

    project_checksums = dict()
    # Hash each project.
    for proj in projects:
        project_root = os.path.join(root_path, proj.relpath)
        if not os.path.exists(project_root):
            logging.info("Folder %s doesn't exist, skipping project %s",
                         proj.relpath, proj.name)
            continue

        files = list()

        def add_file_hash(file_path):
            # A dangling symlink has no content to read, so its target path
            # is hashed instead.
            if os.path.islink(file_path) and not os.path.exists(file_path):
                content = to_bytes(os.readlink(file_path))
            else:
                content = read_and_collapse_svn_substitutions(file_path)
            hasher = hash_algo()
            hasher.update(content)
            file_digest = hasher.hexdigest()
            logging.debug("Checksum %s for file %s", file_digest, file_path)
            files.append((file_path, file_digest))

        logging.info("Computing checksum for %s", proj.name)
        WalkProjectFiles(root_path, projects, proj, add_file_hash)

        # Compute final checksum. Sorting makes the result independent of the
        # order in which the walker reported the files.
        files.sort(key=lambda x: x[0])
        hasher = hash_algo()
        for file_path, file_digest in files:
            # Hash the path relative to the project root so the checksum does
            # not depend on where the checkout lives.
            file_path = os.path.relpath(file_path, project_root)
            hasher.update(to_bytes(file_path))
            hasher.update(to_bytes(file_digest))
        project_checksums[proj.name] = hasher.hexdigest()
    return project_checksums
def WriteLLVMChecksums(checksums, f):
    """Writes checksums to a text file, one project per line.

    Lines have the form "<checksum> <project name>" and are emitted in
    ascending order of project name.

    Args:
      checksums: a dict mapping from project name to project checksum (result
        of ComputeLLVMChecksums).
      f: a file object to write into.
    """
    project_names = list(checksums)
    project_names.sort()
    for name in project_names:
        line = "{} {}\n".format(checksums[name], name)
        f.write(line)
def ReadLLVMChecksums(f):
    """Reads checksums from a text file, produced by WriteLLVMChecksums.

    Each non-blank line must hold exactly two whitespace-separated fields:
    the checksum followed by the project name. Blank and whitespace-only
    lines (for example a trailing empty line) are skipped.

    Args:
      f: a file object to read from; only readline() is used.

    Returns:
      A dict, mapping from project name to project checksum.

    Raises:
      ValueError: if a non-blank line does not consist of exactly two fields.
    """
    checksums = {}
    while True:
        line = f.readline()
        # readline() returns an empty value only at end of file.
        if not line:
            break
        fields = line.split()
        if not fields:
            # Tolerate blank lines instead of failing on the unpack below.
            continue
        if len(fields) != 2:
            raise ValueError(
                "malformed checksum line, expected '<checksum> <project>': "
                "{!r}".format(line))
        checksum, proj = fields
        checksums[proj] = checksum
    return checksums
def ValidateChecksums(reference_checksums,
                      new_checksums,
                      allow_missing_projects=False):
    """Checks whether new_checksums agree with reference_checksums.

    Args:
      reference_checksums: a dict of reference checksums, mapping from a
        project name to a project checksum.
      new_checksums: a dict of checksums to be checked, mapping from a project
        name to a project checksum.
      allow_missing_projects:
        When True, reference_checksums may list projects that are absent from
        new_checksums; those projects are ignored.
        When False, both dicts must cover the same set of projects, so a
        project present only in reference_checksums makes validation fail.

    Returns:
      True if every project in new_checksums is present in reference_checksums
      with an equal checksum and the allow_missing_projects constraint holds.
      False otherwise.
    """
    # In strict mode the sizes must agree; combined with the per-project
    # lookup below this means both dicts hold exactly the same projects.
    strict = not allow_missing_projects
    if strict and len(new_checksums) != len(reference_checksums):
        return False

    # Every new checksum needs a reference entry with the same value.
    return all(
        name in reference_checksums and reference_checksums[name] == digest
        for name, digest in new_checksums.items())
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|