presubmit_diff.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
#!/usr/bin/env python3
# Copyright (c) 2024 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Tool for generating a unified git diff outside of a git workspace.

This is intended as a preprocessor for presubmit_support.py.
"""
  8. from __future__ import annotations
  9. import argparse
  10. import base64
  11. import os
  12. import platform
  13. import sys
  14. import concurrent.futures
  15. import gclient_utils
  16. from gerrit_util import (CreateHttpConn, ReadHttpResponse,
  17. MAX_CONCURRENT_CONNECTION)
  18. import subprocess2
  19. DEV_NULL = "/dev/null"
  20. HEADER_DELIMITER = "@@"
  21. def fetch_content(host: str, repo: str, ref: str, file: str) -> str:
  22. """Fetches the content of a file from Gitiles.
  23. If the file does not exist at the commit, it returns an empty string.
  24. Args:
  25. host: Gerrit host.
  26. repo: Gerrit repo.
  27. ref: Gerrit commit.
  28. file: Path of file to fetch.
  29. Returns:
  30. A string containing the content of the file at the commit, or an empty
  31. string if the file does not exist at the commit.
  32. """
  33. conn = CreateHttpConn(f"{host}.googlesource.com",
  34. f"{repo}/+show/{ref}/{file}?format=text")
  35. response = ReadHttpResponse(conn, accept_statuses=[200, 404])
  36. return base64.b64decode(response.read()).decode("utf-8")
  37. def git_diff(src: str | None, dest: str | None) -> str:
  38. """Returns the result of `git diff --no-index` between two paths.
  39. If a path is not specified, the diff is against /dev/null. At least one of
  40. src or dest must be specified.
  41. Args:
  42. src: Source path.
  43. dest: Destination path.
  44. Returns:
  45. A string containing the git diff.
  46. """
  47. return subprocess2.capture(
  48. ["git", "diff", "--no-index", "--", src or DEV_NULL, dest
  49. or DEV_NULL]).decode("utf-8")
  50. def _process_diff(diff: str, src_root: str, dst_root: str) -> str:
  51. """Adjust paths in the diff header so they're relative to the root.
  52. This also modifies paths on Windows to use forward slashes.
  53. """
  54. if not diff:
  55. return ""
  56. has_chunk_header = HEADER_DELIMITER in diff
  57. if has_chunk_header:
  58. header, body = diff.split(HEADER_DELIMITER, maxsplit=1)
  59. else:
  60. # Only the file mode changed.
  61. header = diff
  62. norm_src = src_root.rstrip(os.sep)
  63. norm_dst = dst_root.rstrip(os.sep)
  64. if platform.system() == "Windows":
  65. # Absolute paths on Windows use the format:
  66. # "a/C:\\abspath\\to\\file.txt"
  67. header = header.replace("\\\\", "\\")
  68. header = header.replace('"', "")
  69. header = header.replace(norm_src + "\\", "")
  70. header = header.replace(norm_dst + "\\", "")
  71. else:
  72. # Other systems use:
  73. # a/abspath/to/file.txt
  74. header = header.replace(norm_src, "")
  75. header = header.replace(norm_dst, "")
  76. if has_chunk_header:
  77. return header + HEADER_DELIMITER + body
  78. return header
  79. def _create_diff(host: str, repo: str, ref: str, root: str, file: str) -> str:
  80. new_file = os.path.join(root, file)
  81. if not os.path.exists(new_file):
  82. new_file = None
  83. with gclient_utils.temporary_directory() as tmp_root:
  84. old_file = None
  85. old_content = fetch_content(host, repo, ref, file)
  86. if old_content:
  87. old_file = os.path.join(tmp_root, file)
  88. os.makedirs(os.path.dirname(old_file), exist_ok=True)
  89. with open(old_file, "w") as f:
  90. f.write(old_content)
  91. if not old_file and not new_file:
  92. raise RuntimeError(f"Could not access file {file} from {root} "
  93. f"or from {host}/{repo}:{ref}.")
  94. diff = git_diff(old_file, new_file)
  95. return _process_diff(diff, tmp_root, root)
  96. def create_diffs(host: str, repo: str, ref: str, root: str,
  97. files: list[str]) -> dict[str, str]:
  98. """Calculates diffs of files in a directory against a commit.
  99. Args:
  100. host: Gerrit host.
  101. repo: Gerrit repo.
  102. ref: Gerrit commit.
  103. root: Path of local directory containing modified files.
  104. files: List of file paths relative to root.
  105. Returns:
  106. A dict mapping file paths to diffs.
  107. Raises:
  108. RuntimeError: If a file is missing in both the root and the repo.
  109. """
  110. diffs = {}
  111. with concurrent.futures.ThreadPoolExecutor(
  112. max_workers=MAX_CONCURRENT_CONNECTION) as executor:
  113. futures_to_file = {
  114. executor.submit(_create_diff, host, repo, ref, root, file): file
  115. for file in files
  116. }
  117. for future in concurrent.futures.as_completed(futures_to_file):
  118. file = futures_to_file[future]
  119. diffs[file] = future.result()
  120. return diffs
  121. def main(argv):
  122. parser = argparse.ArgumentParser(
  123. usage="%(prog)s [options] <files...>",
  124. description="Makes a unified git diff against a Gerrit commit.",
  125. )
  126. parser.add_argument("--output", help="File to write the diff to.")
  127. parser.add_argument("--host", required=True, help="Gerrit host.")
  128. parser.add_argument("--repo", required=True, help="Gerrit repo.")
  129. parser.add_argument("--ref",
  130. required=True,
  131. help="Gerrit ref to diff against.")
  132. parser.add_argument("--root",
  133. required=True,
  134. help="Folder containing modified files.")
  135. parser.add_argument(
  136. "files",
  137. nargs="+",
  138. help="List of changed files. Paths are relative to the repo root.",
  139. )
  140. options = parser.parse_args(argv)
  141. diffs = create_diffs(options.host, options.repo, options.ref, options.root,
  142. options.files)
  143. unified_diff = "\n".join([d for d in diffs.values() if d])
  144. if options.output:
  145. with open(options.output, "w") as f:
  146. f.write(unified_diff)
  147. else:
  148. print(unified_diff)
  149. return 0
  150. if __name__ == "__main__":
  151. sys.exit(main(sys.argv[1:]))