presubmit_diff.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2024 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Tool for generating a unified git diff outside of a git workspace.
  6. This is intended as a preprocessor for presubmit_support.py.
  7. """
  8. from __future__ import annotations
  9. import argparse
  10. import base64
  11. import concurrent.futures
  12. import os
  13. import platform
  14. import sys
  15. import gclient_utils
  16. from gerrit_util import (CreateHttpConn, ReadHttpResponse,
  17. MAX_CONCURRENT_CONNECTION)
  18. import subprocess2
  19. DEV_NULL = "/dev/null"
  20. HEADER_DELIMITER = "@@"
  21. def fetch_content(host: str, repo: str, ref: str, file: str) -> bytes:
  22. """Fetches the content of a file from Gitiles.
  23. If the file does not exist at the commit, returns an empty bytes object.
  24. Args:
  25. host: Gerrit host.
  26. repo: Gerrit repo.
  27. ref: Gerrit commit.
  28. file: Path of file to fetch.
  29. Returns:
  30. Bytes of the file at the commit or an empty bytes object if the file
  31. does not exist at the commit.
  32. """
  33. conn = CreateHttpConn(f"{host}.googlesource.com",
  34. f"{repo}/+show/{ref}/{file}?format=text")
  35. response = ReadHttpResponse(conn, accept_statuses=[200, 404])
  36. return base64.b64decode(response.read())
  37. def git_diff(src: str | None,
  38. dest: str | None,
  39. unified: int | None = None) -> str:
  40. """Returns the result of `git diff --no-index` between two paths.
  41. If a path is not specified, the diff is against /dev/null. At least one of
  42. src or dest must be specified.
  43. Args:
  44. src: Source path.
  45. dest: Destination path.
  46. unified: Number of lines of context. If None, git diff uses 3 as
  47. the default value.
  48. Returns:
  49. A string containing the git diff.
  50. """
  51. args = ["git", "diff", "--no-index"]
  52. if unified is not None:
  53. # git diff doesn't error out even if it's given a negative <n> value.
  54. # e.g., --unified=-3323, -U-3
  55. #
  56. # It just ignores the value and treats it as 0.
  57. # hence, this script doesn't bother validating the <n> value.
  58. args.append(f"-U{unified}")
  59. args.extend(["--", src or DEV_NULL, dest or DEV_NULL])
  60. return subprocess2.capture(args).decode("utf-8")
  61. def _process_diff(diff: str, src_root: str, dst_root: str) -> str:
  62. """Adjust paths in the diff header so they're relative to the root.
  63. This also modifies paths on Windows to use forward slashes.
  64. """
  65. if not diff:
  66. return ""
  67. has_chunk_header = HEADER_DELIMITER in diff
  68. if has_chunk_header:
  69. header, body = diff.split(HEADER_DELIMITER, maxsplit=1)
  70. else:
  71. # Only the file mode changed.
  72. header = diff
  73. norm_src = src_root.rstrip(os.sep)
  74. norm_dst = dst_root.rstrip(os.sep)
  75. if platform.system() == "Windows":
  76. # Absolute paths on Windows use the format:
  77. # "a/C:\\abspath\\to\\file.txt"
  78. header = header.replace("\\\\", "\\")
  79. header = header.replace('"', "")
  80. header = header.replace(norm_src + "\\", "")
  81. header = header.replace(norm_dst + "\\", "")
  82. else:
  83. # Other systems use:
  84. # a/abspath/to/file.txt
  85. header = header.replace(norm_src, "")
  86. header = header.replace(norm_dst, "")
  87. if has_chunk_header:
  88. return header + HEADER_DELIMITER + body
  89. return header
  90. def _create_diff(host: str, repo: str, ref: str, root: str, file: str,
  91. unified: int | None) -> str:
  92. new_file = os.path.join(root, file)
  93. if not os.path.exists(new_file):
  94. new_file = None
  95. with gclient_utils.temporary_directory() as tmp_root:
  96. old_file = None
  97. old_content = fetch_content(host, repo, ref, file)
  98. if old_content:
  99. old_file = os.path.join(tmp_root, file)
  100. os.makedirs(os.path.dirname(old_file), exist_ok=True)
  101. with open(old_file, "wb") as f:
  102. f.write(old_content)
  103. if not old_file and not new_file:
  104. raise RuntimeError(f"Could not access file {file} from {root} "
  105. f"or from {host}/{repo}:{ref}.")
  106. diff = git_diff(old_file, new_file, unified)
  107. return _process_diff(diff, tmp_root, root)
  108. def create_diffs(host: str,
  109. repo: str,
  110. ref: str,
  111. root: str,
  112. files: list[str],
  113. unified: int | None = None) -> dict[str, str]:
  114. """Calculates diffs of files in a directory against a commit.
  115. Args:
  116. host: Gerrit host.
  117. repo: Gerrit repo.
  118. ref: Gerrit commit.
  119. root: Path of local directory containing modified files.
  120. files: List of file paths relative to root.
  121. unified: Number of lines of context. If None, git diff uses 3 as
  122. the default value.
  123. Returns:
  124. A dict mapping file paths to diffs.
  125. Raises:
  126. RuntimeError: If a file is missing in both the root and the repo.
  127. """
  128. diffs = {}
  129. with concurrent.futures.ThreadPoolExecutor(
  130. max_workers=MAX_CONCURRENT_CONNECTION) as executor:
  131. futures_to_file = {
  132. executor.submit(_create_diff, host, repo, ref, root, file, unified):
  133. file
  134. for file in files
  135. }
  136. for future in concurrent.futures.as_completed(futures_to_file):
  137. file = futures_to_file[future]
  138. diffs[file] = future.result()
  139. return diffs
  140. def main(argv):
  141. parser = argparse.ArgumentParser(
  142. usage="%(prog)s [options] <files...>",
  143. description="Makes a unified git diff against a Gerrit commit.",
  144. )
  145. parser.add_argument("--output", help="File to write the diff to.")
  146. parser.add_argument("--host", required=True, help="Gerrit host.")
  147. parser.add_argument("--repo", required=True, help="Gerrit repo.")
  148. parser.add_argument("--ref",
  149. required=True,
  150. help="Gerrit ref to diff against.")
  151. parser.add_argument("--root",
  152. required=True,
  153. help="Folder containing modified files.")
  154. parser.add_argument("-U",
  155. "--unified",
  156. required=False,
  157. type=int,
  158. help="generate diffs with <n> lines context",
  159. metavar='<n>')
  160. parser.add_argument(
  161. "files",
  162. nargs="+",
  163. help="List of changed files. Paths are relative to the repo root.",
  164. )
  165. options = parser.parse_args(argv)
  166. diffs = create_diffs(options.host, options.repo, options.ref, options.root,
  167. options.files, options.unified)
  168. unified_diff = "\n".join([d for d in diffs.values() if d])
  169. if options.output:
  170. with open(options.output, "w") as f:
  171. f.write(unified_diff)
  172. else:
  173. print(unified_diff)
  174. return 0
  175. if __name__ == "__main__":
  176. sys.exit(main(sys.argv[1:]))