123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215 |
- #!/usr/bin/env python3
- # Copyright (c) 2024 The Chromium Authors. All rights reserved.
- # Use of this source code is governed by a BSD-style license that can be
- # found in the LICENSE file.
- """Tool for generating a unified git diff outside of a git workspace.
- This is intended as a preprocessor for presubmit_support.py.
- """
- from __future__ import annotations
- import argparse
- import base64
- import concurrent.futures
- import os
- import platform
- import sys
- import gclient_utils
- from gerrit_util import (CreateHttpConn, ReadHttpResponse,
- MAX_CONCURRENT_CONNECTION)
- import subprocess2
# Path handed to `git diff --no-index` in place of a side that has no file.
DEV_NULL = "/dev/null"
# Marker that begins a hunk header; used to separate a diff's file header
# from its body.
HEADER_DELIMITER = "@@"
def fetch_content(host: str, repo: str, ref: str, file: str) -> bytes:
    """Downloads the contents of one file at a given commit from Gitiles.

    Per this function's contract, a file that does not exist at the commit
    produces an empty bytes object rather than an error.

    Args:
        host: Gerrit host.
        repo: Gerrit repo.
        ref: Gerrit commit.
        file: Path of file to fetch.

    Returns:
        Bytes of the file at the commit or an empty bytes object if the file
        does not exist at the commit.
    """
    gitiles_host = f"{host}.googlesource.com"
    gitiles_path = f"{repo}/+show/{ref}/{file}?format=text"
    # 404 is accepted alongside 200 so a missing file is not treated as a
    # failed request.
    response = ReadHttpResponse(CreateHttpConn(gitiles_host, gitiles_path),
                                accept_statuses=[200, 404])
    # The response body is base64 text; decode it back into raw file bytes.
    encoded = response.read()
    return base64.b64decode(encoded)
def git_diff(src: str | None,
             dest: str | None,
             unified: int | None = None) -> str:
    """Runs `git diff --no-index` on two paths and returns its output.

    A side that is not specified is diffed against /dev/null. At least one
    of src or dest must be specified.

    Args:
        src: Source path.
        dest: Destination path.
        unified: Number of lines of context. If None, git diff uses 3 as
            the default value.

    Returns:
        A string containing the git diff.
    """
    # git diff does not reject a negative <n> (e.g. --unified=-3323, -U-3);
    # it silently treats it as 0. The value is therefore forwarded without
    # validation.
    context_flag = [] if unified is None else [f"-U{unified}"]
    command = [
        "git", "diff", "--no-index", *context_flag, "--",
        src or DEV_NULL, dest or DEV_NULL
    ]
    raw_output = subprocess2.capture(command)
    return raw_output.decode("utf-8")
- def _process_diff(diff: str, src_root: str, dst_root: str) -> str:
- """Adjust paths in the diff header so they're relative to the root.
- This also modifies paths on Windows to use forward slashes.
- """
- if not diff:
- return ""
- has_chunk_header = HEADER_DELIMITER in diff
- if has_chunk_header:
- header, body = diff.split(HEADER_DELIMITER, maxsplit=1)
- else:
- # Only the file mode changed.
- header = diff
- norm_src = src_root.rstrip(os.sep)
- norm_dst = dst_root.rstrip(os.sep)
- if platform.system() == "Windows":
- # Absolute paths on Windows use the format:
- # "a/C:\\abspath\\to\\file.txt"
- header = header.replace("\\\\", "\\")
- header = header.replace('"', "")
- header = header.replace(norm_src + "\\", "")
- header = header.replace(norm_dst + "\\", "")
- else:
- # Other systems use:
- # a/abspath/to/file.txt
- header = header.replace(norm_src, "")
- header = header.replace(norm_dst, "")
- if has_chunk_header:
- return header + HEADER_DELIMITER + body
- return header
def _create_diff(host: str, repo: str, ref: str, root: str, file: str,
                 unified: int | None) -> str:
    """Diffs one local file against its version at a Gerrit commit.

    The committed version is fetched and written into a temporary directory
    so that `git diff --no-index` can compare it with the local copy.

    Args:
        host: Gerrit host.
        repo: Gerrit repo.
        ref: Gerrit commit.
        root: Path of local directory containing modified files.
        file: File path relative to root.
        unified: Number of lines of context, or None for git's default.

    Returns:
        The diff for the file with root-relative paths in its header.

    Raises:
        RuntimeError: If the file exists neither locally nor at the commit.
    """
    local_path = os.path.join(root, file)
    # None makes git_diff compare against /dev/null for that side.
    new_file = local_path if os.path.exists(local_path) else None

    with gclient_utils.temporary_directory() as tmp_root:
        old_file = None
        upstream_bytes = fetch_content(host, repo, ref, file)
        if upstream_bytes:
            # Mirror the relative path under the temp root so the diff header
            # can later be made root-relative on both sides.
            old_file = os.path.join(tmp_root, file)
            os.makedirs(os.path.dirname(old_file), exist_ok=True)
            with open(old_file, "wb") as out:
                out.write(upstream_bytes)

        if not (old_file or new_file):
            raise RuntimeError(f"Could not access file {file} from {root} "
                               f"or from {host}/{repo}:{ref}.")

        # The diff must be computed while the temporary directory still
        # exists, because old_file lives inside it.
        raw_diff = git_diff(old_file, new_file, unified)
        return _process_diff(raw_diff, tmp_root, root)
def create_diffs(host: str,
                 repo: str,
                 ref: str,
                 root: str,
                 files: list[str],
                 unified: int | None = None) -> dict[str, str]:
    """Calculates diffs of files in a directory against a commit.

    The per-file diffs are computed concurrently, but the returned dict is
    always populated in the order the paths appear in `files`, so the result
    (and anything built by iterating over it) is deterministic between runs.

    Args:
        host: Gerrit host.
        repo: Gerrit repo.
        ref: Gerrit commit.
        root: Path of local directory containing modified files.
        files: List of file paths relative to root.
        unified: Number of lines of context. If None, git diff uses 3 as
            the default value.

    Returns:
        A dict mapping file paths to diffs, ordered like `files`.

    Raises:
        RuntimeError: If a file is missing in both the root and the repo.
    """
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=MAX_CONCURRENT_CONNECTION) as executor:
        # Submit everything up front so the work still runs in parallel.
        pending = [(file,
                    executor.submit(_create_diff, host, repo, ref, root, file,
                                    unified)) for file in files]
        # Collect in submission order rather than completion order: dicts
        # preserve insertion order, so completion-order insertion would make
        # the output ordering depend on network and thread timing.
        # Future.result() re-raises any exception from the worker.
        return {file: future.result() for file, future in pending}
def main(argv):
    """Command-line entry point: diffs local files against a Gerrit commit.

    Parses the options, builds one diff per listed file, joins the non-empty
    diffs with newlines and writes the result to --output if given, otherwise
    to stdout.

    Args:
        argv: Command-line arguments, excluding the program name.

    Returns:
        0 on success.
    """
    parser = argparse.ArgumentParser(
        usage="%(prog)s [options] <files...>",
        description="Makes a unified git diff against a Gerrit commit.",
    )
    parser.add_argument("--output", help="File to write the diff to.")
    parser.add_argument("--host", required=True, help="Gerrit host.")
    parser.add_argument("--repo", required=True, help="Gerrit repo.")
    parser.add_argument("--ref",
                        required=True,
                        help="Gerrit ref to diff against.")
    parser.add_argument("--root",
                        required=True,
                        help="Folder containing modified files.")
    parser.add_argument("-U",
                        "--unified",
                        required=False,
                        type=int,
                        help="generate diffs with <n> lines context",
                        metavar='<n>')
    parser.add_argument(
        "files",
        nargs="+",
        help="List of changed files. Paths are relative to the repo root.",
    )
    options = parser.parse_args(argv)

    diffs = create_diffs(options.host, options.repo, options.ref, options.root,
                         options.files, options.unified)

    # Files with no differences produce an empty diff and are left out.
    unified_diff = "\n".join([d for d in diffs.values() if d])
    if options.output:
        # The diff text was decoded from git's output as UTF-8, so write it
        # back as UTF-8 explicitly. Relying on the platform's locale encoding
        # can fail or corrupt non-ASCII content on non-UTF-8 systems.
        with open(options.output, "w", encoding="utf-8") as f:
            f.write(unified_diff)
    else:
        print(unified_diff)
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
|