Ver Fonte

Add presubmit_diff.py

Create a script to create a unified git diff of a local workspace
against a remote Gerrit commit. This will be used to create a diff file
for running presubmits.

Bug: b/323243527
Change-Id: Ia0d287624162dbfe5231a5653c9a962a6c85c8e1
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5374833
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Gavin Mak <gavinmak@google.com>
Gavin Mak há 1 ano atrás
pai
commit
9782552ca4
2 ficheiros alterados com 393 adições e 0 exclusões
  1. 170 0
      presubmit_diff.py
  2. 223 0
      tests/presubmit_diff_test.py

+ 170 - 0
presubmit_diff.py

@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+# Copyright (c) 2024 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Tool for generating a unified git diff outside of a git workspace.
+
+This is intended as a preprocessor for presubmit_support.py.
+"""
+from __future__ import annotations
+
+import argparse
+import base64
+import os
+import platform
+import sys
+
+import gclient_utils
+from gerrit_util import CreateHttpConn, ReadHttpResponse
+import subprocess2
+
+DEV_NULL = "/dev/null"
+HEADER_DELIMITER = "@@"
+
+
+def fetch_content(host: str, repo: str, ref: str, file: str) -> str:
+    """Fetches the content of a file from Gitiles.
+
+    If the file does not exist at the commit, it returns an empty string.
+
+    Args:
+      host: Gerrit host.
+      repo: Gerrit repo.
+      ref: Gerrit commit.
+      file: Path of file to fetch.
+
+    Returns:
+        A string containing the content of the file at the commit, or an empty
+        string if the file does not exist at the commit.
+    """
+    conn = CreateHttpConn(f"{host}.googlesource.com",
+                          f"{repo}/+show/{ref}/{file}?format=text")
+    response = ReadHttpResponse(conn, accept_statuses=[200, 404])
+    return base64.b64decode(response.read()).decode("utf-8")
+
+
+def git_diff(src: str | None, dest: str | None) -> str:
+    """Returns the result of `git diff --no-index` between two paths.
+
+    If a path is not specified, the diff is against /dev/null. At least one of
+    src or dest must be specified.
+
+    Args:
+      src: Source path.
+      dest: Destination path.
+
+    Returns:
+        A string containing the git diff.
+    """
+    return subprocess2.capture(
+        ["git", "diff", "--no-index", "--", src or DEV_NULL, dest
+         or DEV_NULL]).decode("utf-8")
+
+
+def _process_diff(diff: str, src_root: str, dst_root: str) -> str:
+    """Adjust paths in the diff header so they're relative to the root.
+
+    This also modifies paths on Windows to use forward slashes.
+    """
+    if not diff:
+        return ""
+
+    header, body = diff.split(HEADER_DELIMITER, maxsplit=1)
+    norm_src = src_root.rstrip(os.sep)
+    norm_dst = dst_root.rstrip(os.sep)
+
+    if platform.system() == "Windows":
+        # Absolute paths on Windows use the format:
+        #   "a/C:\\abspath\\to\\file.txt"
+        header = header.replace("\\\\", "\\")
+        header = header.replace('"', "")
+        header = header.replace(norm_src + "\\", "")
+        header = header.replace(norm_dst + "\\", "")
+    else:
+        # Other systems use:
+        #  a/abspath/to/file.txt
+        header = header.replace(norm_src, "")
+        header = header.replace(norm_dst, "")
+
+    return header + HEADER_DELIMITER + body
+
+
+def create_diffs(host: str, repo: str, ref: str, root: str,
+                 files: list[str]) -> dict[str, str]:
+    """Calculates diffs of files in a directory against a commit.
+
+    Args:
+      host: Gerrit host.
+      repo: Gerrit repo.
+      ref: Gerrit commit.
+      root: Path of local directory containing modified files.
+      files: List of file paths relative to root.
+
+    Returns:
+        A dict mapping file paths to diffs.
+
+    Raises:
+        RuntimeError: If a file is missing in both the root and the repo.
+    """
+    diffs = {}
+    with gclient_utils.temporary_directory() as tmp_root:
+        # TODO(gavinmak): Parallelize fetching content.
+        for file in files:
+            new_file = os.path.join(root, file)
+            if not os.path.exists(new_file):
+                new_file = None
+
+            old_file = None
+            old_content = fetch_content(host, repo, ref, file)
+            if old_content:
+                old_file = os.path.join(tmp_root, file)
+                os.makedirs(os.path.dirname(old_file), exist_ok=True)
+                with open(old_file, "w") as f:
+                    f.write(old_content)
+
+            if not old_file and not new_file:
+                raise RuntimeError(f"Could not access file {file} from {root} "
+                                   f"or from {host}/{repo}:{ref}.")
+
+            diff = git_diff(old_file, new_file)
+            diffs[file] = _process_diff(diff, tmp_root, root)
+
+    return diffs
+
+
+def main(argv):
+    parser = argparse.ArgumentParser(
+        usage="%(prog)s [options] <files...>",
+        description="Makes a unified git diff against a Gerrit commit.",
+    )
+    parser.add_argument("--output", help="File to write the diff to.")
+    parser.add_argument("--host", required=True, help="Gerrit host.")
+    parser.add_argument("--repo", required=True, help="Gerrit repo.")
+    parser.add_argument("--ref",
+                        required=True,
+                        help="Gerrit ref to diff against.")
+    parser.add_argument("--root",
+                        required=True,
+                        help="Folder containing modified files.")
+    parser.add_argument(
+        "files",
+        nargs="+",
+        help="List of changed files. Paths are relative to the repo root.",
+    )
+    options = parser.parse_args(argv)
+
+    diffs = create_diffs(options.host, options.repo, options.ref, options.root,
+                         options.files)
+
+    unified_diff = "\n".join([d for d in diffs.values() if d])
+    if options.output:
+        with open(options.output, "w") as f:
+            f.write(unified_diff)
+    else:
+        print(unified_diff)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))

+ 223 - 0
tests/presubmit_diff_test.py

@@ -0,0 +1,223 @@
+#!/usr/bin/env vpython3
+# Copyright (c) 2024 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Unit tests for presubmit_diff.py."""
+
+import os
+import sys
+import tempfile
+import unittest
+from unittest import mock
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import gclient_utils
+import presubmit_diff
+
+
+class PresubmitDiffTest(unittest.TestCase):
+
+    def setUp(self):
+        # State of the local directory.
+        self.root = tempfile.mkdtemp()
+        os.makedirs(os.path.join(self.root, "nested"))
+        with open(os.path.join(self.root, "unchanged.txt"), "w") as f:
+            f.write("unchanged\n")
+        with open(os.path.join(self.root, "added.txt"), "w") as f:
+            f.write("added\n")
+        with open(os.path.join(self.root, "modified.txt"), "w") as f:
+            f.write("modified... foo\n")
+        with open(os.path.join(self.root, "nested/modified.txt"), "w") as f:
+            f.write("goodbye\n")
+
+        # State of the remote repository.
+        fetch_data = {
+            "unchanged.txt": "unchanged\n",
+            "deleted.txt": "deleted\n",
+            "modified.txt": "modified... bar\n",
+            "nested/modified.txt": "hello\n",
+        }
+
+        def fetch_side_effect(host, repo, ref, file):
+            return fetch_data.get(file, "")
+
+        fetch_content_mock = mock.patch("presubmit_diff.fetch_content",
+                                        side_effect=fetch_side_effect)
+        fetch_content_mock.start()
+
+        self.addCleanup(mock.patch.stopall)
+
+    def tearDown(self):
+        gclient_utils.rmtree(self.root)
+
+    def _test_create_diffs(self, files: list[str], expected: dict[str, str]):
+        actual = presubmit_diff.create_diffs("host", "repo", "ref", self.root,
+                                             files)
+        self.assertEqual(actual.keys(), expected.keys())
+
+        # Manually check each line in the diffs except the "index" line because
+        # hashes can differ in length.
+        for file, diff in actual.items():
+            expected_lines = expected[file].splitlines()
+            for idx, line in enumerate(diff.splitlines()):
+                if line.startswith("index "):
+                    continue
+                self.assertEqual(line, expected_lines[idx])
+
+    def test_create_diffs_with_nonexistent_file_raises_error(self):
+        self.assertRaises(
+            RuntimeError,
+            presubmit_diff.create_diffs,
+            "host",
+            "repo",
+            "ref",
+            self.root,
+            ["doesnotexist.txt"],
+        )
+
+    def test_create_diffs_with_unchanged_file(self):
+        self._test_create_diffs(
+            ["unchanged.txt"],
+            {"unchanged.txt": ""},
+        )
+
+    def test_create_diffs_with_added_file(self):
+        expected_diff = """diff --git a/added.txt b/added.txt
+new file mode 100644
+index 00000000..d5f7fc3f
+--- /dev/null
++++ b/added.txt
+@@ -0,0 +1 @@
++added
+"""
+        self._test_create_diffs(
+            ["added.txt"],
+            {"added.txt": expected_diff},
+        )
+
+    def test_create_diffs_with_deleted_file(self):
+        expected_diff = """diff --git a/deleted.txt b/deleted.txt
+deleted file mode 100644
+index 71779d2c..00000000
+--- a/deleted.txt
++++ /dev/null
+@@ -1 +0,0 @@
+-deleted
+"""
+        self._test_create_diffs(
+            ["deleted.txt"],
+            {"deleted.txt": expected_diff},
+        )
+
+    # pylint: disable=line-too-long
+
+    def test_create_diffs_with_modified_files(self):
+        expected_diff = """diff --git a/modified.txt b/modified.txt
+index a7dd0b00..12d68703 100644
+--- a/modified.txt
++++ b/modified.txt
+@@ -1 +1 @@
+-modified... bar
++modified... foo
+"""
+        expected_nested_diff = """diff --git a/nested/modified.txt b/nested/modified.txt
+index ce013625..dd7e1c6f 100644
+--- a/nested/modified.txt
++++ b/nested/modified.txt
+@@ -1 +1 @@
+-hello
++goodbye
+"""
+        self._test_create_diffs(
+            ["modified.txt", "nested/modified.txt"],
+            {
+                "modified.txt": expected_diff,
+                "nested/modified.txt": expected_nested_diff,
+            },
+        )
+
+    # Test cases for _process_diff.
+    def test_process_diff_with_no_changes(self):
+        self.assertEqual(
+            presubmit_diff._process_diff(
+                "",
+                "/path/to/src",
+                "/path/to/dst",
+            ),
+            "",
+        )
+
+    @mock.patch("platform.system", return_value="Linux")
+    @mock.patch("os.sep", new="/")
+    def test_process_diff_handles_unix_paths(self, sys_mock):
+        diff = """diff --git a/path/to/src/file.txt b/path/to/dst/file.txt
+index ce013625..dd7e1c6f 100644
+--- a/path/to/file.txt
++++ b/path/to/file.txt
+@@ -1 +1 @@
+-random
++content
+"""
+        expected = """diff --git a/file.txt b/file.txt
+index ce013625..dd7e1c6f 100644
+--- a/path/to/file.txt
++++ b/path/to/file.txt
+@@ -1 +1 @@
+-random
++content
+"""
+        self.assertEqual(
+            presubmit_diff._process_diff(
+                diff,
+                "/path/to/src",
+                "/path/to/dst",
+            ),
+            expected,
+        )
+
+        # Trailing slashes are handled.
+        self.assertEqual(
+            presubmit_diff._process_diff(
+                diff,
+                "/path/to/src/",
+                "/path/to/dst/",
+            ),
+            expected,
+        )
+
+    @mock.patch("platform.system", return_value="Windows")
+    @mock.patch("os.sep", new="\\")
+    def test_process_diff_handles_windows_paths(self, sys_mock):
+        diff = """diff --git "a/C:\\\\path\\\\to\\\\src\\\\file.txt" "b/C:\\\\path\\\\to\\\\dst\\\\file.txt"
+index ce013625..dd7e1c6f 100644
+--- "a/C:\\\\path\\\\to\\\\src\\\\file.txt
++++ "b/C:\\\\path\\\\to\\\\dst\\\\file.txt"
+@@ -1 +1 @@
+-random
++content
+"""
+        expected = """diff --git a/file.txt b/file.txt
+index ce013625..dd7e1c6f 100644
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1 @@
+-random
++content
+"""
+        self.assertEqual(
+            expected,
+            presubmit_diff._process_diff(diff, "C:\\path\\to\\src",
+                                         "C:\\path\\to\\dst"),
+        )
+
+        # Trailing slashes are handled.
+        self.assertEqual(
+            expected,
+            presubmit_diff._process_diff(diff, "C:\\path\\to\\src\\",
+                                         "C:\\path\\to\\dst\\"),
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()