瀏覽代碼

Handle binaries in presubmit_diff

presubmit_diff always tries to decode files from Gitiles with UTF-8
but if the file is binary, it gets a UnicodeDecodeError. Don't
assume that a file is text and just write the bytes directly.

Bug: b/358175830
Change-Id: Iaa259051b4157737397933e9d994c00e9da5836c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5771938
Reviewed-by: Scott Lee <ddoman@chromium.org>
Commit-Queue: Gavin Mak <gavinmak@google.com>
Gavin Mak 1 年之前
父節點
當前提交
f52dad9747
共有 2 個文件被更改,包括 37 次插入20 次删除
  1. 7 7
      presubmit_diff.py
  2. 30 13
      tests/presubmit_diff_test.py

+ 7 - 7
presubmit_diff.py

@@ -10,10 +10,10 @@ from __future__ import annotations
 
 import argparse
 import base64
+import concurrent.futures
 import os
 import platform
 import sys
-import concurrent.futures
 
 import gclient_utils
 from gerrit_util import (CreateHttpConn, ReadHttpResponse,
@@ -24,10 +24,10 @@ DEV_NULL = "/dev/null"
 HEADER_DELIMITER = "@@"
 
 
-def fetch_content(host: str, repo: str, ref: str, file: str) -> str:
+def fetch_content(host: str, repo: str, ref: str, file: str) -> bytes:
     """Fetches the content of a file from Gitiles.
 
-    If the file does not exist at the commit, it returns an empty string.
+    If the file does not exist at the commit, returns an empty bytes object.
 
     Args:
       host: Gerrit host.
@@ -36,13 +36,13 @@ def fetch_content(host: str, repo: str, ref: str, file: str) -> str:
       file: Path of file to fetch.
 
     Returns:
-        A string containing the content of the file at the commit, or an empty
-        string if the file does not exist at the commit.
+        Bytes of the file at the commit or an empty bytes object if the file
+        does not exist at the commit.
     """
     conn = CreateHttpConn(f"{host}.googlesource.com",
                           f"{repo}/+show/{ref}/{file}?format=text")
     response = ReadHttpResponse(conn, accept_statuses=[200, 404])
-    return base64.b64decode(response.read()).decode("utf-8")
+    return base64.b64decode(response.read())
 
 
 def git_diff(src: str | None, dest: str | None) -> str:
@@ -110,7 +110,7 @@ def _create_diff(host: str, repo: str, ref: str, root: str, file: str) -> str:
         if old_content:
             old_file = os.path.join(tmp_root, file)
             os.makedirs(os.path.dirname(old_file), exist_ok=True)
-            with open(old_file, "w") as f:
+            with open(old_file, "wb") as f:
                 f.write(old_content)
 
         if not old_file and not new_file:

+ 30 - 13
tests/presubmit_diff_test.py

@@ -23,25 +23,31 @@ class PresubmitDiffTest(unittest.TestCase):
         # State of the local directory.
         self.root = tempfile.mkdtemp()
         os.makedirs(os.path.join(self.root, "nested"))
-        with open(os.path.join(self.root, "unchanged.txt"), "w") as f:
-            f.write("unchanged\n")
-        with open(os.path.join(self.root, "added.txt"), "w") as f:
-            f.write("added\n")
-        with open(os.path.join(self.root, "modified.txt"), "w") as f:
-            f.write("modified... foo\n")
-        with open(os.path.join(self.root, "nested/modified.txt"), "w") as f:
-            f.write("goodbye\n")
+
+        # On Windows, writing "\n" in text mode becomes "\r\n". Write in binary
+        # so that doesn't happen, otherwise tests will fail.
+        with open(os.path.join(self.root, "unchanged.txt"), "wb") as f:
+            f.write("unchanged\n".encode("utf-8"))
+        with open(os.path.join(self.root, "added.txt"), "wb") as f:
+            f.write("added\n".encode("utf-8"))
+        with open(os.path.join(self.root, "modified.txt"), "wb") as f:
+            f.write("modified... foo\n".encode("utf-8"))
+        with open(os.path.join(self.root, "nested/modified.txt"), "wb") as f:
+            f.write("goodbye\n".encode("utf-8"))
 
         # State of the remote repository.
         fetch_data = {
-            "unchanged.txt": "unchanged\n",
-            "deleted.txt": "deleted\n",
-            "modified.txt": "modified... bar\n",
-            "nested/modified.txt": "hello\n",
+            "unchanged.txt": "unchanged\n".encode("utf-8"),
+            "deleted.txt": "deleted\n".encode("utf-8"),
+            "modified.txt": "modified... bar\n".encode("utf-8"),
+            "nested/modified.txt": "hello\n".encode("utf-8"),
+
+            # Intenionally invalid start byte for utf-8.
+            "deleted_binary": b"\xff\x00",
         }
 
         def fetch_side_effect(host, repo, ref, file):
-            return fetch_data.get(file, "")
+            return fetch_data.get(file, b"")
 
         fetch_content_mock = mock.patch("presubmit_diff.fetch_content",
                                         side_effect=fetch_side_effect)
@@ -111,6 +117,17 @@ index 71779d2c..00000000
             {"deleted.txt": expected_diff},
         )
 
+    def test_create_diffs_with_binary_file(self):
+        expected_diff = """diff --git a/deleted_binary b/deleted_binary
+deleted file mode 100644
+index ce542efaa..00000000
+Binary files a/deleted_binary and /dev/null differ
+"""
+        self._test_create_diffs(
+            ["deleted_binary"],
+            {"deleted_binary": expected_diff},
+        )
+
     # pylint: disable=line-too-long
 
     def test_create_diffs_with_modified_files(self):