Browse Source

Fetch file contents from Gitiles in parallel

This makes it much faster to get a diff for a set of multiple files.

Bug: b/323243527
Change-Id: I6c54a4fa8782d4a0bbb187b8bed4fbd52be47344
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5384647
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Gavin Mak <gavinmak@google.com>
Gavin Mak 1 year ago
parent
commit
6551a7eb43
1 changed file with 34 additions and 23 deletions
  1. 34 23
      presubmit_diff.py

+ 34 - 23
presubmit_diff.py

@@ -13,9 +13,11 @@ import base64
 import os
 import os
 import platform
 import platform
 import sys
 import sys
+import concurrent.futures
 
 
 import gclient_utils
 import gclient_utils
-from gerrit_util import CreateHttpConn, ReadHttpResponse
+from gerrit_util import (CreateHttpConn, ReadHttpResponse,
+                         MAX_CONCURRENT_CONNECTION)
 import subprocess2
 import subprocess2
 
 
 DEV_NULL = "/dev/null"
 DEV_NULL = "/dev/null"
@@ -89,6 +91,28 @@ def _process_diff(diff: str, src_root: str, dst_root: str) -> str:
     return header + HEADER_DELIMITER + body
     return header + HEADER_DELIMITER + body
 
 
 
 
+def _create_diff(host: str, repo: str, ref: str, root: str, file: str) -> str:
+    new_file = os.path.join(root, file)
+    if not os.path.exists(new_file):
+        new_file = None
+
+    with gclient_utils.temporary_directory() as tmp_root:
+        old_file = None
+        old_content = fetch_content(host, repo, ref, file)
+        if old_content:
+            old_file = os.path.join(tmp_root, file)
+            os.makedirs(os.path.dirname(old_file), exist_ok=True)
+            with open(old_file, "w") as f:
+                f.write(old_content)
+
+        if not old_file and not new_file:
+            raise RuntimeError(f"Could not access file {file} from {root} "
+                               f"or from {host}/{repo}:{ref}.")
+
+        diff = git_diff(old_file, new_file)
+        return _process_diff(diff, tmp_root, root)
+
+
 def create_diffs(host: str, repo: str, ref: str, root: str,
 def create_diffs(host: str, repo: str, ref: str, root: str,
                  files: list[str]) -> dict[str, str]:
                  files: list[str]) -> dict[str, str]:
     """Calculates diffs of files in a directory against a commit.
     """Calculates diffs of files in a directory against a commit.
@@ -107,28 +131,15 @@ def create_diffs(host: str, repo: str, ref: str, root: str,
         RuntimeError: If a file is missing in both the root and the repo.
         RuntimeError: If a file is missing in both the root and the repo.
     """
     """
     diffs = {}
     diffs = {}
-    with gclient_utils.temporary_directory() as tmp_root:
-        # TODO(gavinmak): Parallelize fetching content.
-        for file in files:
-            new_file = os.path.join(root, file)
-            if not os.path.exists(new_file):
-                new_file = None
-
-            old_file = None
-            old_content = fetch_content(host, repo, ref, file)
-            if old_content:
-                old_file = os.path.join(tmp_root, file)
-                os.makedirs(os.path.dirname(old_file), exist_ok=True)
-                with open(old_file, "w") as f:
-                    f.write(old_content)
-
-            if not old_file and not new_file:
-                raise RuntimeError(f"Could not access file {file} from {root} "
-                                   f"or from {host}/{repo}:{ref}.")
-
-            diff = git_diff(old_file, new_file)
-            diffs[file] = _process_diff(diff, tmp_root, root)
-
+    with concurrent.futures.ThreadPoolExecutor(
+            max_workers=MAX_CONCURRENT_CONNECTION) as executor:
+        futures_to_file = {
+            executor.submit(_create_diff, host, repo, ref, root, file): file
+            for file in files
+        }
+        for future in concurrent.futures.as_completed(futures_to_file):
+            file = futures_to_file[future]
+            diffs[file] = future.result()
     return diffs
     return diffs