Bläddra i källkod

watchlists: Precompile filepath regular expressions before using them.

Instead of calling re.search() for every entry in WATCHLIST_DEFINITIONS for
every file being processed, create the regular expression objects beforehand
when parsing the watchlist file.

Processing a Chromium commit with 17k files went down from about 25 minutes
to 10 seconds with this change.

Bug: 780055
Change-Id: I6493971b67a7466ce8e1e3b28537018a724bbf47
Reviewed-on: https://chromium-review.googlesource.com/751463
Reviewed-by: Aaron Gable <agable@chromium.org>
Commit-Queue: Raphael Kubo da Costa (rakuco) <raphael.kubo.da.costa@intel.com>
Raphael Kubo da Costa 7 år sedan
förälder
incheckning
ae97943bbc
1 ändrade filer med 12 tillägg och 5 borttagningar
  1. 12 5
      watchlists.py

+ 12 - 5
watchlists.py

@@ -38,6 +38,7 @@ class Watchlists(object):
   _RULES_FILENAME = _RULES
   _RULES_FILENAME = _RULES
   _repo_root = None
   _repo_root = None
   _defns = {}       # Definitions
   _defns = {}       # Definitions
+  _path_regexps = {}  # Name -> Regular expression mapping
   _watchlists = {}  # name to email mapping
   _watchlists = {}  # name to email mapping
 
 
   def __init__(self, repo_root):
   def __init__(self, repo_root):
@@ -88,6 +89,15 @@ class Watchlists(object):
     self._defns = defns
     self._defns = defns
     self._watchlists = watchlists
     self._watchlists = watchlists
 
 
+    # Compile the regular expressions ahead of time to avoid creating them
+    # on-the-fly multiple times per file.
+    self._path_regexps = {}
+    for name, rule in defns.iteritems():
+      filepath = rule.get('filepath')
+      if not filepath:
+        continue
+      self._path_regexps[name] = re.compile(filepath)
+
     # Verify that all watchlist names are defined
     # Verify that all watchlist names are defined
     for name in watchlists:
     for name in watchlists:
       if name not in defns:
       if name not in defns:
@@ -105,13 +115,10 @@ class Watchlists(object):
     watchers = set()  # A set, to avoid duplicates
     watchers = set()  # A set, to avoid duplicates
     for path in paths:
     for path in paths:
       path = path.replace(os.sep, '/')
       path = path.replace(os.sep, '/')
-      for name, rule in self._defns.iteritems():
+      for name, rule in self._path_regexps.iteritems():
         if name not in self._watchlists:
         if name not in self._watchlists:
           continue
           continue
-        rex_str = rule.get('filepath')
-        if not rex_str:
-          continue
-        if re.search(rex_str, path):
+        if rule.search(path):
           map(watchers.add, self._watchlists[name])
           map(watchers.add, self._watchlists[name])
     return list(watchers)
     return list(watchers)