owners.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627
  1. # Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. # Use of this source code is governed by a BSD-style license that can be
  3. # found in the LICENSE file.
  4. r"""A database of OWNERS files.
  5. OWNERS files indicate who is allowed to approve changes in a specific directory
  6. (or who is allowed to make changes without needing approval of another OWNER).
  7. Note that all changes must still be reviewed by someone familiar with the code,
  8. so you may need approval from both an OWNER and a reviewer in many cases.
  9. The syntax of the OWNERS file is, roughly:
  10. lines := (\s* line? \s* comment? \s* "\n")*
  11. line := directive
  12. | "per-file" \s+ glob \s* "=" \s* directive
  13. directive := "set noparent"
  14. | "file:" owner_file
  15. | email_address
  16. | "*"
  17. glob := [a-zA-Z0-9_-*?]+
  18. comment := "#" [^"\n"]*
  19. owner_file := "OWNERS"
  20. | [^"\n"]* "_OWNERS"
  21. Email addresses must follow the foo@bar.com short form (exact syntax given
  22. in BASIC_EMAIL_REGEXP, below). Filename globs follow the simple unix
  23. shell conventions, and relative and absolute paths are not allowed (i.e.,
  24. globs only refer to the files in the current directory).
  25. If a user's email is one of the email_addresses in the file, the user is
  26. considered an "OWNER" for all files in the directory.
  27. If the "per-file" directive is used, the line only applies to files in that
  28. directory that match the filename glob specified.
  29. If the "set noparent" directive is used, then only entries in this OWNERS file
  30. apply to files in this directory; if the "set noparent" directive is not
  31. used, then entries in OWNERS files in enclosing (upper) directories also
  32. apply (up until a "set noparent" is encountered).
  33. If "per-file glob=set noparent" is used, then global directives are ignored
  34. for the glob, and only the "per-file" owners are used for files matching that
  35. glob.
  36. If the "file:" directive is used, the referred to OWNERS file will be parsed and
  37. considered when determining the valid set of OWNERS. If the filename starts with
  38. "//" it is relative to the root of the repository, otherwise it is relative to
  39. the current file. The referred to file *must* be named OWNERS or end in a suffix
  40. of _OWNERS.
  41. Examples for all of these combinations can be found in tests/owners_unittest.py.
  42. """
  43. import collections
  44. import fnmatch
  45. import random
  46. import re
  47. try:
  48. # This fallback applies for all versions of Python before 3.3
  49. import collections.abc as collections_abc
  50. except ImportError:
  51. import collections as collections_abc
  52. # If this is present by itself on a line, this means that everyone can review.
  53. EVERYONE = '*'
  54. # Recognizes 'X@Y' email addresses. Very simplistic.
  55. BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
  56. # Key for global comments per email address. Should be unlikely to be a
  57. # pathname.
  58. GLOBAL_STATUS = '*'
  59. def _assert_is_collection(obj):
  60. assert not isinstance(obj, str)
  61. # Module 'collections' has no 'Iterable' member
  62. # pylint: disable=no-member
  63. if hasattr(collections_abc, 'Iterable') and hasattr(collections_abc, 'Sized'):
  64. assert (isinstance(obj, collections_abc.Iterable) and
  65. isinstance(obj, collections_abc.Sized))
  66. class SyntaxErrorInOwnersFile(Exception):
  67. def __init__(self, path, lineno, msg):
  68. super(SyntaxErrorInOwnersFile, self).__init__((path, lineno, msg))
  69. self.path = path
  70. self.lineno = lineno
  71. self.msg = msg
  72. def __str__(self):
  73. return '%s:%d syntax error: %s' % (self.path, self.lineno, self.msg)
class Database(object):
  """A database of OWNERS files for a repository.

  This class allows you to find a suggested set of reviewers for a list
  of changed files, and see if a list of changed files is covered by a
  list of reviewers."""

  def __init__(self, root, fopen, os_path):
    """Args:
      root: the path to the root of the Repository
      fopen: function callback to open a text file for reading; the result
          is iterated line by line
      os_path: module/object callback with fields for 'abspath', 'dirname',
          'exists', 'isabs', 'join', 'normpath', 'relpath', and 'split'
    """
    self.root = root
    self.fopen = fopen
    self.os_path = os_path

    # Pick a default email regexp to use; callers can override as desired.
    self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)

    # Replacement contents for the given files. Maps the file name of an
    # OWNERS file (relative to root) to an iterator returning the replacement
    # file contents.
    self.override_files = {}

    # Mapping of owners to the paths or globs they own.
    self._owners_to_paths = {EVERYONE: set()}

    # Mappings of directories -> globs in the directory -> owners
    # Example: "chrome/browser" -> "chrome/browser/*.h" -> ("john", "maria")
    self._paths_to_owners = {}

    # Mapping reviewers to the preceding comment per file in the OWNERS files.
    self.comments = {}

    # Cache of compiled regexes for _fnmatch()
    self._fnmatch_cache = {}

    # Sets of paths that stop us from looking above them for owners.
    # (This is implicitly true for the root directory).
    #
    # The implementation is a mapping:
    # Directory -> globs in the directory,
    #
    # Example:
    # 'ui/events/devices/mojo' -> 'ui/events/devices/mojo/*_struct_traits*.*'
    self._stop_looking = {'': set([''])}

    # Set of files which have already been read. Entries are the paths joined
    # with |root|, not the root-relative names.
    self.read_files = set()

    # Set of files which were included from other files. Files are processed
    # differently depending on whether they are regular owners files or
    # being included from another file.
    # (Implemented as a dict: included file name -> set of owners it yields.)
    self._included_files = {}

    # File with global status lines for owners.
    self._status_file = None
  121. def _file_affects_ownership(self, path):
  122. """Returns true if the path refers to a file that could affect ownership."""
  123. filename = self.os_path.split(path)[-1]
  124. return filename == 'OWNERS' or filename.endswith('_OWNERS')
  def reviewers_for(self, files, author):
    """Returns a suggested set of reviewers that will cover the files.

    files is a sequence of paths relative to (and under) self.root.
    If author is nonempty, it is left out of the returned set so that nobody
    is suggested as the reviewer of their own change.

    When the only possible suggestion is the wildcard owner, the placeholder
    '<anyone>' is returned instead; when real owners were found as well, the
    wildcard is simply dropped."""
    self._check_paths(files)
    self.load_data_needed_for(files)

    candidates = self._covering_set_of_owners_for(files, author)
    if EVERYONE not in candidates:
      return candidates
    if len(candidates) == 1:
      return set(['<anyone>'])
    candidates.remove(EVERYONE)
    return candidates
  def files_not_covered_by(self, files, reviewers):
    """Returns the set of files that none of the reviewers own.

    Args:
      files is a sequence of paths relative to (and under) self.root.
      reviewers is a sequence of strings matching self.email_regexp.
    """
    self._check_paths(files)
    self._check_reviewers(reviewers)
    self.load_data_needed_for(files)

    uncovered = set()
    for path in files:
      if not self._is_obj_covered_by(path, reviewers):
        uncovered.add(path)
    return uncovered
  def _check_paths(self, files):
    """Asserts that |files| is a collection of relative paths under self.root.

    "Under" is a plain string-prefix test of the absolute path against the
    absolute root.
    """
    _assert_is_collection(files)
    for candidate in files:
      assert not self.os_path.isabs(candidate)
      root_abs = self.os_path.abspath(self.root)
      resolved = self.os_path.abspath(self.os_path.join(root_abs, candidate))
      assert resolved.startswith(root_abs)
  def _check_reviewers(self, reviewers):
    """Asserts that |reviewers| is a collection of strings that all match
    self.email_regexp; the whole collection is the assertion message."""
    _assert_is_collection(reviewers)
    for reviewer in reviewers:
      assert self.email_regexp.match(reviewer), reviewers
  def _is_obj_covered_by(self, objname, reviewers):
    """Returns True if a reviewer (or the wildcard owner) owns |objname|.

    The object itself is tried first, then each enclosing directory in turn,
    until a match is found or _should_stop_looking() ends the upward walk.
    """
    candidates = list(reviewers)
    candidates.append(EVERYONE)

    current = objname
    while True:
      for who in candidates:
        for pattern in self._owners_to_paths.get(who, ()):
          if fnmatch.fnmatch(current, pattern):
            return True
      if self._should_stop_looking(current):
        return False
      current = self.os_path.dirname(current)
  169. def enclosing_dir_with_owners(self, objname):
  170. """Returns the innermost enclosing directory that has an OWNERS file."""
  171. dirpath = objname
  172. while not self._owners_for(dirpath):
  173. if self._should_stop_looking(dirpath):
  174. break
  175. dirpath = self.os_path.dirname(dirpath)
  176. return dirpath
  177. def load_data_needed_for(self, files):
  178. self._read_global_comments()
  179. visited_dirs = set()
  180. for f in files:
  181. dirpath = self.os_path.dirname(f)
  182. while dirpath not in visited_dirs:
  183. visited_dirs.add(dirpath)
  184. obj_owners = self._owners_for(dirpath)
  185. if obj_owners:
  186. break
  187. self._read_owners(self.os_path.join(dirpath, 'OWNERS'))
  188. if self._should_stop_looking(dirpath):
  189. break
  190. dirpath = self.os_path.dirname(dirpath)
  191. def _should_stop_looking(self, objname):
  192. dirname = objname
  193. while True:
  194. if dirname in self._stop_looking:
  195. if any(self._fnmatch(objname, stop_looking)
  196. for stop_looking in self._stop_looking[dirname]):
  197. return True
  198. up_dirname = self.os_path.dirname(dirname)
  199. if up_dirname == dirname:
  200. break
  201. dirname = up_dirname
  202. return False
  203. def _get_root_affected_dir(self, obj_name):
  204. """Returns the deepest directory/path that is affected by a file pattern
  205. |obj_name|."""
  206. root_affected_dir = obj_name
  207. while '*' in root_affected_dir:
  208. root_affected_dir = self.os_path.dirname(root_affected_dir)
  209. return root_affected_dir
  210. def _owners_for(self, objname):
  211. obj_owners = set()
  212. # Possibly relevant rules can be found stored at every directory
  213. # level so iterate upwards, looking for them.
  214. dirname = objname
  215. while True:
  216. dir_owner_rules = self._paths_to_owners.get(dirname)
  217. if dir_owner_rules:
  218. for owned_path, path_owners in dir_owner_rules.items():
  219. if self._fnmatch(objname, owned_path):
  220. obj_owners |= path_owners
  221. up_dirname = self.os_path.dirname(dirname)
  222. if up_dirname == dirname:
  223. break
  224. dirname = up_dirname
  225. return obj_owners
  226. def _read_owners(self, path):
  227. owners_path = self.os_path.join(self.root, path)
  228. if not (self.os_path.exists(owners_path) or (path in self.override_files)):
  229. return
  230. if owners_path in self.read_files:
  231. return
  232. self.read_files.add(owners_path)
  233. is_toplevel = path == 'OWNERS'
  234. comment = []
  235. dirpath = self.os_path.dirname(path)
  236. in_comment = False
  237. # We treat the beginning of the file as an blank line.
  238. previous_line_was_blank = True
  239. reset_comment_after_use = False
  240. lineno = 0
  241. if path in self.override_files:
  242. file_iter = self.override_files[path]
  243. else:
  244. file_iter = self.fopen(owners_path)
  245. for line in file_iter:
  246. lineno += 1
  247. line = line.strip()
  248. if line.startswith('#'):
  249. if is_toplevel:
  250. m = re.match(r'#\s*OWNERS_STATUS\s+=\s+(.+)$', line)
  251. if m:
  252. self._status_file = m.group(1).strip()
  253. continue
  254. if not in_comment:
  255. comment = []
  256. reset_comment_after_use = not previous_line_was_blank
  257. comment.append(line[1:].strip())
  258. in_comment = True
  259. continue
  260. in_comment = False
  261. if line == '':
  262. comment = []
  263. previous_line_was_blank = True
  264. continue
  265. # If the line ends with a comment, strip the comment and store it for this
  266. # line only.
  267. line, _, line_comment = line.partition('#')
  268. line = line.strip()
  269. line_comment = [line_comment.strip()] if line_comment else []
  270. previous_line_was_blank = False
  271. if line == 'set noparent':
  272. self._stop_looking.setdefault(
  273. self._get_root_affected_dir(dirpath), set()).add(dirpath)
  274. continue
  275. m = re.match('per-file (.+)=(.+)', line)
  276. if m:
  277. glob_string = m.group(1).strip()
  278. directive = m.group(2).strip()
  279. full_glob_string = self.os_path.join(self.root, dirpath, glob_string)
  280. if '/' in glob_string or '\\' in glob_string:
  281. raise SyntaxErrorInOwnersFile(owners_path, lineno,
  282. 'per-file globs cannot span directories or use escapes: "%s"' %
  283. line)
  284. relative_glob_string = self.os_path.relpath(full_glob_string, self.root)
  285. self._add_entry(relative_glob_string, directive, owners_path,
  286. lineno, '\n'.join(comment + line_comment))
  287. if reset_comment_after_use:
  288. comment = []
  289. continue
  290. if line.startswith('set '):
  291. raise SyntaxErrorInOwnersFile(owners_path, lineno,
  292. 'unknown option: "%s"' % line[4:].strip())
  293. self._add_entry(dirpath, line, owners_path, lineno,
  294. ' '.join(comment + line_comment))
  295. if reset_comment_after_use:
  296. comment = []
  297. def _read_global_comments(self):
  298. if not self._status_file:
  299. if not 'OWNERS' in self.read_files:
  300. self._read_owners('OWNERS')
  301. if not self._status_file:
  302. return
  303. owners_status_path = self.os_path.join(self.root, self._status_file)
  304. if not self.os_path.exists(owners_status_path):
  305. raise IOError('Could not find global status file "%s"' %
  306. owners_status_path)
  307. if owners_status_path in self.read_files:
  308. return
  309. self.read_files.add(owners_status_path)
  310. lineno = 0
  311. for line in self.fopen(owners_status_path):
  312. lineno += 1
  313. line = line.strip()
  314. if line.startswith('#'):
  315. continue
  316. if line == '':
  317. continue
  318. m = re.match('(.+?):(.+)', line)
  319. if m:
  320. owner = m.group(1).strip()
  321. comment = m.group(2).strip()
  322. if not self.email_regexp.match(owner):
  323. raise SyntaxErrorInOwnersFile(owners_status_path, lineno,
  324. 'invalid email address: "%s"' % owner)
  325. self.comments.setdefault(owner, {})
  326. self.comments[owner][GLOBAL_STATUS] = comment
  327. continue
  328. raise SyntaxErrorInOwnersFile(owners_status_path, lineno,
  329. 'cannot parse status entry: "%s"' % line.strip())
  330. def _add_entry(self, owned_paths, directive, owners_path, lineno, comment):
  331. if directive == 'set noparent':
  332. self._stop_looking.setdefault(
  333. self._get_root_affected_dir(owned_paths), set()).add(owned_paths)
  334. elif directive.startswith('file:'):
  335. include_file = self._resolve_include(directive[5:], owners_path, lineno)
  336. if not include_file:
  337. raise SyntaxErrorInOwnersFile(owners_path, lineno,
  338. ('%s does not refer to an existing file.' % directive[5:]))
  339. included_owners = self._read_just_the_owners(include_file)
  340. for owner in included_owners:
  341. self._owners_to_paths.setdefault(owner, set()).add(owned_paths)
  342. self._paths_to_owners.setdefault(
  343. self._get_root_affected_dir(owned_paths), {}).setdefault(
  344. owned_paths, set()).add(owner)
  345. elif self.email_regexp.match(directive) or directive == EVERYONE:
  346. if comment:
  347. self.comments.setdefault(directive, {})
  348. self.comments[directive][owned_paths] = comment
  349. self._owners_to_paths.setdefault(directive, set()).add(owned_paths)
  350. self._paths_to_owners.setdefault(
  351. self._get_root_affected_dir(owned_paths), {}).setdefault(
  352. owned_paths, set()).add(directive)
  353. else:
  354. raise SyntaxErrorInOwnersFile(owners_path, lineno,
  355. ('"%s" is not a "set noparent", file include, "*", '
  356. 'or an email address.' % (directive,)))
  357. def _resolve_include(self, path, start, lineno):
  358. if path.startswith('//'):
  359. include_path = path[2:]
  360. else:
  361. assert start.startswith(self.root)
  362. start = self.os_path.dirname(self.os_path.relpath(start, self.root))
  363. include_path = self.os_path.normpath(self.os_path.join(start, path))
  364. if include_path in self.override_files:
  365. return include_path
  366. owners_path = self.os_path.join(self.root, include_path)
  367. # Paths included via "file:" must end in OWNERS or _OWNERS. Files that can
  368. # affect ownership have a different set of ownership rules, so that users
  369. # cannot self-approve changes adding themselves to an OWNERS file.
  370. if not self._file_affects_ownership(owners_path):
  371. raise SyntaxErrorInOwnersFile(start, lineno, 'file: include must specify '
  372. 'a file named OWNERS or ending in _OWNERS')
  373. if not self.os_path.exists(owners_path):
  374. return None
  375. return include_path
  376. def _read_just_the_owners(self, include_file):
  377. if include_file in self._included_files:
  378. return self._included_files[include_file]
  379. owners = set()
  380. self._included_files[include_file] = owners
  381. lineno = 0
  382. if include_file in self.override_files:
  383. file_iter = self.override_files[include_file]
  384. else:
  385. file_iter = self.fopen(self.os_path.join(self.root, include_file))
  386. for line in file_iter:
  387. lineno += 1
  388. line = line.strip()
  389. if (line.startswith('#') or line == '' or
  390. line.startswith('set noparent') or
  391. line.startswith('per-file')):
  392. continue
  393. # If the line ends with a comment, strip the comment.
  394. line, _delim, _comment = line.partition('#')
  395. line = line.strip()
  396. if self.email_regexp.match(line) or line == EVERYONE:
  397. owners.add(line)
  398. continue
  399. if line.startswith('file:'):
  400. sub_include_file = self._resolve_include(line[5:], include_file, lineno)
  401. sub_owners = self._read_just_the_owners(sub_include_file)
  402. owners.update(sub_owners)
  403. continue
  404. raise SyntaxErrorInOwnersFile(include_file, lineno,
  405. ('"%s" is not a "set noparent", file include, "*", '
  406. 'or an email address.' % (line,)))
  407. return owners
  408. def _covering_set_of_owners_for(self, files, author):
  409. dirs_remaining = set(self.enclosing_dir_with_owners(f) for f in files)
  410. all_possible_owners = self.all_possible_owners(dirs_remaining, author)
  411. suggested_owners = set()
  412. while dirs_remaining and all_possible_owners:
  413. owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
  414. suggested_owners.add(owner)
  415. dirs_to_remove = set(el[0] for el in all_possible_owners[owner])
  416. dirs_remaining -= dirs_to_remove
  417. # Now that we've used `owner` and covered all their dirs, remove them
  418. # from consideration.
  419. del all_possible_owners[owner]
  420. for o, dirs in list(all_possible_owners.items()):
  421. new_dirs = [(d, dist) for (d, dist) in dirs if d not in dirs_to_remove]
  422. if not new_dirs:
  423. del all_possible_owners[o]
  424. else:
  425. all_possible_owners[o] = new_dirs
  426. return suggested_owners
  427. def _all_possible_owners_for_dir_or_file(self, dir_or_file, author,
  428. cache):
  429. """Returns a dict of {potential owner: (dir_or_file, distance)} mappings.
  430. """
  431. assert not dir_or_file.startswith("/")
  432. res = cache.get(dir_or_file)
  433. if res is None:
  434. res = {}
  435. dirname = dir_or_file
  436. for owner in self._owners_for(dirname):
  437. if author and owner == author:
  438. continue
  439. res.setdefault(owner, [])
  440. res[owner] = (dir_or_file, 1)
  441. if not self._should_stop_looking(dirname):
  442. dirname = self.os_path.dirname(dirname)
  443. parent_res = self._all_possible_owners_for_dir_or_file(dirname,
  444. author, cache)
  445. # Merge the parent information with our information, adjusting
  446. # distances as necessary, and replacing the parent directory
  447. # names with our names.
  448. for owner, par_dir_and_distances in parent_res.items():
  449. if owner in res:
  450. # If the same person is in multiple OWNERS files above a given
  451. # directory, only count the closest one.
  452. continue
  453. parent_distance = par_dir_and_distances[1]
  454. res[owner] = (dir_or_file, parent_distance + 1)
  455. cache[dir_or_file] = res
  456. return res
  457. def all_possible_owners(self, dirs_and_files, author):
  458. """Returns a dict of {potential owner: (dir, distance)} mappings.
  459. A distance of 1 is the lowest/closest possible distance (which makes the
  460. subsequent math easier).
  461. """
  462. all_possible_owners_for_dir_or_file_cache = {}
  463. all_possible_owners = {}
  464. for current_dir in dirs_and_files:
  465. dir_owners = self._all_possible_owners_for_dir_or_file(
  466. current_dir, author,
  467. all_possible_owners_for_dir_or_file_cache)
  468. for owner, dir_and_distance in dir_owners.items():
  469. if owner in all_possible_owners:
  470. all_possible_owners[owner].append(dir_and_distance)
  471. else:
  472. all_possible_owners[owner] = [dir_and_distance]
  473. return all_possible_owners
  474. def _fnmatch(self, filename, pattern):
  475. """Same as fnmatch.fnmatch(), but interally caches the compiled regexes."""
  476. matcher = self._fnmatch_cache.get(pattern)
  477. if matcher is None:
  478. matcher = re.compile(fnmatch.translate(pattern)).match
  479. self._fnmatch_cache[pattern] = matcher
  480. return matcher(filename)
  481. @staticmethod
  482. def total_costs_by_owner(all_possible_owners, dirs):
  483. # We want to minimize both the number of reviewers and the distance
  484. # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
  485. # an arbitrarily-selected scaling factor that seems to work well - it
  486. # will select one reviewer in the parent directory over three reviewers
  487. # in subdirs, but not one reviewer over just two.
  488. result = {}
  489. for owner in all_possible_owners:
  490. total_distance = 0
  491. num_directories_owned = 0
  492. for dirname, distance in all_possible_owners[owner]:
  493. if dirname in dirs:
  494. total_distance += distance
  495. num_directories_owned += 1
  496. if num_directories_owned:
  497. result[owner] = (total_distance /
  498. pow(num_directories_owned, 1.75))
  499. return result
  500. @staticmethod
  501. def lowest_cost_owner(all_possible_owners, dirs):
  502. total_costs_by_owner = Database.total_costs_by_owner(all_possible_owners,
  503. dirs)
  504. # Return the lowest cost owner. In the case of a tie, pick one randomly.
  505. lowest_cost = min(total_costs_by_owner.values())
  506. lowest_cost_owners = [
  507. owner for owner, cost in total_costs_by_owner.items()
  508. if cost == lowest_cost]
  509. return random.Random().choice(lowest_cost_owners)
  510. def owners_rooted_at_file(self, filename):
  511. """Returns a set of all owners transitively listed in filename.
  512. This function returns a set of all the owners either listed in filename, or
  513. in a file transitively included by filename. Lines that are not plain owners
  514. (i.e. per-file owners) are ignored.
  515. """
  516. return self._read_just_the_owners(filename)