dependency_metadata.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. #!/usr/bin/env python3
  2. # Copyright 2023 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. from collections import defaultdict
  6. import os
  7. import sys
  8. import itertools
  9. from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
  10. _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
  11. # The repo's root directory.
  12. _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
  13. # Add the repo's root directory for clearer imports.
  14. sys.path.insert(0, _ROOT_DIR)
  15. import metadata.fields.field_types as field_types
  16. import metadata.fields.custom.license as license_util
  17. import metadata.fields.custom.version as version_util
  18. import metadata.fields.custom.mitigated as mitigated_util
  19. import metadata.fields.known as known_fields
  20. import metadata.fields.util as util
  21. import metadata.validation_result as vr
  22. from metadata.fields.custom.license_allowlist import OPEN_SOURCE_SPDX_LICENSES
  23. class DependencyMetadata:
  24. """The metadata for a single dependency.
  25. See @property declarations below to retrieve validated fields for
  26. downstream consumption.
  27. The property returns `None` if the provided value (e.g. in
  28. README.chromium file) is clearly invalid.
  29. Otherwise, it returns a suitably typed value (see comments on each
  30. property).
  31. To retrieve unvalidated (i.e. raw values) fields, use get_entries().
  32. """
  33. # Fields that are always required.
  34. _MANDATORY_FIELDS = {
  35. known_fields.NAME,
  36. known_fields.URL,
  37. known_fields.VERSION,
  38. known_fields.LICENSE,
  39. known_fields.SECURITY_CRITICAL,
  40. known_fields.SHIPPED,
  41. }
  42. # Aliases for fields, where:
  43. # * key is the alias field; and
  44. # * value is the main field to which it should be mapped.
  45. # Note: if both the alias and main fields are specified in metadata,
  46. # the value from the alias field will be used.
  47. _FIELD_ALIASES = {
  48. known_fields.SHIPPED_IN_CHROMIUM: known_fields.SHIPPED,
  49. }
  50. def __init__(self):
  51. # The record of all entries added, including repeated fields.
  52. self._entries: List[Tuple[str, str]] = []
  53. # The current value of each field.
  54. self._metadata: Dict[field_types.MetadataField, str] = {}
  55. # The line numbers of each metadata fields.
  56. self._metadata_line_numbers: Dict[field_types.MetadataField,
  57. Set[int]] = defaultdict(lambda: set())
  58. # The line numbers of the first and the last line (in the text file)
  59. # of this dependency metadata.
  60. self._first_line = float('inf')
  61. self._last_line = -1
  62. # The record of how many times a field entry was added.
  63. self._occurrences: Dict[field_types.MetadataField,
  64. int] = defaultdict(int)
  65. def add_entry(self, field_name: str, field_value: str):
  66. value = field_value.strip()
  67. self._entries.append((field_name, value))
  68. field = known_fields.get_field(field_name)
  69. if field:
  70. self._metadata[field] = value
  71. self._occurrences[field] += 1
  72. def has_entries(self) -> bool:
  73. return len(self._entries) > 0
  74. def get_entries(self) -> List[Tuple[str, str]]:
  75. return list(self._entries)
  76. def record_line(self, line_number):
  77. """Records `line_number` to be part of this metadata."""
  78. self._first_line = min(self._first_line, line_number)
  79. self._last_line = max(self._last_line, line_number)
  80. def record_field_line_number(self, field: field_types.MetadataField,
  81. line_number: int):
  82. self._metadata_line_numbers[field].add(line_number)
  83. def get_first_and_last_line_number(self) -> Tuple[int, int]:
  84. return (self._first_line, self._last_line)
  85. def get_field_line_numbers(self,
  86. field: field_types.MetadataField) -> List[int]:
  87. return sorted(self._metadata_line_numbers[field])
  88. def all_licenses_allowlisted(self, license_field_value: str, is_open_source_project: bool) -> bool:
  89. """Returns whether all licenses in the field are allowlisted.
  90. Assumes a non-empty license_field_value"""
  91. licenses = license_util.process_license_value(
  92. license_field_value,
  93. atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER)
  94. for lic, valid in licenses:
  95. allowed = license_util.is_license_allowlisted(lic, is_open_source_project=is_open_source_project)
  96. if not valid or not allowed:
  97. return False
  98. return True
  99. def only_open_source_licenses(self, license_field_value: str) ->List[str]:
  100. """Returns a list of licenses that are only allowed in open source projects."""
  101. licenses = license_util.process_license_value(
  102. license_field_value,
  103. atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER)
  104. open_source_only = []
  105. for lic, valid in licenses:
  106. if valid and lic in OPEN_SOURCE_SPDX_LICENSES:
  107. open_source_only.append(lic)
  108. return open_source_only
  109. def _assess_required_fields(self, is_open_source_project: bool = False) -> Set[field_types.MetadataField]:
  110. """Returns the set of required fields, based on the current
  111. metadata.
  112. """
  113. required = set(self._MANDATORY_FIELDS)
  114. # Assume the dependency is shipped if not specified.
  115. shipped_value = self._metadata.get(known_fields.SHIPPED)
  116. is_shipped = (shipped_value is None
  117. or util.infer_as_boolean(shipped_value, default=True))
  118. if is_shipped:
  119. # A license file is required if the dependency is shipped.
  120. required.add(known_fields.LICENSE_FILE)
  121. # License compatibility with Android must be set if the
  122. # package is shipped and the license is not in the
  123. # allowlist.
  124. license_value = self._metadata.get(known_fields.LICENSE)
  125. if not license_value or not self.all_licenses_allowlisted(license_value, is_open_source_project):
  126. required.add(known_fields.LICENSE_ANDROID_COMPATIBLE)
  127. return required
  128. def validate(self, source_file_dir: str,
  129. repo_root_dir: str,
  130. is_open_source_project: bool = False) -> List[vr.ValidationResult]:
  131. """Validates all the metadata.
  132. Args:
  133. source_file_dir: the directory of the file that the metadata
  134. is from.
  135. repo_root_dir: the repository's root directory.
  136. is_open_source_project: whether the project is open source.
  137. Returns: the metadata's validation results.
  138. """
  139. results = []
  140. # Check for duplicate fields.
  141. repeated_fields = [
  142. field for field, count in self._occurrences.items() if count > 1
  143. ]
  144. if repeated_fields:
  145. repeated = ", ".join([
  146. f"{field.get_name()} ({self._occurrences[field]})"
  147. for field in repeated_fields
  148. ])
  149. error = vr.ValidationError(reason="There is a repeated field.",
  150. additional=[
  151. f"Repeated fields: {repeated}",
  152. ])
  153. # Merge line numbers.
  154. lines = sorted(
  155. set(
  156. itertools.chain.from_iterable([
  157. self.get_field_line_numbers(field)
  158. for field in repeated_fields
  159. ])))
  160. error.set_lines(lines)
  161. results.append(error)
  162. # Process alias fields.
  163. sources = {}
  164. for alias_field, main_field in self._FIELD_ALIASES.items():
  165. if alias_field in self._metadata:
  166. # Validate the value that was present for the main field
  167. # before overwriting it with the alias field value.
  168. if main_field in self._metadata:
  169. main_value = self._metadata.get(main_field)
  170. field_result = main_field.validate(main_value)
  171. if field_result:
  172. field_result.set_tag(tag="field",
  173. value=main_field.get_name())
  174. field_result.set_lines(
  175. self.get_field_line_numbers(main_field))
  176. results.append(field_result)
  177. self._metadata[main_field] = self._metadata[alias_field]
  178. sources[main_field] = alias_field
  179. self._metadata.pop(alias_field)
  180. # Validate values for all present fields.
  181. for field, value in self._metadata.items():
  182. source_field = sources.get(field) or field
  183. field_result = source_field.validate(value)
  184. if field_result:
  185. field_result.set_tag(tag="field", value=source_field.get_name())
  186. field_result.set_lines(
  187. self.get_field_line_numbers(source_field))
  188. results.append(field_result)
  189. # Check required fields are present.
  190. required_fields = self._assess_required_fields(is_open_source_project=is_open_source_project)
  191. for field in required_fields:
  192. if field not in self._metadata:
  193. field_name = field.get_name()
  194. error = vr.ValidationError(
  195. reason=f"Required field '{field_name}' is missing.")
  196. results.append(error)
  197. # If the repository is hosted somewhere (i.e. Chromium isn't the
  198. # canonical repositroy of the dependency), at least one of the fields
  199. # Version, Date or Revision must be provided.
  200. if (not (self.is_canonical or self.version or self.date or self.revision
  201. or self.revision_in_deps)):
  202. versioning_fields = [
  203. known_fields.VERSION, known_fields.DATE, known_fields.REVISION
  204. ]
  205. names = util.quoted(
  206. [field.get_name() for field in versioning_fields])
  207. error = vr.ValidationError(
  208. reason="Versioning fields are insufficient.",
  209. additional=[f"Provide at least one of [{names}]."],
  210. )
  211. results.append(error)
  212. # Check existence of the license file(s) on disk.
  213. license_file_value = self._metadata.get(known_fields.LICENSE_FILE)
  214. if license_file_value is not None:
  215. result = known_fields.LICENSE_FILE.validate_on_disk(
  216. value=license_file_value,
  217. source_file_dir=source_file_dir,
  218. repo_root_dir=repo_root_dir,
  219. )
  220. if result:
  221. result.set_tag(tag="field",
  222. value=known_fields.LICENSE_FILE.get_name())
  223. result.set_lines(
  224. self.get_field_line_numbers(known_fields.LICENSE_FILE))
  225. results.append(result)
  226. if not is_open_source_project:
  227. license_value = self._metadata.get(known_fields.LICENSE)
  228. if license_value is not None:
  229. not_allowed_licenses = self.only_open_source_licenses(license_value)
  230. if len(not_allowed_licenses) > 0:
  231. license_result = vr.ValidationWarning(
  232. reason=f"License has a license not in the allowlist."
  233. " (see https://source.chromium.org/chromium/chromium/tools/depot_tools/+/main:metadata/fields/custom/license_allowlist.py).",
  234. additional=[
  235. f"The following license{'s are' if len(not_allowed_licenses) > 1 else ' is'} only allowed in open source projects: "
  236. f"{util.quoted(not_allowed_licenses)}.",
  237. ])
  238. license_result.set_tag(tag="field", value=known_fields.LICENSE.get_name())
  239. license_result.set_lines(
  240. self.get_field_line_numbers(known_fields.LICENSE))
  241. results.append(license_result)
  242. # Match values reported in the 'Mitigated:' field with the supplementry
  243. # fields e.g. 'CVE-2024-12345: description'.
  244. mitigated_values = self._return_as_property(known_fields.MITIGATED)
  245. mitigated_ids = set()
  246. if mitigated_values is not None:
  247. mitigated_ids = set(mitigated_values)
  248. # Reported as their own field e.g. 'CVE-2024-12345: description'.
  249. mitigated_entries = set(self._mitigations_from_entries().keys())
  250. missing_descriptions = mitigated_ids - mitigated_entries
  251. if missing_descriptions:
  252. results.append(
  253. vr.ValidationWarning(
  254. reason="Missing descriptions for vulnerability IDs",
  255. additional=[
  256. f"Add descriptions for: {util.quoted(missing_descriptions)}"
  257. ]))
  258. extra_descriptions = mitigated_entries - mitigated_ids
  259. if extra_descriptions:
  260. results.append(
  261. vr.ValidationWarning(
  262. reason="Found descriptions for unlisted vulnerability IDs",
  263. additional=[
  264. f"List these IDs in the 'Mitigated:' field: {util.quoted(extra_descriptions)}"
  265. ]))
  266. return results
  267. def _mitigations_from_entries(self) -> Dict[str, str]:
  268. result = {}
  269. for key, value in self._entries:
  270. if mitigated_util.PATTERN_VULN_ID_WITH_ANCHORS.match(key):
  271. result[key] = value.strip()
  272. return result
  273. def _return_as_property(self, field: field_types.MetadataField) -> Any:
  274. """Helper function to create a property for DependencyMetadata.
  275. The property accessor will validate and return sanitized field value.
  276. """
  277. assert field in known_fields.ALL_FIELDS
  278. raw_value = self._metadata.get(field, None)
  279. if raw_value is None:
  280. # Field is not set.
  281. return None
  282. return field.narrow_type(raw_value)
  283. @property
  284. def name(self) -> Optional[str]:
  285. return self._return_as_property(known_fields.NAME)
  286. @property
  287. def mitigations(self) -> Dict[str, str]:
  288. """Returns mapping of vulnerability IDs to their descriptions."""
  289. result = self._mitigations_from_entries()
  290. mitigated_values = self._return_as_property(known_fields.MITIGATED) or []
  291. # Add entries listed in Mitigated field but without a supplement
  292. # mitigation description line.
  293. for id in mitigated_values:
  294. if id not in result:
  295. result[id] = ""
  296. return result
  297. @property
  298. def short_name(self) -> Optional[str]:
  299. return self._return_as_property(known_fields.SHORT_NAME)
  300. @property
  301. def url(self) -> Optional[List[str]]:
  302. """
  303. Returns a list of URLs that points to upstream repo.
  304. The URLs are guaranteed to `urllib.parse.urlparse` without errors.
  305. Returns None if this repository is the canonical repository of this
  306. dependency (see is_canonical below).
  307. """
  308. return self._return_as_property(known_fields.URL)
  309. @property
  310. def is_canonical(self) -> bool:
  311. """
  312. Returns whether this repository is the canonical public repository of this dependency.
  313. This is derived from a special value in the URL field.
  314. """
  315. value = self._metadata.get(known_fields.URL, "")
  316. return known_fields.URL.repo_is_canonical(value)
  317. @property
  318. def version(self) -> Optional[str]:
  319. return self._return_as_property(known_fields.VERSION)
  320. @property
  321. def date(self) -> Optional[str]:
  322. """Returns in "YYYY-MM-DD" format."""
  323. return self._return_as_property(known_fields.DATE)
  324. @property
  325. def revision(self) -> Optional[str]:
  326. return self._return_as_property(known_fields.REVISION)
  327. @property
  328. def revision_in_deps(self) -> bool:
  329. value = self._metadata.get(known_fields.REVISION, "")
  330. return known_fields.REVISION.is_revision_in_deps(value)
  331. @property
  332. def license(self) -> Optional[List[str]]:
  333. """Returns a list of license names."""
  334. return self._return_as_property(known_fields.LICENSE)
  335. @property
  336. def license_file(self) -> Optional[List[str]]:
  337. # TODO(b/321154076): Consider excluding files that doesn't exist on
  338. # disk if it's not too hard.
  339. #
  340. # Plumbing src_root and dependency_dir into field validator is
  341. # required.
  342. return self._return_as_property(known_fields.LICENSE_FILE)
  343. @property
  344. def security_critical(self) -> Optional[bool]:
  345. return self._return_as_property(known_fields.SECURITY_CRITICAL)
  346. @property
  347. def shipped(self) -> Optional[bool]:
  348. return self._return_as_property(known_fields.SHIPPED)
  349. @property
  350. def shipped_in_chromium(self) -> Optional[bool]:
  351. return self._return_as_property(known_fields.SHIPPED_IN_CHROMIUM)
  352. @property
  353. def license_android_compatible(self) -> Optional[bool]:
  354. return self._return_as_property(known_fields.LICENSE_ANDROID_COMPATIBLE)
  355. @property
  356. def cpe_prefix(self) -> Optional[str]:
  357. """Returns a lowercase string (CPE names are case-insensitive)."""
  358. return self._return_as_property(known_fields.CPE_PREFIX)
  359. @property
  360. def description(self) -> Optional[str]:
  361. return self._return_as_property(known_fields.DESCRIPTION)
  362. @property
  363. def local_modifications(self) -> Optional[Union[Literal[False], str]]:
  364. """Returns `False` if there's no local modifications.
  365. Otherwise the text content extracted from the metadata.
  366. """
  367. return self._return_as_property(known_fields.LOCAL_MODIFICATIONS)