123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442 |
- #!/usr/bin/env python3
- # Copyright 2023 The Chromium Authors. All rights reserved.
- # Use of this source code is governed by a BSD-style license that can be
- # found in the LICENSE file.
- from collections import defaultdict
- import os
- import sys
- import itertools
- from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
- _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
- # The repo's root directory.
- _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
- # Add the repo's root directory for clearer imports.
- sys.path.insert(0, _ROOT_DIR)
- import metadata.fields.field_types as field_types
- import metadata.fields.custom.license as license_util
- import metadata.fields.custom.version as version_util
- import metadata.fields.custom.mitigated as mitigated_util
- import metadata.fields.known as known_fields
- import metadata.fields.util as util
- import metadata.validation_result as vr
- from metadata.fields.custom.license_allowlist import OPEN_SOURCE_SPDX_LICENSES
- class DependencyMetadata:
- """The metadata for a single dependency.
- See @property declarations below to retrieve validated fields for
- downstream consumption.
- The property returns `None` if the provided value (e.g. in
- README.chromium file) is clearly invalid.
- Otherwise, it returns a suitably typed value (see comments on each
- property).
- To retrieve unvalidated (i.e. raw values) fields, use get_entries().
- """
- # Fields that are always required.
- _MANDATORY_FIELDS = {
- known_fields.NAME,
- known_fields.URL,
- known_fields.VERSION,
- known_fields.LICENSE,
- known_fields.SECURITY_CRITICAL,
- known_fields.SHIPPED,
- }
- # Aliases for fields, where:
- # * key is the alias field; and
- # * value is the main field to which it should be mapped.
- # Note: if both the alias and main fields are specified in metadata,
- # the value from the alias field will be used.
- _FIELD_ALIASES = {
- known_fields.SHIPPED_IN_CHROMIUM: known_fields.SHIPPED,
- }
- def __init__(self):
- # The record of all entries added, including repeated fields.
- self._entries: List[Tuple[str, str]] = []
- # The current value of each field.
- self._metadata: Dict[field_types.MetadataField, str] = {}
- # The line numbers of each metadata fields.
- self._metadata_line_numbers: Dict[field_types.MetadataField,
- Set[int]] = defaultdict(lambda: set())
- # The line numbers of the first and the last line (in the text file)
- # of this dependency metadata.
- self._first_line = float('inf')
- self._last_line = -1
- # The record of how many times a field entry was added.
- self._occurrences: Dict[field_types.MetadataField,
- int] = defaultdict(int)
- def add_entry(self, field_name: str, field_value: str):
- value = field_value.strip()
- self._entries.append((field_name, value))
- field = known_fields.get_field(field_name)
- if field:
- self._metadata[field] = value
- self._occurrences[field] += 1
- def has_entries(self) -> bool:
- return len(self._entries) > 0
- def get_entries(self) -> List[Tuple[str, str]]:
- return list(self._entries)
- def record_line(self, line_number):
- """Records `line_number` to be part of this metadata."""
- self._first_line = min(self._first_line, line_number)
- self._last_line = max(self._last_line, line_number)
- def record_field_line_number(self, field: field_types.MetadataField,
- line_number: int):
- self._metadata_line_numbers[field].add(line_number)
- def get_first_and_last_line_number(self) -> Tuple[int, int]:
- return (self._first_line, self._last_line)
- def get_field_line_numbers(self,
- field: field_types.MetadataField) -> List[int]:
- return sorted(self._metadata_line_numbers[field])
- def all_licenses_allowlisted(self, license_field_value: str, is_open_source_project: bool) -> bool:
- """Returns whether all licenses in the field are allowlisted.
- Assumes a non-empty license_field_value"""
- licenses = license_util.process_license_value(
- license_field_value,
- atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER)
- for lic, valid in licenses:
- allowed = license_util.is_license_allowlisted(lic, is_open_source_project=is_open_source_project)
- if not valid or not allowed:
- return False
- return True
- def only_open_source_licenses(self, license_field_value: str) ->List[str]:
- """Returns a list of licenses that are only allowed in open source projects."""
- licenses = license_util.process_license_value(
- license_field_value,
- atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER)
- open_source_only = []
- for lic, valid in licenses:
- if valid and lic in OPEN_SOURCE_SPDX_LICENSES:
- open_source_only.append(lic)
- return open_source_only
- def _assess_required_fields(self, is_open_source_project: bool = False) -> Set[field_types.MetadataField]:
- """Returns the set of required fields, based on the current
- metadata.
- """
- required = set(self._MANDATORY_FIELDS)
- # Assume the dependency is shipped if not specified.
- shipped_value = self._metadata.get(known_fields.SHIPPED)
- is_shipped = (shipped_value is None
- or util.infer_as_boolean(shipped_value, default=True))
- if is_shipped:
- # A license file is required if the dependency is shipped.
- required.add(known_fields.LICENSE_FILE)
- # License compatibility with Android must be set if the
- # package is shipped and the license is not in the
- # allowlist.
- license_value = self._metadata.get(known_fields.LICENSE)
- if not license_value or not self.all_licenses_allowlisted(license_value, is_open_source_project):
- required.add(known_fields.LICENSE_ANDROID_COMPATIBLE)
- return required
- def validate(self, source_file_dir: str,
- repo_root_dir: str,
- is_open_source_project: bool = False) -> List[vr.ValidationResult]:
- """Validates all the metadata.
- Args:
- source_file_dir: the directory of the file that the metadata
- is from.
- repo_root_dir: the repository's root directory.
- is_open_source_project: whether the project is open source.
- Returns: the metadata's validation results.
- """
- results = []
- # Check for duplicate fields.
- repeated_fields = [
- field for field, count in self._occurrences.items() if count > 1
- ]
- if repeated_fields:
- repeated = ", ".join([
- f"{field.get_name()} ({self._occurrences[field]})"
- for field in repeated_fields
- ])
- error = vr.ValidationError(reason="There is a repeated field.",
- additional=[
- f"Repeated fields: {repeated}",
- ])
- # Merge line numbers.
- lines = sorted(
- set(
- itertools.chain.from_iterable([
- self.get_field_line_numbers(field)
- for field in repeated_fields
- ])))
- error.set_lines(lines)
- results.append(error)
- # Process alias fields.
- sources = {}
- for alias_field, main_field in self._FIELD_ALIASES.items():
- if alias_field in self._metadata:
- # Validate the value that was present for the main field
- # before overwriting it with the alias field value.
- if main_field in self._metadata:
- main_value = self._metadata.get(main_field)
- field_result = main_field.validate(main_value)
- if field_result:
- field_result.set_tag(tag="field",
- value=main_field.get_name())
- field_result.set_lines(
- self.get_field_line_numbers(main_field))
- results.append(field_result)
- self._metadata[main_field] = self._metadata[alias_field]
- sources[main_field] = alias_field
- self._metadata.pop(alias_field)
- # Validate values for all present fields.
- for field, value in self._metadata.items():
- source_field = sources.get(field) or field
- field_result = source_field.validate(value)
- if field_result:
- field_result.set_tag(tag="field", value=source_field.get_name())
- field_result.set_lines(
- self.get_field_line_numbers(source_field))
- results.append(field_result)
- # Check required fields are present.
- required_fields = self._assess_required_fields(is_open_source_project=is_open_source_project)
- for field in required_fields:
- if field not in self._metadata:
- field_name = field.get_name()
- error = vr.ValidationError(
- reason=f"Required field '{field_name}' is missing.")
- results.append(error)
- # If the repository is hosted somewhere (i.e. Chromium isn't the
- # canonical repositroy of the dependency), at least one of the fields
- # Version, Date or Revision must be provided.
- if (not (self.is_canonical or self.version or self.date or self.revision
- or self.revision_in_deps)):
- versioning_fields = [
- known_fields.VERSION, known_fields.DATE, known_fields.REVISION
- ]
- names = util.quoted(
- [field.get_name() for field in versioning_fields])
- error = vr.ValidationError(
- reason="Versioning fields are insufficient.",
- additional=[f"Provide at least one of [{names}]."],
- )
- results.append(error)
- # Check existence of the license file(s) on disk.
- license_file_value = self._metadata.get(known_fields.LICENSE_FILE)
- if license_file_value is not None:
- result = known_fields.LICENSE_FILE.validate_on_disk(
- value=license_file_value,
- source_file_dir=source_file_dir,
- repo_root_dir=repo_root_dir,
- )
- if result:
- result.set_tag(tag="field",
- value=known_fields.LICENSE_FILE.get_name())
- result.set_lines(
- self.get_field_line_numbers(known_fields.LICENSE_FILE))
- results.append(result)
- if not is_open_source_project:
- license_value = self._metadata.get(known_fields.LICENSE)
- if license_value is not None:
- not_allowed_licenses = self.only_open_source_licenses(license_value)
- if len(not_allowed_licenses) > 0:
- license_result = vr.ValidationWarning(
- reason=f"License has a license not in the allowlist."
- " (see https://source.chromium.org/chromium/chromium/tools/depot_tools/+/main:metadata/fields/custom/license_allowlist.py).",
- additional=[
- f"The following license{'s are' if len(not_allowed_licenses) > 1 else ' is'} only allowed in open source projects: "
- f"{util.quoted(not_allowed_licenses)}.",
- ])
- license_result.set_tag(tag="field", value=known_fields.LICENSE.get_name())
- license_result.set_lines(
- self.get_field_line_numbers(known_fields.LICENSE))
- results.append(license_result)
- # Match values reported in the 'Mitigated:' field with the supplementry
- # fields e.g. 'CVE-2024-12345: description'.
- mitigated_values = self._return_as_property(known_fields.MITIGATED)
- mitigated_ids = set()
- if mitigated_values is not None:
- mitigated_ids = set(mitigated_values)
- # Reported as their own field e.g. 'CVE-2024-12345: description'.
- mitigated_entries = set(self._mitigations_from_entries().keys())
- missing_descriptions = mitigated_ids - mitigated_entries
- if missing_descriptions:
- results.append(
- vr.ValidationWarning(
- reason="Missing descriptions for vulnerability IDs",
- additional=[
- f"Add descriptions for: {util.quoted(missing_descriptions)}"
- ]))
- extra_descriptions = mitigated_entries - mitigated_ids
- if extra_descriptions:
- results.append(
- vr.ValidationWarning(
- reason="Found descriptions for unlisted vulnerability IDs",
- additional=[
- f"List these IDs in the 'Mitigated:' field: {util.quoted(extra_descriptions)}"
- ]))
- return results
- def _mitigations_from_entries(self) -> Dict[str, str]:
- result = {}
- for key, value in self._entries:
- if mitigated_util.PATTERN_VULN_ID_WITH_ANCHORS.match(key):
- result[key] = value.strip()
- return result
- def _return_as_property(self, field: field_types.MetadataField) -> Any:
- """Helper function to create a property for DependencyMetadata.
- The property accessor will validate and return sanitized field value.
- """
- assert field in known_fields.ALL_FIELDS
- raw_value = self._metadata.get(field, None)
- if raw_value is None:
- # Field is not set.
- return None
- return field.narrow_type(raw_value)
- @property
- def name(self) -> Optional[str]:
- return self._return_as_property(known_fields.NAME)
- @property
- def mitigations(self) -> Dict[str, str]:
- """Returns mapping of vulnerability IDs to their descriptions."""
- result = self._mitigations_from_entries()
- mitigated_values = self._return_as_property(known_fields.MITIGATED) or []
- # Add entries listed in Mitigated field but without a supplement
- # mitigation description line.
- for id in mitigated_values:
- if id not in result:
- result[id] = ""
- return result
- @property
- def short_name(self) -> Optional[str]:
- return self._return_as_property(known_fields.SHORT_NAME)
- @property
- def url(self) -> Optional[List[str]]:
- """
- Returns a list of URLs that points to upstream repo.
- The URLs are guaranteed to `urllib.parse.urlparse` without errors.
- Returns None if this repository is the canonical repository of this
- dependency (see is_canonical below).
- """
- return self._return_as_property(known_fields.URL)
- @property
- def is_canonical(self) -> bool:
- """
- Returns whether this repository is the canonical public repository of this dependency.
- This is derived from a special value in the URL field.
- """
- value = self._metadata.get(known_fields.URL, "")
- return known_fields.URL.repo_is_canonical(value)
- @property
- def version(self) -> Optional[str]:
- return self._return_as_property(known_fields.VERSION)
- @property
- def date(self) -> Optional[str]:
- """Returns in "YYYY-MM-DD" format."""
- return self._return_as_property(known_fields.DATE)
- @property
- def revision(self) -> Optional[str]:
- return self._return_as_property(known_fields.REVISION)
- @property
- def revision_in_deps(self) -> bool:
- value = self._metadata.get(known_fields.REVISION, "")
- return known_fields.REVISION.is_revision_in_deps(value)
- @property
- def license(self) -> Optional[List[str]]:
- """Returns a list of license names."""
- return self._return_as_property(known_fields.LICENSE)
- @property
- def license_file(self) -> Optional[List[str]]:
- # TODO(b/321154076): Consider excluding files that doesn't exist on
- # disk if it's not too hard.
- #
- # Plumbing src_root and dependency_dir into field validator is
- # required.
- return self._return_as_property(known_fields.LICENSE_FILE)
- @property
- def security_critical(self) -> Optional[bool]:
- return self._return_as_property(known_fields.SECURITY_CRITICAL)
- @property
- def shipped(self) -> Optional[bool]:
- return self._return_as_property(known_fields.SHIPPED)
- @property
- def shipped_in_chromium(self) -> Optional[bool]:
- return self._return_as_property(known_fields.SHIPPED_IN_CHROMIUM)
- @property
- def license_android_compatible(self) -> Optional[bool]:
- return self._return_as_property(known_fields.LICENSE_ANDROID_COMPATIBLE)
- @property
- def cpe_prefix(self) -> Optional[str]:
- """Returns a lowercase string (CPE names are case-insensitive)."""
- return self._return_as_property(known_fields.CPE_PREFIX)
- @property
- def description(self) -> Optional[str]:
- return self._return_as_property(known_fields.DESCRIPTION)
- @property
- def local_modifications(self) -> Optional[Union[Literal[False], str]]:
- """Returns `False` if there's no local modifications.
- Otherwise the text content extracted from the metadata.
- """
- return self._return_as_property(known_fields.LOCAL_MODIFICATIONS)
|