소스 검색

[ssci] Added validate method for single dependencies

Bug: b:277147404
Change-Id: I54c9c82d093cb11813e1c224da125b8d555f1b29
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4797050
Reviewed-by: Rachael Newitt <renewitt@google.com>
Commit-Queue: Anne Redulla <aredulla@google.com>
Anne Redulla 2 년 전
부모
커밋
3aeb682373

+ 13 - 0
metadata/PRESUBMIT.py

@@ -0,0 +1,13 @@
+#!/usr/bin/env vpython3
+# Copyright (c) 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+PRESUBMIT_VERSION = '2.0.0'
+
+
+def CheckPythonUnitTests(input_api, output_api):
+  tests = input_api.canned_checks.GetUnitTestsInDirectory(
+      input_api, output_api, "tests", files_to_check=[r'.+_test\.py$'])
+
+  return input_api.RunTests(tests)

+ 136 - 3
metadata/dependency_metadata.py

@@ -3,19 +3,152 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-from typing import List, Tuple
+from collections import defaultdict
+import os
+import sys
+from typing import Dict, List, Set, Tuple
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.fields.types as field_types
+import metadata.fields.custom.license
+import metadata.fields.custom.version
+import metadata.fields.known as known_fields
+import metadata.fields.util as util
+import metadata.validation_result as vr
 
 
 class DependencyMetadata:
   """The metadata for a single dependency."""
+
+  # Fields that are always required.
+  _MANDATORY_FIELDS = {
+      known_fields.NAME,
+      known_fields.URL,
+      known_fields.VERSION,
+      known_fields.LICENSE,
+      known_fields.SECURITY_CRITICAL,
+      known_fields.SHIPPED,
+  }
+
   def __init__(self):
-    self._entries = []
+    # The record of all entries added, including repeated fields.
+    self._entries: List[Tuple[str, str]] = []
+
+    # The current value of each field.
+    self._metadata: Dict[field_types.MetadataField, str] = {}
+
+    # The record of how many times a field entry was added.
+    self._occurrences: Dict[field_types.MetadataField, int] = defaultdict(int)
 
   def add_entry(self, field_name: str, field_value: str):
-    self._entries.append((field_name, field_value.strip()))
+    value = field_value.strip()
+    self._entries.append((field_name, value))
+
+    field = known_fields.get_field(field_name)
+    if field:
+      self._metadata[field] = value
+      self._occurrences[field] += 1
 
   def has_entries(self) -> bool:
     return len(self._entries) > 0
 
   def get_entries(self) -> List[Tuple[str, str]]:
     return list(self._entries)
+
+  def _assess_required_fields(self) -> Set[field_types.MetadataField]:
+    """Returns the set of required fields, based on the current metadata."""
+    required = set(self._MANDATORY_FIELDS)
+
+    # The date and revision are required if the version has not been specified.
+    version_value = self._metadata.get(known_fields.VERSION)
+    if (version_value is None
+        or metadata.fields.custom.version.is_unknown(version_value)):
+      required.add(known_fields.DATE)
+      required.add(known_fields.REVISION)
+
+    # Assume the dependency is shipped if not specified.
+    shipped_value = self._metadata.get(known_fields.SHIPPED)
+    is_shipped = (shipped_value is None
+                  or util.infer_as_boolean(shipped_value, default=True))
+
+    if is_shipped:
+      # A license file is required if the dependency is shipped.
+      required.add(known_fields.LICENSE_FILE)
+
+      # License compatibility with Android must be set if the package is shipped
+      # and the license is not in the allowlist.
+      has_allowlisted = False
+      license_value = self._metadata.get(known_fields.LICENSE)
+      if license_value:
+        licenses = metadata.fields.custom.license.process_license_value(
+            license_value,
+            atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER)
+        for _, allowed in licenses:
+          if allowed:
+            has_allowlisted = True
+            break
+
+      if not has_allowlisted:
+        required.add(known_fields.LICENSE_ANDROID_COMPATIBLE)
+
+    return required
+
+  def validate(self, source_file_dir: str,
+               repo_root_dir: str) -> List[vr.ValidationResult]:
+    """Validates all the metadata.
+
+    Args:
+      source_file_dir: the directory of the file that the metadata is from.
+      repo_root_dir: the repository's root directory.
+
+    Returns: the metadata's validation results.
+    """
+    results = []
+
+    # Check for duplicate fields.
+    repeated_field_info = [
+        f"{field.get_name()} ({count})"
+        for field, count in self._occurrences.items() if count > 1
+    ]
+    if repeated_field_info:
+      repeated = ", ".join(repeated_field_info)
+      error = vr.ValidationError(
+          f"Multiple entries for the same field: {repeated}")
+      error.set_tag(tag="reason", value="repeated field")
+      results.append(error)
+
+    # Check required fields are present.
+    required_fields = self._assess_required_fields()
+    for field in required_fields:
+      if field not in self._metadata:
+        field_name = field.get_name()
+        error = vr.ValidationError(f"Required field '{field_name}' is missing")
+        error.set_tag(tag="reason", value="missing required field")
+        results.append(error)
+
+    # Validate values for all present fields.
+    for field, value in self._metadata.items():
+      field_result = field.validate(value)
+      if field_result:
+        field_result.set_tag(tag="field", value=field.get_name())
+        results.append(field_result)
+
+    # Check existence of the license file(s) on disk.
+    license_file_value = self._metadata.get(known_fields.LICENSE_FILE)
+    if license_file_value is not None:
+      result = known_fields.LICENSE_FILE.validate_on_disk(
+          value=license_file_value,
+          source_file_dir=source_file_dir,
+          repo_root_dir=repo_root_dir,
+      )
+      if result:
+        result.set_tag(tag="field", value=known_fields.LICENSE_FILE.get_name())
+        results.append(result)
+
+    return results

+ 15 - 4
metadata/fields/custom/license_file.py

@@ -64,12 +64,23 @@ class LicenseFileField(field_types.MetadataField):
   def validate_on_disk(
       self,
       value: str,
-      readme_dir: str,
-      chromium_src_dir: str,
+      source_file_dir: str,
+      repo_root_dir: str,
   ) -> Union[vr.ValidationResult, None]:
     """Checks the given value consists of file paths which exist on disk.
 
     Note: this field supports multiple values.
+
+    Args:
+      value: the value to validate.
+      source_file_dir: the directory of the metadata file that the license file
+                       value is from; this is needed to construct file paths to
+                       license files.
+      repo_root_dir: the repository's root directory; this is needed to
+                     construct file paths to license files.
+
+    Returns: a validation result based on the license file value, and whether
+             the license file(s) exist on disk, otherwise None.
     """
     if value == _NOT_SHIPPED:
       return vr.ValidationWarning(
@@ -80,9 +91,9 @@ class LicenseFileField(field_types.MetadataField):
       license_filename = license_filename.strip()
       if license_filename.startswith("/"):
         license_filepath = os.path.join(
-            chromium_src_dir, os.path.normpath(license_filename.lstrip("/")))
+            repo_root_dir, os.path.normpath(license_filename.lstrip("/")))
       else:
-        license_filepath = os.path.join(readme_dir,
+        license_filepath = os.path.join(source_file_dir,
                                         os.path.normpath(license_filename))
 
       if not os.path.exists(license_filepath):

+ 1 - 1
metadata/fields/custom/version.py

@@ -22,7 +22,7 @@ import metadata.validation_result as vr
 _PATTERN_NOT_APPLICABLE = re.compile(r"^N ?\/ ?A$", re.IGNORECASE)
 
 
-def is_version_unknown(value: str) -> bool:
+def is_unknown(value: str) -> bool:
   """Returns whether the value denotes the version being unknown."""
   return (value == "0" or util.matches(_PATTERN_NOT_APPLICABLE, value)
           or util.is_unknown(value))

+ 2 - 2
metadata/fields/known.py

@@ -61,8 +61,8 @@ ALL_FIELDS = (
     LOCAL_MODIFICATIONS,
 )
 ALL_FIELD_NAMES = {field.get_name() for field in ALL_FIELDS}
-FIELD_MAPPING = {field.get_name().lower(): field for field in ALL_FIELDS}
+_FIELD_MAPPING = {field.get_name().lower(): field for field in ALL_FIELDS}
 
 
 def get_field(label: str) -> Union[field_types.MetadataField, None]:
-  return FIELD_MAPPING.get(label.lower())
+  return _FIELD_MAPPING.get(label.lower())

+ 9 - 0
metadata/fields/types.py

@@ -37,6 +37,15 @@ class MetadataField:
     self._name = name
     self._one_liner = one_liner
 
+  def __eq__(self, other):
+    if not isinstance(other, MetadataField):
+      return False
+
+    return self._name.lower() == other._name.lower()
+
+  def __hash__(self):
+    return hash(self._name.lower())
+
   def get_name(self):
     return self._name
 

+ 20 - 0
metadata/fields/util.py

@@ -17,6 +17,12 @@ _PATTERN_UNKNOWN = re.compile(r"^unknown$", re.IGNORECASE)
 # empty string, or all characters are only whitespace.
 _PATTERN_ONLY_WHITESPACE = re.compile(r"^\s*$")
 
+# Pattern used to check if the string starts with "yes", case-insensitive.
+_PATTERN_STARTS_WITH_YES = re.compile(r"^yes", re.IGNORECASE)
+
+# Pattern used to check if the string starts with "no", case-insensitive.
+_PATTERN_STARTS_WITH_NO = re.compile(r"^no", re.IGNORECASE)
+
 
 def matches(pattern: re.Pattern, value: str) -> bool:
   """Returns whether the value matches the pattern."""
@@ -36,3 +42,17 @@ def is_unknown(value: str) -> bool:
 def quoted(values: List[str]) -> str:
   """Returns a string of the given values, each being individually quoted."""
   return ", ".join([f"'{entry}'" for entry in values])
+
+
+def infer_as_boolean(value: str, default: bool = True) -> bool:
+  """Attempts to infer the value as a boolean, where:
+    - "yes"-ish values return True;
+    - "no"-ish values return False; and
+    - default is returned otherwise.
+  """
+  if matches(_PATTERN_STARTS_WITH_YES, value):
+    return True
+  elif matches(_PATTERN_STARTS_WITH_NO, value):
+    return False
+  else:
+    return default

+ 1 - 1
metadata/parse.py

@@ -40,7 +40,7 @@ def parse_file(filepath: str) -> List[dm.DependencyMetadata]:
     Returns:
         each dependency's metadata described in the file.
   """
-  with open(filepath, "r") as f:
+  with open(filepath, "r", encoding="utf-8") as f:
     lines = f.readlines()
 
   dependencies = []

+ 132 - 0
metadata/tests/dependency_metadata_test.py

@@ -0,0 +1,132 @@
+#!/usr/bin/env vpython3
+# Copyright (c) 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+import unittest
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.dependency_metadata as dm
+import metadata.fields.known as known_fields
+import metadata.fields.types as field_types
+import metadata.validation_result as vr
+
+
+class DependencyValidationTest(unittest.TestCase):
+  def test_repeated_field(self):
+    """Check that a validation error is returned for a repeated field."""
+    dependency = dm.DependencyMetadata()
+    dependency.add_entry(known_fields.NAME.get_name(), "Test repeated field")
+    dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com")
+    dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
+    dependency.add_entry(known_fields.LICENSE.get_name(), "Public Domain")
+    dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE")
+    dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no")
+    dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
+    dependency.add_entry(known_fields.NAME.get_name(), "again")
+
+    results = dependency.validate(
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 1)
+    self.assertTrue(isinstance(results[0], vr.ValidationError))
+    self.assertDictEqual(results[0].get_all_tags(),
+                         {"reason": "repeated field"})
+
+  def test_required_field(self):
+    """Check that a validation error is returned for a missing field."""
+    dependency = dm.DependencyMetadata()
+    dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
+    dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no")
+    dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE")
+    dependency.add_entry(known_fields.LICENSE.get_name(), "Public Domain")
+    dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
+    dependency.add_entry(known_fields.NAME.get_name(), "Test missing field")
+    # Leave URL field unspecified.
+
+    results = dependency.validate(
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 1)
+    self.assertTrue(isinstance(results[0], vr.ValidationError))
+    self.assertDictEqual(results[0].get_all_tags(),
+                         {"reason": "missing required field"})
+
+  def test_invalid_field(self):
+    """Check field validation issues are returned."""
+    dependency = dm.DependencyMetadata()
+    dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com")
+    dependency.add_entry(known_fields.NAME.get_name(), "Test invalid field")
+    dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
+    dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE")
+    dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain")
+    dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
+    dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "test")
+
+    results = dependency.validate(
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 1)
+    self.assertTrue(isinstance(results[0], vr.ValidationError))
+    self.assertDictEqual(results[0].get_all_tags(),
+                         {"field": known_fields.SECURITY_CRITICAL.get_name()})
+
+  def test_invalid_license_file_path(self):
+    """Check license file path validation issues are returned."""
+    dependency = dm.DependencyMetadata()
+    dependency.add_entry(known_fields.NAME.get_name(), "Test license file path")
+    dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com")
+    dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
+    dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain")
+    dependency.add_entry(known_fields.LICENSE_FILE.get_name(),
+                         "MISSING-LICENSE")
+    dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no")
+    dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
+
+    results = dependency.validate(
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 1)
+    self.assertTrue(isinstance(results[0], vr.ValidationError))
+    self.assertDictEqual(results[0].get_all_tags(),
+                         {"field": known_fields.LICENSE_FILE.get_name()})
+
+  def test_multiple_validation_issues(self):
+    """Check all validation issues are returned."""
+    dependency = dm.DependencyMetadata()
+    dependency.add_entry(known_fields.NAME.get_name(), "Test multiple errors")
+    # Leave URL field unspecified.
+    dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
+    dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain")
+    dependency.add_entry(known_fields.LICENSE_FILE.get_name(),
+                         "MISSING-LICENSE")
+    dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "test")
+    dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
+    dependency.add_entry(known_fields.NAME.get_name(), "again")
+
+    # Check 4 validation results are returned, for:
+    #   - missing field;
+    #   - invalid license file path;
+    #   - invalid yes/no field value; and
+    #   - repeated field entry.
+    results = dependency.validate(
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 4)
+
+
+if __name__ == "__main__":
+  unittest.main()

+ 8 - 8
metadata/tests/fields_test.py

@@ -113,32 +113,32 @@ class FieldValidationTest(unittest.TestCase):
     # Check relative path from README directory, and multiple license files.
     result = known_fields.LICENSE_FILE.validate_on_disk(
         value="LICENSE, src/LICENSE.txt",
-        readme_dir=os.path.join(_THIS_DIR, "data"),
-        chromium_src_dir=_THIS_DIR,
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
     )
     self.assertIsNone(result)
 
     # Check relative path from Chromium src directory.
     result = known_fields.LICENSE_FILE.validate_on_disk(
         value="//data/LICENSE",
-        readme_dir=os.path.join(_THIS_DIR, "data"),
-        chromium_src_dir=_THIS_DIR,
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
     )
     self.assertIsNone(result)
 
     # Check missing file.
     result = known_fields.LICENSE_FILE.validate_on_disk(
         value="MISSING_LICENSE",
-        readme_dir=os.path.join(_THIS_DIR, "data"),
-        chromium_src_dir=_THIS_DIR,
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
     )
     self.assertIsInstance(result, vr.ValidationError)
 
     # Check deprecated NOT_SHIPPED.
     result = known_fields.LICENSE_FILE.validate_on_disk(
         value="NOT_SHIPPED",
-        readme_dir=os.path.join(_THIS_DIR, "data"),
-        chromium_src_dir=_THIS_DIR,
+        source_file_dir=os.path.join(_THIS_DIR, "data"),
+        repo_root_dir=_THIS_DIR,
     )
     self.assertIsInstance(result, vr.ValidationWarning)
 

+ 2 - 2
metadata/validation_result.py

@@ -9,10 +9,10 @@ from typing import Dict, Union
 
 class ValidationResult:
   """Base class for validation issues."""
-  def __init__(self, message: str, fatal: bool, **tags: Dict[str, str]):
+  def __init__(self, message: str, fatal: bool):
     self._fatal = fatal
     self._message = message
-    self._tags = tags
+    self._tags = {}
 
   def __str__(self) -> str:
     prefix = "ERROR" if self._fatal else "[non-fatal]"