Browse Source

Define main metadata validation functions

This is a reland of commit a1cfc693af3505461fea2a59eaf484720c897c4f

The original commit was reverted do to `ModuleNotFoundError`s. I believe this was due to not specifying `metadata` to be part of the `depot_tools` recipe bundle. I have updated `.gitattributes` for this, and also added `__init__.py` files.

I will put the changes to `presubmit_canned_checks.py` in a later CL, once I can confirm `metadata` is being bundled.

Original change's description:
> [ssci] Added CheckChromiumMetadataFiles in presubmit_canned_checks
>
> Bug: b:277147404
> Change-Id: I14a2f11b256bc85fdfe225443ef533c38463ca3e
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4796694
> Reviewed-by: Gavin Mak <gavinmak@google.com>
> Reviewed-by: Rachael Newitt <renewitt@google.com>
> Commit-Queue: Anne Redulla <aredulla@google.com>

Bug: b:277147404
Change-Id: Ibd9efd5970a5393c157ca8763f97064d7c167803
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4803385
Reviewed-by: Rachael Newitt <renewitt@google.com>
Reviewed-by: Gavin Mak <gavinmak@google.com>
Commit-Queue: Anne Redulla <aredulla@google.com>
Anne Redulla 2 năm trước cách đây
mục cha
commit
c7aca34c8e

+ 4 - 0
.gitattributes

@@ -37,6 +37,10 @@
 
 /lib/* recipes
 
+# Metadata validation package, without its tests.
+/metadata/**       recipes
+/metadata/tests/** -recipes
+
 # TODO: There are some really junky dependencies in here that we should probably
 # move to vpython/cipd.
 /third_party/**                     recipes

+ 4 - 0
metadata/README.md

@@ -0,0 +1,4 @@
+# Validation for Chromium's Third Party Metadata Files
+
+This directory contains the code to validate Chromium's third party metadata
+files, i.e. `README.chromium` files.

+ 4 - 0
metadata/__init__.py

@@ -0,0 +1,4 @@
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""File to enable importing from this folder."""

+ 3 - 3
metadata/dependency_metadata.py

@@ -15,7 +15,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
 # Add the repo's root directory for clearer imports.
 sys.path.insert(0, _ROOT_DIR)
 
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 import metadata.fields.custom.license
 import metadata.fields.custom.version
 import metadata.fields.known as known_fields
@@ -119,7 +119,7 @@ class DependencyMetadata:
     if repeated_field_info:
       repeated = ", ".join(repeated_field_info)
       error = vr.ValidationError(
-          f"Multiple entries for the same field: {repeated}")
+          f"Multiple entries for the same field: {repeated}.")
       error.set_tag(tag="reason", value="repeated field")
       results.append(error)
 
@@ -128,7 +128,7 @@ class DependencyMetadata:
     for field in required_fields:
       if field not in self._metadata:
         field_name = field.get_name()
-        error = vr.ValidationError(f"Required field '{field_name}' is missing")
+        error = vr.ValidationError(f"Required field '{field_name}' is missing.")
         error.set_tag(tag="reason", value="missing required field")
         results.append(error)
 

+ 4 - 0
metadata/fields/__init__.py

@@ -0,0 +1,4 @@
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""File to enable importing from this folder."""

+ 4 - 0
metadata/fields/custom/__init__.py

@@ -0,0 +1,4 @@
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""File to enable importing from this folder."""

+ 1 - 1
metadata/fields/custom/cpe_prefix.py

@@ -15,7 +15,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
 # Add the repo's root directory for clearer imports.
 sys.path.insert(0, _ROOT_DIR)
 
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 

+ 1 - 1
metadata/fields/custom/date.py

@@ -15,7 +15,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
 # Add the repo's root directory for clearer imports.
 sys.path.insert(0, _ROOT_DIR)
 
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 

+ 1 - 1
metadata/fields/custom/license.py

@@ -15,7 +15,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
 # Add the repo's root directory for clearer imports.
 sys.path.insert(0, _ROOT_DIR)
 
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 

+ 1 - 1
metadata/fields/custom/license_file.py

@@ -15,7 +15,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
 # Add the repo's root directory for clearer imports.
 sys.path.insert(0, _ROOT_DIR)
 
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 

+ 1 - 1
metadata/fields/custom/url.py

@@ -15,7 +15,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
 # Add the repo's root directory for clearer imports.
 sys.path.insert(0, _ROOT_DIR)
 
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 

+ 1 - 1
metadata/fields/custom/version.py

@@ -15,7 +15,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
 # Add the repo's root directory for clearer imports.
 sys.path.insert(0, _ROOT_DIR)
 
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 

+ 0 - 0
metadata/fields/types.py → metadata/fields/field_types.py


+ 1 - 1
metadata/fields/known.py

@@ -20,7 +20,7 @@ import metadata.fields.custom.license
 import metadata.fields.custom.license_file
 import metadata.fields.custom.url
 import metadata.fields.custom.version
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 
 # Freeform text fields.
 NAME = field_types.FreeformTextField("Name")

+ 3 - 3
metadata/tests/data/README.chromium.test → metadata/tests/data/README.chromium.test.multi-invalid

@@ -1,4 +1,4 @@
-Name: Test-A README for Chromium metadata
+Name: Test-A README for Chromium metadata (0 errors, 0 warnings)
 Short Name: metadata-test-valid
 URL: https://www.example.com/metadata,
      https://www.example.com/parser
@@ -21,7 +21,7 @@ EXCEPT:
 
 -------------------- DEPENDENCY DIVIDER --------------------
 
-Name: Test-B README for Chromium metadata
+Name: Test-B README for Chromium metadata (4 errors, 1 warning)
 SHORT NAME: metadata-test-invalid
 URL: file://home/drive/chromium/src/metadata
 Version:0
@@ -37,7 +37,7 @@ Local Modifications:     None.
 -------------------- DEPENDENCY DIVIDER --------------------
 -------------------- DEPENDENCY DIVIDER --------------------
 
-Name: Test-C README for Chromium metadata
+Name: Test-C README for Chromium metadata (5 errors, 1 warning)
 URL: https://www.example.com/first
 URL: https://www.example.com/second
 Version: N/A

+ 32 - 0
metadata/tests/data/README.chromium.test.multi-valid

@@ -0,0 +1,32 @@
+Name: Test-A README for Chromium metadata
+Short Name: metadata-test-valid
+URL: https://www.example.com/metadata,
+     https://www.example.com/parser
+Version: 1.0.12
+Date: 2020-12-03
+License: Apache, 2.0 and MIT
+License File: LICENSE
+Security Critical: yes
+Shipped: yes
+CPEPrefix: unknown
+This line should be ignored because CPEPrefix is a one-liner field.
+Description:
+A test metadata file, with a
+ multi-line description.
+
+Local Modifications:
+None,
+EXCEPT:
+* nothing.
+
+-------------------- DEPENDENCY DIVIDER --------------------
+
+Name: Test-B README for Chromium metadata
+Short Name: metadata-test-valid-again
+URL: https://www.example.com/metadata
+Version: 1.0.12
+Date: 2020-12-03
+License: Apache, 2.0 and MIT
+License File: LICENSE
+Security Critical: yes
+Shipped: yes

+ 20 - 0
metadata/tests/data/README.chromium.test.single-valid

@@ -0,0 +1,20 @@
+Name: Test-A README for Chromium metadata
+Short Name: metadata-test-valid
+URL: https://www.example.com/metadata,
+     https://www.example.com/parser
+Version: 1.0.12
+Date: 2020-12-03
+License: Apache, 2.0 and MIT
+License File: LICENSE
+Security Critical: yes
+Shipped: yes
+CPEPrefix: unknown
+This line should be ignored because CPEPrefix is a one-liner field.
+Description:
+A test metadata file, with a
+ multi-line description.
+
+Local Modifications:
+None,
+EXCEPT:
+* nothing.

+ 0 - 1
metadata/tests/dependency_metadata_test.py

@@ -16,7 +16,6 @@ sys.path.insert(0, _ROOT_DIR)
 
 import metadata.dependency_metadata as dm
 import metadata.fields.known as known_fields
-import metadata.fields.types as field_types
 import metadata.validation_result as vr
 
 

+ 1 - 1
metadata/tests/fields_test.py

@@ -16,7 +16,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
 sys.path.insert(0, _ROOT_DIR)
 
 import metadata.fields.known as known_fields
-import metadata.fields.types as field_types
+import metadata.fields.field_types as field_types
 import metadata.validation_result as vr
 
 

+ 37 - 5
metadata/tests/parse_test.py

@@ -18,8 +18,37 @@ import metadata.parse
 
 
 class ParseTest(unittest.TestCase):
-  def test_parse(self):
-    filepath = os.path.join(_THIS_DIR, "data", "README.chromium.test")
+  def test_parse_single(self):
+    """Check parsing works for a single dependency's metadata."""
+    filepath = os.path.join(_THIS_DIR, "data",
+                            "README.chromium.test.single-valid")
+    all_metadata = metadata.parse.parse_file(filepath)
+
+    self.assertEqual(len(all_metadata), 1)
+    self.assertListEqual(
+        all_metadata[0].get_entries(),
+        [
+            ("Name", "Test-A README for Chromium metadata"),
+            ("Short Name", "metadata-test-valid"),
+            ("URL", "https://www.example.com/metadata,\n"
+             "     https://www.example.com/parser"),
+            ("Version", "1.0.12"),
+            ("Date", "2020-12-03"),
+            ("License", "Apache, 2.0 and MIT"),
+            ("License File", "LICENSE"),
+            ("Security Critical", "yes"),
+            ("Shipped", "yes"),
+            ("CPEPrefix", "unknown"),
+            ("Description", "A test metadata file, with a\n"
+             " multi-line description."),
+            ("Local Modifications", "None,\nEXCEPT:\n* nothing."),
+        ],
+    )
+
+  def test_parse_multiple(self):
+    """Check parsing works for multiple dependencies' metadata."""
+    filepath = os.path.join(_THIS_DIR, "data",
+                            "README.chromium.test.multi-invalid")
     all_metadata = metadata.parse.parse_file(filepath)
 
     # Dependency metadata with no entries at all are ignored.
@@ -29,7 +58,8 @@ class ParseTest(unittest.TestCase):
     self.assertListEqual(
         all_metadata[0].get_entries(),
         [
-            ("Name", "Test-A README for Chromium metadata"),
+            ("Name",
+             "Test-A README for Chromium metadata (0 errors, 0 warnings)"),
             ("Short Name", "metadata-test-valid"),
             ("URL", "https://www.example.com/metadata,\n"
              "     https://www.example.com/parser"),
@@ -51,7 +81,8 @@ class ParseTest(unittest.TestCase):
     self.assertListEqual(
         all_metadata[1].get_entries(),
         [
-            ("Name", "Test-B README for Chromium metadata"),
+            ("Name",
+             "Test-B README for Chromium metadata (4 errors, 1 warning)"),
             ("SHORT NAME", "metadata-test-invalid"),
             ("URL", "file://home/drive/chromium/src/metadata"),
             ("Version", "0"),
@@ -68,7 +99,8 @@ class ParseTest(unittest.TestCase):
     self.assertListEqual(
         all_metadata[2].get_entries(),
         [
-            ("Name", "Test-C README for Chromium metadata"),
+            ("Name",
+             "Test-C README for Chromium metadata (5 errors, 1 warning)"),
             ("URL", "https://www.example.com/first"),
             ("URL", "https://www.example.com/second"),
             ("Version", "N/A"),

+ 76 - 0
metadata/tests/validate_test.py

@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+import unittest
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.validate
+
+
+class ValidateTest(unittest.TestCase):
+  def test_validate_file(self):
+    # Validate a valid file (no errors or warnings).
+    test_filepath = os.path.join(_THIS_DIR, "data",
+                                 "README.chromium.test.multi-valid")
+    results = metadata.validate.validate_file(
+        filepath=test_filepath,
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 0)
+
+    # Validate an invalid file (both errors and warnings).
+    test_filepath = os.path.join(_THIS_DIR, "data",
+                                 "README.chromium.test.multi-invalid")
+    results = metadata.validate.validate_file(
+        filepath=test_filepath,
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 11)
+    error_count = 0
+    warning_count = 0
+    for result in results:
+      if result.is_fatal():
+        error_count += 1
+      else:
+        warning_count += 1
+    self.assertEqual(error_count, 9)
+    self.assertEqual(warning_count, 2)
+
+  def test_check_file(self):
+    # Check a valid file (no errors or warnings).
+    test_filepath = os.path.join(_THIS_DIR, "data",
+                                 "README.chromium.test.multi-valid")
+    errors, warnings = metadata.validate.check_file(
+        filepath=test_filepath,
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(errors), 0)
+    self.assertEqual(len(warnings), 0)
+
+    # Check an invalid file (both errors and warnings).
+    test_filepath = os.path.join(_THIS_DIR, "data",
+                                 "README.chromium.test.multi-invalid")
+    errors, warnings = metadata.validate.check_file(
+        filepath=test_filepath,
+        repo_root_dir=_THIS_DIR,
+    )
+    # TODO(aredulla): update this test once validation errors can be returned
+    #                 as errors.
+    # self.assertEqual(len(errors), 9)
+    # self.assertEqual(len(warnings), 2)
+    self.assertEqual(len(errors), 0)
+    self.assertEqual(len(warnings), 11)
+
+
+if __name__ == "__main__":
+  unittest.main()

+ 80 - 0
metadata/validate.py

@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+from typing import List, Tuple
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.parse
+import metadata.validation_result as vr
+
+
+def validate_file(filepath: str,
+                  repo_root_dir: str) -> List[vr.ValidationResult]:
+  """Validate the metadata file."""
+  if not os.path.exists(filepath):
+    result = vr.ValidationError(f"'{filepath}' not found.")
+    result.set_tag(tag="reason", value="file not found")
+    return [result]
+
+  # Get the directory the metadata file is in.
+  parent_dir = os.path.dirname(filepath)
+
+  results = []
+  dependencies = metadata.parse.parse_file(filepath)
+  for dependency in dependencies:
+    results.extend(
+        dependency.validate(
+            source_file_dir=parent_dir,
+            repo_root_dir=repo_root_dir,
+        ))
+
+  return results
+
+
+def check_file(filepath: str,
+               repo_root_dir: str) -> Tuple[List[str], List[str]]:
+  """Run metadata validation on the given file, and return all validation
+  errors and validation warnings.
+
+  Args:
+    filepath: the path to a metadata file,
+              e.g. "/chromium/src/third_party/libname/README.chromium"
+    repo_root_dir: the repository's root directory; this is needed to construct
+                   file paths to license files.
+
+  Returns:
+    error_messages: the fatal validation issues present in the file;
+                    i.e. presubmit should fail.
+    warning_messages: the non-fatal validation issues present in the file;
+                      i.e. presubmit should still pass.
+  """
+  error_messages = []
+  warning_messages = []
+  for result in validate_file(filepath, repo_root_dir):
+    # Construct the message.
+    if result.get_tag("reason") == "file not found":
+      message = result.get_message(postscript="", width=60)
+    else:
+      message = result.get_message(width=60)
+
+    # TODO(aredulla): Actually distinguish between validation errors and
+    # warnings. The quality of metadata is currently being uplifted, but is not
+    # yet guaranteed to pass validation. So for now, all validation results will
+    # be returned as warnings so CLs are not blocked by invalid metadata in
+    # presubmits yet.
+    # if result.is_fatal():
+    #   error_messages.append(message)
+    # else:
+    warning_messages.append(message)
+
+  return error_messages, warning_messages

+ 19 - 7
metadata/validation_result.py

@@ -7,6 +7,11 @@ import textwrap
 from typing import Dict, Union
 
 
+_CHROMIUM_METADATA_PRESCRIPT = "Third party metadata issue:"
+_CHROMIUM_METADATA_POSTSCRIPT = ("Check //third_party/README.chromium.template "
+                                 "for details.")
+
+
 class ValidationResult:
   """Base class for validation issues."""
   def __init__(self, message: str, fatal: bool):
@@ -33,20 +38,27 @@ class ValidationResult:
   def get_all_tags(self) -> Dict[str, str]:
     return dict(self._tags)
 
-  def get_message(self, width: int = 0) -> str:
+  def get_message(self,
+                  prescript: str = _CHROMIUM_METADATA_PRESCRIPT,
+                  postscript: str = _CHROMIUM_METADATA_POSTSCRIPT,
+                  width: int = 0) -> str:
+    components = [prescript, self._message, postscript]
+    message = " ".join(
+        [component for component in components if len(component) > 0])
+
     if width > 0:
-      return textwrap.fill(text=self._message, width=width)
+      return textwrap.fill(text=message, width=width)
 
-    return self._message
+    return message
 
 
 class ValidationError(ValidationResult):
   """Fatal validation issue. Presubmit should fail."""
-  def __init__(self, message: str, **tags: Dict[str, str]):
-    super().__init__(message=message, fatal=True, **tags)
+  def __init__(self, message: str):
+    super().__init__(message=message, fatal=True)
 
 
 class ValidationWarning(ValidationResult):
   """Non-fatal validation issue. Presubmit should pass."""
-  def __init__(self, message: str, **tags: Dict[str, str]):
-    super().__init__(message=message, fatal=False, **tags)
+  def __init__(self, message: str):
+    super().__init__(message=message, fatal=False)