parse.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
  5. import os
  6. import re
  7. import sys
  8. from typing import List
  9. _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
  10. # The repo's root directory.
  11. _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
  12. # Add the repo's root directory for clearer imports.
  13. sys.path.insert(0, _ROOT_DIR)
  14. import metadata.fields.known as known_fields
  15. import metadata.dependency_metadata as dm
  16. import metadata.fields.custom.mitigated
  17. # Line used to separate dependencies within the same metadata file.
  18. DEPENDENCY_DIVIDER = re.compile(r"^-{20} DEPENDENCY DIVIDER -{20}$")
  19. # Delimiter used to separate a field's name from its value.
  20. FIELD_DELIMITER = ":"
  21. # Heuristic for detecting unknown field names.
  22. _PATTERN_FIELD_NAME_WORD_HEURISTIC = r"[A-Z]\w+"
  23. _PATTERN_FIELD_NAME_HEURISTIC = re.compile(r"^({}(?: {})*){}[\b\s]".format(
  24. _PATTERN_FIELD_NAME_WORD_HEURISTIC, _PATTERN_FIELD_NAME_WORD_HEURISTIC,
  25. FIELD_DELIMITER))
  26. _DEFAULT_TO_STRUCTURED_TEXT = False
  27. # Pattern used to check if a line from a metadata file declares a new
  28. # field. This includes all valid vulnerability IDs.
  29. _PATTERN_KNOWN_FIELD_DECLARATION = re.compile(
  30. "^({}){}".format(
  31. "|".join(
  32. list(known_fields.ALL_FIELD_NAMES) +
  33. [metadata.fields.custom.mitigated.PATTERN_VULN_ID.pattern]),
  34. FIELD_DELIMITER), re.IGNORECASE)
  35. def parse_content(content: str) -> List[dm.DependencyMetadata]:
  36. """Reads and parses the metadata from the given string.
  37. Args:
  38. content: the string to parse metadata from.
  39. Returns: all the metadata, which may be for zero or more
  40. dependencies, from the given string.
  41. """
  42. dependencies = []
  43. current_metadata = dm.DependencyMetadata()
  44. current_field_spec = None
  45. current_field_name = None
  46. current_field_value = ""
  47. for line_number, line in enumerate(content.splitlines(keepends=True), 1):
  48. # Whether the current line should be part of a structured value.
  49. if current_field_spec:
  50. expect_structured_field_value = current_field_spec.is_structured()
  51. else:
  52. expect_structured_field_value = _DEFAULT_TO_STRUCTURED_TEXT
  53. # Check if a new dependency is being described.
  54. if DEPENDENCY_DIVIDER.match(line):
  55. if current_field_name:
  56. # Save the field value for the previous dependency.
  57. current_metadata.add_entry(current_field_name,
  58. current_field_value)
  59. if current_metadata.has_entries():
  60. # Add the previous dependency to the results.
  61. dependencies.append(current_metadata)
  62. # Reset for the new dependency's metadata,
  63. # and reset the field state.
  64. current_metadata = dm.DependencyMetadata()
  65. current_field_spec = None
  66. current_field_name = None
  67. current_field_value = ""
  68. elif _PATTERN_KNOWN_FIELD_DECLARATION.match(line) or (
  69. expect_structured_field_value
  70. and _PATTERN_FIELD_NAME_HEURISTIC.match(line)):
  71. # Save the field value to the current dependency's metadata.
  72. if current_field_name:
  73. current_metadata.add_entry(current_field_name,
  74. current_field_value)
  75. current_field_name, current_field_value = line.split(
  76. FIELD_DELIMITER, 1)
  77. current_field_spec = known_fields.get_field(current_field_name)
  78. current_metadata.record_line(line_number)
  79. if current_field_spec:
  80. current_metadata.record_field_line_number(
  81. current_field_spec, line_number)
  82. elif current_field_name:
  83. if line.strip():
  84. current_metadata.record_line(line_number)
  85. if current_field_spec:
  86. current_metadata.record_field_line_number(
  87. current_field_spec, line_number)
  88. # The field is on multiple lines, so add this line to the
  89. # field value.
  90. current_field_value += line
  91. else:
  92. # Text that aren't part of any field (e.g. free form text).
  93. # Record the line number if the line is non-empty.
  94. if line.strip():
  95. current_metadata.record_line(line_number)
  96. # Check if current field value indicates end of the field.
  97. if current_field_spec and current_field_spec.should_terminate_field(
  98. current_field_value):
  99. assert current_field_name
  100. current_metadata.record_line(line_number)
  101. if current_field_spec:
  102. current_metadata.record_field_line_number(
  103. current_field_spec, line_number)
  104. current_metadata.add_entry(current_field_name, current_field_value)
  105. current_field_spec = None
  106. current_field_name = None
  107. current_field_value = ""
  108. # At this point, the end of the file has been reached.
  109. # Save any remaining field data and metadata.
  110. if current_field_name:
  111. current_metadata.add_entry(current_field_name, current_field_value)
  112. if current_metadata.has_entries():
  113. dependencies.append(current_metadata)
  114. return dependencies