فهرست منبع

Add anonymization and the protos to the telemetry lib

Add the protos for the traces used to publish metrics to Clearcut, and
the anonymization code and tests that ensure we don't collect
non-anonymized paths.

Bug: 326277821
Change-Id: Ifae4d51f59db2219995a0a8d21785729f5eeb137
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5850298
Reviewed-by: Terrence Reilly <treilly@google.com>
Commit-Queue: Struan Shrimpton <sshrimp@google.com>
Struan Shrimpton 11 ماه پیش
والد
کامیت
55d065cc0c

+ 55 - 0
infra_lib/telemetry/anonymization.py

@@ -0,0 +1,55 @@
+# Copyright 2024 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Util for anonymizing telemetry spans."""
+
+import getpass
+import re
+
+from typing import Optional, Pattern, Sequence, Tuple
+from google.protobuf import json_format
+
+from .proto import trace_span_pb2
+
+
+class Anonymizer:
+    """Redact the personally identifiable information.
+
+    Holds an ordered list of ``(pattern, replacement)`` rules. Unless
+    ``getpass.getuser()`` reports ``root``, a rule replacing the current
+    login name with ``<user>`` is appended after the caller's rules.
+    """
+
+    def __init__(
+        self,
+        replacements: Optional[Sequence[Tuple[Pattern[str],
+                                              str]]] = None) -> None:
+        """Build the rule list.
+
+        Args:
+            replacements: Optional ``(pattern, replacement)`` pairs, applied
+                in the given order. A pattern may be a compiled pattern or
+                a plain regex string; both are normalized via
+                ``re.compile``.
+        """
+        # Compile up front so apply() does not care whether the caller
+        # supplied a string or an already-compiled pattern.
+        self._replacements = [(re.compile(pattern), repl_to)
+                              for pattern, repl_to in (replacements or [])]
+        # Single lookup, so the check and the compiled rule always agree.
+        user = getpass.getuser()
+        if user != "root":
+            # Substituting the root user doesn't actually anonymize anything.
+            self._replacements.append(
+                (re.compile(re.escape(user)), "<user>"))
+
+    def __call__(self, *args, **kwargs):
+        """Forward to :meth:`apply` so an instance can be used as a callable."""
+        return self.apply(*args, **kwargs)
+
+    def apply(self, data: str) -> str:
+        """Applies the replacement rules to data text.
+
+        Args:
+            data: Text to scrub. Falsy input (empty string or ``None``) is
+                returned unchanged.
+
+        Returns:
+            ``data`` after every rule has been applied, in order; each rule
+            sees the output of the previous one.
+        """
+        if not data:
+            return data
+
+        for pattern, repl_to in self._replacements:
+            data = pattern.sub(repl_to, data)
+
+        return data
+
+
+class AnonymizingFilter:
+    """Applies the anonymizer to TraceSpan messages."""
+
+    def __init__(self, anonymizer: Anonymizer) -> None:
+        """Store the anonymizer that will be run over each message."""
+        self._anonymizer = anonymizer
+
+    def __call__(self,
+                 msg: trace_span_pb2.TraceSpan) -> trace_span_pb2.TraceSpan:
+        """Applies the anonymizer to TraceSpan message.
+
+        The message is serialized to JSON, the anonymizer is run over the
+        whole JSON text, and the result is parsed into a new TraceSpan.
+
+        Args:
+            msg: The span to scrub.
+
+        Returns:
+            A newly constructed TraceSpan built from the scrubbed JSON.
+        """
+        # Keep non-ASCII characters literal. With the default
+        # ensure_ascii=True they are written as \uXXXX escapes, so a rule
+        # built from a non-ASCII login name or path would never match.
+        raw = json_format.MessageToJson(msg, ensure_ascii=False)
+        # NOTE(review): the rules run over JSON field names as well as
+        # values; a rule that rewrites a field name presumably makes
+        # Parse() below reject the text -- confirm with short usernames.
+        json_msg = self._anonymizer.apply(raw)
+        output = trace_span_pb2.TraceSpan()
+        json_format.Parse(json_msg, output)
+        return output

+ 53 - 0
infra_lib/telemetry/anonymization_unittest.py

@@ -0,0 +1,53 @@
+# Copyright 2024 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Test the config and anonymizer utils."""
+
+import getpass
+import re
+import pytest
+
+from . import anonymization
+
+
+def test_default_anonymizer_to_remove_username_from_path(monkeypatch) -> None:
+    """Check that the built-in rule swaps the login name for ``<user>``."""
+    # Pin the login name so the outcome does not depend on who runs the test.
+    monkeypatch.setattr(getpass, "getuser", lambda: "user")
+
+    anonymizer = anonymization.Anonymizer()
+
+    assert anonymizer.apply("/home/user/docs") == "/home/<user>/docs"
+
+
+def test_anonymizer_to_apply_passed_replacements(monkeypatch) -> None:
+    """Test anonymizer to apply the requested replacements."""
+    # Report "root" so the Anonymizer adds no username rule of its own;
+    # only the rule passed in below can produce the expected output, and
+    # the result no longer depends on the login name of whoever runs this.
+    monkeypatch.setattr(getpass, "getuser", lambda: "root")
+    text = "/home/someone/docs"
+
+    replacements = [(re.escape("someone"), "<user>")]
+    a = anonymization.Anonymizer(replacements=replacements)
+    output = a.apply(text)
+
+    assert output == "/home/<user>/docs"
+
+
+def test_anonymizer_to_apply_multiple_replacements(monkeypatch) -> None:
+    """Test anonymizer to apply the passed replacements in order."""
+    # Report "root" so no username rule is appended; otherwise a login
+    # name that happens to occur in the text below would alter the output.
+    monkeypatch.setattr(getpass, "getuser", lambda: "root")
+    replacements = [(re.escape("abc"), "x"), (re.escape("xyz"), "t")]
+    text = "hello abcd. how is xyz. abcyz"
+
+    a = anonymization.Anonymizer(replacements=replacements)
+    output = a.apply(text)
+
+    # The trailing "abcyz" becomes "xyz" after the first rule and is then
+    # caught by the second, which is what proves the rules run in order.
+    assert output == "hello xd. how is t. t"
+
+
+def test_default_anonymizer_skip_root(monkeypatch) -> None:
+    """Check that no username rule is applied when the user is root."""
+    monkeypatch.setattr(getpass, "getuser", lambda: "root")
+    original = "/root/home service.sysroot.SetupBoard"
+
+    scrubbed = anonymization.Anonymizer().apply(original)
+
+    # Every occurrence of "root" must survive untouched.
+    assert scrubbed == original

+ 33 - 0
infra_lib/telemetry/proto/clientanalytics_pb2.py

@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: chromite/telemetry/clientanalytics.proto
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+# @@protoc_insertion_point(imports)
+
+# Handle on protobuf's default symbol database.
+_sym_db = _symbol_database.Default()
+
+# Register the serialized file descriptor for
+# chromite/telemetry/clientanalytics.proto with the default descriptor pool.
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
+    b'\n(chromite/telemetry/clientanalytics.proto\x12\x12\x63hromite.telemetry\";\n\x08LogEvent\x12\x15\n\revent_time_ms\x18\x01 \x01(\x03\x12\x18\n\x10source_extension\x18\x06 \x01(\x0c\"!\n\nClientInfo\x12\x13\n\x0b\x63lient_type\x18\x01 \x01(\x05\"\x9f\x01\n\nLogRequest\x12\x33\n\x0b\x63lient_info\x18\x01 \x01(\x0b\x32\x1e.chromite.telemetry.ClientInfo\x12\x12\n\nlog_source\x18\x02 \x01(\x05\x12\x17\n\x0frequest_time_ms\x18\x04 \x01(\x03\x12/\n\tlog_event\x18\x03 \x03(\x0b\x32\x1c.chromite.telemetry.LogEvent\"/\n\x0bLogResponse\x12 \n\x18next_request_wait_millis\x18\x01 \x01(\x03\x42>Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
+)
+
+# Hand this module's globals to the protobuf builder, which presumably
+# fills in the descriptors and message classes (LogEvent, ClientInfo,
+# LogRequest, LogResponse) -- see google.protobuf.internal.builder.
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(
+    DESCRIPTOR, 'chromite.telemetry.clientanalytics_pb2', _globals)
+# Only runs when _USE_C_DESCRIPTORS is false: set the file options and the
+# start/end offsets of each message (presumably byte offsets into the
+# serialized descriptor above -- generated values, do not hand-edit).
+if _descriptor._USE_C_DESCRIPTORS == False:
+
+    DESCRIPTOR._options = None
+    DESCRIPTOR._serialized_options = b'Z<go.chromium.org/chromiumos/infra/proto/go/chromite/telemetry'
+    _globals['_LOGEVENT']._serialized_start = 64
+    _globals['_LOGEVENT']._serialized_end = 123
+    _globals['_CLIENTINFO']._serialized_start = 125
+    _globals['_CLIENTINFO']._serialized_end = 158
+    _globals['_LOGREQUEST']._serialized_start = 161
+    _globals['_LOGREQUEST']._serialized_end = 320
+    _globals['_LOGRESPONSE']._serialized_start = 322
+    _globals['_LOGRESPONSE']._serialized_end = 369
+# @@protoc_insertion_point(module_scope)

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 15 - 0
infra_lib/telemetry/proto/trace_span_pb2.py


+ 4 - 0
infra_lib/telemetry/proto/update.sh

@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+#
+# Re-download the generated *_pb2.py modules from chromite and write them
+# next to this script. The ?format=TEXT endpoint returns base64, hence the
+# decode step.
+
+# Abort on the first error, including a failing gob-curl on the left-hand
+# side of a pipe, instead of carrying on with partial output.
+set -euo pipefail
+
+# Write the modules beside this script regardless of the caller's cwd.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+readonly BASE_URL='https://chromium.googlesource.com/chromiumos/chromite/+/main/api/gen_sdk/chromite/telemetry'
+
+# Remove a half-written temporary file if anything below fails.
+tmp=''
+trap 'rm -f -- "${tmp}"' EXIT
+
+for module in clientanalytics_pb2.py trace_span_pb2.py; do
+  tmp="${module}.tmp"
+  # The URL is quoted so the "?" is never treated as a glob character.
+  # Decode into a temporary file first so a failed download cannot
+  # truncate the module that is already checked in.
+  gob-curl "${BASE_URL}/${module}?format=TEXT" | base64 --decode > "${tmp}"
+  mv -- "${tmp}" "${module}"
+done

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است