ソースを参照

[fix_encoding] Remove fix_encoding

The fix_encoding module within depot_tools was added back in the python2[1] days as a catch-all encoding-fix boilerplate that is called across depot_tools scripts.

However, now that depot_tools has officially deprecated support for py2 and supports only python >= 3.8[2], the boilerplate is no longer needed.

* `fix_win_codec()`[3] The 'cp65001' codec issue this fixes is fixed in python 3.3[4].
* `fix_default_encoding()`[5] python3 defaults to utf8.
* `fix_win_sys_argv()`[6] The sys.argv unicode issue is fixed in python3[7].
* `fix_win_console()`[8] The Windows console Unicode output issue this works around is fixed in python3[9].

[1] https://codereview.chromium.org/6721029
[2] https://crrev.com/371aa997c04791d21e222ed43a1a0d55b450dd53/README.md
[3] https://source.chromium.org/chromium/chromium/tools/depot_tools/+/main:fix_encoding.py;l=123-132;drc=cfa826c9845122d445dce4f51f556381865dbed3
[4] https://github.com/python/cpython/issues/57425#issuecomment-1093559969
[5] https://source.chromium.org/chromium/chromium/tools/depot_tools/+/main:fix_encoding.py;l=29-66;drc=cfa826c9845122d445dce4f51f556381865dbed3
[6] https://crsrc.org/d/fix_encoding.py;l=73-120;drc=cfa826c9845122d445dce4f51f556381865dbed3
[7] https://github.com/python/cpython/issues/46381#issuecomment-1093409968
[8] https://source.chromium.org/chromium/chromium/tools/depot_tools/+/main:fix_encoding.py;l=315-344;drc=cfa826c9845122d445dce4f51f556381865dbed3
[9] https://github.com/python/cpython/issues/45943#issuecomment-1093402603

Bug: 1501984
Change-Id: I1d512a4b1bfe14e680ac0aa08027849b999cc638
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5263016
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Dirk Pranke <dpranke@google.com>
Commit-Queue: Aravind Vasudevan <aravindvasudev@google.com>
Aravind Vasudevan 1 年間 前
コミット
1f441eb375
8 ファイル変更0 行追加393 行削除
  1. 0 322
      fix_encoding.py
  2. 0 2
      gclient.py
  3. 0 2
      gerrit_client.py
  4. 0 2
      git_cl.py
  5. 0 2
      git_migrate_default_branch.py
  6. 0 2
      my_activity.py
  7. 0 2
      presubmit_support.py
  8. 0 59
      tests/fix_encoding_test.py

+ 0 - 322
fix_encoding.py

@@ -1,322 +0,0 @@
-# Copyright (c) 2011 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-"""Collection of functions and classes to fix various encoding problems on
-multiple platforms with python.
-"""
-
-import codecs
-import locale
-import os
-import sys
-
-
-def complain(message):
-    """If any exception occurs in this file, we'll probably try to print it
-    on stderr, which makes for frustrating debugging if stderr is directed
-    to our wrapper. So be paranoid about catching errors and reporting them
-    to sys.__stderr__, so that the user has a higher chance to see them.
-    """
-    print(isinstance(message, str) and message or repr(message),
-          file=sys.__stderr__)
-
-
-def fix_default_encoding():
-    """Forces utf8 solidly on all platforms.
-
-    By default python execution environment is lazy and defaults to ascii
-    encoding.
-
-    http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/
-    """
-    if sys.getdefaultencoding() == 'utf-8':
-        return False
-
-    # Regenerate setdefaultencoding.
-    reload(sys)
-    # Module 'sys' has no 'setdefaultencoding' member
-    # pylint: disable=no-member
-    sys.setdefaultencoding('utf-8')
-    for attr in dir(locale):
-        if attr[0:3] != 'LC_':
-            continue
-        aref = getattr(locale, attr)
-        try:
-            locale.setlocale(aref, '')
-        except locale.Error:
-            continue
-        try:
-            lang, _ = locale.getdefaultlocale()
-        except (TypeError, ValueError):
-            continue
-        if lang:
-            try:
-                locale.setlocale(aref, (lang, 'UTF-8'))
-            except locale.Error:
-                os.environ[attr] = lang + '.UTF-8'
-    try:
-        locale.setlocale(locale.LC_ALL, '')
-    except locale.Error:
-        pass
-    return True
-
-
-###############################
-# Windows specific
-
-
-def fix_win_codec():
-    """Works around <http://bugs.python.org/issue6058>."""
-    # <http://msdn.microsoft.com/en-us/library/dd317756.aspx>
-    try:
-        codecs.lookup('cp65001')
-        return False
-    except LookupError:
-        codecs.register(
-            lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None)
-        return True
-
-
-class WinUnicodeOutputBase(object):
-    """Base class to adapt sys.stdout or sys.stderr to behave correctly on
-    Windows.
-
-    Setting encoding to utf-8 is recommended.
-    """
-    def __init__(self, fileno, name, encoding):
-        # Corresponding file handle.
-        self._fileno = fileno
-        self.encoding = encoding
-        self.name = name
-
-        self.closed = False
-        self.softspace = False
-        self.mode = 'w'
-
-    @staticmethod
-    def isatty():
-        return False
-
-    def close(self):
-        # Don't really close the handle, that would only cause problems.
-        self.closed = True
-
-    def fileno(self):
-        return self._fileno
-
-    def flush(self):
-        raise NotImplementedError()
-
-    def write(self, text):
-        raise NotImplementedError()
-
-    def writelines(self, lines):
-        try:
-            for line in lines:
-                self.write(line)
-        except Exception as e:
-            complain('%s.writelines: %r' % (self.name, e))
-            raise
-
-
-class WinUnicodeConsoleOutput(WinUnicodeOutputBase):
-    """Output adapter to a Windows Console.
-
-    Understands how to use the win32 console API.
-    """
-    def __init__(self, console_handle, fileno, stream_name, encoding):
-        super(WinUnicodeConsoleOutput,
-              self).__init__(fileno, '<Unicode console %s>' % stream_name,
-                             encoding)
-        # Handle to use for WriteConsoleW
-        self._console_handle = console_handle
-
-        # Loads the necessary function.
-        # These types are available on linux but not Mac.
-        # pylint: disable=no-name-in-module,F0401
-        from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE
-        from ctypes.wintypes import BOOL, DWORD, HANDLE, LPWSTR
-        from ctypes.wintypes import LPVOID  # pylint: disable=no-name-in-module
-
-        self._DWORD = DWORD
-        self._byref = byref
-
-        # <http://msdn.microsoft.com/en-us/library/ms687401.aspx>
-        self._WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD,
-                                          POINTER(DWORD),
-                                          LPVOID)(('WriteConsoleW',
-                                                   windll.kernel32))
-        self._GetLastError = GetLastError
-
-    def flush(self):
-        # No need to flush the console since it's immediate.
-        pass
-
-    def write(self, text):
-        try:
-            if isinstance(text, bytes):
-                # Bytestrings need to be decoded to a string before being passed
-                # to Windows.
-                text = text.decode(self.encoding, 'replace')
-            remaining = len(text)
-            while remaining > 0:
-                n = self._DWORD(0)
-                # There is a shorter-than-documented limitation on the length of
-                # the string passed to WriteConsoleW. See
-                # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>.
-                retval = self._WriteConsoleW(self._console_handle, text,
-                                             min(remaining, 10000),
-                                             self._byref(n), None)
-                if retval == 0 or n.value == 0:
-                    raise IOError('WriteConsoleW returned %r, n.value = %r, '
-                                  'last error = %r' %
-                                  (retval, n.value, self._GetLastError()))
-                remaining -= n.value
-                if not remaining:
-                    break
-                text = text[int(n.value):]
-        except Exception as e:
-            complain('%s.write: %r' % (self.name, e))
-            raise
-
-
-class WinUnicodeOutput(WinUnicodeOutputBase):
-    """Output adaptor to a file output on Windows.
-
-    If the standard FileWrite function is used, it will be encoded in the
-    current code page. WriteConsoleW() permits writing any character.
-    """
-    def __init__(self, stream, fileno, encoding):
-        super(WinUnicodeOutput,
-              self).__init__(fileno, '<Unicode redirected %s>' % stream.name,
-                             encoding)
-        # Output stream
-        self._stream = stream
-
-        # Flush right now.
-        self.flush()
-
-    def flush(self):
-        try:
-            self._stream.flush()
-        except Exception as e:
-            complain('%s.flush: %r from %r' % (self.name, e, self._stream))
-            raise
-
-    def write(self, text):
-        try:
-            if isinstance(text, bytes):
-                # Replace characters that cannot be printed instead of failing.
-                text = text.decode(self.encoding, 'replace')
-            # When redirecting to a file or process any \n characters will be
-            # replaced with \r\n. If the text to be printed already has \r\n
-            # line endings then \r\r\n line endings will be generated, leading
-            # to double-spacing of some output. Normalizing line endings to \n
-            # avoids this problem.
-            text = text.replace('\r\n', '\n')
-            self._stream.write(text)
-        except Exception as e:
-            complain('%s.write: %r' % (self.name, e))
-            raise
-
-
-def win_handle_is_a_console(handle):
-    """Returns True if a Windows file handle is a handle to a console."""
-    # These types are available on linux but not Mac.
-    # pylint: disable=no-name-in-module,F0401
-    from ctypes import byref, POINTER, windll, WINFUNCTYPE
-    from ctypes.wintypes import BOOL, DWORD, HANDLE
-
-    FILE_TYPE_CHAR = 0x0002
-    FILE_TYPE_REMOTE = 0x8000
-    INVALID_HANDLE_VALUE = DWORD(-1).value
-
-    # <http://msdn.microsoft.com/en-us/library/ms683167.aspx>
-    GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(
-        ('GetConsoleMode', windll.kernel32))
-    # <http://msdn.microsoft.com/en-us/library/aa364960.aspx>
-    GetFileType = WINFUNCTYPE(DWORD, DWORD)(('GetFileType', windll.kernel32))
-
-    # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle.
-    if handle == INVALID_HANDLE_VALUE or handle is None:
-        return False
-    return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) == FILE_TYPE_CHAR
-            and GetConsoleMode(handle, byref(DWORD())))
-
-
-def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding):
-    """Returns a unicode-compatible stream.
-
-    This function will return a direct-Console writing object only if:
-    - the file number is the expected console file number
-    - the handle the expected file handle
-    - the 'real' handle is in fact a handle to a console.
-    """
-    old_fileno = getattr(stream, 'fileno', lambda: None)()
-    if old_fileno == excepted_fileno:
-        # These types are available on linux but not Mac.
-        # pylint: disable=no-name-in-module,F0401
-        from ctypes import windll, WINFUNCTYPE
-        from ctypes.wintypes import DWORD, HANDLE
-
-        # <http://msdn.microsoft.com/en-us/library/ms683231.aspx>
-        GetStdHandle = WINFUNCTYPE(HANDLE,
-                                   DWORD)(('GetStdHandle', windll.kernel32))
-
-        real_output_handle = GetStdHandle(DWORD(output_handle))
-        if win_handle_is_a_console(real_output_handle):
-            # It's a console.
-            return WinUnicodeConsoleOutput(real_output_handle, old_fileno,
-                                           stream.name, encoding)
-
-    # It's something else. Create an auto-encoding stream.
-    return WinUnicodeOutput(stream, old_fileno, encoding)
-
-
-def fix_win_console(encoding):
-    """Makes Unicode console output work independently of the current code page.
-
-    This also fixes <http://bugs.python.org/issue1602>.
-    Credit to Michael Kaplan
-    <http://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx> and
-    TZOmegaTZIOY
-    <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
-    """
-    if (isinstance(sys.stdout, WinUnicodeOutputBase)
-            or isinstance(sys.stderr, WinUnicodeOutputBase)):
-        return False
-
-    try:
-        # SetConsoleCP and SetConsoleOutputCP could be used to change the code
-        # page but it's not really useful since the code here is using
-        # WriteConsoleW(). Also, changing the code page is 'permanent' to the
-        # console and needs to be reverted manually. In practice one needs to
-        # set the console font to a TTF font to be able to see all the
-        # characters but it failed for me in practice. In any case, it won't
-        # throw any exception when printing, which is the important part. -11
-        # and -12 are defined in stdio.h
-        sys.stdout = win_get_unicode_stream(sys.stdout, 1, -11, encoding)
-        sys.stderr = win_get_unicode_stream(sys.stderr, 2, -12, encoding)
-        # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the limitation
-        # is "It doesn't appear to be possible to read Unicode characters in
-        # UTF-8 mode" and this appears to be a limitation of cmd.exe.
-    except Exception as e:
-        complain('exception %r while fixing up sys.stdout and sys.stderr' % e)
-    return True
-
-
-def fix_encoding():
-    """Fixes various encoding problems on all platforms.
-
-    Should be called at the very beginning of the process.
-    """
-    ret = True
-    if sys.platform == 'win32':
-        ret &= fix_win_codec()
-
-    ret &= fix_default_encoding()
-
-    if sys.platform == 'win32':
-        encoding = sys.getdefaultencoding()
-        ret &= fix_win_console(encoding)
-    return ret

+ 0 - 2
gclient.py

@@ -97,7 +97,6 @@ import urllib.parse
 from collections.abc import Collection, Mapping, Sequence
 
 import detect_host_arch
-import fix_encoding
 import git_common
 import gclient_eval
 import gclient_paths
@@ -4122,7 +4121,6 @@ def main(argv):
     execute."""
     if not can_run_gclient_and_helpers():
         return 2
-    fix_encoding.fix_encoding()
     disable_buffering()
     setup_color.init()
     dispatcher = subcommand.CommandDispatcher(__name__)

+ 0 - 2
gerrit_client.py

@@ -15,7 +15,6 @@ import subcommand
 import sys
 import urllib.parse
 
-import fix_encoding
 import gerrit_util
 import setup_color
 
@@ -518,7 +517,6 @@ def main(argv):
 if __name__ == '__main__':
     # These affect sys.stdout so do it outside of main() to simplify mocks in
     # unit testing.
-    fix_encoding.fix_encoding()
     setup_color.init()
     try:
         sys.exit(main(sys.argv[1:]))

+ 0 - 2
git_cl.py

@@ -40,7 +40,6 @@ from typing import Sequence
 from typing import Tuple
 import auth
 import clang_format
-import fix_encoding
 import gclient_paths
 import gclient_utils
 import gerrit_util
@@ -6707,7 +6706,6 @@ def main(argv):
 if __name__ == '__main__':
     # These affect sys.stdout, so do it outside of main() to simplify mocks in
     # the unit tests.
-    fix_encoding.fix_encoding()
     setup_color.init()
     with metrics.collector.print_notice_and_exit():
         sys.exit(main(sys.argv[1:]))

+ 0 - 2
git_migrate_default_branch.py

@@ -4,7 +4,6 @@
 # found in the LICENSE file.
 """Migrate local repository onto new default branch."""
 
-import fix_encoding
 import gerrit_util
 import git_common
 import metrics
@@ -91,7 +90,6 @@ def main():
 
 
 if __name__ == '__main__':
-    fix_encoding.fix_encoding()
     logging.basicConfig(level=logging.INFO)
     with metrics.collector.print_notice_and_exit():
         try:

+ 0 - 2
my_activity.py

@@ -49,7 +49,6 @@ import urllib
 import re
 
 import auth
-import fix_encoding
 import gclient_utils
 import gerrit_util
 
@@ -1040,7 +1039,6 @@ def main():
 
 if __name__ == '__main__':
     # Fix encoding to support non-ascii issue titles.
-    fix_encoding.fix_encoding()
 
     try:
         sys.exit(main())

+ 0 - 2
presubmit_support.py

@@ -39,7 +39,6 @@ from typing import Mapping
 from warnings import warn
 
 # Local imports.
-import fix_encoding
 import gclient_paths  # Exposed through the API
 import gclient_utils
 import git_footers
@@ -2190,7 +2189,6 @@ def main(argv=None):
 
 
 if __name__ == '__main__':
-    fix_encoding.fix_encoding()
     try:
         sys.exit(main())
     except KeyboardInterrupt:

+ 0 - 59
tests/fix_encoding_test.py

@@ -1,59 +0,0 @@
-#!/usr/bin/env python3
-# coding=utf-8
-# Copyright (c) 2011 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-"""Unit tests for fix_encoding.py."""
-
-import os
-import sys
-import unittest
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-import fix_encoding
-
-
-class FixEncodingTest(unittest.TestCase):
-    # Nice mix of latin, hebrew, arabic and chinese. Doesn't mean anything.
-    text = u'Héllô 偉大 سيد'
-
-    def test_code_page(self):
-        # Make sure printing garbage won't throw.
-        print(self.text.encode() + b'\xff')
-        print(self.text.encode() + b'\xff', file=sys.stderr)
-
-    def test_utf8(self):
-        # Make sure printing utf-8 works.
-        print(self.text.encode('utf-8'))
-        print(self.text.encode('utf-8'), file=sys.stderr)
-
-    @unittest.skipIf(os.name == 'nt', 'Does not work on Windows')
-    def test_unicode(self):
-        # Make sure printing unicode works.
-        print(self.text)
-        print(self.text, file=sys.stderr)
-
-    @unittest.skipIf(os.name == 'nt', 'Does not work on Windows')
-    def test_default_encoding(self):
-        self.assertEqual('utf-8', sys.getdefaultencoding())
-
-    def test_win_console(self):
-        if sys.platform != 'win32':
-            return
-        # This should fail if not redirected, e.g. run directly instead of
-        # through the presubmit check. Can be checked with: python
-        # tests\fix_encoding_test.py
-        self.assertEqual(sys.stdout.__class__, fix_encoding.WinUnicodeOutput)
-        self.assertEqual(sys.stderr.__class__, fix_encoding.WinUnicodeOutput)
-        self.assertEqual(sys.stdout.encoding, sys.getdefaultencoding())
-        self.assertEqual(sys.stderr.encoding, sys.getdefaultencoding())
-
-    def test_multiple_calls(self):
-        # Shouldn't do anything.
-        self.assertEqual(False, fix_encoding.fix_encoding())
-
-
-if __name__ == '__main__':
-    fix_encoding.fix_encoding()
-    unittest.main()