Browse Source

Reland r374389: [lit] Clean up internal diff's encoding handling

To avoid breaking some tests, D66574, D68664, D67643, and D68668
landed together.  However, D68664 introduced an issue now addressed by
D68839, with which these are now all relanding.

Differential Revision: https://reviews.llvm.org/D68664

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374649 91177308-0d34-0410-b5e6-96231b3b80d8
Joel E. Denny 5 years ago
parent
commit
725593eab9

+ 20 - 31
utils/lit/lit/builtin_commands/diff.py

@@ -1,6 +1,7 @@
 import difflib
 import functools
 import getopt
+import locale
 import os
 import sys
 
@@ -24,37 +25,26 @@ def getDirTree(path, basedir=""):
         return path, sorted(child_trees)
 
 def compareTwoFiles(flags, filepaths):
-    compare_bytes = False
-    encoding = None
     filelines = []
     for file in filepaths:
-        try:
-            with open(file, 'r') as f:
-                filelines.append(f.readlines())
-        except UnicodeDecodeError:
-            try:
-                with io.open(file, 'r', encoding="utf-8") as f:
-                    filelines.append(f.readlines())
-                encoding = "utf-8"
-            except:
-                compare_bytes = True
-
-    if compare_bytes:
-        return compareTwoBinaryFiles(flags, filepaths)
-    else:
-        return compareTwoTextFiles(flags, filepaths, encoding)
+        with open(file, 'rb') as file_bin:
+            filelines.append(file_bin.readlines())
 
-def compareTwoBinaryFiles(flags, filepaths):
-    filelines = []
-    for file in filepaths:
-        with open(file, 'rb') as f:
-            filelines.append(f.readlines())
+    try:
+        return compareTwoTextFiles(flags, filepaths, filelines,
+                                   locale.getpreferredencoding(False))
+    except UnicodeDecodeError:
+        try:
+            return compareTwoTextFiles(flags, filepaths, filelines, "utf-8")
+        except:
+            return compareTwoBinaryFiles(flags, filepaths, filelines)
 
+def compareTwoBinaryFiles(flags, filepaths, filelines):
     exitCode = 0
     if hasattr(difflib, 'diff_bytes'):
         # python 3.5 or newer
         diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode())
-        diffs = [diff.decode() for diff in diffs]
+        diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
     else:
         # python 2.7
         if flags.unified_diff:
@@ -68,15 +58,14 @@ def compareTwoBinaryFiles(flags, filepaths):
         exitCode = 1
     return exitCode
 
-def compareTwoTextFiles(flags, filepaths, encoding):
+def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding):
     filelines = []
-    for file in filepaths:
-        if encoding is None:
-            with open(file, 'r') as f:
-                filelines.append(f.readlines())
-        else:
-            with io.open(file, 'r', encoding=encoding) as f:
-                filelines.append(f.readlines())
+    for lines_bin in filelines_bin:
+        lines = []
+        for line_bin in lines_bin:
+            line = line_bin.decode(encoding=encoding)
+            lines.append(line)
+        filelines.append(lines)
 
     exitCode = 0
     def compose2(f, g):

+ 9 - 0
utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt

@@ -0,0 +1,9 @@
+# Check that diff falls back to binary mode if it cannot decode a file.
+
+# RUN: diff -u diff-in.bin diff-in.bin
+# RUN: diff -u diff-in.utf16 diff-in.bin && false || true
+# RUN: diff -u diff-in.utf8 diff-in.bin && false || true
+# RUN: diff -u diff-in.bin diff-in.utf8 && false || true
+
+# Fail so lit will print output.
+# RUN: false

BIN
utils/lit/tests/Inputs/shtest-shell/diff-in.bin


BIN
utils/lit/tests/Inputs/shtest-shell/diff-in.utf16


+ 3 - 0
utils/lit/tests/Inputs/shtest-shell/diff-in.utf8

@@ -0,0 +1,3 @@
+foo
+bar
+baz

+ 1 - 1
utils/lit/tests/max-failures.py

@@ -8,7 +8,7 @@
 #
 # END.
 
-# CHECK: Failing Tests (27)
+# CHECK: Failing Tests (28)
 # CHECK: Failing Tests (1)
 # CHECK: Failing Tests (2)
 # CHECK: error: argument --max-failures: requires positive integer, but found '0'

+ 53 - 1
utils/lit/tests/shtest-shell.py

@@ -34,6 +34,58 @@
 # CHECK: error: command failed with exit status: 127
 # CHECK: ***
 
+
+# CHECK: FAIL: shtest-shell :: diff-encodings.txt
+# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED ***
+
+# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin"
+# CHECK-NOT: error
+
+# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: {{^ .f.o.o.$}}
+# CHECK-NEXT: {{^-.b.a.r.$}}
+# CHECK-NEXT: {{^\+.b.a.r..}}
+# CHECK-NEXT: {{^ .b.a.z.$}}
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: -foo
+# CHECK-NEXT: -bar
+# CHECK-NEXT: -baz
+# CHECK-NEXT: {{^\+.f.o.o.$}}
+# CHECK-NEXT: {{^\+.b.a.r..}}
+# CHECK-NEXT: {{^\+.b.a.z.$}}
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8"
+# CHECK: # command output:
+# CHECK-NEXT: ---
+# CHECK-NEXT: +++
+# CHECK-NEXT: @@
+# CHECK-NEXT: {{^\-.f.o.o.$}}
+# CHECK-NEXT: {{^\-.b.a.r..}}
+# CHECK-NEXT: {{^\-.b.a.z.$}}
+# CHECK-NEXT: +foo
+# CHECK-NEXT: +bar
+# CHECK-NEXT: +baz
+# CHECK: error: command failed with exit status: 1
+# CHECK: $ "true"
+
+# CHECK: $ "false"
+
+# CHECK: ***
+
+
 # CHECK: FAIL: shtest-shell :: diff-error-1.txt
 # CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED ***
 # CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt"
@@ -245,4 +297,4 @@
 # CHECK: PASS: shtest-shell :: sequencing-0.txt
 # CHECK: XFAIL: shtest-shell :: sequencing-1.txt
 # CHECK: PASS: shtest-shell :: valid-shell.txt
-# CHECK: Failing Tests (27)
+# CHECK: Failing Tests (28)