Преглед на файлове

fix: Fix unicode Regex miscounting emoji length (#2942)

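Many emojis are two or more UTF-16 code units long, so `String.length` counts them as 2+ characters. A regex with the `u` (unicode) flag, which allows matching punctuation, instead counts each emoji as a single character. Spreading the strings into arrays (`[...str]`) before measuring or slicing them restores a character count that is consistent with the regex.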
Trevor Buckner преди 2 години
родител
ревизия
f3af23ec98
променени са 3 файла, в които са добавени 36 реда и са изтрити 3 реда
  1. 4 3
      src/Tokenizer.ts
  2. 11 0
      test/specs/new/emoji_inline.html
  3. 21 0
      test/specs/new/emoji_inline.md

+ 4 - 3
src/Tokenizer.ts

@@ -625,7 +625,8 @@ export class _Tokenizer {
     const nextChar = match[1] || match[2] || '';
 
     if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) {
-      const lLength = match[0].length - 1;
+      // unicode Regex counts emoji as 1 char; spread into array for proper count (used multiple times below)
+      const lLength = [...match[0]].length - 1;
       let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
 
       const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
@@ -639,7 +640,7 @@ export class _Tokenizer {
 
         if (!rDelim) continue; // skip single * in __abc*abc__
 
-        rLength = rDelim.length;
+        rLength = [...rDelim].length;
 
         if (match[3] || match[4]) { // found another Left Delim
           delimTotal += rLength;
@@ -658,7 +659,7 @@ export class _Tokenizer {
         // Remove extra characters. *a*** -> *a*
         rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
 
-        const raw = src.slice(0, lLength + match.index + rLength + 1);
+        const raw = [...src].slice(0, lLength + match.index + rLength + 1).join('');
 
         // Create `em` if smallest delimiter has odd char count. *a***
         if (Math.min(lLength, rLength) % 2) {

+ 11 - 0
test/specs/new/emoji_inline.html

@@ -0,0 +1,11 @@
+<p>Situations where it fails:</p>
+<p><strong>test 💁</strong></p>
+<p><strong>💁 test</strong></p>
+<p><strong>🤓 test</strong></p>
+<p><strong>🏖️ test</strong></p>
+<p><strong>🏖️🤓💁 test</strong></p>
+<p>Situations where it works:</p>
+<p>**💁 **</p>
+<p><strong>⚠️ test</strong></p>
+<p>Here, the emoji rendering works, but the text doesn't get rendered in italic.</p>
+<p><em>💁 test</em></p>

+ 21 - 0
test/specs/new/emoji_inline.md

@@ -0,0 +1,21 @@
+Situations where it fails:
+
+**test 💁**
+
+**💁 test**
+
+**🤓 test**
+
+**🏖️ test**
+
+**🏖️🤓💁 test**
+
+Situations where it works:
+
+**💁 **
+
+**⚠️ test**
+
+Here, the emoji rendering works, but the text doesn't get rendered in italic.
+
+*💁 test*