Просмотр исходного кода

fix: Fix unicode Regex miscounting emoji length (#2942)

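Many emoji occupy two or more UTF-16 code units, so a JavaScript string's `.length` counts each of them as more than one character. A regex with the `u` (unicode) flag, which allows matching punctuation, instead treats each emoji as a single character. Spreading the strings into arrays (`[...str]`) counts by code point and restores a consistent character count.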
Trevor Buckner 2 года назад
Родитель
Коммит
f3af23ec98
3 изменённых файла: 36 добавлений и 3 удаления
  1. 4 3
      src/Tokenizer.ts
  2. 11 0
      test/specs/new/emoji_inline.html
  3. 21 0
      test/specs/new/emoji_inline.md

+ 4 - 3
src/Tokenizer.ts

@@ -625,7 +625,8 @@ export class _Tokenizer {
     const nextChar = match[1] || match[2] || '';
     const nextChar = match[1] || match[2] || '';
 
 
     if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) {
     if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) {
-      const lLength = match[0].length - 1;
+      // unicode Regex counts emoji as 1 char; spread into array for proper count (used multiple times below)
+      const lLength = [...match[0]].length - 1;
       let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
       let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
 
 
       const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
       const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
@@ -639,7 +640,7 @@ export class _Tokenizer {
 
 
         if (!rDelim) continue; // skip single * in __abc*abc__
         if (!rDelim) continue; // skip single * in __abc*abc__
 
 
-        rLength = rDelim.length;
+        rLength = [...rDelim].length;
 
 
         if (match[3] || match[4]) { // found another Left Delim
         if (match[3] || match[4]) { // found another Left Delim
           delimTotal += rLength;
           delimTotal += rLength;
@@ -658,7 +659,7 @@ export class _Tokenizer {
         // Remove extra characters. *a*** -> *a*
         // Remove extra characters. *a*** -> *a*
         rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
         rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
 
 
-        const raw = src.slice(0, lLength + match.index + rLength + 1);
+        const raw = [...src].slice(0, lLength + match.index + rLength + 1).join('');
 
 
         // Create `em` if smallest delimiter has odd char count. *a***
         // Create `em` if smallest delimiter has odd char count. *a***
         if (Math.min(lLength, rLength) % 2) {
         if (Math.min(lLength, rLength) % 2) {

+ 11 - 0
test/specs/new/emoji_inline.html

@@ -0,0 +1,11 @@
+<p>Situations where it fails:</p>
+<p><strong>test 💁</strong></p>
+<p><strong>💁 test</strong></p>
+<p><strong>🤓 test</strong></p>
+<p><strong>🏖️ test</strong></p>
+<p><strong>🏖️🤓💁 test</strong></p>
+<p>Situations where it works:</p>
+<p>**💁 **</p>
+<p><strong>⚠️ test</strong></p>
+<p>Here, the emoji rendering works, but the text doesn't get rendered in italic.</p>
+<p><em>💁 test</em></p>

+ 21 - 0
test/specs/new/emoji_inline.md

@@ -0,0 +1,21 @@
+Situations where it fails:
+
+**test 💁**
+
+**💁 test**
+
+**🤓 test**
+
+**🏖️ test**
+
+**🏖️🤓💁 test**
+
+Situations where it works:
+
+**💁 **
+
+**⚠️ test**
+
+Here, the emoji rendering works, but the text doesn't get rendered in italic.
+
+*💁 test*