Skip to content

Commit

Permalink
fix: unicode surrogate pair parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait authored and jonathantneal committed Mar 5, 2019
1 parent 9aad10f commit 5d7817b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 8 deletions.
6 changes: 6 additions & 0 deletions src/__tests__/classes.js
Expand Up @@ -246,3 +246,9 @@ test('class selector with escaping (33)', '.f\\+o\\+o', (t, tree) => {
t.deepEqual(tree.nodes[0].nodes[0].type, 'class');
t.deepEqual(tree.nodes[0].nodes[0].raws.value, 'f\\+o\\+o');
});

// A hex escape for a code point outside the BMP (U+1D306) must unescape to the
// single astral character, while the raw selector text is kept untouched.
test('class selector with escaping (34)', '.\\1D306', (t, root) => {
    const classNode = root.nodes[0].nodes[0];

    t.deepEqual(classNode.value, '𝌆');
    t.deepEqual(classNode.type, 'class');
    t.deepEqual(classNode.raws.value, '\\1D306');
});
24 changes: 16 additions & 8 deletions src/util/unesc.js
@@ -1,11 +1,19 @@
// Characters CSS treats as whitespace: space, tab, CR, LF and form feed.
const whitespace = '[\\x20\\t\\r\\n\\f]';

// Matches a single CSS escape sequence. Capture group 1 is everything after the
// backslash: either 1-6 hex digits plus one optional terminating whitespace
// character, a lone whitespace character (also captured as group 2), or any
// other single character.
const unescapeRegExp = new RegExp('\\\\([\\da-f]{1,6}' + whitespace + '?|(' + whitespace + ')|.)', 'ig');

// True when the escaped text begins with a hex digit, i.e. it is a numeric escape.
// No `g` flag, so `.test()` carries no `lastIndex` state between calls.
const STARTS_WITH_HEX = /^[\da-f]/i;

// Highest code point Unicode defines.
const MAX_CODE_POINT = 0x10ffff;

// U+FFFD REPLACEMENT CHARACTER, substituted for escapes that name an invalid code point.
const REPLACEMENT_CHARACTER = '\uFFFD';

/**
 * Resolves CSS escape sequences in `str`.
 *
 * - `\` + 1-6 hex digits (+ one optional whitespace terminator) becomes the
 *   character for that code point; code points above U+FFFF are emitted as a
 *   proper surrogate pair.
 * - A numeric escape that is zero, falls in the surrogate range
 *   (U+D800-U+DFFF) or exceeds U+10FFFF becomes U+FFFD, as CSS Syntax requires.
 * - `\` + any other matched character becomes that character.
 *
 * @param {string} str - Text that may contain CSS escapes.
 * @returns {string} The text with every matched escape resolved.
 */
export default function unesc (str) {
    return str.replace(unescapeRegExp, (_, escaped, escapedWhitespace) => {
        // Escaped whitespace or a non-hex character: just drop the backslash.
        if (escapedWhitespace || !STARTS_WITH_HEX.test(escaped)) {
            return escaped;
        }

        // parseInt stops at the optional trailing whitespace terminator.
        const codePoint = parseInt(escaped, 16);
        const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;

        if (codePoint === 0 || isSurrogate || codePoint > MAX_CODE_POINT) {
            return REPLACEMENT_CHARACTER;
        }

        return String.fromCodePoint(codePoint);
    });
}

0 comments on commit 5d7817b

Please sign in to comment.