Skip to content

Commit

Permalink
Prevent data loss when using decodeEntities
Browse files Browse the repository at this point in the history
Without this patch, the `decodeEntities` option decodes e.g. `&amp;` even in cases where doing so changes the meaning (and rendering) of the resulting HTML.

Fixes #964.
  • Loading branch information
mathiasbynens committed Sep 28, 2018
1 parent df720b3 commit 9b0c7cc
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 59 deletions.
111 changes: 58 additions & 53 deletions dist/htmlminifier.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/htmlminifier.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -57,7 +57,7 @@
"camel-case": "3.0.x",
"clean-css": "4.2.x",
"commander": "2.17.x",
"he": "1.1.x",
"he": "1.2.x",
"param-case": "2.1.x",
"relateurl": "0.2.x",
"uglify-js": "3.4.x"
Expand Down
7 changes: 5 additions & 2 deletions src/htmlminifier.js
Expand Up @@ -1195,9 +1195,12 @@ function minify(value, options, partialMarkup) {
}
charsPrevTag = /^\s*$/.test(text) ? prevTag : 'comment';
if (options.decodeEntities && text && !specialContentTags(currentTag)) {
// semi-colon can be omitted
// Escape any `&` symbols that start either:
// 1) a legacy named character reference (i.e. one that doesn't end with `;`)
// 2) or any other character reference (i.e. one that does end with `;`)
// Note that `&` can be escaped as `&amp`, without the semi-colon.
// https://mathiasbynens.be/notes/ambiguous-ampersands
text = text.replace(/&(#?[0-9a-zA-Z]+;)/g, '&amp$1').replace(/</g, '&lt;');
text = text.replace(/&((?:Iacute|aacute|uacute|plusmn|Otilde|otilde|agrave|Agrave|Yacute|yacute|Oslash|oslash|atilde|Atilde|brvbar|ccedil|Ccedil|Ograve|curren|divide|eacute|Eacute|ograve|Oacute|egrave|Egrave|Ugrave|frac12|frac14|frac34|ugrave|oacute|iacute|Ntilde|ntilde|Uacute|middot|igrave|Igrave|iquest|Aacute|cedil|laquo|micro|iexcl|Icirc|icirc|acirc|Ucirc|Ecirc|ocirc|Ocirc|ecirc|ucirc|Aring|aring|AElig|aelig|acute|pound|raquo|Acirc|times|THORN|szlig|thorn|COPY|auml|ordf|ordm|Uuml|macr|uuml|Auml|ouml|Ouml|para|nbsp|euml|quot|QUOT|Euml|yuml|cent|sect|copy|sup1|sup2|sup3|iuml|Iuml|ETH|shy|reg|not|yen|amp|AMP|REG|uml|eth|deg|gt|GT|LT|lt)(?!;)|(?:#?[0-9a-zA-Z]+;))/g, '&amp$1').replace(/</g, '&lt;');
}
if (uidPattern && options.collapseWhitespace && stackNoTrimWhitespace.length) {
text = text.replace(uidPattern, function(match, prefix, index) {
Expand Down
4 changes: 2 additions & 2 deletions tests/index.html
Expand Up @@ -3,14 +3,14 @@
<head>
<meta charset="utf-8">
<title>HTML Minifier Tests</title>
<link rel="stylesheet" href="https://code.jquery.com/qunit/qunit-2.6.1.css">
<link rel="stylesheet" href="https://code.jquery.com/qunit/qunit-2.6.2.css">
</head>
<body>
<div id="qunit">
<button onclick="QUnit.start()">Start</button>
</div>
<div id="qunit-fixture"></div>
<script src="https://code.jquery.com/qunit/qunit-2.6.1.js"></script>
<script src="https://code.jquery.com/qunit/qunit-2.6.2.js"></script>
<script src="../dist/htmlminifier.min.js"></script>
<script src="minifier.js"></script>
</body>
Expand Down
5 changes: 5 additions & 0 deletions tests/minifier.js
Expand Up @@ -3353,6 +3353,11 @@ QUnit.test('decode entity characters', function(assert) {
assert.equal(minify(input, { decodeEntities: false }), input);
assert.equal(minify(input, { decodeEntities: true }), input);

// https://github.com/kangax/html-minifier/issues/964
input = '&amp;xxx; &amp;xxx &ampthorn; &ampthorn &ampcurren;t &ampcurrent';
output = '&ampxxx; &xxx &ampthorn; &ampthorn &ampcurren;t &ampcurrent';
assert.equal(minify(input, { decodeEntities: true }), output);

input = '<script type="text/html">&colon;</script>';
assert.equal(minify(input), input);
assert.equal(minify(input, { decodeEntities: false }), input);
Expand Down

0 comments on commit 9b0c7cc

Please sign in to comment.