Skip to content

Commit

Permalink
Use whatwg-mimetype and data-urls packages
Browse files Browse the repository at this point in the history
This brings our MIME type and data URL parsing in line with the relevant specifications.
  • Loading branch information
domenic committed Feb 6, 2018
1 parent 830a6fd commit d563965
Show file tree
Hide file tree
Showing 14 changed files with 90 additions and 96 deletions.
20 changes: 10 additions & 10 deletions lib/api.js
Expand Up @@ -8,7 +8,7 @@ const sniffHTMLEncoding = require("html-encoding-sniffer");
const whatwgURL = require("whatwg-url");
const whatwgEncoding = require("whatwg-encoding");
const { URL } = require("whatwg-url");
const parseContentType = require("content-type-parser");
const MIMEType = require("whatwg-mimetype");
const idlUtils = require("./jsdom/living/generated/utils.js");
const VirtualConsole = require("./jsdom/virtual-console.js");
const Window = require("./jsdom/browser/Window.js");
Expand Down Expand Up @@ -152,8 +152,11 @@ class JSDOM {
};

return request(url, requestOptions).then(res => {
const parsedContentType = parseContentType(res.headers["content-type"]);
const transportLayerEncodingLabel = parsedContentType && parsedContentType.get("charset");
let transportLayerEncodingLabel;
if ("content-type" in res.headers) {
const mimeType = new MIMEType(res.headers["content-type"]);
transportLayerEncodingLabel = mimeType.parameters.get("charset");
}

options = Object.assign(options, {
url: res.request.href + parsedURL.hash,
Expand Down Expand Up @@ -250,17 +253,14 @@ function transformOptions(options, encoding) {
};

if (options.contentType !== undefined) {
const contentTypeParsed = parseContentType(options.contentType);
if (contentTypeParsed === null) {
throw new TypeError(`Could not parse the given content type of "${options.contentType}"`);
}
const mimeType = new MIMEType(options.contentType);

if (!contentTypeParsed.isHTML() && !contentTypeParsed.isXML()) {
if (!mimeType.isHTML() && !mimeType.isXML()) {
throw new RangeError(`The given content type of "${options.contentType}" was not a HTML or XML content type`);
}

transformed.windowOptions.contentType = contentTypeParsed.type + "/" + contentTypeParsed.subtype;
transformed.windowOptions.parsingMode = contentTypeParsed.isHTML() ? "html" : "xml";
transformed.windowOptions.contentType = mimeType.essence;
transformed.windowOptions.parsingMode = mimeType.isHTML() ? "html" : "xml";
}

if (options.url !== undefined) {
Expand Down
36 changes: 17 additions & 19 deletions lib/jsdom/browser/resource-loader.js
@@ -1,9 +1,9 @@
"use strict";

const parseContentType = require("content-type-parser");
const MIMEType = require("whatwg-mimetype");
const parseDataURL = require("data-urls");
const sniffHTMLEncoding = require("html-encoding-sniffer");
const whatwgEncoding = require("whatwg-encoding");
const { parseDataUrl } = require("../utils");
const fs = require("fs");
const request = require("request");
const { documentBaseURLSerialized } = require("../living/helpers/document-base-url");
Expand Down Expand Up @@ -87,30 +87,28 @@ exports.readFile = function (filePath, { defaultEncoding, detectMetaCharset }, c
};
};

function readDataUrl(dataUrl, { defaultEncoding, detectMetaCharset }, callback) {
function readDataURL(dataURL, { defaultEncoding, detectMetaCharset }, callback) {
try {
const data = parseDataUrl(dataUrl);
const parsed = parseDataURL(dataURL);
// If default encoding does not exist, pass on binary data.
if (defaultEncoding) {
const contentType = parseContentType(data.type) || parseContentType("text/plain");
const sniffOptions = {
transportLayerEncodingLabel: contentType.get("charset"),
transportLayerEncodingLabel: parsed.mimeType.parameters.get("charset"),
defaultEncoding
};

const encoding = detectMetaCharset ?
sniffHTMLEncoding(data.buffer, sniffOptions) :
whatwgEncoding.getBOMEncoding(data.buffer) ||
whatwgEncoding.labelToName(contentType.get("charset")) ||
sniffHTMLEncoding(parsed.body, sniffOptions) :
whatwgEncoding.getBOMEncoding(parsed.body) ||
whatwgEncoding.labelToName(parsed.mimeType.parameters.get("charset")) ||
defaultEncoding;
const decoded = whatwgEncoding.decode(data.buffer, encoding);
const decoded = whatwgEncoding.decode(parsed.body, encoding);

contentType.set("charset", encoding);
data.type = contentType.toString();
parsed.mimeType.parameters.set("charset", encoding);

callback(null, decoded, { headers: { "content-type": data.type } });
callback(null, decoded, { headers: { "content-type": parsed.mimeType.toString() } });
} else {
callback(null, data.buffer, { headers: { "content-type": data.type } });
callback(null, parsed.body, { headers: { "content-type": parsed.mimeType.toString() } });
}
} catch (err) {
callback(err, null);
Expand All @@ -130,7 +128,7 @@ exports.wrapCookieJarForRequest = cookieJar => {

function fetch(urlObj, options, callback) {
if (urlObj.protocol === "data:") {
return readDataUrl(urlObj.href, options, callback);
return readDataURL(urlObj.href, options, callback);
} else if (urlObj.hostname) {
return exports.download(urlObj, options, callback);
}
Expand Down Expand Up @@ -184,20 +182,20 @@ exports.download = function (url, options, callback) {
if (!error) {
// If default encoding does not exist, pass on binary data.
if (defaultEncoding) {
const contentType = parseContentType(response.headers["content-type"]) || parseContentType("text/plain");
const contentType = MIMEType.parse(response.headers["content-type"]) || new MIMEType("text/plain");
const sniffOptions = {
transportLayerEncodingLabel: contentType.get("charset"),
transportLayerEncodingLabel: contentType.parameters.get("charset"),
defaultEncoding
};

const encoding = detectMetaCharset ?
sniffHTMLEncoding(bufferData, sniffOptions) :
whatwgEncoding.getBOMEncoding(bufferData) ||
whatwgEncoding.labelToName(contentType.get("charset")) ||
whatwgEncoding.labelToName(contentType.parameters.get("charset")) ||
defaultEncoding;
const decoded = whatwgEncoding.decode(bufferData, encoding);

contentType.set("charset", encoding);
contentType.parameters.set("charset", encoding);
response.headers["content-type"] = contentType.toString();

callback(null, decoded, response);
Expand Down
11 changes: 6 additions & 5 deletions lib/jsdom/living/file-api/FileReader-impl.js
@@ -1,7 +1,7 @@
"use strict";

const whatwgEncoding = require("whatwg-encoding");
const parseContentType = require("content-type-parser");
const MIMEType = require("whatwg-mimetype");
const querystring = require("querystring");
const DOMException = require("domexception");
const EventTargetImpl = require("../events/EventTarget-impl").implementation;
Expand Down Expand Up @@ -97,14 +97,15 @@ class FileReaderImpl extends EventTargetImpl {
break;
}
case "dataURL": {
// Spec seems very unclear here; see https://github.com/whatwg/fetch/issues/665#issuecomment-362930079.
let dataUrl = "data:";
const contentType = parseContentType(file.type);
if (contentType && contentType.isText()) {
const contentType = MIMEType.parse(file.type);
if (contentType && contentType.type === "text") {
const fallbackEncoding = whatwgEncoding.getBOMEncoding(data) ||
whatwgEncoding.labelToName(contentType.get("charset")) || "UTF-8";
whatwgEncoding.labelToName(contentType.parameters.get("charset")) || "UTF-8";
const decoded = whatwgEncoding.decode(data, fallbackEncoding);

contentType.set("charset", encoding);
contentType.parameters.set("charset", encoding);
dataUrl += contentType.toString();
dataUrl += ",";
dataUrl += querystring.escape(decoded);
Expand Down
6 changes: 3 additions & 3 deletions lib/jsdom/living/nodes/HTMLFrameElement-impl.js
@@ -1,5 +1,5 @@
"use strict";
const parseContentType = require("content-type-parser");
const MIMEType = require("whatwg-mimetype");
const { parseURL, serializeURL } = require("whatwg-url");
const { evaluateJavaScriptURL } = require("../window/navigation");
const HTMLElementImpl = require("./HTMLElement-impl").implementation;
Expand Down Expand Up @@ -79,12 +79,12 @@ function loadFrame(frame) {
{ defaultEncoding: parentDoc._encoding, detectMetaCharset: true },
(html, responseURL, response) => {
if (response) {
const contentType = parseContentType(response.headers["content-type"]);
const contentType = MIMEType.parse(response.headers["content-type"]);
if (contentType) {
if (contentType.isXML()) {
contentDoc._parsingMode = "xml";
}
contentDoc._encoding = contentType.get("charset");
contentDoc._encoding = contentType.parameters.get("charset");
}
}
contentDoc.write(html);
Expand Down
2 changes: 2 additions & 0 deletions lib/jsdom/living/nodes/HTMLScriptElement-impl.js
Expand Up @@ -198,6 +198,8 @@ function processJavaScript(element, code, filename) {

function getType(typeString) {
const lowercased = asciiLowercase(typeString);
// Cannot use whatwg-mimetype parsing because that strips whitespace. The spec demands a strict string comparison.
// That is, the type="" attribute is not really related to MIME types at all.
if (jsMIMETypes.has(lowercased)) {
return "classic";
}
Expand Down
11 changes: 6 additions & 5 deletions lib/jsdom/living/xhr-utils.js
Expand Up @@ -5,8 +5,8 @@ const Event = require("./generated/Event");
const ProgressEvent = require("./generated/ProgressEvent");
const fs = require("fs");
const { URL } = require("whatwg-url");
const parseDataURL = require("data-urls");

const utils = require("../utils");
const DOMException = require("domexception");
const xhrSymbols = require("./xmlhttprequest-symbols");

Expand Down Expand Up @@ -215,11 +215,12 @@ function createClient(xhr) {

let buffer;
try {
const dataUrlContent = utils.parseDataUrl(uri);
buffer = dataUrlContent.buffer;
const parsed = parseDataURL(uri);
const contentType = parsed.mimeType.toString();
buffer = parsed.body;
response.statusCode = 200;
response.rawHeaders = dataUrlContent.type ? ["Content-Type", dataUrlContent.type] : [];
response.headers = dataUrlContent.type ? { "content-type": dataUrlContent.type } : {};
response.rawHeaders = ["Content-Type", contentType];
response.headers = { "content-type": contentType };
} catch (err) {
process.nextTick(() => client.emit("error", err));
return client;
Expand Down
39 changes: 21 additions & 18 deletions lib/jsdom/living/xmlhttprequest.js
Expand Up @@ -5,14 +5,15 @@ const { spawnSync } = require("child_process");
const { URL } = require("whatwg-url");
const whatwgEncoding = require("whatwg-encoding");
const tough = require("tough-cookie");
const parseContentType = require("content-type-parser");
const MIMEType = require("whatwg-mimetype");
const conversions = require("webidl-conversions");

const xhrUtils = require("./xhr-utils");
const DOMException = require("domexception");
const xhrSymbols = require("./xmlhttprequest-symbols");
const { addConstants } = require("../utils");
const { documentBaseURLSerialized } = require("./helpers/document-base-url");
const { asciiCaseInsensitiveMatch } = require("./helpers/strings");
const idlUtils = require("./generated/utils");
const Document = require("./generated/Document");
const Blob = require("./generated/Blob");
Expand Down Expand Up @@ -214,7 +215,7 @@ module.exports = function createXMLHttpRequest(window) {
const contentType = finalMIMEType(this);
res = Blob.create([
[new Uint8Array(responseBuffer)],
{ type: (contentType && contentType.toString()) || "" }
{ type: contentType || "" }
]);
break;
}
Expand Down Expand Up @@ -282,12 +283,14 @@ module.exports = function createXMLHttpRequest(window) {
if (!responseBuffer) {
return null;
}

const contentType = finalMIMEType(this);
let isHTML = false;
let isXML = false;
if (contentType) {
isHTML = contentType.isHTML();
isXML = contentType.isXML();
const parsed = MIMEType.parse(contentType);
if (parsed) {
isHTML = parsed.isHTML();
isXML = parsed.isXML();
if (!isXML && !isHTML) {
return null;
}
Expand Down Expand Up @@ -502,11 +505,12 @@ module.exports = function createXMLHttpRequest(window) {

this[xhrSymbols.flag].overrideMIMEType = "application/octet-stream";

const parsed = parseContentType(mime);
// Waiting for better spec: https://github.com/whatwg/xhr/issues/157
const parsed = MIMEType.parse(mime);
if (parsed) {
this[xhrSymbols.flag].overrideMIMEType = parsed.type + "/" + parsed.subtype;
this[xhrSymbols.flag].overrideMIMEType = parsed.essence;

const charset = parsed.get("charset");
const charset = parsed.parameters.get("charset");
if (charset) {
this[xhrSymbols.flag].overrideCharset = whatwgEncoding.labelToName(charset);
}
Expand Down Expand Up @@ -557,14 +561,13 @@ module.exports = function createXMLHttpRequest(window) {
if (mimeType !== null && existingContentType === null) {
flag.requestHeaders["Content-Type"] = mimeType;
} else if (existingContentType !== null && encoding !== null) {
const parsed = parseContentType(existingContentType);
// Waiting for better spec: https://github.com/whatwg/xhr/issues/188. This seems like a good guess at what
// the spec will be, in the meantime.
const parsed = MIMEType.parse(existingContentType);
if (parsed) {
for (const param of parsed.parameterList) {
if (param.key && param.key.toLowerCase() === "charset") {
if (param.value.toLowerCase() !== encoding.toLowerCase()) {
param.value = encoding;
}
}
const charset = parsed.parameters.get("charset");
if (charset && !asciiCaseInsensitiveMatch(charset, encoding) && encoding !== null) {
parsed.parameters.set("charset", encoding);
}
xhrUtils.updateRequestHeader(flag.requestHeaders, "content-type", parsed.toString());
}
Expand Down Expand Up @@ -985,17 +988,17 @@ function toByteString(value) {

function finalMIMEType(xhr) {
const flag = xhr[xhrSymbols.flag];
return parseContentType(flag.overrideMIMEType || getResponseHeader(xhr, "content-type"));
return flag.overrideMIMEType || getResponseHeader(xhr, "content-type");
}

function finalCharset(xhr) {
const flag = xhr[xhrSymbols.flag];
if (flag.overrideCharset) {
return flag.overrideCharset;
}
const parsedContentType = parseContentType(getResponseHeader(xhr, "content-type"));
const parsedContentType = MIMEType.parse(getResponseHeader(xhr, "content-type"));
if (parsedContentType) {
return whatwgEncoding.labelToName(parsedContentType.get("charset"));
return whatwgEncoding.labelToName(parsedContentType.parameters.get("charset"));
}
return null;
}
Expand Down
17 changes: 0 additions & 17 deletions lib/jsdom/utils.js
@@ -1,7 +1,6 @@
"use strict";
const path = require("path");
const whatwgURL = require("whatwg-url");
const querystring = require("querystring");
const { domSymbolTree } = require("./living/helpers/internal-constants");
const SYMBOL_TREE_POSITION = require("symbol-tree").TreePosition;
const { parseURLToResultingURLRecord } = require("./living/helpers/document-base-url");
Expand Down Expand Up @@ -179,22 +178,6 @@ exports.treeOrderSorter = function (a, b) {
return 0;
};

const base64Regexp = /^(?:[A-Z0-9+/]{4})*(?:[A-Z0-9+/]{2}==|[A-Z0-9+/]{3}=|[A-Z0-9+/]{4})$/i;

exports.parseDataUrl = function parseDataUrl(url) {
const urlParts = url.match(/^data:(.+?)(?:;(base64))?,([^#]*)/);
let buffer;
if (urlParts[2] === "base64") {
if (urlParts[3] && !base64Regexp.test(urlParts[3])) {
throw new Error("Not a base64 string");
}
buffer = Buffer.from(urlParts[3], "base64");
} else {
buffer = Buffer.from(querystring.unescape(urlParts[3]));
}
return { buffer, type: urlParts[1] };
};

/* eslint-disable global-require */

exports.Canvas = null;
Expand Down
14 changes: 7 additions & 7 deletions lib/old-api.js
Expand Up @@ -6,7 +6,7 @@
const fs = require("fs");
const path = require("path");
const { CookieJar } = require("tough-cookie");
const parseContentType = require("content-type-parser");
const MIMEType = require("whatwg-mimetype");

const { toFileUrl } = require("./jsdom/utils");
const documentFeatures = require("./jsdom/browser/documentfeatures");
Expand Down Expand Up @@ -221,8 +221,8 @@ exports.env = exports.jsdom.env = function () {
return;
}

const contentType = parseContentType(res.headers["content-type"]);
config.encoding = contentType.get("charset");
const contentType = new MIMEType(res.headers["content-type"]);
config.encoding = contentType.parameters.get("charset");
setParsingModeFromExtension(config, config.file);

config.html = text;
Expand Down Expand Up @@ -251,8 +251,8 @@ exports.env = exports.jsdom.env = function () {
reportInitError(err, config);
}
} else {
const contentType = parseContentType(res.headers["content-type"]);
config.encoding = contentType.get("charset");
const contentType = new MIMEType(res.headers["content-type"]);
config.encoding = contentType.parameters.get("charset");
setParsingModeFromExtension(config, config.somethingToAutodetect);

config.html = text;
Expand Down Expand Up @@ -304,13 +304,13 @@ exports.env = exports.jsdom.env = function () {
config.lastModified = new Date(res.headers["last-modified"]);
}

const contentType = parseContentType(res.headers["content-type"]);
const contentType = new MIMEType(res.headers["content-type"]);
if (config.parsingMode === "auto") {
if (contentType.isXML()) {
config.parsingMode = "xml";
}
}
config.encoding = contentType.get("charset");
config.encoding = contentType.parameters.get("charset");

processHTML(config);
});
Expand Down

0 comments on commit d563965

Please sign in to comment.