Skip to content

Commit

Permalink
core(jsonld): add structured data validation (#6750)
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickhulce committed Apr 8, 2019
1 parent fcd8115 commit 3f15b67
Show file tree
Hide file tree
Showing 17 changed files with 17,947 additions and 0 deletions.
7,137 changes: 7,137 additions & 0 deletions lighthouse-core/lib/sd-validation/assets/jsonldcontext.json

Large diffs are not rendered by default.

9,900 changes: 9,900 additions & 0 deletions lighthouse-core/lib/sd-validation/assets/schema-tree.json

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions lighthouse-core/lib/sd-validation/helpers/walk-object.js
@@ -0,0 +1,30 @@
/**
* @license Copyright 2018 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

/**
* Recursively (DFS) traverses an object and calls provided function for each field.
*
* @param {*} obj
* @param {function(string, any, Array<string>, any): void} callback
* @param {Array<string>} path
*/
module.exports = function walkObject(obj, callback, path = []) {
if (obj === null) {
return;
}

Object.entries(obj).forEach(([fieldName, fieldValue]) => {
const newPath = Array.from(path);
newPath.push(fieldName);

callback(fieldName, fieldValue, newPath, obj);

if (typeof fieldValue === 'object') {
walkObject(fieldValue, callback, newPath);
}
});
};
56 changes: 56 additions & 0 deletions lighthouse-core/lib/sd-validation/json-expander.js
@@ -0,0 +1,56 @@
/**
* @license Copyright 2018 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

const {URL} = require('../url-shim.js');
const jsonld = require('jsonld');
const schemaOrgContext = require('./assets/jsonldcontext.json');
// Host that `documentLoader` compares against the parsed URL's host to decide
// whether the bundled schema.org context should be returned.
const SCHEMA_ORG_HOST = 'schema.org';

/**
 * Document loader handed to jsonld so that no network request is made: the root
 * document of schema.org resolves to the bundled local copy, and every other URL
 * resolves to an empty document.
 *
 * @param {string} schemaUrl URL (absolute or relative) of the requested document.
 * @param {(err: null|Error, value?: any) => void} callback Receives either an error,
 *     or `null` and an object whose `document` property holds the loaded document.
 */
function documentLoader(schemaUrl, callback) {
  let parsedUrl;

  try {
    // The base URL is a placeholder whose only job is to let relative URLs parse.
    parsedUrl = new URL(schemaUrl, 'http://example.com');
  } catch (e) {
    return callback(new Error('Error parsing URL: ' + schemaUrl), undefined);
  }

  const isSchemaOrgRoot = parsedUrl.host === SCHEMA_ORG_HOST && parsedUrl.pathname === '/';

  // Only schema.org is processed; any other schema yields an empty object.
  const document = isSchemaOrgRoot ? schemaOrgContext : {};
  callback(null, {document});
}

/**
* Takes JSON-LD object and normalizes it by following the expansion algorithm
* (https://json-ld.org/spec/latest/json-ld-api/#expansion).
*
* @param {any} inputObject
* @returns {Promise<LH.StructuredData.ExpandedSchemaRepresentation|null>}
*/
module.exports = async function expand(inputObject) {
try {
return await jsonld.expand(inputObject, {documentLoader});
} catch (err) {
// jsonld wraps real errors in a bunch of junk, so see we have an underlying error first
if (err.details && err.details.cause) throw err.details.cause;
throw err;
}
};
49 changes: 49 additions & 0 deletions lighthouse-core/lib/sd-validation/json-linter.js
@@ -0,0 +1,49 @@
/**
* @license Copyright 2018 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

const jsonlint = require('jsonlint-mod');

/**
* @param {string} input
* @returns {{message: string, lineNumber: string|null}|null}
*/
module.exports = function parseJSON(input) {
try {
jsonlint.parse(input);
} catch (error) {
/** @type {string|null} */
let line = error.at;

// extract line number from message
if (!line) {
const regexLineResult = error.message.match(/Parse error on line (\d+)/);

if (regexLineResult) {
line = regexLineResult[1];
}
}


// jsonlint error message points to a specific character, but we just want the message.
// Example:
// ---------^
// Unexpected character {
let message = /** @type {string} */ (error.message);
const regexMessageResult = error.message.match(/-+\^\n(.+)$/);

if (regexMessageResult) {
message = regexMessageResult[1];
}

return {
message,
lineNumber: line,
};
}

return null;
};
50 changes: 50 additions & 0 deletions lighthouse-core/lib/sd-validation/jsonld-keyword-validator.js
@@ -0,0 +1,50 @@
/**
* @license Copyright 2018 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

const walkObject = require('./helpers/walk-object.js');

// Keywords defined by JSON-LD 1.1 ("Syntax Tokens and Keywords"):
// https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords
// Any other key starting with "@" is reported as an unknown keyword.
const VALID_KEYWORDS = new Set([
  '@base',
  '@container',
  '@context',
  '@direction',
  '@graph',
  '@id',
  '@import',
  '@included',
  '@index',
  '@json',
  '@language',
  '@list',
  '@nest',
  '@none',
  '@prefix',
  '@propagate',
  '@protected',
  '@reverse',
  '@set',
  '@type',
  '@value',
  '@version',
  '@vocab',
]);

/**
* @param {*} json
* @return {Array<{path: string, message: string}>}
*/
module.exports = function validateJsonLD(json) {
/** @type {Array<{path: string, message: string}>} */
const errors = [];

walkObject(json, (name, value, path) => {
if (name.startsWith('@') && !VALID_KEYWORDS.has(name)) {
errors.push({
path: path.join('/'),
message: 'Unknown keyword',
});
}
});

return errors;
};
135 changes: 135 additions & 0 deletions lighthouse-core/lib/sd-validation/schema-validator.js
@@ -0,0 +1,135 @@
/**
* @license Copyright 2018 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

const walkObject = require('./helpers/walk-object.js');
const schemaStructure = require('./assets/schema-tree.json');
// Key whose value lists the type(s) of the object it appears on; encountering it
// during the walk triggers validation of that object's keys.
const TYPE_KEYWORD = '@type';
// Matches the "http://schema.org/" or "https://schema.org/" prefix. The pattern is
// not anchored, so it matches wherever the prefix appears in a string.
const SCHEMA_ORG_URL_REGEX = /https?:\/\/schema\.org\//;

/**
 * Removes the first occurrence of the schema.org URL prefix from a URI, turning
 * e.g. "http://schema.org/Person" into "Person". Other strings are returned as-is.
 *
 * @param {string} uri
 * @returns {string}
 */
function cleanName(uri) {
  const withoutSchemaOrgPrefix = uri.replace(SCHEMA_ORG_URL_REGEX, '');
  return withoutSchemaOrgPrefix;
}

/**
 * Lists the names of all properties usable on a schema type: the properties whose
 * `parent` list names the type itself, followed by the properties of each of its
 * parent types (collected recursively, so a name may appear more than once).
 *
 * @param {string} type Type name, with or without the schema.org URL prefix.
 * @returns {Array<string>}
 * @throws {Error} When the type is not present in the schema structure.
 */
function getPropsForType(type) {
  const typeName = cleanName(type);

  /** @type {Array<string>} */
  const collected = [];
  for (const prop of schemaStructure.properties) {
    if (prop.parent.includes(typeName)) collected.push(prop.name);
  }

  const typeEntry = findType(type);
  if (!typeEntry) throw new Error(`Unable to get props for missing type "${type}"`);

  // Append what each ancestor contributes, in the order the parents are declared.
  for (const parentType of typeEntry.parent) {
    for (const inheritedProp of getPropsForType(parentType)) {
      collected.push(inheritedProp);
    }
  }

  return collected;
}

/**
 * Looks a type up in the schema structure by its name, after stripping the
 * schema.org URL prefix.
 *
 * @param {string} type Type name, with or without the schema.org URL prefix.
 * @returns {{name: string, parent: Array<string>}|undefined} The first matching
 *     type entry, or `undefined` when there is none.
 */
function findType(type) {
  const wantedName = cleanName(type);

  for (const candidate of schemaStructure.types) {
    if (candidate.name === wantedName) return candidate;
  }

  return undefined;
}

/**
 * Checks the keys of an object against the properties known for its type(s) and
 * returns an array of error messages.
 *
 * When any of the types is unknown, property validation is skipped and only the
 * unknown types that carry the schema.org URL prefix are reported.
 *
 * @param {string|Array<string>} typeOrTypes Value of the object's type keyword.
 * @param {Array<string>} keys All keys of the object.
 * @returns {Array<string>}
 */
function validateObjectKeys(typeOrTypes, keys) {
  /** @type {Array<string>} */
  let types;

  if (typeof typeOrTypes === 'string') {
    types = [typeOrTypes];
  } else if (Array.isArray(typeOrTypes)) {
    const invalidIndex = typeOrTypes.findIndex(s => typeof s !== 'string');
    if (invalidIndex >= 0) return [`Unknown value type at index ${invalidIndex}`];
    types = typeOrTypes;
  } else {
    return ['Unknown value type'];
  }

  const unknownTypes = types.filter(t => !findType(t));

  if (unknownTypes.length > 0) {
    /** @type {Array<string>} */
    const typeErrors = [];
    for (const unknownType of unknownTypes) {
      if (SCHEMA_ORG_URL_REGEX.test(unknownType)) {
        typeErrors.push(`Unrecognized schema.org type ${unknownType}`);
      }
    }
    return typeErrors;
  }

  /** @type {Set<string>} */
  const allKnownProps = new Set();
  for (const type of types) {
    for (const prop of getPropsForType(type)) {
      allKnownProps.add(prop);
    }
  }

  /** @type {Array<string>} */
  const keyErrors = [];
  for (const key of keys) {
    // Skip JSON-LD keywords (including invalid ones as they were already flagged
    // in the json-ld validator).
    if (key.indexOf('@') === 0) continue;

    // Remove Schema.org input/output constraints
    // http://schema.org/docs/actions.html#part-4
    const propName = cleanName(key).replace(/-(input|output)$/, '');

    if (!allKnownProps.has(propName)) {
      keyErrors.push(`Unexpected property "${propName}"`);
    }
  }

  return keyErrors;
}

/**
* @param {LH.StructuredData.ExpandedSchemaRepresentation|null} expandedObj Valid JSON-LD object in expanded form
* @return {Array<{path: string, message: string}>}
*/
module.exports = function validateSchemaOrg(expandedObj) {
/** @type {Array<{path: string, message: string}>} */
const errors = [];

if (expandedObj === null) {
return errors;
}

// If the array only has a single item, treat it as if it was at the root to simplify the error path.
// Arrays longer than a single item are handled in `walkObject` below.
if (Array.isArray(expandedObj) && expandedObj.length === 1) {
expandedObj = expandedObj[0];
}

walkObject(expandedObj, (name, value, path, obj) => {
if (name === TYPE_KEYWORD) {
const keyErrorMessages = validateObjectKeys(value, Object.keys(obj));

keyErrorMessages.forEach(message =>
errors.push({
// get rid of the first chunk (/@type) as it's the same for all errors
path:
'/' +
path
.slice(0, -1)
.map(cleanName)
.join('/'),
message,
})
);
}
});

return errors;
};
@@ -0,0 +1,30 @@
/**
* @license Copyright 2018 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

/**
* Call this script to update assets/jsonldcontext.json with the latest schema.org spec
*/

const fetch = require('isomorphic-fetch');
const path = require('path');
const fs = require('fs');

// URL the context is fetched from by `run`.
const SCHEMA_ORG_URL = 'https://schema.org';
// File the fetched context is written to, resolved relative to this script's directory.
const CONTEXT_FILE = path.join(__dirname, '../assets/jsonldcontext.json');

/**
 * Fetches the JSON-LD context from SCHEMA_ORG_URL and writes it, pretty-printed,
 * to CONTEXT_FILE.
 *
 * Failures are logged instead of thrown; in that case the context file is left
 * untouched and the process exit code is set to 1 so callers can detect the failure.
 *
 * @returns {Promise<void>}
 */
async function run() {
  try {
    const response = await fetch(SCHEMA_ORG_URL, {headers: {Accept: 'application/ld+json'}});

    // An HTTP error may still carry a JSON body; never let it overwrite the context file.
    if (!response.ok) {
      throw new Error(
        `Unexpected response ${response.status} ${response.statusText} from ${SCHEMA_ORG_URL}`
      );
    }

    const data = await response.json();
    fs.writeFileSync(CONTEXT_FILE, JSON.stringify(data, null, 2));
    console.log('Success.'); // eslint-disable-line no-console
  } catch (e) {
    console.error(e); // eslint-disable-line no-console
    process.exitCode = 1;
  }
}

run();

0 comments on commit 3f15b67

Please sign in to comment.