Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
core(jsonld): add structured data validation (#6750)
- Loading branch information
1 parent
fcd8115
commit 3f15b67
Showing
17 changed files
with
17,947 additions
and
0 deletions.
There are no files selected for viewing
7,137 changes: 7,137 additions & 0 deletions
7,137
lighthouse-core/lib/sd-validation/assets/jsonldcontext.json
Large diffs are not rendered by default.
Oops, something went wrong.
9,900 changes: 9,900 additions & 0 deletions
9,900
lighthouse-core/lib/sd-validation/assets/schema-tree.json
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/** | ||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
/** | ||
* Recursively (DFS) traverses an object and calls provided function for each field. | ||
* | ||
* @param {*} obj | ||
* @param {function(string, any, Array<string>, any): void} callback | ||
* @param {Array<string>} path | ||
*/ | ||
module.exports = function walkObject(obj, callback, path = []) { | ||
if (obj === null) { | ||
return; | ||
} | ||
|
||
Object.entries(obj).forEach(([fieldName, fieldValue]) => { | ||
const newPath = Array.from(path); | ||
newPath.push(fieldName); | ||
|
||
callback(fieldName, fieldValue, newPath, obj); | ||
|
||
if (typeof fieldValue === 'object') { | ||
walkObject(fieldValue, callback, newPath); | ||
} | ||
}); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
/** | ||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const {URL} = require('../url-shim.js'); | ||
const jsonld = require('jsonld'); | ||
const schemaOrgContext = require('./assets/jsonldcontext.json'); | ||
const SCHEMA_ORG_HOST = 'schema.org'; | ||
|
||
/**
 * Document loader for jsonld that never touches the network. A request for the schema.org
 * root document is answered with the bundled copy; any other URL resolves to an empty document.
 *
 * @param {string} schemaUrl
 * @param {(err: null|Error, value?: any) => void} callback
 */
function documentLoader(schemaUrl, callback) {
  let parsedUrl;

  try {
    // The dummy base URL lets relative URLs parse instead of throwing.
    parsedUrl = new URL(schemaUrl, 'http://example.com');
  } catch (e) {
    return callback(new Error('Error parsing URL: ' + schemaUrl), undefined);
  }

  // Only the schema.org root is backed by local data; every other schema gets an empty object.
  const isSchemaOrgRoot = parsedUrl.host === SCHEMA_ORG_HOST && parsedUrl.pathname === '/';
  const document = isSchemaOrgRoot ? schemaOrgContext : {};

  callback(null, {document});
}
|
||
/** | ||
* Takes JSON-LD object and normalizes it by following the expansion algorithm | ||
* (https://json-ld.org/spec/latest/json-ld-api/#expansion). | ||
* | ||
* @param {any} inputObject | ||
* @returns {Promise<LH.StructuredData.ExpandedSchemaRepresentation|null>} | ||
*/ | ||
module.exports = async function expand(inputObject) { | ||
try { | ||
return await jsonld.expand(inputObject, {documentLoader}); | ||
} catch (err) { | ||
// jsonld wraps real errors in a bunch of junk, so see we have an underlying error first | ||
if (err.details && err.details.cause) throw err.details.cause; | ||
throw err; | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/** | ||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const jsonlint = require('jsonlint-mod'); | ||
|
||
/** | ||
* @param {string} input | ||
* @returns {{message: string, lineNumber: string|null}|null} | ||
*/ | ||
module.exports = function parseJSON(input) { | ||
try { | ||
jsonlint.parse(input); | ||
} catch (error) { | ||
/** @type {string|null} */ | ||
let line = error.at; | ||
|
||
// extract line number from message | ||
if (!line) { | ||
const regexLineResult = error.message.match(/Parse error on line (\d+)/); | ||
|
||
if (regexLineResult) { | ||
line = regexLineResult[1]; | ||
} | ||
} | ||
|
||
|
||
// jsonlint error message points to a specific character, but we just want the message. | ||
// Example: | ||
// ---------^ | ||
// Unexpected character { | ||
let message = /** @type {string} */ (error.message); | ||
const regexMessageResult = error.message.match(/-+\^\n(.+)$/); | ||
|
||
if (regexMessageResult) { | ||
message = regexMessageResult[1]; | ||
} | ||
|
||
return { | ||
message, | ||
lineNumber: line, | ||
}; | ||
} | ||
|
||
return null; | ||
}; |
50 changes: 50 additions & 0 deletions
50
lighthouse-core/lib/sd-validation/jsonld-keyword-validator.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/** | ||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const walkObject = require('./helpers/walk-object.js'); | ||
|
||
// Keywords defined by the JSON-LD 1.1 Recommendation:
// https://www.w3.org/TR/json-ld11/#syntax-tokens-and-keywords
// Any other field name starting with "@" is reported as an unknown keyword.
const VALID_KEYWORDS = new Set([
  '@base',
  '@container',
  '@context',
  '@direction',
  '@graph',
  '@id',
  '@import',
  '@included',
  '@index',
  '@json',
  '@language',
  '@list',
  '@nest',
  '@none',
  '@prefix',
  '@propagate',
  '@protected',
  '@reverse',
  '@set',
  '@type',
  '@value',
  '@version',
  '@vocab',
]);
|
||
/** | ||
* @param {*} json | ||
* @return {Array<{path: string, message: string}>} | ||
*/ | ||
module.exports = function validateJsonLD(json) { | ||
/** @type {Array<{path: string, message: string}>} */ | ||
const errors = []; | ||
|
||
walkObject(json, (name, value, path) => { | ||
if (name.startsWith('@') && !VALID_KEYWORDS.has(name)) { | ||
errors.push({ | ||
path: path.join('/'), | ||
message: 'Unknown keyword', | ||
}); | ||
} | ||
}); | ||
|
||
return errors; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/** | ||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const walkObject = require('./helpers/walk-object.js'); | ||
const schemaStructure = require('./assets/schema-tree.json'); | ||
const TYPE_KEYWORD = '@type'; | ||
const SCHEMA_ORG_URL_REGEX = /https?:\/\/schema\.org\//; | ||
|
||
/**
 * Strips the schema.org URL prefix (http or https) from a URI, leaving the bare name.
 * Strings without that prefix are returned unchanged.
 *
 * @param {string} uri
 * @returns {string}
 */
function cleanName(uri) {
  const bareName = uri.replace(SCHEMA_ORG_URL_REGEX, '');

  return bareName;
}
|
||
/**
 * Collects the names of all properties usable on a schema.org type: the properties declared
 * for the type itself followed by those of each of its parent types, recursively. A property
 * reachable through several ancestors appears more than once.
 *
 * @param {string} type Type name, with or without the schema.org URL prefix.
 * @returns {Array<string>}
 * @throws {Error} When the type is not present in the schema tree.
 */
function getPropsForType(type) {
  const typeName = cleanName(type);

  /** @type {Array<string>} */
  const ownProps = [];
  for (const prop of schemaStructure.properties) {
    if (prop.parent.includes(typeName)) {
      ownProps.push(prop.name);
    }
  }

  const typeEntry = findType(type);
  if (!typeEntry) throw new Error(`Unable to get props for missing type "${type}"`);

  // Properties are inherited, so append whatever each parent type contributes.
  let collectedProps = ownProps;
  for (const parentType of typeEntry.parent) {
    collectedProps = collectedProps.concat(getPropsForType(parentType));
  }

  return collectedProps;
}
|
||
/**
 * Looks up a type in the schema tree by name.
 *
 * @param {string} type Type name, with or without the schema.org URL prefix.
 * @returns {{name: string, parent: Array<string>}|undefined} The first matching entry, or
 *     `undefined` when the type is unknown.
 */
function findType(type) {
  const wantedName = cleanName(type);

  for (const candidate of schemaStructure.types) {
    if (candidate.name === wantedName) {
      return candidate;
    }
  }

  return undefined;
}
|
||
/**
 * Checks the keys of an object against the properties known for its type(s) and returns an
 * array of error messages (empty when nothing is wrong).
 *
 * Property validation only happens when every type is known. If any type is unknown, only
 * the unknown types that look like schema.org URLs are reported.
 *
 * @param {string|Array<string>} typeOrTypes
 * @param {Array<string>} keys
 * @returns {Array<string>}
 */
function validateObjectKeys(typeOrTypes, keys) {
  /** @type {Array<string>} */
  let types;

  if (Array.isArray(typeOrTypes)) {
    const invalidIndex = typeOrTypes.findIndex(s => typeof s !== 'string');
    if (invalidIndex >= 0) return [`Unknown value type at index ${invalidIndex}`];
    types = typeOrTypes;
  } else if (typeof typeOrTypes === 'string') {
    types = [typeOrTypes];
  } else {
    return ['Unknown value type'];
  }

  const unrecognizedTypes = types.filter(candidate => !findType(candidate));

  if (unrecognizedTypes.length > 0) {
    /** @type {Array<string>} */
    const typeErrors = [];
    for (const candidate of unrecognizedTypes) {
      // Unknown types from other vocabularies are silently ignored.
      if (SCHEMA_ORG_URL_REGEX.test(candidate)) {
        typeErrors.push(`Unrecognized schema.org type ${candidate}`);
      }
    }
    return typeErrors;
  }

  // A key is acceptable if any one of the object's types (or their ancestors) declares it.
  /** @type {Set<string>} */
  const allKnownProps = new Set();
  for (const type of types) {
    for (const prop of getPropsForType(type)) {
      allKnownProps.add(prop);
    }
  }

  /** @type {Array<string>} */
  const keyErrors = [];
  for (const key of keys) {
    // Skip JSON-LD keywords (including invalid ones as they were already flagged in the json-ld validator)
    if (key.indexOf('@') === 0) continue;

    // remove Schema.org input/output constraints http://schema.org/docs/actions.html#part-4
    const propName = cleanName(key).replace(/-(input|output)$/, '');

    if (!allKnownProps.has(propName)) {
      keyErrors.push(`Unexpected property "${propName}"`);
    }
  }

  return keyErrors;
}
|
||
/** | ||
* @param {LH.StructuredData.ExpandedSchemaRepresentation|null} expandedObj Valid JSON-LD object in expanded form | ||
* @return {Array<{path: string, message: string}>} | ||
*/ | ||
module.exports = function validateSchemaOrg(expandedObj) { | ||
/** @type {Array<{path: string, message: string}>} */ | ||
const errors = []; | ||
|
||
if (expandedObj === null) { | ||
return errors; | ||
} | ||
|
||
// If the array only has a single item, treat it as if it was at the root to simplify the error path. | ||
// Arrays longer than a single item are handled in `walkObject` below. | ||
if (Array.isArray(expandedObj) && expandedObj.length === 1) { | ||
expandedObj = expandedObj[0]; | ||
} | ||
|
||
walkObject(expandedObj, (name, value, path, obj) => { | ||
if (name === TYPE_KEYWORD) { | ||
const keyErrorMessages = validateObjectKeys(value, Object.keys(obj)); | ||
|
||
keyErrorMessages.forEach(message => | ||
errors.push({ | ||
// get rid of the first chunk (/@type) as it's the same for all errors | ||
path: | ||
'/' + | ||
path | ||
.slice(0, -1) | ||
.map(cleanName) | ||
.join('/'), | ||
message, | ||
}) | ||
); | ||
} | ||
}); | ||
|
||
return errors; | ||
}; |
30 changes: 30 additions & 0 deletions
30
lighthouse-core/lib/sd-validation/scripts/download-jsonldcontext.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/** | ||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
/** | ||
* Call this script to update assets/jsonldcontext.json with the latest schema.org spec | ||
*/ | ||
|
||
const fetch = require('isomorphic-fetch'); | ||
const path = require('path'); | ||
const fs = require('fs'); | ||
|
||
const SCHEMA_ORG_URL = 'https://schema.org'; | ||
const CONTEXT_FILE = path.join(__dirname, '../assets/jsonldcontext.json'); | ||
|
||
/**
 * Downloads the schema.org JSON-LD context from SCHEMA_ORG_URL and writes it, pretty-printed,
 * to CONTEXT_FILE.
 *
 * Never rejects: any failure is logged and reported through a non-zero process exit code, and
 * the existing file is left untouched unless a successful response was parsed.
 *
 * @returns {Promise<void>}
 */
async function run() {
  try {
    const response = await fetch(SCHEMA_ORG_URL, {headers: {Accept: 'application/ld+json'}});

    // An error response may still carry a JSON body; don't overwrite the asset with it.
    if (!response.ok) {
      throw new Error(`Unexpected response from ${SCHEMA_ORG_URL}: HTTP ${response.status}`);
    }

    const data = await response.json();
    fs.writeFileSync(CONTEXT_FILE, JSON.stringify(data, null, 2));
    console.log('Success.'); // eslint-disable-line no-console
  } catch (e) {
    console.error(e); // eslint-disable-line no-console
    // Let the calling shell see the failure. Setting exitCode (rather than calling
    // process.exit) lets pending output flush first.
    process.exitCode = 1;
  }
}
|
||
run(); |
Oops, something went wrong.