Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature / Use AST to translate WHERE clause (#54)
* replace regex matching with AST parsing
* add tests from #53
* update a test based on the bug story
- Loading branch information
1 parent
92c8f16
commit cfa88c7
Showing 3 changed files with 143 additions and 126 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,165 +1,149 @@ | ||
const OPERATORS = ['>', '<', '=', '>=', '<=', 'like', 'ilike', 'in', 'is'] | ||
const _ = require('lodash'); | ||
const Parser = require('flora-sql-parser').Parser; | ||
const parser = new Parser(); | ||
|
||
/**
 * Tag a node with the column it is compared against.
 *
 * Value nodes carrying a `columnNode` reference can later be decoded against
 * that column's coded-value domain. When the target is an expression list
 * (the right-hand side of `IN` / `BETWEEN`), every item of the list is tagged
 * too, because the items are converted individually.
 *
 * @param {object} columnNode candidate column node; ignored unless it is a `column_ref`
 * @param {object} target node on the other side of the operator
 */
function linkColumn(columnNode, target) {
  if (columnNode.type !== 'column_ref') return

  target.columnNode = columnNode

  if (target.type === 'expr_list' && Array.isArray(target.value)) {
    target.value.forEach((item) => {
      if (item && typeof item === 'object') item.columnNode = columnNode
    })
  }
}

/**
 * Convert an expression node to its string representation.
 *
 * Unary expressions are rendered as `<operator> <operand>`. The literal
 * comparison `1=1` is returned verbatim. `BETWEEN` / `NOT BETWEEN` are
 * rendered as `<left> BETWEEN <low> AND <high>` when the right-hand side is a
 * two-item expression list. Every other binary expression is rendered as
 * `<left> <operator> <right>`.
 *
 * @param {object} node AST expression node
 * @param {object} options winnow options
 * @return {string} expression string
 */
function handleExpr(node, options) {
  let expr

  if (node.type === 'unary_expr') {
    expr = `${node.operator} ${traverse(node.expr, options)}`
  } else if (node.operator === '=' && node.left.value === 1 && node.right.value === 1) {
    // a special case related to arcgis server
    return '1=1'
  } else {
    // store the column node for value decoding
    linkColumn(node.left, node.right)
    linkColumn(node.right, node.left)

    const left = traverse(node.left, options)
    const isRange = /^(NOT\s+)?BETWEEN$/i.test(node.operator) &&
      node.right.type === 'expr_list' &&
      Array.isArray(node.right.value) &&
      node.right.value.length === 2

    if (isRange) {
      // a range is "low AND high", not a parenthesised, comma separated list
      const low = traverse(node.right.value[0], options)
      const high = traverse(node.right.value[1], options)
      expr = `${left} ${node.operator} ${low} AND ${high}`
    } else {
      expr = `${left} ${node.operator} ${traverse(node.right, options)}`
    }
  }

  if (node.parentheses) {
    expr = `(${expr})`
  }

  return expr
}
|
||
/**
 * Convert an expression list node to its string representation.
 *
 * Each item of the list is converted with `traverse`, the results are joined
 * with commas and the whole list is wrapped in parentheses.
 *
 * @param {object} node AST expression list node
 * @param {object} options winnow options
 * @return {string} expression list string
 */
function handleExprList(node, options) {
  const values = node.value.map((valueNode) => traverse(valueNode, options)).join(',')
  return `(${values})`
}
|
||
/**
 * Convert a function node to its string representation.
 *
 * The result is the function name immediately followed by its parenthesised
 * argument list, as produced by `handleExprList`.
 *
 * @param {object} node AST function node
 * @param {object} options winnow options
 * @return {string} function string
 */
function handleFunction(node, options) {
  const args = handleExprList(node.args, options)
  return `${node.name}${args}`
}
|
||
/**
 * Convert a column node to its string representation.
 *
 * The column name is wrapped in backticks and selected from `attributes` when
 * `options.esri` is truthy, otherwise from `properties`.
 *
 * @param {object} node AST column node
 * @param {object} options winnow options
 * @return {string} column string
 */
function handleColumn(node, options) {
  const selector = options.esri ? 'attributes' : 'properties'
  return `${selector}->\`${node.column}\``
}
|
||
/**
 * Convert a value node to its string representation.
 *
 * If the value node has a reference to its column and this column is an encoded
 * field (it has `domain.codedValues` in `options.esriFields`), the value is
 * replaced by the name of the coded value whose code strictly equals it.
 *
 * String values are wrapped in single quotes, and any single quote inside the
 * value is doubled so the result stays one well-formed SQL string literal.
 * Non-string values (numbers, booleans, null) are returned unchanged.
 *
 * @param {object} node AST value node
 * @param {object} options winnow options
 * @return {string|number|boolean|null} value ready to be embedded in the clause
 */
function handleValue(node, options) {
  let value = node.value

  if (node.columnNode) {
    const fields = Array.isArray(options.esriFields) ? options.esriFields : []
    const field = fields.find((candidate) => Boolean(candidate) && candidate.name === node.columnNode.column)
    const codedValues = field && field.domain && field.domain.codedValues

    if (Array.isArray(codedValues)) {
      const actual = codedValues.find((codedValue) => Boolean(codedValue) && codedValue.code === value)

      if (actual) {
        value = actual.name
      }
    }
  }

  if (typeof value === 'string') {
    // double embedded quotes, otherwise a value such as O'Brien ends the literal early
    value = `'${value.replace(/'/g, "''")}'`
  }

  return value
}
|
||
/**
 * Traverse a SQL AST and return its string representation.
 *
 * Dispatches on `node.type` to the matching handler. A missing node yields an
 * empty string.
 *
 * @param {object} node AST node
 * @param {object} options winnow options
 * @return {string} AST string
 * @throws {Error} when the node type is not one of the supported types
 */
function traverse(node, options) {
  if (!node) {
    return ''
  }

  switch (node.type) {
    case 'unary_expr':
    case 'binary_expr':
      return handleExpr(node, options)
    case 'function':
      return handleFunction(node, options)
    case 'expr_list':
      return handleExprList(node, options)
    case 'column_ref':
      return handleColumn(node, options)
    case 'string':
    case 'number':
    case 'null':
    case 'bool':
      return handleValue(node, options)
    default:
      throw new Error('Unrecognized AST node: \n' + JSON.stringify(node, null, 2))
  }
}
|
||
/**
 * Creates a viable SQL where clause from a passed in SQL (from a url "where" param).
 *
 * Returns an empty string when no options or no `where` value is supplied.
 * Otherwise the where string is parsed into an AST and converted back to SQL
 * by `traverse`.
 *
 * @param {object} options winnow options; `options.where` holds the where string
 * @return {string} SQL where clause
 */
function createClause(options) {
  options = options || {}
  if (!options.where) return ''

  // AST parsing requires a complete SQL.
  const whereTree = parser.parse('SELECT * WHERE ' + options.where).where
  return traverse(whereTree, options)
}
|
||
module.exports = { createClause } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
const test = require('tape') | ||
const Winnow = require('../src') | ||
|
||
// Compiling a geometry filter whose spatial reference is given as WKT must
// not throw and must produce a truthy query.
test('compiling a complex query', t => {
  try {
    t.ok(Winnow.prepareQuery({
      geometry: {
        xmin: -37237674.195623085,
        ymin: 676003.5082798181,
        xmax: 37237674.195623085,
        ymax: 12416731.052879848,
        spatialReference: {
          wkt: 'PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-6.828007551173374],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]'
        }
      }
    }), 'query compiled')
    t.end()
  } catch (e) {
    t.fail(e)
    t.end()
  }
})
|
||
// A where clause chaining several AND-ed comparisons must compile without
// throwing and produce a truthy query.
test('compiling with several and statements', t => {
  try {
    t.ok(Winnow.prepareQuery({where: 'ELEVATION >= 1165 AND ELEVATION <= 4365 AND POP1990 >= 8247 AND POP1990 <= 5700236'}))
    t.end()
  } catch (e) {
    t.fail(e)
    t.end()
  }
})