Skip to content

Commit

Permalink
Merge pull request #2035 from brianc/bmc/add-pg-query-stream
Browse files Browse the repository at this point in the history
Add pg-query-stream module
  • Loading branch information
brianc committed Dec 23, 2019
2 parents 0189c95 + fdae851 commit bd3efaa
Show file tree
Hide file tree
Showing 24 changed files with 2,309 additions and 2 deletions.
2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -10,7 +10,7 @@
"packages/*"
],
"scripts": {
"test": "yarn lerna exec --parallel yarn test",
"test": "yarn lerna exec yarn test",
"lint": "yarn lerna exec --parallel yarn lint"
},
"devDependencies": {
Expand Down
1 change: 1 addition & 0 deletions packages/pg-query-stream/.gitignore
@@ -0,0 +1 @@
node_modules
9 changes: 9 additions & 0 deletions packages/pg-query-stream/LICENSE
@@ -0,0 +1,9 @@
The MIT License (MIT)

Copyright (c) 2013 Brian M. Carlson

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
68 changes: 68 additions & 0 deletions packages/pg-query-stream/README.md
@@ -0,0 +1,68 @@
# pg-query-stream

[![Build Status](https://travis-ci.org/brianc/node-pg-query-stream.svg)](https://travis-ci.org/brianc/node-pg-query-stream)

Receive result rows from [pg](https://github.com/brianc/node-postgres) as a readable (object) stream.


## installation

```bash
$ npm install pg --save
$ npm install pg-query-stream --save
```

_requires pg>=2.8.1_


## use

```js
const pg = require('pg')
const QueryStream = require('pg-query-stream')
const JSONStream = require('JSONStream')

//pipe 1,000,000 rows to stdout without blowing up your memory usage
pg.connect((err, client, done) => {
if (err) throw err;
const query = new QueryStream('SELECT * FROM generate_series(0, $1) num', [1000000])
const stream = client.query(query)
//release the client when the stream is finished
stream.on('end', done)
stream.pipe(JSONStream.stringify()).pipe(process.stdout)
})
```

The stream uses a cursor on the server so it efficiently keeps only a low number of rows in memory.

This is especially useful when doing [ETL](http://en.wikipedia.org/wiki/Extract,_transform,_load) on a huge table. Using manual `limit` and `offset` queries to fake out async iteration through your data is cumbersome, and _way way way_ slower than using a cursor.

_note: this module only works with the JavaScript client, and does not work with the native bindings. libpq doesn't expose the protocol at a level where a cursor can be manipulated directly_

## contribution

I'm very open to contribution! Open a pull request with your code or idea and we'll talk about it. If it's not way insane we'll merge it in too: isn't open source awesome?

## license

The MIT License (MIT)

Copyright (c) 2013 Brian M. Carlson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
61 changes: 61 additions & 0 deletions packages/pg-query-stream/index.js
@@ -0,0 +1,61 @@
'use strict'
var Cursor = require('pg-cursor')
var Readable = require('stream').Readable

/**
 * Object-mode Readable that serves query result rows by pulling them in
 * batches from an underlying Cursor.
 *
 * The instance doubles as the query object handed to the client: `submit`
 * and every `handle*` callback are forwarded to the cursor.
 */
class PgQueryStream extends Readable {
  /**
   * @param {string} text - query text, handed to the Cursor unchanged
   * @param {Array} [values] - query parameters, handed to the Cursor unchanged
   * @param {Object} [options] - merged over `{ objectMode: true }` for the
   *   Readable base class and also passed to the Cursor; `options.batchSize`
   *   (default 100) is the minimum row count requested per cursor read
   */
  constructor (text, values, options) {
    super(Object.assign({ objectMode: true }, options))
    const cursor = new Cursor(text, values, options)
    this.cursor = cursor
    this._reading = false
    this._closed = false
    this.batchSize = (options && options.batchSize) || 100

    // Submittable callbacks are served by the cursor; expose bound copies so
    // they keep working when invoked detached from this stream.
    const forwarded = [
      'handleRowDescription',
      'handleDataRow',
      'handlePortalSuspended',
      'handleCommandComplete',
      'handleReadyForQuery',
      'handleError'
    ]
    for (const method of forwarded) {
      this[method] = cursor[method].bind(cursor)
    }
  }

  /**
   * Hands the connection to the cursor, which performs the actual submit.
   * @param {Object} connection
   */
  submit (connection) {
    this.cursor.submit(connection)
  }

  /**
   * Marks the stream closed (no further rows are requested or pushed) and
   * closes the cursor.
   * @param {Function} [callback] - passed to the cursor's close; when omitted
   *   the stream emits 'close' from the cursor's close callback instead
   */
  close (callback) {
    this._closed = true
    if (callback) {
      this.cursor.close(callback)
    } else {
      this.cursor.close(() => this.emit('close'))
    }
  }

  /**
   * Readable hook: requests the next batch of rows from the cursor.
   * @param {number} size - amount requested by the Readable machinery; the
   *   cursor is asked for at least `batchSize` rows
   * @returns {boolean|undefined} false when the request is ignored because a
   *   read is already outstanding or the stream is closed
   */
  _read (size) {
    if (this._closed || this._reading) {
      return false
    }
    this._reading = true
    const rowCount = Math.max(size, this.batchSize)

    const onBatch = (err, rows) => {
      // close() was called while this read was outstanding: drop the result
      if (this._closed) {
        return
      }
      if (err) {
        return this.emit('error', err)
      }
      if (rows.length) {
        // re-arm before pushing so the next _read can reach the cursor
        this._reading = false
        for (const row of rows) {
          this.push(row)
        }
        return
      }
      // an empty batch means the cursor has been read to the end
      this._closed = true
      setImmediate(() => this.emit('close'))
      return this.push(null)
    }

    this.cursor.read(rowCount, onBatch)
  }
}

module.exports = PgQueryStream
38 changes: 38 additions & 0 deletions packages/pg-query-stream/package.json
@@ -0,0 +1,38 @@
{
"name": "pg-query-stream",
"version": "2.0.1",
"description": "Postgres query result returned as readable stream",
"main": "index.js",
"scripts": {
"test": "mocha",
"lint": "eslint ."
},
"repository": {
"type": "git",
"url": "git://github.com/brianc/node-postgres.git"
},
"keywords": [
"postgres",
"pg",
"query",
"stream"
],
"author": "Brian M. Carlson",
"license": "MIT",
"bugs": {
"url": "https://github.com/brianc/node-postgres/issues"
},
"devDependencies": {
"JSONStream": "~0.7.1",
"concat-stream": "~1.0.1",
"eslint-plugin-promise": "^3.5.0",
"mocha": "^6.2.2",
"pg": "^7.5.0",
"stream-spec": "~0.3.5",
"stream-tester": "0.0.5",
"through": "~2.3.4"
},
"dependencies": {
"pg-cursor": "^2.0.1"
}
}
57 changes: 57 additions & 0 deletions packages/pg-query-stream/test/async-iterator.es6
@@ -0,0 +1,57 @@
const QueryStream = require('../')
const pg = require('pg')
const assert = require('assert')

// Every test below expects this query to yield 201 rows.
const queryText = 'SELECT * FROM generate_series(0, 200) num'

describe('Async iterator', () => {
  // Iterating the value returned by client.query() drains the whole result.
  it('works', async () => {
    const stream = new QueryStream(queryText, [])
    const client = new pg.Client()
    await client.connect()
    const collected = []
    for await (const row of client.query(stream)) {
      collected.push(row)
    }
    assert.equal(collected.length, 201)
    await client.end()
  })

  // The client must still be usable for a plain query once iteration is done.
  it('can async iterate and then do a query afterwards', async () => {
    const stream = new QueryStream(queryText, [])
    const client = new pg.Client()
    await client.connect()
    const streamed = []
    for await (const row of client.query(stream)) {
      streamed.push(row)
    }
    assert.equal(streamed.length, 201)
    const followUp = await client.query('SELECT NOW()')
    assert.equal(followUp.rows.length, 1)
    await client.end()
  })

  // Three runs compete for a pool limited to a single client.
  it('can async iterate multiple times with a pool', async () => {
    const pool = new pg.Pool({ max: 1 })
    const allRows = []

    const streamOnce = async () => {
      // check out a client, stream the query on it, then hand it back
      const client = await pool.connect()
      const stream = new QueryStream(queryText, [])
      let seen = 0
      client.query(stream)
      for await (const row of stream) {
        seen += 1
        allRows.push(row)
      }
      assert.equal(seen, 201)
      client.release()
    }

    const runs = [streamOnce(), streamOnce(), streamOnce()]
    await Promise.all(runs)
    assert.equal(allRows.length, 603)
    await pool.end()
  })
})
4 changes: 4 additions & 0 deletions packages/pg-query-stream/test/async-iterator.js
@@ -0,0 +1,4 @@
// `for await` over a stream needs Node 10 or newer. Gate on the numeric major
// version instead of a 'v8' string prefix, so that every older runtime (not
// just v8.x) skips the ES2018 test file rather than failing to parse it.
var nodeMajorVersion = parseInt(process.versions.node.split('.')[0], 10)
if (nodeMajorVersion >= 10) {
  require('./async-iterator.es6')
}
52 changes: 52 additions & 0 deletions packages/pg-query-stream/test/close.js
@@ -0,0 +1,52 @@
var assert = require('assert')
var concat = require('concat-stream')

var QueryStream = require('../')
var helper = require('./helper')

// Suite: a stream that is read to the end must emit 'close'.
// NOTE(review): `helper` comes from ./helper and is not visible here; it
// presumably wraps a mocha suite and supplies a connected client - confirm.
helper('close', function (client) {
it('emits close', function (done) {
// batchSize and highWaterMark of 2 keep each cursor read small for this short query
var stream = new QueryStream('SELECT * FROM generate_series(0, $1) num', [3], {batchSize: 2, highWaterMark: 2})
var query = client.query(stream)
// drain the stream; the concatenated result itself is ignored
query.pipe(concat(function () {}))
// the test passes as soon as 'close' fires (and times out otherwise)
query.on('close', done)
})
})

// Suite: close() called long before a large result set is exhausted must stop
// reading and still lead to a 'close' event.
helper('early close', function (client) {
it('can be closed early', function (done) {
var stream = new QueryStream('SELECT * FROM generate_series(0, $1) num', [20000], {batchSize: 2, highWaterMark: 2})
var query = client.query(stream)
// counts 'readable' events; each one also consumes via query.read()
var readCount = 0
query.on('readable', function () {
readCount++
query.read()
})
// registered after the counting listener, so it runs second on the first
// 'readable' event; close() without a callback makes the stream emit 'close'
query.once('readable', function () {
query.close()
})
query.on('close', function () {
// NOTE(review): the message talks about rows, but readCount counts
// 'readable' events - confirm which one is intended
assert(readCount < 10, 'should not have read more than 10 rows')
done()
})
})
})

// Suite: close(callback) must invoke the callback and must not emit 'close'.
helper('close callback', function (client) {
it('notifies an optional callback when the conneciton is closed', function (done) {
var stream = new QueryStream('SELECT * FROM generate_series(0, $1) num', [10], {batchSize: 2, highWaterMark: 2})
var query = client.query(stream)
query.once('readable', function () { // only reading once
query.read()
})
// second one-shot listener on the same first 'readable' event: close with a callback
query.once('readable', function () {
query.close(function () {
// nothing to assert. This test will time out if the callback does not work.
done()
})
})
// NOTE(review): QueryStream#close only emits 'close' itself when no callback
// is supplied, which is why this listener is expected never to run
query.on('close', function () {
assert(false, 'close event should not fire') // no close event because we did not read to the end of the stream.
})
})
})
22 changes: 22 additions & 0 deletions packages/pg-query-stream/test/concat.js
@@ -0,0 +1,22 @@
var assert = require('assert')
var concat = require('concat-stream')
var through = require('through')
var helper = require('./helper')

var QueryStream = require('../')

// Suite: rows piped through a transform and into concat-stream arrive complete.
helper('concat', (client) => {
  it('concats correctly', (done) => {
    const stream = new QueryStream('SELECT * FROM generate_series(0, 200) num', [])
    const query = client.query(stream)

    // `through` hands the transform its stream as `this`, so this one callback
    // has to stay a regular function
    const pluckNum = through(function (row) {
      this.push(row.num)
    })
    // 0 + 1 + ... + 200 === 20100
    const checkSum = concat((nums) => {
      const total = nums.reduce((sum, num) => sum + num)
      assert.equal(total, 20100)
    })

    query.pipe(pluckNum).pipe(checkSum)
    stream.on('end', done)
  })
})
10 changes: 10 additions & 0 deletions packages/pg-query-stream/test/config.js
@@ -0,0 +1,10 @@
var assert = require('assert')
var QueryStream = require('../')

// One options object configures both layers: highWaterMark is read back from
// the Readable state, batchSize from the stream instance itself.
const streamOptions = { highWaterMark: 999, batchSize: 88 }
const stream = new QueryStream('SELECT NOW()', [], streamOptions)

const readableState = stream._readableState
assert.equal(readableState.highWaterMark, 999)
assert.equal(stream.batchSize, 88)
22 changes: 22 additions & 0 deletions packages/pg-query-stream/test/error.js
@@ -0,0 +1,22 @@
var assert = require('assert')
var helper = require('./helper')

var QueryStream = require('../')

// Suite: a failing query surfaces its error on the stream and leaves the
// client usable afterwards.
helper('error', (client) => {
  it('receives error on stream', (done) => {
    const stream = new QueryStream('SELECT * FROM asdf num', [])
    const query = client.query(stream)

    // the query is expected to fail with error code 42P01
    const onError = (err) => {
      assert(err)
      assert.equal(err.code, '42P01')
      done()
    }
    // no-op consumer whose only job is to kick off reading
    const startReading = () => {}

    query.on('error', onError).on('data', startReading)
  })

  it('continues to function after stream', (done) => {
    client.query('SELECT NOW()', done)
  })
})
35 changes: 35 additions & 0 deletions packages/pg-query-stream/test/fast-reader.js
@@ -0,0 +1,35 @@
var assert = require('assert')
var helper = require('./helper')
var QueryStream = require('../')

// Suite: consume the whole result with synchronous stream.read() calls inside
// a 'readable' handler and check that every row was seen.
helper('fast reader', function (client) {
it('works', function (done) {
var stream = new QueryStream('SELECT * FROM generate_series(0, 200) num', [])
var query = client.query(stream)
// collects the `num` column of every row read
var result = []
stream.on('readable', function () {
var res = stream.read()
// keep reading until read() returns a falsy value (nothing buffered right now)
while (res) {
if (result.length !== 201) {
assert(res, 'should not return null on evented reader')
} else {
// a readable stream will emit a null datum when it finishes being readable
// https://nodejs.org/api/stream.html#stream_event_readable
// NOTE(review): `res` is always truthy inside this loop, so this branch
// looks unreachable unless more than 201 rows arrive - confirm intent
assert.equal(res, null)
}
// always true here for the same reason (loop condition)
if (res) {
result.push(res.num)
}
res = stream.read()
}
})
stream.on('end', function () {
// 0 + 1 + ... + 200 === 20100
var total = result.reduce(function (prev, cur) {
return prev + cur
})
assert.equal(total, 20100)
done()
})
// runs synchronously, before any event handler above can fire; presumably
// nothing is buffered yet, hence the expected null - confirm
assert.strictEqual(query.read(2), null)
})
})

0 comments on commit bd3efaa

Please sign in to comment.