1.1.1 • Published 3 years ago

unmind-athena-client v1.1.1

Weekly downloads
17
License
-
Repository
github
Last release
3 years ago

Build Status Coverage Status

athena-client - a simple aws athena client for nodejs and typescript

This is version 2.x document. 1.x document is here

Install with:

npm install athena-client

Usage Example

Create Client

var clientConfig = {
    bucketUri: 's3://xxxx'
}

var awsConfig = {
    region: 'xxxx', 
}
 
var athena = require("athena-client")
var client = athena.createClient(clientConfig, awsConfig)

Receive result by Callback

client.execute('SELECT 1', function(err, data) {
    if (err) {
        return console.error(err)
    }
    console.log(data)
})

Receive result by Promise

client.execute('SELECT 1').toPromise()
.then(function(data) {
    console.log(data)
})
.catch(function(err) {
    console.error(err)
})

Receive result by Stream

var stream = client.execute('SELECT 1').toStream()
stream.on('data', function(record) {
  console.log(record)
})
stream.on('query_end', function(queryExecution) {
  console.log(queryExecution)
})
stream.on('end', function() {
  console.log('end')
})
stream.on('error', function(e) {
  console.error(e)
})

API

athena = require("athena-client")

This module exposes the createClient and setConcurrentExecMax method, which execute query to AWS Athena.

client = athena.createClient(clientConfig, awsConfig)

Returns a client instance attached to the account specified by the given clientConfig and awsConfig.

athena.setConcurrentExecMax(concurrentExecMax)

Set the number of cuncurrent execution of query max. It should be set smaller than AWS Service limit (default is 5).

clientConfig object properties

PropertyDefaultDescription
bucketUriRequiredURI of S3 bucket for saving a query results file (.csv) and a metadata file (.csv.metadata)
pollingInterval1000Optional. Interval of polling sql results (ms)
queryTimeout0Optional. Timeout of query execution. 0 is no timeout
database'default'Optional. The name of the database within which the query executes
baseRetryWait200Optional. Used to calculate retry timeout for a particular query execution request
retryWaitMax10000Optional. Maximum retry timeout for starting a new query execution
retryCountMax10Optional. Maximum number of retry attempts for a particular query execution request
execRightCheckInterval100Optional. Timeout when number of maximum concurrent requests is exceeded
encryptionOptionundefinedOptional. Indicates the S3 encryption option used to encrypt the query results. Possible values include: SSE_S3, SSE_KMS, or CSE_KMS
encryptionKmsKeyundefinedOptional but required if encryptionOption is set to SSE_KMS or CSE_KMS. Value is the KMS key ARN or ID
skipFetchResultfalseOptional. If true, do not return the result of the query when the athena query is finished. This option is used for CTAS
concurrentExecMax5DEPRECATED. Use athena.setConcurrentExecMax() instead
workGroup'primary'Optional. The name of the workgroup within which the query executes

awsConfig object properties

PropertyDefaultDescription
regionRequiredYour Athena and S3 region
accessKeyIdundefinedOptional. Your IAM accessKeyId
secretAccessKeyundefinedOptional. Your IAM secretAccessKey

client.execute(query, callback)

It will return the following result. If you want to know more about params of queryExecution, please refer to the aws-sdk document

{
    "records": [
        {"_col0:": "1"}
    ],
    "queryExecution": {
        "Query": "SELECT 1", 
        "QueryExecutionId": "55571bb9-8e4e-4274-90b7-8cffe4539c3c", 
        "ResultConfiguration": {
            "OutputLocation": "s3://bucket/55571bb9-8e4e-4274-90b7-8cffe4539c3c"
        }, 
        "Statistics": {
            "DataScannedInBytes": 0, 
            "EngineExecutionTimeInMillis": 137
        }, 
        "Status": {
            "CompletionDateTime": "2017-12-31T16:03:53.493Z", 
            "State": "SUCCEEDED", 
            "SubmissionDateTime": "2017-12-31T16:03:53.209Z"
        }
    }
}

client.execute(query).toPromise()

Returns a Promise that resolves the result of your query.

client.execute(query).toStream()

Returns a Stream to buffer the results of your query. This method is recommended for large result sets.

// Get record one by one
stream.on('data', function(record) {
  console.log(record) // {"col1": "val1", "col2": "val2"}
})

// When query succeed, this event will emit.
stream.on('query_end', function(queryExecution) {
  console.log(queryExecution) // {"QueryExecutionId": "", ...}
})

stream.on('end', function() {
  console.log('end')
})
stream.on('error', function(e) {
  console.error(e)
})