0.1.0 • Published 1 year ago

kysely-s3-select v0.1.0

Weekly downloads
-
License
MIT
Repository
github
Last release
1 year ago

kysely-s3-select

Powered by TypeScript

Kysely dialects, plugins and other goodies for Amazon S3 Select.

Inspired by Thomas Aribart's great post.

Installation

NPM 7+

npm i kysely-s3-select

NPM <7

npm i kysely-s3-select kysely @aws-sdk/client-s3

Yarn

yarn add kysely-s3-select kysely @aws-sdk/client-s3

PNPM

pnpm add kysely-s3-select kysely @aws-sdk/client-s3

Deno

This package has not been tested in Deno; aws-sdk-v3 might not be supported there.

This package uses and extends some Kysely types and classes, which it imports by Kysely's NPM package name -- not by a relative file path or CDN URL.

To fix that, add an import_map.json file.

{
  "imports": {
    "kysely": "https://cdn.jsdelivr.net/npm/kysely@0.23.5/dist/esm/index.js"
  }
}

Usage

import { S3Client } from '@aws-sdk/client-s3'
import { Kysely, sql } from 'kysely'
import { S3SelectDialect } from 'kysely-s3-select'

/**
 * Database schema handed to `Kysely<...>` in the CSV example.
 * It exposes a single table, `S3Object`, whose rows have the `Condition` shape.
 */
interface ConditionsCSV {
  S3Object: Condition
}

/**
 * One row of the conditions CSV object queried below.
 * Every column is typed as a string.
 */
interface Condition {
  START: string
  STOP: string
  PATIENT: string
  CODE: string
  DESCRIPTION: string
}

// Kysely instance typed with the ConditionsCSV schema; it queries a single CSV
// object (bucket + key) through the S3 Select dialect.
const conditions = new Kysely<ConditionsCSV>({
  dialect: new S3SelectDialect({
    bucket: 'synthea-open-data',
    client: new S3Client({
      region: 'us-east-1', // optional
    }),
    contentType: 'csv', // one of 'csv' | 'json' | 'parquet'
    // csvOptions: { // optional
    //   allowQuotedRecordDelimiter: false, // optional
    //   comments: '#', // optional
    //   fieldDelimiter: ',', // optional
    //   fileHeaderInfo: 'use', // optional
    //   quoteCharacter: '"', // optional
    //   quoteEscapeCharacter: '"', // optional
    //   recordDelimiter: '\n', // optional
    // },
    key: 'coherent/unzipped/csv/conditions.csv',
  })
})

// Select the PATIENT and DESCRIPTION columns (aliased to `patient` and
// `description`) from rows whose START is >= '2000' and whose STOP is not the
// empty string, returning at most 50 rows.
const results = await conditions
  .selectFrom('S3Object')
  .where('START', '>=', '2000')
  .where('STOP', '!=', '')
  .select(['PATIENT as patient', 'DESCRIPTION as description'])
  .limit(50)
  .execute()
  
/**
 * Database schema handed to `Kysely<...>` in the JSON example.
 * It exposes a single table, `S3Object`, whose value has the `Bundle` shape.
 */
interface PatientBundleJSON {
  S3Object: Bundle
}

/**
 * Top-level shape of the JSON document queried below: a transaction bundle
 * holding a list of entries.
 * NOTE(review): presumably a FHIR bundle, judging by the `fhir/` segment of the
 * object key used in the example -- confirm against the data set.
 */
interface Bundle {
  resourceType: 'Bundle'
  type: 'transaction'
  entry: Entry[]
}

/**
 * One element of `Bundle.entry`. The example types `resource` as `Patient`
 * and leaves `request` untyped.
 */
interface Entry {
  fullUrl: string
  resource: Patient
  request: object
}

/**
 * Shape of a resource whose `resourceType` is 'Patient'.
 * Only `id` and `name` are read by the example query; fields typed as
 * `object` / `object[]` are deliberately left loose.
 */
interface Patient {
  resourceType: 'Patient'
  id: string
  meta: object
  text: object
  extension: object[]
  identifier: object[]
  name: {
    use: 'official'
    family: string
    given: string[]
    prefix: string[]
  }[]
  telecom: object[]
  gender: 'male' | 'female'
  birthDate: string
  deceasedDateTime: string
  address: object[]
  maritalStatus: object
  multipleBirthBoolean: boolean
  communication: object[]
}
  
// Kysely instance typed with the PatientBundleJSON schema; it queries a single
// JSON object (bucket + key) through the S3 Select dialect.
const patientBundle = new Kysely<PatientBundleJSON>({
  dialect: new S3SelectDialect({
    bucket: 'synthea-open-data',
    client: new S3Client({
      region: 'us-east-1', // optional
    }),
    contentType: 'json', // one of 'csv' | 'json' | 'parquet'
    // jsonOptions: { // optional
    //   type: 'document', // optional, one of 'document' | 'lines'
    // },
    key: 'coherent/unzipped/fhir/Abe604_Frami345_b8dd1798-beef-094d-1be4-f90ee0e6b7d5.json',
  })
})

// Query the nested JSON document with a raw path expression
// (S3Object[*].entry[*].resource), aliased as `resource`, then take the id and
// name of the first resource whose resourceType is 'Patient'.
// `sql` is Kysely's raw SQL template tag and must be imported from 'kysely'.
// `$castTo` only changes the static result type; `executeTakeFirstOrThrow`
// rejects when the query returns no row.
const patient = await patientBundle
  .selectFrom(
    sql<Partial<Entry['resource']>>`S3Object[*].${sql.ref('entry')}[*].${sql.ref('resource')}`.as('resource'),
  )
  .where('resource.resourceType', '=', 'Patient')
  .select(['resource.id as id', 'resource.name as name'])
  .limit(1)
  .$castTo<Pick<Patient, 'id' | 'name'>>()
  .executeTakeFirstOrThrow()

License

MIT License, see LICENSE

0.1.0

1 year ago

0.0.2

1 year ago

0.0.1

1 year ago