0.7.4-alpha.1 • Published 3 years ago

@ridi/pdf-parser v0.7.4-alpha.1

Weekly downloads
69
License
Apache-2.0
Repository
github
Last release
3 years ago

@ridi/pdf-parser

Common PDF data parser for Ridibooks services

NPM version Check codecov NPM total downloads

Features

  • Structure parsing
  • Read files
    • Read cover page
  • Encryption and decryption support when parsing or reading
  • Debug mode
  • Environment
    • Node
    • CLI
    • Browser
  • Online demo

Install

npm install @ridi/pdf-parser

Usage

Basic:

import { PdfParser } from '@ridi/pdf-parser';
// or const { PdfParser } = require('@ridi/pdf-parser');

const parser = new PdfParser('./foo/bar.pdf');
parser.parse().then((book) => {
  ...
});
parser.read().then((pdfFileBuffer) => {
  ...
});

with AesCryptor:

import { CryptoProvider, AesCryptor } from '@ridi/pdf-parser';
// or const { CryptoProvider, AesCryptor } = require('@ridi/pdf-parser');

const { Purpose } = CryptoProvider;
const { Mode, Padding } = AesCryptor;

class ContentCryptoProvider extends CryptoProvider {
  constructor(key) {
    super();
    this.cryptor = new AesCryptor(Mode.ECB, { key });
  }

  getCryptor(filePath, purpose) {
    return this.cryptor;
  }

  // If used as follows:
  // const provider = new ContentCryptoProvider(...);
  // const parser = new PdfParser('encrypted.pdf', provider);
  // const book = await parser.parse();
  // const cover = await parser.readItem(book.cover);
  //
  // It will be called as follows:
  // 1. run(data, 'encrypted.pdf', Purpose.READ_IN_DIR)
  // 2. run(data, 'encrypted.pdf', Purpose.READ_IN_DIR)
  //
  run(data, filePath, purpose) {
    const cryptor = this.getCryptor(filePath, purpose);
    if (purpose === Purpose.READ_IN_DIR) {
      return cryptor.decrypt(data, { padding: Padding.AUTO });
    }
    return data;
  }
}

const cryptoProvider = new ContentCryptoProvider(key);
const parser = new PdfParser('./foo/bar.pdf', cryptoProvider);

Log level setting:

import { LogLevel, ... } from '@ridi/pdf-parser';
const parser = new PdfParser(/* path */, /* cryptoProvider */, /* logLevel */)
// or const parser = new PdfParser(/* path */, /* logLevel */)
parser.logger.logLevel = LogLevel.VERBOSE; // SILENT, ERROR, WARN(default), INFO, DEBUG, VERBOSE

API

parse(parseOptions)

Returns Promise<PdfBook> with:

Or throws an exception.

parseOptions: ?object


read()

Returns a Promise that resolves to the PDF file as a Buffer.


onProgress = callback(step, totalStep, action)

Reports the parser's progress through a callback.

const { Action } = PdfParser; // PARSE, READ_ITEMS
parser.onProgress = (step, totalStep, action) => {
  console.log(`[${action}] ${step} / ${totalStep}`);
}

Model

PdfBook

  • version: Version
  • title: string
  • author: string
  • subject: string
  • keywords: string
  • creator: string
  • producer: string
  • creationDate: ?string
  • modificationDate: ?string
  • outlineItems: OutlineItem[]
  • isLinearized: boolean
  • isAcroFormPresent: boolean
  • isXFAPresent: boolean
  • isCollectionPresent: boolean
  • userInfo: object
  • pageCount: number
  • permissions: Permissions
  • toRaw(): object

Version

  • major: number
  • minor: number
  • patch: number
  • toString(): string

OutlineItem

  • dest: ?string|*[]
  • url: ?string
  • title: string
  • color: Color
  • bold: boolean
  • italic: boolean
  • depth: number (Default: 0)
  • children: OutlineItem[]
  • page: ?number
  • toRaw(): object

Color

  • red: number
  • green: number
  • blue: number
  • intValue: number (ex: 7237488)
  • hexString: string (ex: '#6e6f70')
  • rgbString: string (ex: 'rgb(110, 111, 112)')
  • toRaw(): object

Permissions

  • allowPrinting: boolean
  • allowContentsModifying: boolean
  • allowCopying: boolean
  • allowAnnotationsModifying: boolean
  • allowInteractiveFormsModifying: boolean
  • allowCopyingForAccessibility: boolean
  • allowAssembling: boolean
  • allowHighQualityPrinting: boolean
  • toRaw(): ?number[]

Parse Options


fakeWorker: boolean

Use a fake worker when running in a browser environment such as the Electron renderer process.

Default: false

License

Apache-2.0

0.7.4-alpha.1

3 years ago

0.7.4-alpha.0

3 years ago

0.7.3-alpha.3

3 years ago

0.7.3

3 years ago

0.7.3-alpha.2

3 years ago

0.7.3-alpha.0

3 years ago

0.7.3-alpha.1

3 years ago

0.7.2

3 years ago

0.7.2-alpha.4

3 years ago

0.7.2-alpha.3

3 years ago

0.7.2-alpha.0

3 years ago

0.7.2-alpha.1

3 years ago

0.7.2-alpha.2

3 years ago

0.7.1

3 years ago

0.7.0

4 years ago

0.6.16-alpha.2

4 years ago

0.6.16-alpha.1

4 years ago

0.6.15-alpha.2

4 years ago

0.6.16-alpha.0

4 years ago

0.6.21-alpha.0

4 years ago

0.6.20-alpha.0

4 years ago

0.6.15

4 years ago

0.6.15-alpha.1

4 years ago

0.6.15-alpha.0

4 years ago

0.6.14

4 years ago

0.6.13

4 years ago

0.6.12

4 years ago

0.6.11

4 years ago

0.6.10

4 years ago

0.6.9

4 years ago

0.6.8

4 years ago

0.6.7

4 years ago

0.6.6

5 years ago

0.6.5

5 years ago

0.6.4

5 years ago

0.6.3

5 years ago

0.6.2

5 years ago

0.6.1

5 years ago

0.6.0

5 years ago

0.6.0-alpha.8

5 years ago

0.6.0-alpha.7

5 years ago

0.6.0-ahlpa.6

5 years ago

0.6.0-alpha.4

5 years ago

0.6.0-alpha.3

5 years ago

0.6.0-alpha.2

5 years ago