0.1.1 • Published 9 months ago
whisper-rs-node v0.1.1
whisper-rs-node
Node.js add-on for whisper-rs (the Rust bindings to whisper.cpp).
Features:
- Pre-compiled binaries, so you don't have to set up a compilation environment on the platforms listed below.
- Near-native performance, since the core is whisper-rs.
Pre-compiled platforms:
Platform | Status |
---|---|
Mac(arm64) | ✅ |
Mac(x64) | ✅ |
Windows(x64) | ✅ |
Linux(x64) | ✅ |
Installation
npm i whisper-rs-node
Download models
Download a model by following the instructions in whisper.cpp/models/README.md.
Quick start
// Node built-ins used below to read the model file and resolve paths.
import * as fs from 'node:fs';
import * as path from 'node:path';
import {
  convertFileBufferToAudioSamples,
  convertIntegerToFloatAudio,
  convertStereoToMonoAudio,
  WhisperContext,
  newGreedyFullParams,
} from 'whisper-rs-node';

// Read model file. You can download model files according to the README.md
const model = fs.readFileSync(
  path.resolve(__dirname, './PATH_TO_YOUR_MODEL.bin')
);

// Create WhisperContext and WhisperState that are used to run the model.
const ctx = new WhisperContext(model);
const state = ctx.createState();

// Read and parse the .wav file.
// "parseWav" is not exported by this package; copy it from the
// [example](./examples/audio_transcription.ts).
const { format, samples } = await parseWav(
  path.resolve(__dirname, './PATH_TO_YOUR_WAV_FILE.wav')
);

// Convert the samples into float array which is required by whisper-rs.
let audio = convertIntegerToFloatAudio(samples);
if (format.channels === 2) {
  // Convert into mono audio which is also required by whisper-rs.
  audio = convertStereoToMonoAudio(audio);
} else if (format.channels !== 1) {
  throw new Error('>2 channels unsupported');
}
if (format.sampleRate !== 16000) {
  throw new Error('sample rate must be 16KHz');
}

// Set config in FullParams which would be used in our WhisperState created before.
const fullParams = newGreedyFullParams(0);
fullParams.setNThreads(1);
fullParams.setTranslate(true);
fullParams.setLanguage('en');
fullParams.setPrintProgress(false);
fullParams.setPrintRealtime(false);
fullParams.setPrintSpecial(false);
fullParams.setPrintTimestamps(false);

// Run the model.
state.full(fullParams, audio);

// Get the result and print them.
// The loop only reads from the state, so the segment count is fetched once.
const segmentCount = state.fullNSegment();
for (let i = 0; i < segmentCount; i += 1) {
  const start = state.fullGetSegmentT0(i);
  const end = state.fullGetSegmentT1(i);
  console.log(`[${start}-${end}] ${state.fullGetSegmentText(i)}`);
}
See examples/audio_transcription.ts for a detailed example.
LICENSE
MIT