@louischan-oursky/messageformat-parser v0.3.0
messageformat-parser
A PEG.js parser for ICU MessageFormat strings – part of messageformat. Outputs an AST defined by parser.pegjs.
The generated parse(src, [options]) function takes two parameters, first the
string to be parsed, and a second optional parameter options, an object with
the following possible keys:
- cardinal and ordinal – Arrays of valid plural categories for the current locale, used to validate plural and selectordinal keys. If these are missing or set to false, the full set of valid Unicode CLDR keys is used: 'zero', 'one', 'two', 'few', 'many', 'other'. To disable this check, pass in an empty array.
- strict – By default, the parsing applies a few relaxations to the ICU MessageFormat spec. Setting strict: true will disable these relaxations:
  - The argType of simpleArg formatting functions will be restricted to the set of number, date, time, spellout, ordinal, and duration, rather than accepting any lower-case identifier that does not start with a number.
  - The optional argStyle of simpleArg formatting functions will not be parsed as any other text, but instead as the spec requires: "In argStyleText, every single ASCII apostrophe begins and ends quoted literal text, and unquoted {curly braces} must occur in matched pairs."
  - Inside a plural or selectordinal statement, a pound symbol (#) is replaced with the input number. By default, # is also parsed as a special character in nested statements, and can be escaped using apostrophes ('#'). In strict mode # will be parsed as a special character only directly inside a plural or selectordinal statement. Outside those, # and '#' will be parsed as literal text.
The parser only supports the default DOUBLE_OPTIONAL apostrophe mode, in
which a single apostrophe only starts quoted literal text if it immediately
precedes a curly brace {}, or a pound symbol # if inside a plural format. A
literal apostrophe ' is represented by either a single ' or a doubled ''
apostrophe character.
Installation
npm install messageformat-parser
Usage
> var parse = require('messageformat-parser').parse;
> parse('So {wow}.')
[ 'So ', { type: 'argument', arg: 'wow' }, '.' ]
> parse('Such { thing }. { count, selectordinal, one {First} two {Second}' +
' few {Third} other {#th} } word.')
[ 'Such ',
{ type: 'argument', arg: 'thing' },
'. ',
{ type: 'selectordinal',
arg: 'count',
offset: 0,
cases:
[ { key: 'one', tokens: [ 'First' ] },
{ key: 'two', tokens: [ 'Second' ] },
{ key: 'few', tokens: [ 'Third' ] },
{ key: 'other', tokens: [ { type: 'octothorpe' }, 'th' ] } ] },
' word.' ]
> parse('Many{type,select,plural{ numbers}selectordinal{ counting}' +
'select{ choices}other{ some {type}}}.')
[ 'Many',
{ type: 'select',
arg: 'type',
cases:
[ { key: 'plural', tokens: [ ' numbers' ] },
{ key: 'selectordinal', tokens: [ ' counting' ] },
{ key: 'select', tokens: [ ' choices' ] },
{ key: 'other', tokens: [ ' some ',
{ type: 'argument', arg: 'type' } ] } ] },
'.' ]
> parse('{Such compliance')
// SyntaxError: Expected ",", "}" or [ \t\n\r] but "c" found.
> var msg = '{words, plural, zero{No words} one{One word} other{# words}}';
> var englishKeys = { cardinal: [ 'one', 'other' ],
ordinal: [ 'one', 'two', 'few', 'other' ] };
> parse(msg)
[ { type: 'plural',
arg: 'words',
offset: 0,
cases:
[ { key: 'zero', tokens: [ 'No words' ] },
{ key: 'one', tokens: [ 'One word' ] },
{ key: 'other', tokens: [ { type: 'octothorpe' }, ' words' ] } ] } ]
> parse(msg, englishKeys)
// Error: Invalid key `zero` for argument `words`. Valid plural keys for this
// locale are `one`, `other`, and explicit keys like `=0`.
For more example usage, please take a look at our test suite.
Structure
The output of parse() is a Token array:
type Token = string | Argument | Plural | React | Select | Function
type Argument = {
type: 'argument',
arg: Identifier
}
type Plural = {
type: 'plural' | 'selectordinal',
arg: Identifier,
offset: number,
cases: PluralCase[]
}
type React = {
type: 'react',
arg: Identifier,
cases: SelectCase[]
}
type Select = {
type: 'select',
arg: Identifier,
cases: SelectCase[]
}
type Function = {
type: 'function',
arg: Identifier,
key: Identifier,
param: {
tokens: options.strict ? [string] : (Token | Octothorpe)[]
} | null
}
type PluralCase = {
key: 'zero' | 'one' | 'two' | 'few' | 'many' | 'other' | '=0' | '=1' | '=2' | ...,
tokens: (Token | Octothorpe)[]
}
type SelectCase = {
key: Identifier,
tokens: options.strict ? Token[] : (Token | Octothorpe)[]
}
type Octothorpe = {
type: 'octothorpe'
}
type Identifier = string // not containing whitespace or control characters
License & Contributor License Agreement
Released under the MIT license. See the messageformat README for details.