judahnator/lexer

A basic lexer for your lexer needs.

v0.5.0 2023-12-29 17:13 UTC

This package is auto-updated.

Last update: 2024-12-29 19:36:09 UTC


README

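This package provides a basic lexer: you describe the tokens you expect using token identifiers, collect those identifiers in a dictionary, and the lexer tokenizes an input buffer into a buffer of tokens.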

Usage

// Step 1: describe the tokens we want to see.

// One-off constant token identifiers
$constantIdentifiers = [
    new \Judahnator\Lexer\Token\ConstantTokenIdentifier('FOO_TOKEN', 'foo'),
    new \Judahnator\Lexer\Token\ConstantTokenIdentifier('BAR_TOKEN', 'bar'),
];

// A single identifier that matches either 'bing' or 'baz'
$bingOrBazIdentifier = new \Judahnator\Lexer\Token\VariableTokenIdentifier([
    'BING_TOKEN' => 'bing',
    'BAZ_TOKEN' => 'baz',
]);

// An identifier that matches a repeating "fizzbuzz," sequence
$fizzBuzzIdentifier = new \Judahnator\Lexer\Token\RepeatingTokenIdentifier(
    new \Judahnator\Lexer\Token\ConstantTokenIdentifier('FIZZ_TOKEN', 'fizz'),
    new \Judahnator\Lexer\Token\ConstantTokenIdentifier('BUZZ_TOKEN', 'buzz'),
    new \Judahnator\Lexer\Token\ConstantTokenIdentifier('COMMA', ','),
);

// Collect every identifier into one list, in the order they were declared
$identifiers = [...$constantIdentifiers, $bingOrBazIdentifier, $fizzBuzzIdentifier];

// Step 2: build the dictionary holding all the tokens we can match.
$dictionary = new \Judahnator\Lexer\Dictionary(...$identifiers);

// Step 3: hand the dictionary to a lexer.
$lexer = new \Judahnator\Lexer\Lexer($dictionary);

// Step 4: wrap the input string in a character buffer and tokenize it
// into a new token buffer.
$input = new \Judahnator\Lexer\Buffer\CharacterBuffer('foo bar bing baz fizzbuzz,fizzbuzz,');
$tokens = $lexer->tokenize($input);

// Step 5: walk the resulting tokens, printing each literal on its own line.
foreach ($tokens as $token) {
    echo $token->getLiteral() . PHP_EOL;
}
/*
Expected output:

foo
bar
bing
baz
fizzbuzz,fizzbuzz,
 */