Add lexer for reqlog search

This commit is contained in:
David Stotijn
2020-11-13 21:33:49 +01:00
parent e59b9d6663
commit 16910bb637
4 changed files with 342 additions and 0 deletions

144
pkg/reqlog/lexer.go Normal file
View File

@ -0,0 +1,144 @@
package reqlog
import (
"unicode"
"github.com/db47h/lex"
"github.com/db47h/lex/state"
)
// Token types emitted by the search query lexer. The values are assigned with
// iota, so the order of this list determines each token's numeric value.
const (
// tokEOF is emitted when the end of the input is reached.
tokEOF = iota
// tokString is a string value, either unquoted or double-quoted.
tokString
// tokOpNot is the `NOT` keyword.
tokOpNot
// tokOpAnd is the `AND` keyword.
tokOpAnd
// tokOpOr is the `OR` keyword.
tokOpOr
// tokOpEq is the `=` operator.
tokOpEq
// tokOpNeq is the `!=` operator.
tokOpNeq
// tokOpGt is the `>` operator.
tokOpGt
// tokOpGteq is the `>=` operator.
tokOpGteq
// tokOpLt is the `<` operator.
tokOpLt
// tokOpLteq is the `<=` operator.
tokOpLteq
// tokOpHas is the `:` operator.
tokOpHas
// tokOpRe is the `=~` operator.
tokOpRe
// tokOpNre is the `!~` operator.
tokOpNre
// tokParenOpen is an opening parenthesis `(`.
tokParenOpen
// tokParenClose is a closing parenthesis `)`.
tokParenClose
)
// lexItem pairs a token type with the string value that accompanies it.
type lexItem struct {
// token is the token type (one of the tok* constants).
token lex.Token
// value is the token's string value; it is the empty string for tokens
// that carry no string value.
value string
}
// lexQuery is the entry state of the search query lexer. It builds the
// sub-states used for unquoted and quoted strings and returns the state
// function that reads one rune and dispatches on it:
//
//   - end of input emits tokEOF;
//   - a double quote hands over to the state built by state.QuotedString;
//   - operator and parenthesis runes emit their token directly, looking one
//     rune ahead to recognise the two-rune operators;
//   - whitespace is consumed without emitting anything;
//   - any other rune hands over to the unquoted string state.
//
// The argument s is not used by lexQuery itself; only the returned state
// function reads from the lexer state it is given.
func lexQuery(s *lex.State) lex.StateFn {
	unquoted := lexString()
	quoted := state.QuotedString(tokString)

	return func(l *lex.State) lex.StateFn {
		r := l.Next()
		start := l.Pos()

		// emit sends a token without a value, positioned at the first rune
		// read by this state, and yields nil as the next state.
		emit := func(tok lex.Token) lex.StateFn {
			l.Emit(start, tok, nil)
			return nil
		}

		switch r {
		case lex.EOF:
			return emit(tokEOF)
		case '"':
			return quoted
		case ':':
			return emit(tokOpHas)
		case '(':
			return emit(tokParenOpen)
		case ')':
			return emit(tokParenClose)
		case '=':
			if l.Next() == '~' {
				return emit(tokOpRe)
			}
			// Not part of the operator: put the lookahead rune back.
			l.Backup()
			return emit(tokOpEq)
		case '>':
			if l.Next() == '=' {
				return emit(tokOpGteq)
			}
			l.Backup()
			return emit(tokOpGt)
		case '<':
			if l.Next() == '=' {
				return emit(tokOpLteq)
			}
			l.Backup()
			return emit(tokOpLt)
		case '!':
			next := l.Next()
			if next == '=' {
				return emit(tokOpNeq)
			}
			if next == '~' {
				return emit(tokOpNre)
			}
			// A lone '!' is not an operator. Put the lookahead rune back and
			// fall through to the checks below, which treat '!' as the first
			// rune of an unquoted string.
			l.Backup()
		}

		if !unicode.IsSpace(r) {
			return unquoted
		}

		// Absorb the whole run of whitespace, then put back the first rune
		// that is not a space.
		for unicode.IsSpace(l.Next()) {
		}
		l.Backup()

		return nil
	}
}
// lexString builds the state function that scans an unquoted string. The
// returned state expects the first rune of the string to have been read
// already; it keeps reading until it meets whitespace, the end of the input or
// one of the runes `=`, `!`, `<`, `>`, `:`, `(`, `)`. The words NOT, AND and OR
// are emitted as their operator tokens; anything else is emitted as tokString
// with the scanned text as its value.
func lexString() lex.StateFn {
	// Scratch buffer reused across invocations of the returned state, so it
	// grows at most to the size of the largest value scanned.
	buf := make([]rune, 0, 64)

	// isValueRune reports whether r may appear after the first rune of an
	// unquoted string.
	isValueRune := func(r rune) bool {
		switch r {
		case '=', '!', '<', '>', ':', '(', ')':
			return false
		}

		return !unicode.IsSpace(r) && r != lex.EOF
	}

	return func(l *lex.State) lex.StateFn {
		start := l.Pos()

		// Start over with the rune that was read before entering this state.
		buf = append(buf[:0], l.Current())

		for {
			r := l.Next()
			if !isValueRune(r) {
				break
			}
			buf = append(buf, r)
		}

		// The rune that ended the loop does not belong to the value; put it
		// back so that it gets scanned again.
		l.Backup()

		switch word := string(buf); word {
		case "NOT":
			l.Emit(start, tokOpNot, nil)
		case "AND":
			l.Emit(start, tokOpAnd, nil)
		case "OR":
			l.Emit(start, tokOpOr, nil)
		default:
			l.Emit(start, tokString, word)
		}

		return nil
	}
}

194
pkg/reqlog/lexer_test.go Normal file
View File

@ -0,0 +1,194 @@
package reqlog
import (
"strings"
"testing"
"github.com/db47h/lex"
)
// TestLex runs the query lexer over a table of inputs and checks that the
// sequence of emitted tokens (type and string value) matches the expectation.
// Every token type declared by the lexer is covered by at least one case.
func TestLex(t *testing.T) {
	lexTests := []struct {
		name     string
		input    string
		expected []lexItem
	}{
		{
			name:  "empty query",
			input: "",
			expected: []lexItem{
				{tokEOF, ""},
			},
		},
		{
			name:  "single unquoted value",
			input: "foobar",
			expected: []lexItem{
				{tokString, "foobar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "single unquoted value with non letter",
			input: "foob*",
			expected: []lexItem{
				{tokString, "foob*"},
				{tokEOF, ""},
			},
		},
		{
			name:  "multiple unquoted values",
			input: "foo bar",
			expected: []lexItem{
				{tokString, "foo"},
				{tokString, "bar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "quoted value",
			input: `"foo bar"`,
			expected: []lexItem{
				{tokString, "foo bar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with negation operator",
			input: "NOT foobar",
			expected: []lexItem{
				{tokOpNot, ""},
				{tokString, "foobar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with and operator",
			input: "foo AND bar",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpAnd, ""},
				{tokString, "bar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with or operator",
			input: "foo OR bar",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpOr, ""},
				{tokString, "bar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with equals operator",
			input: "foo = bar",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpEq, ""},
				{tokString, "bar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with not equals operator",
			input: "foo != bar",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpNeq, ""},
				{tokString, "bar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with greater than operator",
			input: "foo > 42",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpGt, ""},
				{tokString, "42"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with greater than or equal operator",
			input: "foo >= 42",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpGteq, ""},
				{tokString, "42"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with less than operator",
			input: "foo < 42",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpLt, ""},
				{tokString, "42"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with less than or equal operator",
			input: "foo <= 42",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpLteq, ""},
				{tokString, "42"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with regular expression operator",
			input: "foo =~ 42",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpRe, ""},
				{tokString, "42"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with not regular expression operator",
			input: "foo !~ 42",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpNre, ""},
				{tokString, "42"},
				{tokEOF, ""},
			},
		},
		{
			// No whitespace around the operator: this also checks that an
			// operator rune terminates the preceding unquoted string.
			name:  "comparison with has operator",
			input: "foo:bar",
			expected: []lexItem{
				{tokString, "foo"},
				{tokOpHas, ""},
				{tokString, "bar"},
				{tokEOF, ""},
			},
		},
		{
			name:  "comparison with parentheses",
			input: "(foo OR bar) AND baz",
			expected: []lexItem{
				{tokParenOpen, ""},
				{tokString, "foo"},
				{tokOpOr, ""},
				{tokString, "bar"},
				{tokParenClose, ""},
				{tokOpAnd, ""},
				{tokString, "baz"},
				{tokEOF, ""},
			},
		},
	}

	for _, tt := range lexTests {
		// Capture the range variable: the subtest body runs in parallel,
		// after the loop has moved on.
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			file := lex.NewFile(tt.name, strings.NewReader(tt.input))
			l := lex.NewLexer(file, lexQuery)

			for i, exp := range tt.expected {
				token, _, value := l.Lex()
				if err, isErr := value.(error); isErr {
					t.Fatalf("unexpected error: %v", err)
				}
				// Tokens without a string value compare as the empty string.
				valueStr, _ := value.(string)
				got := lexItem{
					token: token,
					value: valueStr,
				}
				if got != exp {
					t.Errorf("%v: got: %+v, expected: %+v", i, got, exp)
				}
			}
		})
	}
}