From 5bce912e890c27cb7a629f3e28ada52296455587 Mon Sep 17 00:00:00 2001 From: David Stotijn Date: Mon, 16 Nov 2020 22:14:58 +0100 Subject: [PATCH] Replace lexer, add parser --- go.mod | 1 - go.sum | 10 -- pkg/reqlog/lexer.go | 144 --------------------- pkg/reqlog/lexer_test.go | 194 ---------------------------- pkg/search/ast.go | 53 ++++++++ pkg/search/lexer.go | 264 ++++++++++++++++++++++++++++++++++++++ pkg/search/lexer_test.go | 89 +++++++++++++ pkg/search/parser.go | 226 ++++++++++++++++++++++++++++++++ pkg/search/parser_test.go | 216 +++++++++++++++++++++++++++++++ 9 files changed, 848 insertions(+), 349 deletions(-) delete mode 100644 pkg/reqlog/lexer.go delete mode 100644 pkg/reqlog/lexer_test.go create mode 100644 pkg/search/ast.go create mode 100644 pkg/search/lexer.go create mode 100644 pkg/search/lexer_test.go create mode 100644 pkg/search/parser.go create mode 100644 pkg/search/parser_test.go diff --git a/go.mod b/go.mod index 5ce68bb..beada7f 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ require ( github.com/99designs/gqlgen v0.13.0 github.com/GeertJohan/go.rice v1.0.0 github.com/Masterminds/squirrel v1.4.0 - github.com/db47h/lex v1.2.1 github.com/gorilla/mux v1.7.4 github.com/hashicorp/golang-lru v0.5.1 // indirect github.com/jmoiron/sqlx v1.2.0 diff --git a/go.sum b/go.sum index 2981218..37826fb 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,3 @@ -github.com/99designs/gqlgen v0.11.3 h1:oFSxl1DFS9X///uHV3y6CEfpcXWrDUxVblR4Xib2bs4= -github.com/99designs/gqlgen v0.11.3/go.mod h1:RgX5GRRdDWNkh4pBrdzNpNPFVsdoUFY2+adM6nb1N+4= github.com/99designs/gqlgen v0.13.0 h1:haLTcUp3Vwp80xMVEg5KRNwzfUrgFdRmtBY8fuB8scA= github.com/99designs/gqlgen v0.13.0/go.mod h1:NV130r6f4tpRWuAI+zsrSdooO/eWUv+Gyyoi3rEfXIk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -26,8 +24,6 @@ github.com/daaku/go.zipexe v1.0.0/go.mod h1:z8IiR6TsVLEYKwXAoE/I+8ys/sDkgTzSL0CL github.com/davecgh/go-spew v1.1.0/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/db47h/lex v1.2.1 h1:OPXYd/WUiM8UbB65L26nA/NI4Y9jlCFOJIHGDPvYn2M= -github.com/db47h/lex v1.2.1/go.mod h1:c1/b2FVNgxTLDASa/K0yVU07pAFiYx5Iiihijlvokqg= github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c h1:TUuUh0Xgj97tLMNtWtNvI9mIV6isjEb9lBMNv+77IGM= github.com/dgryski/trifles v0.0.0-20190318185328-a8d75aae118c/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= github.com/go-chi/chi v3.3.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ= @@ -39,10 +35,6 @@ github.com/gorilla/context v0.0.0-20160226214623-1ea25387ff6f/go.mod h1:kBGZzfjB github.com/gorilla/mux v1.6.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.4 h1:VuZ8uybHlWmqV03+zRzdwKL4tUnIp1MAQtp1mIFE1bc= github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= -github.com/gorilla/websocket v1.2.0 h1:VJtLvh6VQym50czpZzx07z/kw9EgAxI3x1ZB8taTMQQ= -github.com/gorilla/websocket v1.2.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q= -github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= @@ -109,8 +101,6 @@ github.com/valyala/fasttemplate v1.0.1 h1:tY9CJiPnMXf1ERmG2EyK7gNUd+c6RKGD0IfU8W github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/vektah/dataloaden v0.2.1-0.20190515034641-a19b9a6e7c9e h1:+w0Zm/9gaWpEAyDlU1eKOuk5twTjAjuevXqcJJw8hrg= 
github.com/vektah/dataloaden v0.2.1-0.20190515034641-a19b9a6e7c9e/go.mod h1:/HUdMve7rvxZma+2ZELQeNh88+003LL7Pf/CZ089j8U= -github.com/vektah/gqlparser/v2 v2.0.1 h1:xgl5abVnsd4hkN9rk65OJID9bfcLSMuTaTcZj777q1o= -github.com/vektah/gqlparser/v2 v2.0.1/go.mod h1:SyUiHgLATUR8BiYURfTirrTcGpcE+4XkV2se04Px1Ms= github.com/vektah/gqlparser/v2 v2.1.0 h1:uiKJ+T5HMGGQM2kRKQ8Pxw8+Zq9qhhZhz/lieYvCMns= github.com/vektah/gqlparser/v2 v2.1.0/go.mod h1:SyUiHgLATUR8BiYURfTirrTcGpcE+4XkV2se04Px1Ms= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= diff --git a/pkg/reqlog/lexer.go b/pkg/reqlog/lexer.go deleted file mode 100644 index 3167d86..0000000 --- a/pkg/reqlog/lexer.go +++ /dev/null @@ -1,144 +0,0 @@ -package reqlog - -import ( - "unicode" - - "github.com/db47h/lex" - "github.com/db47h/lex/state" -) - -const ( - tokEOF = iota - tokString - tokOpNot - tokOpAnd - tokOpOr - tokOpEq - tokOpNeq - tokOpGt - tokOpGteq - tokOpLt - tokOpLteq - tokOpHas - tokOpRe - tokOpNre - tokParenOpen - tokParenClose -) - -type lexItem struct { - token lex.Token - value string -} - -func lexQuery(s *lex.State) lex.StateFn { - str := lexString() - quotedString := state.QuotedString(tokString) - - return func(s *lex.State) lex.StateFn { - r := s.Next() - pos := s.Pos() - switch r { - case lex.EOF: - s.Emit(pos, tokEOF, nil) - return nil - case '"': - return quotedString - case '=': - if next := s.Next(); next == '~' { - s.Emit(pos, tokOpRe, nil) - } else { - s.Backup() - s.Emit(pos, tokOpEq, nil) - } - return nil - case '!': - switch next := s.Next(); next { - case '=': - s.Emit(pos, tokOpNeq, nil) - return nil - case '~': - s.Emit(pos, tokOpNre, nil) - return nil - default: - s.Backup() - } - case '>': - if next := s.Next(); next == '=' { - s.Emit(pos, tokOpGteq, nil) - } else { - s.Backup() - s.Emit(pos, tokOpGt, nil) - } - return nil - case '<': - if next := s.Next(); next == '=' { - s.Emit(pos, tokOpLteq, nil) - } else { - s.Backup() - 
s.Emit(pos, tokOpLt, nil) - } - return nil - case ':': - s.Emit(pos, tokOpHas, nil) - return nil - case '(': - s.Emit(pos, tokParenOpen, nil) - return nil - case ')': - s.Emit(pos, tokParenClose, nil) - return nil - } - - switch { - case unicode.IsSpace(r): - // Absorb spaces. - for r = s.Next(); unicode.IsSpace(r); r = s.Next() { - } - s.Backup() - return nil - default: - return str - } - } -} - -func lexString() lex.StateFn { - // Preallocate a buffer to store the value. It will end-up being at - // least as large as the largest value scanned. - b := make([]rune, 0, 64) - - isStringChar := func(r rune) bool { - switch r { - case '=', '!', '<', '>', ':', '(', ')': - return false - } - return !(unicode.IsSpace(r) || r == lex.EOF) - } - - return func(l *lex.State) lex.StateFn { - pos := l.Pos() - // Reset buffer and add first char. - b = append(b[:0], l.Current()) - // Read identifier. - for r := l.Next(); isStringChar(r); r = l.Next() { - b = append(b, r) - } - // The character returned by the last call to `l.Next` is not part of - // the value. Undo it. 
- l.Backup() - - switch { - case string(b) == "NOT": - l.Emit(pos, tokOpNot, nil) - case string(b) == "AND": - l.Emit(pos, tokOpAnd, nil) - case string(b) == "OR": - l.Emit(pos, tokOpOr, nil) - default: - l.Emit(pos, tokString, string(b)) - } - - return nil - } -} diff --git a/pkg/reqlog/lexer_test.go b/pkg/reqlog/lexer_test.go deleted file mode 100644 index 32a244b..0000000 --- a/pkg/reqlog/lexer_test.go +++ /dev/null @@ -1,194 +0,0 @@ -package reqlog - -import ( - "strings" - "testing" - - "github.com/db47h/lex" -) - -func TestLex(t *testing.T) { - lexTests := []struct { - name string - input string - expected []lexItem - }{ - { - name: "empty query", - input: "", - expected: []lexItem{ - {tokEOF, ""}, - }, - }, - { - name: "single unquoted value", - input: "foobar", - expected: []lexItem{ - {tokString, "foobar"}, - {tokEOF, ""}, - }, - }, - { - name: "single unquoted value with non letter", - input: "foob*", - expected: []lexItem{ - {tokString, "foob*"}, - {tokEOF, ""}, - }, - }, - { - name: "multiple unquoted values", - input: "foo bar", - expected: []lexItem{ - {tokString, "foo"}, - {tokString, "bar"}, - {tokEOF, ""}, - }, - }, - { - name: "quoted value", - input: `"foo bar"`, - expected: []lexItem{ - {tokString, "foo bar"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with negation operator", - input: "NOT foobar", - expected: []lexItem{ - {tokOpNot, ""}, - {tokString, "foobar"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with and operator", - input: "foo AND bar", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpAnd, ""}, - {tokString, "bar"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with or operator", - input: "foo OR bar", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpOr, ""}, - {tokString, "bar"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with equals operator", - input: "foo = bar", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpEq, ""}, - {tokString, "bar"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison 
with greater than operator", - input: "foo > 42", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpGt, ""}, - {tokString, "42"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with greater than or equal operator", - input: "foo >= 42", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpGteq, ""}, - {tokString, "42"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with less than operator", - input: "foo < 42", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpLt, ""}, - {tokString, "42"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with less than or equal operator", - input: "foo <= 42", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpLteq, ""}, - {tokString, "42"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with regular expression operator", - input: "foo =~ 42", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpRe, ""}, - {tokString, "42"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with not regular expression operator", - input: "foo !~ 42", - expected: []lexItem{ - {tokString, "foo"}, - {tokOpNre, ""}, - {tokString, "42"}, - {tokEOF, ""}, - }, - }, - { - name: "comparison with parentheses", - input: "(foo OR bar) AND baz", - expected: []lexItem{ - {tokParenOpen, ""}, - {tokString, "foo"}, - {tokOpOr, ""}, - {tokString, "bar"}, - {tokParenClose, ""}, - {tokOpAnd, ""}, - {tokString, "baz"}, - {tokEOF, ""}, - }, - }, - } - - for _, tt := range lexTests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - file := lex.NewFile(tt.name, strings.NewReader(tt.input)) - l := lex.NewLexer(file, lexQuery) - - for i, exp := range tt.expected { - token, _, value := l.Lex() - if err, isErr := value.(error); isErr { - t.Fatalf("unexpected error: %v", err) - } - valueStr, _ := value.(string) - got := lexItem{ - token: token, - value: valueStr, - } - if got != exp { - t.Errorf("%v: got: %+v, expected: %+v", i, got, exp) - } - } - }) - } -} diff --git a/pkg/search/ast.go b/pkg/search/ast.go new file mode 100644 index 
// Expression is a node in the abstract syntax tree of a parsed search
// query. String renders the node back into a canonical, fully
// parenthesized query string (useful for testing and debugging).
type Expression interface {
	String() string
}

// PrefixExpression is a unary expression, e.g. `NOT foo`.
type PrefixExpression struct {
	Operator TokenType
	Right    Expression
}

// expressionNode is a marker method tagging the type as an AST node.
func (pe *PrefixExpression) expressionNode() {}

// String renders the expression as "(<operator> <right>)".
func (pe *PrefixExpression) String() string {
	b := strings.Builder{}
	b.WriteString("(")
	b.WriteString(pe.Operator.String())
	b.WriteString(" ")
	b.WriteString(pe.Right.String())
	b.WriteString(")")

	return b.String()
}

// InfixExpression is a binary expression, e.g. `foo AND bar` or `a = b`.
type InfixExpression struct {
	Operator TokenType
	Left     Expression
	Right    Expression
}

// expressionNode is a marker method tagging the type as an AST node.
func (ie *InfixExpression) expressionNode() {}

// String renders the expression as "(<left> <operator> <right>)".
func (ie *InfixExpression) String() string {
	b := strings.Builder{}
	b.WriteString("(")
	b.WriteString(ie.Left.String())
	b.WriteString(" ")
	b.WriteString(ie.Operator.String())
	b.WriteString(" ")
	b.WriteString(ie.Right.String())
	b.WriteString(")")

	return b.String()
}

// StringLiteral is a bare or quoted string value.
type StringLiteral struct {
	Value string
}

// expressionNode is a marker method tagging the type as an AST node.
func (sl *StringLiteral) expressionNode() {}

// String returns the literal value verbatim.
func (sl *StringLiteral) String() string {
	return sl.Value
}

// TokenType identifies the category of a scanned token.
type TokenType int

// Token is a single lexeme scanned from a search query.
type Token struct {
	Type    TokenType
	Literal string
}

// eof is the sentinel rune returned by (*Lexer).read when the input is
// exhausted. Note: a literal NUL byte in the input is indistinguishable
// from end of input.
const eof = 0

// Token types.
const (
	// Flow
	TokInvalid TokenType = iota
	TokEOF
	TokParenOpen
	TokParenClose

	// Literals
	TokString

	// Boolean operators
	TokOpNot
	TokOpAnd
	TokOpOr

	// Comparison operators
	TokOpEq
	TokOpNotEq
	TokOpGt
	TokOpLt
	TokOpGtEq
	TokOpLtEq
	TokOpRe
	TokOpNotRe
)

var (
	// keywords maps bare words with special meaning to their token types.
	keywords = map[string]TokenType{
		"NOT": TokOpNot,
		"AND": TokOpAnd,
		"OR":  TokOpOr,
	}
	// reservedRunes terminate an unquoted string literal.
	reservedRunes = []rune{'=', '!', '<', '>', '(', ')'}
	// tokenTypeStrings provides human readable names for token types.
	tokenTypeStrings = map[TokenType]string{
		TokInvalid:    "INVALID",
		TokEOF:        "EOF",
		TokParenOpen:  "(",
		TokParenClose: ")",
		TokString:     "STRING",
		TokOpNot:      "NOT",
		TokOpAnd:      "AND",
		TokOpOr:       "OR",
		TokOpEq:       "=",
		TokOpNotEq:    "!=",
		TokOpGt:       ">",
		TokOpLt:       "<",
		TokOpGtEq:     ">=",
		TokOpLtEq:    "<=",
		TokOpRe:       "=~",
		TokOpNotRe:    "!~",
	}
)

// stateFn is a lexer state; it scans some input and returns the next
// state, or nil when lexing is done.
type stateFn func(*Lexer) stateFn

// Lexer scans a search query into Tokens. Tokens are produced by a
// dedicated goroutine and consumed via Next.
//
// NOTE(review): the producing goroutine blocks on the unbuffered tokens
// channel; a caller that abandons a Lexer before draining it (through
// TokEOF or TokInvalid) leaks that goroutine.
type Lexer struct {
	input  string     // the string being scanned
	pos    int        // current byte offset into input
	start  int        // start offset of the token in progress
	width  int        // byte width of the most recently read rune
	tokens chan Token // scanned tokens, delivered to Next
}

// NewLexer starts lexing input in a background goroutine and returns a
// Lexer whose tokens can be consumed with Next.
func NewLexer(input string) *Lexer {
	l := &Lexer{
		input:  input,
		tokens: make(chan Token),
	}

	go l.run(begin)

	return l
}

// Next returns the next scanned token. After TokEOF (or an error token)
// has been delivered and the channel closed, it returns the zero Token.
func (l *Lexer) Next() Token {
	return <-l.tokens
}

// String returns a human readable name for the token type, or "" if the
// type is unknown.
func (tt TokenType) String() string {
	if typeString, ok := tokenTypeStrings[tt]; ok {
		return typeString
	}
	return ""
}

// run drives the state machine until a state returns nil, then closes
// the token channel.
func (l *Lexer) run(init stateFn) {
	for nextState := init; nextState != nil; {
		nextState = nextState(l)
	}
	close(l.tokens)
}

// read consumes and returns the next rune, or eof when input is
// exhausted.
func (l *Lexer) read() (r rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width

	return
}

// emit sends the text scanned since the last token boundary as a token.
func (l *Lexer) emit(tokenType TokenType) {
	l.tokens <- Token{
		Type:    tokenType,
		Literal: l.input[l.start:l.pos],
	}
	l.start = l.pos
}

// ignore discards the text scanned since the last token boundary.
func (l *Lexer) ignore() {
	l.start = l.pos
}

// skip advances past the most recently read (and backed up) rune and
// discards it.
func (l *Lexer) skip() {
	l.pos += l.width
	l.start = l.pos
}

// backup undoes the most recent read. Only valid once per read call.
func (l *Lexer) backup() {
	l.pos -= l.width
}

// errorf emits a TokInvalid token carrying the formatted error message
// and stops the state machine.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	l.tokens <- Token{
		Type:    TokInvalid,
		Literal: fmt.Sprintf(format, args...),
	}

	return nil
}

// begin is the top-level lexer state: it dispatches on the next rune to
// operator, parenthesis, string or EOF handling.
func begin(l *Lexer) stateFn {
	r := l.read()
	switch r {
	case '=':
		// Either `=~` (regexp match) or plain `=`.
		if next := l.read(); next == '~' {
			l.emit(TokOpRe)
		} else {
			l.backup()
			l.emit(TokOpEq)
		}
		return begin
	case '!':
		switch next := l.read(); next {
		case '=':
			l.emit(TokOpNotEq)
		case '~':
			l.emit(TokOpNotRe)
		default:
			// A lone `!` is not a valid operator. Report the rune that
			// actually followed it (the original printed `r`, which is
			// always '!', and `%v` rendered it as an integer).
			return l.errorf("invalid rune %q after %q", next, r)
		}
		return begin
	case '<':
		if next := l.read(); next == '=' {
			l.emit(TokOpLtEq)
		} else {
			l.backup()
			l.emit(TokOpLt)
		}
		return begin
	case '>':
		if next := l.read(); next == '=' {
			l.emit(TokOpGtEq)
		} else {
			l.backup()
			l.emit(TokOpGt)
		}
		return begin
	case '(':
		l.emit(TokParenOpen)
		return begin
	case ')':
		l.emit(TokParenClose)
		return begin
	case '"':
		return l.delimString(r)
	case eof:
		l.emit(TokEOF)
		return nil
	}

	if unicode.IsSpace(r) {
		// Whitespace separates tokens but is never part of one.
		l.ignore()
		return begin
	}

	return unquotedString
}

// delimString scans a string literal enclosed by delim (the opening
// delimiter has just been read). The delimiters are not part of the
// emitted token.
func (l *Lexer) delimString(delim rune) stateFn {
	// Ignore the start delimiter rune.
	l.ignore()

	for r := l.read(); r != delim; r = l.read() {
		if r == eof {
			return l.errorf("unexpected EOF, unclosed delimiter")
		}
	}
	// Don't include the end delimiter in the emitted token.
	l.backup()
	l.emit(TokString)
	// Skip end delimiter.
	l.skip()

	return begin
}

// unquotedString scans a bare word up to whitespace, a reserved rune, or
// EOF. Keywords (NOT/AND/OR) get their own token types.
func unquotedString(l *Lexer) stateFn {
	for r := l.read(); ; r = l.read() {
		switch {
		case r == eof:
			l.backup()
			l.emitUnquotedString()
			return begin
		case unicode.IsSpace(r):
			l.backup()
			l.emitUnquotedString()
			l.skip()
			return begin
		case isReserved(r):
			l.backup()
			l.emitUnquotedString()
			return begin
		}
	}
}

// emitUnquotedString emits the scanned word as a keyword token if it is
// one, or as a plain string otherwise.
func (l *Lexer) emitUnquotedString() {
	str := l.input[l.start:l.pos]
	if tokType, ok := keywords[str]; ok {
		l.emit(tokType)
		return
	}
	l.emit(TokString)
}

// isReserved reports whether r terminates an unquoted string.
func isReserved(r rune) bool {
	for _, v := range reservedRunes {
		if r == v {
			return true
		}
	}
	return false
}
}, + } + + for i, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + l := NewLexer(tt.input) + + for _, exp := range tt.expected { + got := l.Next() + if got.Type != exp.Type { + t.Errorf("invalid type (idx: %v, expected: %v, got: %v)", + i, exp.Type, got.Type) + } + if got.Literal != exp.Literal { + t.Errorf("invalid literal (idx: %v, expected: %v, got: %v)", + i, exp.Literal, got.Literal) + } + } + }) + } +} diff --git a/pkg/search/parser.go b/pkg/search/parser.go new file mode 100644 index 0000000..ec0ad41 --- /dev/null +++ b/pkg/search/parser.go @@ -0,0 +1,226 @@ +package search + +import ( + "fmt" +) + +type precedence int + +const ( + _ precedence = iota + precLowest + precAnd + precOr + precNot + precEq + precLessGreater + precPrefix + precGroup +) + +type prefixParser func(*Parser) (Expression, error) +type infixParser func(*Parser, Expression) (Expression, error) + +var ( + prefixParsers = map[TokenType]prefixParser{} + infixParsers = map[TokenType]infixParser{} +) + +var tokenPrecedences = map[TokenType]precedence{ + TokParenOpen: precGroup, + TokOpNot: precNot, + TokOpAnd: precAnd, + TokOpOr: precOr, + TokOpEq: precEq, + TokOpNotEq: precEq, + TokOpGt: precLessGreater, + TokOpLt: precLessGreater, + TokOpGtEq: precLessGreater, + TokOpLtEq: precLessGreater, + TokOpRe: precEq, + TokOpNotRe: precEq, +} + +func init() { + // Populate maps in `init`, because package global variables would cause an + // initialization cycle. 
+ infixOperators := []TokenType{ + TokOpAnd, + TokOpOr, + TokOpEq, + TokOpNotEq, + TokOpGt, + TokOpLt, + TokOpGtEq, + TokOpLtEq, + TokOpRe, + TokOpNotRe, + } + for _, op := range infixOperators { + infixParsers[op] = parseInfixExpression + } + + prefixParsers[TokOpNot] = parsePrefixExpression + prefixParsers[TokString] = parseStringLiteral + prefixParsers[TokParenOpen] = parseGroupedExpression +} + +type Parser struct { + l *Lexer + cur Token + peek Token +} + +func NewParser(l *Lexer) *Parser { + p := &Parser{l: l} + p.nextToken() + p.nextToken() + + return p + +} + +func ParseQuery(input string) (expr Expression, err error) { + p := &Parser{l: NewLexer(input)} + p.nextToken() + p.nextToken() + + if p.curTokenIs(TokEOF) { + return nil, fmt.Errorf("unexpected EOF") + } + + for !p.curTokenIs(TokEOF) { + right, err := p.parseExpression(precLowest) + if err != nil { + return nil, fmt.Errorf("search: could not parse expression: %v", err) + } + if expr == nil { + expr = right + } else { + expr = &InfixExpression{ + Operator: TokOpAnd, + Left: expr, + Right: right, + } + } + p.nextToken() + } + + return +} + +func (p *Parser) nextToken() { + p.cur = p.peek + p.peek = p.l.Next() +} + +func (p *Parser) curTokenIs(t TokenType) bool { + return p.cur.Type == t +} + +func (p *Parser) peekTokenIs(t TokenType) bool { + return p.peek.Type == t +} + +func (p *Parser) expectPeek(t TokenType) error { + if !p.peekTokenIs(t) { + return fmt.Errorf("expected next token to be %v, got %v", t, p.peek.Type) + } + p.nextToken() + return nil +} + +func (p *Parser) curPrecedence() precedence { + if p, ok := tokenPrecedences[p.cur.Type]; ok { + return p + } + return precLowest +} + +func (p *Parser) peekPrecedence() precedence { + if p, ok := tokenPrecedences[p.peek.Type]; ok { + return p + } + return precLowest +} + +func (p *Parser) parseExpression(prec precedence) (Expression, error) { + prefixParser, ok := prefixParsers[p.cur.Type] + if !ok { + return nil, fmt.Errorf("no prefix parse 
function for %v found", p.cur.Type) + } + + expr, err := prefixParser(p) + if err != nil { + return nil, fmt.Errorf("could not parse expression prefix: %v", err) + } + + for !p.peekTokenIs(eof) && prec < p.peekPrecedence() { + infixParser, ok := infixParsers[p.peek.Type] + if !ok { + break + } + + p.nextToken() + + expr, err = infixParser(p, expr) + if err != nil { + return nil, fmt.Errorf("could not parse infix expression: %v", err) + } + } + + return expr, nil +} + +func parsePrefixExpression(p *Parser) (Expression, error) { + expr := &PrefixExpression{ + Operator: p.cur.Type, + } + + p.nextToken() + + right, err := p.parseExpression(precPrefix) + if err != nil { + return nil, fmt.Errorf("could not parse expression for right operand: %v", err) + } + expr.Right = right + + return expr, nil +} + +func parseInfixExpression(p *Parser, left Expression) (Expression, error) { + expr := &InfixExpression{ + Operator: p.cur.Type, + Left: left, + } + + prec := p.curPrecedence() + p.nextToken() + + right, err := p.parseExpression(prec) + if err != nil { + return nil, fmt.Errorf("could not parse expression for right operand: %v", err) + } + expr.Right = right + + return expr, nil +} + +func parseStringLiteral(p *Parser) (Expression, error) { + return &StringLiteral{Value: p.cur.Literal}, nil +} + +func parseGroupedExpression(p *Parser) (Expression, error) { + p.nextToken() + + expr, err := p.parseExpression(precLowest) + if err != nil { + return nil, fmt.Errorf("could not parse grouped expression: %v", err) + } + + if err := p.expectPeek(TokParenClose); err != nil { + return nil, err + } + + return expr, nil +} diff --git a/pkg/search/parser_test.go b/pkg/search/parser_test.go new file mode 100644 index 0000000..b07f4c7 --- /dev/null +++ b/pkg/search/parser_test.go @@ -0,0 +1,216 @@ +package search + +import ( + "errors" + "reflect" + "testing" +) + +func TestParseQuery(t *testing.T) { + tests := []struct { + name string + input string + expectedExpression Expression + 
expectedError error + }{ + { + name: "empty query", + input: "", + expectedExpression: nil, + expectedError: errors.New("unexpected EOF"), + }, + { + name: "string literal expression", + input: "foobar", + expectedExpression: &StringLiteral{Value: "foobar"}, + expectedError: nil, + }, + { + name: "boolean expression with equal operator", + input: "foo = bar", + expectedExpression: &InfixExpression{ + Operator: TokOpEq, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + expectedError: nil, + }, + { + name: "boolean expression with not equal operator", + input: "foo != bar", + expectedExpression: &InfixExpression{ + Operator: TokOpNotEq, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + expectedError: nil, + }, + { + name: "boolean expression with greater than operator", + input: "foo > bar", + expectedExpression: &InfixExpression{ + Operator: TokOpGt, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + expectedError: nil, + }, + { + name: "boolean expression with less than operator", + input: "foo < bar", + expectedExpression: &InfixExpression{ + Operator: TokOpLt, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + expectedError: nil, + }, + { + name: "boolean expression with greater than or equal operator", + input: "foo >= bar", + expectedExpression: &InfixExpression{ + Operator: TokOpGtEq, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + expectedError: nil, + }, + { + name: "boolean expression with less than or equal operator", + input: "foo <= bar", + expectedExpression: &InfixExpression{ + Operator: TokOpLtEq, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + expectedError: nil, + }, + { + name: "boolean expression with regular expression operator", + input: "foo =~ bar", + expectedExpression: &InfixExpression{ + Operator: TokOpRe, + Left: &StringLiteral{Value: 
"foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + expectedError: nil, + }, + { + name: "boolean expression with not regular expression operator", + input: "foo !~ bar", + expectedExpression: &InfixExpression{ + Operator: TokOpNotRe, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + expectedError: nil, + }, + { + name: "boolean expression with AND, OR and NOT operators", + input: "foo AND bar OR NOT baz", + expectedExpression: &InfixExpression{ + Operator: TokOpAnd, + Left: &StringLiteral{Value: "foo"}, + Right: &InfixExpression{ + Operator: TokOpOr, + Left: &StringLiteral{Value: "bar"}, + Right: &PrefixExpression{ + Operator: TokOpNot, + Right: &StringLiteral{Value: "baz"}, + }, + }, + }, + expectedError: nil, + }, + { + name: "boolean expression with nested group", + input: "(foo AND bar) OR NOT baz", + expectedExpression: &InfixExpression{ + Operator: TokOpOr, + Left: &InfixExpression{ + Operator: TokOpAnd, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + Right: &PrefixExpression{ + Operator: TokOpNot, + Right: &StringLiteral{Value: "baz"}, + }, + }, + expectedError: nil, + }, + { + name: "implicit boolean expression with string literal operands", + input: "foo bar baz", + expectedExpression: &InfixExpression{ + Operator: TokOpAnd, + Left: &InfixExpression{ + Operator: TokOpAnd, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + Right: &StringLiteral{Value: "baz"}, + }, + expectedError: nil, + }, + { + name: "implicit and explicit boolean expression with string literal operands", + input: "foo bar OR baz yolo", + expectedExpression: &InfixExpression{ + Operator: TokOpAnd, + Left: &InfixExpression{ + Operator: TokOpAnd, + Left: &StringLiteral{Value: "foo"}, + Right: &InfixExpression{ + Operator: TokOpOr, + Left: &StringLiteral{Value: "bar"}, + Right: &StringLiteral{Value: "baz"}, + }, + }, + Right: &StringLiteral{Value: "yolo"}, + }, + expectedError: nil, + }, 
+ { + name: "implicit boolean expression with comparison operands", + input: "foo=bar baz=~yolo", + expectedExpression: &InfixExpression{ + Operator: TokOpAnd, + Left: &InfixExpression{ + Operator: TokOpEq, + Left: &StringLiteral{Value: "foo"}, + Right: &StringLiteral{Value: "bar"}, + }, + Right: &InfixExpression{ + Operator: TokOpRe, + Left: &StringLiteral{Value: "baz"}, + Right: &StringLiteral{Value: "yolo"}, + }, + }, + expectedError: nil, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got, err := ParseQuery(tt.input) + assertError(t, tt.expectedError, err) + if !reflect.DeepEqual(tt.expectedExpression, got) { + t.Errorf("expected: %v, got: %v", tt.expectedExpression, got) + } + }) + } +} + +func assertError(t *testing.T, exp, got error) { + switch { + case exp == nil && got != nil: + t.Fatalf("expected: nil, got: %v", got) + case exp != nil && got == nil: + t.Fatalf("expected: %v, got: nil", exp.Error()) + case exp != nil && got != nil && exp.Error() != got.Error(): + t.Fatalf("expected: %v, got: %v", exp.Error(), got.Error()) + } +}