maddy

Fork https://github.com/foxcpp/maddy

git clone git://git.lin.moe/go/maddy.git

  1/*
  2Maddy Mail Server - Composable all-in-one email server.
  3Copyright © 2019-2020 Max Mazurov <fox.cpp@disroot.org>, Maddy Mail Server contributors
  4
  5This program is free software: you can redistribute it and/or modify
  6it under the terms of the GNU General Public License as published by
  7the Free Software Foundation, either version 3 of the License, or
  8(at your option) any later version.
  9
 10This program is distributed in the hope that it will be useful,
 11but WITHOUT ANY WARRANTY; without even the implied warranty of
 12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13GNU General Public License for more details.
 14
 15You should have received a copy of the GNU General Public License
 16along with this program.  If not, see <https://www.gnu.org/licenses/>.
 17*/
 18
// Package parser provides a set of utilities for configuration parsing.
 20package parser
 21
 22import (
 23	"errors"
 24	"fmt"
 25	"io"
 26	"strings"
 27	"unicode"
 28
 29	"github.com/foxcpp/maddy/framework/config/lexer"
 30)
 31
// Node struct describes a parsed configuration block or a simple directive.
//
//	name arg0 arg1 {
//	 children0
//	 children1
//	}
type Node struct {
	// Name is the first string at node's line.
	Name string
	// Args are any strings placed after the node name.
	Args []string

	// Children slice contains all children blocks if node is a block. Can be
	// nil. The parser in this file produces an empty but non-nil slice for
	// empty braces.
	Children []Node

	// Snippet indicates whether current parsed node is a snippet. Always false
	// for all nodes returned from Read because snippets are expanded before it
	// returns.
	Snippet bool

	// Macro indicates whether current parsed node is a macro. Always false
	// for all nodes returned from Read because macros are expanded before it
	// returns.
	Macro bool

	// File is the name of node's source file. NodeErr uses it, together with
	// Line, to prefix error messages; an empty File disables the prefix.
	File string

	// Line is the line number where the directive is located in the source file. For
	// blocks this is the line where "block header" (name + args) resides.
	Line int
}
 64
// parseContext holds the state of one parse run: the token cursor plus the
// snippet and macro declarations collected while reading the top level.
type parseContext struct {
	// Dispenser is the embedded token cursor all read* methods advance.
	lexer.Dispenser
	// nesting is the current block depth. readTree starts it at -1 so the
	// implicit root block is read at depth 0; readNodes refuses to go deeper
	// once it exceeds 255.
	nesting  int
	// snippets maps a snippet name to the children of its declaration.
	snippets map[string][]Node
	// macros maps a macro name to the arguments of its declaration.
	macros   map[string][]string

	// fileLocation is the location string given to readTree.
	fileLocation string
}
 73
// validateNodeName checks that s can be used as a directive name.
//
// A valid name is non-empty, does not start with a digit and contains only
// letters, digits and the characters '.', '-' and '_'. It returns nil for a
// valid name and an error describing the first violated rule otherwise.
func validateNodeName(s string) error {
	if s == "" {
		return errors.New("empty directive name")
	}

	// Ranging over the string decodes it rune by rune, so the leading-digit
	// check needs no []rune copy of the whole name. The digit rule is tested
	// first on the leading rune so it takes precedence over the generic one.
	for i, ch := range s {
		if i == 0 && unicode.IsDigit(ch) {
			return errors.New("directive name cannot start with a digit")
		}

		switch {
		case unicode.IsLetter(ch), unicode.IsDigit(ch):
		case ch == '.', ch == '-', ch == '_':
		default:
			return errors.New("character not allowed in directive name: " + string(ch))
		}
	}

	return nil
}
 95
 96// readNode reads node starting at current token pointed by the lexer's
 97// cursor (it should point to node name).
 98//
 99// After readNode returns, the lexer's cursor will point to the last token of the parsed
100// Node. This ensures predictable cursor location independently of the EOF state.
101// Thus code reading multiple nodes should call readNode then manually
102// advance lexer cursor (ctx.Next) and either call readNode again or stop
103// because cursor hit EOF.
104//
105// readNode calls readNodes if currently parsed node is a block.
106func (ctx *parseContext) readNode() (Node, error) {
107	node := Node{}
108	node.File = ctx.File()
109	node.Line = ctx.Line()
110
111	if ctx.Val() == "{" {
112		return node, ctx.SyntaxErr("block header")
113	}
114
115	node.Name = ctx.Val()
116	if ok, name := ctx.isSnippet(node.Name); ok {
117		node.Name = name
118		node.Snippet = true
119	}
120
121	var continueOnLF bool
122	for {
123		for ctx.NextArg() || (continueOnLF && ctx.NextLine()) {
124			continueOnLF = false
125			// name arg0 arg1 {
126			//              # ^ called when we hit this token
127			//   c0
128			//   c1
129			// }
130			if ctx.Val() == "{" {
131				var err error
132				node.Children, err = ctx.readNodes()
133				if err != nil {
134					return node, err
135				}
136				break
137			}
138
139			node.Args = append(node.Args, ctx.Val())
140		}
141
142		// Continue reading the same Node if the \ was used to escape the newline.
143		// E.g.
144		//   name arg0 arg1 \
145		//	   arg2 arg3
146		if len(node.Args) != 0 && node.Args[len(node.Args)-1] == `\` {
147			last := len(node.Args) - 1
148			node.Args[last] = node.Args[last][:len(node.Args[last])-1]
149			if len(node.Args[last]) == 0 {
150				node.Args = node.Args[:last]
151			}
152			continueOnLF = true
153			continue
154		}
155		break
156	}
157
158	macroName, macroArgs, err := ctx.parseAsMacro(&node)
159	if err != nil {
160		return node, err
161	}
162	if macroName != "" {
163		node.Name = macroName
164		node.Args = macroArgs
165		node.Macro = true
166	}
167
168	if !node.Macro && !node.Snippet {
169		if err := validateNodeName(node.Name); err != nil {
170			return node, err
171		}
172	}
173
174	return node, nil
175}
176
177func NodeErr(node Node, f string, args ...interface{}) error {
178	if node.File == "" {
179		return fmt.Errorf(f, args...)
180	}
181	return fmt.Errorf("%s:%d: %s", node.File, node.Line, fmt.Sprintf(f, args...))
182}
183
184func (ctx *parseContext) isSnippet(name string) (bool, string) {
185	if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {
186		return true, name[1 : len(name)-1]
187	}
188	return false, ""
189}
190
191func (ctx *parseContext) parseAsMacro(node *Node) (macroName string, args []string, err error) {
192	if !strings.HasPrefix(node.Name, "$(") {
193		return "", nil, nil
194	}
195	if !strings.HasSuffix(node.Name, ")") {
196		return "", nil, ctx.Err("macro name must end with )")
197	}
198	macroName = node.Name[2 : len(node.Name)-1]
199	if len(node.Args) < 2 {
200		return macroName, nil, ctx.Err("at least 2 arguments are required")
201	}
202	if node.Args[0] != "=" {
203		return macroName, nil, ctx.Err("missing = in macro declaration")
204	}
205	return macroName, node.Args[1:], nil
206}
207
208// readNodes reads nodes from the currently parsed block.
209//
210// The lexer's cursor should point to the opening brace
211// name arg0 arg1 {  #< this one
212//
213//	  c0
214//	  c1
215//	}
216//
217// To stay consistent with readNode after this function returns the lexer's cursor points
218// to the last token of the black (closing brace).
219func (ctx *parseContext) readNodes() ([]Node, error) {
220	// It is not 'var res []Node' because we want empty
221	// but non-nil Children slice for empty braces.
222	res := []Node{}
223
224	if ctx.nesting > 255 {
225		return res, ctx.Err("nesting limit reached")
226	}
227
228	ctx.nesting++
229
230	var requireNewLine bool
231	// This loop iterates over logical lines.
232	// Here are some examples, '#' is placed before token where cursor is when
233	// another iteration of this loop starts.
234	//
235	// #a
236	// #a b
237	// #a b {
238	//   #ac aa
239	// #}
240	// #aa bbb bbb \
241	//    ccc ccc
242	// #a b { #ac aa }
243	//
244	// As can be seen by the latest example, sometimes such logical line might
245	// not be terminated by an actual LF character and so this needs to be
246	// handled carefully.
247	//
248	// Note that if the '}' is on the same physical line, it is currently
249	// included as the part of the logical line, that is:
250	// #a b { #ac aa }
251	//        ^------- that's the logical line
252	// #c d
253	// ^--- that's the next logical line
254	// This is handled by the "edge case" branch inside the loop.
255	for {
256		if requireNewLine {
257			if !ctx.NextLine() {
258				// If we can't advance cursor even without Line constraint -
259				// that's EOF.
260				if !ctx.Next() {
261					return res, nil
262				}
263				return res, ctx.Err("newline is required after closing brace")
264			}
265		} else if !ctx.Next() {
266			break
267		}
268
269		// name arg0 arg1 {
270		//   c0
271		//   c1
272		// }
273		// ^ called when we hit } on separate line,
274		// This means block we hit end of our block.
275		if ctx.Val() == "}" {
276			ctx.nesting--
277			// name arg0 arg1 { #<1
278			// }   }
279			// ^2  ^3
280			//
281			// After #1 ctx.nesting is incremented by ctx.nesting++ before this loop.
282			// Then we advance cursor and hit }, we exit loop, ctx.nesting now becomes 0.
283			// But then the parent block reader does the same when it hits #3 -
284			// ctx.nesting becomes -1 and it fails.
285			if ctx.nesting < 0 {
286				return res, ctx.Err("unexpected }")
287			}
288			break
289		}
290		node, err := ctx.readNode()
291		if err != nil {
292			return res, err
293		}
294		requireNewLine = true
295
296		shouldStop := false
297
298		// name arg0 arg1 {
299		//   c1 c2 }
300		//         ^
301		// Edge case, here we check if the last argument of the last node is a }
302		// If it is - we stop as we hit the end of our block.
303		if len(node.Args) != 0 && node.Args[len(node.Args)-1] == "}" {
304			ctx.nesting--
305			if ctx.nesting < 0 {
306				return res, ctx.Err("unexpected }")
307			}
308			node.Args = node.Args[:len(node.Args)-1]
309			shouldStop = true
310		}
311
312		if node.Macro {
313			if ctx.nesting != 0 {
314				return res, ctx.Err("macro declarations are only allowed at top-level")
315			}
316
317			// Macro declaration itself can contain macro references.
318			if err := ctx.expandMacros(&node); err != nil {
319				return res, err
320			}
321
322			// = sign is removed by parseAsMacro.
323			// It also cuts $( and ) from name.
324			ctx.macros[node.Name] = node.Args
325			continue
326		}
327		if node.Snippet {
328			if ctx.nesting != 0 {
329				return res, ctx.Err("snippet declarations are only allowed at top-level")
330			}
331			if len(node.Args) != 0 {
332				return res, ctx.Err("snippet declarations can't have arguments")
333			}
334
335			ctx.snippets[node.Name] = node.Children
336			continue
337		}
338
339		if err := ctx.expandMacros(&node); err != nil {
340			return res, err
341		}
342
343		res = append(res, node)
344		if shouldStop {
345			break
346		}
347	}
348	return res, nil
349}
350
351func readTree(r io.Reader, location string, expansionDepth int) (nodes []Node, snips map[string][]Node, macros map[string][]string, err error) {
352	ctx := parseContext{
353		Dispenser:    lexer.NewDispenser(location, r),
354		snippets:     make(map[string][]Node),
355		macros:       map[string][]string{},
356		nesting:      -1,
357		fileLocation: location,
358	}
359
360	root := Node{}
361	root.File = location
362	root.Line = 1
363	// Before parsing starts the lexer's cursor points to the non-existent
364	// token before the first one. From readNodes viewpoint this is opening
365	// brace so we don't break any requirements here.
366	//
367	// For the same reason we use -1 as a starting nesting. So readNodes
368	// will see this as it is reading block at nesting level 0.
369	root.Children, err = ctx.readNodes()
370	if err != nil {
371		return root.Children, ctx.snippets, ctx.macros, err
372	}
373
374	// There is no need to check ctx.nesting < 0 because it is checked by readNodes.
375	if ctx.nesting > 0 {
376		return root.Children, ctx.snippets, ctx.macros, ctx.Err("unexpected EOF when looking for }")
377	}
378
379	root, err = ctx.expandImports(root, expansionDepth)
380	if err != nil {
381		return root.Children, ctx.snippets, ctx.macros, err
382	}
383
384	return root.Children, ctx.snippets, ctx.macros, nil
385}
386
387func Read(r io.Reader, location string) (nodes []Node, err error) {
388	nodes, _, _, err = readTree(r, location, 0)
389	nodes = expandEnvironment(nodes)
390	return
391}