1/*2Maddy Mail Server - Composable all-in-one email server.3Copyright © 2019-2020 Max Mazurov <fox.cpp@disroot.org>, Maddy Mail Server contributors45This program is free software: you can redistribute it and/or modify6it under the terms of the GNU General Public License as published by7the Free Software Foundation, either version 3 of the License, or8(at your option) any later version.910This program is distributed in the hope that it will be useful,11but WITHOUT ANY WARRANTY; without even the implied warranty of12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the13GNU General Public License for more details.1415You should have received a copy of the GNU General Public License16along with this program. If not, see <https://www.gnu.org/licenses/>.17*/1819// Package config provides set of utilities for configuration parsing.20package parser2122import (23 "errors"24 "fmt"25 "io"26 "strings"27 "unicode"2829 "github.com/foxcpp/maddy/framework/config/lexer"30)3132// Node struct describes a parsed configurtion block or a simple directive.33//34// name arg0 arg1 {35// children036// children137// }38type Node struct {39 // Name is the first string at node's line.40 Name string41 // Args are any strings placed after the node name.42 Args []string4344 // Children slice contains all children blocks if node is a block. Can be nil.45 Children []Node4647 // Snippet indicates whether current parsed node is a snippet. Always false48 // for all nodes returned from Read because snippets are expanded before it49 // returns.50 Snippet bool5152 // Macro indicates whether current parsed node is a macro. Always false53 // for all nodes returned from Read because macros are expanded before it54 // returns.55 Macro bool5657 // File is the name of node's source file.58 File string5960 // Line is the line number where the directive is located in the source file. For61 // blocks this is the line where "block header" (name + args) resides.62 Line int63}6465type parseContext struct {66 lexer.Dispenser67 nesting int68 snippets map[string][]Node69 macros map[string][]string7071 fileLocation string72}7374func validateNodeName(s string) error {75 if len(s) == 0 {76 return errors.New("empty directive name")77 }7879 if unicode.IsDigit([]rune(s)[0]) {80 return errors.New("directive name cannot start with a digit")81 }8283 allowedPunct := map[rune]bool{'.': true, '-': true, '_': true}8485 for _, ch := range s {86 if !unicode.IsLetter(ch) &&87 !unicode.IsDigit(ch) &&88 !allowedPunct[ch] {89 return errors.New("character not allowed in directive name: " + string(ch))90 }91 }9293 return nil94}9596// readNode reads node starting at current token pointed by the lexer's97// cursor (it should point to node name).98//99// After readNode returns, the lexer's cursor will point to the last token of the parsed100// Node. This ensures predictable cursor location independently of the EOF state.101// Thus code reading multiple nodes should call readNode then manually102// advance lexer cursor (ctx.Next) and either call readNode again or stop103// because cursor hit EOF.104//105// readNode calls readNodes if currently parsed node is a block.106func (ctx *parseContext) readNode() (Node, error) {107 node := Node{}108 node.File = ctx.File()109 node.Line = ctx.Line()110111 if ctx.Val() == "{" {112 return node, ctx.SyntaxErr("block header")113 }114115 node.Name = ctx.Val()116 if ok, name := ctx.isSnippet(node.Name); ok {117 node.Name = name118 node.Snippet = true119 }120121 var continueOnLF bool122 for {123 for ctx.NextArg() || (continueOnLF && ctx.NextLine()) {124 continueOnLF = false125 // name arg0 arg1 {126 // # ^ called when we hit this token127 // c0128 // c1129 // }130 if ctx.Val() == "{" {131 var err error132 node.Children, err = ctx.readNodes()133 if err != nil {134 return node, err135 }136 break137 }138139 node.Args = append(node.Args, ctx.Val())140 }141142 // Continue reading the same Node if the \ was used to escape the newline.143 // E.g.144 // name arg0 arg1 \145 // arg2 arg3146 if len(node.Args) != 0 && node.Args[len(node.Args)-1] == `\` {147 last := len(node.Args) - 1148 node.Args[last] = node.Args[last][:len(node.Args[last])-1]149 if len(node.Args[last]) == 0 {150 node.Args = node.Args[:last]151 }152 continueOnLF = true153 continue154 }155 break156 }157158 macroName, macroArgs, err := ctx.parseAsMacro(&node)159 if err != nil {160 return node, err161 }162 if macroName != "" {163 node.Name = macroName164 node.Args = macroArgs165 node.Macro = true166 }167168 if !node.Macro && !node.Snippet {169 if err := validateNodeName(node.Name); err != nil {170 return node, err171 }172 }173174 return node, nil175}176177func NodeErr(node Node, f string, args ...interface{}) error {178 if node.File == "" {179 return fmt.Errorf(f, args...)180 }181 return fmt.Errorf("%s:%d: %s", node.File, node.Line, fmt.Sprintf(f, args...))182}183184func (ctx *parseContext) isSnippet(name string) (bool, string) {185 if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {186 return true, name[1 : len(name)-1]187 }188 return false, ""189}190191func (ctx *parseContext) parseAsMacro(node *Node) (macroName string, args []string, err error) {192 if !strings.HasPrefix(node.Name, "$(") {193 return "", nil, nil194 }195 if !strings.HasSuffix(node.Name, ")") {196 return "", nil, ctx.Err("macro name must end with )")197 }198 macroName = node.Name[2 : len(node.Name)-1]199 if len(node.Args) < 2 {200 return macroName, nil, ctx.Err("at least 2 arguments are required")201 }202 if node.Args[0] != "=" {203 return macroName, nil, ctx.Err("missing = in macro declaration")204 }205 return macroName, node.Args[1:], nil206}207208// readNodes reads nodes from the currently parsed block.209//210// The lexer's cursor should point to the opening brace211// name arg0 arg1 { #< this one212//213// c0214// c1215// }216//217// To stay consistent with readNode after this function returns the lexer's cursor points218// to the last token of the black (closing brace).219func (ctx *parseContext) readNodes() ([]Node, error) {220 // It is not 'var res []Node' because we want empty221 // but non-nil Children slice for empty braces.222 res := []Node{}223224 if ctx.nesting > 255 {225 return res, ctx.Err("nesting limit reached")226 }227228 ctx.nesting++229230 var requireNewLine bool231 // This loop iterates over logical lines.232 // Here are some examples, '#' is placed before token where cursor is when233 // another iteration of this loop starts.234 //235 // #a236 // #a b237 // #a b {238 // #ac aa239 // #}240 // #aa bbb bbb \241 // ccc ccc242 // #a b { #ac aa }243 //244 // As can be seen by the latest example, sometimes such logical line might245 // not be terminated by an actual LF character and so this needs to be246 // handled carefully.247 //248 // Note that if the '}' is on the same physical line, it is currently249 // included as the part of the logical line, that is:250 // #a b { #ac aa }251 // ^------- that's the logical line252 // #c d253 // ^--- that's the next logical line254 // This is handled by the "edge case" branch inside the loop.255 for {256 if requireNewLine {257 if !ctx.NextLine() {258 // If we can't advance cursor even without Line constraint -259 // that's EOF.260 if !ctx.Next() {261 return res, nil262 }263 return res, ctx.Err("newline is required after closing brace")264 }265 } else if !ctx.Next() {266 break267 }268269 // name arg0 arg1 {270 // c0271 // c1272 // }273 // ^ called when we hit } on separate line,274 // This means block we hit end of our block.275 if ctx.Val() == "}" {276 ctx.nesting--277 // name arg0 arg1 { #<1278 // } }279 // ^2 ^3280 //281 // After #1 ctx.nesting is incremented by ctx.nesting++ before this loop.282 // Then we advance cursor and hit }, we exit loop, ctx.nesting now becomes 0.283 // But then the parent block reader does the same when it hits #3 -284 // ctx.nesting becomes -1 and it fails.285 if ctx.nesting < 0 {286 return res, ctx.Err("unexpected }")287 }288 break289 }290 node, err := ctx.readNode()291 if err != nil {292 return res, err293 }294 requireNewLine = true295296 shouldStop := false297298 // name arg0 arg1 {299 // c1 c2 }300 // ^301 // Edge case, here we check if the last argument of the last node is a }302 // If it is - we stop as we hit the end of our block.303 if len(node.Args) != 0 && node.Args[len(node.Args)-1] == "}" {304 ctx.nesting--305 if ctx.nesting < 0 {306 return res, ctx.Err("unexpected }")307 }308 node.Args = node.Args[:len(node.Args)-1]309 shouldStop = true310 }311312 if node.Macro {313 if ctx.nesting != 0 {314 return res, ctx.Err("macro declarations are only allowed at top-level")315 }316317 // Macro declaration itself can contain macro references.318 if err := ctx.expandMacros(&node); err != nil {319 return res, err320 }321322 // = sign is removed by parseAsMacro.323 // It also cuts $( and ) from name.324 ctx.macros[node.Name] = node.Args325 continue326 }327 if node.Snippet {328 if ctx.nesting != 0 {329 return res, ctx.Err("snippet declarations are only allowed at top-level")330 }331 if len(node.Args) != 0 {332 return res, ctx.Err("snippet declarations can't have arguments")333 }334335 ctx.snippets[node.Name] = node.Children336 continue337 }338339 if err := ctx.expandMacros(&node); err != nil {340 return res, err341 }342343 res = append(res, node)344 if shouldStop {345 break346 }347 }348 return res, nil349}350351func readTree(r io.Reader, location string, expansionDepth int) (nodes []Node, snips map[string][]Node, macros map[string][]string, err error) {352 ctx := parseContext{353 Dispenser: lexer.NewDispenser(location, r),354 snippets: make(map[string][]Node),355 macros: map[string][]string{},356 nesting: -1,357 fileLocation: location,358 }359360 root := Node{}361 root.File = location362 root.Line = 1363 // Before parsing starts the lexer's cursor points to the non-existent364 // token before the first one. From readNodes viewpoint this is opening365 // brace so we don't break any requirements here.366 //367 // For the same reason we use -1 as a starting nesting. So readNodes368 // will see this as it is reading block at nesting level 0.369 root.Children, err = ctx.readNodes()370 if err != nil {371 return root.Children, ctx.snippets, ctx.macros, err372 }373374 // There is no need to check ctx.nesting < 0 because it is checked by readNodes.375 if ctx.nesting > 0 {376 return root.Children, ctx.snippets, ctx.macros, ctx.Err("unexpected EOF when looking for }")377 }378379 root, err = ctx.expandImports(root, expansionDepth)380 if err != nil {381 return root.Children, ctx.snippets, ctx.macros, err382 }383384 return root.Children, ctx.snippets, ctx.macros, nil385}386387func Read(r io.Reader, location string) (nodes []Node, err error) {388 nodes, _, _, err = readTree(r, location, 0)389 nodes = expandEnvironment(nodes)390 return391}