refactor dotenv parser to support multi-line variable value declarations

x1unix · x1unix · commit d9069cd4972d · 2021-09-24T00:22:00.000+03:00
Signed-off-by: x1unix <denis0051@gmail.com>
diff --git a/godotenv.go b/godotenv.go
@@ -14,10 +14,10 @@
 package godotenv
 
 import (
-	"bufio"
 	"errors"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
 	"os/exec"
 	"regexp"
@@ -27,6 +27,16 @@ import (
 
 const doubleQuoteSpecialChars = "\\\n\r\"!$`"
 
+// Parse reads an env file from io.Reader, returning a map of keys and values.
+func Parse(r io.Reader) (map[string]string, error) {
+	data, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, err
+	}
+
+	return UnmarshalBytes(data)
+}
+
 // Load will read your env file(s) and load them into ENV for this process.
 //
 // Call this function as close as possible to the start of your program (ideally in main)
@@ -95,37 +105,16 @@ func Read(filenames ...string) (envMap map[string]string, err error) {
 	return
 }
 
-// Parse reads an env file from io.Reader, returning a map of keys and values.
-func Parse(r io.Reader) (envMap map[string]string, err error) {
-	envMap = make(map[string]string)
-
-	var lines []string
-	scanner := bufio.NewScanner(r)
-	for scanner.Scan() {
-		lines = append(lines, scanner.Text())
-	}
-
-	if err = scanner.Err(); err != nil {
-		return
-	}
-
-	for _, fullLine := range lines {
-		if !isIgnoredLine(fullLine) {
-			var key, value string
-			key, value, err = parseLine(fullLine, envMap)
-
-			if err != nil {
-				return
-			}
-			envMap[key] = value
-		}
-	}
-	return
+// Unmarshal reads an env file from a string, returning a map of keys and values.
+func Unmarshal(str string) (envMap map[string]string, err error) {
+	return UnmarshalBytes([]byte(str))
 }
 
-//Unmarshal reads an env file from a string, returning a map of keys and values.
-func Unmarshal(str string) (envMap map[string]string, err error) {
-	return Parse(strings.NewReader(str))
+// UnmarshalBytes parses an env file from a byte slice, returning a map of keys and values.
+func UnmarshalBytes(src []byte) (map[string]string, error) {
+	out := make(map[string]string)
+	err := parseBytes(src, out)
+	return out, err
 }
 
 // Exec loads env vars from the specified filenames (empty map falls back to default)
@@ -136,7 +125,9 @@ func Unmarshal(str string) (envMap map[string]string, err error) {
 // If you want more fine grained control over your command it's recommended
 // that you use `Load()` or `Read()` and the `os/exec` package yourself.
 func Exec(filenames []string, cmd string, cmdArgs []string) error {
-	Load(filenames...)
+	if err := Load(filenames...); err != nil {
+		return err
+	}
 
 	command := exec.Command(cmd, cmdArgs...)
 	command.Stdin = os.Stdin
@@ -160,8 +151,7 @@ func Write(envMap map[string]string, filename string) error {
 	if err != nil {
 		return err
 	}
-	file.Sync()
-	return err
+	return file.Sync()
 }
 
 // Marshal outputs the given environment as a dotenv-formatted environment file.
@@ -197,7 +187,7 @@ func loadFile(filename string, overload bool) error {
 
 	for key, value := range envMap {
 		if !currentEnv[key] || overload {
-			os.Setenv(key, value)
+			_ = os.Setenv(key, value)
 		}
 	}
 
@@ -338,11 +328,6 @@ func expandVariables(v string, m map[string]string) string {
 	})
 }
 
-func isIgnoredLine(line string) bool {
-	trimmedLine := strings.TrimSpace(line)
-	return len(trimmedLine) == 0 || strings.HasPrefix(trimmedLine, "#")
-}
-
 func doubleQuoteEscape(line string) string {
 	for _, c := range doubleQuoteSpecialChars {
 		toReplace := "\\" + string(c)
diff --git a/parser.go b/parser.go
@@ -0,0 +1,206 @@
+package godotenv
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"strings"
+	"unicode"
+)
+
+const (
+	charComment       = '#'
+	prefixSingleQuote = '\''
+	prefixDoubleQuote = '"'
+
+	exportPrefix = "export"
+)
+
+func parseBytes(src []byte, out map[string]string) error {
+	cutset := src
+	for {
+		cutset = getStatementStart(cutset)
+		if cutset == nil {
+			// reached end of file
+			break
+		}
+
+		key, left, err := locateKeyName(cutset)
+		if err != nil {
+			return err
+		}
+
+		value, left, err := extractVarValue(left, out)
+		if err != nil {
+			return err
+		}
+
+		out[key] = value
+		cutset = left
+	}
+
+	return nil
+}
+
+// getStatementStart returns src advanced to the start of the next statement, or nil at end of input.
+//
+// It skips leading whitespace (including line breaks) and comment lines.
+func getStatementStart(src []byte) []byte {
+	pos := indexOfNonSpaceChar(src)
+	if pos == -1 {
+		return nil
+	}
+
+	src = src[pos:]
+	if src[0] != charComment {
+		return src
+	}
+
+	// skip comment section
+	pos = bytes.IndexFunc(src, isCharFunc('\n'))
+	if pos == -1 {
+		return nil
+	}
+
+	return getStatementStart(src[pos:])
+}
+
+// locateKeyName locates and parses key name and returns rest of slice
+func locateKeyName(src []byte) (key string, cutset []byte, err error) {
+	// trim "export" and space at beginning
+	src = bytes.TrimLeftFunc(bytes.TrimPrefix(src, []byte(exportPrefix)), isSpace)
+
+	// locate key name end and validate it in single loop
+	offset := 0
+loop:
+	for i, char := range src {
+		rchar := rune(char)
+		if isSpace(rchar) {
+			continue
+		}
+
+		switch char {
+		case '=', ':':
+			// library also supports yaml-style value declaration
+			key = string(src[0:i])
+			offset = i + 1
+			break loop
+		case '_':
+		default:
+			// variable name should match [A-Za-z0-9_]
+			if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) {
+				continue
+			}
+
+			return "", nil, fmt.Errorf(
+				`unexpected character %q in variable name near %q`,
+				string(char), string(src))
+		}
+	}
+
+	if len(src) == 0 {
+		return "", nil, errors.New("zero length string")
+	}
+
+	// trim whitespace
+	key = strings.TrimRightFunc(key, unicode.IsSpace)
+	cutset = bytes.TrimLeftFunc(src[offset:], isSpace)
+	return key, cutset, nil
+}
+
+// extractVarValue extracts variable value and returns rest of slice
+func extractVarValue(src []byte, vars map[string]string) (value string, rest []byte, err error) {
+	quote, hasPrefix := hasQuotePrefix(src)
+	if !hasPrefix {
+		// unquoted value - read until whitespace
+		end := bytes.IndexFunc(src, unicode.IsSpace)
+		if end == -1 {
+			return expandVariables(string(src), vars), nil, nil
+		}
+
+		return expandVariables(string(src[0:end]), vars), src[end:], nil
+	}
+
+	// lookup quoted string terminator
+	for i := 1; i < len(src); i++ {
+		if char := src[i]; char != quote {
+			continue
+		}
+
+		// skip escaped quote symbol (\" or \', depends on quote)
+		if prevChar := src[i-1]; prevChar == '\\' {
+			continue
+		}
+
+		// trim quotes
+		trimFunc := isCharFunc(rune(quote))
+		value = string(bytes.TrimLeftFunc(bytes.TrimRightFunc(src[0:i], trimFunc), trimFunc))
+		if quote == prefixDoubleQuote {
+			// unescape newlines for double quote (this is compat feature)
+			// and expand environment variables
+			value = expandVariables(expandEscapes(value), vars)
+		}
+
+		return value, src[i+1:], nil
+	}
+
+	// return formatted error if quoted string is not terminated
+	valEndIndex := bytes.IndexFunc(src, isCharFunc('\n'))
+	if valEndIndex == -1 {
+		valEndIndex = len(src)
+	}
+
+	return "", nil, fmt.Errorf("unterminated quoted value %s", src[:valEndIndex])
+}
+
+func expandEscapes(str string) string {
+	out := escapeRegex.ReplaceAllStringFunc(str, func(match string) string {
+		c := strings.TrimPrefix(match, `\`)
+		switch c {
+		case "n":
+			return "\n"
+		case "r":
+			return "\r"
+		default:
+			return match
+		}
+	})
+	return unescapeCharsRegex.ReplaceAllString(out, "$1")
+}
+
+func indexOfNonSpaceChar(src []byte) int {
+	return bytes.IndexFunc(src, func(r rune) bool {
+		return !unicode.IsSpace(r)
+	})
+}
+
+// hasQuotePrefix reports whether src starts with a single or double quote and returns that quote character.
+func hasQuotePrefix(src []byte) (prefix byte, isQuored bool) {
+	if len(src) == 0 {
+		return 0, false
+	}
+
+	switch prefix := src[0]; prefix {
+	case prefixDoubleQuote, prefixSingleQuote:
+		return prefix, true
+	default:
+		return 0, false
+	}
+}
+
+func isCharFunc(char rune) func(rune) bool {
+	return func(v rune) bool {
+		return v == char
+	}
+}
+
+// isSpace reports whether the rune is a whitespace character other than the newline '\n'.
+//
+// This differs from unicode.IsSpace, which also treats '\n' as whitespace.
+func isSpace(r rune) bool {
+	switch r {
+	case '\t', '\v', '\f', '\r', ' ', 0x85, 0xA0:
+		return true
+	}
+	return false
+}