From c61b08ec1c7da57161413d05b3978bbcc4f5847d Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 01:11:30 +0300 Subject: [PATCH 001/137] hclfmt: initial skeleton --- hclfmt.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 hclfmt.go diff --git a/hclfmt.go b/hclfmt.go new file mode 100644 index 0000000..5b3a2e2 --- /dev/null +++ b/hclfmt.go @@ -0,0 +1,101 @@ +package main + +import ( + "errors" + "flag" + "fmt" + "go/scanner" + "io" + "os" + "path/filepath" + "runtime/pprof" + "strings" +) + +func main() { + if err := realMain(); err != nil { + fmt.Fprintln(os.Stderr, err.Error()) + os.Exit(1) + } +} + +func realMain() error { + var ( + write = flag.Bool("w", false, "write result to (source) file instead of stdout") + + // debugging + cpuprofile = flag.String("cpuprofile", "", "write cpu profile to this file") + ) + + flag.Usage = usage + flag.Parse() + + if *cpuprofile != "" { + f, err := os.Create(*cpuprofile) + if err != nil { + return fmt.Errorf("creating cpu profile: %s\n", err) + } + defer f.Close() + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() + } + + if flag.NArg() == 0 { + if *write { + return errors.New("error: cannot use -w with standard input") + } + + return processFile("", os.Stdin, os.Stdout, true) + } + + for i := 0; i < flag.NArg(); i++ { + path := flag.Arg(i) + switch dir, err := os.Stat(path); { + case err != nil: + report(err) + case dir.IsDir(): + walkDir(path) + default: + if err := processFile(path, nil, os.Stdout, false); err != nil { + report(err) + } + } + } + + return nil +} + +func usage() { + fmt.Fprintf(os.Stderr, "usage: hclfmt [flags] [path ...]\n") + flag.PrintDefaults() + os.Exit(2) +} + +func isGoFile(f os.FileInfo) bool { + // ignore non-Go files + name := f.Name() + return !f.IsDir() && !strings.HasPrefix(name, ".") && strings.HasSuffix(name, ".go") +} + +func report(err error) { + scanner.PrintError(os.Stderr, err) +} + +func 
walkDir(path string) { + filepath.Walk(path, visitFile) +} + +func visitFile(path string, f os.FileInfo, err error) error { + if err == nil && isGoFile(f) { + err = processFile(path, nil, os.Stdout, false) + } + if err != nil { + report(err) + } + return nil +} + +// If in == nil, the source is the contents of the file with the given filename. +func processFile(filename string, in io.Reader, out io.Writer, stdin bool) error { + return errors.New("not imlemented yet") +} From 9c29a827885ba2025ad7fc1641b6a197b36ac6e7 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 02:18:04 +0300 Subject: [PATCH 002/137] hclfmt: only parse hcl files --- hclfmt.go | 37 ++++++++++++++++++++++++++++++------- testdata/complex.hcl | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 testdata/complex.hcl diff --git a/hclfmt.go b/hclfmt.go index 5b3a2e2..707fa4b 100644 --- a/hclfmt.go +++ b/hclfmt.go @@ -6,10 +6,13 @@ import ( "fmt" "go/scanner" "io" + "io/ioutil" "os" "path/filepath" "runtime/pprof" "strings" + + "github.com/hashicorp/hcl" ) func main() { @@ -71,22 +74,22 @@ func usage() { os.Exit(2) } -func isGoFile(f os.FileInfo) bool { - // ignore non-Go files - name := f.Name() - return !f.IsDir() && !strings.HasPrefix(name, ".") && strings.HasSuffix(name, ".go") -} - func report(err error) { scanner.PrintError(os.Stderr, err) } +func isHclFile(f os.FileInfo) bool { + // ignore non-hcl files + name := f.Name() + return !f.IsDir() && !strings.HasPrefix(name, ".") && strings.HasSuffix(name, ".hcl") +} + func walkDir(path string) { filepath.Walk(path, visitFile) } func visitFile(path string, f os.FileInfo, err error) error { - if err == nil && isGoFile(f) { + if err == nil && isHclFile(f) { err = processFile(path, nil, os.Stdout, false) } if err != nil { @@ -97,5 +100,25 @@ func visitFile(path string, f os.FileInfo, err error) error { // If in == nil, the source is the contents of the file with the given 
filename. func processFile(filename string, in io.Reader, out io.Writer, stdin bool) error { + if in == nil { + f, err := os.Open(filename) + if err != nil { + return err + } + defer f.Close() + in = f + } + + src, err := ioutil.ReadAll(in) + if err != nil { + return err + } + + obj, err := hcl.Parse(string(src)) + if err != nil { + return err + } + + fmt.Printf("obj = %+v\n", obj) return errors.New("not imlemented yet") } diff --git a/testdata/complex.hcl b/testdata/complex.hcl new file mode 100644 index 0000000..cccb5b0 --- /dev/null +++ b/testdata/complex.hcl @@ -0,0 +1,42 @@ +// This comes from Terraform, as a test +variable "foo" { + default = "bar" + description = "bar" +} + +provider "aws" { + access_key = "foo" + secret_key = "bar" +} + +provider "do" { + api_key = "${var.foo}" +} + +resource "aws_security_group" "firewall" { + count = 5 +} + +resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}" + ] + + network_interface { + device_index = 0 + description = "Main network interface" + } +} + +resource "aws_instance" "db" { + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + + depends_on = ["aws_instance.web"] +} + +output "web_ip" { + value = "${aws_instance.web.private_ip}" +} From e5a8a1fa62354a0d722e119a6190e91468923375 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 03:30:57 +0300 Subject: [PATCH 003/137] hclprinter: initial printer package --- hclprinter/hclprinter.go | 92 +++++++++++++++++++++++++++++++++++ hclprinter/hclprinter_test.go | 63 ++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 hclprinter/hclprinter.go create mode 100644 hclprinter/hclprinter_test.go diff --git a/hclprinter/hclprinter.go b/hclprinter/hclprinter.go new file mode 100644 index 0000000..ffe1777 --- /dev/null +++ b/hclprinter/hclprinter.go @@ -0,0 +1,92 @@ +package hclprinter + +import ( + "bytes" + "fmt" + "io" + "text/tabwriter" + + 
"github.com/hashicorp/hcl/hcl" +) + +type printer struct { + cfg Config + obj *hcl.Object +} + +func (p *printer) output() []byte { + var buf bytes.Buffer + fmt.Println("STARTING OUTPUT") + return buf.Bytes() +} + +// A Mode value is a set of flags (or 0). They control printing. +type Mode uint + +const ( + RawFormat Mode = 1 << iota // do not use a tabwriter; if set, UseSpaces is ignored + TabIndent // use tabs for indentation independent of UseSpaces + UseSpaces // use spaces instead of tabs for alignment +) + +// A Config node controls the output of Fprint. +type Config struct { + Mode Mode // default: 0 + Tabwidth int // default: 8 + Indent int // default: 0 (all code is indented at least by this much) +} + +func (c *Config) fprint(output io.Writer, obj *hcl.Object) error { + p := &printer{ + cfg: *c, + obj: obj, + } + + // TODO(arslan): implement this + // redirect output through a trimmer to eliminate trailing whitespace + // (Input to a tabwriter must be untrimmed since trailing tabs provide + // formatting information. The tabwriter could provide trimming + // functionality but no tabwriter is used when RawFormat is set.) 
+ // output = &trimmer{output: output} + + // redirect output through a tabwriter if necessary + if c.Mode&RawFormat == 0 { + minwidth := c.Tabwidth + + padchar := byte('\t') + if c.Mode&UseSpaces != 0 { + padchar = ' ' + } + + twmode := tabwriter.DiscardEmptyColumns + if c.Mode&TabIndent != 0 { + minwidth = 0 + twmode |= tabwriter.TabIndent + } + + output = tabwriter.NewWriter(output, minwidth, c.Tabwidth, 1, padchar, twmode) + } + + // write printer result via tabwriter/trimmer to output + if _, err := output.Write(p.output()); err != nil { + return err + } + + // flush tabwriter, if any + var err error + if tw, _ := output.(*tabwriter.Writer); tw != nil { + err = tw.Flush() + } + + return err +} + +func (c *Config) Fprint(output io.Writer, obj *hcl.Object) error { + return c.fprint(output, obj) +} + +// Fprint "pretty-prints" an HCL object to output +// It calls Config.Fprint with default settings. +func Fprint(output io.Writer, obj *hcl.Object) error { + return (&Config{Tabwidth: 8}).Fprint(output, obj) +} diff --git a/hclprinter/hclprinter_test.go b/hclprinter/hclprinter_test.go new file mode 100644 index 0000000..64c8a1b --- /dev/null +++ b/hclprinter/hclprinter_test.go @@ -0,0 +1,63 @@ +package hclprinter + +import ( + "os" + "testing" + + "github.com/hashicorp/hcl/hcl" +) + +var complexHcl = `// This comes from Terraform, as a test +variable "foo" { + default = "bar" + description = "bar" +} + +provider "aws" { + access_key = "foo" + secret_key = "bar" +} + +provider "do" { + api_key = "${var.foo}" +} + +resource "aws_security_group" "firewall" { + count = 5 +} + +resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}" + ] + + network_interface { + device_index = 0 + description = "Main network interface" + } +} + +resource "aws_instance" "db" { + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + + depends_on = ["aws_instance.web"] +} + +output "web_ip" { + value = 
"${aws_instance.web.private_ip}" +} +` + +func TestPrint(t *testing.T) { + obj, err := hcl.Parse(complexHcl) + if err != nil { + t.Fatal(err) + } + + if err := Fprint(os.Stdout, obj); err != nil { + t.Error(err) + } +} From 7e929f0990aaed77217525533296f70753d61bc3 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 14:32:19 +0300 Subject: [PATCH 004/137] hcl: start to implement our own lexer and parser --- parser/parser.go | 1 + parser/token.go | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 parser/parser.go create mode 100644 parser/token.go diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..0bfe2c2 --- /dev/null +++ b/parser/parser.go @@ -0,0 +1 @@ +package parser diff --git a/parser/token.go b/parser/token.go new file mode 100644 index 0000000..5b969eb --- /dev/null +++ b/parser/token.go @@ -0,0 +1,45 @@ +package parser + +// Token is the set of lexical tokens of the HCL (HashiCorp Configuration Language) +type Token int + +const ( + // Special tokens + ILLEGAL Token = iota + EOF + COMMENT + NEWLINE + + literal_beg + IDENT // literals + NUMBER // 12345 + FLOAT // 123.45 + BOOL // true,false + STRING // "abc" + literal_end + + operator_beg + LBRACK // [ + LBRACE // { + COMMA // , + PERIOD // . + + RBRACK // ] + RBRACE // } + + ASSIGN // = + ADD // + + SUB // - + + EPLUS // e + EMINUS // e- + operator_end +) + +// IsLiteral returns true for tokens corresponding to identifiers and basic +// type literals; it returns false otherwise. +func (t Token) IsLiteral() bool { return literal_beg < t && t < literal_end } + +// IsOperator returns true for tokens corresponding to operators and +// delimiters; it returns false otherwise. 
+func (t Token) IsOperator() bool { return operator_beg < t && t < operator_end } From 6fb7de55d35581daa04aa9a31eeee698152f252e Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 15:34:06 +0300 Subject: [PATCH 005/137] token: add token representations --- parser/token.go | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/parser/token.go b/parser/token.go index 5b969eb..3b6d615 100644 --- a/parser/token.go +++ b/parser/token.go @@ -1,5 +1,7 @@ package parser +import "strconv" + // Token is the set of lexical tokens of the HCL (HashiCorp Configuration Language) type Token int @@ -36,6 +38,51 @@ const ( operator_end ) +var tokens = [...]string{ + ILLEGAL: "ILLEGAL", + + EOF: "EOF", + COMMENT: "COMMENT", + NEWLINE: "NEWLINE", + + IDENT: "IDENT", + NUMBER: "NUMBER", + FLOAT: "FLOAT", + BOOL: "BOOL", + STRING: "STRING", + + LBRACK: "[", + LBRACE: "{", + COMMA: ",", + PERIOD: ".", + + RBRACK: "]", + RBRACE: "}", + + ASSIGN: "=", + ADD: "+", + SUB: "-", + + EPLUS: "e", + EMINUS: "e-", +} + +// String returns the string corresponding to the token tok. +// For operators, delimiters, and keywords the string is the actual +// token character sequence (e.g., for the token ADD, the string is +// "+"). For all other tokens the string corresponds to the token +// constant name (e.g. for the token IDENT, the string is "IDENT"). +func (t Token) String() string { + s := "" + if 0 <= t && t < Token(len(tokens)) { + s = tokens[t] + } + if s == "" { + s = "token(" + strconv.Itoa(int(t)) + ")" + } + return s +} + // IsLiteral returns true for tokens corresponding to identifiers and basic // type literals; it returns false otherwise. 
func (t Token) IsLiteral() bool { return literal_beg < t && t < literal_end } From 01a609f812c62c625c873d8f125e86d7d5f5a034 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 15:36:24 +0300 Subject: [PATCH 006/137] parser: add position for upcoming requirements --- parser/position.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 parser/position.go diff --git a/parser/position.go b/parser/position.go new file mode 100644 index 0000000..ca09d54 --- /dev/null +++ b/parser/position.go @@ -0,0 +1,36 @@ +package parser + +import "fmt" + +// Position describes an arbitrary source position +// including the file, line, and column location. +// A Position is valid if the line number is > 0. +type Position struct { + Filename string // filename, if any + Offset int // offset, starting at 0 + Line int // line number, starting at 1 + Column int // column number, starting at 1 (character count) +} + +// IsValid returns true if the position is valid. 
+func (p *Position) IsValid() bool { return p.Line > 0 } + +// String returns a string in one of several forms: +// +// file:line:column valid position with file name +// line:column valid position without file name +// file invalid position with file name +// - invalid position without file name +func (p Position) String() string { + s := p.Filename + if p.IsValid() { + if s != "" { + s += ":" + } + s += fmt.Sprintf("%d:%d", p.Line, p.Column) + } + if s == "" { + s = "-" + } + return s +} From e6ba36eacaabc5971d409f40aecb51561647a89b Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 17:08:09 +0300 Subject: [PATCH 007/137] parser: initial lexer next method --- parser/lexer.go | 92 +++++++++++++++++++++++++++++++++++++++++++++++++ parser/token.go | 2 -- 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 parser/lexer.go diff --git a/parser/lexer.go b/parser/lexer.go new file mode 100644 index 0000000..6048dfa --- /dev/null +++ b/parser/lexer.go @@ -0,0 +1,92 @@ +package parser + +import ( + "bufio" + "io" + "unicode" +) + +// eof represents a marker rune for the end of the reader. +const eof = rune(0) + +// Lexer defines a lexical scanner +type Lexer struct { + r *bufio.Reader + + // Start position of most recently scanned token; set by Scan. + // Calling Init or Next invalidates the position (Line == 0). + // The Filename field is always left untouched by the Scanner. + // If an error is reported (via Error) and Position is invalid, + // the scanner is not inside a token. Call Pos to obtain an error + // position in that case. + Position +} + +// NewLexer returns a new instance of Lexer. +func NewLexer(r io.Reader) *Lexer { + return &Lexer{ + r: bufio.NewReader(r), + } +} + +// next reads the next rune from the bufferred reader. Returns the rune(0) if +// an error occurs (or io.EOF is returned). 
+func (l *Lexer) next() rune { + ch, _, err := l.r.ReadRune() + if err != nil { + return eof + } + return ch +} + +// unread places the previously read rune back on the reader. +func (l *Lexer) unread() { _ = l.r.UnreadRune() } + +// Scan scans the next token and returns the token and it's literal string. +func (l *Lexer) Scan() (tok Token, lit string) { + ch := l.next() + + if isWhitespace(ch) { + ch = l.next() + } + + return 0, "" +} + +func (l *Lexer) skipWhitespace() { + l.next() +} + +// Pos returns the position of the character immediately after the character or +// token returned by the last call to Next or Scan. +func (l *Lexer) Pos() Position { + return Position{} +} + +// isSpace reports whether r is a space character. +func isSpace(r rune) bool { + return r == ' ' || r == '\t' +} + +// isEndOfLine reports whether r is an end-of-line character. +func isEndOfLine(r rune) bool { + return r == '\r' || r == '\n' +} + +// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 
+func isAlphaNumeric(r rune) bool { + return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) +} + +func isLetter(ch rune) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) +} + +func isDigit(ch rune) bool { + return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) +} + +// isWhitespace returns true if the rune is a space, tab, newline or carriage return +func isWhitespace(ch rune) bool { + return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' +} diff --git a/parser/token.go b/parser/token.go index 3b6d615..8dda237 100644 --- a/parser/token.go +++ b/parser/token.go @@ -10,7 +10,6 @@ const ( ILLEGAL Token = iota EOF COMMENT - NEWLINE literal_beg IDENT // literals @@ -43,7 +42,6 @@ var tokens = [...]string{ EOF: "EOF", COMMENT: "COMMENT", - NEWLINE: "NEWLINE", IDENT: "IDENT", NUMBER: "NUMBER", From 4711a01f764fb7c54d351ed8a639e8e11e4a991c Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 19:45:57 +0300 Subject: [PATCH 008/137] lexer: various changes, trying text/scanner --- parser/lexer.go | 62 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index 6048dfa..267c083 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -2,7 +2,9 @@ package parser import ( "bufio" + "bytes" "io" + "text/scanner" "unicode" ) @@ -11,7 +13,9 @@ const eof = rune(0) // Lexer defines a lexical scanner type Lexer struct { - r *bufio.Reader + src *bufio.Reader // input + ch rune // current character + sc *scanner.Scanner // Start position of most recently scanned token; set by Scan. // Calling Init or Next invalidates the position (Line == 0). @@ -23,38 +27,73 @@ type Lexer struct { } // NewLexer returns a new instance of Lexer. 
-func NewLexer(r io.Reader) *Lexer { +func NewLexer(src io.Reader) *Lexer { + sc := &scanner.Scanner{} + sc.Init(src) + sc.Mode = 0 + sc.Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ' + return &Lexer{ - r: bufio.NewReader(r), + src: bufio.NewReader(src), + sc: sc, } } // next reads the next rune from the bufferred reader. Returns the rune(0) if // an error occurs (or io.EOF is returned). func (l *Lexer) next() rune { - ch, _, err := l.r.ReadRune() + var err error + l.ch, _, err = l.src.ReadRune() if err != nil { return eof } - return ch + return l.ch } // unread places the previously read rune back on the reader. -func (l *Lexer) unread() { _ = l.r.UnreadRune() } +func (l *Lexer) unread() { + _ = l.src.UnreadRune() +} + +func (l *Lexer) peek() rune { + prev := l.ch + peekCh := l.next() + l.unread() + l.ch = prev + return peekCh +} // Scan scans the next token and returns the token and it's literal string. func (l *Lexer) Scan() (tok Token, lit string) { ch := l.next() - if isWhitespace(ch) { + // skip white space + for isWhitespace(ch) { ch = l.next() } + // identifier + if isLetter(ch) { + return l.scanIdentifier() + } + + switch ch { + case eof: + return EOF, "" + } + return 0, "" } -func (l *Lexer) skipWhitespace() { - l.next() +func (l *Lexer) scanIdentifier() (Token, string) { + // Create a buffer and read the current character into it. + var buf bytes.Buffer + + // write current character before we move to the next + buf.WriteRune(l.ch) + + return 0, "" + } // Pos returns the position of the character immediately after the character or @@ -73,11 +112,6 @@ func isEndOfLine(r rune) bool { return r == '\r' || r == '\n' } -// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 
-func isAlphaNumeric(r rune) bool { - return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) -} - func isLetter(ch rune) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) } From 70845ccfec674bf7fe2b8d0eb535e06aec9f941e Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 20:32:27 +0300 Subject: [PATCH 009/137] lexer: back to the roots --- parser/lexer.go | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index 267c083..95625b9 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -4,7 +4,6 @@ import ( "bufio" "bytes" "io" - "text/scanner" "unicode" ) @@ -15,31 +14,16 @@ const eof = rune(0) type Lexer struct { src *bufio.Reader // input ch rune // current character - sc *scanner.Scanner - - // Start position of most recently scanned token; set by Scan. - // Calling Init or Next invalidates the position (Line == 0). - // The Filename field is always left untouched by the Scanner. - // If an error is reported (via Error) and Position is invalid, - // the scanner is not inside a token. Call Pos to obtain an error - // position in that case. - Position } // NewLexer returns a new instance of Lexer. func NewLexer(src io.Reader) *Lexer { - sc := &scanner.Scanner{} - sc.Init(src) - sc.Mode = 0 - sc.Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ' - return &Lexer{ src: bufio.NewReader(src), - sc: sc, } } -// next reads the next rune from the bufferred reader. Returns the rune(0) if +// next reads the next rune from the bufferred reader. Returns the rune(0) if // an error occurs (or io.EOF is returned). func (l *Lexer) next() rune { var err error @@ -47,21 +31,12 @@ func (l *Lexer) next() rune { if err != nil { return eof } + return l.ch } // unread places the previously read rune back on the reader. 
-func (l *Lexer) unread() { - _ = l.src.UnreadRune() -} - -func (l *Lexer) peek() rune { - prev := l.ch - peekCh := l.next() - l.unread() - l.ch = prev - return peekCh -} +func (l *Lexer) unread() { _ = l.src.UnreadRune() } // Scan scans the next token and returns the token and it's literal string. func (l *Lexer) Scan() (tok Token, lit string) { @@ -93,7 +68,6 @@ func (l *Lexer) scanIdentifier() (Token, string) { buf.WriteRune(l.ch) return 0, "" - } // Pos returns the position of the character immediately after the character or From ca906622c2fc0365c1182f935bd503c697116b57 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 20:33:51 +0300 Subject: [PATCH 010/137] lexer: add peek() method --- parser/lexer.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/parser/lexer.go b/parser/lexer.go index 95625b9..98e7165 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -38,6 +38,14 @@ func (l *Lexer) next() rune { // unread places the previously read rune back on the reader. func (l *Lexer) unread() { _ = l.src.UnreadRune() } +func (l *Lexer) peek() rune { + prev := l.ch + peekCh := l.next() + l.unread() + l.ch = prev + return peekCh +} + // Scan scans the next token and returns the token and it's literal string. func (l *Lexer) Scan() (tok Token, lit string) { ch := l.next() From 1b6ead19c22066ce650a25c4d2581e530b3ccb83 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 21:06:30 +0300 Subject: [PATCH 011/137] parser: add scanning ident test --- parser/lexer.go | 8 ++++--- parser/lexer_test.go | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 parser/lexer_test.go diff --git a/parser/lexer.go b/parser/lexer.go index 98e7165..be542f9 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -72,10 +72,12 @@ func (l *Lexer) scanIdentifier() (Token, string) { // Create a buffer and read the current character into it. 
var buf bytes.Buffer - // write current character before we move to the next - buf.WriteRune(l.ch) + for isLetter(l.ch) || isDigit(l.ch) { + buf.WriteRune(l.ch) + l.next() + } - return 0, "" + return IDENT, buf.String() } // Pos returns the position of the character immediately after the character or diff --git a/parser/lexer_test.go b/parser/lexer_test.go new file mode 100644 index 0000000..687dffa --- /dev/null +++ b/parser/lexer_test.go @@ -0,0 +1,53 @@ +package parser + +import ( + "bytes" + "fmt" + "testing" +) + +type token struct { + tok Token + text string +} + +func TestIdent(t *testing.T) { + var identList = []token{ + {IDENT, "a"}, + {IDENT, "a0"}, + {IDENT, "foobar"}, + {IDENT, "abc123"}, + {IDENT, "LGTM"}, + {IDENT, "_"}, + {IDENT, "_abc123"}, + {IDENT, "abc123_"}, + {IDENT, "_abc_123_"}, + {IDENT, "_äöü"}, + {IDENT, "_本"}, + {IDENT, "äöü"}, + {IDENT, "本"}, + {IDENT, "a۰۱۸"}, + {IDENT, "foo६४"}, + {IDENT, "bar9876"}, + } + + // create artifical source code + buf := new(bytes.Buffer) + for _, ident := range identList { + fmt.Fprintf(buf, " \t%s\n", ident.text) + } + + l := NewLexer(buf) + + for _, ident := range identList { + tok, lit := l.Scan() + if tok != ident.tok { + t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) + } + + if lit != ident.text { + t.Errorf("text = %s want %s", lit, ident.text) + } + + } +} From 4b72853d41cbcb1cd103c90daae6796403f4c612 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 21:25:21 +0300 Subject: [PATCH 012/137] lexer: scanner is more Go idiomatic --- parser/lexer.go | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index be542f9..15b430d 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -11,53 +11,53 @@ import ( const eof = rune(0) // Lexer defines a lexical scanner -type Lexer struct { +type Scanner struct { src *bufio.Reader // input ch rune // current character } // NewLexer returns a 
new instance of Lexer. -func NewLexer(src io.Reader) *Lexer { - return &Lexer{ +func NewLexer(src io.Reader) *Scanner { + return &Scanner{ src: bufio.NewReader(src), } } // next reads the next rune from the bufferred reader. Returns the rune(0) if // an error occurs (or io.EOF is returned). -func (l *Lexer) next() rune { +func (s *Scanner) next() rune { var err error - l.ch, _, err = l.src.ReadRune() + s.ch, _, err = s.src.ReadRune() if err != nil { return eof } - return l.ch + return s.ch } // unread places the previously read rune back on the reader. -func (l *Lexer) unread() { _ = l.src.UnreadRune() } +func (s *Scanner) unread() { _ = s.src.UnreadRune() } -func (l *Lexer) peek() rune { - prev := l.ch - peekCh := l.next() - l.unread() - l.ch = prev +func (s *Scanner) peek() rune { + prev := s.ch + peekCh := s.next() + s.unread() + s.ch = prev return peekCh } // Scan scans the next token and returns the token and it's literal string. -func (l *Lexer) Scan() (tok Token, lit string) { - ch := l.next() +func (s *Scanner) Scan() (tok Token, lit string) { + ch := s.next() // skip white space for isWhitespace(ch) { - ch = l.next() + ch = s.next() } // identifier if isLetter(ch) { - return l.scanIdentifier() + return s.scanIdentifier() } switch ch { @@ -68,13 +68,13 @@ func (l *Lexer) Scan() (tok Token, lit string) { return 0, "" } -func (l *Lexer) scanIdentifier() (Token, string) { +func (s *Scanner) scanIdentifier() (Token, string) { // Create a buffer and read the current character into it. var buf bytes.Buffer - for isLetter(l.ch) || isDigit(l.ch) { - buf.WriteRune(l.ch) - l.next() + for isLetter(s.ch) || isDigit(s.ch) { + buf.WriteRune(s.ch) + s.next() } return IDENT, buf.String() @@ -82,7 +82,7 @@ func (l *Lexer) scanIdentifier() (Token, string) { // Pos returns the position of the character immediately after the character or // token returned by the last call to Next or Scan. 
-func (l *Lexer) Pos() Position { +func (s *Scanner) Pos() Position { return Position{} } From cc7a91eeab11371687e94a04cd940306c5499d08 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 3 Oct 2015 23:50:50 +0300 Subject: [PATCH 013/137] lexer: implement positions --- parser/lexer.go | 92 ++++++++++++++++++++++++++++++-------------- parser/lexer_test.go | 5 ++- 2 files changed, 68 insertions(+), 29 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index 15b430d..e14fb39 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -1,9 +1,9 @@ package parser import ( - "bufio" "bytes" "io" + "io/ioutil" "unicode" ) @@ -12,40 +12,56 @@ const eof = rune(0) // Lexer defines a lexical scanner type Scanner struct { - src *bufio.Reader // input - ch rune // current character + src *bytes.Buffer + srcBytes []byte + + ch rune // current character + lastCharLen int // length of last character in bytes + pos Position + + // Token text buffer + tokBuf bytes.Buffer + tokPos int // token text tail position (srcBuf index); valid if >= 0 + tokEnd int // token text tail end (srcBuf index) } -// NewLexer returns a new instance of Lexer. -func NewLexer(src io.Reader) *Scanner { - return &Scanner{ - src: bufio.NewReader(src), +// NewLexer returns a new instance of Lexer. Even though src is an io.Reader, +// we fully consume the content. +func NewLexer(src io.Reader) (*Scanner, error) { + buf, err := ioutil.ReadAll(src) + if err != nil { + return nil, err } + + b := bytes.NewBuffer(buf) + return &Scanner{ + src: b, + srcBytes: b.Bytes(), + }, nil } // next reads the next rune from the bufferred reader. Returns the rune(0) if // an error occurs (or io.EOF is returned). 
func (s *Scanner) next() rune { var err error - s.ch, _, err = s.src.ReadRune() + var size int + s.ch, size, err = s.src.ReadRune() if err != nil { return eof } + s.lastCharLen = size + s.pos.Offset += size + s.pos.Column += size + + if s.ch == '\n' { + s.pos.Line++ + s.pos.Column = 0 + } + return s.ch } -// unread places the previously read rune back on the reader. -func (s *Scanner) unread() { _ = s.src.UnreadRune() } - -func (s *Scanner) peek() rune { - prev := s.ch - peekCh := s.next() - s.unread() - s.ch = prev - return peekCh -} - // Scan scans the next token and returns the token and it's literal string. func (s *Scanner) Scan() (tok Token, lit string) { ch := s.next() @@ -55,29 +71,49 @@ func (s *Scanner) Scan() (tok Token, lit string) { ch = s.next() } + // start the token position + s.tokBuf.Reset() + s.tokPos = s.pos.Offset - s.lastCharLen + // identifier if isLetter(ch) { - return s.scanIdentifier() + s.scanIdentifier() + tok = IDENT + } + + if isDigit(ch) { + // scan for number } switch ch { case eof: - return EOF, "" + tok = EOF } - return 0, "" + s.tokEnd = s.pos.Offset - s.lastCharLen + + return tok, s.TokenLiteral() } -func (s *Scanner) scanIdentifier() (Token, string) { - // Create a buffer and read the current character into it. - var buf bytes.Buffer - +func (s *Scanner) scanIdentifier() { for isLetter(s.ch) || isDigit(s.ch) { - buf.WriteRune(s.ch) s.next() } +} - return IDENT, buf.String() +// TokenLiteral returns the literal string corresponding to the most recently +// scanned token. 
+func (s *Scanner) TokenLiteral() string { + if s.tokPos < 0 { + // no token text + return "" + } + + // part of the token text was saved in tokBuf: save the rest in + // tokBuf as well and return its content + s.tokBuf.Write(s.srcBytes[s.tokPos:s.tokEnd]) + s.tokPos = s.tokEnd // ensure idempotency of TokenText() call + return s.tokBuf.String() } // Pos returns the position of the character immediately after the character or diff --git a/parser/lexer_test.go b/parser/lexer_test.go index 687dffa..e48c117 100644 --- a/parser/lexer_test.go +++ b/parser/lexer_test.go @@ -37,7 +37,10 @@ func TestIdent(t *testing.T) { fmt.Fprintf(buf, " \t%s\n", ident.text) } - l := NewLexer(buf) + l, err := NewLexer(buf) + if err != nil { + t.Fatal(err) + } for _, ident := range identList { tok, lit := l.Scan() From 97fb05dd4abd4af01b5d0a41d65d4db8130d6eda Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 00:20:26 +0300 Subject: [PATCH 014/137] lexer: scan strings --- parser/lexer.go | 27 +++++++++++++++++++++++- parser/lexer_test.go | 50 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/parser/lexer.go b/parser/lexer.go index e14fb39..2677647 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -4,6 +4,7 @@ import ( "bytes" "io" "io/ioutil" + "log" "unicode" ) @@ -77,8 +78,8 @@ func (s *Scanner) Scan() (tok Token, lit string) { // identifier if isLetter(ch) { - s.scanIdentifier() tok = IDENT + s.scanIdentifier() } if isDigit(ch) { @@ -88,6 +89,10 @@ func (s *Scanner) Scan() (tok Token, lit string) { switch ch { case eof: tok = EOF + case '"': + tok = STRING + s.scanString() + s.next() // move forward so we finalize the string } s.tokEnd = s.pos.Offset - s.lastCharLen @@ -95,6 +100,26 @@ func (s *Scanner) Scan() (tok Token, lit string) { return tok, s.TokenLiteral() } +func (s *Scanner) scanString() { + // '"' opening already consumed + ch := s.next() // read character after quote + for ch != '"' { + if ch == 
'\n' || ch < 0 { + log.Println("[ERROR] literal not terminated") + return + } + + if ch == '\\' { + // scanEscape + return + } else { + ch = s.next() + } + } + + return +} + func (s *Scanner) scanIdentifier() { for isLetter(s.ch) || isDigit(s.ch) { s.next() diff --git a/parser/lexer_test.go b/parser/lexer_test.go index e48c117..e69dbb6 100644 --- a/parser/lexer_test.go +++ b/parser/lexer_test.go @@ -6,6 +6,8 @@ import ( "testing" ) +var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + type token struct { tok Token text string @@ -54,3 +56,51 @@ func TestIdent(t *testing.T) { } } + +func TestString(t *testing.T) { + var identList = []token{ + {STRING, `" "`}, + {STRING, `"a"`}, + {STRING, `"本"`}, + // {STRING, `"\a"`}, + // {STRING, `"\b"`}, + // {STRING, `"\f"`}, + // {STRING, `"\n"`}, + // {STRING, `"\r"`}, + // {STRING, `"\t"`}, + // {STRING, `"\v"`}, + // {STRING, `"\""`}, + // {STRING, `"\000"`}, + // {STRING, `"\777"`}, + // {STRING, `"\x00"`}, + // {STRING, `"\xff"`}, + // {STRING, `"\u0000"`}, + // {STRING, `"\ufA16"`}, + // {STRING, `"\U00000000"`}, + // {STRING, `"\U0000ffAB"`}, + // {STRING, `"` + f100 + `"`}, + } + + // create artifical source code + buf := new(bytes.Buffer) + for _, ident := range identList { + fmt.Fprintf(buf, " \t%s\n", ident.text) + } + + l, err := NewLexer(buf) + if err != nil { + t.Fatal(err) + } + + for _, ident := range identList { + tok, lit := l.Scan() + if tok != ident.tok { + t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) + } + + if lit != ident.text { + t.Errorf("text = %s want %s", lit, ident.text) + } + + } +} From 32ad59fcd776536083ab129e1f4d39fa03c2c342 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 01:29:13 +0300 Subject: [PATCH 015/137] lexer: more robust implementation --- parser/lexer.go | 79 +++++++++++++++++++++++++++++--------------- parser/lexer_test.go | 42 +++++++++++++++++++---- 2 files changed, 89 
insertions(+), 32 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index 2677647..b0cc69a 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -16,14 +16,15 @@ type Scanner struct { src *bytes.Buffer srcBytes []byte - ch rune // current character - lastCharLen int // length of last character in bytes - pos Position + // ch rune // current character + lastCharLen int // length of last character in bytes - // Token text buffer - tokBuf bytes.Buffer - tokPos int // token text tail position (srcBuf index); valid if >= 0 - tokEnd int // token text tail end (srcBuf index) + currPos Position // current position + prevPos Position // previous position + + tokBuf bytes.Buffer // token text buffer + tokPos int // token text tail position (srcBuf index); valid if >= 0 + tokEnd int // token text tail end (srcBuf index) } // NewLexer returns a new instance of Lexer. Even though src is an io.Reader, @@ -44,23 +45,41 @@ func NewLexer(src io.Reader) (*Scanner, error) { // next reads the next rune from the bufferred reader. Returns the rune(0) if // an error occurs (or io.EOF is returned). 
func (s *Scanner) next() rune { - var err error - var size int - s.ch, size, err = s.src.ReadRune() + ch, size, err := s.src.ReadRune() if err != nil { return eof } - s.lastCharLen = size - s.pos.Offset += size - s.pos.Column += size + // remember last position + s.prevPos = s.currPos - if s.ch == '\n' { - s.pos.Line++ - s.pos.Column = 0 + s.lastCharLen = size + s.currPos.Offset += size + s.currPos.Column += size + + if ch == '\n' { + s.currPos.Line++ + s.currPos.Column = 0 } - return s.ch + return ch +} + +func (s *Scanner) unread() { + if err := s.src.UnreadRune(); err != nil { + panic(err) // this is user fault, we should catch it + } + s.currPos = s.prevPos // put back last position +} + +func (s *Scanner) peek() rune { + peek, _, err := s.src.ReadRune() + if err != nil { + return eof + } + + s.src.UnreadRune() + return peek } // Scan scans the next token and returns the token and it's literal string. @@ -74,16 +93,19 @@ func (s *Scanner) Scan() (tok Token, lit string) { // start the token position s.tokBuf.Reset() - s.tokPos = s.pos.Offset - s.lastCharLen + s.tokPos = s.currPos.Offset - s.lastCharLen - // identifier if isLetter(ch) { tok = IDENT - s.scanIdentifier() + lit = s.scanIdentifier() + if lit == "true" || lit == "false" { + tok = BOOL + } } if isDigit(ch) { - // scan for number + // scanDigits() + // TODO(arslan) } switch ch { @@ -92,10 +114,9 @@ func (s *Scanner) Scan() (tok Token, lit string) { case '"': tok = STRING s.scanString() - s.next() // move forward so we finalize the string } - s.tokEnd = s.pos.Offset - s.lastCharLen + s.tokEnd = s.currPos.Offset return tok, s.TokenLiteral() } @@ -120,10 +141,16 @@ func (s *Scanner) scanString() { return } -func (s *Scanner) scanIdentifier() { - for isLetter(s.ch) || isDigit(s.ch) { - s.next() +func (s *Scanner) scanIdentifier() string { + offs := s.currPos.Offset - s.lastCharLen + ch := s.next() + for isLetter(ch) || isDigit(ch) { + ch = s.next() } + s.unread() // we got identifier, put back latest char + 
+ // return string(s.srcBytes[offs:(s.currPos.Offset - s.lastCharLen)]) + return string(s.srcBytes[offs:s.currPos.Offset]) } // TokenLiteral returns the literal string corresponding to the most recently diff --git a/parser/lexer_test.go b/parser/lexer_test.go index e69dbb6..44f0fb6 100644 --- a/parser/lexer_test.go +++ b/parser/lexer_test.go @@ -13,8 +13,38 @@ type token struct { text string } +func TestBool(t *testing.T) { + var tokenList = []token{ + {BOOL, "true"}, + {BOOL, "false"}, + } + + // create artifical source code + buf := new(bytes.Buffer) + for _, ident := range tokenList { + fmt.Fprintf(buf, " \t%s\n", ident.text) + } + + l, err := NewLexer(buf) + if err != nil { + t.Fatal(err) + } + + for _, ident := range tokenList { + tok, lit := l.Scan() + if tok != ident.tok { + t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) + } + + if lit != ident.text { + t.Errorf("text = %s want %s", lit, ident.text) + } + + } +} + func TestIdent(t *testing.T) { - var identList = []token{ + var tokenList = []token{ {IDENT, "a"}, {IDENT, "a0"}, {IDENT, "foobar"}, @@ -35,7 +65,7 @@ func TestIdent(t *testing.T) { // create artifical source code buf := new(bytes.Buffer) - for _, ident := range identList { + for _, ident := range tokenList { fmt.Fprintf(buf, " \t%s\n", ident.text) } @@ -44,7 +74,7 @@ func TestIdent(t *testing.T) { t.Fatal(err) } - for _, ident := range identList { + for _, ident := range tokenList { tok, lit := l.Scan() if tok != ident.tok { t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) @@ -58,7 +88,7 @@ func TestIdent(t *testing.T) { } func TestString(t *testing.T) { - var identList = []token{ + var tokenList = []token{ {STRING, `" "`}, {STRING, `"a"`}, {STRING, `"本"`}, @@ -83,7 +113,7 @@ func TestString(t *testing.T) { // create artifical source code buf := new(bytes.Buffer) - for _, ident := range identList { + for _, ident := range tokenList { fmt.Fprintf(buf, " \t%s\n", ident.text) } @@ -92,7 +122,7 @@ func TestString(t 
*testing.T) { t.Fatal(err) } - for _, ident := range identList { + for _, ident := range tokenList { tok, lit := l.Scan() if tok != ident.tok { t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) From c859b8a1a4a3748067f23295b75f42ca368356ef Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 01:32:45 +0300 Subject: [PATCH 016/137] lexer: fix Position() call --- parser/lexer.go | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index b0cc69a..2e12099 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -117,7 +117,6 @@ func (s *Scanner) Scan() (tok Token, lit string) { } s.tokEnd = s.currPos.Offset - return tok, s.TokenLiteral() } @@ -169,19 +168,9 @@ func (s *Scanner) TokenLiteral() string { } // Pos returns the position of the character immediately after the character or -// token returned by the last call to Next or Scan. +// token returned by the last call to Scan. func (s *Scanner) Pos() Position { - return Position{} -} - -// isSpace reports whether r is a space character. -func isSpace(r rune) bool { - return r == ' ' || r == '\t' -} - -// isEndOfLine reports whether r is an end-of-line character. -func isEndOfLine(r rune) bool { - return r == '\r' || r == '\n' + return s.currPos } func isLetter(ch rune) bool { From 3bf9d71be560f232f3da5da484d30e51857c0a49 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 01:35:29 +0300 Subject: [PATCH 017/137] parser: more idiomatic call --- parser/lexer.go | 12 ++++++------ parser/lexer_test.go | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index 2e12099..720b37f 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -82,8 +82,8 @@ func (s *Scanner) peek() rune { return peek } -// Scan scans the next token and returns the token and it's literal string. 
-func (s *Scanner) Scan() (tok Token, lit string) { +// Scan scans the next token and returns the token. +func (s *Scanner) Scan() (tok Token) { ch := s.next() // skip white space @@ -97,7 +97,7 @@ func (s *Scanner) Scan() (tok Token, lit string) { if isLetter(ch) { tok = IDENT - lit = s.scanIdentifier() + lit := s.scanIdentifier() if lit == "true" || lit == "false" { tok = BOOL } @@ -117,7 +117,7 @@ func (s *Scanner) Scan() (tok Token, lit string) { } s.tokEnd = s.currPos.Offset - return tok, s.TokenLiteral() + return tok } func (s *Scanner) scanString() { @@ -152,9 +152,9 @@ func (s *Scanner) scanIdentifier() string { return string(s.srcBytes[offs:s.currPos.Offset]) } -// TokenLiteral returns the literal string corresponding to the most recently +// TokenText returns the literal string corresponding to the most recently // scanned token. -func (s *Scanner) TokenLiteral() string { +func (s *Scanner) TokenText() string { if s.tokPos < 0 { // no token text return "" diff --git a/parser/lexer_test.go b/parser/lexer_test.go index 44f0fb6..1124bca 100644 --- a/parser/lexer_test.go +++ b/parser/lexer_test.go @@ -31,12 +31,12 @@ func TestBool(t *testing.T) { } for _, ident := range tokenList { - tok, lit := l.Scan() + tok := l.Scan() if tok != ident.tok { t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) } - if lit != ident.text { + if l.TokenText() != ident.text { t.Errorf("text = %s want %s", lit, ident.text) } @@ -75,12 +75,12 @@ func TestIdent(t *testing.T) { } for _, ident := range tokenList { - tok, lit := l.Scan() + tok := l.Scan() if tok != ident.tok { t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) } - if lit != ident.text { + if l.TokenText() != ident.text { t.Errorf("text = %s want %s", lit, ident.text) } @@ -123,12 +123,12 @@ func TestString(t *testing.T) { } for _, ident := range tokenList { - tok, lit := l.Scan() + tok := l.Scan() if tok != ident.tok { t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) } - if 
lit != ident.text { + if l.TokenText() != ident.text { t.Errorf("text = %s want %s", lit, ident.text) } From f2aa1fbc4ae8e20ba869066106e96c69c8b60012 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 20:13:38 +0300 Subject: [PATCH 018/137] lexer: fix tests --- parser/lexer_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parser/lexer_test.go b/parser/lexer_test.go index 1124bca..666e48c 100644 --- a/parser/lexer_test.go +++ b/parser/lexer_test.go @@ -37,7 +37,7 @@ func TestBool(t *testing.T) { } if l.TokenText() != ident.text { - t.Errorf("text = %s want %s", lit, ident.text) + t.Errorf("text = %s want %s", l.TokenText(), ident.text) } } @@ -81,7 +81,7 @@ func TestIdent(t *testing.T) { } if l.TokenText() != ident.text { - t.Errorf("text = %s want %s", lit, ident.text) + t.Errorf("text = %s want %s", l.TokenText(), ident.text) } } @@ -129,7 +129,7 @@ func TestString(t *testing.T) { } if l.TokenText() != ident.text { - t.Errorf("text = %s want %s", lit, ident.text) + t.Errorf("text = %s want %s", l.TokenText(), ident.text) } } From df82bd3e9cd487b607780da7feb599a3f86d5e71 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 20:16:43 +0300 Subject: [PATCH 019/137] hcl: split up package for more control --- {parser => scanner}/position.go | 2 +- parser/lexer.go => scanner/scanner.go | 2 +- parser/lexer_test.go => scanner/scanner_text.go | 6 +++--- {parser => token}/token.go | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) rename {parser => scanner}/position.go (98%) rename parser/lexer.go => scanner/scanner.go (99%) rename parser/lexer_test.go => scanner/scanner_text.go (97%) rename {parser => token}/token.go (99%) diff --git a/parser/position.go b/scanner/position.go similarity index 98% rename from parser/position.go rename to scanner/position.go index ca09d54..8ba9195 100644 --- a/parser/position.go +++ b/scanner/position.go @@ -1,4 +1,4 @@ -package parser +package scanner import "fmt" diff --git 
a/parser/lexer.go b/scanner/scanner.go similarity index 99% rename from parser/lexer.go rename to scanner/scanner.go index 720b37f..c3dd565 100644 --- a/parser/lexer.go +++ b/scanner/scanner.go @@ -1,4 +1,4 @@ -package parser +package scanner import ( "bytes" diff --git a/parser/lexer_test.go b/scanner/scanner_text.go similarity index 97% rename from parser/lexer_test.go rename to scanner/scanner_text.go index 666e48c..2c5f80c 100644 --- a/parser/lexer_test.go +++ b/scanner/scanner_text.go @@ -1,4 +1,4 @@ -package parser +package scanner import ( "bytes" @@ -15,8 +15,8 @@ type token struct { func TestBool(t *testing.T) { var tokenList = []token{ - {BOOL, "true"}, - {BOOL, "false"}, + {token.BOOL, "true"}, + {token.BOOL, "false"}, } // create artifical source code diff --git a/parser/token.go b/token/token.go similarity index 99% rename from parser/token.go rename to token/token.go index 8dda237..cbd822d 100644 --- a/parser/token.go +++ b/token/token.go @@ -1,4 +1,4 @@ -package parser +package token import "strconv" From 52befe20932bf3453549dddc569d5445542daf7f Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 20:19:39 +0300 Subject: [PATCH 020/137] scanner: use new hcl/token package --- scanner/scanner.go | 12 +++-- scanner/{scanner_text.go => scanner_test.go} | 50 ++++++++++---------- 2 files changed, 33 insertions(+), 29 deletions(-) rename scanner/{scanner_text.go => scanner_test.go} (76%) diff --git a/scanner/scanner.go b/scanner/scanner.go index c3dd565..b223a86 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -6,6 +6,8 @@ import ( "io/ioutil" "log" "unicode" + + "github.com/fatih/hcl/token" ) // eof represents a marker rune for the end of the reader. @@ -83,7 +85,7 @@ func (s *Scanner) peek() rune { } // Scan scans the next token and returns the token. 
-func (s *Scanner) Scan() (tok Token) { +func (s *Scanner) Scan() (tok token.Token) { ch := s.next() // skip white space @@ -96,10 +98,10 @@ func (s *Scanner) Scan() (tok Token) { s.tokPos = s.currPos.Offset - s.lastCharLen if isLetter(ch) { - tok = IDENT + tok = token.IDENT lit := s.scanIdentifier() if lit == "true" || lit == "false" { - tok = BOOL + tok = token.BOOL } } @@ -110,9 +112,9 @@ func (s *Scanner) Scan() (tok Token) { switch ch { case eof: - tok = EOF + tok = token.EOF case '"': - tok = STRING + tok = token.STRING s.scanString() } diff --git a/scanner/scanner_text.go b/scanner/scanner_test.go similarity index 76% rename from scanner/scanner_text.go rename to scanner/scanner_test.go index 2c5f80c..3ea41fd 100644 --- a/scanner/scanner_text.go +++ b/scanner/scanner_test.go @@ -4,17 +4,19 @@ import ( "bytes" "fmt" "testing" + + "github.com/fatih/hcl/token" ) var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" -type token struct { - tok Token +type tokenPair struct { + tok token.Token text string } func TestBool(t *testing.T) { - var tokenList = []token{ + var tokenList = []tokenPair{ {token.BOOL, "true"}, {token.BOOL, "false"}, } @@ -44,23 +46,23 @@ func TestBool(t *testing.T) { } func TestIdent(t *testing.T) { - var tokenList = []token{ - {IDENT, "a"}, - {IDENT, "a0"}, - {IDENT, "foobar"}, - {IDENT, "abc123"}, - {IDENT, "LGTM"}, - {IDENT, "_"}, - {IDENT, "_abc123"}, - {IDENT, "abc123_"}, - {IDENT, "_abc_123_"}, - {IDENT, "_äöü"}, - {IDENT, "_本"}, - {IDENT, "äöü"}, - {IDENT, "本"}, - {IDENT, "a۰۱۸"}, - {IDENT, "foo६४"}, - {IDENT, "bar9876"}, + var tokenList = []tokenPair{ + {token.IDENT, "a"}, + {token.IDENT, "a0"}, + {token.IDENT, "foobar"}, + {token.IDENT, "abc123"}, + {token.IDENT, "LGTM"}, + {token.IDENT, "_"}, + {token.IDENT, "_abc123"}, + {token.IDENT, "abc123_"}, + {token.IDENT, "_abc_123_"}, + {token.IDENT, "_äöü"}, + {token.IDENT, "_本"}, + {token.IDENT, "äöü"}, + {token.IDENT, "本"}, + 
{token.IDENT, "a۰۱۸"}, + {token.IDENT, "foo६४"}, + {token.IDENT, "bar9876"}, } // create artifical source code @@ -88,10 +90,10 @@ func TestIdent(t *testing.T) { } func TestString(t *testing.T) { - var tokenList = []token{ - {STRING, `" "`}, - {STRING, `"a"`}, - {STRING, `"本"`}, + var tokenList = []tokenPair{ + {token.STRING, `" "`}, + {token.STRING, `"a"`}, + {token.STRING, `"本"`}, // {STRING, `"\a"`}, // {STRING, `"\b"`}, // {STRING, `"\f"`}, From 94bd4afe4d3884d9b5120adb606056431ae38bf4 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 20:22:37 +0300 Subject: [PATCH 021/137] scanner: small fixes --- scanner/scanner.go | 7 +++---- scanner/scanner_test.go | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index b223a86..a4ba64e 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -13,12 +13,11 @@ import ( // eof represents a marker rune for the end of the reader. const eof = rune(0) -// Lexer defines a lexical scanner +// Scanner defines a lexical scanner type Scanner struct { src *bytes.Buffer srcBytes []byte - // ch rune // current character lastCharLen int // length of last character in bytes currPos Position // current position @@ -29,9 +28,9 @@ type Scanner struct { tokEnd int // token text tail end (srcBuf index) } -// NewLexer returns a new instance of Lexer. Even though src is an io.Reader, +// NewScanner returns a new instance of Lexer. Even though src is an io.Reader, // we fully consume the content. 
-func NewLexer(src io.Reader) (*Scanner, error) { +func NewScanner(src io.Reader) (*Scanner, error) { buf, err := ioutil.ReadAll(src) if err != nil { return nil, err diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 3ea41fd..b6dc463 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -27,7 +27,7 @@ func TestBool(t *testing.T) { fmt.Fprintf(buf, " \t%s\n", ident.text) } - l, err := NewLexer(buf) + l, err := NewScanner(buf) if err != nil { t.Fatal(err) } @@ -71,7 +71,7 @@ func TestIdent(t *testing.T) { fmt.Fprintf(buf, " \t%s\n", ident.text) } - l, err := NewLexer(buf) + l, err := NewScanner(buf) if err != nil { t.Fatal(err) } @@ -119,7 +119,7 @@ func TestString(t *testing.T) { fmt.Fprintf(buf, " \t%s\n", ident.text) } - l, err := NewLexer(buf) + l, err := NewScanner(buf) if err != nil { t.Fatal(err) } From 1f011b4e82547d14b00dc1a690580e38c33b9ccd Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 22:01:10 +0300 Subject: [PATCH 022/137] scanner: implement string scanning --- scanner/scanner.go | 95 ++++++++++++++++++++++++++++++++++++----- scanner/scanner_test.go | 34 +++++++-------- 2 files changed, 102 insertions(+), 27 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index a4ba64e..d2b98c4 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -2,9 +2,10 @@ package scanner import ( "bytes" + "fmt" "io" "io/ioutil" - "log" + "os" "unicode" "github.com/fatih/hcl/token" @@ -26,6 +27,13 @@ type Scanner struct { tokBuf bytes.Buffer // token text buffer tokPos int // token text tail position (srcBuf index); valid if >= 0 tokEnd int // token text tail end (srcBuf index) + + // Error is called for each error encountered. If no Error + // function is set, the error is reported to os.Stderr. + Error func(pos Position, msg string) + + // ErrorCount is incremented by one for each error encountered. + ErrorCount int } // NewScanner returns a new instance of Lexer. 
Even though src is an io.Reader, @@ -122,25 +130,70 @@ func (s *Scanner) Scan() (tok token.Token) { } func (s *Scanner) scanString() { - // '"' opening already consumed - ch := s.next() // read character after quote - for ch != '"' { - if ch == '\n' || ch < 0 { - log.Println("[ERROR] literal not terminated") + for { + // '"' opening already consumed + // read character after quote + ch := s.next() + + if ch == '\n' || ch < 0 || ch == eof { + s.err("literal not terminated") return } + if ch == '"' { + break + } + if ch == '\\' { - // scanEscape - return - } else { - ch = s.next() + s.scanEscape() } } return } +// scanEscape scans an escape sequence +func (s *Scanner) scanEscape() rune { + // http://en.cppreference.com/w/cpp/language/escape + ch := s.next() // read character after '/' + switch ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"': + // nothing to do + case '0', '1', '2', '3', '4', '5', '6', '7': + // octal notation + ch = s.scanDigits(ch, 8, 3) + case 'x': + // hexademical notation + ch = s.scanDigits(s.next(), 16, 2) + case 'u': + // universal character name + ch = s.scanDigits(s.next(), 16, 4) + case 'U': + // universal character name + ch = s.scanDigits(s.next(), 16, 8) + default: + s.err("illegal char escape") + } + return ch +} + +// scanDigits scans a rune with the given base for n times. 
For example an +// octan notation \184 would yield in scanDigits(ch, 8, 3) +func (s *Scanner) scanDigits(ch rune, base, n int) rune { + for n > 0 && digitVal(ch) < base { + ch = s.next() + n-- + } + if n > 0 { + s.err("illegal char escape") + } + + // we scanned all digits, put the last non digit char back + s.unread() + return ch +} + +// scanIdentifier scans an identifier and returns the literal string func (s *Scanner) scanIdentifier() string { offs := s.currPos.Offset - s.lastCharLen ch := s.next() @@ -174,6 +227,16 @@ func (s *Scanner) Pos() Position { return s.currPos } +func (s *Scanner) err(msg string) { + s.ErrorCount++ + if s.Error != nil { + s.Error(s.currPos, msg) + return + } + + fmt.Fprintf(os.Stderr, "%s: %s\n", s.currPos, msg) +} + func isLetter(ch rune) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) } @@ -186,3 +249,15 @@ func isDigit(ch rune) bool { func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } + +func digitVal(ch rune) int { + switch { + case '0' <= ch && ch <= '9': + return int(ch - '0') + case 'a' <= ch && ch <= 'f': + return int(ch - 'a' + 10) + case 'A' <= ch && ch <= 'F': + return int(ch - 'A' + 10) + } + return 16 // larger than any legal digit val +} diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index b6dc463..1ed6326 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -94,23 +94,23 @@ func TestString(t *testing.T) { {token.STRING, `" "`}, {token.STRING, `"a"`}, {token.STRING, `"本"`}, - // {STRING, `"\a"`}, - // {STRING, `"\b"`}, - // {STRING, `"\f"`}, - // {STRING, `"\n"`}, - // {STRING, `"\r"`}, - // {STRING, `"\t"`}, - // {STRING, `"\v"`}, - // {STRING, `"\""`}, - // {STRING, `"\000"`}, - // {STRING, `"\777"`}, - // {STRING, `"\x00"`}, - // {STRING, `"\xff"`}, - // {STRING, `"\u0000"`}, - // {STRING, `"\ufA16"`}, - // {STRING, `"\U00000000"`}, - // {STRING, `"\U0000ffAB"`}, - // {STRING, 
`"` + f100 + `"`}, + {token.STRING, `"\a"`}, + {token.STRING, `"\b"`}, + {token.STRING, `"\f"`}, + {token.STRING, `"\n"`}, + {token.STRING, `"\r"`}, + {token.STRING, `"\t"`}, + {token.STRING, `"\v"`}, + {token.STRING, `"\""`}, + {token.STRING, `"\000"`}, + {token.STRING, `"\777"`}, + {token.STRING, `"\x00"`}, + {token.STRING, `"\xff"`}, + {token.STRING, `"\u0000"`}, + {token.STRING, `"\ufA16"`}, + {token.STRING, `"\U00000000"`}, + {token.STRING, `"\U0000ffAB"`}, + {token.STRING, `"` + f100 + `"`}, } // create artifical source code From aa9105226b95df61309a89d78c7f2a52e8d3d1ff Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 22:17:59 +0300 Subject: [PATCH 023/137] scanner: reuse tests code --- scanner/scanner.go | 6 +++- scanner/scanner_test.go | 70 ++++++++++------------------------------- 2 files changed, 21 insertions(+), 55 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index d2b98c4..2fc3b27 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -113,7 +113,7 @@ func (s *Scanner) Scan() (tok token.Token) { } if isDigit(ch) { - // scanDigits() + s.scanNumber() // TODO(arslan) } @@ -129,6 +129,10 @@ func (s *Scanner) Scan() (tok token.Token) { return tok } +func (s *Scanner) scanNumber() { +} + +// scanString scans a quoted string func (s *Scanner) scanString() { for { // '"' opening already consumed diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 1ed6326..4f69088 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -15,36 +15,40 @@ type tokenPair struct { text string } -func TestBool(t *testing.T) { - var tokenList = []tokenPair{ - {token.BOOL, "true"}, - {token.BOOL, "false"}, - } - +func testTokenList(t *testing.T, tokenList []tokenPair) { // create artifical source code buf := new(bytes.Buffer) for _, ident := range tokenList { fmt.Fprintf(buf, " \t%s\n", ident.text) } - l, err := NewScanner(buf) + s, err := NewScanner(buf) if err != nil { t.Fatal(err) } for _, ident := range 
tokenList { - tok := l.Scan() + tok := s.Scan() if tok != ident.tok { t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) } - if l.TokenText() != ident.text { - t.Errorf("text = %s want %s", l.TokenText(), ident.text) + if s.TokenText() != ident.text { + t.Errorf("text = %s want %s", s.TokenText(), ident.text) } } } +func TestBool(t *testing.T) { + var tokenList = []tokenPair{ + {token.BOOL, "true"}, + {token.BOOL, "false"}, + } + + testTokenList(t, tokenList) +} + func TestIdent(t *testing.T) { var tokenList = []tokenPair{ {token.IDENT, "a"}, @@ -65,28 +69,7 @@ func TestIdent(t *testing.T) { {token.IDENT, "bar9876"}, } - // create artifical source code - buf := new(bytes.Buffer) - for _, ident := range tokenList { - fmt.Fprintf(buf, " \t%s\n", ident.text) - } - - l, err := NewScanner(buf) - if err != nil { - t.Fatal(err) - } - - for _, ident := range tokenList { - tok := l.Scan() - if tok != ident.tok { - t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) - } - - if l.TokenText() != ident.text { - t.Errorf("text = %s want %s", l.TokenText(), ident.text) - } - - } + testTokenList(t, tokenList) } func TestString(t *testing.T) { @@ -113,26 +96,5 @@ func TestString(t *testing.T) { {token.STRING, `"` + f100 + `"`}, } - // create artifical source code - buf := new(bytes.Buffer) - for _, ident := range tokenList { - fmt.Fprintf(buf, " \t%s\n", ident.text) - } - - l, err := NewScanner(buf) - if err != nil { - t.Fatal(err) - } - - for _, ident := range tokenList { - tok := l.Scan() - if tok != ident.tok { - t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) - } - - if l.TokenText() != ident.text { - t.Errorf("text = %s want %s", l.TokenText(), ident.text) - } - - } + testTokenList(t, tokenList) } From 9629512599de12b5f5250a932f3f203eca6664c0 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 22:53:20 +0300 Subject: [PATCH 024/137] scanner: organize Scan() so it's easier to read --- scanner/scanner.go | 22 
++++++++-------- scanner/scanner_test.go | 58 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 11 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 2fc3b27..394eeb0 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -104,25 +104,25 @@ func (s *Scanner) Scan() (tok token.Token) { s.tokBuf.Reset() s.tokPos = s.currPos.Offset - s.lastCharLen - if isLetter(ch) { + switch { + case isLetter(ch): tok = token.IDENT lit := s.scanIdentifier() if lit == "true" || lit == "false" { tok = token.BOOL } - } - if isDigit(ch) { + case isDigit(ch): s.scanNumber() // TODO(arslan) - } - - switch ch { - case eof: - tok = token.EOF - case '"': - tok = token.STRING - s.scanString() + default: + switch ch { + case eof: + tok = token.EOF + case '"': + tok = token.STRING + s.scanString() + } } s.tokEnd = s.currPos.Offset diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 4f69088..4d30050 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -98,3 +98,61 @@ func TestString(t *testing.T) { testTokenList(t, tokenList) } + +func TestNumber(t *testing.T) { + t.SkipNow() + var tokenList = []tokenPair{ + {token.NUMBER, "0"}, + {token.NUMBER, "1"}, + {token.NUMBER, "9"}, + {token.NUMBER, "42"}, + {token.NUMBER, "1234567890"}, + {token.NUMBER, "00"}, + {token.NUMBER, "01"}, + {token.NUMBER, "07"}, + {token.NUMBER, "042"}, + {token.NUMBER, "01234567"}, + {token.NUMBER, "0x0"}, + {token.NUMBER, "0x1"}, + {token.NUMBER, "0xf"}, + {token.NUMBER, "0x42"}, + {token.NUMBER, "0x123456789abcDEF"}, + {token.NUMBER, "0x" + f100}, + {token.NUMBER, "0X0"}, + {token.NUMBER, "0X1"}, + {token.NUMBER, "0XF"}, + {token.NUMBER, "0X42"}, + {token.NUMBER, "0X123456789abcDEF"}, + {token.NUMBER, "0X" + f100}, + {token.FLOAT, "0."}, + {token.FLOAT, "1."}, + {token.FLOAT, "42."}, + {token.FLOAT, "01234567890."}, + {token.FLOAT, ".0"}, + {token.FLOAT, ".1"}, + {token.FLOAT, ".42"}, + {token.FLOAT, ".0123456789"}, + {token.FLOAT, 
"0.0"}, + {token.FLOAT, "1.0"}, + {token.FLOAT, "42.0"}, + {token.FLOAT, "01234567890.0"}, + {token.FLOAT, "0e0"}, + {token.FLOAT, "1e0"}, + {token.FLOAT, "42e0"}, + {token.FLOAT, "01234567890e0"}, + {token.FLOAT, "0E0"}, + {token.FLOAT, "1E0"}, + {token.FLOAT, "42E0"}, + {token.FLOAT, "01234567890E0"}, + {token.FLOAT, "0e+10"}, + {token.FLOAT, "1e-10"}, + {token.FLOAT, "42e+10"}, + {token.FLOAT, "01234567890e-10"}, + {token.FLOAT, "0E+10"}, + {token.FLOAT, "1E-10"}, + {token.FLOAT, "42E+10"}, + {token.FLOAT, "01234567890E-10"}, + } + + testTokenList(t, tokenList) +} From 5d75c5a14370258e54afaf1513e33874c9d69a92 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 23:21:34 +0300 Subject: [PATCH 025/137] scanner: initial number lexing --- scanner/scanner.go | 27 ++++++++++++-- scanner/scanner_test.go | 81 ++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 45 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 394eeb0..fda5265 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -112,9 +112,8 @@ func (s *Scanner) Scan() (tok token.Token) { tok = token.BOOL } - case isDigit(ch): - s.scanNumber() - // TODO(arslan) + case isDecimal(ch): + tok = s.scanNumber(ch) default: switch ch { case eof: @@ -129,7 +128,23 @@ func (s *Scanner) Scan() (tok token.Token) { return tok } -func (s *Scanner) scanNumber() { +// scanNumber scans a HCL number definition starting with the given rune +func (s *Scanner) scanNumber(ch rune) token.Token { + if ch == '0' { + // check hexadecimal or float + // ch = s.next() + // return token.ILLEGAL + } + + s.scanMantissa(ch) + return token.NUMBER +} + +func (s *Scanner) scanMantissa(ch rune) { + for isDecimal(ch) { + ch = s.next() + } + s.unread() } // scanString scans a quoted string @@ -249,6 +264,10 @@ func isDigit(ch rune) bool { return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) } +func isDecimal(ch rune) bool { + return '0' <= ch && ch <= '9' +} + // isWhitespace 
returns true if the rune is a space, tab, newline or carriage return func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 4d30050..8ce08c0 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -100,7 +100,6 @@ func TestString(t *testing.T) { } func TestNumber(t *testing.T) { - t.SkipNow() var tokenList = []tokenPair{ {token.NUMBER, "0"}, {token.NUMBER, "1"}, @@ -112,46 +111,46 @@ func TestNumber(t *testing.T) { {token.NUMBER, "07"}, {token.NUMBER, "042"}, {token.NUMBER, "01234567"}, - {token.NUMBER, "0x0"}, - {token.NUMBER, "0x1"}, - {token.NUMBER, "0xf"}, - {token.NUMBER, "0x42"}, - {token.NUMBER, "0x123456789abcDEF"}, - {token.NUMBER, "0x" + f100}, - {token.NUMBER, "0X0"}, - {token.NUMBER, "0X1"}, - {token.NUMBER, "0XF"}, - {token.NUMBER, "0X42"}, - {token.NUMBER, "0X123456789abcDEF"}, - {token.NUMBER, "0X" + f100}, - {token.FLOAT, "0."}, - {token.FLOAT, "1."}, - {token.FLOAT, "42."}, - {token.FLOAT, "01234567890."}, - {token.FLOAT, ".0"}, - {token.FLOAT, ".1"}, - {token.FLOAT, ".42"}, - {token.FLOAT, ".0123456789"}, - {token.FLOAT, "0.0"}, - {token.FLOAT, "1.0"}, - {token.FLOAT, "42.0"}, - {token.FLOAT, "01234567890.0"}, - {token.FLOAT, "0e0"}, - {token.FLOAT, "1e0"}, - {token.FLOAT, "42e0"}, - {token.FLOAT, "01234567890e0"}, - {token.FLOAT, "0E0"}, - {token.FLOAT, "1E0"}, - {token.FLOAT, "42E0"}, - {token.FLOAT, "01234567890E0"}, - {token.FLOAT, "0e+10"}, - {token.FLOAT, "1e-10"}, - {token.FLOAT, "42e+10"}, - {token.FLOAT, "01234567890e-10"}, - {token.FLOAT, "0E+10"}, - {token.FLOAT, "1E-10"}, - {token.FLOAT, "42E+10"}, - {token.FLOAT, "01234567890E-10"}, + // {token.NUMBER, "0x0"}, + // {token.NUMBER, "0x1"}, + // {token.NUMBER, "0xf"}, + // {token.NUMBER, "0x42"}, + // {token.NUMBER, "0x123456789abcDEF"}, + // {token.NUMBER, "0x" + f100}, + // {token.NUMBER, "0X0"}, + // {token.NUMBER, "0X1"}, + // {token.NUMBER, "0XF"}, + // 
{token.NUMBER, "0X42"}, + // {token.NUMBER, "0X123456789abcDEF"}, + // {token.NUMBER, "0X" + f100}, + // {token.FLOAT, "0."}, + // {token.FLOAT, "1."}, + // {token.FLOAT, "42."}, + // {token.FLOAT, "01234567890."}, + // {token.FLOAT, ".0"}, + // {token.FLOAT, ".1"}, + // {token.FLOAT, ".42"}, + // {token.FLOAT, ".0123456789"}, + // {token.FLOAT, "0.0"}, + // {token.FLOAT, "1.0"}, + // {token.FLOAT, "42.0"}, + // {token.FLOAT, "01234567890.0"}, + // {token.FLOAT, "0e0"}, + // {token.FLOAT, "1e0"}, + // {token.FLOAT, "42e0"}, + // {token.FLOAT, "01234567890e0"}, + // {token.FLOAT, "0E0"}, + // {token.FLOAT, "1E0"}, + // {token.FLOAT, "42E0"}, + // {token.FLOAT, "01234567890E0"}, + // {token.FLOAT, "0e+10"}, + // {token.FLOAT, "1e-10"}, + // {token.FLOAT, "42e+10"}, + // {token.FLOAT, "01234567890e-10"}, + // {token.FLOAT, "0E+10"}, + // {token.FLOAT, "1E-10"}, + // {token.FLOAT, "42E+10"}, + // {token.FLOAT, "01234567890E-10"}, } testTokenList(t, tokenList) From 0255587e95af39f757d227e477792ff2f3a92bd8 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 4 Oct 2015 23:47:06 +0300 Subject: [PATCH 026/137] scnaner: implement scanning hexadecimal numbers --- scanner/scanner.go | 30 ++++++++++++++++++++++++++++-- scanner/scanner_test.go | 24 ++++++++++++------------ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index fda5265..337691a 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -132,8 +132,13 @@ func (s *Scanner) Scan() (tok token.Token) { func (s *Scanner) scanNumber(ch rune) token.Token { if ch == '0' { // check hexadecimal or float - // ch = s.next() - // return token.ILLEGAL + ch = s.next() + if ch == 'x' || ch == 'X' { + ch = s.next() + s.scanHexadecimal(ch) + return token.NUMBER + } + } s.scanMantissa(ch) @@ -147,6 +152,23 @@ func (s *Scanner) scanMantissa(ch rune) { s.unread() } +func (s *Scanner) scanHexadecimal(ch rune) { + found := false + + // after "0x" or "0X" + for isHexadecimal(ch) 
{ + ch = s.next() + found = true + } + + if !found { + // only scanned "0x" or "0X" + s.err("illegal hexadecimal number") + } + + s.unread() +} + // scanString scans a quoted string func (s *Scanner) scanString() { for { @@ -268,6 +290,10 @@ func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } +func isHexadecimal(ch rune) bool { + return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' +} + // isWhitespace returns true if the rune is a space, tab, newline or carriage return func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 8ce08c0..4df11ea 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -111,18 +111,18 @@ func TestNumber(t *testing.T) { {token.NUMBER, "07"}, {token.NUMBER, "042"}, {token.NUMBER, "01234567"}, - // {token.NUMBER, "0x0"}, - // {token.NUMBER, "0x1"}, - // {token.NUMBER, "0xf"}, - // {token.NUMBER, "0x42"}, - // {token.NUMBER, "0x123456789abcDEF"}, - // {token.NUMBER, "0x" + f100}, - // {token.NUMBER, "0X0"}, - // {token.NUMBER, "0X1"}, - // {token.NUMBER, "0XF"}, - // {token.NUMBER, "0X42"}, - // {token.NUMBER, "0X123456789abcDEF"}, - // {token.NUMBER, "0X" + f100}, + {token.NUMBER, "0x0"}, + {token.NUMBER, "0x1"}, + {token.NUMBER, "0xf"}, + {token.NUMBER, "0x42"}, + {token.NUMBER, "0x123456789abcDEF"}, + {token.NUMBER, "0x" + f100}, + {token.NUMBER, "0X0"}, + {token.NUMBER, "0X1"}, + {token.NUMBER, "0XF"}, + {token.NUMBER, "0X42"}, + {token.NUMBER, "0X123456789abcDEF"}, + {token.NUMBER, "0X" + f100}, // {token.FLOAT, "0."}, // {token.FLOAT, "1."}, // {token.FLOAT, "42."}, From b33f1a99db45f02428bbad09aff3c4b648114d45 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 11:56:11 +0300 Subject: [PATCH 027/137] scanner: implement parsing octals --- scanner/scanner.go | 60 +++++++++++++++++++++++++++-------------- scanner/scanner_test.go | 2 +- token/token.go | 6 ----- 3 
files changed, 41 insertions(+), 27 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 337691a..8b4e81a 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -111,7 +111,6 @@ func (s *Scanner) Scan() (tok token.Token) { if lit == "true" || lit == "false" { tok = token.BOOL } - case isDecimal(ch): tok = s.scanNumber(ch) default: @@ -131,14 +130,48 @@ func (s *Scanner) Scan() (tok token.Token) { // scanNumber scans a HCL number definition starting with the given rune func (s *Scanner) scanNumber(ch rune) token.Token { if ch == '0' { - // check hexadecimal or float + // check for hexadecimal, octal or float ch = s.next() if ch == 'x' || ch == 'X' { + // hexadecimal ch = s.next() - s.scanHexadecimal(ch) + found := false + for isHexadecimal(ch) { + ch = s.next() + found = true + } + s.unread() + + if !found { + // only scanned "0x" or "0X" + s.err("illegal hexadecimal number") + // return token.ILLEGAL + } + return token.NUMBER } + // now it's either something like: 0421(octal) or 0.1231(float) + illegalOctal := false + for isOctal(ch) { + ch = s.next() + if ch == '8' || ch == '9' { + illegalOctal = true + } + } + s.unread() + + if ch == '.' 
|| ch == 'e' || ch == 'E' { + // TODO: scan float + return token.FLOAT + } + + // illegal octal + if illegalOctal { + s.err("illegal octal number") + } + + return token.NUMBER } s.scanMantissa(ch) @@ -152,23 +185,6 @@ func (s *Scanner) scanMantissa(ch rune) { s.unread() } -func (s *Scanner) scanHexadecimal(ch rune) { - found := false - - // after "0x" or "0X" - for isHexadecimal(ch) { - ch = s.next() - found = true - } - - if !found { - // only scanned "0x" or "0X" - s.err("illegal hexadecimal number") - } - - s.unread() -} - // scanString scans a quoted string func (s *Scanner) scanString() { for { @@ -286,6 +302,10 @@ func isDigit(ch rune) bool { return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) } +func isOctal(ch rune) bool { + return '0' <= ch && ch <= '7' +} + func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 4df11ea..c7e2837 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -19,7 +19,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { // create artifical source code buf := new(bytes.Buffer) for _, ident := range tokenList { - fmt.Fprintf(buf, " \t%s\n", ident.text) + fmt.Fprintf(buf, "%s\n", ident.text) } s, err := NewScanner(buf) diff --git a/token/token.go b/token/token.go index cbd822d..9ebb696 100644 --- a/token/token.go +++ b/token/token.go @@ -31,9 +31,6 @@ const ( ASSIGN // = ADD // + SUB // - - - EPLUS // e - EMINUS // e- operator_end ) @@ -60,9 +57,6 @@ var tokens = [...]string{ ASSIGN: "=", ADD: "+", SUB: "-", - - EPLUS: "e", - EMINUS: "e-", } // String returns the string corresponding to the token tok. 
From 69796c9fd23355bea06875c84a44b463c329e457 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 12:26:22 +0300 Subject: [PATCH 028/137] scanner: parse a set of fractions --- scanner/scanner.go | 50 ++++++++++++++++++++++++++++++++------- scanner/scanner_test.go | 52 ++++++++++++++++++++--------------------- 2 files changed, 68 insertions(+), 34 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 8b4e81a..a491691 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -143,9 +143,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { s.unread() if !found { - // only scanned "0x" or "0X" s.err("illegal hexadecimal number") - // return token.ILLEGAL } return token.NUMBER @@ -153,20 +151,24 @@ func (s *Scanner) scanNumber(ch rune) token.Token { // now it's either something like: 0421(octal) or 0.1231(float) illegalOctal := false - for isOctal(ch) { + for isDecimal(ch) { ch = s.next() if ch == '8' || ch == '9' { + // this is just a possibility. For example 0159 is illegal, but + // 159.23 is valid. So we mark a possible illegal octal. If the + // next character is not a period, we'll print the error illegalOctal = true + } } s.unread() if ch == '.' || ch == 'e' || ch == 'E' { - // TODO: scan float + ch = s.scanFraction(ch) + ch = s.scanExponent(ch) return token.FLOAT } - // illegal octal if illegalOctal { s.err("illegal octal number") } @@ -174,15 +176,47 @@ func (s *Scanner) scanNumber(ch rune) token.Token { return token.NUMBER } - s.scanMantissa(ch) + ch = s.scanMantissa(ch) + if ch == '.' || ch == 'e' || ch == 'E' { + ch = s.scanFraction(ch) + ch = s.scanExponent(ch) + return token.FLOAT + } return token.NUMBER } -func (s *Scanner) scanMantissa(ch rune) { +func (s *Scanner) scanFraction(ch rune) rune { + if ch == '.' 
{ + ch = s.next() + ch = s.scanMantissa(ch) + } + return ch +} + +func (s *Scanner) scanExponent(ch rune) rune { + if ch == 'e' || ch == 'E' { + ch = s.next() + if ch == '-' || ch == '+' { + ch = s.next() + } + ch = s.scanMantissa(ch) + } + return ch +} + +// scanMantissa scans the mantissa begining from the rune. It returns the next +// non decimal rune. It's used to determine wheter it's a fraction or exponent. +func (s *Scanner) scanMantissa(ch rune) rune { + scanned := false for isDecimal(ch) { ch = s.next() + scanned = true } - s.unread() + + if scanned { + s.unread() + } + return ch } // scanString scans a quoted string diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index c7e2837..29ff830 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -101,36 +101,36 @@ func TestString(t *testing.T) { func TestNumber(t *testing.T) { var tokenList = []tokenPair{ - {token.NUMBER, "0"}, - {token.NUMBER, "1"}, - {token.NUMBER, "9"}, - {token.NUMBER, "42"}, - {token.NUMBER, "1234567890"}, - {token.NUMBER, "00"}, - {token.NUMBER, "01"}, - {token.NUMBER, "07"}, - {token.NUMBER, "042"}, - {token.NUMBER, "01234567"}, - {token.NUMBER, "0x0"}, - {token.NUMBER, "0x1"}, - {token.NUMBER, "0xf"}, - {token.NUMBER, "0x42"}, - {token.NUMBER, "0x123456789abcDEF"}, - {token.NUMBER, "0x" + f100}, - {token.NUMBER, "0X0"}, - {token.NUMBER, "0X1"}, - {token.NUMBER, "0XF"}, - {token.NUMBER, "0X42"}, - {token.NUMBER, "0X123456789abcDEF"}, - {token.NUMBER, "0X" + f100}, + // {token.NUMBER, "0"}, + // {token.NUMBER, "1"}, + // {token.NUMBER, "9"}, + // {token.NUMBER, "42"}, + // {token.NUMBER, "1234567890"}, + // {token.NUMBER, "00"}, + // {token.NUMBER, "01"}, + // {token.NUMBER, "07"}, + // {token.NUMBER, "042"}, + // {token.NUMBER, "01234567"}, + // {token.NUMBER, "0x0"}, + // {token.NUMBER, "0x1"}, + // {token.NUMBER, "0xf"}, + // {token.NUMBER, "0x42"}, + // {token.NUMBER, "0x123456789abcDEF"}, + // {token.NUMBER, "0x" + f100}, + // {token.NUMBER, "0X0"}, + // 
{token.NUMBER, "0X1"}, + // {token.NUMBER, "0XF"}, + // {token.NUMBER, "0X42"}, + // {token.NUMBER, "0X123456789abcDEF"}, + // {token.NUMBER, "0X" + f100}, // {token.FLOAT, "0."}, // {token.FLOAT, "1."}, // {token.FLOAT, "42."}, // {token.FLOAT, "01234567890."}, - // {token.FLOAT, ".0"}, - // {token.FLOAT, ".1"}, - // {token.FLOAT, ".42"}, - // {token.FLOAT, ".0123456789"}, + {token.FLOAT, ".0"}, + {token.FLOAT, ".1"}, + {token.FLOAT, ".42"}, + {token.FLOAT, ".0123456789"}, // {token.FLOAT, "0.0"}, // {token.FLOAT, "1.0"}, // {token.FLOAT, "42.0"}, From c7955d276fe919814f08617ea274182808168cf4 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 12:31:26 +0300 Subject: [PATCH 029/137] scanner: parse floats in form of .9 , .123 --- scanner/scanner.go | 8 ++++++ scanner/scanner_test.go | 54 ++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index a491691..63480ab 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -120,6 +120,13 @@ func (s *Scanner) Scan() (tok token.Token) { case '"': tok = token.STRING s.scanString() + case '.': + ch = s.next() + if isDecimal(ch) { + tok = token.FLOAT + ch = s.scanMantissa(ch) + ch = s.scanExponent(ch) + } } } @@ -177,6 +184,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { } ch = s.scanMantissa(ch) + fmt.Printf("ch = %q\n", ch) if ch == '.' 
|| ch == 'e' || ch == 'E' { ch = s.scanFraction(ch) ch = s.scanExponent(ch) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 29ff830..e4d75fb 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -101,37 +101,37 @@ func TestString(t *testing.T) { func TestNumber(t *testing.T) { var tokenList = []tokenPair{ - // {token.NUMBER, "0"}, - // {token.NUMBER, "1"}, - // {token.NUMBER, "9"}, - // {token.NUMBER, "42"}, - // {token.NUMBER, "1234567890"}, - // {token.NUMBER, "00"}, - // {token.NUMBER, "01"}, - // {token.NUMBER, "07"}, - // {token.NUMBER, "042"}, - // {token.NUMBER, "01234567"}, - // {token.NUMBER, "0x0"}, - // {token.NUMBER, "0x1"}, - // {token.NUMBER, "0xf"}, - // {token.NUMBER, "0x42"}, - // {token.NUMBER, "0x123456789abcDEF"}, - // {token.NUMBER, "0x" + f100}, - // {token.NUMBER, "0X0"}, - // {token.NUMBER, "0X1"}, - // {token.NUMBER, "0XF"}, - // {token.NUMBER, "0X42"}, - // {token.NUMBER, "0X123456789abcDEF"}, - // {token.NUMBER, "0X" + f100}, - // {token.FLOAT, "0."}, - // {token.FLOAT, "1."}, - // {token.FLOAT, "42."}, - // {token.FLOAT, "01234567890."}, + {token.NUMBER, "0"}, + {token.NUMBER, "1"}, + {token.NUMBER, "9"}, + {token.NUMBER, "42"}, + {token.NUMBER, "1234567890"}, + {token.NUMBER, "00"}, + {token.NUMBER, "01"}, + {token.NUMBER, "07"}, + {token.NUMBER, "042"}, + {token.NUMBER, "01234567"}, + {token.NUMBER, "0x0"}, + {token.NUMBER, "0x1"}, + {token.NUMBER, "0xf"}, + {token.NUMBER, "0x42"}, + {token.NUMBER, "0x123456789abcDEF"}, + {token.NUMBER, "0x" + f100}, + {token.NUMBER, "0X0"}, + {token.NUMBER, "0X1"}, + {token.NUMBER, "0XF"}, + {token.NUMBER, "0X42"}, + {token.NUMBER, "0X123456789abcDEF"}, + {token.NUMBER, "0X" + f100}, + {token.FLOAT, "0."}, + {token.FLOAT, "1."}, + {token.FLOAT, "42."}, + {token.FLOAT, "01234567890."}, {token.FLOAT, ".0"}, {token.FLOAT, ".1"}, {token.FLOAT, ".42"}, {token.FLOAT, ".0123456789"}, - // {token.FLOAT, "0.0"}, + {token.FLOAT, "0.0"}, // {token.FLOAT, "1.0"}, // 
{token.FLOAT, "42.0"}, // {token.FLOAT, "01234567890.0"}, From 00e06fb432f357467be5f87ae8db233aab11b741 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 12:59:55 +0300 Subject: [PATCH 030/137] scanner: finalize float scanning --- scanner/scanner.go | 49 +++++++++++++++++++++++------------------ scanner/scanner_test.go | 38 ++++++++++++++++---------------- 2 files changed, 46 insertions(+), 41 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 63480ab..8fe0651 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -162,15 +162,17 @@ func (s *Scanner) scanNumber(ch rune) token.Token { ch = s.next() if ch == '8' || ch == '9' { // this is just a possibility. For example 0159 is illegal, but - // 159.23 is valid. So we mark a possible illegal octal. If the - // next character is not a period, we'll print the error + // 0159.23 is valid. So we mark a possible illegal octal. If + // the next character is not a period, we'll print the error. illegalOctal = true } + } s.unread() if ch == '.' || ch == 'e' || ch == 'E' { + ch = s.next() ch = s.scanFraction(ch) ch = s.scanExponent(ch) return token.FLOAT @@ -184,8 +186,8 @@ func (s *Scanner) scanNumber(ch rune) token.Token { } ch = s.scanMantissa(ch) - fmt.Printf("ch = %q\n", ch) if ch == '.' || ch == 'e' || ch == 'E' { + ch = s.next() // seek forward ch = s.scanFraction(ch) ch = s.scanExponent(ch) return token.FLOAT @@ -193,25 +195,6 @@ func (s *Scanner) scanNumber(ch rune) token.Token { return token.NUMBER } -func (s *Scanner) scanFraction(ch rune) rune { - if ch == '.' { - ch = s.next() - ch = s.scanMantissa(ch) - } - return ch -} - -func (s *Scanner) scanExponent(ch rune) rune { - if ch == 'e' || ch == 'E' { - ch = s.next() - if ch == '-' || ch == '+' { - ch = s.next() - } - ch = s.scanMantissa(ch) - } - return ch -} - // scanMantissa scans the mantissa begining from the rune. It returns the next // non decimal rune. It's used to determine wheter it's a fraction or exponent. 
func (s *Scanner) scanMantissa(ch rune) rune { @@ -227,6 +210,28 @@ func (s *Scanner) scanMantissa(ch rune) rune { return ch } +func (s *Scanner) scanFraction(ch rune) rune { + if ch == '.' { + ch = s.next() + msCh := s.scanMantissa(ch) + if msCh == ch { + s.unread() + } + } + return ch +} + +func (s *Scanner) scanExponent(ch rune) rune { + if ch == 'e' || ch == 'E' { + ch = s.next() + if ch == '-' || ch == '+' { + ch = s.next() + } + ch = s.scanMantissa(ch) + } + return ch +} + // scanString scans a quoted string func (s *Scanner) scanString() { for { diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index e4d75fb..4c7992f 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -132,25 +132,25 @@ func TestNumber(t *testing.T) { {token.FLOAT, ".42"}, {token.FLOAT, ".0123456789"}, {token.FLOAT, "0.0"}, - // {token.FLOAT, "1.0"}, - // {token.FLOAT, "42.0"}, - // {token.FLOAT, "01234567890.0"}, - // {token.FLOAT, "0e0"}, - // {token.FLOAT, "1e0"}, - // {token.FLOAT, "42e0"}, - // {token.FLOAT, "01234567890e0"}, - // {token.FLOAT, "0E0"}, - // {token.FLOAT, "1E0"}, - // {token.FLOAT, "42E0"}, - // {token.FLOAT, "01234567890E0"}, - // {token.FLOAT, "0e+10"}, - // {token.FLOAT, "1e-10"}, - // {token.FLOAT, "42e+10"}, - // {token.FLOAT, "01234567890e-10"}, - // {token.FLOAT, "0E+10"}, - // {token.FLOAT, "1E-10"}, - // {token.FLOAT, "42E+10"}, - // {token.FLOAT, "01234567890E-10"}, + {token.FLOAT, "1.0"}, + {token.FLOAT, "42.0"}, + {token.FLOAT, "01234567890.0"}, + {token.FLOAT, "0e0"}, + {token.FLOAT, "1e0"}, + {token.FLOAT, "42e0"}, + {token.FLOAT, "01234567890e0"}, + {token.FLOAT, "0E0"}, + {token.FLOAT, "1E0"}, + {token.FLOAT, "42E0"}, + {token.FLOAT, "01234567890E0"}, + {token.FLOAT, "0e+10"}, + {token.FLOAT, "1e-10"}, + {token.FLOAT, "42e+10"}, + {token.FLOAT, "01234567890e-10"}, + {token.FLOAT, "0E+10"}, + {token.FLOAT, "1E-10"}, + {token.FLOAT, "42E+10"}, + {token.FLOAT, "01234567890E-10"}, } testTokenList(t, tokenList) From 
dd848e3dab803670ffc26f72ed43028193372ccf Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 13:03:46 +0300 Subject: [PATCH 031/137] scanner: peek instead of next. --- scanner/scanner.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 8fe0651..1a8a60c 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -212,11 +212,8 @@ func (s *Scanner) scanMantissa(ch rune) rune { func (s *Scanner) scanFraction(ch rune) rune { if ch == '.' { - ch = s.next() - msCh := s.scanMantissa(ch) - if msCh == ch { - s.unread() - } + ch = s.peek() // we peek just to see if we can move forward + ch = s.scanMantissa(ch) } return ch } From 9096153a3484191a6cd3d71bc30fa3b80ffed96c Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 13:12:48 +0300 Subject: [PATCH 032/137] scanner: implement remaning tokens --- scanner/scanner.go | 20 +++++++++++++++++++- scanner/scanner_test.go | 16 ++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 1a8a60c..5dfe526 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -121,12 +121,30 @@ func (s *Scanner) Scan() (tok token.Token) { tok = token.STRING s.scanString() case '.': - ch = s.next() + ch = s.peek() if isDecimal(ch) { tok = token.FLOAT ch = s.scanMantissa(ch) ch = s.scanExponent(ch) + } else { + tok = token.PERIOD } + case '[': + tok = token.LBRACK + case ']': + tok = token.RBRACK + case '{': + tok = token.LBRACE + case '}': + tok = token.RBRACE + case ',': + tok = token.COMMA + case '=': + tok = token.ASSIGN + case '+': + tok = token.ADD + case '-': + tok = token.SUB } } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 4c7992f..a2be09b 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -40,6 +40,22 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { } } +func TestOperator(t *testing.T) { + var tokenList = []tokenPair{ + 
{token.LBRACK, "["}, + {token.LBRACE, "{"}, + {token.COMMA, ","}, + {token.PERIOD, "."}, + {token.RBRACK, "]"}, + {token.RBRACE, "}"}, + {token.ASSIGN, "="}, + {token.ADD, "+"}, + {token.SUB, "-"}, + } + + testTokenList(t, tokenList) +} + func TestBool(t *testing.T) { var tokenList = []tokenPair{ {token.BOOL, "true"}, From 2216cd81e9f4a157d728860e1e6e7b1d1ba2f9c0 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 13:24:38 +0300 Subject: [PATCH 033/137] scanner: # style line comment scanning implemented --- scanner/scanner.go | 14 ++++++++++++++ scanner/scanner_test.go | 29 +++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 5dfe526..091f227 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -120,6 +120,9 @@ func (s *Scanner) Scan() (tok token.Token) { case '"': tok = token.STRING s.scanString() + case '#': + tok = token.COMMENT + s.scanComment(ch) case '.': ch = s.peek() if isDecimal(ch) { @@ -152,6 +155,17 @@ func (s *Scanner) Scan() (tok token.Token) { return tok } +func (s *Scanner) scanComment(ch rune) { + if ch == '#' { + // line comment + ch = s.next() + for ch != '\n' && ch >= 0 { + ch = s.next() + } + s.unread() + } +} + // scanNumber scans a HCL number definition starting with the given rune func (s *Scanner) scanNumber(ch rune) token.Token { if ch == '0' { diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index a2be09b..7081320 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -30,16 +30,41 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { for _, ident := range tokenList { tok := s.Scan() if tok != ident.tok { - t.Errorf("tok = %s want %s for %s\n", tok, ident.tok, ident.text) + t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) } if s.TokenText() != ident.text { - t.Errorf("text = %s want %s", s.TokenText(), ident.text) + t.Errorf("text = %q want %q", s.TokenText(), ident.text) } } } 
+func TestComment(t *testing.T) { + var tokenList = []tokenPair{ + // {token.COMMENT, "//"}, + // {token.COMMENT, "////"}, + // {token.COMMENT, "// comment"}, + // {token.COMMENT, "// /* comment */"}, + // {token.COMMENT, "// // comment //"}, + {token.COMMENT, "#"}, + {token.COMMENT, "##"}, + {token.COMMENT, "# comment"}, + {token.COMMENT, "# /* comment */"}, + {token.COMMENT, "# # comment #"}, + {token.COMMENT, "#" + f100}, + // {token.COMMENT, "/**/"}, + // {token.COMMENT, "/***/"}, + // {token.COMMENT, "/* comment */"}, + // {token.COMMENT, "/* // comment */"}, + // {token.COMMENT, "/* /* comment */"}, + // {token.COMMENT, "/*\n comment\n*/"}, + // {token.COMMENT, "/*" + f100 + "*/"}, + } + + testTokenList(t, tokenList) +} + func TestOperator(t *testing.T) { var tokenList = []tokenPair{ {token.LBRACK, "["}, From a299665100ab069550b3708bfe91ec7817127e46 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 13:26:18 +0300 Subject: [PATCH 034/137] scanner: // style comments are implemented too --- scanner/scanner.go | 4 ++-- scanner/scanner_test.go | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 091f227..7b09498 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -120,7 +120,7 @@ func (s *Scanner) Scan() (tok token.Token) { case '"': tok = token.STRING s.scanString() - case '#': + case '#', '/': tok = token.COMMENT s.scanComment(ch) case '.': @@ -156,7 +156,7 @@ func (s *Scanner) Scan() (tok token.Token) { } func (s *Scanner) scanComment(ch rune) { - if ch == '#' { + if ch == '#' || ch == '/' { // line comment ch = s.next() for ch != '\n' && ch >= 0 { diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 7081320..2bbe534 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -42,11 +42,12 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { func TestComment(t *testing.T) { var tokenList = []tokenPair{ - // {token.COMMENT, "//"}, - 
// {token.COMMENT, "////"}, - // {token.COMMENT, "// comment"}, - // {token.COMMENT, "// /* comment */"}, - // {token.COMMENT, "// // comment //"}, + {token.COMMENT, "//"}, + {token.COMMENT, "////"}, + {token.COMMENT, "// comment"}, + {token.COMMENT, "// /* comment */"}, + {token.COMMENT, "// // comment //"}, + {token.COMMENT, "//" + f100}, {token.COMMENT, "#"}, {token.COMMENT, "##"}, {token.COMMENT, "# comment"}, From 6e23b0404d6cd41783710c48d19290de61627e4a Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 13:36:28 +0300 Subject: [PATCH 035/137] scanner: implement comments --- scanner/scanner.go | 22 +++++++++++++++++++--- scanner/scanner_test.go | 14 +++++++------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 7b09498..52e0657 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -124,13 +124,12 @@ func (s *Scanner) Scan() (tok token.Token) { tok = token.COMMENT s.scanComment(ch) case '.': + tok = token.PERIOD ch = s.peek() if isDecimal(ch) { tok = token.FLOAT ch = s.scanMantissa(ch) ch = s.scanExponent(ch) - } else { - tok = token.PERIOD } case '[': tok = token.LBRACK @@ -156,13 +155,30 @@ func (s *Scanner) Scan() (tok token.Token) { } func (s *Scanner) scanComment(ch rune) { + // look for /* - style comments + if ch == '/' && s.peek() == '*' { + for { + if ch < 0 { + s.err("comment not terminated") + break + } + + ch0 := ch + ch = s.next() + if ch0 == '*' && ch == '/' { + break + } + } + } + + // single line comments if ch == '#' || ch == '/' { - // line comment ch = s.next() for ch != '\n' && ch >= 0 { ch = s.next() } s.unread() + return } } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 2bbe534..eaa09c8 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -54,13 +54,13 @@ func TestComment(t *testing.T) { {token.COMMENT, "# /* comment */"}, {token.COMMENT, "# # comment #"}, {token.COMMENT, "#" + f100}, - // {token.COMMENT, "/**/"}, - // 
{token.COMMENT, "/***/"}, - // {token.COMMENT, "/* comment */"}, - // {token.COMMENT, "/* // comment */"}, - // {token.COMMENT, "/* /* comment */"}, - // {token.COMMENT, "/*\n comment\n*/"}, - // {token.COMMENT, "/*" + f100 + "*/"}, + {token.COMMENT, "/**/"}, + {token.COMMENT, "/***/"}, + {token.COMMENT, "/* comment */"}, + {token.COMMENT, "/* // comment */"}, + {token.COMMENT, "/* /* comment */"}, + {token.COMMENT, "/*\n comment\n*/"}, + {token.COMMENT, "/*" + f100 + "*/"}, } testTokenList(t, tokenList) From 9a71fdff071115058486560600ea5d5265b07555 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 13:38:53 +0300 Subject: [PATCH 036/137] scanner: split float and number tests --- scanner/scanner_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index eaa09c8..8e60992 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -165,6 +165,13 @@ func TestNumber(t *testing.T) { {token.NUMBER, "0X42"}, {token.NUMBER, "0X123456789abcDEF"}, {token.NUMBER, "0X" + f100}, + } + + testTokenList(t, tokenList) +} + +func TestFloat(t *testing.T) { + var tokenList = []tokenPair{ {token.FLOAT, "0."}, {token.FLOAT, "1."}, {token.FLOAT, "42."}, From d9a424d177b30b2ef524c39f7e38c131ffbbdea5 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 13:44:55 +0300 Subject: [PATCH 037/137] scnaner: reorganize tests for upcoming additional tests --- scanner/scanner_test.go | 89 ++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 46 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 8e60992..2ac57a3 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -40,8 +40,8 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { } } -func TestComment(t *testing.T) { - var tokenList = []tokenPair{ +var tokenLists = map[string][]tokenPair{ + "comment": []tokenPair{ {token.COMMENT, "//"}, {token.COMMENT, "////"}, {token.COMMENT, 
"// comment"}, @@ -61,13 +61,8 @@ func TestComment(t *testing.T) { {token.COMMENT, "/* /* comment */"}, {token.COMMENT, "/*\n comment\n*/"}, {token.COMMENT, "/*" + f100 + "*/"}, - } - - testTokenList(t, tokenList) -} - -func TestOperator(t *testing.T) { - var tokenList = []tokenPair{ + }, + "operator": []tokenPair{ {token.LBRACK, "["}, {token.LBRACE, "{"}, {token.COMMA, ","}, @@ -77,22 +72,13 @@ func TestOperator(t *testing.T) { {token.ASSIGN, "="}, {token.ADD, "+"}, {token.SUB, "-"}, - } - - testTokenList(t, tokenList) -} - -func TestBool(t *testing.T) { - var tokenList = []tokenPair{ + }, + "bool": []tokenPair{ {token.BOOL, "true"}, {token.BOOL, "false"}, - } + }, - testTokenList(t, tokenList) -} - -func TestIdent(t *testing.T) { - var tokenList = []tokenPair{ + "ident": []tokenPair{ {token.IDENT, "a"}, {token.IDENT, "a0"}, {token.IDENT, "foobar"}, @@ -109,13 +95,8 @@ func TestIdent(t *testing.T) { {token.IDENT, "a۰۱۸"}, {token.IDENT, "foo६४"}, {token.IDENT, "bar9876"}, - } - - testTokenList(t, tokenList) -} - -func TestString(t *testing.T) { - var tokenList = []tokenPair{ + }, + "string": []tokenPair{ {token.STRING, `" "`}, {token.STRING, `"a"`}, {token.STRING, `"本"`}, @@ -136,13 +117,8 @@ func TestString(t *testing.T) { {token.STRING, `"\U00000000"`}, {token.STRING, `"\U0000ffAB"`}, {token.STRING, `"` + f100 + `"`}, - } - - testTokenList(t, tokenList) -} - -func TestNumber(t *testing.T) { - var tokenList = []tokenPair{ + }, + "number": []tokenPair{ {token.NUMBER, "0"}, {token.NUMBER, "1"}, {token.NUMBER, "9"}, @@ -165,13 +141,8 @@ func TestNumber(t *testing.T) { {token.NUMBER, "0X42"}, {token.NUMBER, "0X123456789abcDEF"}, {token.NUMBER, "0X" + f100}, - } - - testTokenList(t, tokenList) -} - -func TestFloat(t *testing.T) { - var tokenList = []tokenPair{ + }, + "float": []tokenPair{ {token.FLOAT, "0."}, {token.FLOAT, "1."}, {token.FLOAT, "42."}, @@ -200,7 +171,33 @@ func TestFloat(t *testing.T) { {token.FLOAT, "1E-10"}, {token.FLOAT, "42E+10"}, {token.FLOAT, 
"01234567890E-10"}, - } - - testTokenList(t, tokenList) + }, +} + +func TestComment(t *testing.T) { + testTokenList(t, tokenLists["comment"]) +} + +func TestOperator(t *testing.T) { + testTokenList(t, tokenLists["operator"]) +} + +func TestBool(t *testing.T) { + testTokenList(t, tokenLists["bool"]) +} + +func TestIdent(t *testing.T) { + testTokenList(t, tokenLists["ident"]) +} + +func TestString(t *testing.T) { + testTokenList(t, tokenLists["string"]) +} + +func TestNumber(t *testing.T) { + testTokenList(t, tokenLists["number"]) +} + +func TestFloat(t *testing.T) { + testTokenList(t, tokenLists["float"]) } From bbf8cf2ac041c901049f3d3f4decac19f94cf460 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 17:34:45 +0300 Subject: [PATCH 038/137] scanner: various fixes and improvements around NUMBER and FLOAT --- scanner/scanner.go | 123 ++++++++++++++------- scanner/scanner_test.go | 229 ++++++++++++++++++++++++++-------------- 2 files changed, 234 insertions(+), 118 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 52e0657..84d22c0 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -16,17 +16,21 @@ const eof = rune(0) // Scanner defines a lexical scanner type Scanner struct { - src *bytes.Buffer - srcBytes []byte + src *bytes.Buffer - lastCharLen int // length of last character in bytes + // Source Buffer + srcBuf []byte - currPos Position // current position + // Source Position + srcPos Position // current position prevPos Position // previous position - tokBuf bytes.Buffer // token text buffer - tokPos int // token text tail position (srcBuf index); valid if >= 0 - tokEnd int // token text tail end (srcBuf index) + lastCharLen int // length of last character in bytes + lastLineLen int // length of last line in characters (for correct column reporting) + + tokBuf bytes.Buffer // token text buffer + tokStart int // token text start position + tokEnd int // token text end position // Error is called for each error 
encountered. If no Error // function is set, the error is reported to os.Stderr. @@ -34,6 +38,14 @@ type Scanner struct { // ErrorCount is incremented by one for each error encountered. ErrorCount int + + // Start position of most recently scanned token; set by Scan. + // Calling Init or Next invalidates the position (Line == 0). + // The Filename field is always left untouched by the Scanner. + // If an error is reported (via Error) and Position is invalid, + // the scanner is not inside a token. Call Pos to obtain an error + // position in that case. + tokPos Position } // NewScanner returns a new instance of Lexer. Even though src is an io.Reader, @@ -45,10 +57,12 @@ func NewScanner(src io.Reader) (*Scanner, error) { } b := bytes.NewBuffer(buf) - return &Scanner{ - src: b, - srcBytes: b.Bytes(), - }, nil + s := &Scanner{ + src: b, + srcBuf: b.Bytes(), + } + + return s, nil } // next reads the next rune from the bufferred reader. Returns the rune(0) if @@ -60,15 +74,16 @@ func (s *Scanner) next() rune { } // remember last position - s.prevPos = s.currPos - + s.prevPos = s.srcPos s.lastCharLen = size - s.currPos.Offset += size - s.currPos.Column += size + s.srcPos.Offset += size + + s.srcPos.Column += size if ch == '\n' { - s.currPos.Line++ - s.currPos.Column = 0 + s.srcPos.Line++ + s.srcPos.Column = 0 + s.lastLineLen = s.srcPos.Column } return ch @@ -78,7 +93,7 @@ func (s *Scanner) unread() { if err := s.src.UnreadRune(); err != nil { panic(err) // this is user fault, we should catch it } - s.currPos = s.prevPos // put back last position + s.srcPos = s.prevPos // put back last position } func (s *Scanner) peek() rune { @@ -93,16 +108,30 @@ func (s *Scanner) peek() rune { // Scan scans the next token and returns the token. 
func (s *Scanner) Scan() (tok token.Token) { - ch := s.next() + ch := s.peek() // skip white space for isWhitespace(ch) { ch = s.next() } - // start the token position + // token text markings s.tokBuf.Reset() - s.tokPos = s.currPos.Offset - s.lastCharLen + s.tokStart = s.srcPos.Offset - s.lastCharLen + + // token position + s.tokPos.Offset = s.srcPos.Offset + if s.srcPos.Column > 0 { + // common case: last character was not a '\n' + s.tokPos.Line = s.srcPos.Line + s.tokPos.Column = s.srcPos.Column + } else { + // last character was a '\n' + // (we cannot be at the beginning of the source + // since we have called next() at least once) + s.tokPos.Line = s.srcPos.Line - 1 + s.tokPos.Column = s.lastLineLen + } switch { case isLetter(ch): @@ -150,7 +179,7 @@ func (s *Scanner) Scan() (tok token.Token) { } } - s.tokEnd = s.currPos.Offset + s.tokEnd = s.srcPos.Offset return tok } @@ -219,10 +248,21 @@ func (s *Scanner) scanNumber(ch rune) token.Token { } s.unread() - if ch == '.' || ch == 'e' || ch == 'E' { - ch = s.next() - ch = s.scanFraction(ch) + // literals of form 01e10 are treates as Numbers in HCL, which differs from Go. + if ch == 'e' || ch == 'E' { + ch = s.next() // seek forward ch = s.scanExponent(ch) + return token.NUMBER + } + + if ch == '.' { + ch = s.next() // seek forward + ch = s.scanFraction(ch) + + if ch == 'e' || ch == 'E' { + ch = s.next() + ch = s.scanExponent(ch) + } return token.FLOAT } @@ -234,10 +274,20 @@ func (s *Scanner) scanNumber(ch rune) token.Token { } ch = s.scanMantissa(ch) - if ch == '.' || ch == 'e' || ch == 'E' { + // literals of form 1e10 are treates as Numbers in HCL, which differs from Go. + if ch == 'e' || ch == 'E' { + ch = s.next() + ch = s.scanExponent(ch) + return token.NUMBER + } + + if ch == '.' 
{ ch = s.next() // seek forward ch = s.scanFraction(ch) - ch = s.scanExponent(ch) + if ch == 'e' || ch == 'E' { + ch = s.next() + ch = s.scanExponent(ch) + } return token.FLOAT } return token.NUMBER @@ -344,46 +394,45 @@ func (s *Scanner) scanDigits(ch rune, base, n int) rune { // scanIdentifier scans an identifier and returns the literal string func (s *Scanner) scanIdentifier() string { - offs := s.currPos.Offset - s.lastCharLen + offs := s.srcPos.Offset - s.lastCharLen ch := s.next() for isLetter(ch) || isDigit(ch) { ch = s.next() } s.unread() // we got identifier, put back latest char - // return string(s.srcBytes[offs:(s.currPos.Offset - s.lastCharLen)]) - return string(s.srcBytes[offs:s.currPos.Offset]) + return string(s.srcBuf[offs:s.srcPos.Offset]) } // TokenText returns the literal string corresponding to the most recently // scanned token. func (s *Scanner) TokenText() string { - if s.tokPos < 0 { + if s.tokStart < 0 { // no token text return "" } // part of the token text was saved in tokBuf: save the rest in // tokBuf as well and return its content - s.tokBuf.Write(s.srcBytes[s.tokPos:s.tokEnd]) - s.tokPos = s.tokEnd // ensure idempotency of TokenText() call + s.tokBuf.Write(s.srcBuf[s.tokStart:s.tokEnd]) + s.tokStart = s.tokEnd // ensure idempotency of TokenText() call return s.tokBuf.String() } // Pos returns the position of the character immediately after the character or // token returned by the last call to Scan. 
-func (s *Scanner) Pos() Position { - return s.currPos +func (s *Scanner) Pos() (pos Position) { + return s.tokPos } func (s *Scanner) err(msg string) { s.ErrorCount++ if s.Error != nil { - s.Error(s.currPos, msg) + s.Error(s.srcPos, msg) return } - fmt.Fprintf(os.Stderr, "%s: %s\n", s.currPos, msg) + fmt.Fprintf(os.Stderr, "%s: %s\n", s.srcPos, msg) } func isLetter(ch rune) bool { diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 2ac57a3..f5cf401 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -40,43 +40,84 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { } } +func TestPosition(t *testing.T) { + t.SkipNow() + // create artifical source code + buf := new(bytes.Buffer) + for _, list := range tokenLists { + for _, ident := range list { + fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text) + } + } + + s, err := NewScanner(buf) + if err != nil { + t.Fatal(err) + } + + s.Scan() + pos := Position{"", 4, 1, 5} + for _, list := range tokenLists { + for _, k := range list { + curPos := s.Pos() + fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) + if curPos.Offset != pos.Offset { + t.Errorf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) + } + if curPos.Line != pos.Line { + t.Errorf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) + } + if curPos.Column != pos.Column { + t.Errorf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) + } + pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline + pos.Line += countNewlines(k.text) + 1 // each token is on a new line + s.Scan() + } + } + // make sure there were no token-internal errors reported by scanner + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } +} + var tokenLists = map[string][]tokenPair{ - "comment": []tokenPair{ - {token.COMMENT, "//"}, - {token.COMMENT, "////"}, - {token.COMMENT, "// comment"}, - {token.COMMENT, "// /* comment */"}, - {token.COMMENT, "// // comment //"}, - 
{token.COMMENT, "//" + f100}, - {token.COMMENT, "#"}, - {token.COMMENT, "##"}, - {token.COMMENT, "# comment"}, - {token.COMMENT, "# /* comment */"}, - {token.COMMENT, "# # comment #"}, - {token.COMMENT, "#" + f100}, - {token.COMMENT, "/**/"}, - {token.COMMENT, "/***/"}, - {token.COMMENT, "/* comment */"}, - {token.COMMENT, "/* // comment */"}, - {token.COMMENT, "/* /* comment */"}, - {token.COMMENT, "/*\n comment\n*/"}, - {token.COMMENT, "/*" + f100 + "*/"}, - }, - "operator": []tokenPair{ - {token.LBRACK, "["}, - {token.LBRACE, "{"}, - {token.COMMA, ","}, - {token.PERIOD, "."}, - {token.RBRACK, "]"}, - {token.RBRACE, "}"}, - {token.ASSIGN, "="}, - {token.ADD, "+"}, - {token.SUB, "-"}, - }, - "bool": []tokenPair{ - {token.BOOL, "true"}, - {token.BOOL, "false"}, - }, + // "comment": []tokenPair{ + // {token.COMMENT, "//"}, + // {token.COMMENT, "////"}, + // {token.COMMENT, "// comment"}, + // {token.COMMENT, "// /* comment */"}, + // {token.COMMENT, "// // comment //"}, + // {token.COMMENT, "//" + f100}, + // {token.COMMENT, "#"}, + // {token.COMMENT, "##"}, + // {token.COMMENT, "# comment"}, + // {token.COMMENT, "# /* comment */"}, + // {token.COMMENT, "# # comment #"}, + // {token.COMMENT, "#" + f100}, + // {token.COMMENT, "/**/"}, + // {token.COMMENT, "/***/"}, + // {token.COMMENT, "/* comment */"}, + // {token.COMMENT, "/* // comment */"}, + // {token.COMMENT, "/* /* comment */"}, + // {token.COMMENT, "/*\n comment\n*/"}, + // {token.COMMENT, "/*" + f100 + "*/"}, + // }, + // "operator": []tokenPair{ + // {token.LBRACK, "["}, + // {token.LBRACE, "{"}, + // {token.COMMA, ","}, + // {token.PERIOD, "."}, + // {token.RBRACK, "]"}, + // {token.RBRACE, "}"}, + // {token.ASSIGN, "="}, + // {token.ADD, "+"}, + // {token.SUB, "-"}, + // }, + // "bool": []tokenPair{ + // {token.BOOL, "true"}, + // {token.BOOL, "false"}, + // }, "ident": []tokenPair{ {token.IDENT, "a"}, @@ -88,36 +129,36 @@ var tokenLists = map[string][]tokenPair{ {token.IDENT, "_abc123"}, {token.IDENT, 
"abc123_"}, {token.IDENT, "_abc_123_"}, - {token.IDENT, "_äöü"}, - {token.IDENT, "_本"}, - {token.IDENT, "äöü"}, - {token.IDENT, "本"}, - {token.IDENT, "a۰۱۸"}, - {token.IDENT, "foo६४"}, - {token.IDENT, "bar9876"}, - }, - "string": []tokenPair{ - {token.STRING, `" "`}, - {token.STRING, `"a"`}, - {token.STRING, `"本"`}, - {token.STRING, `"\a"`}, - {token.STRING, `"\b"`}, - {token.STRING, `"\f"`}, - {token.STRING, `"\n"`}, - {token.STRING, `"\r"`}, - {token.STRING, `"\t"`}, - {token.STRING, `"\v"`}, - {token.STRING, `"\""`}, - {token.STRING, `"\000"`}, - {token.STRING, `"\777"`}, - {token.STRING, `"\x00"`}, - {token.STRING, `"\xff"`}, - {token.STRING, `"\u0000"`}, - {token.STRING, `"\ufA16"`}, - {token.STRING, `"\U00000000"`}, - {token.STRING, `"\U0000ffAB"`}, - {token.STRING, `"` + f100 + `"`}, + // {token.IDENT, "_äöü"}, + // {token.IDENT, "_本"}, + // {token.IDENT, "äöü"}, + // {token.IDENT, "本"}, + // {token.IDENT, "a۰۱۸"}, + // {token.IDENT, "foo६४"}, + // {token.IDENT, "bar9876"}, }, + // "string": []tokenPair{ + // {token.STRING, `" "`}, + // {token.STRING, `"a"`}, + // {token.STRING, `"本"`}, + // {token.STRING, `"\a"`}, + // {token.STRING, `"\b"`}, + // {token.STRING, `"\f"`}, + // {token.STRING, `"\n"`}, + // {token.STRING, `"\r"`}, + // {token.STRING, `"\t"`}, + // {token.STRING, `"\v"`}, + // {token.STRING, `"\""`}, + // {token.STRING, `"\000"`}, + // {token.STRING, `"\777"`}, + // {token.STRING, `"\x00"`}, + // {token.STRING, `"\xff"`}, + // {token.STRING, `"\u0000"`}, + // {token.STRING, `"\ufA16"`}, + // {token.STRING, `"\U00000000"`}, + // {token.STRING, `"\U0000ffAB"`}, + // {token.STRING, `"` + f100 + `"`}, + // }, "number": []tokenPair{ {token.NUMBER, "0"}, {token.NUMBER, "1"}, @@ -141,6 +182,22 @@ var tokenLists = map[string][]tokenPair{ {token.NUMBER, "0X42"}, {token.NUMBER, "0X123456789abcDEF"}, {token.NUMBER, "0X" + f100}, + {token.NUMBER, "0e0"}, + {token.NUMBER, "1e0"}, + {token.NUMBER, "42e0"}, + {token.NUMBER, "01234567890e0"}, + {token.NUMBER, 
"0E0"}, + {token.NUMBER, "1E0"}, + {token.NUMBER, "42E0"}, + {token.NUMBER, "01234567890E0"}, + {token.NUMBER, "0e+10"}, + {token.NUMBER, "1e-10"}, + {token.NUMBER, "42e+10"}, + {token.NUMBER, "01234567890e-10"}, + {token.NUMBER, "0E+10"}, + {token.NUMBER, "1E-10"}, + {token.NUMBER, "42E+10"}, + {token.NUMBER, "01234567890E-10"}, }, "float": []tokenPair{ {token.FLOAT, "0."}, @@ -155,22 +212,22 @@ var tokenLists = map[string][]tokenPair{ {token.FLOAT, "1.0"}, {token.FLOAT, "42.0"}, {token.FLOAT, "01234567890.0"}, - {token.FLOAT, "0e0"}, - {token.FLOAT, "1e0"}, - {token.FLOAT, "42e0"}, - {token.FLOAT, "01234567890e0"}, - {token.FLOAT, "0E0"}, - {token.FLOAT, "1E0"}, - {token.FLOAT, "42E0"}, - {token.FLOAT, "01234567890E0"}, - {token.FLOAT, "0e+10"}, - {token.FLOAT, "1e-10"}, - {token.FLOAT, "42e+10"}, - {token.FLOAT, "01234567890e-10"}, - {token.FLOAT, "0E+10"}, - {token.FLOAT, "1E-10"}, - {token.FLOAT, "42E+10"}, - {token.FLOAT, "01234567890E-10"}, + {token.FLOAT, "01.8e0"}, + {token.FLOAT, "1.4e0"}, + {token.FLOAT, "42.2e0"}, + {token.FLOAT, "01234567890.12e0"}, + {token.FLOAT, "0.E0"}, + {token.FLOAT, "1.12E0"}, + {token.FLOAT, "42.123E0"}, + {token.FLOAT, "01234567890.213E0"}, + {token.FLOAT, "0.2e+10"}, + {token.FLOAT, "1.2e-10"}, + {token.FLOAT, "42.54e+10"}, + {token.FLOAT, "01234567890.98e-10"}, + {token.FLOAT, "0.1E+10"}, + {token.FLOAT, "1.1E-10"}, + {token.FLOAT, "42.1E+10"}, + {token.FLOAT, "01234567890.1E-10"}, }, } @@ -201,3 +258,13 @@ func TestNumber(t *testing.T) { func TestFloat(t *testing.T) { testTokenList(t, tokenLists["float"]) } + +func countNewlines(s string) int { + n := 0 + for _, ch := range s { + if ch == '\n' { + n++ + } + } + return n +} From 89db79cf6389347f1d8ef508376678671854669b Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 17:36:14 +0300 Subject: [PATCH 039/137] scanner: improve forward seeking --- scanner/scanner.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scanner/scanner.go 
b/scanner/scanner.go index 84d22c0..79677ca 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -246,17 +246,14 @@ func (s *Scanner) scanNumber(ch rune) token.Token { } } - s.unread() // literals of form 01e10 are treates as Numbers in HCL, which differs from Go. if ch == 'e' || ch == 'E' { - ch = s.next() // seek forward ch = s.scanExponent(ch) return token.NUMBER } if ch == '.' { - ch = s.next() // seek forward ch = s.scanFraction(ch) if ch == 'e' || ch == 'E' { @@ -270,6 +267,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { s.err("illegal octal number") } + s.unread() return token.NUMBER } From 81a8399ed1082b4b31d78d95eb9fbb53914f4c31 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 17:43:29 +0300 Subject: [PATCH 040/137] scanner: improvements around Position --- scanner/scanner.go | 17 +++-- scanner/scanner_test.go | 142 ++++++++++++++++++++-------------------- 2 files changed, 82 insertions(+), 77 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 79677ca..de5a54f 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -62,6 +62,9 @@ func NewScanner(src io.Reader) (*Scanner, error) { srcBuf: b.Bytes(), } + // srcPosition always starts with 1 + s.srcPos.Line = 1 + return s, nil } @@ -78,7 +81,6 @@ func (s *Scanner) next() rune { s.lastCharLen = size s.srcPos.Offset += size - s.srcPos.Column += size if ch == '\n' { s.srcPos.Line++ @@ -119,8 +121,10 @@ func (s *Scanner) Scan() (tok token.Token) { s.tokBuf.Reset() s.tokStart = s.srcPos.Offset - s.lastCharLen - // token position - s.tokPos.Offset = s.srcPos.Offset + // token position, initial next() is moving the offset by one, though we + // are interested with the starting point + s.tokPos.Offset = s.srcPos.Offset - 1 + if s.srcPos.Column > 0 { // common case: last character was not a '\n' s.tokPos.Line = s.srcPos.Line @@ -271,16 +275,15 @@ func (s *Scanner) scanNumber(ch rune) token.Token { return token.NUMBER } - ch = s.scanMantissa(ch) + 
s.scanMantissa(ch) + ch = s.next() // seek forward // literals of form 1e10 are treates as Numbers in HCL, which differs from Go. if ch == 'e' || ch == 'E' { - ch = s.next() ch = s.scanExponent(ch) return token.NUMBER } if ch == '.' { - ch = s.next() // seek forward ch = s.scanFraction(ch) if ch == 'e' || ch == 'E' { ch = s.next() @@ -288,6 +291,8 @@ func (s *Scanner) scanNumber(ch rune) token.Token { } return token.FLOAT } + + s.unread() return token.NUMBER } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index f5cf401..64ba1f4 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -41,7 +41,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { } func TestPosition(t *testing.T) { - t.SkipNow() + // t.SkipNow() // create artifical source code buf := new(bytes.Buffer) for _, list := range tokenLists { @@ -159,76 +159,76 @@ var tokenLists = map[string][]tokenPair{ // {token.STRING, `"\U0000ffAB"`}, // {token.STRING, `"` + f100 + `"`}, // }, - "number": []tokenPair{ - {token.NUMBER, "0"}, - {token.NUMBER, "1"}, - {token.NUMBER, "9"}, - {token.NUMBER, "42"}, - {token.NUMBER, "1234567890"}, - {token.NUMBER, "00"}, - {token.NUMBER, "01"}, - {token.NUMBER, "07"}, - {token.NUMBER, "042"}, - {token.NUMBER, "01234567"}, - {token.NUMBER, "0x0"}, - {token.NUMBER, "0x1"}, - {token.NUMBER, "0xf"}, - {token.NUMBER, "0x42"}, - {token.NUMBER, "0x123456789abcDEF"}, - {token.NUMBER, "0x" + f100}, - {token.NUMBER, "0X0"}, - {token.NUMBER, "0X1"}, - {token.NUMBER, "0XF"}, - {token.NUMBER, "0X42"}, - {token.NUMBER, "0X123456789abcDEF"}, - {token.NUMBER, "0X" + f100}, - {token.NUMBER, "0e0"}, - {token.NUMBER, "1e0"}, - {token.NUMBER, "42e0"}, - {token.NUMBER, "01234567890e0"}, - {token.NUMBER, "0E0"}, - {token.NUMBER, "1E0"}, - {token.NUMBER, "42E0"}, - {token.NUMBER, "01234567890E0"}, - {token.NUMBER, "0e+10"}, - {token.NUMBER, "1e-10"}, - {token.NUMBER, "42e+10"}, - {token.NUMBER, "01234567890e-10"}, - {token.NUMBER, "0E+10"}, - {token.NUMBER, 
"1E-10"}, - {token.NUMBER, "42E+10"}, - {token.NUMBER, "01234567890E-10"}, - }, - "float": []tokenPair{ - {token.FLOAT, "0."}, - {token.FLOAT, "1."}, - {token.FLOAT, "42."}, - {token.FLOAT, "01234567890."}, - {token.FLOAT, ".0"}, - {token.FLOAT, ".1"}, - {token.FLOAT, ".42"}, - {token.FLOAT, ".0123456789"}, - {token.FLOAT, "0.0"}, - {token.FLOAT, "1.0"}, - {token.FLOAT, "42.0"}, - {token.FLOAT, "01234567890.0"}, - {token.FLOAT, "01.8e0"}, - {token.FLOAT, "1.4e0"}, - {token.FLOAT, "42.2e0"}, - {token.FLOAT, "01234567890.12e0"}, - {token.FLOAT, "0.E0"}, - {token.FLOAT, "1.12E0"}, - {token.FLOAT, "42.123E0"}, - {token.FLOAT, "01234567890.213E0"}, - {token.FLOAT, "0.2e+10"}, - {token.FLOAT, "1.2e-10"}, - {token.FLOAT, "42.54e+10"}, - {token.FLOAT, "01234567890.98e-10"}, - {token.FLOAT, "0.1E+10"}, - {token.FLOAT, "1.1E-10"}, - {token.FLOAT, "42.1E+10"}, - {token.FLOAT, "01234567890.1E-10"}, - }, + // "number": []tokenPair{ + // {token.NUMBER, "0"}, + // {token.NUMBER, "1"}, + // {token.NUMBER, "9"}, + // {token.NUMBER, "42"}, + // {token.NUMBER, "1234567890"}, + // {token.NUMBER, "00"}, + // {token.NUMBER, "01"}, + // {token.NUMBER, "07"}, + // {token.NUMBER, "042"}, + // {token.NUMBER, "01234567"}, + // {token.NUMBER, "0x0"}, + // {token.NUMBER, "0x1"}, + // {token.NUMBER, "0xf"}, + // {token.NUMBER, "0x42"}, + // {token.NUMBER, "0x123456789abcDEF"}, + // {token.NUMBER, "0x" + f100}, + // {token.NUMBER, "0X0"}, + // {token.NUMBER, "0X1"}, + // {token.NUMBER, "0XF"}, + // {token.NUMBER, "0X42"}, + // {token.NUMBER, "0X123456789abcDEF"}, + // {token.NUMBER, "0X" + f100}, + // {token.NUMBER, "0e0"}, + // {token.NUMBER, "1e0"}, + // {token.NUMBER, "42e0"}, + // {token.NUMBER, "01234567890e0"}, + // {token.NUMBER, "0E0"}, + // {token.NUMBER, "1E0"}, + // {token.NUMBER, "42E0"}, + // {token.NUMBER, "01234567890E0"}, + // {token.NUMBER, "0e+10"}, + // {token.NUMBER, "1e-10"}, + // {token.NUMBER, "42e+10"}, + // {token.NUMBER, "01234567890e-10"}, + // {token.NUMBER, "0E+10"}, 
+ // {token.NUMBER, "1E-10"}, + // {token.NUMBER, "42E+10"}, + // {token.NUMBER, "01234567890E-10"}, + // }, + // "float": []tokenPair{ + // {token.FLOAT, "0."}, + // {token.FLOAT, "1."}, + // {token.FLOAT, "42."}, + // {token.FLOAT, "01234567890."}, + // {token.FLOAT, ".0"}, + // {token.FLOAT, ".1"}, + // {token.FLOAT, ".42"}, + // {token.FLOAT, ".0123456789"}, + // {token.FLOAT, "0.0"}, + // {token.FLOAT, "1.0"}, + // {token.FLOAT, "42.0"}, + // {token.FLOAT, "01234567890.0"}, + // {token.FLOAT, "01.8e0"}, + // {token.FLOAT, "1.4e0"}, + // {token.FLOAT, "42.2e0"}, + // {token.FLOAT, "01234567890.12e0"}, + // {token.FLOAT, "0.E0"}, + // {token.FLOAT, "1.12E0"}, + // {token.FLOAT, "42.123E0"}, + // {token.FLOAT, "01234567890.213E0"}, + // {token.FLOAT, "0.2e+10"}, + // {token.FLOAT, "1.2e-10"}, + // {token.FLOAT, "42.54e+10"}, + // {token.FLOAT, "01234567890.98e-10"}, + // {token.FLOAT, "0.1E+10"}, + // {token.FLOAT, "1.1E-10"}, + // {token.FLOAT, "42.1E+10"}, + // {token.FLOAT, "01234567890.1E-10"}, + // }, } func TestComment(t *testing.T) { From ac40da147e230d460ae30d092acc5ce8fcd256d7 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 18:18:09 +0300 Subject: [PATCH 041/137] scanner: fix tests --- scanner/scanner.go | 12 +- scanner/scanner_test.go | 301 +++++++++++++++++++++------------------- 2 files changed, 163 insertions(+), 150 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index de5a54f..4580c89 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -81,13 +81,15 @@ func (s *Scanner) next() rune { s.lastCharLen = size s.srcPos.Offset += size - s.srcPos.Column += size + s.srcPos.Column++ if ch == '\n' { s.srcPos.Line++ s.srcPos.Column = 0 s.lastLineLen = s.srcPos.Column } + // debug + // fmt.Printf("ch: %q, off:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column) return ch } @@ -110,7 +112,7 @@ func (s *Scanner) peek() rune { // Scan scans the next token and returns the token. 
func (s *Scanner) Scan() (tok token.Token) { - ch := s.peek() + ch := s.next() // skip white space for isWhitespace(ch) { @@ -121,9 +123,9 @@ func (s *Scanner) Scan() (tok token.Token) { s.tokBuf.Reset() s.tokStart = s.srcPos.Offset - s.lastCharLen - // token position, initial next() is moving the offset by one, though we - // are interested with the starting point - s.tokPos.Offset = s.srcPos.Offset - 1 + // token position, initial next() is moving the offset by one(size of rune + // actually), though we are interested with the starting point + s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen if s.srcPos.Column > 0 { // common case: last character was not a '\n' diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 64ba1f4..af56d80 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -44,8 +44,9 @@ func TestPosition(t *testing.T) { // t.SkipNow() // create artifical source code buf := new(bytes.Buffer) - for _, list := range tokenLists { - for _, ident := range list { + + for _, listName := range orderedTokenLists { + for _, ident := range tokenLists[listName] { fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text) } } @@ -55,20 +56,21 @@ func TestPosition(t *testing.T) { t.Fatal(err) } - s.Scan() pos := Position{"", 4, 1, 5} - for _, list := range tokenLists { - for _, k := range list { + for _, listName := range orderedTokenLists { + s.Scan() + + for _, k := range tokenLists[listName] { curPos := s.Pos() - fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) + // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) if curPos.Offset != pos.Offset { - t.Errorf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) + t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) } if curPos.Line != pos.Line { - t.Errorf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) + t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) } if curPos.Column != pos.Column { 
- t.Errorf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) + t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) } pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline pos.Line += countNewlines(k.text) + 1 // each token is on a new line @@ -81,44 +83,53 @@ func TestPosition(t *testing.T) { } } -var tokenLists = map[string][]tokenPair{ - // "comment": []tokenPair{ - // {token.COMMENT, "//"}, - // {token.COMMENT, "////"}, - // {token.COMMENT, "// comment"}, - // {token.COMMENT, "// /* comment */"}, - // {token.COMMENT, "// // comment //"}, - // {token.COMMENT, "//" + f100}, - // {token.COMMENT, "#"}, - // {token.COMMENT, "##"}, - // {token.COMMENT, "# comment"}, - // {token.COMMENT, "# /* comment */"}, - // {token.COMMENT, "# # comment #"}, - // {token.COMMENT, "#" + f100}, - // {token.COMMENT, "/**/"}, - // {token.COMMENT, "/***/"}, - // {token.COMMENT, "/* comment */"}, - // {token.COMMENT, "/* // comment */"}, - // {token.COMMENT, "/* /* comment */"}, - // {token.COMMENT, "/*\n comment\n*/"}, - // {token.COMMENT, "/*" + f100 + "*/"}, - // }, - // "operator": []tokenPair{ - // {token.LBRACK, "["}, - // {token.LBRACE, "{"}, - // {token.COMMA, ","}, - // {token.PERIOD, "."}, - // {token.RBRACK, "]"}, - // {token.RBRACE, "}"}, - // {token.ASSIGN, "="}, - // {token.ADD, "+"}, - // {token.SUB, "-"}, - // }, - // "bool": []tokenPair{ - // {token.BOOL, "true"}, - // {token.BOOL, "false"}, - // }, +var orderedTokenLists = []string{ + // "comment", + // "operator", + // "bool", + // "ident", + // "string", + "number", + // "float", +} +var tokenLists = map[string][]tokenPair{ + "comment": []tokenPair{ + {token.COMMENT, "//"}, + {token.COMMENT, "////"}, + {token.COMMENT, "// comment"}, + {token.COMMENT, "// /* comment */"}, + {token.COMMENT, "// // comment //"}, + {token.COMMENT, "//" + f100}, + {token.COMMENT, "#"}, + {token.COMMENT, "##"}, + {token.COMMENT, "# comment"}, + {token.COMMENT, "# /* comment */"}, + 
{token.COMMENT, "# # comment #"}, + {token.COMMENT, "#" + f100}, + {token.COMMENT, "/**/"}, + {token.COMMENT, "/***/"}, + {token.COMMENT, "/* comment */"}, + {token.COMMENT, "/* // comment */"}, + {token.COMMENT, "/* /* comment */"}, + {token.COMMENT, "/*\n comment\n*/"}, + {token.COMMENT, "/*" + f100 + "*/"}, + }, + "operator": []tokenPair{ + {token.LBRACK, "["}, + {token.LBRACE, "{"}, + {token.COMMA, ","}, + {token.PERIOD, "."}, + {token.RBRACK, "]"}, + {token.RBRACE, "}"}, + {token.ASSIGN, "="}, + {token.ADD, "+"}, + {token.SUB, "-"}, + }, + "bool": []tokenPair{ + {token.BOOL, "true"}, + {token.BOOL, "false"}, + }, "ident": []tokenPair{ {token.IDENT, "a"}, {token.IDENT, "a0"}, @@ -129,106 +140,106 @@ var tokenLists = map[string][]tokenPair{ {token.IDENT, "_abc123"}, {token.IDENT, "abc123_"}, {token.IDENT, "_abc_123_"}, - // {token.IDENT, "_äöü"}, - // {token.IDENT, "_本"}, - // {token.IDENT, "äöü"}, - // {token.IDENT, "本"}, - // {token.IDENT, "a۰۱۸"}, - // {token.IDENT, "foo६४"}, - // {token.IDENT, "bar9876"}, + {token.IDENT, "_äöü"}, + {token.IDENT, "_本"}, + {token.IDENT, "äöü"}, + {token.IDENT, "本"}, + {token.IDENT, "a۰۱۸"}, + {token.IDENT, "foo६४"}, + {token.IDENT, "bar9876"}, + }, + "string": []tokenPair{ + {token.STRING, `" "`}, + {token.STRING, `"a"`}, + {token.STRING, `"本"`}, + {token.STRING, `"\a"`}, + {token.STRING, `"\b"`}, + {token.STRING, `"\f"`}, + {token.STRING, `"\n"`}, + {token.STRING, `"\r"`}, + {token.STRING, `"\t"`}, + {token.STRING, `"\v"`}, + {token.STRING, `"\""`}, + {token.STRING, `"\000"`}, + {token.STRING, `"\777"`}, + {token.STRING, `"\x00"`}, + {token.STRING, `"\xff"`}, + {token.STRING, `"\u0000"`}, + {token.STRING, `"\ufA16"`}, + {token.STRING, `"\U00000000"`}, + {token.STRING, `"\U0000ffAB"`}, + {token.STRING, `"` + f100 + `"`}, + }, + "number": []tokenPair{ + {token.NUMBER, "0"}, + {token.NUMBER, "1"}, + {token.NUMBER, "9"}, + {token.NUMBER, "42"}, + {token.NUMBER, "1234567890"}, + {token.NUMBER, "00"}, + {token.NUMBER, "01"}, + 
{token.NUMBER, "07"}, + {token.NUMBER, "042"}, + {token.NUMBER, "01234567"}, + {token.NUMBER, "0x0"}, + {token.NUMBER, "0x1"}, + {token.NUMBER, "0xf"}, + {token.NUMBER, "0x42"}, + {token.NUMBER, "0x123456789abcDEF"}, + {token.NUMBER, "0x" + f100}, + {token.NUMBER, "0X0"}, + {token.NUMBER, "0X1"}, + {token.NUMBER, "0XF"}, + {token.NUMBER, "0X42"}, + {token.NUMBER, "0X123456789abcDEF"}, + {token.NUMBER, "0X" + f100}, + {token.NUMBER, "0e0"}, + {token.NUMBER, "1e0"}, + {token.NUMBER, "42e0"}, + {token.NUMBER, "01234567890e0"}, + {token.NUMBER, "0E0"}, + {token.NUMBER, "1E0"}, + {token.NUMBER, "42E0"}, + {token.NUMBER, "01234567890E0"}, + {token.NUMBER, "0e+10"}, + {token.NUMBER, "1e-10"}, + {token.NUMBER, "42e+10"}, + {token.NUMBER, "01234567890e-10"}, + {token.NUMBER, "0E+10"}, + {token.NUMBER, "1E-10"}, + {token.NUMBER, "42E+10"}, + {token.NUMBER, "01234567890E-10"}, + }, + "float": []tokenPair{ + {token.FLOAT, "0."}, + {token.FLOAT, "1."}, + {token.FLOAT, "42."}, + {token.FLOAT, "01234567890."}, + {token.FLOAT, ".0"}, + {token.FLOAT, ".1"}, + {token.FLOAT, ".42"}, + {token.FLOAT, ".0123456789"}, + {token.FLOAT, "0.0"}, + {token.FLOAT, "1.0"}, + {token.FLOAT, "42.0"}, + {token.FLOAT, "01234567890.0"}, + {token.FLOAT, "01.8e0"}, + {token.FLOAT, "1.4e0"}, + {token.FLOAT, "42.2e0"}, + {token.FLOAT, "01234567890.12e0"}, + {token.FLOAT, "0.E0"}, + {token.FLOAT, "1.12E0"}, + {token.FLOAT, "42.123E0"}, + {token.FLOAT, "01234567890.213E0"}, + {token.FLOAT, "0.2e+10"}, + {token.FLOAT, "1.2e-10"}, + {token.FLOAT, "42.54e+10"}, + {token.FLOAT, "01234567890.98e-10"}, + {token.FLOAT, "0.1E+10"}, + {token.FLOAT, "1.1E-10"}, + {token.FLOAT, "42.1E+10"}, + {token.FLOAT, "01234567890.1E-10"}, }, - // "string": []tokenPair{ - // {token.STRING, `" "`}, - // {token.STRING, `"a"`}, - // {token.STRING, `"本"`}, - // {token.STRING, `"\a"`}, - // {token.STRING, `"\b"`}, - // {token.STRING, `"\f"`}, - // {token.STRING, `"\n"`}, - // {token.STRING, `"\r"`}, - // {token.STRING, `"\t"`}, - // 
{token.STRING, `"\v"`}, - // {token.STRING, `"\""`}, - // {token.STRING, `"\000"`}, - // {token.STRING, `"\777"`}, - // {token.STRING, `"\x00"`}, - // {token.STRING, `"\xff"`}, - // {token.STRING, `"\u0000"`}, - // {token.STRING, `"\ufA16"`}, - // {token.STRING, `"\U00000000"`}, - // {token.STRING, `"\U0000ffAB"`}, - // {token.STRING, `"` + f100 + `"`}, - // }, - // "number": []tokenPair{ - // {token.NUMBER, "0"}, - // {token.NUMBER, "1"}, - // {token.NUMBER, "9"}, - // {token.NUMBER, "42"}, - // {token.NUMBER, "1234567890"}, - // {token.NUMBER, "00"}, - // {token.NUMBER, "01"}, - // {token.NUMBER, "07"}, - // {token.NUMBER, "042"}, - // {token.NUMBER, "01234567"}, - // {token.NUMBER, "0x0"}, - // {token.NUMBER, "0x1"}, - // {token.NUMBER, "0xf"}, - // {token.NUMBER, "0x42"}, - // {token.NUMBER, "0x123456789abcDEF"}, - // {token.NUMBER, "0x" + f100}, - // {token.NUMBER, "0X0"}, - // {token.NUMBER, "0X1"}, - // {token.NUMBER, "0XF"}, - // {token.NUMBER, "0X42"}, - // {token.NUMBER, "0X123456789abcDEF"}, - // {token.NUMBER, "0X" + f100}, - // {token.NUMBER, "0e0"}, - // {token.NUMBER, "1e0"}, - // {token.NUMBER, "42e0"}, - // {token.NUMBER, "01234567890e0"}, - // {token.NUMBER, "0E0"}, - // {token.NUMBER, "1E0"}, - // {token.NUMBER, "42E0"}, - // {token.NUMBER, "01234567890E0"}, - // {token.NUMBER, "0e+10"}, - // {token.NUMBER, "1e-10"}, - // {token.NUMBER, "42e+10"}, - // {token.NUMBER, "01234567890e-10"}, - // {token.NUMBER, "0E+10"}, - // {token.NUMBER, "1E-10"}, - // {token.NUMBER, "42E+10"}, - // {token.NUMBER, "01234567890E-10"}, - // }, - // "float": []tokenPair{ - // {token.FLOAT, "0."}, - // {token.FLOAT, "1."}, - // {token.FLOAT, "42."}, - // {token.FLOAT, "01234567890."}, - // {token.FLOAT, ".0"}, - // {token.FLOAT, ".1"}, - // {token.FLOAT, ".42"}, - // {token.FLOAT, ".0123456789"}, - // {token.FLOAT, "0.0"}, - // {token.FLOAT, "1.0"}, - // {token.FLOAT, "42.0"}, - // {token.FLOAT, "01234567890.0"}, - // {token.FLOAT, "01.8e0"}, - // {token.FLOAT, 
"1.4e0"}, - // {token.FLOAT, "42.2e0"}, - // {token.FLOAT, "01234567890.12e0"}, - // {token.FLOAT, "0.E0"}, - // {token.FLOAT, "1.12E0"}, - // {token.FLOAT, "42.123E0"}, - // {token.FLOAT, "01234567890.213E0"}, - // {token.FLOAT, "0.2e+10"}, - // {token.FLOAT, "1.2e-10"}, - // {token.FLOAT, "42.54e+10"}, - // {token.FLOAT, "01234567890.98e-10"}, - // {token.FLOAT, "0.1E+10"}, - // {token.FLOAT, "1.1E-10"}, - // {token.FLOAT, "42.1E+10"}, - // {token.FLOAT, "01234567890.1E-10"}, - // }, } func TestComment(t *testing.T) { From bc777d79f3235db9bab60c37e8112d43165fbe3f Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 18:23:56 +0300 Subject: [PATCH 042/137] scanner: implement fully workable positions --- scanner/scanner_test.go | 156 ++++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index af56d80..07951b2 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -15,84 +15,6 @@ type tokenPair struct { text string } -func testTokenList(t *testing.T, tokenList []tokenPair) { - // create artifical source code - buf := new(bytes.Buffer) - for _, ident := range tokenList { - fmt.Fprintf(buf, "%s\n", ident.text) - } - - s, err := NewScanner(buf) - if err != nil { - t.Fatal(err) - } - - for _, ident := range tokenList { - tok := s.Scan() - if tok != ident.tok { - t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) - } - - if s.TokenText() != ident.text { - t.Errorf("text = %q want %q", s.TokenText(), ident.text) - } - - } -} - -func TestPosition(t *testing.T) { - // t.SkipNow() - // create artifical source code - buf := new(bytes.Buffer) - - for _, listName := range orderedTokenLists { - for _, ident := range tokenLists[listName] { - fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text) - } - } - - s, err := NewScanner(buf) - if err != nil { - t.Fatal(err) - } - - pos := Position{"", 4, 1, 5} - for _, listName := range orderedTokenLists { - 
s.Scan() - - for _, k := range tokenLists[listName] { - curPos := s.Pos() - // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) - if curPos.Offset != pos.Offset { - t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) - } - if curPos.Line != pos.Line { - t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) - } - if curPos.Column != pos.Column { - t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) - } - pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline - pos.Line += countNewlines(k.text) + 1 // each token is on a new line - s.Scan() - } - } - // make sure there were no token-internal errors reported by scanner - if s.ErrorCount != 0 { - t.Errorf("%d errors", s.ErrorCount) - } -} - -var orderedTokenLists = []string{ - // "comment", - // "operator", - // "bool", - // "ident", - // "string", - "number", - // "float", -} - var tokenLists = map[string][]tokenPair{ "comment": []tokenPair{ {token.COMMENT, "//"}, @@ -242,6 +164,59 @@ var tokenLists = map[string][]tokenPair{ }, } +var orderedTokenLists = []string{ + "comment", + "operator", + "bool", + "ident", + "string", + "number", + "float", +} + +func TestPosition(t *testing.T) { + // t.SkipNow() + // create artifical source code + buf := new(bytes.Buffer) + + for _, listName := range orderedTokenLists { + for _, ident := range tokenLists[listName] { + fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text) + } + } + + s, err := NewScanner(buf) + if err != nil { + t.Fatal(err) + } + + pos := Position{"", 4, 1, 5} + s.Scan() + for _, listName := range orderedTokenLists { + + for _, k := range tokenLists[listName] { + curPos := s.Pos() + // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) + if curPos.Offset != pos.Offset { + t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) + } + if curPos.Line != pos.Line { + t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) + } + if 
curPos.Column != pos.Column { + t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) + } + pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline + pos.Line += countNewlines(k.text) + 1 // each token is on a new line + s.Scan() + } + } + // make sure there were no token-internal errors reported by scanner + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } +} + func TestComment(t *testing.T) { testTokenList(t, tokenLists["comment"]) } @@ -270,6 +245,31 @@ func TestFloat(t *testing.T) { testTokenList(t, tokenLists["float"]) } +func testTokenList(t *testing.T, tokenList []tokenPair) { + // create artifical source code + buf := new(bytes.Buffer) + for _, ident := range tokenList { + fmt.Fprintf(buf, "%s\n", ident.text) + } + + s, err := NewScanner(buf) + if err != nil { + t.Fatal(err) + } + + for _, ident := range tokenList { + tok := s.Scan() + if tok != ident.tok { + t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) + } + + if s.TokenText() != ident.text { + t.Errorf("text = %q want %q", s.TokenText(), ident.text) + } + + } +} + func countNewlines(s string) int { n := 0 for _, ch := range s { From ea921629557970660aa9d643a7a0e38f7d111dff Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 5 Oct 2015 18:48:26 +0300 Subject: [PATCH 043/137] scanner: add more tests for capturing errors --- scanner/scanner.go | 16 +++++++++++ scanner/scanner_test.go | 61 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 4580c89..12d8bbc 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "os" "unicode" + "unicode/utf8" "github.com/fatih/hcl/token" ) @@ -76,6 +77,13 @@ func (s *Scanner) next() rune { return eof } + if ch == utf8.RuneError && size == 1 { + s.srcPos.Column++ + s.srcPos.Offset += size + s.err("illegal UTF-8 encoding") + return eof + } + // remember last position 
s.prevPos = s.srcPos s.lastCharLen = size @@ -430,6 +438,8 @@ func (s *Scanner) Pos() (pos Position) { return s.tokPos } +// err prints the error of any scanning to s.Error function. If the function is +// not defined, by default it prints them to os.Stderr func (s *Scanner) err(msg string) { s.ErrorCount++ if s.Error != nil { @@ -440,22 +450,27 @@ func (s *Scanner) err(msg string) { fmt.Fprintf(os.Stderr, "%s: %s\n", s.srcPos, msg) } +// isHexadecimal returns true if the given rune is a letter func isLetter(ch rune) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) } +// isHexadecimal returns true if the given rune is a decimal digit func isDigit(ch rune) bool { return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) } +// isHexadecimal returns true if the given rune is an octan number func isOctal(ch rune) bool { return '0' <= ch && ch <= '7' } +// isHexadecimal returns true if the given rune is a decimal number func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } +// isHexadecimal returns true if the given rune is an hexadecimal number func isHexadecimal(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' } @@ -465,6 +480,7 @@ func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } +// digitVal returns the integer value of a given octal,decimal or hexadecimal rune func digitVal(ch rune) int { switch { case '0' <= ch && ch <= '9': diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 07951b2..5f97679 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -3,6 +3,7 @@ package scanner import ( "bytes" "fmt" + "strings" "testing" "github.com/fatih/hcl/token" @@ -175,7 +176,6 @@ var orderedTokenLists = []string{ } func TestPosition(t *testing.T) { - // t.SkipNow() // create artifical source code buf := new(bytes.Buffer) @@ -245,6 +245,65 @@ func TestFloat(t *testing.T) { 
testTokenList(t, tokenLists["float"]) } +func TestError(t *testing.T) { + testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.EOF) + testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.EOF) + + testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT) + testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT) + + testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) + testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) + + testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) + testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) + + testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER) + testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER) + testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER) + testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER) + testError(t, `'aa'`, "1:4", "illegal char literal", token.STRING) + + testError(t, `'`, "1:2", "literal not terminated", token.STRING) + testError(t, `'`+"\n", "1:2", "literal not terminated", token.STRING) + testError(t, `"abc`, "1:5", "literal not terminated", token.STRING) + testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING) + testError(t, "`abc\n", "2:1", "literal not terminated", token.STRING) + testError(t, `/*/`, "1:4", "comment not terminated", token.EOF) +} + +func testError(t *testing.T, src, pos, msg string, tok token.Token) { + s, err := NewScanner(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + + errorCalled := false + s.Error = func(p Position, m string) { + if !errorCalled { + if pos != p.String() { + t.Errorf("pos = %q, want %q for %q", p, pos, src) + } + + if m != msg { + t.Errorf("msg = %q, want %q for %q", m, msg, src) + } + errorCalled = true + } + } + + tk := s.Scan() + if tk != tok { + t.Errorf("tok = %s, want %s for %q", tk, tok, src) + } + if !errorCalled { + 
t.Errorf("error handler not called for %q", src) + } + if s.ErrorCount == 0 { + t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src) + } +} + func testTokenList(t *testing.T, tokenList []tokenPair) { // create artifical source code buf := new(bytes.Buffer) From 62a4ab3db750467349855dcb08b50ba99be400f1 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 6 Oct 2015 01:11:02 +0300 Subject: [PATCH 044/137] scanner: fix all errors --- scanner/scanner.go | 107 ++++++++++++++++++++++++++-------------- scanner/scanner_test.go | 18 +++---- 2 files changed, 77 insertions(+), 48 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 12d8bbc..eea9814 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -46,7 +46,7 @@ type Scanner struct { // If an error is reported (via Error) and Position is invalid, // the scanner is not inside a token. Call Pos to obtain an error // position in that case. - tokPos Position + Position } // NewScanner returns a new instance of Lexer. 
Even though src is an io.Reader, @@ -65,7 +65,6 @@ func NewScanner(src io.Reader) (*Scanner, error) { // srcPosition always starts with 1 s.srcPos.Line = 1 - return s, nil } @@ -74,30 +73,36 @@ func NewScanner(src io.Reader) (*Scanner, error) { func (s *Scanner) next() rune { ch, size, err := s.src.ReadRune() if err != nil { + // advance for error reporting + s.srcPos.Column++ + s.srcPos.Offset += size + s.lastCharLen = size return eof } if ch == utf8.RuneError && size == 1 { s.srcPos.Column++ s.srcPos.Offset += size + s.lastCharLen = size s.err("illegal UTF-8 encoding") - return eof + return ch } // remember last position s.prevPos = s.srcPos - s.lastCharLen = size - s.srcPos.Offset += size s.srcPos.Column++ + s.lastCharLen = size + s.srcPos.Offset += size + if ch == '\n' { s.srcPos.Line++ - s.srcPos.Column = 0 s.lastLineLen = s.srcPos.Column + s.srcPos.Column = 0 } // debug - // fmt.Printf("ch: %q, off:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column) + // fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column) return ch } @@ -133,18 +138,17 @@ func (s *Scanner) Scan() (tok token.Token) { // token position, initial next() is moving the offset by one(size of rune // actually), though we are interested with the starting point - s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen - + s.Position.Offset = s.srcPos.Offset - s.lastCharLen if s.srcPos.Column > 0 { // common case: last character was not a '\n' - s.tokPos.Line = s.srcPos.Line - s.tokPos.Column = s.srcPos.Column + s.Position.Line = s.srcPos.Line + s.Position.Column = s.srcPos.Column } else { // last character was a '\n' // (we cannot be at the beginning of the source // since we have called next() at least once) - s.tokPos.Line = s.srcPos.Line - 1 - s.tokPos.Column = s.lastLineLen + s.Position.Line = s.srcPos.Line - 1 + s.Position.Column = s.lastLineLen } switch { @@ -190,6 +194,8 @@ func (s *Scanner) Scan() (tok token.Token) { tok = token.ADD case '-': tok = token.SUB + 
default: + s.err("illegal char") } } @@ -198,24 +204,8 @@ func (s *Scanner) Scan() (tok token.Token) { } func (s *Scanner) scanComment(ch rune) { - // look for /* - style comments - if ch == '/' && s.peek() == '*' { - for { - if ch < 0 { - s.err("comment not terminated") - break - } - - ch0 := ch - ch = s.next() - if ch0 == '*' && ch == '/' { - break - } - } - } - // single line comments - if ch == '#' || ch == '/' { + if ch == '#' || (ch == '/' && s.peek() != '*') { ch = s.next() for ch != '\n' && ch >= 0 { ch = s.next() @@ -223,6 +213,27 @@ func (s *Scanner) scanComment(ch rune) { s.unread() return } + + // be sure we get the character after /* This allows us to find comment's + // that are not erminated + if ch == '/' { + s.next() + ch = s.next() // read character after "/*" + } + + // look for /* - style comments + for { + if ch < 0 || ch == eof { + s.err("comment not terminated") + break + } + + ch0 := ch + ch = s.next() + if ch0 == '*' && ch == '/' { + break + } + } } // scanNumber scans a HCL number definition starting with the given rune @@ -238,12 +249,15 @@ func (s *Scanner) scanNumber(ch rune) token.Token { ch = s.next() found = true } - s.unread() if !found { s.err("illegal hexadecimal number") } + if ch != eof { + s.unread() + } + return token.NUMBER } @@ -256,9 +270,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { // 0159.23 is valid. So we mark a possible illegal octal. If // the next character is not a period, we'll print the error. illegalOctal = true - } - } // literals of form 01e10 are treates as Numbers in HCL, which differs from Go. @@ -281,7 +293,9 @@ func (s *Scanner) scanNumber(ch rune) token.Token { s.err("illegal octal number") } - s.unread() + if ch != eof { + s.unread() + } return token.NUMBER } @@ -435,19 +449,38 @@ func (s *Scanner) TokenText() string { // Pos returns the position of the character immediately after the character or // token returned by the last call to Scan. 
func (s *Scanner) Pos() (pos Position) { - return s.tokPos + pos.Offset = s.srcPos.Offset - s.lastCharLen + switch { + case s.srcPos.Column > 0: + // common case: last character was not a '\n' + pos.Line = s.srcPos.Line + pos.Column = s.srcPos.Column + case s.lastLineLen > 0: + // last character was a '\n' + // (we cannot be at the beginning of the source + // since we have called next() at least once) + pos.Line = s.srcPos.Line - 1 + pos.Column = s.lastLineLen + default: + // at the beginning of the source + pos.Line = 1 + pos.Column = 1 + } + return } // err prints the error of any scanning to s.Error function. If the function is // not defined, by default it prints them to os.Stderr func (s *Scanner) err(msg string) { s.ErrorCount++ + pos := s.Pos() + if s.Error != nil { - s.Error(s.srcPos, msg) + s.Error(pos, msg) return } - fmt.Fprintf(os.Stderr, "%s: %s\n", s.srcPos, msg) + fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg) } // isHexadecimal returns true if the given rune is a letter diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 5f97679..c229b76 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -195,8 +195,9 @@ func TestPosition(t *testing.T) { for _, listName := range orderedTokenLists { for _, k := range tokenLists[listName] { - curPos := s.Pos() + curPos := s.Position // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) + if curPos.Offset != pos.Offset { t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) } @@ -246,8 +247,8 @@ func TestFloat(t *testing.T) { } func TestError(t *testing.T) { - testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.EOF) - testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.EOF) + testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) + testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT) testError(t, "abc\xff", "1:4", "illegal UTF-8 
encoding", token.IDENT) @@ -255,21 +256,16 @@ func TestError(t *testing.T) { testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) - testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) - testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) - testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER) testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER) testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER) testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER) - testError(t, `'aa'`, "1:4", "illegal char literal", token.STRING) + testError(t, `'aa'`, "1:1", "illegal char", token.ILLEGAL) - testError(t, `'`, "1:2", "literal not terminated", token.STRING) - testError(t, `'`+"\n", "1:2", "literal not terminated", token.STRING) + testError(t, `"`, "1:2", "literal not terminated", token.STRING) testError(t, `"abc`, "1:5", "literal not terminated", token.STRING) testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING) - testError(t, "`abc\n", "2:1", "literal not terminated", token.STRING) - testError(t, `/*/`, "1:4", "comment not terminated", token.EOF) + testError(t, `/*/`, "1:4", "comment not terminated", token.COMMENT) } func testError(t *testing.T, src, pos, msg string, tok token.Token) { From da40013062bd0cffb95ed418709dac73f22afc28 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 6 Oct 2015 19:03:32 +0300 Subject: [PATCH 045/137] scanner: change API for a better usage --- scanner/scanner.go | 23 ++++++++++++++--------- scanner/scanner_test.go | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index eea9814..4447a69 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -46,7 +46,7 @@ type Scanner struct { // If an error is reported (via Error) and Position is invalid, // the scanner is not 
inside a token. Call Pos to obtain an error // position in that case. - Position + tokPos Position } // NewScanner returns a new instance of Lexer. Even though src is an io.Reader, @@ -138,17 +138,17 @@ func (s *Scanner) Scan() (tok token.Token) { // token position, initial next() is moving the offset by one(size of rune // actually), though we are interested with the starting point - s.Position.Offset = s.srcPos.Offset - s.lastCharLen + s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen if s.srcPos.Column > 0 { // common case: last character was not a '\n' - s.Position.Line = s.srcPos.Line - s.Position.Column = s.srcPos.Column + s.tokPos.Line = s.srcPos.Line + s.tokPos.Column = s.srcPos.Column } else { // last character was a '\n' // (we cannot be at the beginning of the source // since we have called next() at least once) - s.Position.Line = s.srcPos.Line - 1 - s.Position.Column = s.lastLineLen + s.tokPos.Line = s.srcPos.Line - 1 + s.tokPos.Column = s.lastLineLen } switch { @@ -446,9 +446,14 @@ func (s *Scanner) TokenText() string { return s.tokBuf.String() } -// Pos returns the position of the character immediately after the character or -// token returned by the last call to Scan. +// Pos returns the successful position of the most recently scanned token. func (s *Scanner) Pos() (pos Position) { + return s.tokPos +} + +// recentPosition returns the position of the character immediately after the +// character or token returned by the last call to Scan. 
+func (s *Scanner) recentPosition() (pos Position) { pos.Offset = s.srcPos.Offset - s.lastCharLen switch { case s.srcPos.Column > 0: @@ -473,7 +478,7 @@ func (s *Scanner) Pos() (pos Position) { // not defined, by default it prints them to os.Stderr func (s *Scanner) err(msg string) { s.ErrorCount++ - pos := s.Pos() + pos := s.recentPosition() if s.Error != nil { s.Error(pos, msg) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index c229b76..399e16b 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -195,7 +195,7 @@ func TestPosition(t *testing.T) { for _, listName := range orderedTokenLists { for _, k := range tokenLists[listName] { - curPos := s.Position + curPos := s.tokPos // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) if curPos.Offset != pos.Offset { From 3631451bd2bf75f8ce08d5f248c9e529e8041c2d Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 6 Oct 2015 19:53:56 +0300 Subject: [PATCH 046/137] scanner: change signature of Scanner --- scanner/scanner.go | 76 ++++++++++++++++++++--------------------- scanner/scanner_test.go | 63 ++++++++++++++++++++++++++-------- 2 files changed, 86 insertions(+), 53 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 4447a69..3279ead 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -1,10 +1,10 @@ +// Package scanner implements a scanner for HCL (HashiCorp Configuration +// Language) source text. package scanner import ( "bytes" "fmt" - "io" - "io/ioutil" "os" "unicode" "unicode/utf8" @@ -40,37 +40,30 @@ type Scanner struct { // ErrorCount is incremented by one for each error encountered. ErrorCount int - // Start position of most recently scanned token; set by Scan. - // Calling Init or Next invalidates the position (Line == 0). - // The Filename field is always left untouched by the Scanner. - // If an error is reported (via Error) and Position is invalid, - // the scanner is not inside a token. 
Call Pos to obtain an error - // position in that case. + // tokPos is the start position of most recently scanned token; set by + // Scan. The Filename field is always left untouched by the Scanner. If + // an error is reported (via Error) and Position is invalid, the scanner is + // not inside a token. tokPos Position } -// NewScanner returns a new instance of Lexer. Even though src is an io.Reader, -// we fully consume the content. -func NewScanner(src io.Reader) (*Scanner, error) { - buf, err := ioutil.ReadAll(src) - if err != nil { - return nil, err - } - - b := bytes.NewBuffer(buf) +// NewScanner returns a new instance of Scanner. +func NewScanner(src []byte) *Scanner { + b := bytes.NewBuffer(src) s := &Scanner{ src: b, - srcBuf: b.Bytes(), + srcBuf: src, // immutable src } // srcPosition always starts with 1 s.srcPos.Line = 1 - return s, nil + return s } // next reads the next rune from the bufferred reader. Returns the rune(0) if // an error occurs (or io.EOF is returned). func (s *Scanner) next() rune { + ch, size, err := s.src.ReadRune() if err != nil { // advance for error reporting @@ -106,6 +99,7 @@ func (s *Scanner) next() rune { return ch } +// unread func (s *Scanner) unread() { if err := s.src.UnreadRune(); err != nil { panic(err) // this is user fault, we should catch it @@ -113,6 +107,7 @@ func (s *Scanner) unread() { s.srcPos = s.prevPos // put back last position } +// peek returns the next rune without advancing the reader. func (s *Scanner) peek() rune { peek, _, err := s.src.ReadRune() if err != nil { @@ -203,6 +198,26 @@ func (s *Scanner) Scan() (tok token.Token) { return tok } +// TokenText returns the literal string corresponding to the most recently +// scanned token. 
+func (s *Scanner) TokenText() string { + if s.tokStart < 0 { + // no token text + return "" + } + + // part of the token text was saved in tokBuf: save the rest in + // tokBuf as well and return its content + s.tokBuf.Write(s.srcBuf[s.tokStart:s.tokEnd]) + s.tokStart = s.tokEnd // ensure idempotency of TokenText() call + return s.tokBuf.String() +} + +// Pos returns the successful position of the most recently scanned token. +func (s *Scanner) Pos() (pos Position) { + return s.tokPos +} + func (s *Scanner) scanComment(ch rune) { // single line comments if ch == '#' || (ch == '/' && s.peek() != '*') { @@ -335,6 +350,7 @@ func (s *Scanner) scanMantissa(ch rune) rune { return ch } +// scanFraction scans the fraction after the '.' rune func (s *Scanner) scanFraction(ch rune) rune { if ch == '.' { ch = s.peek() // we peek just to see if we can move forward @@ -343,6 +359,8 @@ func (s *Scanner) scanFraction(ch rune) rune { return ch } +// scanExponent scans the remaining parts of an exponent after the 'e' or 'E' +// rune. func (s *Scanner) scanExponent(ch rune) rune { if ch == 'e' || ch == 'E' { ch = s.next() @@ -431,26 +449,6 @@ func (s *Scanner) scanIdentifier() string { return string(s.srcBuf[offs:s.srcPos.Offset]) } -// TokenText returns the literal string corresponding to the most recently -// scanned token. -func (s *Scanner) TokenText() string { - if s.tokStart < 0 { - // no token text - return "" - } - - // part of the token text was saved in tokBuf: save the rest in - // tokBuf as well and return its content - s.tokBuf.Write(s.srcBuf[s.tokStart:s.tokEnd]) - s.tokStart = s.tokEnd // ensure idempotency of TokenText() call - return s.tokBuf.String() -} - -// Pos returns the successful position of the most recently scanned token. -func (s *Scanner) Pos() (pos Position) { - return s.tokPos -} - // recentPosition returns the position of the character immediately after the // character or token returned by the last call to Scan. 
func (s *Scanner) recentPosition() (pos Position) { diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 399e16b..62ec714 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -3,7 +3,6 @@ package scanner import ( "bytes" "fmt" - "strings" "testing" "github.com/fatih/hcl/token" @@ -185,10 +184,7 @@ func TestPosition(t *testing.T) { } } - s, err := NewScanner(buf) - if err != nil { - t.Fatal(err) - } + s := NewScanner(buf.Bytes()) pos := Position{"", 4, 1, 5} s.Scan() @@ -246,6 +242,52 @@ func TestFloat(t *testing.T) { testTokenList(t, tokenLists["float"]) } +func TestComplexHCL(t *testing.T) { + // complexHCL = `// This comes from Terraform, as a test + // variable "foo" { + // default = "bar" + // description = "bar" + // } + // + // provider "aws" { + // access_key = "foo" + // secret_key = "bar" + // } + // + // provider "do" { + // api_key = "${var.foo}" + // } + // + // resource "aws_security_group" "firewall" { + // count = 5 + // } + // + // resource aws_instance "web" { + // ami = "${var.foo}" + // security_groups = [ + // "foo", + // "${aws_security_group.firewall.foo}" + // ] + // + // network_interface { + // device_index = 0 + // description = "Main network interface" + // } + // } + // + // resource "aws_instance" "db" { + // security_groups = "${aws_security_group.firewall.*.id}" + // VPC = "foo" + // + // depends_on = ["aws_instance.web"] + // } + // + // output "web_ip" { + // value = "${aws_instance.web.private_ip}" + // }` + +} + func TestError(t *testing.T) { testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) @@ -269,10 +311,7 @@ func TestError(t *testing.T) { } func testError(t *testing.T, src, pos, msg string, tok token.Token) { - s, err := NewScanner(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } + s := NewScanner([]byte(src)) errorCalled := false s.Error = func(p Position, m string) { @@ -307,11 +346,7 @@ func 
testTokenList(t *testing.T, tokenList []tokenPair) { fmt.Fprintf(buf, "%s\n", ident.text) } - s, err := NewScanner(buf) - if err != nil { - t.Fatal(err) - } - + s := NewScanner(buf.Bytes()) for _, ident := range tokenList { tok := s.Scan() if tok != ident.tok { From b5330a1d789ac6f7b7856f6bd5c7ecf611a43ffc Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 6 Oct 2015 19:59:12 +0300 Subject: [PATCH 047/137] scanner: more internal renamings --- scanner/scanner.go | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 3279ead..6487eb1 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -17,14 +17,12 @@ const eof = rune(0) // Scanner defines a lexical scanner type Scanner struct { - src *bytes.Buffer - - // Source Buffer - srcBuf []byte + buf *bytes.Buffer // Source buffer for advancing and scanning + src []byte // Source buffer for immutable access // Source Position srcPos Position // current position - prevPos Position // previous position + prevPos Position // previous position, used for peek() method lastCharLen int // length of last character in bytes lastLineLen int // length of last line in characters (for correct column reporting) @@ -49,10 +47,13 @@ type Scanner struct { // NewScanner returns a new instance of Scanner. func NewScanner(src []byte) *Scanner { + // even though we accept a src, we read from a io.Reader compatible type + // (*bytes.Buffer). So in the future we might easily change it to streaming + // read. b := bytes.NewBuffer(src) s := &Scanner{ - src: b, - srcBuf: src, // immutable src + buf: b, + src: src, } // srcPosition always starts with 1 @@ -63,8 +64,7 @@ func NewScanner(src []byte) *Scanner { // next reads the next rune from the bufferred reader. Returns the rune(0) if // an error occurs (or io.EOF is returned). 
func (s *Scanner) next() rune { - - ch, size, err := s.src.ReadRune() + ch, size, err := s.buf.ReadRune() if err != nil { // advance for error reporting s.srcPos.Column++ @@ -101,7 +101,7 @@ func (s *Scanner) next() rune { // unread func (s *Scanner) unread() { - if err := s.src.UnreadRune(); err != nil { + if err := s.buf.UnreadRune(); err != nil { panic(err) // this is user fault, we should catch it } s.srcPos = s.prevPos // put back last position @@ -109,12 +109,12 @@ func (s *Scanner) unread() { // peek returns the next rune without advancing the reader. func (s *Scanner) peek() rune { - peek, _, err := s.src.ReadRune() + peek, _, err := s.buf.ReadRune() if err != nil { return eof } - s.src.UnreadRune() + s.buf.UnreadRune() return peek } @@ -208,7 +208,7 @@ func (s *Scanner) TokenText() string { // part of the token text was saved in tokBuf: save the rest in // tokBuf as well and return its content - s.tokBuf.Write(s.srcBuf[s.tokStart:s.tokEnd]) + s.tokBuf.Write(s.src[s.tokStart:s.tokEnd]) s.tokStart = s.tokEnd // ensure idempotency of TokenText() call return s.tokBuf.String() } @@ -446,7 +446,7 @@ func (s *Scanner) scanIdentifier() string { } s.unread() // we got identifier, put back latest char - return string(s.srcBuf[offs:s.srcPos.Offset]) + return string(s.src[offs:s.srcPos.Offset]) } // recentPosition returns the position of the character immediately after the From 85e52052c617907b4b4b9f7948488f72171cb40f Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 6 Oct 2015 20:57:10 +0300 Subject: [PATCH 048/137] scanner: add a real example test --- scanner/scanner_test.go | 143 ++++++++++++++++++++++++++++------------ token/token.go | 18 ++--- 2 files changed, 109 insertions(+), 52 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 62ec714..1b85805 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -242,49 +242,106 @@ func TestFloat(t *testing.T) { testTokenList(t, tokenLists["float"]) } -func 
TestComplexHCL(t *testing.T) { - // complexHCL = `// This comes from Terraform, as a test - // variable "foo" { - // default = "bar" - // description = "bar" - // } - // - // provider "aws" { - // access_key = "foo" - // secret_key = "bar" - // } - // - // provider "do" { - // api_key = "${var.foo}" - // } - // - // resource "aws_security_group" "firewall" { - // count = 5 - // } - // - // resource aws_instance "web" { - // ami = "${var.foo}" - // security_groups = [ - // "foo", - // "${aws_security_group.firewall.foo}" - // ] - // - // network_interface { - // device_index = 0 - // description = "Main network interface" - // } - // } - // - // resource "aws_instance" "db" { - // security_groups = "${aws_security_group.firewall.*.id}" - // VPC = "foo" - // - // depends_on = ["aws_instance.web"] - // } - // - // output "web_ip" { - // value = "${aws_instance.web.private_ip}" - // }` +func TestRealExample(t *testing.T) { + complexHCL := `// This comes from Terraform, as a test + variable "foo" { + default = "bar" + description = "bar" + } + + provider "aws" { + access_key = "foo" + secret_key = "bar" + } + + resource "aws_security_group" "firewall" { + count = 5 + } + + resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}" + ] + + network_interface { + device_index = 0 + description = "Main network interface" + } + }` + + literals := []struct { + token token.Token + literal string + }{ + {token.COMMENT, `// This comes from Terraform, as a test`}, + {token.IDENT, `variable`}, + {token.STRING, `"foo"`}, + {token.LBRACE, `{`}, + {token.IDENT, `default`}, + {token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.IDENT, `description`}, + {token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.RBRACE, `}`}, + {token.IDENT, `provider`}, + {token.STRING, `"aws"`}, + {token.LBRACE, `{`}, + {token.IDENT, `access_key`}, + {token.ASSIGN, `=`}, + {token.STRING, `"foo"`}, + {token.IDENT, `secret_key`}, + 
{token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.RBRACE, `}`}, + {token.IDENT, `resource`}, + {token.STRING, `"aws_security_group"`}, + {token.STRING, `"firewall"`}, + {token.LBRACE, `{`}, + {token.IDENT, `count`}, + {token.ASSIGN, `=`}, + {token.NUMBER, `5`}, + {token.RBRACE, `}`}, + {token.IDENT, `resource`}, + {token.IDENT, `aws_instance`}, + {token.STRING, `"web"`}, + {token.LBRACE, `{`}, + {token.IDENT, `ami`}, + {token.ASSIGN, `=`}, + {token.STRING, `"${var.foo}"`}, + {token.IDENT, `security_groups`}, + {token.ASSIGN, `=`}, + {token.LBRACK, `[`}, + {token.STRING, `"foo"`}, + {token.COMMA, `,`}, + {token.STRING, `"${aws_security_group.firewall.foo}"`}, + {token.RBRACK, `]`}, + {token.IDENT, `network_interface`}, + {token.LBRACE, `{`}, + {token.IDENT, `device_index`}, + {token.ASSIGN, `=`}, + {token.NUMBER, `0`}, + {token.IDENT, `description`}, + {token.ASSIGN, `=`}, + {token.STRING, `"Main network interface"`}, + {token.RBRACE, `}`}, + {token.RBRACE, `}`}, + {token.EOF, ``}, + } + + s := NewScanner([]byte(complexHCL)) + for _, l := range literals { + tok := s.Scan() + if l.token != tok { + t.Errorf("got: %s want %s for %s\n", tok, l.token, s.TokenText()) + } + + if l.literal != s.TokenText() { + t.Errorf("got: %s want %s\n", s.TokenText(), l.literal) + } + } } diff --git a/token/token.go b/token/token.go index 9ebb696..4b615c7 100644 --- a/token/token.go +++ b/token/token.go @@ -46,17 +46,17 @@ var tokens = [...]string{ BOOL: "BOOL", STRING: "STRING", - LBRACK: "[", - LBRACE: "{", - COMMA: ",", - PERIOD: ".", + LBRACK: "LBRACK", + LBRACE: "LBRACE", + COMMA: "COMMA", + PERIOD: "PERIOD", - RBRACK: "]", - RBRACE: "}", + RBRACK: "RBRACK", + RBRACE: "RBRACE", - ASSIGN: "=", - ADD: "+", - SUB: "-", + ASSIGN: "ASSIGN", + ADD: "ADD", + SUB: "SUB", } // String returns the string corresponding to the token tok. 
From 760a028e8a280a8098c1e83ff2a10146d4dbe062 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 7 Oct 2015 12:11:52 +0300 Subject: [PATCH 049/137] scanner: add NewScannerString, update docs --- scanner/scanner.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 6487eb1..385d7bb 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -45,7 +45,14 @@ type Scanner struct { tokPos Position } -// NewScanner returns a new instance of Scanner. +// NewScannerstring creates and initializes a new instance of Scanner using +// string src as its source content. +func NewScannerString(src string) *Scanner { + return NewScanner([]byte(src)) +} + +// NewScanner creates and initializes a new instance of Scanner using src as +// its source content. func NewScanner(src []byte) *Scanner { // even though we accept a src, we read from a io.Reader compatible type // (*bytes.Buffer). So in the future we might easily change it to streaming @@ -99,7 +106,7 @@ func (s *Scanner) next() rune { return ch } -// unread +// unread unreads the previous read Rune and updates the source position func (s *Scanner) unread() { if err := s.buf.UnreadRune(); err != nil { panic(err) // this is user fault, we should catch it @@ -119,7 +126,7 @@ func (s *Scanner) peek() rune { } // Scan scans the next token and returns the token. 
-func (s *Scanner) Scan() (tok token.Token) { +func (s *Scanner) Scan() token.Token { ch := s.next() // skip white space @@ -127,6 +134,8 @@ func (s *Scanner) Scan() (tok token.Token) { ch = s.next() } + var tok token.Token + // token text markings s.tokBuf.Reset() s.tokStart = s.srcPos.Offset - s.lastCharLen From 8169cb79d7ea1ec5d6bd88556137b955c574dcb2 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 7 Oct 2015 12:20:35 +0300 Subject: [PATCH 050/137] scanner: use a better token type --- scanner/scanner.go | 59 +++++++++++++------------ scanner/token.go | 108 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 28 deletions(-) create mode 100644 scanner/token.go diff --git a/scanner/scanner.go b/scanner/scanner.go index 385d7bb..fd96004 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -8,8 +8,6 @@ import ( "os" "unicode" "unicode/utf8" - - "github.com/fatih/hcl/token" ) // eof represents a marker rune for the end of the reader. @@ -126,7 +124,7 @@ func (s *Scanner) peek() rune { } // Scan scans the next token and returns the token. 
-func (s *Scanner) Scan() token.Token { +func (s *Scanner) Scan() Token { ch := s.next() // skip white space @@ -134,7 +132,7 @@ func (s *Scanner) Scan() token.Token { ch = s.next() } - var tok token.Token + var tok TokenType // token text markings s.tokBuf.Reset() @@ -157,54 +155,59 @@ func (s *Scanner) Scan() token.Token { switch { case isLetter(ch): - tok = token.IDENT + tok = IDENT lit := s.scanIdentifier() if lit == "true" || lit == "false" { - tok = token.BOOL + tok = BOOL } case isDecimal(ch): tok = s.scanNumber(ch) default: switch ch { case eof: - tok = token.EOF + tok = EOF case '"': - tok = token.STRING + tok = STRING s.scanString() case '#', '/': - tok = token.COMMENT + tok = COMMENT s.scanComment(ch) case '.': - tok = token.PERIOD + tok = PERIOD ch = s.peek() if isDecimal(ch) { - tok = token.FLOAT + tok = FLOAT ch = s.scanMantissa(ch) ch = s.scanExponent(ch) } case '[': - tok = token.LBRACK + tok = LBRACK case ']': - tok = token.RBRACK + tok = RBRACK case '{': - tok = token.LBRACE + tok = LBRACE case '}': - tok = token.RBRACE + tok = RBRACE case ',': - tok = token.COMMA + tok = COMMA case '=': - tok = token.ASSIGN + tok = ASSIGN case '+': - tok = token.ADD + tok = ADD case '-': - tok = token.SUB + tok = SUB default: s.err("illegal char") } } s.tokEnd = s.srcPos.Offset - return tok + + return Token{ + token: tok, + pos: s.tokPos, + text: s.TokenText(), + } } // TokenText returns the literal string corresponding to the most recently @@ -261,7 +264,7 @@ func (s *Scanner) scanComment(ch rune) { } // scanNumber scans a HCL number definition starting with the given rune -func (s *Scanner) scanNumber(ch rune) token.Token { +func (s *Scanner) scanNumber(ch rune) TokenType { if ch == '0' { // check for hexadecimal, octal or float ch = s.next() @@ -282,7 +285,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { s.unread() } - return token.NUMBER + return NUMBER } // now it's either something like: 0421(octal) or 0.1231(float) @@ -300,7 +303,7 @@ func (s 
*Scanner) scanNumber(ch rune) token.Token { // literals of form 01e10 are treates as Numbers in HCL, which differs from Go. if ch == 'e' || ch == 'E' { ch = s.scanExponent(ch) - return token.NUMBER + return NUMBER } if ch == '.' { @@ -310,7 +313,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { ch = s.next() ch = s.scanExponent(ch) } - return token.FLOAT + return FLOAT } if illegalOctal { @@ -320,7 +323,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { if ch != eof { s.unread() } - return token.NUMBER + return NUMBER } s.scanMantissa(ch) @@ -328,7 +331,7 @@ func (s *Scanner) scanNumber(ch rune) token.Token { // literals of form 1e10 are treates as Numbers in HCL, which differs from Go. if ch == 'e' || ch == 'E' { ch = s.scanExponent(ch) - return token.NUMBER + return NUMBER } if ch == '.' { @@ -337,11 +340,11 @@ func (s *Scanner) scanNumber(ch rune) token.Token { ch = s.next() ch = s.scanExponent(ch) } - return token.FLOAT + return FLOAT } s.unread() - return token.NUMBER + return NUMBER } // scanMantissa scans the mantissa begining from the rune. It returns the next diff --git a/scanner/token.go b/scanner/token.go new file mode 100644 index 0000000..9d31f27 --- /dev/null +++ b/scanner/token.go @@ -0,0 +1,108 @@ +package scanner + +import "strconv" + +// Token defines a single HCL token which can be obtained via the Scanner +type Token struct { + token TokenType + pos Position + text string +} + +// TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language) +type TokenType int + +const ( + // Special tokens + ILLEGAL TokenType = iota + EOF + COMMENT + + literal_beg + IDENT // literals + NUMBER // 12345 + FLOAT // 123.45 + BOOL // true,false + STRING // "abc" + literal_end + + operator_beg + LBRACK // [ + LBRACE // { + COMMA // , + PERIOD // . 
+ + RBRACK // ] + RBRACE // } + + ASSIGN // = + ADD // + + SUB // - + operator_end +) + +var tokens = [...]string{ + ILLEGAL: "ILLEGAL", + + EOF: "EOF", + COMMENT: "COMMENT", + + IDENT: "IDENT", + NUMBER: "NUMBER", + FLOAT: "FLOAT", + BOOL: "BOOL", + STRING: "STRING", + + LBRACK: "LBRACK", + LBRACE: "LBRACE", + COMMA: "COMMA", + PERIOD: "PERIOD", + + RBRACK: "RBRACK", + RBRACE: "RBRACE", + + ASSIGN: "ASSIGN", + ADD: "ADD", + SUB: "SUB", +} + +// String returns the string corresponding to the token tok. +// For operators, delimiters, and keywords the string is the actual +// token character sequence (e.g., for the token ADD, the string is +// "+"). For all other tokens the string corresponds to the token +// constant name (e.g. for the token IDENT, the string is "IDENT"). +func (t TokenType) String() string { + s := "" + if 0 <= t && t < TokenType(len(tokens)) { + s = tokens[t] + } + if s == "" { + s = "token(" + strconv.Itoa(int(t)) + ")" + } + return s +} + +// IsLiteral returns true for tokens corresponding to identifiers and basic +// type literals; it returns false otherwise. +func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end } + +// IsOperator returns true for tokens corresponding to operators and +// delimiters; it returns false otherwise. +func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end } + +// Type returns the token's type +func (t Token) Type() TokenType { + return t.token +} + +// Pos returns the token's position +func (t Token) Pos() Position { + return t.pos +} + +// Text retusn the token's literal text. Note that this is only +// applicable for certain token types, such as token.IDENT, +// token.STRING, etc.. 
+func (t Token) Text() string { + return t.text +} From fa991d3df2c927f8894e2e48980ebc28dd6b1d2c Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 7 Oct 2015 12:24:03 +0300 Subject: [PATCH 051/137] scanner: fix tests --- scanner/scanner_test.go | 422 ++++++++++++++++++++-------------------- scanner/token.go | 4 +- 2 files changed, 212 insertions(+), 214 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 1b85805..2e2aebc 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -4,163 +4,161 @@ import ( "bytes" "fmt" "testing" - - "github.com/fatih/hcl/token" ) var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" type tokenPair struct { - tok token.Token + tok TokenType text string } var tokenLists = map[string][]tokenPair{ "comment": []tokenPair{ - {token.COMMENT, "//"}, - {token.COMMENT, "////"}, - {token.COMMENT, "// comment"}, - {token.COMMENT, "// /* comment */"}, - {token.COMMENT, "// // comment //"}, - {token.COMMENT, "//" + f100}, - {token.COMMENT, "#"}, - {token.COMMENT, "##"}, - {token.COMMENT, "# comment"}, - {token.COMMENT, "# /* comment */"}, - {token.COMMENT, "# # comment #"}, - {token.COMMENT, "#" + f100}, - {token.COMMENT, "/**/"}, - {token.COMMENT, "/***/"}, - {token.COMMENT, "/* comment */"}, - {token.COMMENT, "/* // comment */"}, - {token.COMMENT, "/* /* comment */"}, - {token.COMMENT, "/*\n comment\n*/"}, - {token.COMMENT, "/*" + f100 + "*/"}, + {COMMENT, "//"}, + {COMMENT, "////"}, + {COMMENT, "// comment"}, + {COMMENT, "// /* comment */"}, + {COMMENT, "// // comment //"}, + {COMMENT, "//" + f100}, + {COMMENT, "#"}, + {COMMENT, "##"}, + {COMMENT, "# comment"}, + {COMMENT, "# /* comment */"}, + {COMMENT, "# # comment #"}, + {COMMENT, "#" + f100}, + {COMMENT, "/**/"}, + {COMMENT, "/***/"}, + {COMMENT, "/* comment */"}, + {COMMENT, "/* // comment */"}, + {COMMENT, "/* /* comment */"}, + {COMMENT, "/*\n comment\n*/"}, + {COMMENT, "/*" + f100 
+ "*/"}, }, "operator": []tokenPair{ - {token.LBRACK, "["}, - {token.LBRACE, "{"}, - {token.COMMA, ","}, - {token.PERIOD, "."}, - {token.RBRACK, "]"}, - {token.RBRACE, "}"}, - {token.ASSIGN, "="}, - {token.ADD, "+"}, - {token.SUB, "-"}, + {LBRACK, "["}, + {LBRACE, "{"}, + {COMMA, ","}, + {PERIOD, "."}, + {RBRACK, "]"}, + {RBRACE, "}"}, + {ASSIGN, "="}, + {ADD, "+"}, + {SUB, "-"}, }, "bool": []tokenPair{ - {token.BOOL, "true"}, - {token.BOOL, "false"}, + {BOOL, "true"}, + {BOOL, "false"}, }, "ident": []tokenPair{ - {token.IDENT, "a"}, - {token.IDENT, "a0"}, - {token.IDENT, "foobar"}, - {token.IDENT, "abc123"}, - {token.IDENT, "LGTM"}, - {token.IDENT, "_"}, - {token.IDENT, "_abc123"}, - {token.IDENT, "abc123_"}, - {token.IDENT, "_abc_123_"}, - {token.IDENT, "_äöü"}, - {token.IDENT, "_本"}, - {token.IDENT, "äöü"}, - {token.IDENT, "本"}, - {token.IDENT, "a۰۱۸"}, - {token.IDENT, "foo६४"}, - {token.IDENT, "bar9876"}, + {IDENT, "a"}, + {IDENT, "a0"}, + {IDENT, "foobar"}, + {IDENT, "abc123"}, + {IDENT, "LGTM"}, + {IDENT, "_"}, + {IDENT, "_abc123"}, + {IDENT, "abc123_"}, + {IDENT, "_abc_123_"}, + {IDENT, "_äöü"}, + {IDENT, "_本"}, + {IDENT, "äöü"}, + {IDENT, "本"}, + {IDENT, "a۰۱۸"}, + {IDENT, "foo६४"}, + {IDENT, "bar9876"}, }, "string": []tokenPair{ - {token.STRING, `" "`}, - {token.STRING, `"a"`}, - {token.STRING, `"本"`}, - {token.STRING, `"\a"`}, - {token.STRING, `"\b"`}, - {token.STRING, `"\f"`}, - {token.STRING, `"\n"`}, - {token.STRING, `"\r"`}, - {token.STRING, `"\t"`}, - {token.STRING, `"\v"`}, - {token.STRING, `"\""`}, - {token.STRING, `"\000"`}, - {token.STRING, `"\777"`}, - {token.STRING, `"\x00"`}, - {token.STRING, `"\xff"`}, - {token.STRING, `"\u0000"`}, - {token.STRING, `"\ufA16"`}, - {token.STRING, `"\U00000000"`}, - {token.STRING, `"\U0000ffAB"`}, - {token.STRING, `"` + f100 + `"`}, + {STRING, `" "`}, + {STRING, `"a"`}, + {STRING, `"本"`}, + {STRING, `"\a"`}, + {STRING, `"\b"`}, + {STRING, `"\f"`}, + {STRING, `"\n"`}, + {STRING, `"\r"`}, + {STRING, `"\t"`}, + 
{STRING, `"\v"`}, + {STRING, `"\""`}, + {STRING, `"\000"`}, + {STRING, `"\777"`}, + {STRING, `"\x00"`}, + {STRING, `"\xff"`}, + {STRING, `"\u0000"`}, + {STRING, `"\ufA16"`}, + {STRING, `"\U00000000"`}, + {STRING, `"\U0000ffAB"`}, + {STRING, `"` + f100 + `"`}, }, "number": []tokenPair{ - {token.NUMBER, "0"}, - {token.NUMBER, "1"}, - {token.NUMBER, "9"}, - {token.NUMBER, "42"}, - {token.NUMBER, "1234567890"}, - {token.NUMBER, "00"}, - {token.NUMBER, "01"}, - {token.NUMBER, "07"}, - {token.NUMBER, "042"}, - {token.NUMBER, "01234567"}, - {token.NUMBER, "0x0"}, - {token.NUMBER, "0x1"}, - {token.NUMBER, "0xf"}, - {token.NUMBER, "0x42"}, - {token.NUMBER, "0x123456789abcDEF"}, - {token.NUMBER, "0x" + f100}, - {token.NUMBER, "0X0"}, - {token.NUMBER, "0X1"}, - {token.NUMBER, "0XF"}, - {token.NUMBER, "0X42"}, - {token.NUMBER, "0X123456789abcDEF"}, - {token.NUMBER, "0X" + f100}, - {token.NUMBER, "0e0"}, - {token.NUMBER, "1e0"}, - {token.NUMBER, "42e0"}, - {token.NUMBER, "01234567890e0"}, - {token.NUMBER, "0E0"}, - {token.NUMBER, "1E0"}, - {token.NUMBER, "42E0"}, - {token.NUMBER, "01234567890E0"}, - {token.NUMBER, "0e+10"}, - {token.NUMBER, "1e-10"}, - {token.NUMBER, "42e+10"}, - {token.NUMBER, "01234567890e-10"}, - {token.NUMBER, "0E+10"}, - {token.NUMBER, "1E-10"}, - {token.NUMBER, "42E+10"}, - {token.NUMBER, "01234567890E-10"}, + {NUMBER, "0"}, + {NUMBER, "1"}, + {NUMBER, "9"}, + {NUMBER, "42"}, + {NUMBER, "1234567890"}, + {NUMBER, "00"}, + {NUMBER, "01"}, + {NUMBER, "07"}, + {NUMBER, "042"}, + {NUMBER, "01234567"}, + {NUMBER, "0x0"}, + {NUMBER, "0x1"}, + {NUMBER, "0xf"}, + {NUMBER, "0x42"}, + {NUMBER, "0x123456789abcDEF"}, + {NUMBER, "0x" + f100}, + {NUMBER, "0X0"}, + {NUMBER, "0X1"}, + {NUMBER, "0XF"}, + {NUMBER, "0X42"}, + {NUMBER, "0X123456789abcDEF"}, + {NUMBER, "0X" + f100}, + {NUMBER, "0e0"}, + {NUMBER, "1e0"}, + {NUMBER, "42e0"}, + {NUMBER, "01234567890e0"}, + {NUMBER, "0E0"}, + {NUMBER, "1E0"}, + {NUMBER, "42E0"}, + {NUMBER, "01234567890E0"}, + {NUMBER, "0e+10"}, + 
{NUMBER, "1e-10"}, + {NUMBER, "42e+10"}, + {NUMBER, "01234567890e-10"}, + {NUMBER, "0E+10"}, + {NUMBER, "1E-10"}, + {NUMBER, "42E+10"}, + {NUMBER, "01234567890E-10"}, }, "float": []tokenPair{ - {token.FLOAT, "0."}, - {token.FLOAT, "1."}, - {token.FLOAT, "42."}, - {token.FLOAT, "01234567890."}, - {token.FLOAT, ".0"}, - {token.FLOAT, ".1"}, - {token.FLOAT, ".42"}, - {token.FLOAT, ".0123456789"}, - {token.FLOAT, "0.0"}, - {token.FLOAT, "1.0"}, - {token.FLOAT, "42.0"}, - {token.FLOAT, "01234567890.0"}, - {token.FLOAT, "01.8e0"}, - {token.FLOAT, "1.4e0"}, - {token.FLOAT, "42.2e0"}, - {token.FLOAT, "01234567890.12e0"}, - {token.FLOAT, "0.E0"}, - {token.FLOAT, "1.12E0"}, - {token.FLOAT, "42.123E0"}, - {token.FLOAT, "01234567890.213E0"}, - {token.FLOAT, "0.2e+10"}, - {token.FLOAT, "1.2e-10"}, - {token.FLOAT, "42.54e+10"}, - {token.FLOAT, "01234567890.98e-10"}, - {token.FLOAT, "0.1E+10"}, - {token.FLOAT, "1.1E-10"}, - {token.FLOAT, "42.1E+10"}, - {token.FLOAT, "01234567890.1E-10"}, + {FLOAT, "0."}, + {FLOAT, "1."}, + {FLOAT, "42."}, + {FLOAT, "01234567890."}, + {FLOAT, ".0"}, + {FLOAT, ".1"}, + {FLOAT, ".42"}, + {FLOAT, ".0123456789"}, + {FLOAT, "0.0"}, + {FLOAT, "1.0"}, + {FLOAT, "42.0"}, + {FLOAT, "01234567890.0"}, + {FLOAT, "01.8e0"}, + {FLOAT, "1.4e0"}, + {FLOAT, "42.2e0"}, + {FLOAT, "01234567890.12e0"}, + {FLOAT, "0.E0"}, + {FLOAT, "1.12E0"}, + {FLOAT, "42.123E0"}, + {FLOAT, "01234567890.213E0"}, + {FLOAT, "0.2e+10"}, + {FLOAT, "1.2e-10"}, + {FLOAT, "42.54e+10"}, + {FLOAT, "01234567890.98e-10"}, + {FLOAT, "0.1E+10"}, + {FLOAT, "1.1E-10"}, + {FLOAT, "42.1E+10"}, + {FLOAT, "01234567890.1E-10"}, }, } @@ -272,102 +270,102 @@ func TestRealExample(t *testing.T) { }` literals := []struct { - token token.Token + token TokenType literal string }{ - {token.COMMENT, `// This comes from Terraform, as a test`}, - {token.IDENT, `variable`}, - {token.STRING, `"foo"`}, - {token.LBRACE, `{`}, - {token.IDENT, `default`}, - {token.ASSIGN, `=`}, - {token.STRING, `"bar"`}, - {token.IDENT, 
`description`}, - {token.ASSIGN, `=`}, - {token.STRING, `"bar"`}, - {token.RBRACE, `}`}, - {token.IDENT, `provider`}, - {token.STRING, `"aws"`}, - {token.LBRACE, `{`}, - {token.IDENT, `access_key`}, - {token.ASSIGN, `=`}, - {token.STRING, `"foo"`}, - {token.IDENT, `secret_key`}, - {token.ASSIGN, `=`}, - {token.STRING, `"bar"`}, - {token.RBRACE, `}`}, - {token.IDENT, `resource`}, - {token.STRING, `"aws_security_group"`}, - {token.STRING, `"firewall"`}, - {token.LBRACE, `{`}, - {token.IDENT, `count`}, - {token.ASSIGN, `=`}, - {token.NUMBER, `5`}, - {token.RBRACE, `}`}, - {token.IDENT, `resource`}, - {token.IDENT, `aws_instance`}, - {token.STRING, `"web"`}, - {token.LBRACE, `{`}, - {token.IDENT, `ami`}, - {token.ASSIGN, `=`}, - {token.STRING, `"${var.foo}"`}, - {token.IDENT, `security_groups`}, - {token.ASSIGN, `=`}, - {token.LBRACK, `[`}, - {token.STRING, `"foo"`}, - {token.COMMA, `,`}, - {token.STRING, `"${aws_security_group.firewall.foo}"`}, - {token.RBRACK, `]`}, - {token.IDENT, `network_interface`}, - {token.LBRACE, `{`}, - {token.IDENT, `device_index`}, - {token.ASSIGN, `=`}, - {token.NUMBER, `0`}, - {token.IDENT, `description`}, - {token.ASSIGN, `=`}, - {token.STRING, `"Main network interface"`}, - {token.RBRACE, `}`}, - {token.RBRACE, `}`}, - {token.EOF, ``}, + {COMMENT, `// This comes from Terraform, as a test`}, + {IDENT, `variable`}, + {STRING, `"foo"`}, + {LBRACE, `{`}, + {IDENT, `default`}, + {ASSIGN, `=`}, + {STRING, `"bar"`}, + {IDENT, `description`}, + {ASSIGN, `=`}, + {STRING, `"bar"`}, + {RBRACE, `}`}, + {IDENT, `provider`}, + {STRING, `"aws"`}, + {LBRACE, `{`}, + {IDENT, `access_key`}, + {ASSIGN, `=`}, + {STRING, `"foo"`}, + {IDENT, `secret_key`}, + {ASSIGN, `=`}, + {STRING, `"bar"`}, + {RBRACE, `}`}, + {IDENT, `resource`}, + {STRING, `"aws_security_group"`}, + {STRING, `"firewall"`}, + {LBRACE, `{`}, + {IDENT, `count`}, + {ASSIGN, `=`}, + {NUMBER, `5`}, + {RBRACE, `}`}, + {IDENT, `resource`}, + {IDENT, `aws_instance`}, + {STRING, `"web"`}, + 
{LBRACE, `{`}, + {IDENT, `ami`}, + {ASSIGN, `=`}, + {STRING, `"${var.foo}"`}, + {IDENT, `security_groups`}, + {ASSIGN, `=`}, + {LBRACK, `[`}, + {STRING, `"foo"`}, + {COMMA, `,`}, + {STRING, `"${aws_security_group.firewall.foo}"`}, + {RBRACK, `]`}, + {IDENT, `network_interface`}, + {LBRACE, `{`}, + {IDENT, `device_index`}, + {ASSIGN, `=`}, + {NUMBER, `0`}, + {IDENT, `description`}, + {ASSIGN, `=`}, + {STRING, `"Main network interface"`}, + {RBRACE, `}`}, + {RBRACE, `}`}, + {EOF, ``}, } s := NewScanner([]byte(complexHCL)) for _, l := range literals { tok := s.Scan() - if l.token != tok { - t.Errorf("got: %s want %s for %s\n", tok, l.token, s.TokenText()) + if l.token != tok.Type() { + t.Errorf("got: %s want %s for %s\n", tok, l.token, tok.String()) } - if l.literal != s.TokenText() { - t.Errorf("got: %s want %s\n", s.TokenText(), l.literal) + if l.literal != tok.String() { + t.Errorf("got: %s want %s\n", tok, l.literal) } } } func TestError(t *testing.T) { - testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) - testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) + testError(t, "\x80", "1:1", "illegal UTF-8 encoding", ILLEGAL) + testError(t, "\xff", "1:1", "illegal UTF-8 encoding", ILLEGAL) - testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT) - testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT) + testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", IDENT) + testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", IDENT) - testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) - testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) + testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", STRING) + testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", STRING) - testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER) - testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER) - testError(t, `0x`, "1:3", "illegal hexadecimal 
number", token.NUMBER) - testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER) - testError(t, `'aa'`, "1:1", "illegal char", token.ILLEGAL) + testError(t, `01238`, "1:6", "illegal octal number", NUMBER) + testError(t, `01238123`, "1:9", "illegal octal number", NUMBER) + testError(t, `0x`, "1:3", "illegal hexadecimal number", NUMBER) + testError(t, `0xg`, "1:3", "illegal hexadecimal number", NUMBER) + testError(t, `'aa'`, "1:1", "illegal char", ILLEGAL) - testError(t, `"`, "1:2", "literal not terminated", token.STRING) - testError(t, `"abc`, "1:5", "literal not terminated", token.STRING) - testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING) - testError(t, `/*/`, "1:4", "comment not terminated", token.COMMENT) + testError(t, `"`, "1:2", "literal not terminated", STRING) + testError(t, `"abc`, "1:5", "literal not terminated", STRING) + testError(t, `"abc`+"\n", "1:5", "literal not terminated", STRING) + testError(t, `/*/`, "1:4", "comment not terminated", COMMENT) } -func testError(t *testing.T, src, pos, msg string, tok token.Token) { +func testError(t *testing.T, src, pos, msg string, tok TokenType) { s := NewScanner([]byte(src)) errorCalled := false @@ -385,7 +383,7 @@ func testError(t *testing.T, src, pos, msg string, tok token.Token) { } tk := s.Scan() - if tk != tok { + if tk.Type() != tok { t.Errorf("tok = %s, want %s for %q", tk, tok, src) } if !errorCalled { @@ -406,7 +404,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { s := NewScanner(buf.Bytes()) for _, ident := range tokenList { tok := s.Scan() - if tok != ident.tok { + if tok.Type() != ident.tok { t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) } diff --git a/scanner/token.go b/scanner/token.go index 9d31f27..481c342 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -100,9 +100,9 @@ func (t Token) Pos() Position { return t.pos } -// Text retusn the token's literal text. 
Note that this is only +// String returns the token's literal text. Note that this is only // applicable for certain token types, such as token.IDENT, // token.STRING, etc.. -func (t Token) Text() string { +func (t Token) String() string { return t.text } From 0728686f59d668d1747bb75e42a0386ccff47fa0 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 7 Oct 2015 15:04:34 +0300 Subject: [PATCH 052/137] scanner: simplify token text reading --- scanner/scanner.go | 36 +++++++++++------------------------- scanner/scanner_test.go | 4 ++-- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index fd96004..35cb427 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -25,9 +25,8 @@ type Scanner struct { lastCharLen int // length of last character in bytes lastLineLen int // length of last line in characters (for correct column reporting) - tokBuf bytes.Buffer // token text buffer - tokStart int // token text start position - tokEnd int // token text end position + tokStart int // token text start position + tokEnd int // token text end position // Error is called for each error encountered. If no Error // function is set, the error is reported to os.Stderr. @@ -135,7 +134,6 @@ func (s *Scanner) Scan() Token { var tok TokenType // token text markings - s.tokBuf.Reset() s.tokStart = s.srcPos.Offset - s.lastCharLen // token position, initial next() is moving the offset by one(size of rune @@ -201,35 +199,23 @@ func (s *Scanner) Scan() Token { } } + // finish token ending s.tokEnd = s.srcPos.Offset + // create token literal + var tokenText string + if s.tokStart >= 0 { + tokenText = string(s.src[s.tokStart:s.tokEnd]) + } + s.tokStart = s.tokEnd // ensure idempotency of tokenText() call + return Token{ token: tok, pos: s.tokPos, - text: s.TokenText(), + text: tokenText, } } -// TokenText returns the literal string corresponding to the most recently -// scanned token. 
-func (s *Scanner) TokenText() string { - if s.tokStart < 0 { - // no token text - return "" - } - - // part of the token text was saved in tokBuf: save the rest in - // tokBuf as well and return its content - s.tokBuf.Write(s.src[s.tokStart:s.tokEnd]) - s.tokStart = s.tokEnd // ensure idempotency of TokenText() call - return s.tokBuf.String() -} - -// Pos returns the successful position of the most recently scanned token. -func (s *Scanner) Pos() (pos Position) { - return s.tokPos -} - func (s *Scanner) scanComment(ch rune) { // single line comments if ch == '#' || (ch == '/' && s.peek() != '*') { diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 2e2aebc..aa27d58 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -408,8 +408,8 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) } - if s.TokenText() != ident.text { - t.Errorf("text = %q want %q", s.TokenText(), ident.text) + if tok.String() != ident.text { + t.Errorf("text = %q want %q", tok.String(), ident.text) } } From a790a9664eb9cfa79b01df35eaf1946d6d8edd75 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 7 Oct 2015 15:06:02 +0300 Subject: [PATCH 053/137] scanner: remove non used function --- scanner/scanner.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 35cb427..41f5b21 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -494,11 +494,6 @@ func isDigit(ch rune) bool { return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) } -// isHexadecimal returns true if the given rune is an octan number -func isOctal(ch rune) bool { - return '0' <= ch && ch <= '7' -} - // isHexadecimal returns true if the given rune is a decimal number func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' From f507aa7d783b8797b7cabf83f80d445a71e4dac3 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 7 Oct 2015 15:22:52 +0300 Subject: 
[PATCH 054/137] token: remove package, no need to abstract that much --- token/token.go | 84 -------------------------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 token/token.go diff --git a/token/token.go b/token/token.go deleted file mode 100644 index 4b615c7..0000000 --- a/token/token.go +++ /dev/null @@ -1,84 +0,0 @@ -package token - -import "strconv" - -// Token is the set of lexical tokens of the HCL (HashiCorp Configuration Language) -type Token int - -const ( - // Special tokens - ILLEGAL Token = iota - EOF - COMMENT - - literal_beg - IDENT // literals - NUMBER // 12345 - FLOAT // 123.45 - BOOL // true,false - STRING // "abc" - literal_end - - operator_beg - LBRACK // [ - LBRACE // { - COMMA // , - PERIOD // . - - RBRACK // ] - RBRACE // } - - ASSIGN // = - ADD // + - SUB // - - operator_end -) - -var tokens = [...]string{ - ILLEGAL: "ILLEGAL", - - EOF: "EOF", - COMMENT: "COMMENT", - - IDENT: "IDENT", - NUMBER: "NUMBER", - FLOAT: "FLOAT", - BOOL: "BOOL", - STRING: "STRING", - - LBRACK: "LBRACK", - LBRACE: "LBRACE", - COMMA: "COMMA", - PERIOD: "PERIOD", - - RBRACK: "RBRACK", - RBRACE: "RBRACE", - - ASSIGN: "ASSIGN", - ADD: "ADD", - SUB: "SUB", -} - -// String returns the string corresponding to the token tok. -// For operators, delimiters, and keywords the string is the actual -// token character sequence (e.g., for the token ADD, the string is -// "+"). For all other tokens the string corresponds to the token -// constant name (e.g. for the token IDENT, the string is "IDENT"). -func (t Token) String() string { - s := "" - if 0 <= t && t < Token(len(tokens)) { - s = tokens[t] - } - if s == "" { - s = "token(" + strconv.Itoa(int(t)) + ")" - } - return s -} - -// IsLiteral returns true for tokens corresponding to identifiers and basic -// type literals; it returns false otherwise. 
-func (t Token) IsLiteral() bool { return literal_beg < t && t < literal_end } - -// IsOperator returns true for tokens corresponding to operators and -// delimiters; it returns false otherwise. -func (t Token) IsOperator() bool { return operator_beg < t && t < operator_end } From c62cc48b921c1c14bbf7128ef02eee4edde3c890 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 7 Oct 2015 15:31:27 +0300 Subject: [PATCH 055/137] scanner: add string test for token type --- scanner/token.go | 4 ---- scanner/token_test.go | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 scanner/token_test.go diff --git a/scanner/token.go b/scanner/token.go index 481c342..f6f473d 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -67,10 +67,6 @@ var tokens = [...]string{ } // String returns the string corresponding to the token tok. -// For operators, delimiters, and keywords the string is the actual -// token character sequence (e.g., for the token ADD, the string is -// "+"). For all other tokens the string corresponds to the token -// constant name (e.g. for the token IDENT, the string is "IDENT"). 
func (t TokenType) String() string { s := "" if 0 <= t && t < TokenType(len(tokens)) { diff --git a/scanner/token_test.go b/scanner/token_test.go new file mode 100644 index 0000000..0e05576 --- /dev/null +++ b/scanner/token_test.go @@ -0,0 +1,36 @@ +package scanner + +import "testing" + +func TestTokenTypeString(t *testing.T) { + var tokens = []struct { + tt TokenType + str string + }{ + {ILLEGAL, "ILLEGAL"}, + {EOF, "EOF"}, + {COMMENT, "COMMENT"}, + {IDENT, "IDENT"}, + {NUMBER, "NUMBER"}, + {FLOAT, "FLOAT"}, + {BOOL, "BOOL"}, + {STRING, "STRING"}, + {LBRACK, "LBRACK"}, + {LBRACE, "LBRACE"}, + {COMMA, "COMMA"}, + {PERIOD, "PERIOD"}, + {RBRACK, "RBRACK"}, + {RBRACE, "RBRACE"}, + {ASSIGN, "ASSIGN"}, + {ADD, "ADD"}, + {SUB, "SUB"}, + } + + for _, token := range tokens { + if token.tt.String() != token.str { + t.Errorf("want: %q got:%q\n", token.str, token.tt) + + } + } + +} From 8e99146570424d70b734a30ec3901601254e840c Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Thu, 8 Oct 2015 01:38:39 +0300 Subject: [PATCH 056/137] parser: add initial AST definitions --- parser/ast.go | 56 +++++++++++++++++++++++++++++++++++++++++ parser/parser.go | 15 +++++++++++ scanner/position.go | 8 +++--- scanner/scanner.go | 10 ++++---- scanner/scanner_test.go | 4 +-- scanner/token.go | 4 +-- 6 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 parser/ast.go diff --git a/parser/ast.go b/parser/ast.go new file mode 100644 index 0000000..ce0cba7 --- /dev/null +++ b/parser/ast.go @@ -0,0 +1,56 @@ +package parser + +import "github.com/fatih/hcl/scanner" + +type NodeType int + +const ( + Unknown NodeType = 0 + Number + Float + Bool + String + List + Object +) + +// Node is an element in the parse tree. +type Node interface { + String() string + Type() NodeType + Start() scanner.Pos + End() scanner.Pos +} + +// IdentStatement represents an identifier. 
+type IdentStatement struct { + Token scanner.Token + Pos scanner.Pos // position of the literal + Value string +} + +type BlockStatement struct { + Lbrace scanner.Pos // position of "{" + Rbrace scanner.Pos // position of "}" + List []Node // the nodes in lexical order +} + +// AssignStatement represents an assignment +type AssignStatement struct { + Lhs Node // left hand side of the assignment + Rhs Node // right hand side of the assignment + Assign scanner.Pos // position of "=" +} + +// ListStatement represents a list +type ListStatement struct { + Lbrack scanner.Pos // position of "[" + Rbrack scanner.Pos // position of "]" + List []Node // the elements in lexical order +} + +// ObjectStatment represents an object +type ObjectStatement struct { + Idents []Node // the idents in elements in lexical order + BlockStatement +} diff --git a/parser/parser.go b/parser/parser.go index 0bfe2c2..46a1729 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1 +1,16 @@ package parser + +import "github.com/fatih/hcl/scanner" + +type Parser struct { + sc *scanner.Scanner +} + +func NewParser(src []byte) *Parser { + return &Parser{ + sc: scanner.NewScanner(src), + } +} + +func (p *Parser) Parse() { +} diff --git a/scanner/position.go b/scanner/position.go index 8ba9195..aef546c 100644 --- a/scanner/position.go +++ b/scanner/position.go @@ -2,10 +2,10 @@ package scanner import "fmt" -// Position describes an arbitrary source position +// Pos describes an arbitrary source position // including the file, line, and column location. // A Position is valid if the line number is > 0. -type Position struct { +type Pos struct { Filename string // filename, if any Offset int // offset, starting at 0 Line int // line number, starting at 1 @@ -13,7 +13,7 @@ type Position struct { } // IsValid returns true if the position is valid. 
-func (p *Position) IsValid() bool { return p.Line > 0 } +func (p *Pos) IsValid() bool { return p.Line > 0 } // String returns a string in one of several forms: // @@ -21,7 +21,7 @@ func (p *Position) IsValid() bool { return p.Line > 0 } // line:column valid position without file name // file invalid position with file name // - invalid position without file name -func (p Position) String() string { +func (p Pos) String() string { s := p.Filename if p.IsValid() { if s != "" { diff --git a/scanner/scanner.go b/scanner/scanner.go index 41f5b21..ebf91db 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -19,8 +19,8 @@ type Scanner struct { src []byte // Source buffer for immutable access // Source Position - srcPos Position // current position - prevPos Position // previous position, used for peek() method + srcPos Pos // current position + prevPos Pos // previous position, used for peek() method lastCharLen int // length of last character in bytes lastLineLen int // length of last line in characters (for correct column reporting) @@ -30,7 +30,7 @@ type Scanner struct { // Error is called for each error encountered. If no Error // function is set, the error is reported to os.Stderr. - Error func(pos Position, msg string) + Error func(pos Pos, msg string) // ErrorCount is incremented by one for each error encountered. ErrorCount int @@ -39,7 +39,7 @@ type Scanner struct { // Scan. The Filename field is always left untouched by the Scanner. If // an error is reported (via Error) and Position is invalid, the scanner is // not inside a token. - tokPos Position + tokPos Pos } // NewScannerstring creates and initializes a new instance of Scanner using @@ -449,7 +449,7 @@ func (s *Scanner) scanIdentifier() string { // recentPosition returns the position of the character immediately after the // character or token returned by the last call to Scan. 
-func (s *Scanner) recentPosition() (pos Position) { +func (s *Scanner) recentPosition() (pos Pos) { pos.Offset = s.srcPos.Offset - s.lastCharLen switch { case s.srcPos.Column > 0: diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index aa27d58..47b91e1 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -184,7 +184,7 @@ func TestPosition(t *testing.T) { s := NewScanner(buf.Bytes()) - pos := Position{"", 4, 1, 5} + pos := Pos{"", 4, 1, 5} s.Scan() for _, listName := range orderedTokenLists { @@ -369,7 +369,7 @@ func testError(t *testing.T, src, pos, msg string, tok TokenType) { s := NewScanner([]byte(src)) errorCalled := false - s.Error = func(p Position, m string) { + s.Error = func(p Pos, m string) { if !errorCalled { if pos != p.String() { t.Errorf("pos = %q, want %q for %q", p, pos, src) diff --git a/scanner/token.go b/scanner/token.go index f6f473d..1891130 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -5,7 +5,7 @@ import "strconv" // Token defines a single HCL token which can be obtained via the Scanner type Token struct { token TokenType - pos Position + pos Pos text string } @@ -92,7 +92,7 @@ func (t Token) Type() TokenType { } // Pos returns the token's position -func (t Token) Pos() Position { +func (t Token) Pos() Pos { return t.pos } From 77c7bc18c5dd37263375395b677a5cbf53904130 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 9 Oct 2015 12:36:40 +0300 Subject: [PATCH 057/137] parser: rename it back to Pos() --- parser/ast.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/ast.go b/parser/ast.go index ce0cba7..ea80ba6 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -18,7 +18,7 @@ const ( type Node interface { String() string Type() NodeType - Start() scanner.Pos + Pos() scanner.Pos End() scanner.Pos } From 82c5032a9523ebe91331b1278eac15e790fc4826 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 00:20:17 +0300 Subject: [PATCH 058/137] parser: implement node 
interface methods --- parser/ast.go | 97 ++++++++++++++++++++++++++++++++++------- parser/parser.go | 38 ++++++++++++++-- scanner/scanner.go | 10 +---- scanner/scanner_test.go | 8 ++-- 4 files changed, 123 insertions(+), 30 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index ea80ba6..76c5db3 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -14,39 +14,88 @@ const ( Object ) -// Node is an element in the parse tree. +// Node is an element in the abstract syntax tree. type Node interface { + node() String() string - Type() NodeType Pos() scanner.Pos - End() scanner.Pos } +func (IdentStatement) node() {} +func (BlockStatement) node() {} +func (AssignStatement) node() {} +func (ListStatement) node() {} +func (ObjectStatement) node() {} + // IdentStatement represents an identifier. type IdentStatement struct { - Token scanner.Token - Pos scanner.Pos // position of the literal - Value string + pos scanner.Pos // position of the literal + token scanner.Token + value string +} + +func (i IdentStatement) String() string { + return i.value +} + +func (i IdentStatement) Pos() scanner.Pos { + return i.pos } type BlockStatement struct { - Lbrace scanner.Pos // position of "{" - Rbrace scanner.Pos // position of "}" - List []Node // the nodes in lexical order + lbrace scanner.Pos // position of "{" + rbrace scanner.Pos // position of "}" + list []Node // the nodes in lexical order +} + +func (b BlockStatement) String() string { + s := "{\n" + for _, n := range b.list { + s += n.String() + "\n" + } + + s += "}" + return s +} + +func (b BlockStatement) Pos() scanner.Pos { + return b.lbrace } // AssignStatement represents an assignment type AssignStatement struct { - Lhs Node // left hand side of the assignment - Rhs Node // right hand side of the assignment - Assign scanner.Pos // position of "=" + lhs Node // left hand side of the assignment + rhs Node // right hand side of the assignment + assign scanner.Pos // position of "=" +} + +func (a AssignStatement) String() 
string { + return a.lhs.String() + " = " + a.rhs.String() +} + +func (a AssignStatement) Pos() scanner.Pos { + return a.lhs.Pos() } // ListStatement represents a list type ListStatement struct { - Lbrack scanner.Pos // position of "[" - Rbrack scanner.Pos // position of "]" - List []Node // the elements in lexical order + lbrack scanner.Pos // position of "[" + rbrack scanner.Pos // position of "]" + list []Node // the elements in lexical order +} + +func (l ListStatement) String() string { + s := "[\n" + for _, n := range l.list { + s += n.String() + ",\n" + } + + s += "]" + return s +} + +func (l ListStatement) Pos() scanner.Pos { + return l.lbrack } // ObjectStatment represents an object @@ -54,3 +103,21 @@ type ObjectStatement struct { Idents []Node // the idents in elements in lexical order BlockStatement } + +func (o ObjectStatement) String() string { + s := "" + + for i, n := range o.Idents { + s += n.String() + if i != len(o.Idents) { + s += " " + } + } + + s += o.BlockStatement.String() + return s +} + +func (o ObjectStatement) Pos() scanner.Pos { + return o.Idents[0].Pos() +} diff --git a/parser/parser.go b/parser/parser.go index 46a1729..a3513f6 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -3,14 +3,46 @@ package parser import "github.com/fatih/hcl/scanner" type Parser struct { - sc *scanner.Scanner + sc *scanner.Scanner + buf struct { + tok scanner.Token // last read token + n int // buffer size (max = 1) + } } func NewParser(src []byte) *Parser { return &Parser{ - sc: scanner.NewScanner(src), + sc: scanner.New(src), } } -func (p *Parser) Parse() { +func (p *Parser) Parse() Node { + tok := p.scan() + + switch tok.Type() { + case scanner.IDENT: + // p.parseStatement() + case scanner.EOF: + } + + return nil } + +// scan returns the next token from the underlying scanner. +// If a token has been unscanned then read that instead. +func (p *Parser) scan() scanner.Token { + // If we have a token on the buffer, then return it. 
+ if p.buf.n != 0 { + p.buf.n = 0 + return p.buf.tok + } + + // Otherwise read the next token from the scanner and Save it to the buffer + // in case we unscan later. + p.buf.tok = p.sc.Scan() + + return p.buf.tok +} + +// unscan pushes the previously read token back onto the buffer. +func (p *Parser) unread() { p.buf.n = 1 } diff --git a/scanner/scanner.go b/scanner/scanner.go index ebf91db..e127240 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -42,15 +42,9 @@ type Scanner struct { tokPos Pos } -// NewScannerstring creates and initializes a new instance of Scanner using -// string src as its source content. -func NewScannerString(src string) *Scanner { - return NewScanner([]byte(src)) -} - -// NewScanner creates and initializes a new instance of Scanner using src as +// New creates and initializes a new instance of Scanner using src as // its source content. -func NewScanner(src []byte) *Scanner { +func New(src []byte) *Scanner { // even though we accept a src, we read from a io.Reader compatible type // (*bytes.Buffer). So in the future we might easily change it to streaming // read. 
diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 47b91e1..5918ef2 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -182,7 +182,7 @@ func TestPosition(t *testing.T) { } } - s := NewScanner(buf.Bytes()) + s := New(buf.Bytes()) pos := Pos{"", 4, 1, 5} s.Scan() @@ -329,7 +329,7 @@ func TestRealExample(t *testing.T) { {EOF, ``}, } - s := NewScanner([]byte(complexHCL)) + s := New([]byte(complexHCL)) for _, l := range literals { tok := s.Scan() if l.token != tok.Type() { @@ -366,7 +366,7 @@ func TestError(t *testing.T) { } func testError(t *testing.T, src, pos, msg string, tok TokenType) { - s := NewScanner([]byte(src)) + s := New([]byte(src)) errorCalled := false s.Error = func(p Pos, m string) { @@ -401,7 +401,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { fmt.Fprintf(buf, "%s\n", ident.text) } - s := NewScanner(buf.Bytes()) + s := New(buf.Bytes()) for _, ident := range tokenList { tok := s.Scan() if tok.Type() != ident.tok { From 99099cda627165f1e22d318c63978275d68e85c3 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 01:38:59 +0300 Subject: [PATCH 059/137] parser: implement parsing assignments, stil wip --- parser/ast.go | 32 +++++++++++++++++---- parser/parser.go | 75 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 89 insertions(+), 18 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index 76c5db3..70c7dc6 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -14,32 +14,54 @@ const ( Object ) -// Node is an element in the abstract syntax tree. +// Node is an element in the abstract syntax tree. 
type Node interface { node() String() string Pos() scanner.Pos } +func (Source) node() {} func (IdentStatement) node() {} func (BlockStatement) node() {} func (AssignStatement) node() {} func (ListStatement) node() {} func (ObjectStatement) node() {} +// Source represents a single HCL source file +type Source struct { + nodes []Node +} + +func (s Source) add(node Node) { + s.nodes = append(s.nodes, node) +} + +func (s Source) String() string { + buf := "" + for _, n := range s.nodes { + buf += n.String() + } + + return buf +} + +func (s Source) Pos() scanner.Pos { + // always returns the uninitiliazed position + return scanner.Pos{} +} + // IdentStatement represents an identifier. type IdentStatement struct { - pos scanner.Pos // position of the literal token scanner.Token - value string } func (i IdentStatement) String() string { - return i.value + return i.token.String() } func (i IdentStatement) Pos() scanner.Pos { - return i.pos + return i.token.Pos() } type BlockStatement struct { diff --git a/parser/parser.go b/parser/parser.go index a3513f6..695cee6 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -3,11 +3,12 @@ package parser import "github.com/fatih/hcl/scanner" type Parser struct { - sc *scanner.Scanner - buf struct { - tok scanner.Token // last read token - n int // buffer size (max = 1) - } + sc *scanner.Scanner + + tok scanner.Token // last read token + prevTok scanner.Token // previous read token + + n int // buffer size (max = 1) } func NewParser(src []byte) *Parser { @@ -16,33 +17,81 @@ func NewParser(src []byte) *Parser { } } +// Parse returns the fully parsed source and returns the abstract syntax tree. 
func (p *Parser) Parse() Node { tok := p.scan() + node := Source{} + switch tok.Type() { case scanner.IDENT: - // p.parseStatement() + n := p.parseStatement() + node.add(n) case scanner.EOF: } + return node +} + +func (p *Parser) parseStatement() Node { + tok := p.scan() + + if tok.Type().IsLiteral() { + return p.parseIdent() + } + + switch tok.Type() { + case scanner.LBRACE: + return p.parseObject() + case scanner.LBRACK: + return p.parseList() + case scanner.ASSIGN: + return p.parseAssignment() + } return nil } +func (p *Parser) parseIdent() Node { + return IdentStatement{ + token: p.tok, + } +} + +func (p *Parser) parseObject() Node { + return nil +} + +func (p *Parser) parseList() Node { + return nil +} + +func (p *Parser) parseAssignment() Node { + return AssignStatement{ + lhs: IdentStatement{ + token: p.prevTok, + }, + assign: p.tok.Pos(), + rhs: p.parseStatement(), + } +} + // scan returns the next token from the underlying scanner. // If a token has been unscanned then read that instead. func (p *Parser) scan() scanner.Token { // If we have a token on the buffer, then return it. - if p.buf.n != 0 { - p.buf.n = 0 - return p.buf.tok + if p.n != 0 { + p.n = 0 + return p.tok } + // store previous token + p.prevTok = p.tok + // Otherwise read the next token from the scanner and Save it to the buffer // in case we unscan later. - p.buf.tok = p.sc.Scan() - - return p.buf.tok + p.tok = p.sc.Scan() + return p.tok } // unscan pushes the previously read token back onto the buffer. 
-func (p *Parser) unread() { p.buf.n = 1 } +func (p *Parser) unscan() { p.n = 1 } From f3dba35accc55f1a291e2d72d93222df3c4af717 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 02:27:43 +0300 Subject: [PATCH 060/137] parser: change how we obtain tokens --- parser/ast.go | 28 ++++++------ parser/parser.go | 99 +++++++++++++++++++++++++++++++---------- parser/parser_test.go | 17 +++++++ scanner/scanner.go | 6 +-- scanner/scanner_test.go | 12 ++--- scanner/token.go | 18 ++------ 6 files changed, 120 insertions(+), 60 deletions(-) create mode 100644 parser/parser_test.go diff --git a/parser/ast.go b/parser/ast.go index 70c7dc6..a66c587 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -33,11 +33,11 @@ type Source struct { nodes []Node } -func (s Source) add(node Node) { +func (s *Source) add(node Node) { s.nodes = append(s.nodes, node) } -func (s Source) String() string { +func (s *Source) String() string { buf := "" for _, n := range s.nodes { buf += n.String() @@ -46,7 +46,7 @@ func (s Source) String() string { return buf } -func (s Source) Pos() scanner.Pos { +func (s *Source) Pos() scanner.Pos { // always returns the uninitiliazed position return scanner.Pos{} } @@ -56,12 +56,12 @@ type IdentStatement struct { token scanner.Token } -func (i IdentStatement) String() string { +func (i *IdentStatement) String() string { return i.token.String() } -func (i IdentStatement) Pos() scanner.Pos { - return i.token.Pos() +func (i *IdentStatement) Pos() scanner.Pos { + return i.token.Pos } type BlockStatement struct { @@ -70,7 +70,7 @@ type BlockStatement struct { list []Node // the nodes in lexical order } -func (b BlockStatement) String() string { +func (b *BlockStatement) String() string { s := "{\n" for _, n := range b.list { s += n.String() + "\n" @@ -80,7 +80,7 @@ func (b BlockStatement) String() string { return s } -func (b BlockStatement) Pos() scanner.Pos { +func (b *BlockStatement) Pos() scanner.Pos { return b.lbrace } @@ -91,11 +91,11 @@ type 
AssignStatement struct { assign scanner.Pos // position of "=" } -func (a AssignStatement) String() string { +func (a *AssignStatement) String() string { return a.lhs.String() + " = " + a.rhs.String() } -func (a AssignStatement) Pos() scanner.Pos { +func (a *AssignStatement) Pos() scanner.Pos { return a.lhs.Pos() } @@ -106,7 +106,7 @@ type ListStatement struct { list []Node // the elements in lexical order } -func (l ListStatement) String() string { +func (l *ListStatement) String() string { s := "[\n" for _, n := range l.list { s += n.String() + ",\n" @@ -116,7 +116,7 @@ func (l ListStatement) String() string { return s } -func (l ListStatement) Pos() scanner.Pos { +func (l *ListStatement) Pos() scanner.Pos { return l.lbrack } @@ -126,7 +126,7 @@ type ObjectStatement struct { BlockStatement } -func (o ObjectStatement) String() string { +func (o *ObjectStatement) String() string { s := "" for i, n := range o.Idents { @@ -140,6 +140,6 @@ func (o ObjectStatement) String() string { return s } -func (o ObjectStatement) Pos() scanner.Pos { +func (o *ObjectStatement) Pos() scanner.Pos { return o.Idents[0].Pos() } diff --git a/parser/parser.go b/parser/parser.go index 695cee6..ee02e8a 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1,6 +1,10 @@ package parser -import "github.com/fatih/hcl/scanner" +import ( + "fmt" + + "github.com/fatih/hcl/scanner" +) type Parser struct { sc *scanner.Scanner @@ -8,10 +12,12 @@ type Parser struct { tok scanner.Token // last read token prevTok scanner.Token // previous read token - n int // buffer size (max = 1) + enableTrace bool + indent int + n int // buffer size (max = 1) } -func NewParser(src []byte) *Parser { +func New(src []byte) *Parser { return &Parser{ sc: scanner.New(src), } @@ -19,28 +25,37 @@ func NewParser(src []byte) *Parser { // Parse returns the fully parsed source and returns the abstract syntax tree. 
func (p *Parser) Parse() Node { - tok := p.scan() + defer un(trace(p, "ParseSource")) + node := &Source{} - node := Source{} + for { + // break if we hit the end + if p.tok.Type == scanner.EOF { + break + } - switch tok.Type() { - case scanner.IDENT: - n := p.parseStatement() - node.add(n) - case scanner.EOF: + if n := p.parseStatement(); n != nil { + node.add(n) + } } return node } func (p *Parser) parseStatement() Node { + defer un(trace(p, "ParseStatement")) + tok := p.scan() - if tok.Type().IsLiteral() { + if tok.Type.IsLiteral() { + // found an object + if p.prevTok.Type.IsLiteral() { + return p.parseObject() + } return p.parseIdent() } - switch tok.Type() { + switch tok.Type { case scanner.LBRACE: return p.parseObject() case scanner.LBRACK: @@ -48,11 +63,25 @@ func (p *Parser) parseStatement() Node { case scanner.ASSIGN: return p.parseAssignment() } + return nil } +func (p *Parser) parseAssignment() Node { + defer un(trace(p, "ParseAssignment")) + return &AssignStatement{ + lhs: &IdentStatement{ + token: p.prevTok, + }, + assign: p.tok.Pos, + rhs: p.parseStatement(), + } +} + func (p *Parser) parseIdent() Node { - return IdentStatement{ + defer un(trace(p, "ParseIdent")) + + return &IdentStatement{ token: p.tok, } } @@ -65,16 +94,6 @@ func (p *Parser) parseList() Node { return nil } -func (p *Parser) parseAssignment() Node { - return AssignStatement{ - lhs: IdentStatement{ - token: p.prevTok, - }, - assign: p.tok.Pos(), - rhs: p.parseStatement(), - } -} - // scan returns the next token from the underlying scanner. // If a token has been unscanned then read that instead. func (p *Parser) scan() scanner.Token { @@ -95,3 +114,37 @@ func (p *Parser) scan() scanner.Token { // unscan pushes the previously read token back onto the buffer. 
func (p *Parser) unscan() { p.n = 1 } + +// ---------------------------------------------------------------------------- +// Parsing support + +func (p *Parser) printTrace(a ...interface{}) { + if !p.enableTrace { + return + } + + const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + const n = len(dots) + fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column) + + i := 2 * p.indent + for i > n { + fmt.Print(dots) + i -= n + } + // i <= n + fmt.Print(dots[0:i]) + fmt.Println(a...) +} + +func trace(p *Parser, msg string) *Parser { + p.printTrace(msg, "(") + p.indent++ + return p +} + +// Usage pattern: defer un(trace(p, "...")) +func un(p *Parser) { + p.indent-- + p.printTrace(")") +} diff --git a/parser/parser_test.go b/parser/parser_test.go new file mode 100644 index 0000000..e8f07eb --- /dev/null +++ b/parser/parser_test.go @@ -0,0 +1,17 @@ +package parser + +import ( + "fmt" + "testing" +) + +func TestAssignStatment(t *testing.T) { + src := `ami = "${var.foo}"` + + p := New([]byte(src)) + p.enableTrace = true + n := p.Parse() + + fmt.Println(n) + +} diff --git a/scanner/scanner.go b/scanner/scanner.go index e127240..42234a2 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -204,9 +204,9 @@ func (s *Scanner) Scan() Token { s.tokStart = s.tokEnd // ensure idempotency of tokenText() call return Token{ - token: tok, - pos: s.tokPos, - text: tokenText, + Type: tok, + Pos: s.tokPos, + Text: tokenText, } } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 5918ef2..0556766 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -270,8 +270,8 @@ func TestRealExample(t *testing.T) { }` literals := []struct { - token TokenType - literal string + tokenType TokenType + literal string }{ {COMMENT, `// This comes from Terraform, as a test`}, {IDENT, `variable`}, @@ -332,8 +332,8 @@ func TestRealExample(t *testing.T) { s := New([]byte(complexHCL)) for _, l := range literals { tok := s.Scan() - if l.token != 
tok.Type() { - t.Errorf("got: %s want %s for %s\n", tok, l.token, tok.String()) + if l.tokenType != tok.Type { + t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String()) } if l.literal != tok.String() { @@ -383,7 +383,7 @@ func testError(t *testing.T, src, pos, msg string, tok TokenType) { } tk := s.Scan() - if tk.Type() != tok { + if tk.Type != tok { t.Errorf("tok = %s, want %s for %q", tk, tok, src) } if !errorCalled { @@ -404,7 +404,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { s := New(buf.Bytes()) for _, ident := range tokenList { tok := s.Scan() - if tok.Type() != ident.tok { + if tok.Type != ident.tok { t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) } diff --git a/scanner/token.go b/scanner/token.go index 1891130..30b215d 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -4,9 +4,9 @@ import "strconv" // Token defines a single HCL token which can be obtained via the Scanner type Token struct { - token TokenType - pos Pos - text string + Type TokenType + Pos Pos + Text string } // TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language) @@ -86,19 +86,9 @@ func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end // delimiters; it returns false otherwise. func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end } -// Type returns the token's type -func (t Token) Type() TokenType { - return t.token -} - -// Pos returns the token's position -func (t Token) Pos() Pos { - return t.pos -} - // String returns the token's literal text. Note that this is only // applicable for certain token types, such as token.IDENT, // token.STRING, etc.. 
func (t Token) String() string { - return t.text + return t.Text } From 00c13fa7e123147ed48de79219d9ac909bb77cbb Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 02:28:27 +0300 Subject: [PATCH 061/137] parser: it's not a statement --- parser/ast.go | 8 ++++---- parser/parser.go | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index a66c587..a8ec9c3 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -22,7 +22,7 @@ type Node interface { } func (Source) node() {} -func (IdentStatement) node() {} +func (Ident) node() {} func (BlockStatement) node() {} func (AssignStatement) node() {} func (ListStatement) node() {} @@ -52,15 +52,15 @@ func (s *Source) Pos() scanner.Pos { } // IdentStatement represents an identifier. -type IdentStatement struct { +type Ident struct { token scanner.Token } -func (i *IdentStatement) String() string { +func (i *Ident) String() string { return i.token.String() } -func (i *IdentStatement) Pos() scanner.Pos { +func (i *Ident) Pos() scanner.Pos { return i.token.Pos } diff --git a/parser/parser.go b/parser/parser.go index ee02e8a..84b5dff 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -70,7 +70,7 @@ func (p *Parser) parseStatement() Node { func (p *Parser) parseAssignment() Node { defer un(trace(p, "ParseAssignment")) return &AssignStatement{ - lhs: &IdentStatement{ + lhs: &Ident{ token: p.prevTok, }, assign: p.tok.Pos, @@ -81,7 +81,7 @@ func (p *Parser) parseAssignment() Node { func (p *Parser) parseIdent() Node { defer un(trace(p, "ParseIdent")) - return &IdentStatement{ + return &Ident{ token: p.tok, } } From 0e668f87b2bf9be60e3919a5ed6caf7230792afb Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 02:49:07 +0300 Subject: [PATCH 062/137] parser: working assignstatement is finished --- parser/ast.go | 2 +- parser/parser.go | 29 ++++++++++++++--------------- parser/parser_test.go | 14 ++++++++------ 3 files changed, 23 insertions(+), 22 deletions(-) 
diff --git a/parser/ast.go b/parser/ast.go index a8ec9c3..e54a5ba 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -48,7 +48,7 @@ func (s *Source) String() string { func (s *Source) Pos() scanner.Pos { // always returns the uninitiliazed position - return scanner.Pos{} + return s.nodes[0].Pos() } // IdentStatement represents an identifier. diff --git a/parser/parser.go b/parser/parser.go index 84b5dff..96f8a61 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -29,14 +29,14 @@ func (p *Parser) Parse() Node { node := &Source{} for { + if n := p.parseStatement(); n != nil { + node.add(n) + } + // break if we hit the end if p.tok.Type == scanner.EOF { break } - - if n := p.parseStatement(); n != nil { - node.add(n) - } } return node @@ -48,20 +48,16 @@ func (p *Parser) parseStatement() Node { tok := p.scan() if tok.Type.IsLiteral() { - // found an object if p.prevTok.Type.IsLiteral() { return p.parseObject() } - return p.parseIdent() - } - switch tok.Type { - case scanner.LBRACE: - return p.parseObject() - case scanner.LBRACK: - return p.parseList() - case scanner.ASSIGN: - return p.parseAssignment() + if tok := p.scan(); tok.Type == scanner.ASSIGN { + return p.parseAssignment() + } + + p.unscan() + return p.parseIdent() } return nil @@ -113,7 +109,10 @@ func (p *Parser) scan() scanner.Token { } // unscan pushes the previously read token back onto the buffer. 
-func (p *Parser) unscan() { p.n = 1 } +func (p *Parser) unscan() { + p.n = 1 + p.tok = p.prevTok +} // ---------------------------------------------------------------------------- // Parsing support diff --git a/parser/parser_test.go b/parser/parser_test.go index e8f07eb..c1bf5fe 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1,17 +1,19 @@ package parser -import ( - "fmt" - "testing" -) +import "testing" func TestAssignStatment(t *testing.T) { src := `ami = "${var.foo}"` - p := New([]byte(src)) p.enableTrace = true n := p.Parse() - fmt.Println(n) + if n.String() != src { + t.Errorf("AssignStatement is not parsed correctly\n\twant: '%s'\n\tgot : '%s'", src, n.String()) + } + + if n.Pos().Line != 1 { + t.Errorf("AssignStatement position is wrong\n\twant: '%d'\n\tgot : '%d'", 1, n.Pos().Line) + } } From 0a3fe0e81e79df9e0a293a84933da531acd942bb Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 10:37:37 +0300 Subject: [PATCH 063/137] parser: return error, we might need it --- parser/parser.go | 4 ++-- parser/parser_test.go | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 96f8a61..3466af9 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -24,7 +24,7 @@ func New(src []byte) *Parser { } // Parse returns the fully parsed source and returns the abstract syntax tree. 
-func (p *Parser) Parse() Node { +func (p *Parser) Parse() (Node, error) { defer un(trace(p, "ParseSource")) node := &Source{} @@ -39,7 +39,7 @@ func (p *Parser) Parse() Node { } } - return node + return node, nil } func (p *Parser) parseStatement() Node { diff --git a/parser/parser_test.go b/parser/parser_test.go index c1bf5fe..9d1820a 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -6,7 +6,10 @@ func TestAssignStatment(t *testing.T) { src := `ami = "${var.foo}"` p := New([]byte(src)) p.enableTrace = true - n := p.Parse() + n, err := p.Parse() + if err != nil { + t.Fatal(err) + } if n.String() != src { t.Errorf("AssignStatement is not parsed correctly\n\twant: '%s'\n\tgot : '%s'", src, n.String()) From 8d60ae5dc54efb27d6f137c266c37098f6b51bf8 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 22:53:40 +0300 Subject: [PATCH 064/137] parser: imrprovements to AST types --- parser/ast.go | 116 ++++++++++++++++++++++++------------------ parser/parser.go | 50 ++++++++++++------ parser/parser_test.go | 2 +- scanner/token.go | 7 ++- 4 files changed, 107 insertions(+), 68 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index e54a5ba..8b0dc0a 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -21,13 +21,16 @@ type Node interface { Pos() scanner.Pos } -func (Source) node() {} -func (Ident) node() {} -func (BlockStatement) node() {} +func (Source) node() {} +func (Ident) node() {} + func (AssignStatement) node() {} -func (ListStatement) node() {} func (ObjectStatement) node() {} +func (LiteralType) node() {} +func (ObjectType) node() {} +func (ListType) node() {} + // Source represents a single HCL source file type Source struct { nodes []Node @@ -57,33 +60,13 @@ type Ident struct { } func (i *Ident) String() string { - return i.token.String() + return i.token.Text } func (i *Ident) Pos() scanner.Pos { return i.token.Pos } -type BlockStatement struct { - lbrace scanner.Pos // position of "{" - rbrace scanner.Pos // position of "}" - 
list []Node // the nodes in lexical order -} - -func (b *BlockStatement) String() string { - s := "{\n" - for _, n := range b.list { - s += n.String() + "\n" - } - - s += "}" - return s -} - -func (b *BlockStatement) Pos() scanner.Pos { - return b.lbrace -} - // AssignStatement represents an assignment type AssignStatement struct { lhs Node // left hand side of the assignment @@ -99,31 +82,10 @@ func (a *AssignStatement) Pos() scanner.Pos { return a.lhs.Pos() } -// ListStatement represents a list -type ListStatement struct { - lbrack scanner.Pos // position of "[" - rbrack scanner.Pos // position of "]" - list []Node // the elements in lexical order -} - -func (l *ListStatement) String() string { - s := "[\n" - for _, n := range l.list { - s += n.String() + ",\n" - } - - s += "]" - return s -} - -func (l *ListStatement) Pos() scanner.Pos { - return l.lbrack -} - -// ObjectStatment represents an object +// ObjectStatment represents an object statement type ObjectStatement struct { Idents []Node // the idents in elements in lexical order - BlockStatement + ObjectType } func (o *ObjectStatement) String() string { @@ -136,10 +98,66 @@ func (o *ObjectStatement) String() string { } } - s += o.BlockStatement.String() + s += o.ObjectType.String() return s } func (o *ObjectStatement) Pos() scanner.Pos { return o.Idents[0].Pos() } + +// LiteralType represents a literal of basic type. 
Valid types are: +// scanner.NUMBER, scanner.FLOAT, scanner.BOOL and scanner.STRING +type LiteralType struct { + token scanner.Token +} + +func (l *LiteralType) String() string { + return l.token.Text +} + +func (l *LiteralType) Pos() scanner.Pos { + return l.token.Pos +} + +// ListStatement represents a HCL List type +type ListType struct { + lbrack scanner.Pos // position of "[" + rbrack scanner.Pos // position of "]" + list []Node // the elements in lexical order +} + +func (l *ListType) String() string { + s := "[\n" + for _, n := range l.list { + s += n.String() + ",\n" + } + + s += "]" + return s +} + +func (l *ListType) Pos() scanner.Pos { + return l.lbrack +} + +// ObjectType represents a HCL Object Type +type ObjectType struct { + lbrace scanner.Pos // position of "{" + rbrace scanner.Pos // position of "}" + list []Node // the nodes in lexical order +} + +func (b *ObjectType) String() string { + s := "{\n" + for _, n := range b.list { + s += n.String() + "\n" + } + + s += "}" + return s +} + +func (b *ObjectType) Pos() scanner.Pos { + return b.lbrace +} diff --git a/parser/parser.go b/parser/parser.go index 3466af9..4d47810 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1,6 +1,7 @@ package parser import ( + "errors" "fmt" "github.com/fatih/hcl/scanner" @@ -29,10 +30,13 @@ func (p *Parser) Parse() (Node, error) { node := &Source{} for { - if n := p.parseStatement(); n != nil { - node.add(n) + n, err := p.parseNode() + if err != nil { + return nil, err } + node.add(n) + // break if we hit the end if p.tok.Type == scanner.EOF { break @@ -42,17 +46,20 @@ func (p *Parser) Parse() (Node, error) { return node, nil } -func (p *Parser) parseStatement() Node { - defer un(trace(p, "ParseStatement")) +func (p *Parser) parseNode() (Node, error) { + defer un(trace(p, "ParseNode")) tok := p.scan() + fmt.Println(tok) // debug + if tok.Type.IsLiteral() { if p.prevTok.Type.IsLiteral() { - return p.parseObject() + return p.parseObjectType() } - if tok := p.scan(); 
tok.Type == scanner.ASSIGN { + tok := p.scan() + if tok.Type == scanner.ASSIGN { return p.parseAssignment() } @@ -60,34 +67,45 @@ func (p *Parser) parseStatement() Node { return p.parseIdent() } - return nil + return nil, errors.New("not yet implemented") } -func (p *Parser) parseAssignment() Node { +func (p *Parser) parseAssignment() (*AssignStatement, error) { defer un(trace(p, "ParseAssignment")) - return &AssignStatement{ + a := &AssignStatement{ lhs: &Ident{ token: p.prevTok, }, assign: p.tok.Pos, - rhs: p.parseStatement(), } + + n, err := p.parseNode() + if err != nil { + return nil, err + } + + a.rhs = n + return a, nil } -func (p *Parser) parseIdent() Node { +func (p *Parser) parseIdent() (*Ident, error) { defer un(trace(p, "ParseIdent")) + if !p.tok.Type.IsLiteral() { + return nil, errors.New("can't parse non literal token") + } + return &Ident{ token: p.tok, - } + }, nil } -func (p *Parser) parseObject() Node { - return nil +func (p *Parser) parseObjectType() (*ObjectStatement, error) { + return nil, errors.New("ObjectStatement is not implemented yet") } -func (p *Parser) parseList() Node { - return nil +func (p *Parser) parseListType() (*ListType, error) { + return nil, errors.New("ListStatement is not implemented yet") } // scan returns the next token from the underlying scanner. 
diff --git a/parser/parser_test.go b/parser/parser_test.go index 9d1820a..054473a 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2,7 +2,7 @@ package parser import "testing" -func TestAssignStatment(t *testing.T) { +func TestAssignStatement(t *testing.T) { src := `ami = "${var.foo}"` p := New([]byte(src)) p.enableTrace = true diff --git a/scanner/token.go b/scanner/token.go index 30b215d..6c62fa8 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -1,6 +1,9 @@ package scanner -import "strconv" +import ( + "fmt" + "strconv" +) // Token defines a single HCL token which can be obtained via the Scanner type Token struct { @@ -90,5 +93,5 @@ func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_e // applicable for certain token types, such as token.IDENT, // token.STRING, etc.. func (t Token) String() string { - return t.Text + return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text) } From 45d01fe82dd47bf68e724c513086d400798688c9 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 12 Oct 2015 23:44:53 +0300 Subject: [PATCH 065/137] parser: more improvements around ATS --- parser/ast.go | 29 ++++++++++------------------- parser/parser.go | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index 8b0dc0a..eec25cc 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -2,18 +2,6 @@ package parser import "github.com/fatih/hcl/scanner" -type NodeType int - -const ( - Unknown NodeType = 0 - Number - Float - Bool - String - List - Object -) - // Node is an element in the abstract syntax tree. type Node interface { node() @@ -109,15 +97,18 @@ func (o *ObjectStatement) Pos() scanner.Pos { // LiteralType represents a literal of basic type. 
Valid types are: // scanner.NUMBER, scanner.FLOAT, scanner.BOOL and scanner.STRING type LiteralType struct { - token scanner.Token + *Ident } -func (l *LiteralType) String() string { - return l.token.Text -} - -func (l *LiteralType) Pos() scanner.Pos { - return l.token.Pos +// isValid() returns true if the underlying identifier satisfies one of the +// valid types. +func (l *LiteralType) isValid() bool { + switch l.token.Type { + case scanner.NUMBER, scanner.FLOAT, scanner.BOOL, scanner.STRING: + return true + default: + return false + } } // ListStatement represents a HCL List type diff --git a/parser/parser.go b/parser/parser.go index 4d47810..5eb03da 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -55,7 +55,7 @@ func (p *Parser) parseNode() (Node, error) { if tok.Type.IsLiteral() { if p.prevTok.Type.IsLiteral() { - return p.parseObjectType() + return p.parseObjectStatement() } tok := p.scan() @@ -70,6 +70,7 @@ func (p *Parser) parseNode() (Node, error) { return nil, errors.New("not yet implemented") } +// parseAssignment parses an assignment and returns a AssignStatement AST func (p *Parser) parseAssignment() (*AssignStatement, error) { defer un(trace(p, "ParseAssignment")) a := &AssignStatement{ @@ -88,6 +89,7 @@ func (p *Parser) parseAssignment() (*AssignStatement, error) { return a, nil } +// parseIdent parses a generic identifier and returns a Ident AST func (p *Parser) parseIdent() (*Ident, error) { defer un(trace(p, "ParseIdent")) @@ -100,12 +102,37 @@ func (p *Parser) parseIdent() (*Ident, error) { }, nil } -func (p *Parser) parseObjectType() (*ObjectStatement, error) { +// parseLiteralType parses a literal type and returns a LiteralType AST +func (p *Parser) parseLiteralType() (*LiteralType, error) { + i, err := p.parseIdent() + if err != nil { + return nil, err + } + + l := &LiteralType{} + l.Ident = i + + if !l.isValid() { + return nil, fmt.Errorf("Identifier is not a LiteralType: %s", l.token) + } + + return l, nil +} + +// 
parseObjectStatement parses an object statement returns an ObjectStatement +// AST. ObjectsStatements represents both normal and nested objects statement +func (p *Parser) parseObjectStatement() (*ObjectStatement, error) { return nil, errors.New("ObjectStatement is not implemented yet") } +// parseObjectType parses an object type and returns a ObjectType AST +func (p *Parser) parseObjectType() (*ObjectType, error) { + return nil, errors.New("ObjectType is not implemented yet") +} + +// parseListType parses a list type and returns a ListType AST func (p *Parser) parseListType() (*ListType, error) { - return nil, errors.New("ListStatement is not implemented yet") + return nil, errors.New("ListType is not implemented yet") } // scan returns the next token from the underlying scanner. From 3832ed0981898bd796891731bdf3f0a17f12b49c Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Thu, 15 Oct 2015 01:27:35 +0300 Subject: [PATCH 066/137] parser: improve node parsing, remove string() and many other small fixes --- parser/ast.go | 58 ++++------------------------------------- parser/parser.go | 60 +++++++++++++++++++++---------------------- parser/parser_test.go | 5 ---- scanner/token.go | 10 ++++++-- 4 files changed, 43 insertions(+), 90 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index eec25cc..8170115 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -5,7 +5,6 @@ import "github.com/fatih/hcl/scanner" // Node is an element in the abstract syntax tree. 
type Node interface { node() - String() string Pos() scanner.Pos } @@ -28,15 +27,6 @@ func (s *Source) add(node Node) { s.nodes = append(s.nodes, node) } -func (s *Source) String() string { - buf := "" - for _, n := range s.nodes { - buf += n.String() - } - - return buf -} - func (s *Source) Pos() scanner.Pos { // always returns the uninitiliazed position return s.nodes[0].Pos() @@ -47,10 +37,6 @@ type Ident struct { token scanner.Token } -func (i *Ident) String() string { - return i.token.Text -} - func (i *Ident) Pos() scanner.Pos { return i.token.Pos } @@ -62,10 +48,6 @@ type AssignStatement struct { assign scanner.Pos // position of "=" } -func (a *AssignStatement) String() string { - return a.lhs.String() + " = " + a.rhs.String() -} - func (a *AssignStatement) Pos() scanner.Pos { return a.lhs.Pos() } @@ -76,20 +58,6 @@ type ObjectStatement struct { ObjectType } -func (o *ObjectStatement) String() string { - s := "" - - for i, n := range o.Idents { - s += n.String() - if i != len(o.Idents) { - s += " " - } - } - - s += o.ObjectType.String() - return s -} - func (o *ObjectStatement) Pos() scanner.Pos { return o.Idents[0].Pos() } @@ -97,7 +65,7 @@ func (o *ObjectStatement) Pos() scanner.Pos { // LiteralType represents a literal of basic type. 
Valid types are: // scanner.NUMBER, scanner.FLOAT, scanner.BOOL and scanner.STRING type LiteralType struct { - *Ident + token scanner.Token } // isValid() returns true if the underlying identifier satisfies one of the @@ -111,6 +79,10 @@ func (l *LiteralType) isValid() bool { } } +func (l *LiteralType) Pos() scanner.Pos { + return l.token.Pos +} + // ListStatement represents a HCL List type type ListType struct { lbrack scanner.Pos // position of "[" @@ -118,16 +90,6 @@ type ListType struct { list []Node // the elements in lexical order } -func (l *ListType) String() string { - s := "[\n" - for _, n := range l.list { - s += n.String() + ",\n" - } - - s += "]" - return s -} - func (l *ListType) Pos() scanner.Pos { return l.lbrack } @@ -139,16 +101,6 @@ type ObjectType struct { list []Node // the nodes in lexical order } -func (b *ObjectType) String() string { - s := "{\n" - for _, n := range b.list { - s += n.String() + "\n" - } - - s += "}" - return s -} - func (b *ObjectType) Pos() scanner.Pos { return b.lbrace } diff --git a/parser/parser.go b/parser/parser.go index 5eb03da..e7fe9c1 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -24,6 +24,8 @@ func New(src []byte) *Parser { } } +var errEofToken = errors.New("EOF token found") + // Parse returns the fully parsed source and returns the abstract syntax tree. 
func (p *Parser) Parse() (Node, error) { defer un(trace(p, "ParseSource")) @@ -31,16 +33,15 @@ func (p *Parser) Parse() (Node, error) { for { n, err := p.parseNode() + if err == errEofToken { + break // we are finished + } if err != nil { return nil, err } + // we successfully parsed a node, add it to the final source node node.add(n) - - // break if we hit the end - if p.tok.Type == scanner.EOF { - break - } } return node, nil @@ -50,24 +51,33 @@ func (p *Parser) parseNode() (Node, error) { defer un(trace(p, "ParseNode")) tok := p.scan() - fmt.Println(tok) // debug - if tok.Type.IsLiteral() { - if p.prevTok.Type.IsLiteral() { + switch tok.Type { + case scanner.ASSIGN: + return p.parseAssignment() + case scanner.LBRACK: + // return p.parseListType() + case scanner.LBRACE: + // return p.parseObjectTpe() + case scanner.COMMENT: + // implement comment + case scanner.EOF: + return nil, errEofToken + } + + if tok.Type.IsIdentifier() { + if p.prevTok.Type.IsIdentifier() { return p.parseObjectStatement() } - tok := p.scan() - if tok.Type == scanner.ASSIGN { - return p.parseAssignment() + if tok.Type.IsLiteral() { + return p.parseLiteralType() } - - p.unscan() return p.parseIdent() } - return nil, errors.New("not yet implemented") + return nil, fmt.Errorf("not yet implemented: %s", tok.Type) } // parseAssignment parses an assignment and returns a AssignStatement AST @@ -93,10 +103,6 @@ func (p *Parser) parseAssignment() (*AssignStatement, error) { func (p *Parser) parseIdent() (*Ident, error) { defer un(trace(p, "ParseIdent")) - if !p.tok.Type.IsLiteral() { - return nil, errors.New("can't parse non literal token") - } - return &Ident{ token: p.tok, }, nil @@ -104,24 +110,18 @@ func (p *Parser) parseIdent() (*Ident, error) { // parseLiteralType parses a literal type and returns a LiteralType AST func (p *Parser) parseLiteralType() (*LiteralType, error) { - i, err := p.parseIdent() - if err != nil { - return nil, err - } + defer un(trace(p, "ParseLiteral")) - l := 
&LiteralType{} - l.Ident = i - - if !l.isValid() { - return nil, fmt.Errorf("Identifier is not a LiteralType: %s", l.token) - } - - return l, nil + return &LiteralType{ + token: p.tok, + }, nil } // parseObjectStatement parses an object statement returns an ObjectStatement // AST. ObjectsStatements represents both normal and nested objects statement func (p *Parser) parseObjectStatement() (*ObjectStatement, error) { + defer un(trace(p, "ParseObjectStatement")) + return nil, errors.New("ObjectStatement is not implemented yet") } diff --git a/parser/parser_test.go b/parser/parser_test.go index 054473a..63004f5 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -11,12 +11,7 @@ func TestAssignStatement(t *testing.T) { t.Fatal(err) } - if n.String() != src { - t.Errorf("AssignStatement is not parsed correctly\n\twant: '%s'\n\tgot : '%s'", src, n.String()) - } - if n.Pos().Line != 1 { t.Errorf("AssignStatement position is wrong\n\twant: '%d'\n\tgot : '%d'", 1, n.Pos().Line) } - } diff --git a/scanner/token.go b/scanner/token.go index 6c62fa8..deb6f9a 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -21,13 +21,15 @@ const ( EOF COMMENT + identifier_beg + IDENT // literals literal_beg - IDENT // literals NUMBER // 12345 FLOAT // 123.45 BOOL // true,false STRING // "abc" literal_end + identifier_end operator_beg LBRACK // [ @@ -81,8 +83,12 @@ func (t TokenType) String() string { return s } -// IsLiteral returns true for tokens corresponding to identifiers and basic +// IsIdentifier returns true for tokens corresponding to identifiers and basic // type literals; it returns false otherwise. +func (t TokenType) IsIdentifier() bool { return identifier_beg < t && t < identifier_end } + +// IsLiteral returns true for tokens corresponding to basic type literals; it +// returns false otherwise. 
func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end } // IsOperator returns true for tokens corresponding to operators and From 62caacf06fcb8267122a66b4f36a4810458cb1c5 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 00:57:57 +0300 Subject: [PATCH 067/137] parser: assignments and objects are actually items and the same --- parser/ast.go | 69 +++++++++++++++++++++---------------------- parser/parser.go | 48 ++++-------------------------- parser/parser_test.go | 14 ++++++++- 3 files changed, 52 insertions(+), 79 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index 8170115..f9d5924 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -8,28 +8,48 @@ type Node interface { Pos() scanner.Pos } -func (Source) node() {} -func (Ident) node() {} - -func (AssignStatement) node() {} -func (ObjectStatement) node() {} - -func (LiteralType) node() {} +func (ObjectList) node() {} +func (ObjectItem) node() {} func (ObjectType) node() {} +func (LiteralType) node() {} func (ListType) node() {} +func (Ident) node() {} -// Source represents a single HCL source file -type Source struct { - nodes []Node +// ObjectList represents a list of ObjectItems. An HCL file itself is an +// ObjectList. +type ObjectList struct { + items []*ObjectItem } -func (s *Source) add(node Node) { - s.nodes = append(s.nodes, node) +func (o *ObjectList) add(item *ObjectItem) { + o.items = append(o.items, item) } -func (s *Source) Pos() scanner.Pos { +func (o *ObjectList) Pos() scanner.Pos { // always returns the uninitiliazed position - return s.nodes[0].Pos() + return o.items[0].Pos() +} + +// ObjectItem represents a HCL Object Item. An item is represented with a key +// (or keys). It can be an assignment or an object (both normal and nested) +type ObjectItem struct { + // key is either an Identifier or a String. The slice is only one lenght + // long, however if it's a nested object it'll can be larger than one. 
In + // that case "assign" is invalid as there is no assignments for a nested + // object. + key []Ident + + // assign contains the position of "=", if any + assign scanner.Pos + + // val is the item itself. It can be an object,list, number, bool or a + // string. If key lenght is larger than one, val can be only of type + // Object. + val Node +} + +func (o *ObjectItem) Pos() scanner.Pos { + return o.key[0].Pos() } // IdentStatement represents an identifier. @@ -41,27 +61,6 @@ func (i *Ident) Pos() scanner.Pos { return i.token.Pos } -// AssignStatement represents an assignment -type AssignStatement struct { - lhs Node // left hand side of the assignment - rhs Node // right hand side of the assignment - assign scanner.Pos // position of "=" -} - -func (a *AssignStatement) Pos() scanner.Pos { - return a.lhs.Pos() -} - -// ObjectStatment represents an object statement -type ObjectStatement struct { - Idents []Node // the idents in elements in lexical order - ObjectType -} - -func (o *ObjectStatement) Pos() scanner.Pos { - return o.Idents[0].Pos() -} - // LiteralType represents a literal of basic type. Valid types are: // scanner.NUMBER, scanner.FLOAT, scanner.BOOL and scanner.STRING type LiteralType struct { diff --git a/parser/parser.go b/parser/parser.go index e7fe9c1..8c062c2 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -29,10 +29,10 @@ var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. 
func (p *Parser) Parse() (Node, error) { defer un(trace(p, "ParseSource")) - node := &Source{} + node := &ObjectList{} for { - n, err := p.parseNode() + n, err := p.parseObjectItem() if err == errEofToken { break // we are finished } @@ -47,15 +47,15 @@ func (p *Parser) Parse() (Node, error) { return node, nil } -func (p *Parser) parseNode() (Node, error) { - defer un(trace(p, "ParseNode")) +func (p *Parser) parseObjectItem() (*ObjectItem, error) { + defer un(trace(p, "ParseObjectItem")) tok := p.scan() fmt.Println(tok) // debug switch tok.Type { case scanner.ASSIGN: - return p.parseAssignment() + // return p.parseAssignment() case scanner.LBRACK: // return p.parseListType() case scanner.LBRACE: @@ -66,39 +66,9 @@ func (p *Parser) parseNode() (Node, error) { return nil, errEofToken } - if tok.Type.IsIdentifier() { - if p.prevTok.Type.IsIdentifier() { - return p.parseObjectStatement() - } - - if tok.Type.IsLiteral() { - return p.parseLiteralType() - } - return p.parseIdent() - } - return nil, fmt.Errorf("not yet implemented: %s", tok.Type) } -// parseAssignment parses an assignment and returns a AssignStatement AST -func (p *Parser) parseAssignment() (*AssignStatement, error) { - defer un(trace(p, "ParseAssignment")) - a := &AssignStatement{ - lhs: &Ident{ - token: p.prevTok, - }, - assign: p.tok.Pos, - } - - n, err := p.parseNode() - if err != nil { - return nil, err - } - - a.rhs = n - return a, nil -} - // parseIdent parses a generic identifier and returns a Ident AST func (p *Parser) parseIdent() (*Ident, error) { defer un(trace(p, "ParseIdent")) @@ -117,14 +87,6 @@ func (p *Parser) parseLiteralType() (*LiteralType, error) { }, nil } -// parseObjectStatement parses an object statement returns an ObjectStatement -// AST. 
ObjectsStatements represents both normal and nested objects statement -func (p *Parser) parseObjectStatement() (*ObjectStatement, error) { - defer un(trace(p, "ParseObjectStatement")) - - return nil, errors.New("ObjectStatement is not implemented yet") -} - // parseObjectType parses an object type and returns a ObjectType AST func (p *Parser) parseObjectType() (*ObjectType, error) { return nil, errors.New("ObjectType is not implemented yet") diff --git a/parser/parser_test.go b/parser/parser_test.go index 63004f5..4760e71 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1,6 +1,9 @@ package parser -import "testing" +import ( + "fmt" + "testing" +) func TestAssignStatement(t *testing.T) { src := `ami = "${var.foo}"` @@ -14,4 +17,13 @@ func TestAssignStatement(t *testing.T) { if n.Pos().Line != 1 { t.Errorf("AssignStatement position is wrong\n\twant: '%d'\n\tgot : '%d'", 1, n.Pos().Line) } + + n1, ok := n.(*ObjectList) + if !ok { + t.Fatal("First Node should be of type Source") + } + + for _, ns := range n1.nodes { + fmt.Printf("ns = %+v\n", ns) + } } From 17205f84847721a55396f96e33a7c32487328b8a Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 01:11:54 +0300 Subject: [PATCH 068/137] parser: be more specific about keys and identifiers --- parser/ast.go | 35 ++++++++++++++++++++++------------- parser/parser.go | 10 +--------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index f9d5924..d09432f 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -8,12 +8,13 @@ type Node interface { Pos() scanner.Pos } -func (ObjectList) node() {} -func (ObjectItem) node() {} +func (ObjectList) node() {} +func (ObjectItem) node() {} +func (ObjectKey) node() {} + func (ObjectType) node() {} func (LiteralType) node() {} func (ListType) node() {} -func (Ident) node() {} // ObjectList represents a list of ObjectItems. An HCL file itself is an // ObjectList. 
@@ -33,11 +34,10 @@ func (o *ObjectList) Pos() scanner.Pos { // ObjectItem represents a HCL Object Item. An item is represented with a key // (or keys). It can be an assignment or an object (both normal and nested) type ObjectItem struct { - // key is either an Identifier or a String. The slice is only one lenght - // long, however if it's a nested object it'll can be larger than one. In - // that case "assign" is invalid as there is no assignments for a nested - // object. - key []Ident + // keys is only one lenght long if it's of type assignment. If it's a + // nested object it can be larger than one. In that case "assign" is + // invalid as there is no assignments for a nested object. + keys []*ObjectKey // assign contains the position of "=", if any assign scanner.Pos @@ -49,16 +49,25 @@ type ObjectItem struct { } func (o *ObjectItem) Pos() scanner.Pos { - return o.key[0].Pos() + return o.keys[0].Pos() } -// IdentStatement represents an identifier. -type Ident struct { +// ObjectKeys are either an identifier or of type string. +type ObjectKey struct { token scanner.Token } -func (i *Ident) Pos() scanner.Pos { - return i.token.Pos +func (o *ObjectKey) Pos() scanner.Pos { + return o.token.Pos +} + +func (o *ObjectKey) IsValid() bool { + switch o.token.Type { + case scanner.IDENT, scanner.STRING: + return true + default: + return false + } } // LiteralType represents a literal of basic type. 
Valid types are: diff --git a/parser/parser.go b/parser/parser.go index 8c062c2..1ae1432 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -47,6 +47,7 @@ func (p *Parser) Parse() (Node, error) { return node, nil } +// parseObjectItem parses a single object item func (p *Parser) parseObjectItem() (*ObjectItem, error) { defer un(trace(p, "ParseObjectItem")) @@ -69,15 +70,6 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { return nil, fmt.Errorf("not yet implemented: %s", tok.Type) } -// parseIdent parses a generic identifier and returns a Ident AST -func (p *Parser) parseIdent() (*Ident, error) { - defer un(trace(p, "ParseIdent")) - - return &Ident{ - token: p.tok, - }, nil -} - // parseLiteralType parses a literal type and returns a LiteralType AST func (p *Parser) parseLiteralType() (*LiteralType, error) { defer un(trace(p, "ParseLiteral")) From 628bc890269030a73e0ff9b77dd3302fb9084956 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 02:00:02 +0300 Subject: [PATCH 069/137] parser: implement parseObjectKey function --- parser/ast.go | 8 +++-- parser/parser.go | 53 +++++++++++++++++++++++++++++++++ parser/parser_test.go | 69 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 113 insertions(+), 17 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index d09432f..98b640f 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -34,7 +34,7 @@ func (o *ObjectList) Pos() scanner.Pos { // ObjectItem represents a HCL Object Item. An item is represented with a key // (or keys). It can be an assignment or an object (both normal and nested) type ObjectItem struct { - // keys is only one lenght long if it's of type assignment. If it's a + // keys is only one length long if it's of type assignment. If it's a // nested object it can be larger than one. In that case "assign" is // invalid as there is no assignments for a nested object. 
keys []*ObjectKey @@ -43,7 +43,7 @@ type ObjectItem struct { assign scanner.Pos // val is the item itself. It can be an object,list, number, bool or a - // string. If key lenght is larger than one, val can be only of type + // string. If key length is larger than one, val can be only of type // Object. val Node } @@ -61,7 +61,9 @@ func (o *ObjectKey) Pos() scanner.Pos { return o.token.Pos } -func (o *ObjectKey) IsValid() bool { +// isValid() returns true if the underlying identifier satisfies one of the +// valid types (IDENT or STRING) +func (o *ObjectKey) isValid() bool { switch o.token.Type { case scanner.IDENT, scanner.STRING: return true diff --git a/parser/parser.go b/parser/parser.go index 1ae1432..2192829 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -51,6 +51,18 @@ func (p *Parser) Parse() (Node, error) { func (p *Parser) parseObjectItem() (*ObjectItem, error) { defer un(trace(p, "ParseObjectItem")) + keys, err := p.parseObjectKey() + if err != nil { + return nil, err + } + + switch len(keys) { + case 1: + // assignment or object + default: + // nested object + } + tok := p.scan() fmt.Println(tok) // debug @@ -70,6 +82,47 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { return nil, fmt.Errorf("not yet implemented: %s", tok.Type) } +// parseObjectKey parses an object key and returns a ObjectKey AST +func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { + tok := p.scan() + switch tok.Type { + case scanner.IDENT, scanner.STRING: + // add first found token + keys := []*ObjectKey{&ObjectKey{tok}} + nestedObj := false + + // now we have three casses + // 1. assignment: KEY = NODE + // 2. object: KEY { } + // 2. nested object: KEY KEY2 ... 
KEYN {} + for { + tok := p.scan() + switch tok.Type { + case scanner.ASSIGN: + // assignment or object, but not nested objects + if nestedObj { + return nil, fmt.Errorf("nested object expected: LBRACE got: %s", tok.Type) + } + + return keys, nil + case scanner.LBRACE: + // object + return keys, nil + case scanner.IDENT, scanner.STRING: + // nested object + nestedObj = true + keys = append(keys, &ObjectKey{ + token: tok, + }) + default: + return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", tok.Type) + } + } + default: + return nil, fmt.Errorf("expected: IDENT | STRING got: %s", tok.Type) + } +} + // parseLiteralType parses a literal type and returns a LiteralType AST func (p *Parser) parseLiteralType() (*LiteralType, error) { defer un(trace(p, "ParseLiteral")) diff --git a/parser/parser_test.go b/parser/parser_test.go index 4760e71..5a16f89 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2,28 +2,69 @@ package parser import ( "fmt" + "path/filepath" + "reflect" + "runtime" "testing" + + "github.com/fatih/hcl/scanner" ) -func TestAssignStatement(t *testing.T) { - src := `ami = "${var.foo}"` - p := New([]byte(src)) - p.enableTrace = true - n, err := p.Parse() - if err != nil { - t.Fatal(err) +func TestObjectKey(t *testing.T) { + keys := []struct { + exp []scanner.TokenType + src string + }{ + {[]scanner.TokenType{scanner.IDENT}, `foo {}`}, + {[]scanner.TokenType{scanner.IDENT}, `foo = {}`}, + {[]scanner.TokenType{scanner.IDENT}, `foo = "${var.bar}`}, + {[]scanner.TokenType{scanner.STRING}, `"foo" {}`}, + {[]scanner.TokenType{scanner.STRING}, `"foo" = {}`}, + {[]scanner.TokenType{scanner.STRING}, `"foo" = "${var.bar}`}, + {[]scanner.TokenType{scanner.IDENT, scanner.IDENT}, `foo bar {}`}, + {[]scanner.TokenType{scanner.IDENT, scanner.STRING}, `foo "bar" {}`}, + {[]scanner.TokenType{scanner.STRING, scanner.IDENT}, `"foo" bar {}`}, + {[]scanner.TokenType{scanner.IDENT, scanner.IDENT, scanner.IDENT}, `foo bar baz {}`}, } - if 
n.Pos().Line != 1 { - t.Errorf("AssignStatement position is wrong\n\twant: '%d'\n\tgot : '%d'", 1, n.Pos().Line) + for _, k := range keys { + p := New([]byte(k.src)) + keys, err := p.parseObjectKey() + if err != nil { + t.Fatal(err) + } + + tokens := []scanner.TokenType{} + for _, o := range keys { + tokens = append(tokens, o.token.Type) + } + + equals(t, k.exp, tokens) } - n1, ok := n.(*ObjectList) - if !ok { - t.Fatal("First Node should be of type Source") + errKeys := []struct { + src string + }{ + {`foo 12 {}`}, + {`foo bar = {}`}, + {`foo []`}, + {`12 {}`}, } - for _, ns := range n1.nodes { - fmt.Printf("ns = %+v\n", ns) + for _, k := range errKeys { + p := New([]byte(k.src)) + _, err := p.parseObjectKey() + if err == nil { + t.Errorf("case '%s' should give an error", k.src) + } + } +} + +// equals fails the test if exp is not equal to act. +func equals(tb testing.TB, exp, act interface{}) { + if !reflect.DeepEqual(exp, act) { + _, file, line, _ := runtime.Caller(1) + fmt.Printf("\033[31m%s:%d:\n\n\texp: %#v\n\n\tgot: %#v\033[39m\n\n", filepath.Base(file), line, exp, act) + tb.FailNow() } } From f43287845286d055fbf28b6cc706e076f2aaa3f0 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 14:16:12 +0300 Subject: [PATCH 070/137] parser: simplfiy code --- parser/parser.go | 73 +++++++++++++++++++++++++------------------ parser/parser_test.go | 2 ++ 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 2192829..72b4b3d 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -56,6 +56,17 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { return nil, err } + // either an assignment or object + switch p.tok.Type { + case scanner.ASSIGN: + case scanner.LBRACE: + if len(keys) > 1 { + // nested object + } + + // object or nested object + } + switch len(keys) { case 1: // assignment or object @@ -85,42 +96,44 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { // parseObjectKey parses an 
object key and returns a ObjectKey AST func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { tok := p.scan() + + keys := make([]*ObjectKey, 0) + switch tok.Type { case scanner.IDENT, scanner.STRING: // add first found token - keys := []*ObjectKey{&ObjectKey{tok}} - nestedObj := false - - // now we have three casses - // 1. assignment: KEY = NODE - // 2. object: KEY { } - // 2. nested object: KEY KEY2 ... KEYN {} - for { - tok := p.scan() - switch tok.Type { - case scanner.ASSIGN: - // assignment or object, but not nested objects - if nestedObj { - return nil, fmt.Errorf("nested object expected: LBRACE got: %s", tok.Type) - } - - return keys, nil - case scanner.LBRACE: - // object - return keys, nil - case scanner.IDENT, scanner.STRING: - // nested object - nestedObj = true - keys = append(keys, &ObjectKey{ - token: tok, - }) - default: - return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", tok.Type) - } - } + keys = append(keys, &ObjectKey{token: tok}) default: return nil, fmt.Errorf("expected: IDENT | STRING got: %s", tok.Type) } + + nestedObj := false + + // we have three casses + // 1. assignment: KEY = NODE + // 2. object: KEY { } + // 2. nested object: KEY KEY2 ... 
KEYN {} + for { + tok := p.scan() + switch tok.Type { + case scanner.ASSIGN: + // assignment or object, but not nested objects + if nestedObj { + return nil, fmt.Errorf("nested object expected: LBRACE got: %s", tok.Type) + } + + return keys, nil + case scanner.LBRACE: + // object + return keys, nil + case scanner.IDENT, scanner.STRING: + // nested object + nestedObj = true + keys = append(keys, &ObjectKey{token: tok}) + default: + return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", tok.Type) + } + } } // parseLiteralType parses a literal type and returns a LiteralType AST diff --git a/parser/parser_test.go b/parser/parser_test.go index 5a16f89..9e847f5 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -17,6 +17,8 @@ func TestObjectKey(t *testing.T) { }{ {[]scanner.TokenType{scanner.IDENT}, `foo {}`}, {[]scanner.TokenType{scanner.IDENT}, `foo = {}`}, + {[]scanner.TokenType{scanner.IDENT}, `foo = bar`}, + {[]scanner.TokenType{scanner.IDENT}, `foo = 123`}, {[]scanner.TokenType{scanner.IDENT}, `foo = "${var.bar}`}, {[]scanner.TokenType{scanner.STRING}, `"foo" {}`}, {[]scanner.TokenType{scanner.STRING}, `"foo" = {}`}, From 378bec0cf47e51fe4644f72446e0dc265f76487d Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 14:44:11 +0300 Subject: [PATCH 071/137] parser: parse assignments, WIP --- parser/parser.go | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 72b4b3d..670fa83 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -59,12 +59,25 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { // either an assignment or object switch p.tok.Type { case scanner.ASSIGN: + o := &ObjectItem{ + keys: keys, + assign: p.tok.Pos, + } + + o.val, err = p.parseType() + if err != nil { + return nil, err + } + + return o, nil case scanner.LBRACE: if len(keys) > 1 { // nested object + fmt.Println("nested object") } - // object or nested 
object + // object + fmt.Println("object") } switch len(keys) { @@ -78,8 +91,6 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { fmt.Println(tok) // debug switch tok.Type { - case scanner.ASSIGN: - // return p.parseAssignment() case scanner.LBRACK: // return p.parseListType() case scanner.LBRACE: @@ -93,6 +104,12 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { return nil, fmt.Errorf("not yet implemented: %s", tok.Type) } +// parseType parses any type of Type, such as number, bool, string, object or +// list. +func (p *Parser) parseType() (Node, error) { + return nil, errors.New("ParseType is not implemented yet") +} + // parseObjectKey parses an object key and returns a ObjectKey AST func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { tok := p.scan() @@ -117,7 +134,8 @@ func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { tok := p.scan() switch tok.Type { case scanner.ASSIGN: - // assignment or object, but not nested objects + // assignment or object only, but not nested objects. 
this is not + // allowed: `foo bar = {}` if nestedObj { return nil, fmt.Errorf("nested object expected: LBRACE got: %s", tok.Type) } From 38490ad4dcfea8c47f450d2152bffeffcb35a116 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 22:57:56 +0300 Subject: [PATCH 072/137] parser: add Walk scanner: fix tests --- parser/ast.go | 2 +- parser/parser.go | 36 +++++++++++++++++++----------------- parser/parser_test.go | 18 ++++++++++++++++++ scanner/scanner_test.go | 4 ++-- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index 98b640f..1ff158c 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -9,8 +9,8 @@ type Node interface { } func (ObjectList) node() {} -func (ObjectItem) node() {} func (ObjectKey) node() {} +func (ObjectItem) node() {} func (ObjectType) node() {} func (LiteralType) node() {} diff --git a/parser/parser.go b/parser/parser.go index 670fa83..8aafa1e 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -28,7 +28,7 @@ var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. func (p *Parser) Parse() (Node, error) { - defer un(trace(p, "ParseSource")) + defer un(trace(p, "ParseObjectList")) node := &ObjectList{} for { @@ -80,39 +80,37 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { fmt.Println("object") } - switch len(keys) { - case 1: - // assignment or object - default: - // nested object - } + return nil, fmt.Errorf("not yet implemented: %s", p.tok.Type) +} +// parseType parses any type of Type, such as number, bool, string, object or +// list. 
+func (p *Parser) parseType() (Node, error) { + defer un(trace(p, "ParseType")) tok := p.scan() - fmt.Println(tok) // debug switch tok.Type { - case scanner.LBRACK: - // return p.parseListType() + case scanner.NUMBER, scanner.FLOAT, scanner.BOOL, scanner.STRING: + return p.parseLiteralType() case scanner.LBRACE: - // return p.parseObjectTpe() + return p.parseObjectType() + case scanner.LBRACK: + return p.parseListType() case scanner.COMMENT: // implement comment case scanner.EOF: return nil, errEofToken } - return nil, fmt.Errorf("not yet implemented: %s", tok.Type) -} - -// parseType parses any type of Type, such as number, bool, string, object or -// list. -func (p *Parser) parseType() (Node, error) { return nil, errors.New("ParseType is not implemented yet") } // parseObjectKey parses an object key and returns a ObjectKey AST func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { tok := p.scan() + if tok.Type == scanner.EOF { + return nil, errEofToken + } keys := make([]*ObjectKey, 0) @@ -165,11 +163,15 @@ func (p *Parser) parseLiteralType() (*LiteralType, error) { // parseObjectType parses an object type and returns a ObjectType AST func (p *Parser) parseObjectType() (*ObjectType, error) { + defer un(trace(p, "ParseObjectYpe")) + return nil, errors.New("ObjectType is not implemented yet") } // parseListType parses a list type and returns a ListType AST func (p *Parser) parseListType() (*ListType, error) { + defer un(trace(p, "ParseListType")) + return nil, errors.New("ListType is not implemented yet") } diff --git a/parser/parser_test.go b/parser/parser_test.go index 9e847f5..efbb8d2 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -10,6 +10,24 @@ import ( "github.com/fatih/hcl/scanner" ) +func TestParseType(t *testing.T) { + src := `foo = true` + p := New([]byte(src)) + p.enableTrace = true + + n, err := p.Parse() + if err != nil { + t.Fatal(err) + } + + fmt.Printf("n = %+v\n", n) + + Walk(n, func(node Node) bool { + fmt.Printf("node = 
%+v\n", node) + return true + }) +} + func TestObjectKey(t *testing.T) { keys := []struct { exp []scanner.TokenType diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 0556766..62590d6 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -336,7 +336,7 @@ func TestRealExample(t *testing.T) { t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String()) } - if l.literal != tok.String() { + if l.literal != tok.Text { t.Errorf("got: %s want %s\n", tok, l.literal) } } @@ -408,7 +408,7 @@ func testTokenList(t *testing.T, tokenList []tokenPair) { t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text) } - if tok.String() != ident.text { + if tok.Text != ident.text { t.Errorf("text = %q want %q", tok.String(), ident.text) } From 72f3456c0fe9ee5686ddd97dd6c1ac2786b021ec Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 22:58:38 +0300 Subject: [PATCH 073/137] scanner: do not panic if there is nothing to unread --- parser/walk.go | 36 ++++++++++++++++++++++++++++++++++++ scanner/scanner.go | 7 +++++-- 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 parser/walk.go diff --git a/parser/walk.go b/parser/walk.go new file mode 100644 index 0000000..2bb9ed5 --- /dev/null +++ b/parser/walk.go @@ -0,0 +1,36 @@ +package parser + +// Walk traverses an AST in depth-first order: It starts by calling fn(node); +// node must not be nil. If f returns true, Walk invokes f recursively for +// each of the non-nil children of node, followed by a call of f(nil). 
+func Walk(node Node, fn func(Node) bool) { + if !fn(node) { + return + } + + switch n := node.(type) { + case *ObjectList: + for _, item := range n.items { + Walk(item, fn) + } + case *ObjectKey: + // nothing to do + case *ObjectItem: + for _, k := range n.keys { + Walk(k, fn) + } + Walk(n.val, fn) + case *LiteralType: + // nothing to do + case *ListType: + for _, l := range n.list { + Walk(l, fn) + } + case *ObjectType: + for _, l := range n.list { + Walk(l, fn) + } + } + + fn(nil) +} diff --git a/scanner/scanner.go b/scanner/scanner.go index 42234a2..1d6696f 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -414,7 +414,7 @@ func (s *Scanner) scanEscape() rune { } // scanDigits scans a rune with the given base for n times. For example an -// octan notation \184 would yield in scanDigits(ch, 8, 3) +// octal notation \184 would yield in scanDigits(ch, 8, 3) func (s *Scanner) scanDigits(ch rune, base, n int) rune { for n > 0 && digitVal(ch) < base { ch = s.next() @@ -436,7 +436,10 @@ func (s *Scanner) scanIdentifier() string { for isLetter(ch) || isDigit(ch) { ch = s.next() } - s.unread() // we got identifier, put back latest char + + if ch != eof { + s.unread() // we got identifier, put back latest char + } return string(s.src[offs:s.srcPos.Offset]) } From e93a8e97ca78bc183efe2da89114cbd29a63a1e8 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 23:12:26 +0300 Subject: [PATCH 074/137] hcl: split into meaningful packages --- ast/ast.go | 94 +++++++ {parser => ast}/walk.go | 12 +- parser/ast.go | 116 --------- parser/parser.go | 68 ++--- parser/parser_test.go | 35 +-- scanner/scanner.go | 64 ++--- scanner/scanner_test.go | 422 ++++++++++++++++--------------- {scanner => token}/position.go | 2 +- {scanner => token}/token.go | 2 +- {scanner => token}/token_test.go | 2 +- 10 files changed, 401 insertions(+), 416 deletions(-) create mode 100644 ast/ast.go rename {parser => ast}/walk.go (78%) delete mode 100644 parser/ast.go rename {scanner => 
token}/position.go (98%) rename {scanner => token}/token.go (99%) rename {scanner => token}/token_test.go (97%) diff --git a/ast/ast.go b/ast/ast.go new file mode 100644 index 0000000..9c8a01e --- /dev/null +++ b/ast/ast.go @@ -0,0 +1,94 @@ +package ast + +import "github.com/fatih/hcl/token" + +// Node is an element in the abstract syntax tree. +type Node interface { + node() + Pos() token.Pos +} + +func (ObjectList) node() {} +func (ObjectKey) node() {} +func (ObjectItem) node() {} + +func (ObjectType) node() {} +func (LiteralType) node() {} +func (ListType) node() {} + +// ObjectList represents a list of ObjectItems. An HCL file itself is an +// ObjectList. +type ObjectList struct { + Items []*ObjectItem +} + +func (o *ObjectList) Add(item *ObjectItem) { + o.Items = append(o.Items, item) +} + +func (o *ObjectList) Pos() token.Pos { + // always returns the uninitiliazed position + return o.Items[0].Pos() +} + +// ObjectItem represents a HCL Object Item. An item is represented with a key +// (or keys). It can be an assignment or an object (both normal and nested) +type ObjectItem struct { + // keys is only one length long if it's of type assignment. If it's a + // nested object it can be larger than one. In that case "assign" is + // invalid as there is no assignments for a nested object. + Keys []*ObjectKey + + // assign contains the position of "=", if any + Assign token.Pos + + // val is the item itself. It can be an object,list, number, bool or a + // string. If key length is larger than one, val can be only of type + // Object. + Val Node +} + +func (o *ObjectItem) Pos() token.Pos { + return o.Keys[0].Pos() +} + +// ObjectKeys are either an identifier or of type string. +type ObjectKey struct { + Token token.Token +} + +func (o *ObjectKey) Pos() token.Pos { + return o.Token.Pos +} + +// LiteralType represents a literal of basic type. 
Valid types are: +// token.NUMBER, token.FLOAT, token.BOOL and token.STRING +type LiteralType struct { + Token token.Token +} + +func (l *LiteralType) Pos() token.Pos { + return l.Token.Pos +} + +// ListStatement represents a HCL List type +type ListType struct { + Lbrack token.Pos // position of "[" + Rbrack token.Pos // position of "]" + List []Node // the elements in lexical order +} + +func (l *ListType) Pos() token.Pos { + return l.Lbrack +} + +// ObjectType represents a HCL Object Type +type ObjectType struct { + Lbrace token.Pos // position of "{" + Rbrace token.Pos // position of "}" + List []Node // the nodes in lexical order +} + +func (b *ObjectType) Pos() token.Pos { + return b.Lbrace +} diff --git a/parser/walk.go b/ast/walk.go similarity index 78% rename from parser/walk.go rename to ast/walk.go index 2bb9ed5..c015a67 100644 --- a/parser/walk.go +++ b/ast/walk.go @@ -1,4 +1,4 @@ -package parser +package ast // Walk traverses an AST in depth-first order: It starts by calling fn(node); // node must not be nil. If f returns true, Walk invokes f recursively for @@ -10,24 +10,24 @@ func Walk(node Node, fn func(Node) bool) { switch n := node.(type) { case *ObjectList: - for _, item := range n.items { + for _, item := range n.Items { Walk(item, fn) } case *ObjectKey: // nothing to do case *ObjectItem: - for _, k := range n.keys { + for _, k := range n.Keys { Walk(k, fn) } - Walk(n.val, fn) + Walk(n.Val, fn) case *LiteralType: // nothing to do case *ListType: - for _, l := range n.list { + for _, l := range n.List { Walk(l, fn) } case *ObjectType: - for _, l := range n.list { + for _, l := range n.List { Walk(l, fn) } } diff --git a/parser/ast.go b/parser/ast.go deleted file mode 100644 index 1ff158c..0000000 --- a/parser/ast.go +++ /dev/null @@ -1,116 +0,0 @@ -package parser - -import "github.com/fatih/hcl/scanner" - -// Node is an element in the abstract syntax tree. 
-type Node interface { - node() - Pos() scanner.Pos -} - -func (ObjectList) node() {} -func (ObjectKey) node() {} -func (ObjectItem) node() {} - -func (ObjectType) node() {} -func (LiteralType) node() {} -func (ListType) node() {} - -// ObjectList represents a list of ObjectItems. An HCL file itself is an -// ObjectList. -type ObjectList struct { - items []*ObjectItem -} - -func (o *ObjectList) add(item *ObjectItem) { - o.items = append(o.items, item) -} - -func (o *ObjectList) Pos() scanner.Pos { - // always returns the uninitiliazed position - return o.items[0].Pos() -} - -// ObjectItem represents a HCL Object Item. An item is represented with a key -// (or keys). It can be an assignment or an object (both normal and nested) -type ObjectItem struct { - // keys is only one length long if it's of type assignment. If it's a - // nested object it can be larger than one. In that case "assign" is - // invalid as there is no assignments for a nested object. - keys []*ObjectKey - - // assign contains the position of "=", if any - assign scanner.Pos - - // val is the item itself. It can be an object,list, number, bool or a - // string. If key length is larger than one, val can be only of type - // Object. - val Node -} - -func (o *ObjectItem) Pos() scanner.Pos { - return o.keys[0].Pos() -} - -// ObjectKeys are either an identifier or of type string. -type ObjectKey struct { - token scanner.Token -} - -func (o *ObjectKey) Pos() scanner.Pos { - return o.token.Pos -} - -// isValid() returns true if the underlying identifier satisfies one of the -// valid types (IDENT or STRING) -func (o *ObjectKey) isValid() bool { - switch o.token.Type { - case scanner.IDENT, scanner.STRING: - return true - default: - return false - } -} - -// LiteralType represents a literal of basic type. 
Valid types are: -// scanner.NUMBER, scanner.FLOAT, scanner.BOOL and scanner.STRING -type LiteralType struct { - token scanner.Token -} - -// isValid() returns true if the underlying identifier satisfies one of the -// valid types. -func (l *LiteralType) isValid() bool { - switch l.token.Type { - case scanner.NUMBER, scanner.FLOAT, scanner.BOOL, scanner.STRING: - return true - default: - return false - } -} - -func (l *LiteralType) Pos() scanner.Pos { - return l.token.Pos -} - -// ListStatement represents a HCL List type -type ListType struct { - lbrack scanner.Pos // position of "[" - rbrack scanner.Pos // position of "]" - list []Node // the elements in lexical order -} - -func (l *ListType) Pos() scanner.Pos { - return l.lbrack -} - -// ObjectType represents a HCL Object Type -type ObjectType struct { - lbrace scanner.Pos // position of "{" - rbrace scanner.Pos // position of "}" - list []Node // the nodes in lexical order -} - -func (b *ObjectType) Pos() scanner.Pos { - return b.lbrace -} diff --git a/parser/parser.go b/parser/parser.go index 8aafa1e..3fea2a9 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -4,14 +4,16 @@ import ( "errors" "fmt" + "github.com/fatih/hcl/ast" "github.com/fatih/hcl/scanner" + "github.com/fatih/hcl/token" ) type Parser struct { sc *scanner.Scanner - tok scanner.Token // last read token - prevTok scanner.Token // previous read token + tok token.Token // last read token + prevTok token.Token // previous read token enableTrace bool indent int @@ -27,9 +29,9 @@ func New(src []byte) *Parser { var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. 
-func (p *Parser) Parse() (Node, error) { +func (p *Parser) Parse() (ast.Node, error) { defer un(trace(p, "ParseObjectList")) - node := &ObjectList{} + node := &ast.ObjectList{} for { n, err := p.parseObjectItem() @@ -41,14 +43,14 @@ func (p *Parser) Parse() (Node, error) { } // we successfully parsed a node, add it to the final source node - node.add(n) + node.Add(n) } return node, nil } // parseObjectItem parses a single object item -func (p *Parser) parseObjectItem() (*ObjectItem, error) { +func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { defer un(trace(p, "ParseObjectItem")) keys, err := p.parseObjectKey() @@ -58,19 +60,19 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { // either an assignment or object switch p.tok.Type { - case scanner.ASSIGN: - o := &ObjectItem{ - keys: keys, - assign: p.tok.Pos, + case token.ASSIGN: + o := &ast.ObjectItem{ + Keys: keys, + Assign: p.tok.Pos, } - o.val, err = p.parseType() + o.Val, err = p.parseType() if err != nil { return nil, err } return o, nil - case scanner.LBRACE: + case token.LBRACE: if len(keys) > 1 { // nested object fmt.Println("nested object") @@ -85,20 +87,20 @@ func (p *Parser) parseObjectItem() (*ObjectItem, error) { // parseType parses any type of Type, such as number, bool, string, object or // list. 
-func (p *Parser) parseType() (Node, error) { +func (p *Parser) parseType() (ast.Node, error) { defer un(trace(p, "ParseType")) tok := p.scan() switch tok.Type { - case scanner.NUMBER, scanner.FLOAT, scanner.BOOL, scanner.STRING: + case token.NUMBER, token.FLOAT, token.BOOL, token.STRING: return p.parseLiteralType() - case scanner.LBRACE: + case token.LBRACE: return p.parseObjectType() - case scanner.LBRACK: + case token.LBRACK: return p.parseListType() - case scanner.COMMENT: + case token.COMMENT: // implement comment - case scanner.EOF: + case token.EOF: return nil, errEofToken } @@ -106,18 +108,18 @@ func (p *Parser) parseType() (Node, error) { } // parseObjectKey parses an object key and returns a ObjectKey AST -func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { +func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { tok := p.scan() - if tok.Type == scanner.EOF { + if tok.Type == token.EOF { return nil, errEofToken } - keys := make([]*ObjectKey, 0) + keys := make([]*ast.ObjectKey, 0) switch tok.Type { - case scanner.IDENT, scanner.STRING: + case token.IDENT, token.STRING: // add first found token - keys = append(keys, &ObjectKey{token: tok}) + keys = append(keys, &ast.ObjectKey{Token: tok}) default: return nil, fmt.Errorf("expected: IDENT | STRING got: %s", tok.Type) } @@ -131,7 +133,7 @@ func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { for { tok := p.scan() switch tok.Type { - case scanner.ASSIGN: + case token.ASSIGN: // assignment or object only, but not nested objects. 
this is not // allowed: `foo bar = {}` if nestedObj { @@ -139,13 +141,13 @@ func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { } return keys, nil - case scanner.LBRACE: + case token.LBRACE: // object return keys, nil - case scanner.IDENT, scanner.STRING: + case token.IDENT, token.STRING: // nested object nestedObj = true - keys = append(keys, &ObjectKey{token: tok}) + keys = append(keys, &ast.ObjectKey{Token: tok}) default: return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", tok.Type) } @@ -153,23 +155,23 @@ func (p *Parser) parseObjectKey() ([]*ObjectKey, error) { } // parseLiteralType parses a literal type and returns a LiteralType AST -func (p *Parser) parseLiteralType() (*LiteralType, error) { +func (p *Parser) parseLiteralType() (*ast.LiteralType, error) { defer un(trace(p, "ParseLiteral")) - return &LiteralType{ - token: p.tok, + return &ast.LiteralType{ + Token: p.tok, }, nil } // parseObjectType parses an object type and returns a ObjectType AST -func (p *Parser) parseObjectType() (*ObjectType, error) { +func (p *Parser) parseObjectType() (*ast.ObjectType, error) { defer un(trace(p, "ParseObjectYpe")) return nil, errors.New("ObjectType is not implemented yet") } // parseListType parses a list type and returns a ListType AST -func (p *Parser) parseListType() (*ListType, error) { +func (p *Parser) parseListType() (*ast.ListType, error) { defer un(trace(p, "ParseListType")) return nil, errors.New("ListType is not implemented yet") @@ -177,7 +179,7 @@ func (p *Parser) parseListType() (*ListType, error) { // scan returns the next token from the underlying scanner. // If a token has been unscanned then read that instead. -func (p *Parser) scan() scanner.Token { +func (p *Parser) scan() token.Token { // If we have a token on the buffer, then return it. 
if p.n != 0 { p.n = 0 diff --git a/parser/parser_test.go b/parser/parser_test.go index efbb8d2..4af18ab 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -7,7 +7,8 @@ import ( "runtime" "testing" - "github.com/fatih/hcl/scanner" + "github.com/fatih/hcl/ast" + "github.com/fatih/hcl/token" ) func TestParseType(t *testing.T) { @@ -22,7 +23,7 @@ func TestParseType(t *testing.T) { fmt.Printf("n = %+v\n", n) - Walk(n, func(node Node) bool { + ast.Walk(n, func(node ast.Node) bool { fmt.Printf("node = %+v\n", node) return true }) @@ -30,21 +31,21 @@ func TestParseType(t *testing.T) { func TestObjectKey(t *testing.T) { keys := []struct { - exp []scanner.TokenType + exp []token.TokenType src string }{ - {[]scanner.TokenType{scanner.IDENT}, `foo {}`}, - {[]scanner.TokenType{scanner.IDENT}, `foo = {}`}, - {[]scanner.TokenType{scanner.IDENT}, `foo = bar`}, - {[]scanner.TokenType{scanner.IDENT}, `foo = 123`}, - {[]scanner.TokenType{scanner.IDENT}, `foo = "${var.bar}`}, - {[]scanner.TokenType{scanner.STRING}, `"foo" {}`}, - {[]scanner.TokenType{scanner.STRING}, `"foo" = {}`}, - {[]scanner.TokenType{scanner.STRING}, `"foo" = "${var.bar}`}, - {[]scanner.TokenType{scanner.IDENT, scanner.IDENT}, `foo bar {}`}, - {[]scanner.TokenType{scanner.IDENT, scanner.STRING}, `foo "bar" {}`}, - {[]scanner.TokenType{scanner.STRING, scanner.IDENT}, `"foo" bar {}`}, - {[]scanner.TokenType{scanner.IDENT, scanner.IDENT, scanner.IDENT}, `foo bar baz {}`}, + {[]token.TokenType{token.IDENT}, `foo {}`}, + {[]token.TokenType{token.IDENT}, `foo = {}`}, + {[]token.TokenType{token.IDENT}, `foo = bar`}, + {[]token.TokenType{token.IDENT}, `foo = 123`}, + {[]token.TokenType{token.IDENT}, `foo = "${var.bar}`}, + {[]token.TokenType{token.STRING}, `"foo" {}`}, + {[]token.TokenType{token.STRING}, `"foo" = {}`}, + {[]token.TokenType{token.STRING}, `"foo" = "${var.bar}`}, + {[]token.TokenType{token.IDENT, token.IDENT}, `foo bar {}`}, + {[]token.TokenType{token.IDENT, token.STRING}, `foo "bar" {}`}, + 
{[]token.TokenType{token.STRING, token.IDENT}, `"foo" bar {}`}, + {[]token.TokenType{token.IDENT, token.IDENT, token.IDENT}, `foo bar baz {}`}, } for _, k := range keys { @@ -54,9 +55,9 @@ func TestObjectKey(t *testing.T) { t.Fatal(err) } - tokens := []scanner.TokenType{} + tokens := []token.TokenType{} for _, o := range keys { - tokens = append(tokens, o.token.Type) + tokens = append(tokens, o.Token.Type) } equals(t, k.exp, tokens) diff --git a/scanner/scanner.go b/scanner/scanner.go index 1d6696f..f622b18 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -8,6 +8,8 @@ import ( "os" "unicode" "unicode/utf8" + + "github.com/fatih/hcl/token" ) // eof represents a marker rune for the end of the reader. @@ -19,8 +21,8 @@ type Scanner struct { src []byte // Source buffer for immutable access // Source Position - srcPos Pos // current position - prevPos Pos // previous position, used for peek() method + srcPos token.Pos // current position + prevPos token.Pos // previous position, used for peek() method lastCharLen int // length of last character in bytes lastLineLen int // length of last line in characters (for correct column reporting) @@ -30,7 +32,7 @@ type Scanner struct { // Error is called for each error encountered. If no Error // function is set, the error is reported to os.Stderr. - Error func(pos Pos, msg string) + Error func(pos token.Pos, msg string) // ErrorCount is incremented by one for each error encountered. ErrorCount int @@ -39,7 +41,7 @@ type Scanner struct { // Scan. The Filename field is always left untouched by the Scanner. If // an error is reported (via Error) and Position is invalid, the scanner is // not inside a token. - tokPos Pos + tokPos token.Pos } // New creates and initializes a new instance of Scanner using src as @@ -117,7 +119,7 @@ func (s *Scanner) peek() rune { } // Scan scans the next token and returns the token. 
-func (s *Scanner) Scan() Token { +func (s *Scanner) Scan() token.Token { ch := s.next() // skip white space @@ -125,7 +127,7 @@ func (s *Scanner) Scan() Token { ch = s.next() } - var tok TokenType + var tok token.TokenType // token text markings s.tokStart = s.srcPos.Offset - s.lastCharLen @@ -147,47 +149,47 @@ func (s *Scanner) Scan() Token { switch { case isLetter(ch): - tok = IDENT + tok = token.IDENT lit := s.scanIdentifier() if lit == "true" || lit == "false" { - tok = BOOL + tok = token.BOOL } case isDecimal(ch): tok = s.scanNumber(ch) default: switch ch { case eof: - tok = EOF + tok = token.EOF case '"': - tok = STRING + tok = token.STRING s.scanString() case '#', '/': - tok = COMMENT + tok = token.COMMENT s.scanComment(ch) case '.': - tok = PERIOD + tok = token.PERIOD ch = s.peek() if isDecimal(ch) { - tok = FLOAT + tok = token.FLOAT ch = s.scanMantissa(ch) ch = s.scanExponent(ch) } case '[': - tok = LBRACK + tok = token.LBRACK case ']': - tok = RBRACK + tok = token.RBRACK case '{': - tok = LBRACE + tok = token.LBRACE case '}': - tok = RBRACE + tok = token.RBRACE case ',': - tok = COMMA + tok = token.COMMA case '=': - tok = ASSIGN + tok = token.ASSIGN case '+': - tok = ADD + tok = token.ADD case '-': - tok = SUB + tok = token.SUB default: s.err("illegal char") } @@ -203,7 +205,7 @@ func (s *Scanner) Scan() Token { } s.tokStart = s.tokEnd // ensure idempotency of tokenText() call - return Token{ + return token.Token{ Type: tok, Pos: s.tokPos, Text: tokenText, @@ -244,7 +246,7 @@ func (s *Scanner) scanComment(ch rune) { } // scanNumber scans a HCL number definition starting with the given rune -func (s *Scanner) scanNumber(ch rune) TokenType { +func (s *Scanner) scanNumber(ch rune) token.TokenType { if ch == '0' { // check for hexadecimal, octal or float ch = s.next() @@ -265,7 +267,7 @@ func (s *Scanner) scanNumber(ch rune) TokenType { s.unread() } - return NUMBER + return token.NUMBER } // now it's either something like: 0421(octal) or 0.1231(float) @@ 
-283,7 +285,7 @@ func (s *Scanner) scanNumber(ch rune) TokenType { // literals of form 01e10 are treates as Numbers in HCL, which differs from Go. if ch == 'e' || ch == 'E' { ch = s.scanExponent(ch) - return NUMBER + return token.NUMBER } if ch == '.' { @@ -293,7 +295,7 @@ func (s *Scanner) scanNumber(ch rune) TokenType { ch = s.next() ch = s.scanExponent(ch) } - return FLOAT + return token.FLOAT } if illegalOctal { @@ -303,7 +305,7 @@ func (s *Scanner) scanNumber(ch rune) TokenType { if ch != eof { s.unread() } - return NUMBER + return token.NUMBER } s.scanMantissa(ch) @@ -311,7 +313,7 @@ func (s *Scanner) scanNumber(ch rune) TokenType { // literals of form 1e10 are treates as Numbers in HCL, which differs from Go. if ch == 'e' || ch == 'E' { ch = s.scanExponent(ch) - return NUMBER + return token.NUMBER } if ch == '.' { @@ -320,11 +322,11 @@ func (s *Scanner) scanNumber(ch rune) TokenType { ch = s.next() ch = s.scanExponent(ch) } - return FLOAT + return token.FLOAT } s.unread() - return NUMBER + return token.NUMBER } // scanMantissa scans the mantissa begining from the rune. It returns the next @@ -446,7 +448,7 @@ func (s *Scanner) scanIdentifier() string { // recentPosition returns the position of the character immediately after the // character or token returned by the last call to Scan. 
-func (s *Scanner) recentPosition() (pos Pos) { +func (s *Scanner) recentPosition() (pos token.Pos) { pos.Offset = s.srcPos.Offset - s.lastCharLen switch { case s.srcPos.Column > 0: diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 62590d6..f9b2f5e 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -4,161 +4,163 @@ import ( "bytes" "fmt" "testing" + + "github.com/fatih/hcl/token" ) var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" type tokenPair struct { - tok TokenType + tok token.TokenType text string } var tokenLists = map[string][]tokenPair{ "comment": []tokenPair{ - {COMMENT, "//"}, - {COMMENT, "////"}, - {COMMENT, "// comment"}, - {COMMENT, "// /* comment */"}, - {COMMENT, "// // comment //"}, - {COMMENT, "//" + f100}, - {COMMENT, "#"}, - {COMMENT, "##"}, - {COMMENT, "# comment"}, - {COMMENT, "# /* comment */"}, - {COMMENT, "# # comment #"}, - {COMMENT, "#" + f100}, - {COMMENT, "/**/"}, - {COMMENT, "/***/"}, - {COMMENT, "/* comment */"}, - {COMMENT, "/* // comment */"}, - {COMMENT, "/* /* comment */"}, - {COMMENT, "/*\n comment\n*/"}, - {COMMENT, "/*" + f100 + "*/"}, + {token.COMMENT, "//"}, + {token.COMMENT, "////"}, + {token.COMMENT, "// comment"}, + {token.COMMENT, "// /* comment */"}, + {token.COMMENT, "// // comment //"}, + {token.COMMENT, "//" + f100}, + {token.COMMENT, "#"}, + {token.COMMENT, "##"}, + {token.COMMENT, "# comment"}, + {token.COMMENT, "# /* comment */"}, + {token.COMMENT, "# # comment #"}, + {token.COMMENT, "#" + f100}, + {token.COMMENT, "/**/"}, + {token.COMMENT, "/***/"}, + {token.COMMENT, "/* comment */"}, + {token.COMMENT, "/* // comment */"}, + {token.COMMENT, "/* /* comment */"}, + {token.COMMENT, "/*\n comment\n*/"}, + {token.COMMENT, "/*" + f100 + "*/"}, }, "operator": []tokenPair{ - {LBRACK, "["}, - {LBRACE, "{"}, - {COMMA, ","}, - {PERIOD, "."}, - {RBRACK, "]"}, - {RBRACE, "}"}, - {ASSIGN, "="}, - {ADD, "+"}, - {SUB, "-"}, + 
{token.LBRACK, "["}, + {token.LBRACE, "{"}, + {token.COMMA, ","}, + {token.PERIOD, "."}, + {token.RBRACK, "]"}, + {token.RBRACE, "}"}, + {token.ASSIGN, "="}, + {token.ADD, "+"}, + {token.SUB, "-"}, }, "bool": []tokenPair{ - {BOOL, "true"}, - {BOOL, "false"}, + {token.BOOL, "true"}, + {token.BOOL, "false"}, }, - "ident": []tokenPair{ - {IDENT, "a"}, - {IDENT, "a0"}, - {IDENT, "foobar"}, - {IDENT, "abc123"}, - {IDENT, "LGTM"}, - {IDENT, "_"}, - {IDENT, "_abc123"}, - {IDENT, "abc123_"}, - {IDENT, "_abc_123_"}, - {IDENT, "_äöü"}, - {IDENT, "_本"}, - {IDENT, "äöü"}, - {IDENT, "本"}, - {IDENT, "a۰۱۸"}, - {IDENT, "foo६४"}, - {IDENT, "bar9876"}, + "identoken.t": []tokenPair{ + {token.IDENT, "a"}, + {token.IDENT, "a0"}, + {token.IDENT, "foobar"}, + {token.IDENT, "abc123"}, + {token.IDENT, "LGTM"}, + {token.IDENT, "_"}, + {token.IDENT, "_abc123"}, + {token.IDENT, "abc123_"}, + {token.IDENT, "_abc_123_"}, + {token.IDENT, "_äöü"}, + {token.IDENT, "_本"}, + {token.IDENT, "äöü"}, + {token.IDENT, "本"}, + {token.IDENT, "a۰۱۸"}, + {token.IDENT, "foo६४"}, + {token.IDENT, "bar9876"}, }, - "string": []tokenPair{ - {STRING, `" "`}, - {STRING, `"a"`}, - {STRING, `"本"`}, - {STRING, `"\a"`}, - {STRING, `"\b"`}, - {STRING, `"\f"`}, - {STRING, `"\n"`}, - {STRING, `"\r"`}, - {STRING, `"\t"`}, - {STRING, `"\v"`}, - {STRING, `"\""`}, - {STRING, `"\000"`}, - {STRING, `"\777"`}, - {STRING, `"\x00"`}, - {STRING, `"\xff"`}, - {STRING, `"\u0000"`}, - {STRING, `"\ufA16"`}, - {STRING, `"\U00000000"`}, - {STRING, `"\U0000ffAB"`}, - {STRING, `"` + f100 + `"`}, + "stritoken.ng": []tokenPair{ + {token.STRING, `" "`}, + {token.STRING, `"a"`}, + {token.STRING, `"本"`}, + {token.STRING, `"\a"`}, + {token.STRING, `"\b"`}, + {token.STRING, `"\f"`}, + {token.STRING, `"\n"`}, + {token.STRING, `"\r"`}, + {token.STRING, `"\t"`}, + {token.STRING, `"\v"`}, + {token.STRING, `"\""`}, + {token.STRING, `"\000"`}, + {token.STRING, `"\777"`}, + {token.STRING, `"\x00"`}, + {token.STRING, `"\xff"`}, + {token.STRING, 
`"\u0000"`}, + {token.STRING, `"\ufA16"`}, + {token.STRING, `"\U00000000"`}, + {token.STRING, `"\U0000ffAB"`}, + {token.STRING, `"` + f100 + `"`}, }, - "number": []tokenPair{ - {NUMBER, "0"}, - {NUMBER, "1"}, - {NUMBER, "9"}, - {NUMBER, "42"}, - {NUMBER, "1234567890"}, - {NUMBER, "00"}, - {NUMBER, "01"}, - {NUMBER, "07"}, - {NUMBER, "042"}, - {NUMBER, "01234567"}, - {NUMBER, "0x0"}, - {NUMBER, "0x1"}, - {NUMBER, "0xf"}, - {NUMBER, "0x42"}, - {NUMBER, "0x123456789abcDEF"}, - {NUMBER, "0x" + f100}, - {NUMBER, "0X0"}, - {NUMBER, "0X1"}, - {NUMBER, "0XF"}, - {NUMBER, "0X42"}, - {NUMBER, "0X123456789abcDEF"}, - {NUMBER, "0X" + f100}, - {NUMBER, "0e0"}, - {NUMBER, "1e0"}, - {NUMBER, "42e0"}, - {NUMBER, "01234567890e0"}, - {NUMBER, "0E0"}, - {NUMBER, "1E0"}, - {NUMBER, "42E0"}, - {NUMBER, "01234567890E0"}, - {NUMBER, "0e+10"}, - {NUMBER, "1e-10"}, - {NUMBER, "42e+10"}, - {NUMBER, "01234567890e-10"}, - {NUMBER, "0E+10"}, - {NUMBER, "1E-10"}, - {NUMBER, "42E+10"}, - {NUMBER, "01234567890E-10"}, + "numbtoken.er": []tokenPair{ + {token.NUMBER, "0"}, + {token.NUMBER, "1"}, + {token.NUMBER, "9"}, + {token.NUMBER, "42"}, + {token.NUMBER, "1234567890"}, + {token.NUMBER, "00"}, + {token.NUMBER, "01"}, + {token.NUMBER, "07"}, + {token.NUMBER, "042"}, + {token.NUMBER, "01234567"}, + {token.NUMBER, "0x0"}, + {token.NUMBER, "0x1"}, + {token.NUMBER, "0xf"}, + {token.NUMBER, "0x42"}, + {token.NUMBER, "0x123456789abcDEF"}, + {token.NUMBER, "0x" + f100}, + {token.NUMBER, "0X0"}, + {token.NUMBER, "0X1"}, + {token.NUMBER, "0XF"}, + {token.NUMBER, "0X42"}, + {token.NUMBER, "0X123456789abcDEF"}, + {token.NUMBER, "0X" + f100}, + {token.NUMBER, "0e0"}, + {token.NUMBER, "1e0"}, + {token.NUMBER, "42e0"}, + {token.NUMBER, "01234567890e0"}, + {token.NUMBER, "0E0"}, + {token.NUMBER, "1E0"}, + {token.NUMBER, "42E0"}, + {token.NUMBER, "01234567890E0"}, + {token.NUMBER, "0e+10"}, + {token.NUMBER, "1e-10"}, + {token.NUMBER, "42e+10"}, + {token.NUMBER, "01234567890e-10"}, + {token.NUMBER, "0E+10"}, + 
{token.NUMBER, "1E-10"}, + {token.NUMBER, "42E+10"}, + {token.NUMBER, "01234567890E-10"}, }, - "float": []tokenPair{ - {FLOAT, "0."}, - {FLOAT, "1."}, - {FLOAT, "42."}, - {FLOAT, "01234567890."}, - {FLOAT, ".0"}, - {FLOAT, ".1"}, - {FLOAT, ".42"}, - {FLOAT, ".0123456789"}, - {FLOAT, "0.0"}, - {FLOAT, "1.0"}, - {FLOAT, "42.0"}, - {FLOAT, "01234567890.0"}, - {FLOAT, "01.8e0"}, - {FLOAT, "1.4e0"}, - {FLOAT, "42.2e0"}, - {FLOAT, "01234567890.12e0"}, - {FLOAT, "0.E0"}, - {FLOAT, "1.12E0"}, - {FLOAT, "42.123E0"}, - {FLOAT, "01234567890.213E0"}, - {FLOAT, "0.2e+10"}, - {FLOAT, "1.2e-10"}, - {FLOAT, "42.54e+10"}, - {FLOAT, "01234567890.98e-10"}, - {FLOAT, "0.1E+10"}, - {FLOAT, "1.1E-10"}, - {FLOAT, "42.1E+10"}, - {FLOAT, "01234567890.1E-10"}, + "floatoken.t": []tokenPair{ + {token.FLOAT, "0."}, + {token.FLOAT, "1."}, + {token.FLOAT, "42."}, + {token.FLOAT, "01234567890."}, + {token.FLOAT, ".0"}, + {token.FLOAT, ".1"}, + {token.FLOAT, ".42"}, + {token.FLOAT, ".0123456789"}, + {token.FLOAT, "0.0"}, + {token.FLOAT, "1.0"}, + {token.FLOAT, "42.0"}, + {token.FLOAT, "01234567890.0"}, + {token.FLOAT, "01.8e0"}, + {token.FLOAT, "1.4e0"}, + {token.FLOAT, "42.2e0"}, + {token.FLOAT, "01234567890.12e0"}, + {token.FLOAT, "0.E0"}, + {token.FLOAT, "1.12E0"}, + {token.FLOAT, "42.123E0"}, + {token.FLOAT, "01234567890.213E0"}, + {token.FLOAT, "0.2e+10"}, + {token.FLOAT, "1.2e-10"}, + {token.FLOAT, "42.54e+10"}, + {token.FLOAT, "01234567890.98e-10"}, + {token.FLOAT, "0.1E+10"}, + {token.FLOAT, "1.1E-10"}, + {token.FLOAT, "42.1E+10"}, + {token.FLOAT, "01234567890.1E-10"}, }, } @@ -184,7 +186,7 @@ func TestPosition(t *testing.T) { s := New(buf.Bytes()) - pos := Pos{"", 4, 1, 5} + pos := token.Pos{"", 4, 1, 5} s.Scan() for _, listName := range orderedTokenLists { @@ -270,63 +272,63 @@ func TestRealExample(t *testing.T) { }` literals := []struct { - tokenType TokenType + tokenType token.TokenType literal string }{ - {COMMENT, `// This comes from Terraform, as a test`}, - {IDENT, `variable`}, - 
{STRING, `"foo"`}, - {LBRACE, `{`}, - {IDENT, `default`}, - {ASSIGN, `=`}, - {STRING, `"bar"`}, - {IDENT, `description`}, - {ASSIGN, `=`}, - {STRING, `"bar"`}, - {RBRACE, `}`}, - {IDENT, `provider`}, - {STRING, `"aws"`}, - {LBRACE, `{`}, - {IDENT, `access_key`}, - {ASSIGN, `=`}, - {STRING, `"foo"`}, - {IDENT, `secret_key`}, - {ASSIGN, `=`}, - {STRING, `"bar"`}, - {RBRACE, `}`}, - {IDENT, `resource`}, - {STRING, `"aws_security_group"`}, - {STRING, `"firewall"`}, - {LBRACE, `{`}, - {IDENT, `count`}, - {ASSIGN, `=`}, - {NUMBER, `5`}, - {RBRACE, `}`}, - {IDENT, `resource`}, - {IDENT, `aws_instance`}, - {STRING, `"web"`}, - {LBRACE, `{`}, - {IDENT, `ami`}, - {ASSIGN, `=`}, - {STRING, `"${var.foo}"`}, - {IDENT, `security_groups`}, - {ASSIGN, `=`}, - {LBRACK, `[`}, - {STRING, `"foo"`}, - {COMMA, `,`}, - {STRING, `"${aws_security_group.firewall.foo}"`}, - {RBRACK, `]`}, - {IDENT, `network_interface`}, - {LBRACE, `{`}, - {IDENT, `device_index`}, - {ASSIGN, `=`}, - {NUMBER, `0`}, - {IDENT, `description`}, - {ASSIGN, `=`}, - {STRING, `"Main network interface"`}, - {RBRACE, `}`}, - {RBRACE, `}`}, - {EOF, ``}, + {token.COMMENT, `// This comes from Terraform, as a test`}, + {token.IDENT, `variable`}, + {token.STRING, `"foo"`}, + {token.LBRACE, `{`}, + {token.IDENT, `default`}, + {token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.IDENT, `description`}, + {token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.RBRACE, `}`}, + {token.IDENT, `provider`}, + {token.STRING, `"aws"`}, + {token.LBRACE, `{`}, + {token.IDENT, `access_key`}, + {token.ASSIGN, `=`}, + {token.STRING, `"foo"`}, + {token.IDENT, `secret_key`}, + {token.ASSIGN, `=`}, + {token.STRING, `"bar"`}, + {token.RBRACE, `}`}, + {token.IDENT, `resource`}, + {token.STRING, `"aws_security_group"`}, + {token.STRING, `"firewall"`}, + {token.LBRACE, `{`}, + {token.IDENT, `count`}, + {token.ASSIGN, `=`}, + {token.NUMBER, `5`}, + {token.RBRACE, `}`}, + {token.IDENT, `resource`}, + {token.IDENT, `aws_instance`}, + 
{token.STRING, `"web"`}, + {token.LBRACE, `{`}, + {token.IDENT, `ami`}, + {token.ASSIGN, `=`}, + {token.STRING, `"${var.foo}"`}, + {token.IDENT, `security_groups`}, + {token.ASSIGN, `=`}, + {token.LBRACK, `[`}, + {token.STRING, `"foo"`}, + {token.COMMA, `,`}, + {token.STRING, `"${aws_security_group.firewall.foo}"`}, + {token.RBRACK, `]`}, + {token.IDENT, `network_interface`}, + {token.LBRACE, `{`}, + {token.IDENT, `device_index`}, + {token.ASSIGN, `=`}, + {token.NUMBER, `0`}, + {token.IDENT, `description`}, + {token.ASSIGN, `=`}, + {token.STRING, `"Main network interface"`}, + {token.RBRACE, `}`}, + {token.RBRACE, `}`}, + {token.EOF, ``}, } s := New([]byte(complexHCL)) @@ -344,32 +346,32 @@ func TestRealExample(t *testing.T) { } func TestError(t *testing.T) { - testError(t, "\x80", "1:1", "illegal UTF-8 encoding", ILLEGAL) - testError(t, "\xff", "1:1", "illegal UTF-8 encoding", ILLEGAL) + testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) + testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) - testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", IDENT) - testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", IDENT) + testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT) + testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT) - testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", STRING) - testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", STRING) + testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) + testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) - testError(t, `01238`, "1:6", "illegal octal number", NUMBER) - testError(t, `01238123`, "1:9", "illegal octal number", NUMBER) - testError(t, `0x`, "1:3", "illegal hexadecimal number", NUMBER) - testError(t, `0xg`, "1:3", "illegal hexadecimal number", NUMBER) - testError(t, `'aa'`, "1:1", "illegal char", ILLEGAL) + testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER) + 
testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER) + testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER) + testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER) + testError(t, `'aa'`, "1:1", "illegal char", token.ILLEGAL) - testError(t, `"`, "1:2", "literal not terminated", STRING) - testError(t, `"abc`, "1:5", "literal not terminated", STRING) - testError(t, `"abc`+"\n", "1:5", "literal not terminated", STRING) - testError(t, `/*/`, "1:4", "comment not terminated", COMMENT) + testError(t, `"`, "1:2", "literal not terminated", token.STRING) + testError(t, `"abc`, "1:5", "literal not terminated", token.STRING) + testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING) + testError(t, `/*/`, "1:4", "comment not terminated", token.COMMENT) } -func testError(t *testing.T, src, pos, msg string, tok TokenType) { +func testError(t *testing.T, src, pos, msg string, tok token.TokenType) { s := New([]byte(src)) errorCalled := false - s.Error = func(p Pos, m string) { + s.Error = func(p token.Pos, m string) { if !errorCalled { if pos != p.String() { t.Errorf("pos = %q, want %q for %q", p, pos, src) diff --git a/scanner/position.go b/token/position.go similarity index 98% rename from scanner/position.go rename to token/position.go index aef546c..c151e50 100644 --- a/scanner/position.go +++ b/token/position.go @@ -1,4 +1,4 @@ -package scanner +package token import "fmt" diff --git a/scanner/token.go b/token/token.go similarity index 99% rename from scanner/token.go rename to token/token.go index deb6f9a..ada0d86 100644 --- a/scanner/token.go +++ b/token/token.go @@ -1,4 +1,4 @@ -package scanner +package token import ( "fmt" diff --git a/scanner/token_test.go b/token/token_test.go similarity index 97% rename from scanner/token_test.go rename to token/token_test.go index 0e05576..534c1d0 100644 --- a/scanner/token_test.go +++ b/token/token_test.go @@ -1,4 +1,4 @@ -package scanner +package token import "testing" 
From 9ee8cdff123baa7da719df5d12239c2512fafadb Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 23:22:28 +0300 Subject: [PATCH 075/137] hcl: move not finished files into their own folders --- hclfmt.go => fmt/fmt.go | 0 hclprinter/hclprinter.go => printer/printer.go | 2 +- hclprinter/hclprinter_test.go => printer/printer_test.go | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename hclfmt.go => fmt/fmt.go (100%) rename hclprinter/hclprinter.go => printer/printer.go (99%) rename hclprinter/hclprinter_test.go => printer/printer_test.go (98%) diff --git a/hclfmt.go b/fmt/fmt.go similarity index 100% rename from hclfmt.go rename to fmt/fmt.go diff --git a/hclprinter/hclprinter.go b/printer/printer.go similarity index 99% rename from hclprinter/hclprinter.go rename to printer/printer.go index ffe1777..a888ce7 100644 --- a/hclprinter/hclprinter.go +++ b/printer/printer.go @@ -1,4 +1,4 @@ -package hclprinter +package printer import ( "bytes" diff --git a/hclprinter/hclprinter_test.go b/printer/printer_test.go similarity index 98% rename from hclprinter/hclprinter_test.go rename to printer/printer_test.go index 64c8a1b..77c3913 100644 --- a/hclprinter/hclprinter_test.go +++ b/printer/printer_test.go @@ -1,4 +1,4 @@ -package hclprinter +package printer import ( "os" From 32f4e84345b4866aa663b0f98e19fd1235881beb Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 16 Oct 2015 23:23:23 +0300 Subject: [PATCH 076/137] token: rename TokenType to Type --- parser/parser_test.go | 28 ++++++++++++++-------------- scanner/scanner.go | 4 ++-- scanner/scanner_test.go | 6 +++--- token/token.go | 18 +++++++++--------- token/token_test.go | 4 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/parser/parser_test.go b/parser/parser_test.go index 4af18ab..31216a2 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -31,21 +31,21 @@ func TestParseType(t *testing.T) { func TestObjectKey(t *testing.T) { keys := []struct { - exp 
[]token.TokenType + exp []token.Type src string }{ - {[]token.TokenType{token.IDENT}, `foo {}`}, - {[]token.TokenType{token.IDENT}, `foo = {}`}, - {[]token.TokenType{token.IDENT}, `foo = bar`}, - {[]token.TokenType{token.IDENT}, `foo = 123`}, - {[]token.TokenType{token.IDENT}, `foo = "${var.bar}`}, - {[]token.TokenType{token.STRING}, `"foo" {}`}, - {[]token.TokenType{token.STRING}, `"foo" = {}`}, - {[]token.TokenType{token.STRING}, `"foo" = "${var.bar}`}, - {[]token.TokenType{token.IDENT, token.IDENT}, `foo bar {}`}, - {[]token.TokenType{token.IDENT, token.STRING}, `foo "bar" {}`}, - {[]token.TokenType{token.STRING, token.IDENT}, `"foo" bar {}`}, - {[]token.TokenType{token.IDENT, token.IDENT, token.IDENT}, `foo bar baz {}`}, + {[]token.Type{token.IDENT}, `foo {}`}, + {[]token.Type{token.IDENT}, `foo = {}`}, + {[]token.Type{token.IDENT}, `foo = bar`}, + {[]token.Type{token.IDENT}, `foo = 123`}, + {[]token.Type{token.IDENT}, `foo = "${var.bar}`}, + {[]token.Type{token.STRING}, `"foo" {}`}, + {[]token.Type{token.STRING}, `"foo" = {}`}, + {[]token.Type{token.STRING}, `"foo" = "${var.bar}`}, + {[]token.Type{token.IDENT, token.IDENT}, `foo bar {}`}, + {[]token.Type{token.IDENT, token.STRING}, `foo "bar" {}`}, + {[]token.Type{token.STRING, token.IDENT}, `"foo" bar {}`}, + {[]token.Type{token.IDENT, token.IDENT, token.IDENT}, `foo bar baz {}`}, } for _, k := range keys { @@ -55,7 +55,7 @@ func TestObjectKey(t *testing.T) { t.Fatal(err) } - tokens := []token.TokenType{} + tokens := []token.Type{} for _, o := range keys { tokens = append(tokens, o.Token.Type) } diff --git a/scanner/scanner.go b/scanner/scanner.go index f622b18..c600014 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -127,7 +127,7 @@ func (s *Scanner) Scan() token.Token { ch = s.next() } - var tok token.TokenType + var tok token.Type // token text markings s.tokStart = s.srcPos.Offset - s.lastCharLen @@ -246,7 +246,7 @@ func (s *Scanner) scanComment(ch rune) { } // scanNumber scans a HCL number 
definition starting with the given rune -func (s *Scanner) scanNumber(ch rune) token.TokenType { +func (s *Scanner) scanNumber(ch rune) token.Type { if ch == '0' { // check for hexadecimal, octal or float ch = s.next() diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index f9b2f5e..56ae526 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -11,7 +11,7 @@ import ( var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" type tokenPair struct { - tok token.TokenType + tok token.Type text string } @@ -272,7 +272,7 @@ func TestRealExample(t *testing.T) { }` literals := []struct { - tokenType token.TokenType + tokenType token.Type literal string }{ {token.COMMENT, `// This comes from Terraform, as a test`}, @@ -367,7 +367,7 @@ func TestError(t *testing.T) { testError(t, `/*/`, "1:4", "comment not terminated", token.COMMENT) } -func testError(t *testing.T, src, pos, msg string, tok token.TokenType) { +func testError(t *testing.T, src, pos, msg string, tok token.Type) { s := New([]byte(src)) errorCalled := false diff --git a/token/token.go b/token/token.go index ada0d86..7c51f69 100644 --- a/token/token.go +++ b/token/token.go @@ -7,17 +7,17 @@ import ( // Token defines a single HCL token which can be obtained via the Scanner type Token struct { - Type TokenType + Type Type Pos Pos Text string } -// TokenType is the set of lexical tokens of the HCL (HashiCorp Configuration Language) -type TokenType int +// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language) +type Type int const ( // Special tokens - ILLEGAL TokenType = iota + ILLEGAL Type = iota EOF COMMENT @@ -72,9 +72,9 @@ var tokens = [...]string{ } // String returns the string corresponding to the token tok. 
-func (t TokenType) String() string { +func (t Type) String() string { s := "" - if 0 <= t && t < TokenType(len(tokens)) { + if 0 <= t && t < Type(len(tokens)) { s = tokens[t] } if s == "" { @@ -85,15 +85,15 @@ func (t TokenType) String() string { // IsIdentifier returns true for tokens corresponding to identifiers and basic // type literals; it returns false otherwise. -func (t TokenType) IsIdentifier() bool { return identifier_beg < t && t < identifier_end } +func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end } // IsLiteral returns true for tokens corresponding to basic type literals; it // returns false otherwise. -func (t TokenType) IsLiteral() bool { return literal_beg < t && t < literal_end } +func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end } // IsOperator returns true for tokens corresponding to operators and // delimiters; it returns false otherwise. -func (t TokenType) IsOperator() bool { return operator_beg < t && t < operator_end } +func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end } // String returns the token's literal text. 
Note that this is only // applicable for certain token types, such as token.IDENT, diff --git a/token/token_test.go b/token/token_test.go index 534c1d0..796945c 100644 --- a/token/token_test.go +++ b/token/token_test.go @@ -2,9 +2,9 @@ package token import "testing" -func TestTokenTypeString(t *testing.T) { +func TestTypeString(t *testing.T) { var tokens = []struct { - tt TokenType + tt Type str string }{ {ILLEGAL, "ILLEGAL"}, From 4e690ec67d04223c3306c2b49935b08c1d230e6b Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 17 Oct 2015 00:00:05 +0300 Subject: [PATCH 077/137] parser: add ListType support --- ast/ast.go | 4 ++++ parser/parser.go | 42 +++++++++++++++++++++++++++++++++++------- parser/parser_test.go | 18 ++++++++++++------ 3 files changed, 51 insertions(+), 13 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 9c8a01e..b0d8251 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -82,6 +82,10 @@ func (l *ListType) Pos() token.Pos { return l.Lbrack } +func (l *ListType) Add(node Node) { + l.List = append(l.List, node) +} + // ObjectType represents a HCL Object Type type ObjectType struct { Lbrace token.Pos // position of "{" diff --git a/parser/parser.go b/parser/parser.go index 3fea2a9..d007578 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -154,6 +154,41 @@ func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { } } +// parseListType parses a list type and returns a ListType AST +func (p *Parser) parseListType() (*ast.ListType, error) { + defer un(trace(p, "ParseListType")) + + l := &ast.ListType{ + Lbrack: p.tok.Pos, + } + + for { + tok := p.scan() + switch tok.Type { + case token.NUMBER, token.FLOAT, token.STRING: + node, err := p.parseLiteralType() + if err != nil { + return nil, err + } + l.Add(node) + case token.COMMA: + // get next list item or we are at the end + continue + case token.BOOL: + // TODO(arslan) should we support? not supported by HCL yet + case token.LBRACK: + // TODO(arslan) should we support nested lists? 
+ case token.RBRACK: + // finished + l.Rbrack = p.tok.Pos + return l, nil + default: + return nil, fmt.Errorf("unexpected token while parsing list: %s", tok.Type) + } + + } +} + // parseLiteralType parses a literal type and returns a LiteralType AST func (p *Parser) parseLiteralType() (*ast.LiteralType, error) { defer un(trace(p, "ParseLiteral")) @@ -170,13 +205,6 @@ func (p *Parser) parseObjectType() (*ast.ObjectType, error) { return nil, errors.New("ObjectType is not implemented yet") } -// parseListType parses a list type and returns a ListType AST -func (p *Parser) parseListType() (*ast.ListType, error) { - defer un(trace(p, "ParseListType")) - - return nil, errors.New("ListType is not implemented yet") -} - // scan returns the next token from the underlying scanner. // If a token has been unscanned then read that instead. func (p *Parser) scan() token.Token { diff --git a/parser/parser_test.go b/parser/parser_test.go index 31216a2..437c254 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -12,19 +12,25 @@ import ( ) func TestParseType(t *testing.T) { - src := `foo = true` + src := `foo = ["fatih", "arslan", 1224]` p := New([]byte(src)) p.enableTrace = true - n, err := p.Parse() + node, err := p.Parse() if err != nil { t.Fatal(err) } - fmt.Printf("n = %+v\n", n) - - ast.Walk(n, func(node ast.Node) bool { - fmt.Printf("node = %+v\n", node) + ast.Walk(node, func(n ast.Node) bool { + if list, ok := n.(*ast.ObjectList); ok { + for _, l := range list.Items { + for _, k := range l.Keys { + fmt.Printf("key = %+v\n", k) + } + fmt.Printf("val = %+v\n", l.Val) + } + return false + } return true }) } From 16d5eb5f08c90f51076f8d91062e5dbcef4fdf09 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 17 Oct 2015 01:14:40 +0300 Subject: [PATCH 078/137] parser: add support for object type --- ast/ast.go | 10 +++++----- ast/walk.go | 2 +- parser/parser.go | 42 ++++++++++++++++++++++++++++++++---------- parser/parser_test.go | 6 +++++- 4 files changed, 43 
insertions(+), 17 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index b0d8251..c234e5b 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -88,11 +88,11 @@ func (l *ListType) Add(node Node) { // ObjectType represents a HCL Object Type type ObjectType struct { - Lbrace token.Pos // position of "{" - Rbrace token.Pos // position of "}" - List []Node // the nodes in lexical order + Lbrace token.Pos // position of "{" + Rbrace token.Pos // position of "}" + List *ObjectList // the nodes in lexical order } -func (b *ObjectType) Pos() token.Pos { - return b.Lbrace +func (o *ObjectType) Pos() token.Pos { + return o.Lbrace } diff --git a/ast/walk.go b/ast/walk.go index c015a67..f198c3f 100644 --- a/ast/walk.go +++ b/ast/walk.go @@ -27,7 +27,7 @@ func Walk(node Node, fn func(Node) bool) { Walk(l, fn) } case *ObjectType: - for _, l := range n.List { + for _, l := range n.List.Items { Walk(l, fn) } } diff --git a/parser/parser.go b/parser/parser.go index d007578..b9c128a 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -30,6 +30,10 @@ var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. 
func (p *Parser) Parse() (ast.Node, error) { + return p.parseObjectList() +} + +func (p *Parser) parseObjectList() (*ast.ObjectList, error) { defer un(trace(p, "ParseObjectList")) node := &ast.ObjectList{} @@ -75,11 +79,11 @@ func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { case token.LBRACE: if len(keys) > 1 { // nested object - fmt.Println("nested object") + panic("nested object is not implemented") } // object - fmt.Println("object") + panic("normal object is not implemented") } return nil, fmt.Errorf("not yet implemented: %s", p.tok.Type) @@ -154,10 +158,33 @@ func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { } } +// parseObjectType parses an object type and returns a ObjectType AST +func (p *Parser) parseObjectType() (*ast.ObjectType, error) { + defer un(trace(p, "ParseObjectType")) + + // we assume that the currently scanned token is a LBRACE + o := &ast.ObjectType{ + Lbrace: p.tok.Pos, + } + + l, err := p.parseObjectList() + + // if we hit RBRACE, we are good to go (means we parsed all Items), if it's + // not a RBRACE, it's a syntax error and we just return it. + if err != nil && p.tok.Type != token.RBRACE { + return nil, err + } + + o.List = l + o.Rbrace = p.tok.Pos // advanced via parseObjectList + return o, nil +} + // parseListType parses a list type and returns a ListType AST func (p *Parser) parseListType() (*ast.ListType, error) { defer un(trace(p, "ParseListType")) + // we assume that the currently scanned token is a LBRACK l := &ast.ListType{ Lbrack: p.tok.Pos, } @@ -177,7 +204,9 @@ func (p *Parser) parseListType() (*ast.ListType, error) { case token.BOOL: // TODO(arslan) should we support? not supported by HCL yet case token.LBRACK: - // TODO(arslan) should we support nested lists? 
Even though it's + // written in README of HCL, it's not a parse of the grammar + // (defined in parse.y) case token.RBRACK: // finished l.Rbrack = p.tok.Pos @@ -198,13 +227,6 @@ func (p *Parser) parseLiteralType() (*ast.LiteralType, error) { }, nil } -// parseObjectType parses an object type and returns a ObjectType AST -func (p *Parser) parseObjectType() (*ast.ObjectType, error) { - defer un(trace(p, "ParseObjectYpe")) - - return nil, errors.New("ObjectType is not implemented yet") -} - // scan returns the next token from the underlying scanner. // If a token has been unscanned then read that instead. func (p *Parser) scan() token.Token { diff --git a/parser/parser_test.go b/parser/parser_test.go index 437c254..ddade4b 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -12,7 +12,11 @@ import ( ) func TestParseType(t *testing.T) { - src := `foo = ["fatih", "arslan", 1224]` + src := `foo = { + fatih = "true" + arslan = "deneme" +}` + p := New([]byte(src)) p.enableTrace = true From 0ed9465e4b096db45cec672378425bad4406cc06 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 17 Oct 2015 01:39:49 +0300 Subject: [PATCH 079/137] parser: add support for normal and nested objects --- parser/parser.go | 23 +++++++++++++++-------- parser/parser_test.go | 5 ++--- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index b9c128a..0df2f6f 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -42,8 +42,13 @@ func (p *Parser) parseObjectList() (*ast.ObjectList, error) { if err == errEofToken { break // we are finished } + if err != nil { - return nil, err + if p.tok.Type != token.RBRACE { + return nil, err + } else { + break + } } // we successfully parsed a node, add it to the final source node @@ -62,9 +67,9 @@ func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { return nil, err } - // either an assignment or object switch p.tok.Type { case token.ASSIGN: + // assignments o := &ast.ObjectItem{ Keys: 
keys, Assign: p.tok.Pos, @@ -74,16 +79,18 @@ func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { if err != nil { return nil, err } - return o, nil case token.LBRACE: - if len(keys) > 1 { - // nested object - panic("nested object is not implemented") + // object or nested objects + o := &ast.ObjectItem{ + Keys: keys, } - // object - panic("normal object is not implemented") + o.Val, err = p.parseObjectType() + if err != nil { + return nil, err + } + return o, nil } return nil, fmt.Errorf("not yet implemented: %s", p.tok.Type) diff --git a/parser/parser_test.go b/parser/parser_test.go index ddade4b..f36b182 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -12,9 +12,8 @@ import ( ) func TestParseType(t *testing.T) { - src := `foo = { + src := `foo { fatih = "true" - arslan = "deneme" }` p := New([]byte(src)) @@ -28,12 +27,12 @@ func TestParseType(t *testing.T) { ast.Walk(node, func(n ast.Node) bool { if list, ok := n.(*ast.ObjectList); ok { for _, l := range list.Items { + fmt.Printf("l = %+v\n", l) for _, k := range l.Keys { fmt.Printf("key = %+v\n", k) } fmt.Printf("val = %+v\n", l.Val) } - return false } return true }) From 39f76a99eb7bf47c69c3edd6feaf596ee7c9a91e Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 17 Oct 2015 01:48:38 +0300 Subject: [PATCH 080/137] parser: just return node :) --- parser/parser.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 0df2f6f..922d7bf 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -43,12 +43,10 @@ func (p *Parser) parseObjectList() (*ast.ObjectList, error) { break // we are finished } + // we don't return a nil, because might want to use already collected + // items. 
if err != nil { - if p.tok.Type != token.RBRACE { - return nil, err - } else { - break - } + return node, err } // we successfully parsed a node, add it to the final source node From 0cf842255dc21a688016e02206f78d7a94af4857 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 17 Oct 2015 02:16:33 +0300 Subject: [PATCH 081/137] parser: add list tests --- parser/parser_test.go | 121 ++++++++++++++++++++++++++++++++++-------- scanner/scanner.go | 6 ++- 2 files changed, 104 insertions(+), 23 deletions(-) diff --git a/parser/parser_test.go b/parser/parser_test.go index f36b182..eae85bd 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -11,33 +11,112 @@ import ( "github.com/fatih/hcl/token" ) -func TestParseType(t *testing.T) { - src := `foo { - fatih = "true" -}` - - p := New([]byte(src)) - p.enableTrace = true - - node, err := p.Parse() - if err != nil { - t.Fatal(err) +func TestType(t *testing.T) { + var literals = []struct { + typ token.Type + src string + }{ + {token.STRING, `foo = "foo"`}, + {token.NUMBER, `foo = 123`}, + {token.FLOAT, `foo = 123.12`}, + {token.BOOL, `foo = true`}, } - ast.Walk(node, func(n ast.Node) bool { - if list, ok := n.(*ast.ObjectList); ok { - for _, l := range list.Items { - fmt.Printf("l = %+v\n", l) - for _, k := range l.Keys { - fmt.Printf("key = %+v\n", k) - } - fmt.Printf("val = %+v\n", l.Val) + for _, l := range literals { + p := New([]byte(l.src)) + item, err := p.parseObjectItem() + if err != nil { + t.Error(err) + } + + lit, ok := item.Val.(*ast.LiteralType) + if !ok { + t.Errorf("node should be of type LiteralType, got: %+v", item.Val) + } + + if lit.Token.Type != l.typ { + t.Errorf("want: %s, got: %s", l.typ, lit.Token.Type) + } + } +} + +func TestListType(t *testing.T) { + var literals = []struct { + src string + tokens []token.Type + }{ + { + `foo = ["123", 123]`, + []token.Type{token.STRING, token.NUMBER}, + }, + { + `foo = [123, "123",]`, + []token.Type{token.NUMBER, token.STRING}, + }, + { + `foo = []`, + 
[]token.Type{}, + }, + { + `foo = ["123", 123]`, + []token.Type{token.STRING, token.NUMBER}, + }, + } + + for _, l := range literals { + p := New([]byte(l.src)) + item, err := p.parseObjectItem() + if err != nil { + t.Error(err) + } + + list, ok := item.Val.(*ast.ListType) + if !ok { + t.Errorf("node should be of type LiteralType, got: %+v", item.Val) + } + + var tokens []token.Type + for _, li := range list.List { + if tp, ok := li.(*ast.LiteralType); ok { + tokens = append(tokens, tp.Token.Type) } } - return true - }) + + equals(t, l.tokens, tokens) + + } } +func TestObjectType(t *testing.T) { +} + +// func TestParseType(t *testing.T) { +// src := `foo { +// fatih = "true" +// }` +// +// p := New([]byte(src)) +// p.enableTrace = true +// +// node, err := p.Parse() +// if err != nil { +// t.Fatal(err) +// } +// +// ast.Walk(node, func(n ast.Node) bool { +// if list, ok := n.(*ast.ObjectList); ok { +// for _, l := range list.Items { +// fmt.Printf("l = %+v\n", l) +// for _, k := range l.Keys { +// fmt.Printf("key = %+v\n", k) +// } +// fmt.Printf("val = %+v\n", l.Val) +// } +// } +// return true +// }) +// } + func TestObjectKey(t *testing.T) { keys := []struct { exp []token.Type diff --git a/scanner/scanner.go b/scanner/scanner.go index c600014..b5c3679 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -325,7 +325,9 @@ func (s *Scanner) scanNumber(ch rune) token.Type { return token.FLOAT } - s.unread() + if ch != eof { + s.unread() + } return token.NUMBER } @@ -338,7 +340,7 @@ func (s *Scanner) scanMantissa(ch rune) rune { scanned = true } - if scanned { + if scanned && ch != eof { s.unread() } return ch From 1f47d675b97bf1dcbe50ddffbd5d7ad11ffff402 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 18 Oct 2015 22:51:10 +0300 Subject: [PATCH 082/137] parser: fix parser test --- parser/parser_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/parser_test.go b/parser/parser_test.go index eae85bd..333023a 100644 --- 
a/parser/parser_test.go +++ b/parser/parser_test.go @@ -75,7 +75,7 @@ func TestListType(t *testing.T) { t.Errorf("node should be of type LiteralType, got: %+v", item.Val) } - var tokens []token.Type + tokens := []token.Type{} for _, li := range list.List { if tp, ok := li.(*ast.LiteralType); ok { tokens = append(tokens, tp.Token.Type) From 393af546c0f5f6f9c1a19842d01032475210da47 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 18 Oct 2015 23:19:56 +0300 Subject: [PATCH 083/137] parser: add TestObjectType --- parser/parser.go | 4 +- parser/parser_test.go | 104 ++++++++++++++++++++++++++++++------------ 2 files changed, 76 insertions(+), 32 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 922d7bf..ba1040d 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -210,8 +210,8 @@ func (p *Parser) parseListType() (*ast.ListType, error) { // TODO(arslan) should we support? not supported by HCL yet case token.LBRACK: // TODO(arslan) should we support nested lists? Even though it's - // written in README of HCL, it's not a parse of the grammar - // (defined in parse.y) + // written in README of HCL, it's not a part of the grammar + // (not defined in parse.y) case token.RBRACK: // finished l.Rbrack = p.tok.Pos diff --git a/parser/parser_test.go b/parser/parser_test.go index 333023a..ec411b6 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -31,7 +31,7 @@ func TestType(t *testing.T) { lit, ok := item.Val.(*ast.LiteralType) if !ok { - t.Errorf("node should be of type LiteralType, got: %+v", item.Val) + t.Errorf("node should be of type LiteralType, got: %T", item.Val) } if lit.Token.Type != l.typ { @@ -72,7 +72,7 @@ func TestListType(t *testing.T) { list, ok := item.Val.(*ast.ListType) if !ok { - t.Errorf("node should be of type LiteralType, got: %+v", item.Val) + t.Errorf("node should be of type LiteralType, got: %T", item.Val) } tokens := []token.Type{} @@ -83,39 +83,83 @@ func TestListType(t *testing.T) { } equals(t, l.tokens, 
tokens) - } } func TestObjectType(t *testing.T) { -} + var literals = []struct { + src string + nodeType []ast.Node + itemLen int + }{ + { + `foo = {}`, + nil, + 0, + }, + { + `foo = { + bar = "fatih" + }`, + []ast.Node{&ast.LiteralType{}}, + 1, + }, + { + `foo = { + bar = "fatih" + baz = ["arslan"] + }`, + []ast.Node{ + &ast.LiteralType{}, + &ast.ListType{}, + }, + 2, + }, + { + `foo = { + bar {} + }`, + []ast.Node{ + &ast.ObjectType{}, + }, + 1, + }, + { + `foo { + bar {} + foo = true + }`, + []ast.Node{ + &ast.ObjectType{}, + &ast.LiteralType{}, + }, + 2, + }, + } -// func TestParseType(t *testing.T) { -// src := `foo { -// fatih = "true" -// }` -// -// p := New([]byte(src)) -// p.enableTrace = true -// -// node, err := p.Parse() -// if err != nil { -// t.Fatal(err) -// } -// -// ast.Walk(node, func(n ast.Node) bool { -// if list, ok := n.(*ast.ObjectList); ok { -// for _, l := range list.Items { -// fmt.Printf("l = %+v\n", l) -// for _, k := range l.Keys { -// fmt.Printf("key = %+v\n", k) -// } -// fmt.Printf("val = %+v\n", l.Val) -// } -// } -// return true -// }) -// } + for _, l := range literals { + p := New([]byte(l.src)) + item, err := p.parseObjectItem() + if err != nil { + t.Error(err) + } + + // we know that the ObjectKey name is foo for all cases, what matters + // is the object + obj, ok := item.Val.(*ast.ObjectType) + if !ok { + t.Errorf("node should be of type LiteralType, got: %T", item.Val) + } + + // check if the total length of items are correct + equals(t, l.itemLen, len(obj.List.Items)) + + // check if the types are correct + for i, item := range obj.List.Items { + equals(t, reflect.TypeOf(l.nodeType[i]), reflect.TypeOf(item.Val)) + } + } +} func TestObjectKey(t *testing.T) { keys := []struct { From 9468aa324ea3f4d460a29b8be9889f4835f3c5dc Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 18 Oct 2015 23:25:17 +0300 Subject: [PATCH 084/137] parser: add official HCL tests, WIP --- parser/parser_test.go | 77 ++++++++++++++++++++++++ 
parser/test-fixtures/array_comment.hcl | 4 ++ parser/test-fixtures/assign_colon.hcl | 6 ++ parser/test-fixtures/assign_deep.hcl | 5 ++ parser/test-fixtures/comment.hcl | 15 +++++ parser/test-fixtures/comment_single.hcl | 1 + parser/test-fixtures/complex.hcl | 42 +++++++++++++ parser/test-fixtures/complex_key.hcl | 1 + parser/test-fixtures/empty.hcl | 0 parser/test-fixtures/list.hcl | 1 + parser/test-fixtures/list_comma.hcl | 1 + parser/test-fixtures/multiple.hcl | 2 + parser/test-fixtures/old.hcl | 3 + parser/test-fixtures/structure.hcl | 5 ++ parser/test-fixtures/structure_basic.hcl | 5 ++ parser/test-fixtures/structure_empty.hcl | 1 + parser/test-fixtures/types.hcl | 7 +++ 17 files changed, 176 insertions(+) create mode 100644 parser/test-fixtures/array_comment.hcl create mode 100644 parser/test-fixtures/assign_colon.hcl create mode 100644 parser/test-fixtures/assign_deep.hcl create mode 100644 parser/test-fixtures/comment.hcl create mode 100644 parser/test-fixtures/comment_single.hcl create mode 100644 parser/test-fixtures/complex.hcl create mode 100644 parser/test-fixtures/complex_key.hcl create mode 100644 parser/test-fixtures/empty.hcl create mode 100644 parser/test-fixtures/list.hcl create mode 100644 parser/test-fixtures/list_comma.hcl create mode 100644 parser/test-fixtures/multiple.hcl create mode 100644 parser/test-fixtures/old.hcl create mode 100644 parser/test-fixtures/structure.hcl create mode 100644 parser/test-fixtures/structure_basic.hcl create mode 100644 parser/test-fixtures/structure_empty.hcl create mode 100644 parser/test-fixtures/types.hcl diff --git a/parser/parser_test.go b/parser/parser_test.go index ec411b6..573cfe1 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2,6 +2,7 @@ package parser import ( "fmt" + "io/ioutil" "path/filepath" "reflect" "runtime" @@ -213,6 +214,82 @@ func TestObjectKey(t *testing.T) { } } +// Official HCL tests +func TestParse(t *testing.T) { + cases := []struct { + Name string + Err bool + }{ + 
{ + "assign_colon.hcl", + true, + }, + { + "comment.hcl", + false, + }, + { + "comment_single.hcl", + false, + }, + { + "empty.hcl", + false, + }, + { + "list_comma.hcl", + false, + }, + { + "multiple.hcl", + false, + }, + { + "structure.hcl", + false, + }, + { + "structure_basic.hcl", + false, + }, + { + "structure_empty.hcl", + false, + }, + { + "complex.hcl", + false, + }, + { + "assign_deep.hcl", + true, + }, + { + "types.hcl", + false, + }, + { + "array_comment.hcl", + false, + }, + } + + const fixtureDir = "./test-fixtures" + + for _, tc := range cases { + d, err := ioutil.ReadFile(filepath.Join(fixtureDir, tc.Name)) + if err != nil { + t.Fatalf("err: %s", err) + } + + p := New(d) + _, err = p.Parse() + if (err != nil) != tc.Err { + t.Fatalf("Input: %s\n\nError: %s", tc.Name, err) + } + } +} + // equals fails the test if exp is not equal to act. func equals(tb testing.TB, exp, act interface{}) { if !reflect.DeepEqual(exp, act) { diff --git a/parser/test-fixtures/array_comment.hcl b/parser/test-fixtures/array_comment.hcl new file mode 100644 index 0000000..78c2675 --- /dev/null +++ b/parser/test-fixtures/array_comment.hcl @@ -0,0 +1,4 @@ +foo = [ + "1", + "2", # comment +] diff --git a/parser/test-fixtures/assign_colon.hcl b/parser/test-fixtures/assign_colon.hcl new file mode 100644 index 0000000..eb5a99a --- /dev/null +++ b/parser/test-fixtures/assign_colon.hcl @@ -0,0 +1,6 @@ +resource = [{ + "foo": { + "bar": {}, + "baz": [1, 2, "foo"], + } +}] diff --git a/parser/test-fixtures/assign_deep.hcl b/parser/test-fixtures/assign_deep.hcl new file mode 100644 index 0000000..dd3151c --- /dev/null +++ b/parser/test-fixtures/assign_deep.hcl @@ -0,0 +1,5 @@ +resource = [{ + foo = [{ + bar = {} + }] +}] diff --git a/parser/test-fixtures/comment.hcl b/parser/test-fixtures/comment.hcl new file mode 100644 index 0000000..1ff7f29 --- /dev/null +++ b/parser/test-fixtures/comment.hcl @@ -0,0 +1,15 @@ +// Foo + +/* Bar */ + +/* +/* +Baz +*/ + +# Another + +# Multiple +# Lines 
+ +foo = "bar" diff --git a/parser/test-fixtures/comment_single.hcl b/parser/test-fixtures/comment_single.hcl new file mode 100644 index 0000000..fec5601 --- /dev/null +++ b/parser/test-fixtures/comment_single.hcl @@ -0,0 +1 @@ +# Hello diff --git a/parser/test-fixtures/complex.hcl b/parser/test-fixtures/complex.hcl new file mode 100644 index 0000000..cccb5b0 --- /dev/null +++ b/parser/test-fixtures/complex.hcl @@ -0,0 +1,42 @@ +// This comes from Terraform, as a test +variable "foo" { + default = "bar" + description = "bar" +} + +provider "aws" { + access_key = "foo" + secret_key = "bar" +} + +provider "do" { + api_key = "${var.foo}" +} + +resource "aws_security_group" "firewall" { + count = 5 +} + +resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}" + ] + + network_interface { + device_index = 0 + description = "Main network interface" + } +} + +resource "aws_instance" "db" { + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + + depends_on = ["aws_instance.web"] +} + +output "web_ip" { + value = "${aws_instance.web.private_ip}" +} diff --git a/parser/test-fixtures/complex_key.hcl b/parser/test-fixtures/complex_key.hcl new file mode 100644 index 0000000..0007aaf --- /dev/null +++ b/parser/test-fixtures/complex_key.hcl @@ -0,0 +1 @@ +foo.bar = "baz" diff --git a/parser/test-fixtures/empty.hcl b/parser/test-fixtures/empty.hcl new file mode 100644 index 0000000..e69de29 diff --git a/parser/test-fixtures/list.hcl b/parser/test-fixtures/list.hcl new file mode 100644 index 0000000..059d4ce --- /dev/null +++ b/parser/test-fixtures/list.hcl @@ -0,0 +1 @@ +foo = [1, 2, "foo"] diff --git a/parser/test-fixtures/list_comma.hcl b/parser/test-fixtures/list_comma.hcl new file mode 100644 index 0000000..50f4218 --- /dev/null +++ b/parser/test-fixtures/list_comma.hcl @@ -0,0 +1 @@ +foo = [1, 2, "foo",] diff --git a/parser/test-fixtures/multiple.hcl b/parser/test-fixtures/multiple.hcl new 
file mode 100644 index 0000000..029c54b --- /dev/null +++ b/parser/test-fixtures/multiple.hcl @@ -0,0 +1,2 @@ +foo = "bar" +key = 7 diff --git a/parser/test-fixtures/old.hcl b/parser/test-fixtures/old.hcl new file mode 100644 index 0000000..e9f77ca --- /dev/null +++ b/parser/test-fixtures/old.hcl @@ -0,0 +1,3 @@ +default = { + "eu-west-1": "ami-b1cf19c6", +} diff --git a/parser/test-fixtures/structure.hcl b/parser/test-fixtures/structure.hcl new file mode 100644 index 0000000..92592fb --- /dev/null +++ b/parser/test-fixtures/structure.hcl @@ -0,0 +1,5 @@ +// This is a test structure for the lexer +foo bar "baz" { + key = 7 + foo = "bar" +} diff --git a/parser/test-fixtures/structure_basic.hcl b/parser/test-fixtures/structure_basic.hcl new file mode 100644 index 0000000..7229a1f --- /dev/null +++ b/parser/test-fixtures/structure_basic.hcl @@ -0,0 +1,5 @@ +foo { + value = 7 + "value" = 8 + "complex::value" = 9 +} diff --git a/parser/test-fixtures/structure_empty.hcl b/parser/test-fixtures/structure_empty.hcl new file mode 100644 index 0000000..4d156dd --- /dev/null +++ b/parser/test-fixtures/structure_empty.hcl @@ -0,0 +1 @@ +resource "foo" "bar" {} diff --git a/parser/test-fixtures/types.hcl b/parser/test-fixtures/types.hcl new file mode 100644 index 0000000..cf2747e --- /dev/null +++ b/parser/test-fixtures/types.hcl @@ -0,0 +1,7 @@ +foo = "bar" +bar = 7 +baz = [1,2,3] +foo = -12 +bar = 3.14159 +foo = true +bar = false From 17aa3f3c5aab099b6726ffa4cbb6e1db14083600 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 19 Oct 2015 01:30:14 +0300 Subject: [PATCH 085/137] parser: add better ObjectKey parser --- ast/ast.go | 28 +++++++++ parser/parser.go | 142 +++++++++++++++++++++++++++--------------- parser/parser_test.go | 2 + 3 files changed, 123 insertions(+), 49 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index c234e5b..e30d88b 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -8,14 +8,32 @@ type Node interface { Pos() token.Pos } +func (NodeList) node() {} + func 
(ObjectList) node() {} func (ObjectKey) node() {} func (ObjectItem) node() {} +func (Comment) node() {} func (ObjectType) node() {} func (LiteralType) node() {} func (ListType) node() {} +// NodeList represents a list of Nodes. An HCL file itself is a +// NodeList. +type NodeList struct { + Nodes []Node +} + +func (n *NodeList) Add(node Node) { + n.Nodes = append(n.Nodes, node) +} + +func (n *NodeList) Pos() token.Pos { + // always returns the uninitialized position + return n.Nodes[0].Pos() +} + // ObjectList represents a list of ObjectItems. An HCL file itself is an // ObjectList. type ObjectList struct { @@ -96,3 +114,13 @@ type ObjectType struct { func (o *ObjectType) Pos() token.Pos { return o.Lbrace } + +// Comment node represents a single //, # style or /*- style comment +type Comment struct { + Start token.Pos // position of / or # + Text string +} + +func (c *Comment) Pos() token.Pos { + return c.Start +} diff --git a/parser/parser.go b/parser/parser.go index ba1040d..5674bf4 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -30,7 +30,30 @@ var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. func (p *Parser) Parse() (ast.Node, error) { - return p.parseObjectList() + return p.parseNodeList() +} + +func (p *Parser) parseNodeList() (*ast.NodeList, error) { + defer un(trace(p, "ParseObjectList")) + node := &ast.NodeList{} + + for { + n, err := p.next() + if err == errEofToken { + break // we are finished + } + + // we don't return a nil, because might want to use already collected + // items. 
+ if err != nil { + return node, err + } + + // we successfully parsed a node, add it to the final source node + node.Add(n) + } + + return node, nil } func (p *Parser) parseObjectList() (*ast.ObjectList, error) { @@ -56,6 +79,28 @@ func (p *Parser) parseObjectList() (*ast.ObjectList, error) { return node, nil } +// next returns the next node +func (p *Parser) next() (ast.Node, error) { + defer un(trace(p, "ParseNode")) + + tok := p.scan() + if tok.Type == token.EOF { + return nil, errEofToken + } + + switch tok.Type { + case token.IDENT, token.STRING: + return p.parseObjectItem() + case token.COMMENT: + return &ast.Comment{ + Start: tok.Pos, + Text: tok.Text, + }, nil + default: + return nil, fmt.Errorf("expected: IDENT | STRING got: %+v", tok.Type) + } +} + // parseObjectItem parses a single object item func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { defer un(trace(p, "ParseObjectItem")) @@ -94,6 +139,52 @@ func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { return nil, fmt.Errorf("not yet implemented: %s", p.tok.Type) } +// parseObjectKey parses an object key and returns a ObjectKey AST +func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { + firstKey := false + nestedObj := false + keys := make([]*ast.ObjectKey, 0) + + // we have three casses + // 1. assignment: KEY = NODE + // 2. object: KEY { } + // 3. nested object: KEY KEY2 ... KEYN {} + // Invalid cases: + // 1. foo bar = {} + for { + tok := p.scan() + switch tok.Type { + case token.EOF: + return nil, errEofToken + case token.ASSIGN: + // assignment or object only, but not nested objects. 
this is not + // allowed: `foo bar = {}` + if nestedObj { + return nil, fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type) + } + + return keys, nil + case token.LBRACE: + // object + return keys, nil + case token.IDENT, token.STRING: + // nested object + if !firstKey { + firstKey = true + } else { + nestedObj = true + } + + keys = append(keys, &ast.ObjectKey{Token: p.tok}) + case token.ILLEGAL: + fmt.Println("illegal") + // break // scan next + default: + return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type) + } + } +} + // parseType parses any type of Type, such as number, bool, string, object or // list. func (p *Parser) parseType() (ast.Node, error) { @@ -113,54 +204,7 @@ func (p *Parser) parseType() (ast.Node, error) { return nil, errEofToken } - return nil, errors.New("ParseType is not implemented yet") -} - -// parseObjectKey parses an object key and returns a ObjectKey AST -func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { - tok := p.scan() - if tok.Type == token.EOF { - return nil, errEofToken - } - - keys := make([]*ast.ObjectKey, 0) - - switch tok.Type { - case token.IDENT, token.STRING: - // add first found token - keys = append(keys, &ast.ObjectKey{Token: tok}) - default: - return nil, fmt.Errorf("expected: IDENT | STRING got: %s", tok.Type) - } - - nestedObj := false - - // we have three casses - // 1. assignment: KEY = NODE - // 2. object: KEY { } - // 2. nested object: KEY KEY2 ... KEYN {} - for { - tok := p.scan() - switch tok.Type { - case token.ASSIGN: - // assignment or object only, but not nested objects. 
this is not - // allowed: `foo bar = {}` - if nestedObj { - return nil, fmt.Errorf("nested object expected: LBRACE got: %s", tok.Type) - } - - return keys, nil - case token.LBRACE: - // object - return keys, nil - case token.IDENT, token.STRING: - // nested object - nestedObj = true - keys = append(keys, &ast.ObjectKey{Token: tok}) - default: - return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", tok.Type) - } - } + return nil, fmt.Errorf("Unknown token: %+v", tok) } // parseObjectType parses an object type and returns a ObjectType AST diff --git a/parser/parser_test.go b/parser/parser_test.go index 573cfe1..b91318f 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -140,6 +140,7 @@ func TestObjectType(t *testing.T) { for _, l := range literals { p := New([]byte(l.src)) + // p.enableTrace = true item, err := p.parseObjectItem() if err != nil { t.Error(err) @@ -283,6 +284,7 @@ func TestParse(t *testing.T) { } p := New(d) + // p.enableTrace = true _, err = p.Parse() if (err != nil) != tc.Err { t.Fatalf("Input: %s\n\nError: %s", tc.Name, err) From 3c21f6b72badbb30e8b1135cdf0ce02e3f4410ea Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 19 Oct 2015 02:01:33 +0300 Subject: [PATCH 086/137] parser: simplify parsing object key even more and fix unscan --- parser/parser.go | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 5674bf4..96323df 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -12,8 +12,7 @@ import ( type Parser struct { sc *scanner.Scanner - tok token.Token // last read token - prevTok token.Token // previous read token + tok token.Token // last read token enableTrace bool indent int @@ -90,6 +89,7 @@ func (p *Parser) next() (ast.Node, error) { switch tok.Type { case token.IDENT, token.STRING: + p.unscan() return p.parseObjectItem() case token.COMMENT: return &ast.Comment{ @@ -141,16 +141,9 @@ func (p *Parser) 
parseObjectItem() (*ast.ObjectItem, error) { // parseObjectKey parses an object key and returns a ObjectKey AST func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { - firstKey := false - nestedObj := false + keyCount := 0 keys := make([]*ast.ObjectKey, 0) - // we have three casses - // 1. assignment: KEY = NODE - // 2. object: KEY { } - // 3. nested object: KEY KEY2 ... KEYN {} - // Invalid cases: - // 1. foo bar = {} for { tok := p.scan() switch tok.Type { @@ -159,26 +152,23 @@ func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { case token.ASSIGN: // assignment or object only, but not nested objects. this is not // allowed: `foo bar = {}` - if nestedObj { + if keyCount > 1 { return nil, fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type) } + if keyCount == 0 { + return nil, errors.New("no keys found!!!") + } + return keys, nil case token.LBRACE: // object return keys, nil case token.IDENT, token.STRING: - // nested object - if !firstKey { - firstKey = true - } else { - nestedObj = true - } - + keyCount++ keys = append(keys, &ast.ObjectKey{Token: p.tok}) case token.ILLEGAL: fmt.Println("illegal") - // break // scan next default: return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type) } @@ -285,9 +275,6 @@ func (p *Parser) scan() token.Token { return p.tok } - // store previous token - p.prevTok = p.tok - // Otherwise read the next token from the scanner and Save it to the buffer // in case we unscan later. p.tok = p.sc.Scan() @@ -297,7 +284,6 @@ func (p *Parser) scan() token.Token { // unscan pushes the previously read token back onto the buffer. 
func (p *Parser) unscan() { p.n = 1 - p.tok = p.prevTok } // ---------------------------------------------------------------------------- From 1a63f537ebeb2b8f1e41c021abaa1c38b5ff98d3 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 19 Oct 2015 02:11:07 +0300 Subject: [PATCH 087/137] scanner: negative float and int should be supported as well --- scanner/scanner.go | 7 ++++- scanner/scanner_test.go | 66 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index b5c3679..cf558e4 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -189,7 +189,12 @@ func (s *Scanner) Scan() token.Token { case '+': tok = token.ADD case '-': - tok = token.SUB + if isDecimal(s.peek()) { + ch := s.next() + tok = s.scanNumber(ch) + } else { + tok = token.SUB + } default: s.err("illegal char") } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 56ae526..6e2f7ab 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -52,7 +52,7 @@ var tokenLists = map[string][]tokenPair{ {token.BOOL, "true"}, {token.BOOL, "false"}, }, - "identoken.t": []tokenPair{ + "ident": []tokenPair{ {token.IDENT, "a"}, {token.IDENT, "a0"}, {token.IDENT, "foobar"}, @@ -70,7 +70,7 @@ var tokenLists = map[string][]tokenPair{ {token.IDENT, "foo६४"}, {token.IDENT, "bar9876"}, }, - "stritoken.ng": []tokenPair{ + "string": []tokenPair{ {token.STRING, `" "`}, {token.STRING, `"a"`}, {token.STRING, `"本"`}, @@ -92,7 +92,7 @@ var tokenLists = map[string][]tokenPair{ {token.STRING, `"\U0000ffAB"`}, {token.STRING, `"` + f100 + `"`}, }, - "numbtoken.er": []tokenPair{ + "number": []tokenPair{ {token.NUMBER, "0"}, {token.NUMBER, "1"}, {token.NUMBER, "9"}, @@ -131,8 +131,46 @@ var tokenLists = map[string][]tokenPair{ {token.NUMBER, "1E-10"}, {token.NUMBER, "42E+10"}, {token.NUMBER, "01234567890E-10"}, + {token.NUMBER, "-0"}, + {token.NUMBER, "-1"}, + {token.NUMBER, "-9"}, + {token.NUMBER, "-42"}, + 
{token.NUMBER, "-1234567890"}, + {token.NUMBER, "-00"}, + {token.NUMBER, "-01"}, + {token.NUMBER, "-07"}, + {token.NUMBER, "-042"}, + {token.NUMBER, "-01234567"}, + {token.NUMBER, "-0x0"}, + {token.NUMBER, "-0x1"}, + {token.NUMBER, "-0xf"}, + {token.NUMBER, "-0x42"}, + {token.NUMBER, "-0x123456789abcDEF"}, + {token.NUMBER, "-0x" + f100}, + {token.NUMBER, "-0X0"}, + {token.NUMBER, "-0X1"}, + {token.NUMBER, "-0XF"}, + {token.NUMBER, "-0X42"}, + {token.NUMBER, "-0X123456789abcDEF"}, + {token.NUMBER, "-0X" + f100}, + {token.NUMBER, "-0e0"}, + {token.NUMBER, "-1e0"}, + {token.NUMBER, "-42e0"}, + {token.NUMBER, "-01234567890e0"}, + {token.NUMBER, "-0E0"}, + {token.NUMBER, "-1E0"}, + {token.NUMBER, "-42E0"}, + {token.NUMBER, "-01234567890E0"}, + {token.NUMBER, "-0e+10"}, + {token.NUMBER, "-1e-10"}, + {token.NUMBER, "-42e+10"}, + {token.NUMBER, "-01234567890e-10"}, + {token.NUMBER, "-0E+10"}, + {token.NUMBER, "-1E-10"}, + {token.NUMBER, "-42E+10"}, + {token.NUMBER, "-01234567890E-10"}, }, - "floatoken.t": []tokenPair{ + "float": []tokenPair{ {token.FLOAT, "0."}, {token.FLOAT, "1."}, {token.FLOAT, "42."}, @@ -161,6 +199,26 @@ var tokenLists = map[string][]tokenPair{ {token.FLOAT, "1.1E-10"}, {token.FLOAT, "42.1E+10"}, {token.FLOAT, "01234567890.1E-10"}, + {token.FLOAT, "-0.0"}, + {token.FLOAT, "-1.0"}, + {token.FLOAT, "-42.0"}, + {token.FLOAT, "-01234567890.0"}, + {token.FLOAT, "-01.8e0"}, + {token.FLOAT, "-1.4e0"}, + {token.FLOAT, "-42.2e0"}, + {token.FLOAT, "-01234567890.12e0"}, + {token.FLOAT, "-0.E0"}, + {token.FLOAT, "-1.12E0"}, + {token.FLOAT, "-42.123E0"}, + {token.FLOAT, "-01234567890.213E0"}, + {token.FLOAT, "-0.2e+10"}, + {token.FLOAT, "-1.2e-10"}, + {token.FLOAT, "-42.54e+10"}, + {token.FLOAT, "-01234567890.98e-10"}, + {token.FLOAT, "-0.1E+10"}, + {token.FLOAT, "-1.1E-10"}, + {token.FLOAT, "-42.1E+10"}, + {token.FLOAT, "-01234567890.1E-10"}, }, } From 6b5bd170f60839c029cbc8d0ed842ebe4a98a2f7 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 24 Oct 2015 
22:10:10 +0300 Subject: [PATCH 088/137] parser: fix all tests, comments are still WIP --- ast/ast.go | 17 ----------------- parser/parser.go | 40 +++++++++++----------------------------- parser/parser_test.go | 1 + 3 files changed, 12 insertions(+), 46 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index e30d88b..25b42da 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -8,8 +8,6 @@ type Node interface { Pos() token.Pos } -func (NodeList) node() {} - func (ObjectList) node() {} func (ObjectKey) node() {} func (ObjectItem) node() {} @@ -19,21 +17,6 @@ func (ObjectType) node() {} func (LiteralType) node() {} func (ListType) node() {} -// ObjectList represents a list of ObjectItems. An HCL file itself is an -// ObjectList. -type NodeList struct { - Nodes []Node -} - -func (n *NodeList) Add(node Node) { - n.Nodes = append(n.Nodes, node) -} - -func (n *NodeList) Pos() token.Pos { - // always returns the uninitiliazed position - return n.Nodes[0].Pos() -} - // ObjectList represents a list of ObjectItems. An HCL file itself is an // ObjectList. type ObjectList struct { diff --git a/parser/parser.go b/parser/parser.go index 96323df..c47c21c 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -29,12 +29,12 @@ var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. 
func (p *Parser) Parse() (ast.Node, error) { - return p.parseNodeList() + return p.parseObjectList() } -func (p *Parser) parseNodeList() (*ast.NodeList, error) { +func (p *Parser) parseObjectList() (*ast.ObjectList, error) { defer un(trace(p, "ParseObjectList")) - node := &ast.NodeList{} + node := &ast.ObjectList{} for { n, err := p.next() @@ -48,31 +48,11 @@ func (p *Parser) parseNodeList() (*ast.NodeList, error) { return node, err } - // we successfully parsed a node, add it to the final source node - node.Add(n) - } - - return node, nil -} - -func (p *Parser) parseObjectList() (*ast.ObjectList, error) { - defer un(trace(p, "ParseObjectList")) - node := &ast.ObjectList{} - - for { - n, err := p.parseObjectItem() - if err == errEofToken { - break // we are finished + if item, ok := n.(*ast.ObjectItem); ok { + // we successfully parsed a node, add it to the final source node + node.Add(item) } - // we don't return a nil, because might want to use already collected - // items. - if err != nil { - return node, err - } - - // we successfully parsed a node, add it to the final source node - node.Add(n) } return node, nil @@ -83,11 +63,10 @@ func (p *Parser) next() (ast.Node, error) { defer un(trace(p, "ParseNode")) tok := p.scan() - if tok.Type == token.EOF { - return nil, errEofToken - } switch tok.Type { + case token.EOF: + return nil, errEofToken case token.IDENT, token.STRING: p.unscan() return p.parseObjectItem() @@ -240,6 +219,9 @@ func (p *Parser) parseListType() (*ast.ListType, error) { case token.COMMA: // get next list item or we are at the end continue + case token.COMMENT: + // TODO(arslan): parse comment + continue case token.BOOL: // TODO(arslan) should we support? 
not supported by HCL yet case token.LBRACK: diff --git a/parser/parser_test.go b/parser/parser_test.go index b91318f..5d800a7 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -20,6 +20,7 @@ func TestType(t *testing.T) { {token.STRING, `foo = "foo"`}, {token.NUMBER, `foo = 123`}, {token.FLOAT, `foo = 123.12`}, + {token.FLOAT, `foo = -123.12`}, {token.BOOL, `foo = true`}, } From a99536146802f6ae31bce278da7aefefbcab2b2a Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 24 Oct 2015 23:11:03 +0300 Subject: [PATCH 089/137] parser: collect comments --- parser/parser.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index c47c21c..ded6a80 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -12,7 +12,8 @@ import ( type Parser struct { sc *scanner.Scanner - tok token.Token // last read token + tok token.Token // last read token + comments []*ast.Comment enableTrace bool indent int @@ -48,11 +49,12 @@ func (p *Parser) parseObjectList() (*ast.ObjectList, error) { return node, err } - if item, ok := n.(*ast.ObjectItem); ok { - // we successfully parsed a node, add it to the final source node - node.Add(item) + switch t := n.(type) { + case *ast.ObjectItem: + node.Add(t) + case *ast.Comment: + p.comments = append(p.comments, t) } - } return node, nil From de7241ebe5ec8a689a955b119924bf2407ed8ebc Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 24 Oct 2015 23:12:28 +0300 Subject: [PATCH 090/137] ast: print unknown node type --- ast/walk.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ast/walk.go b/ast/walk.go index f198c3f..feae611 100644 --- a/ast/walk.go +++ b/ast/walk.go @@ -1,5 +1,7 @@ package ast +import "fmt" + // Walk traverses an AST in depth-first order: It starts by calling fn(node); // node must not be nil. If f returns true, Walk invokes f recursively for // each of the non-nil children of node, followed by a call of f(nil). 
@@ -30,6 +32,8 @@ func Walk(node Node, fn func(Node) bool) { for _, l := range n.List.Items { Walk(l, fn) } + default: + fmt.Printf(" unknown type: %T\n", n) } fn(nil) From 71105156e24a90ffdecb9325ac0739a6a5099e2d Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 00:04:31 +0300 Subject: [PATCH 091/137] parser: add Parse() and make it compatible with printer --- parser/parser.go | 10 ++++++++-- parser/parser_test.go | 14 ++++++-------- printer/nodes.go | 1 + printer/printer.go | 28 +++++++++++++++++----------- printer/printer_test.go | 6 +++--- 5 files changed, 35 insertions(+), 24 deletions(-) create mode 100644 printer/nodes.go diff --git a/parser/parser.go b/parser/parser.go index ded6a80..9aec5a0 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -20,12 +20,18 @@ type Parser struct { n int // buffer size (max = 1) } -func New(src []byte) *Parser { +func newParser(src []byte) *Parser { return &Parser{ sc: scanner.New(src), } } +// Parse returns the fully parsed source and returns the abstract syntax tree. +func Parse(src []byte) (ast.Node, error) { + p := newParser(src) + return p.Parse() +} + var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. 
@@ -78,7 +84,7 @@ func (p *Parser) next() (ast.Node, error) { Text: tok.Text, }, nil default: - return nil, fmt.Errorf("expected: IDENT | STRING got: %+v", tok.Type) + return nil, fmt.Errorf("expected: IDENT | STRING | COMMENT got: %+v", tok.Type) } } diff --git a/parser/parser_test.go b/parser/parser_test.go index 5d800a7..eaa9519 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -25,7 +25,7 @@ func TestType(t *testing.T) { } for _, l := range literals { - p := New([]byte(l.src)) + p := newParser([]byte(l.src)) item, err := p.parseObjectItem() if err != nil { t.Error(err) @@ -66,7 +66,7 @@ func TestListType(t *testing.T) { } for _, l := range literals { - p := New([]byte(l.src)) + p := newParser([]byte(l.src)) item, err := p.parseObjectItem() if err != nil { t.Error(err) @@ -140,7 +140,7 @@ func TestObjectType(t *testing.T) { } for _, l := range literals { - p := New([]byte(l.src)) + p := newParser([]byte(l.src)) // p.enableTrace = true item, err := p.parseObjectItem() if err != nil { @@ -184,7 +184,7 @@ func TestObjectKey(t *testing.T) { } for _, k := range keys { - p := New([]byte(k.src)) + p := newParser([]byte(k.src)) keys, err := p.parseObjectKey() if err != nil { t.Fatal(err) @@ -208,7 +208,7 @@ func TestObjectKey(t *testing.T) { } for _, k := range errKeys { - p := New([]byte(k.src)) + p := newParser([]byte(k.src)) _, err := p.parseObjectKey() if err == nil { t.Errorf("case '%s' should give an error", k.src) @@ -284,9 +284,7 @@ func TestParse(t *testing.T) { t.Fatalf("err: %s", err) } - p := New(d) - // p.enableTrace = true - _, err = p.Parse() + _, err = Parse(d) if (err != nil) != tc.Err { t.Fatalf("Input: %s\n\nError: %s", tc.Name, err) } diff --git a/printer/nodes.go b/printer/nodes.go new file mode 100644 index 0000000..b0c0dd8 --- /dev/null +++ b/printer/nodes.go @@ -0,0 +1 @@ +package printer diff --git a/printer/printer.go b/printer/printer.go index a888ce7..1c46343 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -6,17 
+6,23 @@ import ( "io" "text/tabwriter" - "github.com/hashicorp/hcl/hcl" + "github.com/fatih/hcl/ast" ) type printer struct { - cfg Config - obj *hcl.Object + cfg Config + node ast.Node } func (p *printer) output() []byte { var buf bytes.Buffer fmt.Println("STARTING OUTPUT") + + ast.Walk(p.node, func(n ast.Node) bool { + fmt.Printf("n = %+v\n", n) + return true + }) + return buf.Bytes() } @@ -36,10 +42,10 @@ type Config struct { Indent int // default: 0 (all code is indented at least by this much) } -func (c *Config) fprint(output io.Writer, obj *hcl.Object) error { +func (c *Config) fprint(output io.Writer, node ast.Node) error { p := &printer{ - cfg: *c, - obj: obj, + cfg: *c, + node: node, } // TODO(arslan): implement this @@ -81,12 +87,12 @@ func (c *Config) fprint(output io.Writer, obj *hcl.Object) error { return err } -func (c *Config) Fprint(output io.Writer, obj *hcl.Object) error { - return c.fprint(output, obj) +func (c *Config) Fprint(output io.Writer, node ast.Node) error { + return c.fprint(output, node) } -// Fprint "pretty-prints" an HCL object to output +// Fprint "pretty-prints" an HCL node to output // It calls Config.Fprint with default settings. 
-func Fprint(output io.Writer, obj *hcl.Object) error { - return (&Config{Tabwidth: 8}).Fprint(output, obj) +func Fprint(output io.Writer, node ast.Node) error { + return (&Config{Tabwidth: 8}).Fprint(output, node) } diff --git a/printer/printer_test.go b/printer/printer_test.go index 77c3913..98b409b 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -4,7 +4,7 @@ import ( "os" "testing" - "github.com/hashicorp/hcl/hcl" + "github.com/fatih/hcl/parser" ) var complexHcl = `// This comes from Terraform, as a test @@ -52,12 +52,12 @@ output "web_ip" { ` func TestPrint(t *testing.T) { - obj, err := hcl.Parse(complexHcl) + node, err := parser.Parse([]byte(complexHcl)) if err != nil { t.Fatal(err) } - if err := Fprint(os.Stdout, obj); err != nil { + if err := Fprint(os.Stdout, node); err != nil { t.Error(err) } } From 5918e3592bc4088eae49154017a7debcd76ada22 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 01:23:50 +0300 Subject: [PATCH 092/137] printer: initial implementation. 
I'm still experimenting though --- printer/nodes.go | 83 +++++++++++++++++++++++++++++++++++++++++ printer/printer.go | 15 ++------ printer/printer_test.go | 9 ++++- 3 files changed, 95 insertions(+), 12 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index b0c0dd8..9be9f4e 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -1 +1,84 @@ package printer + +import ( + "bytes" + "fmt" + + "github.com/fatih/hcl/ast" +) + +func (p *printer) printNode(n ast.Node) []byte { + var buf bytes.Buffer + + switch t := n.(type) { + case *ast.ObjectList: + fmt.Println("printing objectList", t) + for _, item := range t.Items { + buf.Write(p.printObjectItem(item)) + } + case *ast.ObjectKey: + fmt.Println("printing objectKey", t) + case *ast.ObjectItem: + fmt.Println("printing objectItem", t) + buf.Write(p.printObjectItem(t)) + case *ast.LiteralType: + buf.Write(p.printLiteral(t)) + case *ast.ListType: + buf.Write(p.printList(t)) + case *ast.ObjectType: + fmt.Println("printing ObjectType", t) + default: + fmt.Printf(" unknown type: %T\n", n) + } + + return buf.Bytes() +} + +func (p *printer) printObjectItem(o *ast.ObjectItem) []byte { + var buf bytes.Buffer + + for i, k := range o.Keys { + buf.WriteString(k.Token.Text) + if i != len(o.Keys)-1 || len(o.Keys) == 1 { + buf.WriteString(" ") + } + + // reach end of key + if i == len(o.Keys)-1 { + buf.WriteString("=") + buf.WriteString(" ") + } + } + + buf.Write(p.printNode(o.Val)) + return buf.Bytes() +} + +func (p *printer) printLiteral(l *ast.LiteralType) []byte { + return []byte(l.Token.Text) +} + +func (p *printer) printList(l *ast.ListType) []byte { + var buf bytes.Buffer + buf.WriteString("[") + + for i, item := range l.List { + if item.Pos().Line != l.Lbrack.Line { + // not same line + buf.WriteString("\n") + } + + buf.Write(p.printNode(item)) + + if i != len(l.List)-1 { + buf.WriteString(",") + buf.WriteString(" ") + } else if item.Pos().Line != l.Lbrack.Line { + buf.WriteString(",") + buf.WriteString("\n") + } + 
} + + buf.WriteString("]") + return buf.Bytes() +} diff --git a/printer/printer.go b/printer/printer.go index 1c46343..4121569 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -1,7 +1,6 @@ package printer import ( - "bytes" "fmt" "io" "text/tabwriter" @@ -10,20 +9,14 @@ import ( ) type printer struct { + out []byte // raw printer result cfg Config node ast.Node } func (p *printer) output() []byte { - var buf bytes.Buffer fmt.Println("STARTING OUTPUT") - - ast.Walk(p.node, func(n ast.Node) bool { - fmt.Printf("n = %+v\n", n) - return true - }) - - return buf.Bytes() + return p.printNode(p.node) } // A Mode value is a set of flags (or 0). They control printing. @@ -38,7 +31,7 @@ const ( // A Config node controls the output of Fprint. type Config struct { Mode Mode // default: 0 - Tabwidth int // default: 8 + Tabwidth int // default: 4 Indent int // default: 0 (all code is indented at least by this much) } @@ -94,5 +87,5 @@ func (c *Config) Fprint(output io.Writer, node ast.Node) error { // Fprint "pretty-prints" an HCL node to output // It calls Config.Fprint with default settings. 
func Fprint(output io.Writer, node ast.Node) error { - return (&Config{Tabwidth: 8}).Fprint(output, node) + return (&Config{Tabwidth: 4}).Fprint(output, node) } diff --git a/printer/printer_test.go b/printer/printer_test.go index 98b409b..493733b 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -7,6 +7,12 @@ import ( "github.com/fatih/hcl/parser" ) +var listHCL = `foo = ["fatih", "arslan"]` +var listHCL2 = `foo = [ + "fatih", + "arslan", +]` + var complexHcl = `// This comes from Terraform, as a test variable "foo" { default = "bar" @@ -52,7 +58,8 @@ output "web_ip" { ` func TestPrint(t *testing.T) { - node, err := parser.Parse([]byte(complexHcl)) + // node, err := parser.Parse([]byte(complexHcl)) + node, err := parser.Parse([]byte(listHCL2)) if err != nil { t.Fatal(err) } From 958093df591216cde9847268e24747b66b85de94 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 16:08:09 +0300 Subject: [PATCH 093/137] printer: implement a working version --- printer/nodes.go | 76 ++++++++++++++++++++++++++++++++++------- printer/printer.go | 43 ++--------------------- printer/printer_test.go | 9 +++-- 3 files changed, 71 insertions(+), 57 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 9be9f4e..f03e132 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -3,30 +3,38 @@ package printer import ( "bytes" "fmt" + "io" "github.com/fatih/hcl/ast" ) +const ( + blank = byte(' ') + newline = byte('\n') + tab = byte('\t') +) + func (p *printer) printNode(n ast.Node) []byte { var buf bytes.Buffer switch t := n.(type) { case *ast.ObjectList: - fmt.Println("printing objectList", t) - for _, item := range t.Items { + for i, item := range t.Items { buf.Write(p.printObjectItem(item)) + if i != len(t.Items)-1 { + buf.WriteByte(newline) + } } case *ast.ObjectKey: - fmt.Println("printing objectKey", t) + buf.WriteString(t.Token.Text) case *ast.ObjectItem: - fmt.Println("printing objectItem", t) buf.Write(p.printObjectItem(t)) case 
*ast.LiteralType: - buf.Write(p.printLiteral(t)) + buf.WriteString(t.Token.Text) case *ast.ListType: buf.Write(p.printList(t)) case *ast.ObjectType: - fmt.Println("printing ObjectType", t) + buf.Write(p.printObjectType(t)) default: fmt.Printf(" unknown type: %T\n", n) } @@ -39,14 +47,12 @@ func (p *printer) printObjectItem(o *ast.ObjectItem) []byte { for i, k := range o.Keys { buf.WriteString(k.Token.Text) - if i != len(o.Keys)-1 || len(o.Keys) == 1 { - buf.WriteString(" ") - } + buf.WriteByte(blank) // reach end of key if i == len(o.Keys)-1 { buf.WriteString("=") - buf.WriteString(" ") + buf.WriteByte(blank) } } @@ -58,6 +64,26 @@ func (p *printer) printLiteral(l *ast.LiteralType) []byte { return []byte(l.Token.Text) } +func (p *printer) printObjectType(o *ast.ObjectType) []byte { + var buf bytes.Buffer + buf.WriteString("{") + buf.WriteByte(newline) + + for _, item := range o.List.Items { + // buf.WriteByte(tab) + // buf.Write(p.printObjectItem(item)) + + a := p.printObjectItem(item) + a = indent(a) + buf.Write(a) + + buf.WriteByte(newline) + } + + buf.WriteString("}") + return buf.Bytes() +} + func (p *printer) printList(l *ast.ListType) []byte { var buf bytes.Buffer buf.WriteString("[") @@ -65,20 +91,44 @@ func (p *printer) printList(l *ast.ListType) []byte { for i, item := range l.List { if item.Pos().Line != l.Lbrack.Line { // not same line - buf.WriteString("\n") + buf.WriteByte(newline) } + buf.WriteByte(tab) buf.Write(p.printNode(item)) if i != len(l.List)-1 { buf.WriteString(",") - buf.WriteString(" ") + buf.WriteByte(blank) } else if item.Pos().Line != l.Lbrack.Line { buf.WriteString(",") - buf.WriteString("\n") + buf.WriteByte(newline) } } buf.WriteString("]") return buf.Bytes() } + +func writeBlank(buf io.ByteWriter, indent int) { + for i := 0; i < indent; i++ { + buf.WriteByte(blank) + } +} + +func indent(buf []byte) []byte { + splitted := bytes.Split(buf, []byte{newline}) + newBuf := make([]byte, len(splitted)) + for i, s := range splitted { + s = 
append(s, 0) + copy(s[1:], s[0:]) + s[0] = tab + newBuf = append(newBuf, s...) + + if i != len(splitted)-1 { + newBuf = append(newBuf, newline) + } + } + + return newBuf +} diff --git a/printer/printer.go b/printer/printer.go index 4121569..a8ddca5 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -1,7 +1,6 @@ package printer import ( - "fmt" "io" "text/tabwriter" @@ -15,24 +14,12 @@ type printer struct { } func (p *printer) output() []byte { - fmt.Println("STARTING OUTPUT") return p.printNode(p.node) } -// A Mode value is a set of flags (or 0). They control printing. -type Mode uint - -const ( - RawFormat Mode = 1 << iota // do not use a tabwriter; if set, UseSpaces is ignored - TabIndent // use tabs for indentation independent of UseSpaces - UseSpaces // use spaces instead of tabs for alignment -) - // A Config node controls the output of Fprint. type Config struct { - Mode Mode // default: 0 - Tabwidth int // default: 4 - Indent int // default: 0 (all code is indented at least by this much) + SpaceWidth int // if set, it will use spaces instead of tabs for alignment } func (c *Config) fprint(output io.Writer, node ast.Node) error { @@ -41,32 +28,6 @@ func (c *Config) fprint(output io.Writer, node ast.Node) error { node: node, } - // TODO(arslan): implement this - // redirect output through a trimmer to eliminate trailing whitespace - // (Input to a tabwriter must be untrimmed since trailing tabs provide - // formatting information. The tabwriter could provide trimming - // functionality but no tabwriter is used when RawFormat is set.) 
- // output = &trimmer{output: output} - - // redirect output through a tabwriter if necessary - if c.Mode&RawFormat == 0 { - minwidth := c.Tabwidth - - padchar := byte('\t') - if c.Mode&UseSpaces != 0 { - padchar = ' ' - } - - twmode := tabwriter.DiscardEmptyColumns - if c.Mode&TabIndent != 0 { - minwidth = 0 - twmode |= tabwriter.TabIndent - } - - output = tabwriter.NewWriter(output, minwidth, c.Tabwidth, 1, padchar, twmode) - } - - // write printer result via tabwriter/trimmer to output if _, err := output.Write(p.output()); err != nil { return err } @@ -87,5 +48,5 @@ func (c *Config) Fprint(output io.Writer, node ast.Node) error { // Fprint "pretty-prints" an HCL node to output // It calls Config.Fprint with default settings. func Fprint(output io.Writer, node ast.Node) error { - return (&Config{Tabwidth: 4}).Fprint(output, node) + return (&Config{}).Fprint(output, node) } diff --git a/printer/printer_test.go b/printer/printer_test.go index 493733b..e7bf891 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -1,6 +1,7 @@ package printer import ( + "fmt" "os" "testing" @@ -9,7 +10,7 @@ import ( var listHCL = `foo = ["fatih", "arslan"]` var listHCL2 = `foo = [ - "fatih", + "fatih", "arslan", ]` @@ -58,8 +59,8 @@ output "web_ip" { ` func TestPrint(t *testing.T) { - // node, err := parser.Parse([]byte(complexHcl)) - node, err := parser.Parse([]byte(listHCL2)) + node, err := parser.Parse([]byte(complexHcl)) + // node, err := parser.Parse([]byte(listHCL2)) if err != nil { t.Fatal(err) } @@ -67,4 +68,6 @@ func TestPrint(t *testing.T) { if err := Fprint(os.Stdout, node); err != nil { t.Error(err) } + + fmt.Println("") } From 4d7ec81122c971ae8e43fe720da642c2fd7b45ad Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 16:12:08 +0300 Subject: [PATCH 094/137] printer: add newline after each object --- printer/nodes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/printer/nodes.go b/printer/nodes.go index f03e132..f49ab65 
100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -22,7 +22,7 @@ func (p *printer) printNode(n ast.Node) []byte { for i, item := range t.Items { buf.Write(p.printObjectItem(item)) if i != len(t.Items)-1 { - buf.WriteByte(newline) + buf.Write([]byte{newline, newline}) } } case *ast.ObjectKey: From 2ea5683566f1d931309df5e09a44c885510b81c8 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 16:13:06 +0300 Subject: [PATCH 095/137] printer: simplify indenting of objects --- printer/nodes.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index f49ab65..0b97346 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -70,13 +70,7 @@ func (p *printer) printObjectType(o *ast.ObjectType) []byte { buf.WriteByte(newline) for _, item := range o.List.Items { - // buf.WriteByte(tab) - // buf.Write(p.printObjectItem(item)) - - a := p.printObjectItem(item) - a = indent(a) - buf.Write(a) - + buf.Write(indent(p.printObjectItem(item))) buf.WriteByte(newline) } From 0fc42b65dfd68c066a23d557f54d59074f4045c0 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 17:51:18 +0300 Subject: [PATCH 096/137] printer: implement file tests --- printer/nodes.go | 20 ++-- printer/printer_test.go | 168 ++++++++++++++++++++--------- printer/testdata/complexhcl.golden | 39 +++++++ printer/testdata/complexhcl.input | 42 ++++++++ 4 files changed, 205 insertions(+), 64 deletions(-) create mode 100644 printer/testdata/complexhcl.golden create mode 100644 printer/testdata/complexhcl.input diff --git a/printer/nodes.go b/printer/nodes.go index 0b97346..f338250 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -111,18 +111,14 @@ func writeBlank(buf io.ByteWriter, indent int) { } func indent(buf []byte) []byte { - splitted := bytes.Split(buf, []byte{newline}) - newBuf := make([]byte, len(splitted)) - for i, s := range splitted { - s = append(s, 0) - copy(s[1:], s[0:]) - s[0] = tab - newBuf = append(newBuf, 
s...) - - if i != len(splitted)-1 { - newBuf = append(newBuf, newline) + var res []byte + bol := true + for _, c := range buf { + if bol && c != '\n' { + res = append(res, []byte{tab}...) } + res = append(res, c) + bol = c == '\n' } - - return newBuf + return res } diff --git a/printer/printer_test.go b/printer/printer_test.go index e7bf891..1141e85 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -1,73 +1,137 @@ package printer import ( + "bytes" + "errors" + "flag" "fmt" - "os" + "io/ioutil" + "path/filepath" "testing" "github.com/fatih/hcl/parser" ) -var listHCL = `foo = ["fatih", "arslan"]` -var listHCL2 = `foo = [ - "fatih", - "arslan", -]` +var update = flag.Bool("update", false, "update golden files") -var complexHcl = `// This comes from Terraform, as a test -variable "foo" { - default = "bar" - description = "bar" +const ( + dataDir = "testdata" +) + +type entry struct { + source, golden string } -provider "aws" { - access_key = "foo" - secret_key = "bar" +// Use go test -update to create/update the respective golden files. 
+var data = []entry{ + {"complexhcl.input", "complexhcl.golden"}, } -provider "do" { - api_key = "${var.foo}" +func TestFiles(t *testing.T) { + for _, e := range data { + source := filepath.Join(dataDir, e.source) + golden := filepath.Join(dataDir, e.golden) + check(t, source, golden) + } } -resource "aws_security_group" "firewall" { - count = 5 -} - -resource aws_instance "web" { - ami = "${var.foo}" - security_groups = [ - "foo", - "${aws_security_group.firewall.foo}" - ] - - network_interface { - device_index = 0 - description = "Main network interface" - } -} - -resource "aws_instance" "db" { - security_groups = "${aws_security_group.firewall.*.id}" - VPC = "foo" - - depends_on = ["aws_instance.web"] -} - -output "web_ip" { - value = "${aws_instance.web.private_ip}" -} -` - -func TestPrint(t *testing.T) { - node, err := parser.Parse([]byte(complexHcl)) - // node, err := parser.Parse([]byte(listHCL2)) +func check(t *testing.T, source, golden string) { + src, err := ioutil.ReadFile(source) if err != nil { - t.Fatal(err) - } - - if err := Fprint(os.Stdout, node); err != nil { t.Error(err) + return } - fmt.Println("") + res, err := format(src) + if err != nil { + t.Error(err) + return + } + + // update golden files if necessary + if *update { + if err := ioutil.WriteFile(golden, res, 0644); err != nil { + t.Error(err) + } + return + } + + // get golden + gld, err := ioutil.ReadFile(golden) + if err != nil { + t.Error(err) + return + } + + // formatted source and golden must be the same + if err := diff(source, golden, res, gld); err != nil { + t.Error(err) + return + } +} + +// diff compares a and b. 
+func diff(aname, bname string, a, b []byte) error { + var buf bytes.Buffer // holding long error message + + // compare lengths + if len(a) != len(b) { + fmt.Fprintf(&buf, "\nlength changed: len(%s) = %d, len(%s) = %d", aname, len(a), bname, len(b)) + } + + // compare contents + line := 1 + offs := 1 + for i := 0; i < len(a) && i < len(b); i++ { + ch := a[i] + if ch != b[i] { + fmt.Fprintf(&buf, "\n%s:%d:%d: %s", aname, line, i-offs+1, lineAt(a, offs)) + fmt.Fprintf(&buf, "\n%s:%d:%d: %s", bname, line, i-offs+1, lineAt(b, offs)) + fmt.Fprintf(&buf, "\n\n") + break + } + if ch == '\n' { + line++ + offs = i + 1 + } + } + + if buf.Len() > 0 { + return errors.New(buf.String()) + } + return nil +} + +// format parses src, prints the corresponding AST, verifies the resulting +// src is syntactically correct, and returns the resulting src or an error +// if any. +func format(src []byte) ([]byte, error) { + // parse src + node, err := parser.Parse(src) + if err != nil { + return nil, fmt.Errorf("parse: %s\n%s", err, src) + } + + var buf bytes.Buffer + if err := Fprint(&buf, node); err != nil { + return nil, fmt.Errorf("print: %s", err) + } + + // make sure formatted output is syntactically correct + res := buf.Bytes() + + if _, err := parser.Parse(src); err != nil { + return nil, fmt.Errorf("parse: %s\n%s", err, src) + } + + return res, nil +} + +// lineAt returns the line in text starting at offset offs. 
+func lineAt(text []byte, offs int) []byte { + i := offs + for i < len(text) && text[i] != '\n' { + i++ + } + return text[offs:i] } diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden new file mode 100644 index 0000000..10ab6d3 --- /dev/null +++ b/printer/testdata/complexhcl.golden @@ -0,0 +1,39 @@ +variable "foo" = { + default = "bar" + description = "bar" +} + +provider "aws" = { + access_key = "foo" + secret_key = "bar" +} + +provider "do" = { + api_key = "${var.foo}" +} + +resource "aws_security_group" "firewall" = { + count = 5 +} + +resource aws_instance "web" = { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}", + ] + network_interface = { + device_index = 0 + description = "Main network interface" + } +} + +resource "aws_instance" "db" = { + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + depends_on = [ "aws_instance.web"] +} + +output "web_ip" = { + value = "${aws_instance.web.private_ip}" +} diff --git a/printer/testdata/complexhcl.input b/printer/testdata/complexhcl.input new file mode 100644 index 0000000..cccb5b0 --- /dev/null +++ b/printer/testdata/complexhcl.input @@ -0,0 +1,42 @@ +// This comes from Terraform, as a test +variable "foo" { + default = "bar" + description = "bar" +} + +provider "aws" { + access_key = "foo" + secret_key = "bar" +} + +provider "do" { + api_key = "${var.foo}" +} + +resource "aws_security_group" "firewall" { + count = 5 +} + +resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}" + ] + + network_interface { + device_index = 0 + description = "Main network interface" + } +} + +resource "aws_instance" "db" { + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + + depends_on = ["aws_instance.web"] +} + +output "web_ip" { + value = "${aws_instance.web.private_ip}" +} From e9475123627586deb0bad317efc7a86bfc0eb16b Mon Sep 17 00:00:00 2001 From: 
Fatih Arslan Date: Sun, 25 Oct 2015 18:02:40 +0300 Subject: [PATCH 097/137] printer: add spacewidth support --- printer/nodes.go | 13 ++++++--- printer/testdata/complexhcl.golden | 2 +- testdata/complex.hcl | 42 ------------------------------ 3 files changed, 11 insertions(+), 46 deletions(-) delete mode 100644 testdata/complex.hcl diff --git a/printer/nodes.go b/printer/nodes.go index f338250..1d35cfb 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -70,7 +70,7 @@ func (p *printer) printObjectType(o *ast.ObjectType) []byte { buf.WriteByte(newline) for _, item := range o.List.Items { - buf.Write(indent(p.printObjectItem(item))) + buf.Write(p.indent(p.printObjectItem(item))) buf.WriteByte(newline) } @@ -110,12 +110,19 @@ func writeBlank(buf io.ByteWriter, indent int) { } } -func indent(buf []byte) []byte { +func (p *printer) indent(buf []byte) []byte { + prefix := []byte{tab} + if p.cfg.SpaceWidth != 0 { + for i := 0; i < p.cfg.SpaceWidth; i++ { + prefix = append(prefix, blank) + } + } + var res []byte bol := true for _, c := range buf { if bol && c != '\n' { - res = append(res, []byte{tab}...) + res = append(res, prefix...) 
} res = append(res, c) bol = c == '\n' diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden index 10ab6d3..9a1e6ac 100644 --- a/printer/testdata/complexhcl.golden +++ b/printer/testdata/complexhcl.golden @@ -36,4 +36,4 @@ resource "aws_instance" "db" = { output "web_ip" = { value = "${aws_instance.web.private_ip}" -} +} \ No newline at end of file diff --git a/testdata/complex.hcl b/testdata/complex.hcl deleted file mode 100644 index cccb5b0..0000000 --- a/testdata/complex.hcl +++ /dev/null @@ -1,42 +0,0 @@ -// This comes from Terraform, as a test -variable "foo" { - default = "bar" - description = "bar" -} - -provider "aws" { - access_key = "foo" - secret_key = "bar" -} - -provider "do" { - api_key = "${var.foo}" -} - -resource "aws_security_group" "firewall" { - count = 5 -} - -resource aws_instance "web" { - ami = "${var.foo}" - security_groups = [ - "foo", - "${aws_security_group.firewall.foo}" - ] - - network_interface { - device_index = 0 - description = "Main network interface" - } -} - -resource "aws_instance" "db" { - security_groups = "${aws_security_group.firewall.*.id}" - VPC = "foo" - - depends_on = ["aws_instance.web"] -} - -output "web_ip" { - value = "${aws_instance.web.private_ip}" -} From ad50ec59334752a8aabb680b0738263f83dc94d7 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 18:10:34 +0300 Subject: [PATCH 098/137] printer: rename methods to make it more readable --- printer/nodes.go | 43 +++++++++++++++++++---------------------- printer/printer.go | 21 +++++--------------- printer/printer_test.go | 10 +++++++++- 3 files changed, 34 insertions(+), 40 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 1d35cfb..c05a211 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -3,7 +3,6 @@ package printer import ( "bytes" "fmt" - "io" "github.com/fatih/hcl/ast" ) @@ -14,13 +13,14 @@ const ( tab = byte('\t') ) -func (p *printer) printNode(n ast.Node) []byte { +// node +func (p *printer) 
output(n ast.Node) []byte { var buf bytes.Buffer switch t := n.(type) { case *ast.ObjectList: for i, item := range t.Items { - buf.Write(p.printObjectItem(item)) + buf.Write(p.objectItem(item)) if i != len(t.Items)-1 { buf.Write([]byte{newline, newline}) } @@ -28,13 +28,13 @@ func (p *printer) printNode(n ast.Node) []byte { case *ast.ObjectKey: buf.WriteString(t.Token.Text) case *ast.ObjectItem: - buf.Write(p.printObjectItem(t)) + buf.Write(p.objectItem(t)) case *ast.LiteralType: buf.WriteString(t.Token.Text) case *ast.ListType: - buf.Write(p.printList(t)) + buf.Write(p.list(t)) case *ast.ObjectType: - buf.Write(p.printObjectType(t)) + buf.Write(p.objectType(t)) default: fmt.Printf(" unknown type: %T\n", n) } @@ -42,7 +42,7 @@ func (p *printer) printNode(n ast.Node) []byte { return buf.Bytes() } -func (p *printer) printObjectItem(o *ast.ObjectItem) []byte { +func (p *printer) objectItem(o *ast.ObjectItem) []byte { var buf bytes.Buffer for i, k := range o.Keys { @@ -56,21 +56,21 @@ func (p *printer) printObjectItem(o *ast.ObjectItem) []byte { } } - buf.Write(p.printNode(o.Val)) + buf.Write(p.output(o.Val)) return buf.Bytes() } -func (p *printer) printLiteral(l *ast.LiteralType) []byte { +func (p *printer) literal(l *ast.LiteralType) []byte { return []byte(l.Token.Text) } -func (p *printer) printObjectType(o *ast.ObjectType) []byte { +func (p *printer) objectType(o *ast.ObjectType) []byte { var buf bytes.Buffer buf.WriteString("{") buf.WriteByte(newline) for _, item := range o.List.Items { - buf.Write(p.indent(p.printObjectItem(item))) + buf.Write(p.indent(p.objectItem(item))) buf.WriteByte(newline) } @@ -78,7 +78,8 @@ func (p *printer) printObjectType(o *ast.ObjectType) []byte { return buf.Bytes() } -func (p *printer) printList(l *ast.ListType) []byte { +// printList prints a HCL list +func (p *printer) list(l *ast.ListType) []byte { var buf bytes.Buffer buf.WriteString("[") @@ -88,8 +89,7 @@ func (p *printer) printList(l *ast.ListType) []byte { 
buf.WriteByte(newline) } - buf.WriteByte(tab) - buf.Write(p.printNode(item)) + buf.Write(p.indent(p.output(item))) if i != len(l.List)-1 { buf.WriteString(",") @@ -104,18 +104,15 @@ func (p *printer) printList(l *ast.ListType) []byte { return buf.Bytes() } -func writeBlank(buf io.ByteWriter, indent int) { - for i := 0; i < indent; i++ { - buf.WriteByte(blank) - } -} - +// indent indents the lines of the given buffer for each non-empty line func (p *printer) indent(buf []byte) []byte { - prefix := []byte{tab} - if p.cfg.SpaceWidth != 0 { - for i := 0; i < p.cfg.SpaceWidth; i++ { + var prefix []byte + if p.cfg.SpacesWidth != 0 { + for i := 0; i < p.cfg.SpacesWidth; i++ { prefix = append(prefix, blank) } + } else { + prefix = []byte{tab} } var res []byte diff --git a/printer/printer.go b/printer/printer.go index a8ddca5..5bedca0 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -8,27 +8,20 @@ import ( ) type printer struct { - out []byte // raw printer result - cfg Config - node ast.Node -} - -func (p *printer) output() []byte { - return p.printNode(p.node) + cfg Config } // A Config node controls the output of Fprint. type Config struct { - SpaceWidth int // if set, it will use spaces instead of tabs for alignment + SpacesWidth int // if set, it will use spaces instead of tabs for alignment } -func (c *Config) fprint(output io.Writer, node ast.Node) error { +func (c *Config) Fprint(output io.Writer, node ast.Node) error { p := &printer{ - cfg: *c, - node: node, + cfg: *c, } - if _, err := output.Write(p.output()); err != nil { + if _, err := output.Write(p.output(node)); err != nil { return err } @@ -41,10 +34,6 @@ func (c *Config) fprint(output io.Writer, node ast.Node) error { return err } -func (c *Config) Fprint(output io.Writer, node ast.Node) error { - return c.fprint(output, node) -} - // Fprint "pretty-prints" an HCL node to output // It calls Config.Fprint with default settings. 
func Fprint(output io.Writer, node ast.Node) error { diff --git a/printer/printer_test.go b/printer/printer_test.go index 1141e85..208d390 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -113,7 +113,15 @@ func format(src []byte) ([]byte, error) { } var buf bytes.Buffer - if err := Fprint(&buf, node); err != nil { + + // test with Spaces + // cfg := &Config{SpacesWidth: 4} + // if err := cfg.Fprint(&buf, node); err != nil { + // return nil, fmt.Errorf("print: %s", err) + // } + + cfg := &Config{} + if err := cfg.Fprint(&buf, node); err != nil { return nil, fmt.Errorf("print: %s", err) } From d94e8e9409da73cf776c0febea4fd85ca7ac0367 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 18:14:16 +0300 Subject: [PATCH 099/137] hcl: add package comments --- ast/ast.go | 2 ++ parser/parser.go | 2 ++ printer/printer.go | 1 + token/token.go | 2 ++ 4 files changed, 7 insertions(+) diff --git a/ast/ast.go b/ast/ast.go index 25b42da..ad5ad5a 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -1,3 +1,5 @@ +// Package ast declares the types used to represent syntax trees for HCL +// (HashiCorp Configuration Language) package ast import "github.com/fatih/hcl/token" diff --git a/parser/parser.go b/parser/parser.go index 9aec5a0..4146077 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1,3 +1,5 @@ +// Package parser implements a parser for HCL (HashiCorp Configuration +// Language) package parser import ( diff --git a/printer/printer.go b/printer/printer.go index 5bedca0..a34d160 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -1,3 +1,4 @@ +// Package printer implements printing of AST nodes to HCL format. 
package printer import ( diff --git a/token/token.go b/token/token.go index 7c51f69..3b7581a 100644 --- a/token/token.go +++ b/token/token.go @@ -1,3 +1,5 @@ +// Package token defines constants representing the lexical tokens for HCL +// (HashiCorp Configuration Language) package token import ( From 94c0e1e8d42e1b06392bf30797a708db4cf5c404 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 18:18:26 +0300 Subject: [PATCH 100/137] printer: mess up input :) --- printer/testdata/complexhcl.input | 44 +++++++++++++++++-------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/printer/testdata/complexhcl.input b/printer/testdata/complexhcl.input index cccb5b0..fa0ea8e 100644 --- a/printer/testdata/complexhcl.input +++ b/printer/testdata/complexhcl.input @@ -1,42 +1,46 @@ // This comes from Terraform, as a test variable "foo" { - default = "bar" - description = "bar" + default = "bar" + description = "bar" } provider "aws" { - access_key = "foo" - secret_key = "bar" + access_key ="foo" + secret_key = "bar" } -provider "do" { + provider "do" { api_key = "${var.foo}" } resource "aws_security_group" "firewall" { - count = 5 -} + count = 5 + } -resource aws_instance "web" { - ami = "${var.foo}" - security_groups = [ - "foo", - "${aws_security_group.firewall.foo}" - ] - - network_interface { - device_index = 0 - description = "Main network interface" + resource aws_instance "web" { + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}" + ] + + network_interface { + device_index = 0 + description = "Main network interface" + } } -} resource "aws_instance" "db" { - security_groups = "${aws_security_group.firewall.*.id}" + security_groups = "${aws_security_group.firewall.*.id}" VPC = "foo" depends_on = ["aws_instance.web"] + + + } output "web_ip" { - value = "${aws_instance.web.private_ip}" + + value="${aws_instance.web.private_ip}" } From 69f125c80fb44eb767e35d1cef62c41b36490fc3 Mon Sep 17 00:00:00 2001 From: 
Fatih Arslan Date: Sun, 25 Oct 2015 18:45:54 +0300 Subject: [PATCH 101/137] fmt: initial working version of hclfmt --- {fmt => cmd/hclfmt}/fmt.go | 28 ++++++++++++++++++---------- printer/printer.go | 21 ++++++++++++++++++++- 2 files changed, 38 insertions(+), 11 deletions(-) rename {fmt => cmd/hclfmt}/fmt.go (83%) diff --git a/fmt/fmt.go b/cmd/hclfmt/fmt.go similarity index 83% rename from fmt/fmt.go rename to cmd/hclfmt/fmt.go index 707fa4b..987f34c 100644 --- a/fmt/fmt.go +++ b/cmd/hclfmt/fmt.go @@ -12,7 +12,14 @@ import ( "runtime/pprof" "strings" - "github.com/hashicorp/hcl" + "github.com/fatih/hcl/printer" +) + +var ( + write = flag.Bool("w", false, "write result to (source) file instead of stdout") + + // debugging + cpuprofile = flag.String("cpuprofile", "", "write cpu profile to this file") ) func main() { @@ -23,12 +30,6 @@ func main() { } func realMain() error { - var ( - write = flag.Bool("w", false, "write result to (source) file instead of stdout") - - // debugging - cpuprofile = flag.String("cpuprofile", "", "write cpu profile to this file") - ) flag.Usage = usage flag.Parse() @@ -114,11 +115,18 @@ func processFile(filename string, in io.Reader, out io.Writer, stdin bool) error return err } - obj, err := hcl.Parse(string(src)) + res, err := printer.Format(src) if err != nil { return err } - fmt.Printf("obj = %+v\n", obj) - return errors.New("not imlemented yet") + if *write { + err = ioutil.WriteFile(filename, res, 0644) + if err != nil { + return err + } + } + + _, err = out.Write(res) + return err } diff --git a/printer/printer.go b/printer/printer.go index a34d160..a9a0d78 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -2,12 +2,16 @@ package printer import ( + "bytes" "io" "text/tabwriter" "github.com/fatih/hcl/ast" + "github.com/fatih/hcl/parser" ) +var DefaultConfig = Config{} + type printer struct { cfg Config } @@ -38,5 +42,20 @@ func (c *Config) Fprint(output io.Writer, node ast.Node) error { // Fprint "pretty-prints" an HCL node 
to output // It calls Config.Fprint with default settings. func Fprint(output io.Writer, node ast.Node) error { - return (&Config{}).Fprint(output, node) + return DefaultConfig.Fprint(output, node) +} + +// Format formats src HCL and returns the result. +func Format(src []byte) ([]byte, error) { + node, err := parser.Parse(src) + if err != nil { + return nil, err + } + + var buf bytes.Buffer + if err := DefaultConfig.Fprint(&buf, node); err != nil { + return nil, err + } + + return buf.Bytes(), nil } From 5e525249f31ac023c9a909f3115ccdb8be37a6ce Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 18:59:29 +0300 Subject: [PATCH 102/137] printer: fix assigning equal sign for multi keys --- printer/nodes.go | 2 +- printer/testdata/complexhcl.golden | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index c05a211..689cc56 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -50,7 +50,7 @@ func (p *printer) objectItem(o *ast.ObjectItem) []byte { buf.WriteByte(blank) // reach end of key - if i == len(o.Keys)-1 { + if i == len(o.Keys)-1 && len(o.Keys) == 1 { buf.WriteString("=") buf.WriteByte(blank) } diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden index 9a1e6ac..d881e91 100644 --- a/printer/testdata/complexhcl.golden +++ b/printer/testdata/complexhcl.golden @@ -1,22 +1,22 @@ -variable "foo" = { +variable "foo" { default = "bar" description = "bar" } -provider "aws" = { +provider "aws" { access_key = "foo" secret_key = "bar" } -provider "do" = { +provider "do" { api_key = "${var.foo}" } -resource "aws_security_group" "firewall" = { +resource "aws_security_group" "firewall" { count = 5 } -resource aws_instance "web" = { +resource aws_instance "web" { ami = "${var.foo}" security_groups = [ "foo", @@ -28,12 +28,12 @@ resource aws_instance "web" = { } } -resource "aws_instance" "db" = { +resource "aws_instance" "db" { security_groups = 
"${aws_security_group.firewall.*.id}" VPC = "foo" depends_on = [ "aws_instance.web"] } -output "web_ip" = { +output "web_ip" { value = "${aws_instance.web.private_ip}" } \ No newline at end of file From ccbedaa03296be3d95dd748b0106d3bafda8c902 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 19:09:22 +0300 Subject: [PATCH 103/137] printer: fix printing of lists --- printer/nodes.go | 8 +++++--- printer/testdata/complexhcl.golden | 4 +++- printer/testdata/complexhcl.input | 2 ++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 689cc56..ecbdba5 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -85,12 +85,14 @@ func (p *printer) list(l *ast.ListType) []byte { for i, item := range l.List { if item.Pos().Line != l.Lbrack.Line { - // not same line + // multiline list, add newline before we add each item buf.WriteByte(newline) + // also indent each line + buf.Write(p.indent(p.output(item))) + } else { + buf.Write(p.output(item)) } - buf.Write(p.indent(p.output(item))) - if i != len(l.List)-1 { buf.WriteString(",") buf.WriteByte(blank) diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden index d881e91..f6bd3cb 100644 --- a/printer/testdata/complexhcl.golden +++ b/printer/testdata/complexhcl.golden @@ -3,6 +3,8 @@ variable "foo" { description = "bar" } +developer = ["fatih", "arslan"] + provider "aws" { access_key = "foo" secret_key = "bar" @@ -31,7 +33,7 @@ resource aws_instance "web" { resource "aws_instance" "db" { security_groups = "${aws_security_group.firewall.*.id}" VPC = "foo" - depends_on = [ "aws_instance.web"] + depends_on = ["aws_instance.web"] } output "web_ip" { diff --git a/printer/testdata/complexhcl.input b/printer/testdata/complexhcl.input index fa0ea8e..53b5cd2 100644 --- a/printer/testdata/complexhcl.input +++ b/printer/testdata/complexhcl.input @@ -4,6 +4,8 @@ variable "foo" { description = "bar" } +developer = [ "fatih", "arslan"] + provider 
"aws" { access_key ="foo" secret_key = "bar" From ea21641224049e18d0c286a5d3a2b90fc58b820e Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 19:27:26 +0300 Subject: [PATCH 104/137] Add README file --- README.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..f676fb2 --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +# HCL [![GoDoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](http://godoc.org/github.com/fatih/hcl) [![Build Status](http://img.shields.io/travis/fatih/hcl.svg?style=flat-square)](https://travis-ci.org/fatih/hcl) + +HCL is a lexer and parser family written in Go for +[HCL](https://github.com/hashicorp/hcl) (Hashicorp Configuration Language). It +has several components, similar to Go's own parser family. It provides a set of +packages to write tools and customize files written in HCL. For example both +`hclfmt` and `hcl2json` is written based on these tools. + +## API + +If you are already familiar with Go's own parser family it's really easy to +dive. It basically resembles the same logic. Howser there several differences +and the implemntation is completely different. Right now it contains the +following packages: + +* `token`: defines constants reresenting the lexical tokens for a scanned HCL file. +* `scanner`: scanner is a lexical scanner. It scans a given HCL file and + returns a stream of tokens. +* `ast`: declares the types used to repesent the syntax tree for parsed HCL files. +* `parser`: parses a given HCL file and creates a AST representation +* `printer`: prints any given ast node and formats + +## Why did you create it? + +The whole parser familiy was created because I wanted a proper `hclfmt` +command, which like `gofmt` formats a HCL file. 
I didn't want to use +[HCL](https://github.com/hashicorp/hcl) in the first place, because the lexer +and parser is generated and it doesn't expose any kind of flexibility. + +Another reason was that I wanted to learn and experience how to implement a +proper lexer and parser in Go. It was really fun and I think it was worht it. + +## License + +The MIT License (MIT) - see +[`LICENSE.md`](https://github.com/fatih/hcl/blob/master/LICENSE.md) for more +details + From 5329930d861d16752c3c9213bc931cf7294d863e Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 19:29:57 +0300 Subject: [PATCH 105/137] BSD 3-clause license --- LICENSE | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2dd5110 --- /dev/null +++ b/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2015, Fatih Arslan +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of hcl nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. From 0f1099656cc186160e1be4d9014f0acb46fcf386 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 19:30:31 +0300 Subject: [PATCH 106/137] README.md: update license information --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f676fb2..87b0da8 100644 --- a/README.md +++ b/README.md @@ -23,16 +23,17 @@ following packages: ## Why did you create it? The whole parser familiy was created because I wanted a proper `hclfmt` -command, which like `gofmt` formats a HCL file. I didn't want to use -[HCL](https://github.com/hashicorp/hcl) in the first place, because the lexer -and parser is generated and it doesn't expose any kind of flexibility. +command, which like `gofmt` formats a HCL file. I didn't want to use the +package [github/hashicorp/hcl](https://github.com/hashicorp/hcl) in the first +place, because the lexer and parser is generated and it doesn't expose any kind +of flexibility. Another reason was that I wanted to learn and experience how to implement a proper lexer and parser in Go. It was really fun and I think it was worht it. 
## License -The MIT License (MIT) - see -[`LICENSE.md`](https://github.com/fatih/hcl/blob/master/LICENSE.md) for more +The BSD 3-Clause License - see +[`LICENSE`](https://github.com/fatih/hcl/blob/master/LICENSE.md) for more details From df379b5fc1f63654e6ec567cf7c0aa3226e99e3f Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 19:43:02 +0300 Subject: [PATCH 107/137] hcl: move fmt to hclfmt repository --- README.md | 3 +- cmd/hclfmt/fmt.go | 132 ---------------------------------------------- 2 files changed, 2 insertions(+), 133 deletions(-) delete mode 100644 cmd/hclfmt/fmt.go diff --git a/README.md b/README.md index 87b0da8..c120e32 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ HCL is a lexer and parser family written in Go for [HCL](https://github.com/hashicorp/hcl) (Hashicorp Configuration Language). It has several components, similar to Go's own parser family. It provides a set of packages to write tools and customize files written in HCL. For example both -`hclfmt` and `hcl2json` is written based on these tools. +[`hclfmt`](https://github.com/fatih/hclfmt) and `hcl2json` is written based on +these tools. 
## API diff --git a/cmd/hclfmt/fmt.go b/cmd/hclfmt/fmt.go deleted file mode 100644 index 987f34c..0000000 --- a/cmd/hclfmt/fmt.go +++ /dev/null @@ -1,132 +0,0 @@ -package main - -import ( - "errors" - "flag" - "fmt" - "go/scanner" - "io" - "io/ioutil" - "os" - "path/filepath" - "runtime/pprof" - "strings" - - "github.com/fatih/hcl/printer" -) - -var ( - write = flag.Bool("w", false, "write result to (source) file instead of stdout") - - // debugging - cpuprofile = flag.String("cpuprofile", "", "write cpu profile to this file") -) - -func main() { - if err := realMain(); err != nil { - fmt.Fprintln(os.Stderr, err.Error()) - os.Exit(1) - } -} - -func realMain() error { - - flag.Usage = usage - flag.Parse() - - if *cpuprofile != "" { - f, err := os.Create(*cpuprofile) - if err != nil { - return fmt.Errorf("creating cpu profile: %s\n", err) - } - defer f.Close() - pprof.StartCPUProfile(f) - defer pprof.StopCPUProfile() - } - - if flag.NArg() == 0 { - if *write { - return errors.New("error: cannot use -w with standard input") - } - - return processFile("", os.Stdin, os.Stdout, true) - } - - for i := 0; i < flag.NArg(); i++ { - path := flag.Arg(i) - switch dir, err := os.Stat(path); { - case err != nil: - report(err) - case dir.IsDir(): - walkDir(path) - default: - if err := processFile(path, nil, os.Stdout, false); err != nil { - report(err) - } - } - } - - return nil -} - -func usage() { - fmt.Fprintf(os.Stderr, "usage: hclfmt [flags] [path ...]\n") - flag.PrintDefaults() - os.Exit(2) -} - -func report(err error) { - scanner.PrintError(os.Stderr, err) -} - -func isHclFile(f os.FileInfo) bool { - // ignore non-hcl files - name := f.Name() - return !f.IsDir() && !strings.HasPrefix(name, ".") && strings.HasSuffix(name, ".hcl") -} - -func walkDir(path string) { - filepath.Walk(path, visitFile) -} - -func visitFile(path string, f os.FileInfo, err error) error { - if err == nil && isHclFile(f) { - err = processFile(path, nil, os.Stdout, false) - } - if err != nil { - 
report(err) - } - return nil -} - -// If in == nil, the source is the contents of the file with the given filename. -func processFile(filename string, in io.Reader, out io.Writer, stdin bool) error { - if in == nil { - f, err := os.Open(filename) - if err != nil { - return err - } - defer f.Close() - in = f - } - - src, err := ioutil.ReadAll(in) - if err != nil { - return err - } - - res, err := printer.Format(src) - if err != nil { - return err - } - - if *write { - err = ioutil.WriteFile(filename, res, 0644) - if err != nil { - return err - } - } - - _, err = out.Write(res) - return err -} From c703010b7f05f4f8fc457be6c75b535c7a061333 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 19:57:07 +0300 Subject: [PATCH 108/137] README.md: small fixes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c120e32..42fe2fe 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# HCL [![GoDoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](http://godoc.org/github.com/fatih/hcl) [![Build Status](http://img.shields.io/travis/fatih/hcl.svg?style=flat-square)](https://travis-ci.org/fatih/hcl) +# hcl [![GoDoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](http://godoc.org/github.com/fatih/hcl) [![Build Status](http://img.shields.io/travis/fatih/hcl.svg?style=flat-square)](https://travis-ci.org/fatih/hcl) HCL is a lexer and parser family written in Go for [HCL](https://github.com/hashicorp/hcl) (Hashicorp Configuration Language). It @@ -35,6 +35,6 @@ proper lexer and parser in Go. It was really fun and I think it was worht it. 
## License The BSD 3-Clause License - see -[`LICENSE`](https://github.com/fatih/hcl/blob/master/LICENSE.md) for more +[`LICENSE`](https://github.com/fatih/hcl/blob/master/LICENSE) for more details From c9e7ec3621b0b765645dcc1fd45079cfc70c7ea7 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 25 Oct 2015 21:18:46 +0300 Subject: [PATCH 109/137] printer: add list example --- printer/printer_test.go | 1 + printer/testdata/list.golden | 27 +++++++++++++++++++++++++++ printer/testdata/list.input | 21 +++++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 printer/testdata/list.golden create mode 100644 printer/testdata/list.input diff --git a/printer/printer_test.go b/printer/printer_test.go index 208d390..1901121 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -25,6 +25,7 @@ type entry struct { // Use go test -update to create/update the respective golden files. var data = []entry{ {"complexhcl.input", "complexhcl.golden"}, + {"list.input", "list.golden"}, } func TestFiles(t *testing.T) { diff --git a/printer/testdata/list.golden b/printer/testdata/list.golden new file mode 100644 index 0000000..0b949f1 --- /dev/null +++ b/printer/testdata/list.golden @@ -0,0 +1,27 @@ +foo = ["fatih", "arslan"] + +foo = ["bar", "qaz"] + +foo = ["zeynep", + "arslan", +] + +foo = ["fatih", "zeynep", + "arslan", +] + +foo = [ + "vim-go", + "golang", + "hcl", +] + +foo = [] + +foo = [1, 2, 3, 4] + +foo = [ + "kenya", + "ethiopia", + "columbia", +] \ No newline at end of file diff --git a/printer/testdata/list.input b/printer/testdata/list.input new file mode 100644 index 0000000..dd68fdd --- /dev/null +++ b/printer/testdata/list.input @@ -0,0 +1,21 @@ +foo = ["fatih", "arslan" ] + +foo = [ "bar", "qaz", ] + +foo = [ "zeynep", +"arslan", ] + +foo = ["fatih", "zeynep", +"arslan", ] + +foo = [ + "vim-go", + "golang", "hcl"] + +foo = [] + +foo = [1, 2,3, 4] + +foo = [ + "kenya", "ethiopia", + "columbia"] From 710dd69efb797f7ee2228f00199bc5a2cd72fb73 
Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 26 Oct 2015 01:34:41 +0300 Subject: [PATCH 110/137] Several changed and improvements --- README.md | 28 +++++++++++----------- parser/test-fixtures/complex.hcl | 41 ++++++++++++++++---------------- printer/nodes.go | 2 +- printer/printer_test.go | 7 +----- printer/testdata/comment.golden | 15 ++++++++++++ printer/testdata/comment.input | 15 ++++++++++++ 6 files changed, 66 insertions(+), 42 deletions(-) create mode 100644 printer/testdata/comment.golden create mode 100644 printer/testdata/comment.input diff --git a/README.md b/README.md index 42fe2fe..2ababa5 100644 --- a/README.md +++ b/README.md @@ -4,33 +4,33 @@ HCL is a lexer and parser family written in Go for [HCL](https://github.com/hashicorp/hcl) (Hashicorp Configuration Language). It has several components, similar to Go's own parser family. It provides a set of packages to write tools and customize files written in HCL. For example both -[`hclfmt`](https://github.com/fatih/hclfmt) and `hcl2json` is written based on -these tools. +[`hclfmt`](https://github.com/fatih/hclfmt) and `hcl2json` (coming soon) is +written based on these tools. ## API If you are already familiar with Go's own parser family it's really easy to -dive. It basically resembles the same logic. Howser there several differences -and the implemntation is completely different. Right now it contains the +dive. It basically resembles the same logic. However there are several differences +and the implementation is completely different. Right now it contains the following packages: -* `token`: defines constants reresenting the lexical tokens for a scanned HCL file. +* `token`: defines constants representing the lexical tokens for a scanned HCL file. * `scanner`: scanner is a lexical scanner. It scans a given HCL file and returns a stream of tokens. -* `ast`: declares the types used to repesent the syntax tree for parsed HCL files. 
+* `ast`: declares the types used to represent the syntax tree for parsed HCL files. * `parser`: parses a given HCL file and creates a AST representation -* `printer`: prints any given ast node and formats +* `printer`: prints any given AST node and formats -## Why did you create it? +## Why -The whole parser familiy was created because I wanted a proper `hclfmt` -command, which like `gofmt` formats a HCL file. I didn't want to use the -package [github/hashicorp/hcl](https://github.com/hashicorp/hcl) in the first -place, because the lexer and parser is generated and it doesn't expose any kind -of flexibility. +The whole parser family was created because I wanted a `hclfmt` command, which +like `gofmt` would format a HCL file. I didn't want to use the package +[github/hashicorp/hcl](https://github.com/hashicorp/hcl) in the first place, +because the lexer and parser is generated and it doesn't expose any kind of +flexibility. Another reason was that I wanted to learn and experience how to implement a -proper lexer and parser in Go. It was really fun and I think it was worht it. +proper lexer and parser in Go. It was really fun and I think it was worth it. 
## License diff --git a/parser/test-fixtures/complex.hcl b/parser/test-fixtures/complex.hcl index cccb5b0..b3bf70d 100644 --- a/parser/test-fixtures/complex.hcl +++ b/parser/test-fixtures/complex.hcl @@ -1,42 +1,41 @@ -// This comes from Terraform, as a test variable "foo" { - default = "bar" - description = "bar" + default = "bar" + description = "bar" } provider "aws" { - access_key = "foo" - secret_key = "bar" + access_key = "foo" + secret_key = "bar" } provider "do" { - api_key = "${var.foo}" + api_key = "${var.foo}" } resource "aws_security_group" "firewall" { - count = 5 + count = 5 } resource aws_instance "web" { - ami = "${var.foo}" - security_groups = [ - "foo", - "${aws_security_group.firewall.foo}" - ] + ami = "${var.foo}" + security_groups = [ + "foo", + "${aws_security_group.firewall.foo}", + ] + network_interface = { + device_index = 0 + description = "Main network interface" + } - network_interface { - device_index = 0 - description = "Main network interface" - } + foo = ["faith", arslan] } resource "aws_instance" "db" { - security_groups = "${aws_security_group.firewall.*.id}" - VPC = "foo" - - depends_on = ["aws_instance.web"] + security_groups = "${aws_security_group.firewall.*.id}" + VPC = "foo" + depends_on = ["aws_instance.web"] } output "web_ip" { - value = "${aws_instance.web.private_ip}" + value = "${aws_instance.web.private_ip}" } diff --git a/printer/nodes.go b/printer/nodes.go index ecbdba5..7aebc59 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -13,7 +13,7 @@ const ( tab = byte('\t') ) -// node +// output prints creates a printable HCL output and returns it. 
func (p *printer) output(n ast.Node) []byte { var buf bytes.Buffer diff --git a/printer/printer_test.go b/printer/printer_test.go index 1901121..185965f 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -26,6 +26,7 @@ type entry struct { var data = []entry{ {"complexhcl.input", "complexhcl.golden"}, {"list.input", "list.golden"}, + {"comment.input", "comment.golden"}, } func TestFiles(t *testing.T) { @@ -115,12 +116,6 @@ func format(src []byte) ([]byte, error) { var buf bytes.Buffer - // test with Spaces - // cfg := &Config{SpacesWidth: 4} - // if err := cfg.Fprint(&buf, node); err != nil { - // return nil, fmt.Errorf("print: %s", err) - // } - cfg := &Config{} if err := cfg.Fprint(&buf, node); err != nil { return nil, fmt.Errorf("print: %s", err) diff --git a/printer/testdata/comment.golden b/printer/testdata/comment.golden new file mode 100644 index 0000000..e32be87 --- /dev/null +++ b/printer/testdata/comment.golden @@ -0,0 +1,15 @@ +// Foo + +/* Bar */ + +/* +/* +Baz +*/ + +# Another + +# Multiple +# Lines + +foo = "bar" diff --git a/printer/testdata/comment.input b/printer/testdata/comment.input new file mode 100644 index 0000000..e32be87 --- /dev/null +++ b/printer/testdata/comment.input @@ -0,0 +1,15 @@ +// Foo + +/* Bar */ + +/* +/* +Baz +*/ + +# Another + +# Multiple +# Lines + +foo = "bar" From 26846b59319ae6733d551c608ccb7af47af31e04 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 26 Oct 2015 01:46:37 +0300 Subject: [PATCH 111/137] parser: fix leaked comment --- parser/test-fixtures/complex.hcl | 2 -- 1 file changed, 2 deletions(-) diff --git a/parser/test-fixtures/complex.hcl b/parser/test-fixtures/complex.hcl index b3bf70d..46981bb 100644 --- a/parser/test-fixtures/complex.hcl +++ b/parser/test-fixtures/complex.hcl @@ -26,8 +26,6 @@ resource aws_instance "web" { device_index = 0 description = "Main network interface" } - - foo = ["faith", arslan] } resource "aws_instance" "db" { From e6b8a3e7b422687988e573ad1e24e7d347950779 
Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Mon, 26 Oct 2015 21:37:17 +0300 Subject: [PATCH 112/137] parser: rename methods --- parser/parser.go | 46 +++++++++++++++++++++---------------------- parser/parser_test.go | 10 +++++----- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 4146077..69cc724 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -38,10 +38,10 @@ var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. func (p *Parser) Parse() (ast.Node, error) { - return p.parseObjectList() + return p.objectList() } -func (p *Parser) parseObjectList() (*ast.ObjectList, error) { +func (p *Parser) objectList() (*ast.ObjectList, error) { defer un(trace(p, "ParseObjectList")) node := &ast.ObjectList{} @@ -79,7 +79,7 @@ func (p *Parser) next() (ast.Node, error) { return nil, errEofToken case token.IDENT, token.STRING: p.unscan() - return p.parseObjectItem() + return p.objectItem() case token.COMMENT: return &ast.Comment{ Start: tok.Pos, @@ -90,11 +90,11 @@ func (p *Parser) next() (ast.Node, error) { } } -// parseObjectItem parses a single object item -func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { +// objectItem parses a single object item +func (p *Parser) objectItem() (*ast.ObjectItem, error) { defer un(trace(p, "ParseObjectItem")) - keys, err := p.parseObjectKey() + keys, err := p.objectKey() if err != nil { return nil, err } @@ -107,7 +107,7 @@ func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { Assign: p.tok.Pos, } - o.Val, err = p.parseType() + o.Val, err = p.object() if err != nil { return nil, err } @@ -118,7 +118,7 @@ func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { Keys: keys, } - o.Val, err = p.parseObjectType() + o.Val, err = p.objectType() if err != nil { return nil, err } @@ -128,8 +128,8 @@ func (p *Parser) parseObjectItem() (*ast.ObjectItem, error) { return nil, fmt.Errorf("not 
yet implemented: %s", p.tok.Type) } -// parseObjectKey parses an object key and returns a ObjectKey AST -func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { +// objectKey parses an object key and returns a ObjectKey AST +func (p *Parser) objectKey() ([]*ast.ObjectKey, error) { keyCount := 0 keys := make([]*ast.ObjectKey, 0) @@ -164,19 +164,19 @@ func (p *Parser) parseObjectKey() ([]*ast.ObjectKey, error) { } } -// parseType parses any type of Type, such as number, bool, string, object or +// object parses any type of object, such as number, bool, string, object or // list. -func (p *Parser) parseType() (ast.Node, error) { +func (p *Parser) object() (ast.Node, error) { defer un(trace(p, "ParseType")) tok := p.scan() switch tok.Type { case token.NUMBER, token.FLOAT, token.BOOL, token.STRING: - return p.parseLiteralType() + return p.literalType() case token.LBRACE: - return p.parseObjectType() + return p.objectType() case token.LBRACK: - return p.parseListType() + return p.listType() case token.COMMENT: // implement comment case token.EOF: @@ -186,8 +186,8 @@ func (p *Parser) parseType() (ast.Node, error) { return nil, fmt.Errorf("Unknown token: %+v", tok) } -// parseObjectType parses an object type and returns a ObjectType AST -func (p *Parser) parseObjectType() (*ast.ObjectType, error) { +// ibjectType parses an object type and returns a ObjectType AST +func (p *Parser) objectType() (*ast.ObjectType, error) { defer un(trace(p, "ParseObjectType")) // we assume that the currently scanned token is a LBRACE @@ -195,7 +195,7 @@ func (p *Parser) parseObjectType() (*ast.ObjectType, error) { Lbrace: p.tok.Pos, } - l, err := p.parseObjectList() + l, err := p.objectList() // if we hit RBRACE, we are good to go (means we parsed all Items), if it's // not a RBRACE, it's an syntax error and we just return it. 
@@ -208,8 +208,8 @@ func (p *Parser) parseObjectType() (*ast.ObjectType, error) { return o, nil } -// parseListType parses a list type and returns a ListType AST -func (p *Parser) parseListType() (*ast.ListType, error) { +// listType parses a list type and returns a ListType AST +func (p *Parser) listType() (*ast.ListType, error) { defer un(trace(p, "ParseListType")) // we assume that the currently scanned token is a LBRACK @@ -221,7 +221,7 @@ func (p *Parser) parseListType() (*ast.ListType, error) { tok := p.scan() switch tok.Type { case token.NUMBER, token.FLOAT, token.STRING: - node, err := p.parseLiteralType() + node, err := p.literalType() if err != nil { return nil, err } @@ -249,8 +249,8 @@ func (p *Parser) parseListType() (*ast.ListType, error) { } } -// parseLiteralType parses a literal type and returns a LiteralType AST -func (p *Parser) parseLiteralType() (*ast.LiteralType, error) { +// literalType parses a literal type and returns a LiteralType AST +func (p *Parser) literalType() (*ast.LiteralType, error) { defer un(trace(p, "ParseLiteral")) return &ast.LiteralType{ diff --git a/parser/parser_test.go b/parser/parser_test.go index eaa9519..8f18e7f 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -26,7 +26,7 @@ func TestType(t *testing.T) { for _, l := range literals { p := newParser([]byte(l.src)) - item, err := p.parseObjectItem() + item, err := p.objectItem() if err != nil { t.Error(err) } @@ -67,7 +67,7 @@ func TestListType(t *testing.T) { for _, l := range literals { p := newParser([]byte(l.src)) - item, err := p.parseObjectItem() + item, err := p.objectItem() if err != nil { t.Error(err) } @@ -142,7 +142,7 @@ func TestObjectType(t *testing.T) { for _, l := range literals { p := newParser([]byte(l.src)) // p.enableTrace = true - item, err := p.parseObjectItem() + item, err := p.objectItem() if err != nil { t.Error(err) } @@ -185,7 +185,7 @@ func TestObjectKey(t *testing.T) { for _, k := range keys { p := newParser([]byte(k.src)) - 
keys, err := p.parseObjectKey() + keys, err := p.objectKey() if err != nil { t.Fatal(err) } @@ -209,7 +209,7 @@ func TestObjectKey(t *testing.T) { for _, k := range errKeys { p := newParser([]byte(k.src)) - _, err := p.parseObjectKey() + _, err := p.objectKey() if err == nil { t.Errorf("case '%s' should give an error", k.src) } From acef702a2f354bfae992ca9b05c28e436b931ad1 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 27 Oct 2015 01:23:22 +0300 Subject: [PATCH 113/137] hcl: support attaching comments to ast and printing them --- ast/ast.go | 22 ++++- parser/parser.go | 142 ++++++++++++++++++++++-------- printer/nodes.go | 21 ++++- printer/testdata/comment.golden | 29 +++--- printer/testdata/comment.input | 29 +++--- printer/testdata/complexhcl.input | 1 - 6 files changed, 182 insertions(+), 62 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index ad5ad5a..62733f1 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -14,10 +14,11 @@ func (ObjectList) node() {} func (ObjectKey) node() {} func (ObjectItem) node() {} -func (Comment) node() {} -func (ObjectType) node() {} -func (LiteralType) node() {} -func (ListType) node() {} +func (Comment) node() {} +func (CommentGroup) node() {} +func (ObjectType) node() {} +func (LiteralType) node() {} +func (ListType) node() {} // ObjectList represents a list of ObjectItems. An HCL file itself is an // ObjectList. @@ -49,6 +50,9 @@ type ObjectItem struct { // string. If key length is larger than one, val can be only of type // Object. Val Node + + LeadComment *CommentGroup // associated lead comment + LineComment *CommentGroup // associated line comment } func (o *ObjectItem) Pos() token.Pos { @@ -109,3 +113,13 @@ type Comment struct { func (c *Comment) Pos() token.Pos { return c.Start } + +// CommentGroup node represents a sequence of comments with no other tokens and +// no empty lines between. 
+type CommentGroup struct { + List []*Comment // len(List) > 0 +} + +func (c *CommentGroup) Pos() token.Pos { + return c.List[0].Pos() +} diff --git a/parser/parser.go b/parser/parser.go index 69cc724..d2664ac 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -14,8 +14,13 @@ import ( type Parser struct { sc *scanner.Scanner - tok token.Token // last read token - comments []*ast.Comment + // Last read token + tok token.Token + + // comments + comments []*ast.CommentGroup + leadComment *ast.CommentGroup // last lead comment + lineComment *ast.CommentGroup // last line comment enableTrace bool indent int @@ -46,48 +51,56 @@ func (p *Parser) objectList() (*ast.ObjectList, error) { node := &ast.ObjectList{} for { - n, err := p.next() + n, err := p.objectItem() if err == errEofToken { break // we are finished } - // we don't return a nil, because might want to use already collected - // items. + // we don't return a nil node, because might want to use already + // collected items. if err != nil { return node, err } - switch t := n.(type) { - case *ast.ObjectItem: - node.Add(t) - case *ast.Comment: - p.comments = append(p.comments, t) - } + node.Add(n) } return node, nil } -// next returns the next node -func (p *Parser) next() (ast.Node, error) { - defer un(trace(p, "ParseNode")) +func (p *Parser) consumeComment() (comment *ast.Comment, endline int) { + endline = p.tok.Pos.Line - tok := p.scan() - - switch tok.Type { - case token.EOF: - return nil, errEofToken - case token.IDENT, token.STRING: - p.unscan() - return p.objectItem() - case token.COMMENT: - return &ast.Comment{ - Start: tok.Pos, - Text: tok.Text, - }, nil - default: - return nil, fmt.Errorf("expected: IDENT | STRING | COMMENT got: %+v", tok.Type) + // count the endline if it's multiline comment, ie starting with /* + if p.tok.Text[1] == '*' { + // don't use range here - no need to decode Unicode code points + for i := 0; i < len(p.tok.Text); i++ { + if p.tok.Text[i] == '\n' { + endline++ + } + } } + + 
comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text} + p.tok = p.sc.Scan() + return +} + +func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) { + var list []*ast.Comment + endline = p.tok.Pos.Line + + for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n { + var comment *ast.Comment + comment, endline = p.consumeComment() + list = append(list, comment) + } + + // add comment group to the comments list + comments = &ast.CommentGroup{List: list} + p.comments = append(p.comments, comments) + + return } // objectItem parses a single object item @@ -107,10 +120,24 @@ func (p *Parser) objectItem() (*ast.ObjectItem, error) { Assign: p.tok.Pos, } + if p.leadComment != nil { + o.LeadComment = p.leadComment + p.leadComment = nil + } + o.Val, err = p.object() if err != nil { return nil, err } + + // do a look-ahead for line comment + p.scan() + if o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil { + o.LineComment = p.lineComment + p.lineComment = nil + } + p.unscan() + return o, nil case token.LBRACE: // object or nested objects @@ -118,10 +145,24 @@ func (p *Parser) objectItem() (*ast.ObjectItem, error) { Keys: keys, } + if p.leadComment != nil { + o.LeadComment = p.leadComment + // free it up so we don't add it for following items + p.leadComment = nil + } + o.Val, err = p.objectType() if err != nil { return nil, err } + + // do a look-ahead for line comment + p.scan() + if o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil { + o.LineComment = p.lineComment + p.lineComment = nil + } + p.unscan() return o, nil } @@ -186,7 +227,7 @@ func (p *Parser) object() (ast.Node, error) { return nil, fmt.Errorf("Unknown token: %+v", tok) } -// ibjectType parses an object type and returns a ObjectType AST +// objectType parses an object type and returns a ObjectType AST func (p *Parser) objectType() (*ast.ObjectType, error) { defer un(trace(p, "ParseObjectType")) @@ -225,13 +266,11 @@ func (p *Parser) listType() 
(*ast.ListType, error) { if err != nil { return nil, err } + l.Add(node) case token.COMMA: // get next list item or we are at the end continue - case token.COMMENT: - // TODO(arslan): parse comment - continue case token.BOOL: // TODO(arslan) should we support? not supported by HCL yet case token.LBRACK: @@ -258,8 +297,9 @@ func (p *Parser) literalType() (*ast.LiteralType, error) { }, nil } -// scan returns the next token from the underlying scanner. -// If a token has been unscanned then read that instead. +// scan returns the next token from the underlying scanner. If a token has +// been unscanned then read that instead. In the process, it collects any +// comment groups encountered, and remembers the last lead and line comments. func (p *Parser) scan() token.Token { // If we have a token on the buffer, then return it. if p.n != 0 { @@ -269,7 +309,39 @@ func (p *Parser) scan() token.Token { // Otherwise read the next token from the scanner and Save it to the buffer // in case we unscan later. + prev := p.tok p.tok = p.sc.Scan() + + if p.tok.Type == token.COMMENT { + var comment *ast.CommentGroup + var endline int + + // fmt.Printf("p.tok.Pos.Line = %+v prev: %d \n", p.tok.Pos.Line, prev.Pos.Line) + if p.tok.Pos.Line == prev.Pos.Line { + // The comment is on same line as the previous token; it + // cannot be a lead comment but may be a line comment. + comment, endline = p.consumeCommentGroup(0) + if p.tok.Pos.Line != endline { + // The next token is on a different line, thus + // the last comment group is a line comment. + p.lineComment = comment + } + } + + // consume successor comments, if any + endline = -1 + for p.tok.Type == token.COMMENT { + comment, endline = p.consumeCommentGroup(1) + } + + if endline+1 == p.tok.Pos.Line { + // The next token is following on the line immediately after the + // comment group, thus the last comment group is a lead comment. 
+ p.leadComment = comment + } + + } + return p.tok } diff --git a/printer/nodes.go b/printer/nodes.go index 7aebc59..bba5acb 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -14,7 +14,7 @@ const ( ) // output prints creates a printable HCL output and returns it. -func (p *printer) output(n ast.Node) []byte { +func (p *printer) output(n interface{}) []byte { var buf bytes.Buffer switch t := n.(type) { @@ -42,9 +42,22 @@ func (p *printer) output(n ast.Node) []byte { return buf.Bytes() } +func (p *printer) comment(c *ast.CommentGroup) []byte { + var buf bytes.Buffer + for _, comment := range c.List { + buf.WriteString(comment.Text) + } + return buf.Bytes() +} + func (p *printer) objectItem(o *ast.ObjectItem) []byte { var buf bytes.Buffer + if o.LeadComment != nil { + buf.Write(p.comment(o.LeadComment)) + buf.WriteByte(newline) + } + for i, k := range o.Keys { buf.WriteString(k.Token.Text) buf.WriteByte(blank) @@ -57,6 +70,12 @@ func (p *printer) objectItem(o *ast.ObjectItem) []byte { } buf.Write(p.output(o.Val)) + + if o.Val.Pos().Line == o.Keys[0].Pos().Line && o.LineComment != nil { + buf.WriteByte(blank) + buf.Write(p.comment(o.LineComment)) + } + return buf.Bytes() } diff --git a/printer/testdata/comment.golden b/printer/testdata/comment.golden index e32be87..f14d003 100644 --- a/printer/testdata/comment.golden +++ b/printer/testdata/comment.golden @@ -1,15 +1,22 @@ -// Foo +// This comes from Terraform, as a test +variable "foo" { + default = "bar" + description = "bar" # yooo +} -/* Bar */ +/* This is a developer test +account and a multine comment */ +developer = ["fatih", "arslan"] // fatih arslan -/* -/* -Baz -*/ +# One line here +numbers = [1, 2] // another line here -# Another +# Another comment +variable = { + description = "bar" # another yooo +} -# Multiple -# Lines - -foo = "bar" +// lead comment +foo = { + bar = "fatih" // line comment 2 +} // line comment 3 \ No newline at end of file diff --git a/printer/testdata/comment.input 
b/printer/testdata/comment.input index e32be87..7f0615c 100644 --- a/printer/testdata/comment.input +++ b/printer/testdata/comment.input @@ -1,15 +1,24 @@ -// Foo + // This comes from Terraform, as a test +variable "foo" { + default = "bar" + description = "bar" # yooo +} -/* Bar */ +/* This is a developer test +account and a multine comment */ +developer = [ "fatih", "arslan"] // fatih arslan -/* -/* -Baz -*/ +# One line here +numbers = [1,2] // another line here -# Another + # Another comment +variable = { + description = "bar" # another yooo +} -# Multiple -# Lines -foo = "bar" +// lead comment +foo { + bar = "fatih" // line comment 2 +} // line comment 3 + diff --git a/printer/testdata/complexhcl.input b/printer/testdata/complexhcl.input index 53b5cd2..aa83f90 100644 --- a/printer/testdata/complexhcl.input +++ b/printer/testdata/complexhcl.input @@ -1,4 +1,3 @@ -// This comes from Terraform, as a test variable "foo" { default = "bar" description = "bar" From bbc2d1992381f85c521611b12d1d21aca93e6601 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 27 Oct 2015 01:26:51 +0300 Subject: [PATCH 114/137] parser: simplify objectItem method --- parser/parser.go | 61 +++++++++++++++--------------------------------- 1 file changed, 19 insertions(+), 42 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index d2664ac..56b6aac 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -112,61 +112,38 @@ func (p *Parser) objectItem() (*ast.ObjectItem, error) { return nil, err } + o := &ast.ObjectItem{ + Keys: keys, + } + + if p.leadComment != nil { + o.LeadComment = p.leadComment + p.leadComment = nil + } + switch p.tok.Type { case token.ASSIGN: - // assignments - o := &ast.ObjectItem{ - Keys: keys, - Assign: p.tok.Pos, - } - - if p.leadComment != nil { - o.LeadComment = p.leadComment - p.leadComment = nil - } - + o.Assign = p.tok.Pos o.Val, err = p.object() if err != nil { return nil, err } - - // do a look-ahead for line comment - p.scan() - if 
o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil { - o.LineComment = p.lineComment - p.lineComment = nil - } - p.unscan() - - return o, nil case token.LBRACE: - // object or nested objects - o := &ast.ObjectItem{ - Keys: keys, - } - - if p.leadComment != nil { - o.LeadComment = p.leadComment - // free it up so we don't add it for following items - p.leadComment = nil - } - o.Val, err = p.objectType() if err != nil { return nil, err } - - // do a look-ahead for line comment - p.scan() - if o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil { - o.LineComment = p.lineComment - p.lineComment = nil - } - p.unscan() - return o, nil } - return nil, fmt.Errorf("not yet implemented: %s", p.tok.Type) + // do a look-ahead for line comment + p.scan() + if o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil { + o.LineComment = p.lineComment + p.lineComment = nil + } + + p.unscan() + return o, nil } // objectKey parses an object key and returns a ObjectKey AST From 3ee0cb44fa333a9e31305e8ee6e718ed4562e7d5 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 27 Oct 2015 01:42:05 +0300 Subject: [PATCH 115/137] printer: fix leadcomments for multiple comments --- printer/nodes.go | 18 +++++++----------- printer/testdata/comment.golden | 7 +++++++ printer/testdata/comment.input | 13 ++++++++++--- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index bba5acb..6dc8e7f 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -42,20 +42,14 @@ func (p *printer) output(n interface{}) []byte { return buf.Bytes() } -func (p *printer) comment(c *ast.CommentGroup) []byte { - var buf bytes.Buffer - for _, comment := range c.List { - buf.WriteString(comment.Text) - } - return buf.Bytes() -} - func (p *printer) objectItem(o *ast.ObjectItem) []byte { var buf bytes.Buffer if o.LeadComment != nil { - buf.Write(p.comment(o.LeadComment)) - buf.WriteByte(newline) + for _, comment := range o.LeadComment.List { + 
buf.WriteString(comment.Text) + buf.WriteByte(newline) + } } for i, k := range o.Keys { @@ -73,7 +67,9 @@ func (p *printer) objectItem(o *ast.ObjectItem) []byte { if o.Val.Pos().Line == o.Keys[0].Pos().Line && o.LineComment != nil { buf.WriteByte(blank) - buf.Write(p.comment(o.LineComment)) + for _, comment := range o.LineComment.List { + buf.WriteString(comment.Text) + } } return buf.Bytes() diff --git a/printer/testdata/comment.golden b/printer/testdata/comment.golden index f14d003..6587adb 100644 --- a/printer/testdata/comment.golden +++ b/printer/testdata/comment.golden @@ -4,6 +4,13 @@ variable "foo" { description = "bar" # yooo } +aligned = { + a = "bar" # yoo1 + default = "bar" #yoo2 + bar = "bar" # yoo3 +} + +// fatih arslan /* This is a developer test account and a multine comment */ developer = ["fatih", "arslan"] // fatih arslan diff --git a/printer/testdata/comment.input b/printer/testdata/comment.input index 7f0615c..725017b 100644 --- a/printer/testdata/comment.input +++ b/printer/testdata/comment.input @@ -4,6 +4,13 @@ variable "foo" { description = "bar" # yooo } +aligned { + a = "bar" # yoo1 + default = "bar" #yoo2 + bar = "bar" # yoo3 +} + +// fatih arslan /* This is a developer test account and a multine comment */ developer = [ "fatih", "arslan"] // fatih arslan @@ -17,8 +24,8 @@ variable = { } -// lead comment + // lead comment foo { - bar = "fatih" // line comment 2 -} // line comment 3 + bar = "fatih" // line comment 2 +} // line comment 3 From b93aefc3c32da12a8d8cd678a53e1cedf99d06de Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 27 Oct 2015 23:51:44 +0300 Subject: [PATCH 116/137] printer: add aligned comment support, still WIP --- printer/nodes.go | 75 ++++++++++++++++++++++++- printer/printer_test.go | 7 ++- printer/testdata/comment.golden | 8 +-- printer/testdata/comment.input | 6 -- printer/testdata/comment_aligned.golden | 12 ++++ printer/testdata/comment_aligned.input | 11 ++++ 6 files changed, 102 insertions(+), 17 
deletions(-) create mode 100644 printer/testdata/comment_aligned.golden create mode 100644 printer/testdata/comment_aligned.input diff --git a/printer/nodes.go b/printer/nodes.go index 6dc8e7f..34ba4b8 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -75,6 +75,51 @@ func (p *printer) objectItem(o *ast.ObjectItem) []byte { return buf.Bytes() } +func (p *printer) alignedItems(items []*ast.ObjectItem) []byte { + var buf bytes.Buffer + + var longestLine int + for _, item := range items { + lineLen := len(item.Keys[0].Token.Text) + len(p.output(item.Val)) + if lineLen > longestLine { + longestLine = lineLen + } + } + + for _, item := range items { + curLen := 0 + for i, k := range item.Keys { + buf.WriteString(k.Token.Text) + buf.WriteByte(blank) + + // reach end of key + if i == len(item.Keys)-1 && len(item.Keys) == 1 { + buf.WriteString("=") + buf.WriteByte(blank) + } + + curLen = len(k.Token.Text) // two blanks and one assign + } + val := p.output(item.Val) + buf.Write(val) + curLen += len(val) + + if item.Val.Pos().Line == item.Keys[0].Pos().Line && item.LineComment != nil { + for i := 0; i < longestLine-curLen+1; i++ { + buf.WriteByte(blank) + } + + for _, comment := range item.LineComment.List { + buf.WriteString(comment.Text) + } + } + + buf.WriteByte(newline) + } + + return buf.Bytes() +} + func (p *printer) literal(l *ast.LiteralType) []byte { return []byte(l.Token.Text) } @@ -84,7 +129,35 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { buf.WriteString("{") buf.WriteByte(newline) - for _, item := range o.List.Items { + // check if we have adjacent one liner items. If yes we'll going to align + // the comments. 
+ var oneLines []*ast.ObjectItem + for i, item := range o.List.Items { + // protect agains slice bounds + if i == len(o.List.Items)-1 { + break + } + + if o.List.Items[i+1].Pos().Line == item.Pos().Line+1 { + oneLines = append(oneLines, item) + } else { + // break in any nonadjacent items + break + } + } + + // fmt.Printf("len(oneLines) = %+v\n", len(oneLines)) + // for _, i := range oneLines { + // a := i.Keys[0] + // fmt.Printf("a = %+v\n", a) + // } + if len(oneLines) != 0 { + items := p.alignedItems(oneLines) + buf.Write(p.indent(items)) + buf.WriteByte(newline) + } + + for _, item := range o.List.Items[len(oneLines):] { buf.Write(p.indent(p.objectItem(item))) buf.WriteByte(newline) } diff --git a/printer/printer_test.go b/printer/printer_test.go index 185965f..7e2b132 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -24,9 +24,10 @@ type entry struct { // Use go test -update to create/update the respective golden files. var data = []entry{ - {"complexhcl.input", "complexhcl.golden"}, - {"list.input", "list.golden"}, - {"comment.input", "comment.golden"}, + // {"complexhcl.input", "complexhcl.golden"}, + // {"list.input", "list.golden"}, + // {"comment.input", "comment.golden"}, + {"comment_aligned.input", "comment_aligned.golden"}, } func TestFiles(t *testing.T) { diff --git a/printer/testdata/comment.golden b/printer/testdata/comment.golden index 6587adb..1d327f6 100644 --- a/printer/testdata/comment.golden +++ b/printer/testdata/comment.golden @@ -4,12 +4,6 @@ variable "foo" { description = "bar" # yooo } -aligned = { - a = "bar" # yoo1 - default = "bar" #yoo2 - bar = "bar" # yoo3 -} - // fatih arslan /* This is a developer test account and a multine comment */ @@ -26,4 +20,4 @@ variable = { // lead comment foo = { bar = "fatih" // line comment 2 -} // line comment 3 \ No newline at end of file +} // line comment 3 diff --git a/printer/testdata/comment.input b/printer/testdata/comment.input index 725017b..cff962e 100644 --- 
a/printer/testdata/comment.input +++ b/printer/testdata/comment.input @@ -4,12 +4,6 @@ variable "foo" { description = "bar" # yooo } -aligned { - a = "bar" # yoo1 - default = "bar" #yoo2 - bar = "bar" # yoo3 -} - // fatih arslan /* This is a developer test account and a multine comment */ diff --git a/printer/testdata/comment_aligned.golden b/printer/testdata/comment_aligned.golden new file mode 100644 index 0000000..ce72661 --- /dev/null +++ b/printer/testdata/comment_aligned.golden @@ -0,0 +1,12 @@ +aligned = { + a = "bar" # yoo1 + default = "bar" # yoo2 + bar = "bar" # yoo3 + fatih = ["fatih", "arslan"] // yoo4 + + deneme = { + bar = "fatih" + } + projects = ["vim-go"] # yoo5 + default = "foo" # yoo6 +} \ No newline at end of file diff --git a/printer/testdata/comment_aligned.input b/printer/testdata/comment_aligned.input new file mode 100644 index 0000000..4a27145 --- /dev/null +++ b/printer/testdata/comment_aligned.input @@ -0,0 +1,11 @@ +aligned { + a = "bar" # yoo1 + default = "bar" # yoo2 + bar = "bar" # yoo3 + fatih = ["fatih", "arslan"] // yoo4 + deneme = { + bar = "fatih" + } + projects = ["vim-go"] # yoo5 + default = "foo" # yoo6 +} From a16955dcadf9d9af6c74845b44e2dae0f04bc060 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 28 Oct 2015 00:59:54 +0300 Subject: [PATCH 117/137] printer: partially fixed aligned comments, still WIP --- printer/nodes.go | 81 +++++++++++++++++-------- printer/testdata/comment_aligned.golden | 10 ++- printer/testdata/comment_aligned.input | 3 + 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 34ba4b8..9e753e2 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -131,38 +131,59 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { // check if we have adjacent one liner items. If yes we'll going to align // the comments. 
- var oneLines []*ast.ObjectItem - for i, item := range o.List.Items { - // protect agains slice bounds - if i == len(o.List.Items)-1 { + var index int + for { + var oneLines []*ast.ObjectItem + for _, item := range o.List.Items[index:] { + // protect agains slice bounds + if index == len(o.List.Items)-1 { + // check for the latest item of a series of one liners in the + // end of a list. + if index != 0 && // do not check if the list length is one + lines(string(p.objectItem(item))) < 1 && // be sure it's really a one line + o.List.Items[index-1].Pos().Line == item.Pos().Line-1 { + + oneLines = append(oneLines, item) + index++ + } + break + } + + if o.List.Items[1+index].Pos().Line == item.Pos().Line+1 { + oneLines = append(oneLines, item) + index++ + } else { + // break in any nonadjacent items + break + } + } + + // fmt.Printf("len(oneLines) = %+v\n", len(oneLines)) + // for _, i := range oneLines { + // a := i.Keys[0] + // fmt.Printf("a = %+v\n", a) + // } + + if len(oneLines) != 0 { + items := p.alignedItems(oneLines) + buf.Write(p.indent(items)) + + if index != len(o.List.Items) { + buf.WriteByte(newline) + } + } + + if index == len(o.List.Items) { break } - if o.List.Items[i+1].Pos().Line == item.Pos().Line+1 { - oneLines = append(oneLines, item) - } else { - // break in any nonadjacent items - break - } - } - - // fmt.Printf("len(oneLines) = %+v\n", len(oneLines)) - // for _, i := range oneLines { - // a := i.Keys[0] - // fmt.Printf("a = %+v\n", a) - // } - if len(oneLines) != 0 { - items := p.alignedItems(oneLines) - buf.Write(p.indent(items)) - buf.WriteByte(newline) - } - - for _, item := range o.List.Items[len(oneLines):] { - buf.Write(p.indent(p.objectItem(item))) + buf.Write(p.indent(p.objectItem(o.List.Items[index]))) buf.WriteByte(newline) + index++ } buf.WriteString("}") + buf.WriteByte(newline) return buf.Bytes() } @@ -216,3 +237,13 @@ func (p *printer) indent(buf []byte) []byte { } return res } + +func lines(txt string) int { + endline := 0 + for 
i := 0; i < len(txt); i++ { + if txt[i] == '\n' { + endline++ + } + } + return endline +} diff --git a/printer/testdata/comment_aligned.golden b/printer/testdata/comment_aligned.golden index ce72661..bab08b0 100644 --- a/printer/testdata/comment_aligned.golden +++ b/printer/testdata/comment_aligned.golden @@ -7,6 +7,12 @@ aligned = { deneme = { bar = "fatih" } + projects = ["vim-go"] # yoo5 - default = "foo" # yoo6 -} \ No newline at end of file + default = "foo" # yoo6 + + example = { + bar = "fatih" + } + +} diff --git a/printer/testdata/comment_aligned.input b/printer/testdata/comment_aligned.input index 4a27145..1b98b48 100644 --- a/printer/testdata/comment_aligned.input +++ b/printer/testdata/comment_aligned.input @@ -8,4 +8,7 @@ aligned { } projects = ["vim-go"] # yoo5 default = "foo" # yoo6 + example = { + bar = "fatih" + } } From 3a165313dbc2b8dc8b502212398709edb267edab Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 28 Oct 2015 01:07:37 +0300 Subject: [PATCH 118/137] printer: improvements on comment aligning --- printer/nodes.go | 8 -------- printer/printer_test.go | 6 +++--- printer/testdata/comment.golden | 2 +- printer/testdata/comment_aligned.golden | 10 ++++------ printer/testdata/comment_aligned.input | 6 +++--- 5 files changed, 11 insertions(+), 21 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 9e753e2..92a7c4c 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -158,16 +158,9 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { } } - // fmt.Printf("len(oneLines) = %+v\n", len(oneLines)) - // for _, i := range oneLines { - // a := i.Keys[0] - // fmt.Printf("a = %+v\n", a) - // } - if len(oneLines) != 0 { items := p.alignedItems(oneLines) buf.Write(p.indent(items)) - if index != len(o.List.Items) { buf.WriteByte(newline) } @@ -183,7 +176,6 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { } buf.WriteString("}") - buf.WriteByte(newline) return buf.Bytes() } diff --git a/printer/printer_test.go 
b/printer/printer_test.go index 7e2b132..54c86dd 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -24,9 +24,9 @@ type entry struct { // Use go test -update to create/update the respective golden files. var data = []entry{ - // {"complexhcl.input", "complexhcl.golden"}, - // {"list.input", "list.golden"}, - // {"comment.input", "comment.golden"}, + {"complexhcl.input", "complexhcl.golden"}, + {"list.input", "list.golden"}, + {"comment.input", "comment.golden"}, {"comment_aligned.input", "comment_aligned.golden"}, } diff --git a/printer/testdata/comment.golden b/printer/testdata/comment.golden index 1d327f6..65a4e4e 100644 --- a/printer/testdata/comment.golden +++ b/printer/testdata/comment.golden @@ -20,4 +20,4 @@ variable = { // lead comment foo = { bar = "fatih" // line comment 2 -} // line comment 3 +} // line comment 3 \ No newline at end of file diff --git a/printer/testdata/comment_aligned.golden b/printer/testdata/comment_aligned.golden index bab08b0..60347c3 100644 --- a/printer/testdata/comment_aligned.golden +++ b/printer/testdata/comment_aligned.golden @@ -7,12 +7,10 @@ aligned = { deneme = { bar = "fatih" } + bar = "bar" # yoo3 + fatih = ["fatih", "arslan"] // yoo4 - projects = ["vim-go"] # yoo5 - default = "foo" # yoo6 - - example = { + deneme = { bar = "fatih" } - -} +} \ No newline at end of file diff --git a/printer/testdata/comment_aligned.input b/printer/testdata/comment_aligned.input index 1b98b48..87c330a 100644 --- a/printer/testdata/comment_aligned.input +++ b/printer/testdata/comment_aligned.input @@ -6,9 +6,9 @@ aligned { deneme = { bar = "fatih" } - projects = ["vim-go"] # yoo5 - default = "foo" # yoo6 - example = { + bar = "bar" # yoo3 + fatih = ["fatih", "arslan"] // yoo4 + deneme = { bar = "fatih" } } From b4756273daed51c066000648409d75be6b76f5df Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 28 Oct 2015 02:40:51 +0300 Subject: [PATCH 119/137] printer: now aligned comments are working as expected --- 
printer/nodes.go | 51 +++++++++++++++++-------- printer/testdata/comment_aligned.golden | 5 ++- printer/testdata/comment_aligned.input | 4 +- 3 files changed, 40 insertions(+), 20 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 92a7c4c..2fcd6be 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -129,39 +129,52 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { buf.WriteString("{") buf.WriteByte(newline) - // check if we have adjacent one liner items. If yes we'll going to align - // the comments. var index int for { + // check if we have adjacent one liner items. If yes we'll going to align + // the comments. var oneLines []*ast.ObjectItem for _, item := range o.List.Items[index:] { - // protect agains slice bounds - if index == len(o.List.Items)-1 { - // check for the latest item of a series of one liners in the - // end of a list. - if index != 0 && // do not check if the list length is one - lines(string(p.objectItem(item))) < 1 && // be sure it's really a one line - o.List.Items[index-1].Pos().Line == item.Pos().Line-1 { - - oneLines = append(oneLines, item) - index++ - } + // we don't group one line lists + if len(o.List.Items) == 1 { break } - if o.List.Items[1+index].Pos().Line == item.Pos().Line+1 { + cur := lines(string(p.objectItem(item))) + if cur != 1 { + break + } + + next := 0 + if index != len(o.List.Items)-1 { + next = lines(string(p.objectItem(o.List.Items[index+0]))) + } + + prev := 0 + if index != 0 { + prev = lines(string(p.objectItem(o.List.Items[index-1]))) + } + + if cur == next { + oneLines = append(oneLines, item) + index++ + } else if cur == prev { oneLines = append(oneLines, item) index++ } else { - // break in any nonadjacent items break } } if len(oneLines) != 0 { items := p.alignedItems(oneLines) + + // put newlines if the items are between other non aligned items + if index != len(oneLines) { + buf.WriteByte(newline) + } buf.Write(p.indent(items)) - if index != len(o.List.Items) { + if index != 
len(o.List.Items) && len(oneLines) > 1 { buf.WriteByte(newline) } } @@ -237,5 +250,11 @@ func lines(txt string) int { endline++ } } + + // some txt do not have any kinde of newlines, treat them also as a one + // liner + if endline == 0 { + endline = 1 + } return endline } diff --git a/printer/testdata/comment_aligned.golden b/printer/testdata/comment_aligned.golden index 60347c3..16f90c8 100644 --- a/printer/testdata/comment_aligned.golden +++ b/printer/testdata/comment_aligned.golden @@ -7,8 +7,9 @@ aligned = { deneme = { bar = "fatih" } - bar = "bar" # yoo3 - fatih = ["fatih", "arslan"] // yoo4 + + bar = "bar" # yoo5 + fatih = ["fatih", "arslan"] // yoo6 deneme = { bar = "fatih" diff --git a/printer/testdata/comment_aligned.input b/printer/testdata/comment_aligned.input index 87c330a..477aa7b 100644 --- a/printer/testdata/comment_aligned.input +++ b/printer/testdata/comment_aligned.input @@ -6,8 +6,8 @@ aligned { deneme = { bar = "fatih" } - bar = "bar" # yoo3 - fatih = ["fatih", "arslan"] // yoo4 + bar = "bar" # yoo5 + fatih = ["fatih", "arslan"] // yoo6 deneme = { bar = "fatih" } From 877d63151c37c57b7cde1e056861546394d8d410 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 30 Oct 2015 21:49:10 +0300 Subject: [PATCH 120/137] printer: support lead comment on aligned items --- printer/nodes.go | 72 ++++++++++++++----------- printer/testdata/comment.input | 2 +- printer/testdata/comment_aligned.golden | 23 ++++---- printer/testdata/comment_aligned.input | 18 ++++--- printer/testdata/complexhcl.golden | 2 + 5 files changed, 68 insertions(+), 49 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 2fcd6be..84da4e1 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -86,7 +86,14 @@ func (p *printer) alignedItems(items []*ast.ObjectItem) []byte { } } - for _, item := range items { + for i, item := range items { + if item.LeadComment != nil { + for _, comment := range item.LeadComment.List { + buf.WriteString(comment.Text) + 
buf.WriteByte(newline) + } + } + curLen := 0 for i, k := range item.Keys { buf.WriteString(k.Token.Text) @@ -114,7 +121,10 @@ func (p *printer) alignedItems(items []*ast.ObjectItem) []byte { } } - buf.WriteByte(newline) + // do not print for the last item + if i != len(items)-1 { + buf.WriteByte(newline) + } } return buf.Bytes() @@ -133,21 +143,24 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { for { // check if we have adjacent one liner items. If yes we'll going to align // the comments. - var oneLines []*ast.ObjectItem - for _, item := range o.List.Items[index:] { + var aligned []*ast.ObjectItem + for i, item := range o.List.Items[index:] { // we don't group one line lists if len(o.List.Items) == 1 { break } + // one means a oneliner with out any lead comment + // two means a oneliner with lead comment + // anything else might be something else cur := lines(string(p.objectItem(item))) - if cur != 1 { + if cur > 2 { break } next := 0 if index != len(o.List.Items)-1 { - next = lines(string(p.objectItem(o.List.Items[index+0]))) + next = lines(string(p.objectItem(o.List.Items[index+1]))) } prev := 0 @@ -155,37 +168,42 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { prev = lines(string(p.objectItem(o.List.Items[index-1]))) } - if cur == next { - oneLines = append(oneLines, item) + if (cur == next && next == 1) || (next == 1 && cur == 2 && i == 0) { + aligned = append(aligned, item) index++ - } else if cur == prev { - oneLines = append(oneLines, item) + } else if (cur == prev && prev == 1) || (prev == 2 && cur == 1) { + aligned = append(aligned, item) index++ } else { break } } - if len(oneLines) != 0 { - items := p.alignedItems(oneLines) + // fmt.Printf("==================> len(aligned) = %+v\n", len(aligned)) + // for _, b := range aligned { + // fmt.Printf("b = %+v\n", b) + // } - // put newlines if the items are between other non aligned items - if index != len(oneLines) { - buf.WriteByte(newline) - } - buf.Write(p.indent(items)) - if index 
!= len(o.List.Items) && len(oneLines) > 1 { - buf.WriteByte(newline) - } + // put newlines if the items are between other non aligned items + if index != len(aligned) { + buf.WriteByte(newline) } + if len(aligned) >= 1 { + items := p.alignedItems(aligned) + + buf.Write(p.indent(items)) + } else { + buf.Write(p.indent(p.objectItem(o.List.Items[index]))) + index++ + } + + buf.WriteByte(newline) + if index == len(o.List.Items) { break } - buf.Write(p.indent(p.objectItem(o.List.Items[index]))) - buf.WriteByte(newline) - index++ } buf.WriteString("}") @@ -244,17 +262,11 @@ func (p *printer) indent(buf []byte) []byte { } func lines(txt string) int { - endline := 0 + endline := 1 for i := 0; i < len(txt); i++ { if txt[i] == '\n' { endline++ } } - - // some txt do not have any kinde of newlines, treat them also as a one - // liner - if endline == 0 { - endline = 1 - } return endline } diff --git a/printer/testdata/comment.input b/printer/testdata/comment.input index cff962e..7d4e07a 100644 --- a/printer/testdata/comment.input +++ b/printer/testdata/comment.input @@ -19,7 +19,7 @@ variable = { // lead comment -foo { +foo { bar = "fatih" // line comment 2 } // line comment 3 diff --git a/printer/testdata/comment_aligned.golden b/printer/testdata/comment_aligned.golden index 16f90c8..d4b12b0 100644 --- a/printer/testdata/comment_aligned.golden +++ b/printer/testdata/comment_aligned.golden @@ -1,17 +1,20 @@ aligned = { - a = "bar" # yoo1 - default = "bar" # yoo2 - bar = "bar" # yoo3 - fatih = ["fatih", "arslan"] // yoo4 + # We have some aligned items below + foo = "bar" # yoo1 + default = "bar" # yoo2 + bar = "bar" # yoo3 - deneme = { - bar = "fatih" + default = { + bar = "example" } - bar = "bar" # yoo5 - fatih = ["fatih", "arslan"] // yoo6 + #deneme arslan + fatih = ["fatih"] # yoo4 - deneme = { - bar = "fatih" + #fatih arslan + fatiharslan = ["arslan"] // yoo5 + + default = { + bar = "example" } } \ No newline at end of file diff --git 
a/printer/testdata/comment_aligned.input b/printer/testdata/comment_aligned.input index 477aa7b..81ba17d 100644 --- a/printer/testdata/comment_aligned.input +++ b/printer/testdata/comment_aligned.input @@ -1,14 +1,16 @@ aligned { - a = "bar" # yoo1 +# We have some aligned items below + foo = "bar" # yoo1 default = "bar" # yoo2 bar = "bar" # yoo3 - fatih = ["fatih", "arslan"] // yoo4 - deneme = { - bar = "fatih" + default = { + bar = "example" } - bar = "bar" # yoo5 - fatih = ["fatih", "arslan"] // yoo6 - deneme = { - bar = "fatih" + #deneme arslan + fatih = ["fatih"] # yoo4 + #fatih arslan + fatiharslan = ["arslan"] // yoo5 + default = { + bar = "example" } } diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden index f6bd3cb..d46e1c1 100644 --- a/printer/testdata/complexhcl.golden +++ b/printer/testdata/complexhcl.golden @@ -20,10 +20,12 @@ resource "aws_security_group" "firewall" { resource aws_instance "web" { ami = "${var.foo}" + security_groups = [ "foo", "${aws_security_group.firewall.foo}", ] + network_interface = { device_index = 0 description = "Main network interface" From 407cd650d139e5921b17dffc090b840732b77d1f Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Fri, 30 Oct 2015 22:51:35 +0300 Subject: [PATCH 121/137] hcl: add *ast.File with comments --- ast/ast.go | 11 +++++++++++ parser/parser.go | 15 +++++++++++---- printer/nodes.go | 7 +++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 62733f1..6619eef 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -10,6 +10,7 @@ type Node interface { Pos() token.Pos } +func (File) node() {} func (ObjectList) node() {} func (ObjectKey) node() {} func (ObjectItem) node() {} @@ -20,6 +21,16 @@ func (ObjectType) node() {} func (LiteralType) node() {} func (ListType) node() {} +// File represents a single HCL file +type File struct { + Node Node // usually a *ObjectList + Comments []*CommentGroup // list of all comments in the source +} + +func 
(f *File) Pos() token.Pos { + return f.Node.Pos() +} + // ObjectList represents a list of ObjectItems. An HCL file itself is an // ObjectList. type ObjectList struct { diff --git a/parser/parser.go b/parser/parser.go index 56b6aac..8623a83 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -34,7 +34,7 @@ func newParser(src []byte) *Parser { } // Parse returns the fully parsed source and returns the abstract syntax tree. -func Parse(src []byte) (ast.Node, error) { +func Parse(src []byte) (*ast.File, error) { p := newParser(src) return p.Parse() } @@ -42,8 +42,16 @@ func Parse(src []byte) (ast.Node, error) { var errEofToken = errors.New("EOF token found") // Parse returns the fully parsed source and returns the abstract syntax tree. -func (p *Parser) Parse() (ast.Node, error) { - return p.objectList() +func (p *Parser) Parse() (*ast.File, error) { + f := &ast.File{} + var err error + f.Node, err = p.objectList() + if err != nil { + return nil, err + } + + f.Comments = p.comments + return f, nil } func (p *Parser) objectList() (*ast.ObjectList, error) { @@ -64,7 +72,6 @@ func (p *Parser) objectList() (*ast.ObjectList, error) { node.Add(n) } - return node, nil } diff --git a/printer/nodes.go b/printer/nodes.go index 84da4e1..ec36302 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -18,6 +18,13 @@ func (p *printer) output(n interface{}) []byte { var buf bytes.Buffer switch t := n.(type) { + case *ast.File: + // for i, group := range t.Comments { + // for _, comment := range group.List { + // fmt.Printf("[%d] comment = %+v\n", i, comment) + // } + // } + return p.output(t.Node) case *ast.ObjectList: for i, item := range t.Items { buf.Write(p.objectItem(item)) From 629539558bd2a1e6faea4399db84a2b10fca8253 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 31 Oct 2015 01:15:36 +0300 Subject: [PATCH 122/137] printer: collect standalone comments for printing --- ast/walk.go | 2 ++ parser/parser.go | 1 - printer/nodes.go | 59 
++++++++++++++++++++++++++++++++++++++++++++++ printer/printer.go | 10 ++++---- 4 files changed, 66 insertions(+), 6 deletions(-) diff --git a/ast/walk.go b/ast/walk.go index feae611..c6dc75a 100644 --- a/ast/walk.go +++ b/ast/walk.go @@ -11,6 +11,8 @@ func Walk(node Node, fn func(Node) bool) { } switch n := node.(type) { + case *File: + Walk(n.Node, fn) case *ObjectList: for _, item := range n.Items { Walk(item, fn) diff --git a/parser/parser.go b/parser/parser.go index 8623a83..9b5e2c9 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -17,7 +17,6 @@ type Parser struct { // Last read token tok token.Token - // comments comments []*ast.CommentGroup leadComment *ast.CommentGroup // last lead comment lineComment *ast.CommentGroup // last line comment diff --git a/printer/nodes.go b/printer/nodes.go index ec36302..f4d9c3a 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/fatih/hcl/ast" + "github.com/fatih/hcl/token" ) const ( @@ -13,6 +14,64 @@ const ( tab = byte('\t') ) +type printer struct { + cfg Config + comments []*ast.CommentGroup // may be nil, contains all comments + standaloneComments []*ast.CommentGroup // contains all standalone comments (not assigned to any node) +} + +func (p *printer) collectComments(node ast.Node) { + leadComments := make([]*ast.CommentGroup, 0) + lineComments := make([]*ast.CommentGroup, 0) + + ast.Walk(node, func(nn ast.Node) bool { + switch t := nn.(type) { + case *ast.File: + // will happen only once + p.comments = t.Comments + case *ast.ObjectItem: + if t.LeadComment != nil { + leadComments = append(leadComments, t.LeadComment) + } + + if t.LineComment != nil { + lineComments = append(lineComments, t.LineComment) + } + } + + return true + }) + + standaloneComments := make(map[token.Pos]*ast.CommentGroup, 0) + for _, c := range p.comments { + standaloneComments[c.Pos()] = c + } + for _, lead := range leadComments { + for _, comment := range lead.List { + if _, ok := 
standaloneComments[comment.Pos()]; ok { + delete(standaloneComments, comment.Pos()) + } + } + } + + for _, line := range lineComments { + for _, comment := range line.List { + if _, ok := standaloneComments[comment.Pos()]; ok { + delete(standaloneComments, comment.Pos()) + } + } + } + + for _, c := range standaloneComments { + p.standaloneComments = append(p.standaloneComments, c) + } + + fmt.Printf("All comments len = %+v\n", len(p.comments)) + fmt.Printf("Lead commetns = %+v\n", len(leadComments)) + fmt.Printf("len(lineComments) = %+v\n", len(lineComments)) + fmt.Printf("StandAlone Comments = %+v\n", len(p.standaloneComments)) +} + // output prints creates a printable HCL output and returns it. func (p *printer) output(n interface{}) []byte { var buf bytes.Buffer diff --git a/printer/printer.go b/printer/printer.go index a9a0d78..1d15fc8 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -12,10 +12,6 @@ import ( var DefaultConfig = Config{} -type printer struct { - cfg Config -} - // A Config node controls the output of Fprint. 
type Config struct { SpacesWidth int // if set, it will use spaces instead of tabs for alignment @@ -23,9 +19,13 @@ type Config struct { func (c *Config) Fprint(output io.Writer, node ast.Node) error { p := &printer{ - cfg: *c, + cfg: *c, + comments: make([]*ast.CommentGroup, 0), + standaloneComments: make([]*ast.CommentGroup, 0), } + p.collectComments(node) + if _, err := output.Write(p.output(node)); err != nil { return err } From 792e0fef49880f82645258345cefed7ae0afc953 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 31 Oct 2015 01:19:32 +0300 Subject: [PATCH 123/137] printer: simplify standalone collecting --- printer/nodes.go | 59 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index f4d9c3a..8b44b0a 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -21,24 +21,14 @@ type printer struct { } func (p *printer) collectComments(node ast.Node) { - leadComments := make([]*ast.CommentGroup, 0) - lineComments := make([]*ast.CommentGroup, 0) - + // first collect all comments. 
This is already stored in + // ast.File.(comments) ast.Walk(node, func(nn ast.Node) bool { switch t := nn.(type) { case *ast.File: - // will happen only once p.comments = t.Comments - case *ast.ObjectItem: - if t.LeadComment != nil { - leadComments = append(leadComments, t.LeadComment) - } - - if t.LineComment != nil { - lineComments = append(lineComments, t.LineComment) - } + return false } - return true }) @@ -46,30 +36,39 @@ func (p *printer) collectComments(node ast.Node) { for _, c := range p.comments { standaloneComments[c.Pos()] = c } - for _, lead := range leadComments { - for _, comment := range lead.List { - if _, ok := standaloneComments[comment.Pos()]; ok { - delete(standaloneComments, comment.Pos()) - } - } - } - for _, line := range lineComments { - for _, comment := range line.List { - if _, ok := standaloneComments[comment.Pos()]; ok { - delete(standaloneComments, comment.Pos()) + // next remove all lead and line comments from the overall comment map. + // This will give us comments which are standalone, comments which are not + // assigned to any kind of node. 
+ ast.Walk(node, func(nn ast.Node) bool { + switch t := nn.(type) { + case *ast.ObjectItem: + if t.LeadComment != nil { + for _, comment := range t.LeadComment.List { + if _, ok := standaloneComments[comment.Pos()]; ok { + delete(standaloneComments, comment.Pos()) + } + } + } + + if t.LineComment != nil { + for _, comment := range t.LineComment.List { + if _, ok := standaloneComments[comment.Pos()]; ok { + delete(standaloneComments, comment.Pos()) + } + } } } - } + + return true + }) for _, c := range standaloneComments { + for _, comment := range c.List { + fmt.Printf("comment = %+v\n", comment) + } p.standaloneComments = append(p.standaloneComments, c) } - - fmt.Printf("All comments len = %+v\n", len(p.comments)) - fmt.Printf("Lead commetns = %+v\n", len(leadComments)) - fmt.Printf("len(lineComments) = %+v\n", len(lineComments)) - fmt.Printf("StandAlone Comments = %+v\n", len(p.standaloneComments)) } // output prints creates a printable HCL output and returns it. From 9b5083066a4b4d13a09889e9d9acc1a355f4be90 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 31 Oct 2015 15:01:49 +0300 Subject: [PATCH 124/137] printer: implement standalone comments, still WIP --- ast/ast.go | 14 ++ printer/nodes.go | 256 +++++++++++++++++++++----------- printer/printer.go | 2 + printer/testdata/comment.golden | 13 ++ printer/testdata/comment.input | 14 +- token/position.go | 10 ++ 6 files changed, 223 insertions(+), 86 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 6619eef..3e5bc6c 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -10,6 +10,12 @@ type Node interface { Pos() token.Pos } +// NewNode returns a non usable Node interface implementer. The position is +// initalizied to zero. 
+func NewNode() Node { + return &zero{} +} + func (File) node() {} func (ObjectList) node() {} func (ObjectKey) node() {} @@ -21,6 +27,14 @@ func (ObjectType) node() {} func (LiteralType) node() {} func (ListType) node() {} +type zero struct{} + +func (zero) node() {} + +func (z *zero) Pos() token.Pos { + return token.Pos{} +} + // File represents a single HCL file type File struct { Node Node // usually a *ObjectList diff --git a/printer/nodes.go b/printer/nodes.go index 8b44b0a..e84e9b0 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -3,6 +3,7 @@ package printer import ( "bytes" "fmt" + "sort" "github.com/fatih/hcl/ast" "github.com/fatih/hcl/token" @@ -15,11 +16,22 @@ const ( ) type printer struct { - cfg Config + cfg Config + prev ast.Node + comments []*ast.CommentGroup // may be nil, contains all comments standaloneComments []*ast.CommentGroup // contains all standalone comments (not assigned to any node) + + enableTrace bool + indentTrace int } +type ByPosition []*ast.CommentGroup + +func (b ByPosition) Len() int { return len(b) } +func (b ByPosition) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b ByPosition) Less(i, j int) bool { return b[i].Pos().Before(b[j].Pos()) } + func (p *printer) collectComments(node ast.Node) { // first collect all comments. This is already stored in // ast.File.(comments) @@ -64,28 +76,33 @@ func (p *printer) collectComments(node ast.Node) { }) for _, c := range standaloneComments { + p.standaloneComments = append(p.standaloneComments, c) + } + sort.Sort(ByPosition(p.standaloneComments)) + + fmt.Printf("standaloneComments = %+v\n", len(p.standaloneComments)) + for _, c := range p.standaloneComments { for _, comment := range c.List { fmt.Printf("comment = %+v\n", comment) } - p.standaloneComments = append(p.standaloneComments, c) } + } -// output prints creates a printable HCL output and returns it. +var count int + +// output prints creates b printable HCL output and returns it. 
func (p *printer) output(n interface{}) []byte { var buf bytes.Buffer + count++ switch t := n.(type) { case *ast.File: - // for i, group := range t.Comments { - // for _, comment := range group.List { - // fmt.Printf("[%d] comment = %+v\n", i, comment) - // } - // } return p.output(t.Node) case *ast.ObjectList: for i, item := range t.Items { - buf.Write(p.objectItem(item)) + fmt.Printf("[%d] item: %s\n", i, item.Keys[0].Token.Text) + buf.Write(p.output(item)) if i != len(t.Items)-1 { buf.Write([]byte{newline, newline}) } @@ -93,6 +110,20 @@ func (p *printer) output(n interface{}) []byte { case *ast.ObjectKey: buf.WriteString(t.Token.Text) case *ast.ObjectItem: + for _, c := range p.standaloneComments { + for _, comment := range c.List { + fmt.Printf("[%d] OBJECTITEM p.prev = %+v\n", count, p.prev.Pos()) + fmt.Printf("[%d] OBJECTITEM comment.Pos() = %+v\n", count, comment.Pos()) + fmt.Printf("[%d] OBJECTTYPE t.Pos() = %+v\n", count, t.Pos()) + if comment.Pos().After(p.prev.Pos()) && comment.Pos().Before(t.Pos()) { + buf.WriteString(comment.Text) + buf.WriteByte(newline) + buf.WriteByte(newline) + } + } + } + + p.prev = t buf.Write(p.objectItem(t)) case *ast.LiteralType: buf.WriteString(t.Token.Text) @@ -104,10 +135,15 @@ func (p *printer) output(n interface{}) []byte { fmt.Printf(" unknown type: %T\n", n) } + // if item, ok := n.(ast.Node); ok { + // p.prev = item + // } + return buf.Bytes() } func (p *printer) objectItem(o *ast.ObjectItem) []byte { + defer un(trace(p, fmt.Sprintf("ObjectItem: %s", o.Keys[0].Token.Text))) var buf bytes.Buffer if o.LeadComment != nil { @@ -140,6 +176,100 @@ func (p *printer) objectItem(o *ast.ObjectItem) []byte { return buf.Bytes() } +func (p *printer) objectType(o *ast.ObjectType) []byte { + defer un(trace(p, "ObjectType")) + var buf bytes.Buffer + buf.WriteString("{") + buf.WriteByte(newline) + + for _, c := range p.standaloneComments { + for _, comment := range c.List { + fmt.Printf("[%d] OBJECTTYPE p.prev = %+v\n", count, 
p.prev.Pos()) + fmt.Printf("[%d] OBJECTTYPE comment.Pos() = %+v\n", count, comment.Pos()) + fmt.Printf("[%d] OBJECTTYPE t.Pos() = %+v\n", count, o.Pos()) + firstItem := o.List.Pos() + if comment.Pos().After(p.prev.Pos()) && comment.Pos().Before(firstItem) { + buf.Write(p.indent([]byte(comment.Text))) // TODO(arslan): indent + buf.WriteByte(newline) + buf.WriteByte(newline) + } + } + } + + var index int + for { + // check if we have adjacent one liner items. If yes we'll going to align + // the comments. + var aligned []*ast.ObjectItem + for i, item := range o.List.Items[index:] { + // we don't group one line lists + if len(o.List.Items) == 1 { + break + } + + // one means a oneliner with out any lead comment + // two means a oneliner with lead comment + // anything else might be something else + cur := lines(string(p.objectItem(item))) + if cur > 2 { + break + } + + next := 0 + if index != len(o.List.Items)-1 { + next = lines(string(p.objectItem(o.List.Items[index+1]))) + } + + prev := 0 + if index != 0 { + prev = lines(string(p.objectItem(o.List.Items[index-1]))) + } + + if (cur == next && next == 1) || (next == 1 && cur == 2 && i == 0) { + aligned = append(aligned, item) + index++ + } else if (cur == prev && prev == 1) || (prev == 2 && cur == 1) { + aligned = append(aligned, item) + index++ + } else { + break + } + } + + // fmt.Printf("==================> len(aligned) = %+v\n", len(aligned)) + // for _, b := range aligned { + // fmt.Printf("b = %+v\n", b) + // } + + // put newlines if the items are between other non aligned items + if index != len(aligned) { + buf.WriteByte(newline) + } + + if len(aligned) >= 1 { + p.prev = aligned[len(aligned)-1] + + items := p.alignedItems(aligned) + buf.Write(p.indent(items)) + } else { + p.prev = o.List.Items[index] + + buf.Write(p.indent(p.objectItem(o.List.Items[index]))) + index++ + } + + buf.WriteByte(newline) + + if index == len(o.List.Items) { + break + } + + } + + buf.WriteString("}") + return buf.Bytes() +} + func (p 
*printer) alignedItems(items []*ast.ObjectItem) []byte { var buf bytes.Buffer @@ -199,82 +329,6 @@ func (p *printer) literal(l *ast.LiteralType) []byte { return []byte(l.Token.Text) } -func (p *printer) objectType(o *ast.ObjectType) []byte { - var buf bytes.Buffer - buf.WriteString("{") - buf.WriteByte(newline) - - var index int - for { - // check if we have adjacent one liner items. If yes we'll going to align - // the comments. - var aligned []*ast.ObjectItem - for i, item := range o.List.Items[index:] { - // we don't group one line lists - if len(o.List.Items) == 1 { - break - } - - // one means a oneliner with out any lead comment - // two means a oneliner with lead comment - // anything else might be something else - cur := lines(string(p.objectItem(item))) - if cur > 2 { - break - } - - next := 0 - if index != len(o.List.Items)-1 { - next = lines(string(p.objectItem(o.List.Items[index+1]))) - } - - prev := 0 - if index != 0 { - prev = lines(string(p.objectItem(o.List.Items[index-1]))) - } - - if (cur == next && next == 1) || (next == 1 && cur == 2 && i == 0) { - aligned = append(aligned, item) - index++ - } else if (cur == prev && prev == 1) || (prev == 2 && cur == 1) { - aligned = append(aligned, item) - index++ - } else { - break - } - } - - // fmt.Printf("==================> len(aligned) = %+v\n", len(aligned)) - // for _, b := range aligned { - // fmt.Printf("b = %+v\n", b) - // } - - // put newlines if the items are between other non aligned items - if index != len(aligned) { - buf.WriteByte(newline) - } - - if len(aligned) >= 1 { - items := p.alignedItems(aligned) - - buf.Write(p.indent(items)) - } else { - buf.Write(p.indent(p.objectItem(o.List.Items[index]))) - index++ - } - - buf.WriteByte(newline) - - if index == len(o.List.Items) { - break - } - - } - - buf.WriteString("}") - return buf.Bytes() -} - // printList prints a HCL list func (p *printer) list(l *ast.ListType) []byte { var buf bytes.Buffer @@ -335,3 +389,35 @@ func lines(txt string) int { 
} return endline } + +// ---------------------------------------------------------------------------- +// Tracing support + +func (p *printer) printTrace(a ...interface{}) { + if !p.enableTrace { + return + } + + const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + const n = len(dots) + i := 2 * p.indentTrace + for i > n { + fmt.Print(dots) + i -= n + } + // i <= n + fmt.Print(dots[0:i]) + fmt.Println(a...) +} + +func trace(p *printer, msg string) *printer { + p.printTrace(msg, "(") + p.indentTrace++ + return p +} + +// Usage pattern: defer un(trace(p, "...")) +func un(p *printer) { + p.indentTrace-- + p.printTrace(")") +} diff --git a/printer/printer.go b/printer/printer.go index 1d15fc8..af7a939 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -22,6 +22,8 @@ func (c *Config) Fprint(output io.Writer, node ast.Node) error { cfg: *c, comments: make([]*ast.CommentGroup, 0), standaloneComments: make([]*ast.CommentGroup, 0), + prev: ast.NewNode(), + // enableTrace: true, } p.collectComments(node) diff --git a/printer/testdata/comment.golden b/printer/testdata/comment.golden index 65a4e4e..e77ebe9 100644 --- a/printer/testdata/comment.golden +++ b/printer/testdata/comment.golden @@ -1,9 +1,16 @@ +// A standalone comment is a comment which is not attached to any kind of node + // This comes from Terraform, as a test variable "foo" { + # Standalone comment should be still here + default = "bar" description = "bar" # yooo } +/* This is a multi line standalone +comment*/ + // fatih arslan /* This is a developer test account and a multine comment */ @@ -15,6 +22,12 @@ numbers = [1, 2] // another line here # Another comment variable = { description = "bar" # another yooo + + foo = { + # Nested standalone + + bar = "fatih" + } } // lead comment diff --git a/printer/testdata/comment.input b/printer/testdata/comment.input index 7d4e07a..57c37ac 100644 --- a/printer/testdata/comment.input +++ b/printer/testdata/comment.input @@ -1,9 +1,17 
@@ +// A standalone comment is a comment which is not attached to any kind of node + // This comes from Terraform, as a test variable "foo" { + # Standalone comment should be still here + default = "bar" description = "bar" # yooo } +/* This is a multi line standalone +comment*/ + + // fatih arslan /* This is a developer test account and a multine comment */ @@ -15,9 +23,13 @@ numbers = [1,2] // another line here # Another comment variable = { description = "bar" # another yooo + foo { + # Nested standalone + + bar = "fatih" + } } - // lead comment foo { bar = "fatih" // line comment 2 diff --git a/token/position.go b/token/position.go index c151e50..59c1bb7 100644 --- a/token/position.go +++ b/token/position.go @@ -34,3 +34,13 @@ func (p Pos) String() string { } return s } + +// Before reports whether the position p is before u. +func (p Pos) Before(u Pos) bool { + return u.Offset > p.Offset || u.Line > p.Line +} + +// After reports whether the position p is after u. +func (p Pos) After(u Pos) bool { + return u.Offset < p.Offset || u.Line < p.Line +} From 07cb4267298610bff32a9bc39dcfbd85cfaf35e6 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 31 Oct 2015 15:11:53 +0300 Subject: [PATCH 125/137] parser: fix panicing for # style comments HCL supports # style comments, which are 1 size len. 
We assumed that it's always // or /* , which are two size length --- parser/parser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/parser.go b/parser/parser.go index 9b5e2c9..bd51411 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -78,7 +78,7 @@ func (p *Parser) consumeComment() (comment *ast.Comment, endline int) { endline = p.tok.Pos.Line // count the endline if it's multiline comment, ie starting with /* - if p.tok.Text[1] == '*' { + if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' { // don't use range here - no need to decode Unicode code points for i := 0; i < len(p.tok.Text); i++ { if p.tok.Text[i] == '\n' { From c9ef0afb41056340047d9c9fa02fad66e30aefb3 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sat, 31 Oct 2015 17:45:43 +0300 Subject: [PATCH 126/137] printer: imropve alignment printing for standalone comments --- printer/nodes.go | 89 +++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 32 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index e84e9b0..c4683e6 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -117,6 +117,7 @@ func (p *printer) output(n interface{}) []byte { fmt.Printf("[%d] OBJECTTYPE t.Pos() = %+v\n", count, t.Pos()) if comment.Pos().After(p.prev.Pos()) && comment.Pos().Before(t.Pos()) { buf.WriteString(comment.Text) + // TODO(arslan): do not print new lines if the comments are one lines buf.WriteByte(newline) buf.WriteByte(newline) } @@ -182,26 +183,37 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { buf.WriteString("{") buf.WriteByte(newline) - for _, c := range p.standaloneComments { - for _, comment := range c.List { - fmt.Printf("[%d] OBJECTTYPE p.prev = %+v\n", count, p.prev.Pos()) - fmt.Printf("[%d] OBJECTTYPE comment.Pos() = %+v\n", count, comment.Pos()) - fmt.Printf("[%d] OBJECTTYPE t.Pos() = %+v\n", count, o.Pos()) - firstItem := o.List.Pos() - if comment.Pos().After(p.prev.Pos()) && comment.Pos().Before(firstItem) { - 
buf.Write(p.indent([]byte(comment.Text))) // TODO(arslan): indent - buf.WriteByte(newline) - buf.WriteByte(newline) + var index int + var nextItem token.Pos + for { + for _, c := range p.standaloneComments { + for _, comment := range c.List { + fmt.Printf("[%d] OBJECTTYPE p.prev = %+v\n", count, p.prev.Pos()) + fmt.Printf("[%d] OBJECTTYPE comment.Pos() = %+v\n", count, comment.Pos()) + + if index != len(o.List.Items) { + nextItem = o.List.Items[index].Pos() + } else { + nextItem = o.Rbrace + + } + fmt.Printf("[%d] OBJECTTYPE nextItem = %+v\n", count, nextItem) + if comment.Pos().After(p.prev.Pos()) && comment.Pos().Before(nextItem) { + buf.Write(p.indent([]byte(comment.Text))) // TODO(arslan): indent + buf.WriteByte(newline) + buf.WriteByte(newline) + } } } - } - var index int - for { + if index == len(o.List.Items) { + break + } + // check if we have adjacent one liner items. If yes we'll going to align // the comments. var aligned []*ast.ObjectItem - for i, item := range o.List.Items[index:] { + for _, item := range o.List.Items[index:] { // we don't group one line lists if len(o.List.Items) == 1 { break @@ -215,31 +227,48 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { break } - next := 0 + curPos := item.Pos() + + nextPos := token.Pos{} if index != len(o.List.Items)-1 { - next = lines(string(p.objectItem(o.List.Items[index+1]))) + nextPos = o.List.Items[index+1].Pos() } - prev := 0 + prevPos := token.Pos{} if index != 0 { - prev = lines(string(p.objectItem(o.List.Items[index-1]))) + prevPos = o.List.Items[index-1].Pos() } - if (cur == next && next == 1) || (next == 1 && cur == 2 && i == 0) { + // fmt.Println("DEBUG ----------------") + // fmt.Printf("prev = %+v prevPos: %s\n", prev, prevPos) + // fmt.Printf("cur = %+v curPos: %s\n", cur, curPos) + // fmt.Printf("next = %+v nextPos: %s\n", next, nextPos) + + if curPos.Line+1 == nextPos.Line { aligned = append(aligned, item) index++ - } else if (cur == prev && prev == 1) || (prev == 2 && cur == 1) { - 
aligned = append(aligned, item) - index++ - } else { - break + continue } + + if curPos.Line-1 == prevPos.Line { + aligned = append(aligned, item) + index++ + + // finish if we have a new line or comment next. This happens + // if the next item is not adjacent + if curPos.Line+1 != nextPos.Line { + break + } + continue + } + + break } - // fmt.Printf("==================> len(aligned) = %+v\n", len(aligned)) - // for _, b := range aligned { - // fmt.Printf("b = %+v\n", b) - // } + fmt.Printf("==================> len(aligned) = %+v\n", len(aligned)) + for _, b := range aligned { + fmt.Printf("b = %+v\n", b) + } // put newlines if the items are between other non aligned items if index != len(aligned) { @@ -260,10 +289,6 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { buf.WriteByte(newline) - if index == len(o.List.Items) { - break - } - } buf.WriteString("}") From 66daded6ac0e9036b935ba404c6ddb3cbe04ce91 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 1 Nov 2015 01:24:50 +0300 Subject: [PATCH 127/137] printer: finalize comment printing, yay \o/ --- parser/parser.go | 13 ++- printer/nodes.go | 118 ++++++++++++--------- printer/printer.go | 1 - printer/printer_test.go | 1 + printer/testdata/comment_standalone.golden | 17 +++ printer/testdata/comment_standalone.input | 16 +++ 6 files changed, 108 insertions(+), 58 deletions(-) create mode 100644 printer/testdata/comment_standalone.golden create mode 100644 printer/testdata/comment_standalone.input diff --git a/parser/parser.go b/parser/parser.go index bd51411..648f871 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -317,10 +317,15 @@ func (p *Parser) scan() token.Token { comment, endline = p.consumeCommentGroup(1) } - if endline+1 == p.tok.Pos.Line { - // The next token is following on the line immediately after the - // comment group, thus the last comment group is a lead comment. 
- p.leadComment = comment + if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE { + switch p.tok.Type { + case token.RBRACE, token.RBRACK: + // Do not count for these cases + default: + // The next token is following on the line immediately after the + // comment group, thus the last comment group is a lead comment. + p.leadComment = comment + } } } diff --git a/printer/nodes.go b/printer/nodes.go index c4683e6..a2f72f1 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -10,14 +10,15 @@ import ( ) const ( - blank = byte(' ') - newline = byte('\n') - tab = byte('\t') + blank = byte(' ') + newline = byte('\n') + tab = byte('\t') + infinity = 1 << 30 // offset or line ) type printer struct { cfg Config - prev ast.Node + prev token.Pos comments []*ast.CommentGroup // may be nil, contains all comments standaloneComments []*ast.CommentGroup // contains all standalone comments (not assigned to any node) @@ -78,15 +79,8 @@ func (p *printer) collectComments(node ast.Node) { for _, c := range standaloneComments { p.standaloneComments = append(p.standaloneComments, c) } + sort.Sort(ByPosition(p.standaloneComments)) - - fmt.Printf("standaloneComments = %+v\n", len(p.standaloneComments)) - for _, c := range p.standaloneComments { - for _, comment := range c.List { - fmt.Printf("comment = %+v\n", comment) - } - } - } var count int @@ -100,31 +94,49 @@ func (p *printer) output(n interface{}) []byte { case *ast.File: return p.output(t.Node) case *ast.ObjectList: - for i, item := range t.Items { - fmt.Printf("[%d] item: %s\n", i, item.Keys[0].Token.Text) - buf.Write(p.output(item)) - if i != len(t.Items)-1 { + + var index int + var nextItem token.Pos + var commented bool + for { + // TODO(arslan): refactor below comment printing, we have the same in objectType + + // print stand alone upper level stand alone comments + for _, c := range p.standaloneComments { + for _, comment := range c.List { + if index != len(t.Items) { + nextItem = t.Items[index].Pos() + } else { + 
nextItem = token.Pos{Offset: infinity, Line: infinity} + } + + if comment.Pos().After(p.prev) && comment.Pos().Before(nextItem) { + // if we hit the end add newlines so we can print the comment + if index == len(t.Items) { + buf.Write([]byte{newline, newline}) + } + + buf.WriteString(comment.Text) + // TODO(arslan): do not print new lines if the comments are one liner + buf.Write([]byte{newline, newline}) + } + } + } + + if index == len(t.Items) { + break + } + + buf.Write(p.output(t.Items[index])) + if !commented && index != len(t.Items)-1 { buf.Write([]byte{newline, newline}) } + index++ } case *ast.ObjectKey: buf.WriteString(t.Token.Text) case *ast.ObjectItem: - for _, c := range p.standaloneComments { - for _, comment := range c.List { - fmt.Printf("[%d] OBJECTITEM p.prev = %+v\n", count, p.prev.Pos()) - fmt.Printf("[%d] OBJECTITEM comment.Pos() = %+v\n", count, comment.Pos()) - fmt.Printf("[%d] OBJECTTYPE t.Pos() = %+v\n", count, t.Pos()) - if comment.Pos().After(p.prev.Pos()) && comment.Pos().Before(t.Pos()) { - buf.WriteString(comment.Text) - // TODO(arslan): do not print new lines if the comments are one lines - buf.WriteByte(newline) - buf.WriteByte(newline) - } - } - } - - p.prev = t + p.prev = t.Pos() buf.Write(p.objectItem(t)) case *ast.LiteralType: buf.WriteString(t.Token.Text) @@ -136,10 +148,6 @@ func (p *printer) output(n interface{}) []byte { fmt.Printf(" unknown type: %T\n", n) } - // if item, ok := n.(ast.Node); ok { - // p.prev = item - // } - return buf.Bytes() } @@ -185,28 +193,36 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { var index int var nextItem token.Pos + var commented bool for { + // Print stand alone comments for _, c := range p.standaloneComments { for _, comment := range c.List { - fmt.Printf("[%d] OBJECTTYPE p.prev = %+v\n", count, p.prev.Pos()) - fmt.Printf("[%d] OBJECTTYPE comment.Pos() = %+v\n", count, comment.Pos()) - + // if we hit the end, last item should be the brace if index != len(o.List.Items) { nextItem 
= o.List.Items[index].Pos() } else { nextItem = o.Rbrace - } - fmt.Printf("[%d] OBJECTTYPE nextItem = %+v\n", count, nextItem) - if comment.Pos().After(p.prev.Pos()) && comment.Pos().Before(nextItem) { - buf.Write(p.indent([]byte(comment.Text))) // TODO(arslan): indent - buf.WriteByte(newline) + + if comment.Pos().After(p.prev) && comment.Pos().Before(nextItem) { + // add newline if it's between other printed nodes + if index > 0 { + commented = true + buf.WriteByte(newline) + } + + buf.Write(p.indent([]byte(comment.Text))) buf.WriteByte(newline) + if index != len(o.List.Items) { + buf.WriteByte(newline) // do not print on the end + } } } } if index == len(o.List.Items) { + p.prev = o.Rbrace break } @@ -265,30 +281,26 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { break } - fmt.Printf("==================> len(aligned) = %+v\n", len(aligned)) - for _, b := range aligned { - fmt.Printf("b = %+v\n", b) - } - - // put newlines if the items are between other non aligned items - if index != len(aligned) { + // put newlines if the items are between other non aligned items. 
+ // newlines are also added if there is a standalone comment already, so + // check it too + if !commented && index != len(aligned) { buf.WriteByte(newline) } if len(aligned) >= 1 { - p.prev = aligned[len(aligned)-1] + p.prev = aligned[len(aligned)-1].Pos() items := p.alignedItems(aligned) buf.Write(p.indent(items)) } else { - p.prev = o.List.Items[index] + p.prev = o.List.Items[index].Pos() buf.Write(p.indent(p.objectItem(o.List.Items[index]))) index++ } buf.WriteByte(newline) - } buf.WriteString("}") diff --git a/printer/printer.go b/printer/printer.go index af7a939..613ad08 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -22,7 +22,6 @@ func (c *Config) Fprint(output io.Writer, node ast.Node) error { cfg: *c, comments: make([]*ast.CommentGroup, 0), standaloneComments: make([]*ast.CommentGroup, 0), - prev: ast.NewNode(), // enableTrace: true, } diff --git a/printer/printer_test.go b/printer/printer_test.go index 54c86dd..19f3739 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -28,6 +28,7 @@ var data = []entry{ {"list.input", "list.golden"}, {"comment.input", "comment.golden"}, {"comment_aligned.input", "comment_aligned.golden"}, + {"comment_standalone.input", "comment_standalone.golden"}, } func TestFiles(t *testing.T) { diff --git a/printer/testdata/comment_standalone.golden b/printer/testdata/comment_standalone.golden new file mode 100644 index 0000000..24bd43f --- /dev/null +++ b/printer/testdata/comment_standalone.golden @@ -0,0 +1,17 @@ +// A standalone comment + +aligned = { + # Standalone 1 + + a = "bar" # yoo1 + default = "bar" # yoo2 + + # Standalone 2 +} + +# Standalone 3 + +numbers = [1, 2] // another line here + +# Standalone 4 + diff --git a/printer/testdata/comment_standalone.input b/printer/testdata/comment_standalone.input new file mode 100644 index 0000000..4436cb1 --- /dev/null +++ b/printer/testdata/comment_standalone.input @@ -0,0 +1,16 @@ +// A standalone comment + +aligned { + # Standalone 1 + + a = "bar" # 
yoo1 + default = "bar" # yoo2 + + # Standalone 2 +} + + # Standalone 3 + +numbers = [1,2] // another line here + + # Standalone 4 From 858f6116bf9f7d19fc943bc3c32038a243b1e42d Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 1 Nov 2015 01:28:11 +0300 Subject: [PATCH 128/137] printer: complex.golden is now fixed --- printer/nodes.go | 8 +++----- printer/testdata/complexhcl.golden | 1 + printer/testdata/complexhcl.input | 3 --- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index a2f72f1..1cf158d 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -33,6 +33,8 @@ func (b ByPosition) Len() int { return len(b) } func (b ByPosition) Swap(i, j int) { b[i], b[j] = b[j], b[i] } func (b ByPosition) Less(i, j int) bool { return b[i].Pos().Before(b[j].Pos()) } +// collectComments comments all standalone comments which are not lead or line +// comment func (p *printer) collectComments(node ast.Node) { // first collect all comments. This is already stored in // ast.File.(comments) @@ -83,25 +85,21 @@ func (p *printer) collectComments(node ast.Node) { sort.Sort(ByPosition(p.standaloneComments)) } -var count int - // output prints creates b printable HCL output and returns it. 
func (p *printer) output(n interface{}) []byte { var buf bytes.Buffer - count++ switch t := n.(type) { case *ast.File: return p.output(t.Node) case *ast.ObjectList: - var index int var nextItem token.Pos var commented bool for { // TODO(arslan): refactor below comment printing, we have the same in objectType - // print stand alone upper level stand alone comments + // print upper leve stand alone comments for _, c := range p.standaloneComments { for _, comment := range c.List { if index != len(t.Items) { diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden index d46e1c1..1e9562e 100644 --- a/printer/testdata/complexhcl.golden +++ b/printer/testdata/complexhcl.golden @@ -35,6 +35,7 @@ resource aws_instance "web" { resource "aws_instance" "db" { security_groups = "${aws_security_group.firewall.*.id}" VPC = "foo" + depends_on = ["aws_instance.web"] } diff --git a/printer/testdata/complexhcl.input b/printer/testdata/complexhcl.input index aa83f90..899f5fc 100644 --- a/printer/testdata/complexhcl.input +++ b/printer/testdata/complexhcl.input @@ -36,9 +36,6 @@ resource "aws_instance" "db" { VPC = "foo" depends_on = ["aws_instance.web"] - - - } output "web_ip" { From 566c59bf699882cffaf67bbceaf3881b6638a2de Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 1 Nov 2015 01:29:26 +0300 Subject: [PATCH 129/137] printer: fix newline for standalone comments --- printer/nodes.go | 6 +++++- printer/testdata/comment_standalone.golden | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 1cf158d..8ca8050 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -116,7 +116,11 @@ func (p *printer) output(n interface{}) []byte { buf.WriteString(comment.Text) // TODO(arslan): do not print new lines if the comments are one liner - buf.Write([]byte{newline, newline}) + + buf.WriteByte(newline) + if index != len(t.Items) { + buf.WriteByte(newline) + } } } } diff --git 
a/printer/testdata/comment_standalone.golden b/printer/testdata/comment_standalone.golden index 24bd43f..448bb89 100644 --- a/printer/testdata/comment_standalone.golden +++ b/printer/testdata/comment_standalone.golden @@ -14,4 +14,3 @@ aligned = { numbers = [1, 2] // another line here # Standalone 4 - From c73430cb1ed30e167bc1785396167d585ec9caa4 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 1 Nov 2015 03:23:33 +0300 Subject: [PATCH 130/137] printer: align comments in lists --- ast/ast.go | 3 ++ parser/parser.go | 18 +++++-- printer/nodes.go | 64 ++++++++++++++++++------- printer/testdata/comment_aligned.golden | 5 ++ printer/testdata/comment_aligned.input | 5 ++ printer/testdata/complexhcl.golden | 2 +- printer/testdata/list.golden | 8 ++-- 7 files changed, 81 insertions(+), 24 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 3e5bc6c..04ef380 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -97,6 +97,9 @@ func (o *ObjectKey) Pos() token.Pos { // token.NUMBER, token.FLOAT, token.BOOL and token.STRING type LiteralType struct { Token token.Token + + // associated line comment, only when used in a list + LineComment *CommentGroup } func (l *LiteralType) Pos() token.Pos { diff --git a/parser/parser.go b/parser/parser.go index 648f871..7381bd8 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -15,7 +15,8 @@ type Parser struct { sc *scanner.Scanner // Last read token - tok token.Token + tok token.Token + commaPrev token.Token comments []*ast.CommentGroup leadComment *ast.CommentGroup // last lead comment @@ -147,7 +148,6 @@ func (p *Parser) objectItem() (*ast.ObjectItem, error) { o.LineComment = p.lineComment p.lineComment = nil } - p.unscan() return o, nil } @@ -253,6 +253,17 @@ func (p *Parser) listType() (*ast.ListType, error) { l.Add(node) case token.COMMA: // get next list item or we are at the end + // do a look-ahead for line comment + p.scan() + if p.lineComment != nil { + lit, ok := l.List[len(l.List)-1].(*ast.LiteralType) + if ok { + 
lit.LineComment = p.lineComment + l.List[len(l.List)-1] = lit + p.lineComment = nil + } + } + p.unscan() continue case token.BOOL: // TODO(arslan) should we support? not supported by HCL yet @@ -299,7 +310,8 @@ func (p *Parser) scan() token.Token { var comment *ast.CommentGroup var endline int - // fmt.Printf("p.tok.Pos.Line = %+v prev: %d \n", p.tok.Pos.Line, prev.Pos.Line) + // fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n", + // p.tok.Pos.Line, prev.Pos.Line, endline) if p.tok.Pos.Line == prev.Pos.Line { // The comment is on same line as the previous token; it // cannot be a lead comment but may be a line comment. diff --git a/printer/nodes.go b/printer/nodes.go index 8ca8050..2b8d6ae 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -57,6 +57,14 @@ func (p *printer) collectComments(node ast.Node) { // assigned to any kind of node. ast.Walk(node, func(nn ast.Node) bool { switch t := nn.(type) { + case *ast.LiteralType: + if t.LineComment != nil { + for _, comment := range t.LineComment.List { + if _, ok := standaloneComments[comment.Pos()]; ok { + delete(standaloneComments, comment.Pos()) + } + } + } case *ast.ObjectItem: if t.LeadComment != nil { for _, comment := range t.LeadComment.List { @@ -83,6 +91,7 @@ func (p *printer) collectComments(node ast.Node) { } sort.Sort(ByPosition(p.standaloneComments)) + } // output prints creates b printable HCL output and returns it. 
@@ -98,8 +107,6 @@ func (p *printer) output(n interface{}) []byte { var commented bool for { // TODO(arslan): refactor below comment printing, we have the same in objectType - - // print upper leve stand alone comments for _, c := range p.standaloneComments { for _, comment := range c.List { if index != len(t.Items) { @@ -115,7 +122,6 @@ func (p *printer) output(n interface{}) []byte { } buf.WriteString(comment.Text) - // TODO(arslan): do not print new lines if the comments are one liner buf.WriteByte(newline) if index != len(t.Items) { @@ -153,6 +159,9 @@ func (p *printer) output(n interface{}) []byte { return buf.Bytes() } +// objectItem returns the printable HCL form of an object item. An object type +// starts with one/multiple keys and has a value. The value might be of any +// type. func (p *printer) objectItem(o *ast.ObjectItem) []byte { defer un(trace(p, fmt.Sprintf("ObjectItem: %s", o.Keys[0].Token.Text))) var buf bytes.Buffer @@ -187,6 +196,8 @@ func (p *printer) objectItem(o *ast.ObjectItem) []byte { return buf.Bytes() } +// objectType returns the printable HCL form of an object type. An object type +// begins with a brace and ends with a brace. func (p *printer) objectType(o *ast.ObjectType) []byte { defer un(trace(p, "ObjectType")) var buf bytes.Buffer @@ -364,32 +375,53 @@ func (p *printer) alignedItems(items []*ast.ObjectItem) []byte { return buf.Bytes() } -func (p *printer) literal(l *ast.LiteralType) []byte { - return []byte(l.Token.Text) -} - -// printList prints a HCL list +// list returns the printable HCL form of an list type. 
func (p *printer) list(l *ast.ListType) []byte { var buf bytes.Buffer buf.WriteString("[") + var longestLine int + for _, item := range l.List { + // for now we assume that the list only contains literal types + if lit, ok := item.(*ast.LiteralType); ok { + lineLen := len(lit.Token.Text) + if lineLen > longestLine { + longestLine = lineLen + } + } + } + for i, item := range l.List { if item.Pos().Line != l.Lbrack.Line { // multiline list, add newline before we add each item buf.WriteByte(newline) // also indent each line - buf.Write(p.indent(p.output(item))) + val := p.output(item) + curLen := len(val) + buf.Write(p.indent(val)) + buf.WriteString(",") + + if lit, ok := item.(*ast.LiteralType); ok && lit.LineComment != nil { + for i := 0; i < longestLine-curLen+1; i++ { + buf.WriteByte(blank) + } + + for _, comment := range lit.LineComment.List { + buf.WriteString(comment.Text) + } + } + + if i == len(l.List)-1 { + buf.WriteByte(newline) + } } else { buf.Write(p.output(item)) + if i != len(l.List)-1 { + buf.WriteString(",") + buf.WriteByte(blank) + } } - if i != len(l.List)-1 { - buf.WriteString(",") - buf.WriteByte(blank) - } else if item.Pos().Line != l.Lbrack.Line { - buf.WriteString(",") - buf.WriteByte(newline) - } } buf.WriteString("]") diff --git a/printer/testdata/comment_aligned.golden b/printer/testdata/comment_aligned.golden index d4b12b0..16fc6ff 100644 --- a/printer/testdata/comment_aligned.golden +++ b/printer/testdata/comment_aligned.golden @@ -17,4 +17,9 @@ aligned = { default = { bar = "example" } + + security_groups = [ + "foo", # kenya 1 + "${aws_security_group.firewall.foo}", # kenya 2 + ] } \ No newline at end of file diff --git a/printer/testdata/comment_aligned.input b/printer/testdata/comment_aligned.input index 81ba17d..2233738 100644 --- a/printer/testdata/comment_aligned.input +++ b/printer/testdata/comment_aligned.input @@ -13,4 +13,9 @@ aligned { default = { bar = "example" } + +security_groups = [ + "foo", # kenya 1 + 
"${aws_security_group.firewall.foo}", # kenya 2 +] } diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden index 1e9562e..3bf4e06 100644 --- a/printer/testdata/complexhcl.golden +++ b/printer/testdata/complexhcl.golden @@ -22,7 +22,7 @@ resource aws_instance "web" { ami = "${var.foo}" security_groups = [ - "foo", + "foo", "${aws_security_group.firewall.foo}", ] diff --git a/printer/testdata/list.golden b/printer/testdata/list.golden index 0b949f1..385912f 100644 --- a/printer/testdata/list.golden +++ b/printer/testdata/list.golden @@ -11,8 +11,8 @@ foo = ["fatih", "zeynep", ] foo = [ - "vim-go", - "golang", + "vim-go", + "golang", "hcl", ] @@ -21,7 +21,7 @@ foo = [] foo = [1, 2, 3, 4] foo = [ - "kenya", - "ethiopia", + "kenya", + "ethiopia", "columbia", ] \ No newline at end of file From 1ae9933e7dc26ec188b004e8987f452c2773c068 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Sun, 1 Nov 2015 03:29:23 +0300 Subject: [PATCH 131/137] printer: only align list items if items have comments --- printer/nodes.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index 2b8d6ae..abd85c1 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -402,8 +402,14 @@ func (p *printer) list(l *ast.ListType) []byte { buf.WriteString(",") if lit, ok := item.(*ast.LiteralType); ok && lit.LineComment != nil { - for i := 0; i < longestLine-curLen+1; i++ { - buf.WriteByte(blank) + // if the next item doesn't have any comments, do not align + buf.WriteByte(blank) // align one space + if i != len(l.List)-1 { + if lit, ok := l.List[i+1].(*ast.LiteralType); ok && lit.LineComment != nil { + for i := 0; i < longestLine-curLen; i++ { + buf.WriteByte(blank) + } + } } for _, comment := range lit.LineComment.List { From 8fb44d8c3105b92f6a21783fd91303571970c725 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 3 Nov 2015 02:21:28 +0300 Subject: [PATCH 132/137] README.md: add note about development case 
--- README.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2ababa5..860681f 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,9 @@ packages to write tools and customize files written in HCL. For example both [`hclfmt`](https://github.com/fatih/hclfmt) and `hcl2json` (coming soon) is written based on these tools. +This package is still under heavy development. The next stable version will be +released with version 0.1. + ## API If you are already familiar with Go's own parser family it's really easy to @@ -23,14 +26,15 @@ following packages: ## Why -The whole parser family was created because I wanted a `hclfmt` command, which -like `gofmt` would format a HCL file. I didn't want to use the package -[github/hashicorp/hcl](https://github.com/hashicorp/hcl) in the first place, -because the lexer and parser is generated and it doesn't expose any kind of -flexibility. +The whole parser family was created because I wanted a `hclfmt` command. This +command would be just like `gofmt`, format an HCL file. I didn't want to use +the package [github/hashicorp/hcl](https://github.com/hashicorp/hcl) in the +first place, because the lexer and parser is generated and it doesn't expose +the flexibility I wanted to have. Another reason was that I wanted to learn and experience how to implement a -proper lexer and parser in Go. It was really fun and I think it was worth it. +proper lexer and parser in Go and how a formatter could be implemented from an +AST. It was really fun and I think it was worth it. 
## License From 079bc726dd530ed3e799df5a3d4dd6b0cbfb590c Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 3 Nov 2015 02:41:11 +0300 Subject: [PATCH 133/137] Add .travis.yml file --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..3a569c5 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,3 @@ +language: go +go: 1.5 + From 68aca194ab2821c268b33e7ee091b6e0ab219c58 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Tue, 3 Nov 2015 02:46:13 +0300 Subject: [PATCH 134/137] ast: we don't need zero node Also trigger a build on Travis --- ast/ast.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 04ef380..c5a6f47 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -10,12 +10,6 @@ type Node interface { Pos() token.Pos } -// NewNode returns a non usable Node interface implementer. The position is -// initalizied to zero. -func NewNode() Node { - return &zero{} -} - func (File) node() {} func (ObjectList) node() {} func (ObjectKey) node() {} @@ -27,14 +21,6 @@ func (ObjectType) node() {} func (LiteralType) node() {} func (ListType) node() {} -type zero struct{} - -func (zero) node() {} - -func (z *zero) Pos() token.Pos { - return token.Pos{} -} - // File represents a single HCL file type File struct { Node Node // usually a *ObjectList From 59fd9d5a007c90b6923982d4b93bc3823fd26fc6 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 4 Nov 2015 17:07:47 +0300 Subject: [PATCH 135/137] printer: indent must be two spaces, closes #2 --- printer/printer.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/printer/printer.go b/printer/printer.go index 613ad08..146521b 100644 --- a/printer/printer.go +++ b/printer/printer.go @@ -10,7 +10,9 @@ import ( "github.com/fatih/hcl/parser" ) -var DefaultConfig = Config{} +var DefaultConfig = Config{ + SpacesWidth: 2, +} // A Config node controls the output of Fprint. 
type Config struct { From 6f84551a04d95b37a678ff090589dfee14ec271e Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 4 Nov 2015 17:21:17 +0300 Subject: [PATCH 136/137] printer: aligned adjacent attributes, closes #3 --- printer/nodes.go | 29 ++++++++++++++-------- printer/testdata/comment.golden | 2 +- printer/testdata/comment_aligned.golden | 6 ++--- printer/testdata/comment_aligned.input | 4 +-- printer/testdata/comment_standalone.golden | 2 +- printer/testdata/complexhcl.golden | 8 +++--- 6 files changed, 30 insertions(+), 21 deletions(-) diff --git a/printer/nodes.go b/printer/nodes.go index abd85c1..da7f197 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -323,11 +323,19 @@ func (p *printer) objectType(o *ast.ObjectType) []byte { func (p *printer) alignedItems(items []*ast.ObjectItem) []byte { var buf bytes.Buffer - var longestLine int + // find the longest key and value length, needed for alignment + var longestKeyLen int // longest key length + var longestValLen int // longest value length for _, item := range items { - lineLen := len(item.Keys[0].Token.Text) + len(p.output(item.Val)) - if lineLen > longestLine { - longestLine = lineLen + key := len(item.Keys[0].Token.Text) + val := len(p.output(item.Val)) + + if key > longestKeyLen { + longestKeyLen = key + } + + if val > longestValLen { + longestValLen = val } } @@ -339,25 +347,26 @@ func (p *printer) alignedItems(items []*ast.ObjectItem) []byte { } } - curLen := 0 for i, k := range item.Keys { + keyLen := len(k.Token.Text) buf.WriteString(k.Token.Text) - buf.WriteByte(blank) + for i := 0; i < longestKeyLen-keyLen+1; i++ { + buf.WriteByte(blank) + } // reach end of key if i == len(item.Keys)-1 && len(item.Keys) == 1 { buf.WriteString("=") buf.WriteByte(blank) } - - curLen = len(k.Token.Text) // two blanks and one assign } + val := p.output(item.Val) + valLen := len(val) buf.Write(val) - curLen += len(val) if item.Val.Pos().Line == item.Keys[0].Pos().Line && item.LineComment != nil { - for i := 
0; i < longestLine-curLen+1; i++ { + for i := 0; i < longestValLen-valLen+1; i++ { buf.WriteByte(blank) } diff --git a/printer/testdata/comment.golden b/printer/testdata/comment.golden index e77ebe9..8263733 100644 --- a/printer/testdata/comment.golden +++ b/printer/testdata/comment.golden @@ -4,7 +4,7 @@ variable "foo" { # Standalone comment should be still here - default = "bar" + default = "bar" description = "bar" # yooo } diff --git a/printer/testdata/comment_aligned.golden b/printer/testdata/comment_aligned.golden index 16fc6ff..da8f9d0 100644 --- a/printer/testdata/comment_aligned.golden +++ b/printer/testdata/comment_aligned.golden @@ -1,8 +1,8 @@ aligned = { # We have some aligned items below - foo = "bar" # yoo1 - default = "bar" # yoo2 - bar = "bar" # yoo3 + foo = "fatih" # yoo1 + default = "bar" # yoo2 + bar = "bar and foo" # yoo3 default = { bar = "example" diff --git a/printer/testdata/comment_aligned.input b/printer/testdata/comment_aligned.input index 2233738..1c6ba0d 100644 --- a/printer/testdata/comment_aligned.input +++ b/printer/testdata/comment_aligned.input @@ -1,8 +1,8 @@ aligned { # We have some aligned items below - foo = "bar" # yoo1 + foo = "fatih" # yoo1 default = "bar" # yoo2 - bar = "bar" # yoo3 + bar = "bar and foo" # yoo3 default = { bar = "example" } diff --git a/printer/testdata/comment_standalone.golden b/printer/testdata/comment_standalone.golden index 448bb89..a1d28ef 100644 --- a/printer/testdata/comment_standalone.golden +++ b/printer/testdata/comment_standalone.golden @@ -3,7 +3,7 @@ aligned = { # Standalone 1 - a = "bar" # yoo1 + a = "bar" # yoo1 default = "bar" # yoo2 # Standalone 2 diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden index 3bf4e06..8c01253 100644 --- a/printer/testdata/complexhcl.golden +++ b/printer/testdata/complexhcl.golden @@ -1,5 +1,5 @@ variable "foo" { - default = "bar" + default = "bar" description = "bar" } @@ -28,17 +28,17 @@ resource aws_instance "web" { 
network_interface = { device_index = 0 - description = "Main network interface" + description = "Main network interface" } } resource "aws_instance" "db" { security_groups = "${aws_security_group.firewall.*.id}" - VPC = "foo" + VPC = "foo" depends_on = ["aws_instance.web"] } output "web_ip" { value = "${aws_instance.web.private_ip}" -} \ No newline at end of file +} From b0913285c3344d11eabd1813be555ee1009681f2 Mon Sep 17 00:00:00 2001 From: Fatih Arslan Date: Wed, 4 Nov 2015 17:27:17 +0300 Subject: [PATCH 137/137] printer: update golden example --- printer/testdata/complexhcl.golden | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/printer/testdata/complexhcl.golden b/printer/testdata/complexhcl.golden index 8c01253..1639944 100644 --- a/printer/testdata/complexhcl.golden +++ b/printer/testdata/complexhcl.golden @@ -41,4 +41,4 @@ resource "aws_instance" "db" { output "web_ip" { value = "${aws_instance.web.private_ip}" -} +} \ No newline at end of file