package hclwrite import ( "github.com/hashicorp/hcl2/hcl/hclsyntax" ) var inKeyword = hclsyntax.Keyword([]byte{'i', 'n'}) // placeholder token used when we don't have a token but we don't want // to pass a real "nil" and complicate things with nil pointer checks var nilToken = &Token{ Type: hclsyntax.TokenNil, Bytes: []byte{}, SpacesBefore: 0, } // format rewrites tokens within the given sequence, in-place, to adjust the // whitespace around their content to achieve canonical formatting. func format(tokens Tokens) { // Formatting is a multi-pass process. More details on the passes below, // but this is the overview: // - adjust the leading space on each line to create appropriate // indentation // - adjust spaces between tokens in a single cell using a set of rules // - adjust the leading space in the "assign" and "comment" cells on each // line to vertically align with neighboring lines. // All of these steps operate in-place on the given tokens, so a caller // may collect a flat sequence of all of the tokens underlying an AST // and pass it here and we will then indirectly modify the AST itself. // Formatting must change only whitespace. Specifically, that means // changing the SpacesBefore attribute on a token while leaving the // other token attributes unchanged. lines := linesForFormat(tokens) formatIndent(lines) formatSpaces(lines) formatCells(lines) } func formatIndent(lines []formatLine) { // Our methodology for indents is to take the input one line at a time // and count the bracketing delimiters on each line. If a line has a net // increase in open brackets, we increase the indent level by one and // remember how many new openers we had. If the line has a net _decrease_, // we'll compare it to the most recent number of openers and decrease the // dedent level by one each time we pass an indent level remembered // earlier. // The "indent stack" used here allows for us to recognize degenerate // input where brackets are not symmetrical within lines and avoid // pushing things too far left or right, creating confusion. // We'll start our indent stack at a reasonable capacity to minimize the // chance of us needing to grow it; 10 here means 10 levels of indent, // which should be more than enough for reasonable HCL uses. indents := make([]int, 0, 10) inHeredoc := false for i := range lines { line := &lines[i] if len(line.lead) == 0 { continue } if inHeredoc { for _, token := range line.lead { if token.Type == hclsyntax.TokenCHeredoc { inHeredoc = false } } continue // don't touch indentation inside heredocs } if line.lead[0].Type == hclsyntax.TokenNewline { // Never place spaces before a newline line.lead[0].SpacesBefore = 0 continue } netBrackets := 0 for _, token := range line.lead { netBrackets += tokenBracketChange(token) } for _, token := range line.assign { netBrackets += tokenBracketChange(token) if token.Type == hclsyntax.TokenOHeredoc { inHeredoc = true } } switch { case netBrackets > 0: line.lead[0].SpacesBefore = 2 * len(indents) indents = append(indents, netBrackets) case netBrackets < 0: closed := -netBrackets for closed > 0 && len(indents) > 0 { switch { case closed > indents[len(indents)-1]: closed -= indents[len(indents)-1] indents = indents[:len(indents)-1] case closed < indents[len(indents)-1]: indents[len(indents)-1] -= closed closed = 0 default: indents = indents[:len(indents)-1] closed = 0 } } line.lead[0].SpacesBefore = 2 * len(indents) default: line.lead[0].SpacesBefore = 2 * len(indents) } } } func formatSpaces(lines []formatLine) { for _, line := range lines { for i, token := range line.lead { var before, after *Token if i > 0 { before = line.lead[i-1] } else { before = nilToken } if i < (len(line.lead) - 1) { after = line.lead[i+1] } else { after = nilToken } if spaceAfterToken(token, before, after) { after.SpacesBefore = 1 } else { after.SpacesBefore = 0 } } for i, token := range line.assign { if i == 0 { // first token in "assign" always has one space before to // separate the equals sign from what it's assigning. token.SpacesBefore = 1 } var before, after *Token if i > 0 { before = line.assign[i-1] } else { before = nilToken } if i < (len(line.assign) - 1) { after = line.assign[i+1] } else { after = nilToken } if spaceAfterToken(token, before, after) { after.SpacesBefore = 1 } else { after.SpacesBefore = 0 } } } } func formatCells(lines []formatLine) { chainStart := -1 maxColumns := 0 // We'll deal with the "assign" cell first, since moving that will // also impact the "comment" cell. closeAssignChain := func(i int) { for _, chainLine := range lines[chainStart:i] { columns := chainLine.lead.Columns() spaces := (maxColumns - columns) + 1 chainLine.assign[0].SpacesBefore = spaces } chainStart = -1 maxColumns = 0 } for i, line := range lines { if line.assign == nil { if chainStart != -1 { closeAssignChain(i) } } else { if chainStart == -1 { chainStart = i } columns := line.lead.Columns() if columns > maxColumns { maxColumns = columns } } } if chainStart != -1 { closeAssignChain(len(lines)) } // Now we'll deal with the comments closeCommentChain := func(i int) { for _, chainLine := range lines[chainStart:i] { columns := chainLine.lead.Columns() + chainLine.assign.Columns() spaces := (maxColumns - columns) + 1 chainLine.comment[0].SpacesBefore = spaces } chainStart = -1 maxColumns = 0 } for i, line := range lines { if line.comment == nil { if chainStart != -1 { closeCommentChain(i) } } else { if chainStart == -1 { chainStart = i } columns := line.lead.Columns() + line.assign.Columns() if columns > maxColumns { maxColumns = columns } } } if chainStart != -1 { closeCommentChain(len(lines)) } } // spaceAfterToken decides whether a particular subject token should have a // space after it when surrounded by the given before and after tokens. // "before" can be TokenNil, if the subject token is at the start of a sequence. func spaceAfterToken(subject, before, after *Token) bool { switch { case after.Type == hclsyntax.TokenNewline || after.Type == hclsyntax.TokenNil: // Never add spaces before a newline return false case subject.Type == hclsyntax.TokenIdent && after.Type == hclsyntax.TokenOParen: // Don't split a function name from open paren in a call return false case subject.Type == hclsyntax.TokenDot || after.Type == hclsyntax.TokenDot: // Don't use spaces around attribute access dots return false case after.Type == hclsyntax.TokenComma: // No space right before a comma in an argument list return false case subject.Type == hclsyntax.TokenComma: // Always a space after a comma return true case subject.Type == hclsyntax.TokenQuotedLit || subject.Type == hclsyntax.TokenStringLit || subject.Type == hclsyntax.TokenOQuote || subject.Type == hclsyntax.TokenOHeredoc || after.Type == hclsyntax.TokenQuotedLit || after.Type == hclsyntax.TokenStringLit || after.Type == hclsyntax.TokenCQuote || after.Type == hclsyntax.TokenCHeredoc: // No extra spaces within templates return false case inKeyword.TokenMatches(subject.asHCLSyntax()) && before.Type == hclsyntax.TokenIdent: // This is a special case for inside for expressions where a user // might want to use a literal tuple constructor: // [for x in [foo]: x] // ... in that case, we would normally produce in[foo] thinking that // in is a reference, but we'll recognize it as a keyword here instead // to make the result less confusing. return true case after.Type == hclsyntax.TokenOBrack && (subject.Type == hclsyntax.TokenIdent || subject.Type == hclsyntax.TokenNumberLit || tokenBracketChange(subject) < 0): return false case subject.Type == hclsyntax.TokenMinus: // Since a minus can either be subtraction or negation, and the latter // should _not_ have a space after it, we need to use some heuristics // to decide which case this is. // We guess that we have a negation if the token before doesn't look // like it could be the end of an expression. switch before.Type { case hclsyntax.TokenNil: // Minus at the start of input must be a negation return false case hclsyntax.TokenOParen, hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenEqual, hclsyntax.TokenColon, hclsyntax.TokenComma, hclsyntax.TokenQuestion: // Minus immediately after an opening bracket or separator must be a negation. return false case hclsyntax.TokenPlus, hclsyntax.TokenStar, hclsyntax.TokenSlash, hclsyntax.TokenPercent, hclsyntax.TokenMinus: // Minus immediately after another arithmetic operator must be negation. return false case hclsyntax.TokenEqualOp, hclsyntax.TokenNotEqual, hclsyntax.TokenGreaterThan, hclsyntax.TokenGreaterThanEq, hclsyntax.TokenLessThan, hclsyntax.TokenLessThanEq: // Minus immediately after another comparison operator must be negation. return false case hclsyntax.TokenAnd, hclsyntax.TokenOr, hclsyntax.TokenBang: // Minus immediately after logical operator doesn't make sense but probably intended as negation. return false default: return true } case subject.Type == hclsyntax.TokenOBrace || after.Type == hclsyntax.TokenCBrace: // Unlike other bracket types, braces have spaces on both sides of them, // both in single-line nested blocks foo { bar = baz } and in object // constructor expressions foo = { bar = baz }. if subject.Type == hclsyntax.TokenOBrace && after.Type == hclsyntax.TokenCBrace { // An open brace followed by a close brace is an exception, however. // e.g. foo {} rather than foo { } return false } return true case after.Type == hclsyntax.TokenColon: // Never spaces before colons return false // In the unlikely event that an interpolation expression is just // a single object constructor, we'll put a space between the ${ and // the following { to make this more obvious, and then the same // thing for the two braces at the end. case (subject.Type == hclsyntax.TokenTemplateInterp || subject.Type == hclsyntax.TokenTemplateControl) && after.Type == hclsyntax.TokenOBrace: return true case subject.Type == hclsyntax.TokenCBrace && after.Type == hclsyntax.TokenTemplateSeqEnd: return true case tokenBracketChange(subject) > 0: // No spaces after open brackets return false case tokenBracketChange(after) < 0: // No spaces before close brackets return false default: // Most tokens are space-separated return true } } func linesForFormat(tokens Tokens) []formatLine { if len(tokens) == 0 { return make([]formatLine, 0) } // first we'll count our lines, so we can allocate the array for them in // a single block. (We want to minimize memory pressure in this codepath, // so it can be run somewhat-frequently by editor integrations.) lineCount := 1 // if there are zero newlines then there is one line for _, tok := range tokens { if tokenIsNewline(tok) { lineCount++ } } // To start, we'll just put everything in the "lead" cell on each line, // and then do another pass over the lines afterwards to adjust. lines := make([]formatLine, lineCount) li := 0 lineStart := 0 for i, tok := range tokens { if tok.Type == hclsyntax.TokenEOF { // The EOF token doesn't belong to any line, and terminates the // token sequence. lines[li].lead = tokens[lineStart:i] break } if tokenIsNewline(tok) { lines[li].lead = tokens[lineStart : i+1] lineStart = i + 1 li++ } } // If a set of tokens doesn't end in TokenEOF (e.g. because it's a // fragment of tokens from the middle of a file) then we might fall // out here with a line still pending. if lineStart < len(tokens) { lines[li].lead = tokens[lineStart:] if lines[li].lead[len(lines[li].lead)-1].Type == hclsyntax.TokenEOF { lines[li].lead = lines[li].lead[:len(lines[li].lead)-1] } } // Now we'll pick off any trailing comments and attribute assignments // to shuffle off into the "comment" and "assign" cells. inHeredoc := false for i := range lines { line := &lines[i] if len(line.lead) == 0 { // if the line is empty then there's nothing for us to do // (this should happen only for the final line, because all other // lines would have a newline token of some kind) continue } if inHeredoc { for _, tok := range line.lead { if tok.Type == hclsyntax.TokenCHeredoc { inHeredoc = false break } } // Inside a heredoc everything is "lead", even if there's a // template interpolation embedded in there that might otherwise // confuse our logic below. continue } for _, tok := range line.lead { if tok.Type == hclsyntax.TokenOHeredoc { inHeredoc = true break } } if len(line.lead) > 1 && line.lead[len(line.lead)-1].Type == hclsyntax.TokenComment { line.comment = line.lead[len(line.lead)-1:] line.lead = line.lead[:len(line.lead)-1] } for i, tok := range line.lead { if i > 0 && tok.Type == hclsyntax.TokenEqual { // We only move the tokens into "assign" if the RHS seems to // be a whole expression, which we determine by counting // brackets. If there's a net positive number of brackets // then that suggests we're introducing a multi-line expression. netBrackets := 0 for _, token := range line.lead[i:] { netBrackets += tokenBracketChange(token) } if netBrackets == 0 { line.assign = line.lead[i:] line.lead = line.lead[:i] } break } } } return lines } func tokenIsNewline(tok *Token) bool { if tok.Type == hclsyntax.TokenNewline { return true } else if tok.Type == hclsyntax.TokenComment { // Single line tokens (# and //) consume their terminating newline, // so we need to treat them as newline tokens as well. if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' { return true } } return false } func tokenBracketChange(tok *Token) int { switch tok.Type { case hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenOParen, hclsyntax.TokenTemplateControl, hclsyntax.TokenTemplateInterp: return 1 case hclsyntax.TokenCBrace, hclsyntax.TokenCBrack, hclsyntax.TokenCParen, hclsyntax.TokenTemplateSeqEnd: return -1 default: return 0 } } // formatLine represents a single line of source code for formatting purposes, // splitting its tokens into up to three "cells": // // lead: always present, representing everything up to one of the others // assign: if line contains an attribute assignment, represents the tokens // starting at (and including) the equals symbol // comment: if line contains any non-comment tokens and ends with a // single-line comment token, represents the comment. // // When formatting, the leading spaces of the first tokens in each of these // cells is adjusted to align vertically their occurences on consecutive // rows. type formatLine struct { lead Tokens assign Tokens comment Tokens }