...
Run Format

Source file src/go/doc/comment.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Godoc comment extraction and comment -> HTML formatting.
     6	
     7	package doc
     8	
     9	import (
    10		"io"
    11		"regexp"
    12		"strings"
    13		"text/template" // for HTMLEscape
    14		"unicode"
    15		"unicode/utf8"
    16	)
    17	
    18	var (
    19		ldquo = []byte("“")
    20		rdquo = []byte("”")
    21	)
    22	
    23	// Escape comment text for HTML. If nice is set,
    24	// also turn `` into “ and '' into ”.
    25	func commentEscape(w io.Writer, text string, nice bool) {
    26		last := 0
    27		if nice {
    28			for i := 0; i < len(text)-1; i++ {
    29				ch := text[i]
    30				if ch == text[i+1] && (ch == '`' || ch == '\'') {
    31					template.HTMLEscape(w, []byte(text[last:i]))
    32					last = i + 2
    33					switch ch {
    34					case '`':
    35						w.Write(ldquo)
    36					case '\'':
    37						w.Write(rdquo)
    38					}
    39					i++ // loop will add one more
    40				}
    41			}
    42		}
    43		template.HTMLEscape(w, []byte(text[last:]))
    44	}
    45	
    46	const (
    47		// Regexp for Go identifiers
    48		identRx = `[\pL_][\pL_0-9]*`
    49	
    50		// Regexp for URLs
    51		protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
    52		hostPart = `[a-zA-Z0-9_@\-]+`
    53		filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
    54		urlRx    = `(` + protocol + `)://` +   // http://
    55			hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
    56			filePart + `([:.,;]` + filePart + `)*`
    57	)
    58	
    59	var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
    60	
    61	var (
    62		html_a      = []byte(`<a href="`)
    63		html_aq     = []byte(`">`)
    64		html_enda   = []byte("</a>")
    65		html_i      = []byte("<i>")
    66		html_endi   = []byte("</i>")
    67		html_p      = []byte("<p>\n")
    68		html_endp   = []byte("</p>\n")
    69		html_pre    = []byte("<pre>")
    70		html_endpre = []byte("</pre>\n")
    71		html_h      = []byte(`<h3 id="`)
    72		html_hq     = []byte(`">`)
    73		html_endh   = []byte("</h3>\n")
    74	)
    75	
    76	// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
    77	func pairedParensPrefixLen(s string) int {
    78		parens := 0
    79		l := len(s)
    80		for i, ch := range s {
    81			switch ch {
    82			case '(':
    83				if parens == 0 {
    84					l = i
    85				}
    86				parens++
    87			case ')':
    88				parens--
    89				if parens == 0 {
    90					l = len(s)
    91				} else if parens < 0 {
    92					return i
    93				}
    94			}
    95		}
    96		return l
    97	}
    98	
    99	// Emphasize and escape a line of text for HTML. URLs are converted into links;
   100	// if the URL also appears in the words map, the link is taken from the map (if
   101	// the corresponding map value is the empty string, the URL is not converted
   102	// into a link). Go identifiers that appear in the words map are italicized; if
   103	// the corresponding map value is not the empty string, it is considered a URL
   104	// and the word is converted into a link. If nice is set, the remaining text's
   105	// appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
   106	// and '' into &rdquo;).
   107	func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
   108		for {
   109			m := matchRx.FindStringSubmatchIndex(line)
   110			if m == nil {
   111				break
   112			}
   113			// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
   114	
   115			// write text before match
   116			commentEscape(w, line[0:m[0]], nice)
   117	
   118			// adjust match if necessary
   119			match := line[m[0]:m[1]]
   120			if n := pairedParensPrefixLen(match); n < len(match) {
   121				// match contains unpaired parentheses (rare);
   122				// redo matching with shortened line for correct indices
   123				m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
   124				match = match[:n]
   125			}
   126	
   127			// analyze match
   128			url := ""
   129			italics := false
   130			if words != nil {
   131				url, italics = words[match]
   132			}
   133			if m[2] >= 0 {
   134				// match against first parenthesized sub-regexp; must be match against urlRx
   135				if !italics {
   136					// no alternative URL in words list, use match instead
   137					url = match
   138				}
   139				italics = false // don't italicize URLs
   140			}
   141	
   142			// write match
   143			if len(url) > 0 {
   144				w.Write(html_a)
   145				template.HTMLEscape(w, []byte(url))
   146				w.Write(html_aq)
   147			}
   148			if italics {
   149				w.Write(html_i)
   150			}
   151			commentEscape(w, match, nice)
   152			if italics {
   153				w.Write(html_endi)
   154			}
   155			if len(url) > 0 {
   156				w.Write(html_enda)
   157			}
   158	
   159			// advance
   160			line = line[m[1]:]
   161		}
   162		commentEscape(w, line, nice)
   163	}
   164	
   165	func indentLen(s string) int {
   166		i := 0
   167		for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   168			i++
   169		}
   170		return i
   171	}
   172	
   173	func isBlank(s string) bool {
   174		return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   175	}
   176	
   177	func commonPrefix(a, b string) string {
   178		i := 0
   179		for i < len(a) && i < len(b) && a[i] == b[i] {
   180			i++
   181		}
   182		return a[0:i]
   183	}
   184	
   185	func unindent(block []string) {
   186		if len(block) == 0 {
   187			return
   188		}
   189	
   190		// compute maximum common white prefix
   191		prefix := block[0][0:indentLen(block[0])]
   192		for _, line := range block {
   193			if !isBlank(line) {
   194				prefix = commonPrefix(prefix, line[0:indentLen(line)])
   195			}
   196		}
   197		n := len(prefix)
   198	
   199		// remove
   200		for i, line := range block {
   201			if !isBlank(line) {
   202				block[i] = line[n:]
   203			}
   204		}
   205	}
   206	
   207	// heading returns the trimmed line if it passes as a section heading;
   208	// otherwise it returns the empty string.
   209	func heading(line string) string {
   210		line = strings.TrimSpace(line)
   211		if len(line) == 0 {
   212			return ""
   213		}
   214	
   215		// a heading must start with an uppercase letter
   216		r, _ := utf8.DecodeRuneInString(line)
   217		if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   218			return ""
   219		}
   220	
   221		// it must end in a letter or digit:
   222		r, _ = utf8.DecodeLastRuneInString(line)
   223		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   224			return ""
   225		}
   226	
   227		// exclude lines with illegal characters
   228		if strings.ContainsAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") {
   229			return ""
   230		}
   231	
   232		// allow "'" for possessive "'s" only
   233		for b := line; ; {
   234			i := strings.IndexRune(b, '\'')
   235			if i < 0 {
   236				break
   237			}
   238			if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   239				return "" // not followed by "s "
   240			}
   241			b = b[i+2:]
   242		}
   243	
   244		return line
   245	}
   246	
   247	type op int
   248	
   249	const (
   250		opPara op = iota
   251		opHead
   252		opPre
   253	)
   254	
   255	type block struct {
   256		op    op
   257		lines []string
   258	}
   259	
   260	var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
   261	
   262	func anchorID(line string) string {
   263		// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
   264		return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
   265	}
   266	
   267	// ToHTML converts comment text to formatted HTML.
   268	// The comment was prepared by DocReader,
   269	// so it is known not to have leading, trailing blank lines
   270	// nor to have trailing spaces at the end of lines.
   271	// The comment markers have already been removed.
   272	//
   273	// Each span of unindented non-blank lines is converted into
   274	// a single paragraph. There is one exception to the rule: a span that
   275	// consists of a single line, is followed by another paragraph span,
   276	// begins with a capital letter, and contains no punctuation
   277	// is formatted as a heading.
   278	//
   279	// A span of indented lines is converted into a <pre> block,
   280	// with the common indent prefix removed.
   281	//
   282	// URLs in the comment text are converted into links; if the URL also appears
   283	// in the words map, the link is taken from the map (if the corresponding map
   284	// value is the empty string, the URL is not converted into a link).
   285	//
   286	// Go identifiers that appear in the words map are italicized; if the corresponding
   287	// map value is not the empty string, it is considered a URL and the word is converted
   288	// into a link.
   289	func ToHTML(w io.Writer, text string, words map[string]string) {
   290		for _, b := range blocks(text) {
   291			switch b.op {
   292			case opPara:
   293				w.Write(html_p)
   294				for _, line := range b.lines {
   295					emphasize(w, line, words, true)
   296				}
   297				w.Write(html_endp)
   298			case opHead:
   299				w.Write(html_h)
   300				id := ""
   301				for _, line := range b.lines {
   302					if id == "" {
   303						id = anchorID(line)
   304						w.Write([]byte(id))
   305						w.Write(html_hq)
   306					}
   307					commentEscape(w, line, true)
   308				}
   309				if id == "" {
   310					w.Write(html_hq)
   311				}
   312				w.Write(html_endh)
   313			case opPre:
   314				w.Write(html_pre)
   315				for _, line := range b.lines {
   316					emphasize(w, line, nil, false)
   317				}
   318				w.Write(html_endpre)
   319			}
   320		}
   321	}
   322	
   323	func blocks(text string) []block {
   324		var (
   325			out  []block
   326			para []string
   327	
   328			lastWasBlank   = false
   329			lastWasHeading = false
   330		)
   331	
   332		close := func() {
   333			if para != nil {
   334				out = append(out, block{opPara, para})
   335				para = nil
   336			}
   337		}
   338	
   339		lines := strings.SplitAfter(text, "\n")
   340		unindent(lines)
   341		for i := 0; i < len(lines); {
   342			line := lines[i]
   343			if isBlank(line) {
   344				// close paragraph
   345				close()
   346				i++
   347				lastWasBlank = true
   348				continue
   349			}
   350			if indentLen(line) > 0 {
   351				// close paragraph
   352				close()
   353	
   354				// count indented or blank lines
   355				j := i + 1
   356				for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   357					j++
   358				}
   359				// but not trailing blank lines
   360				for j > i && isBlank(lines[j-1]) {
   361					j--
   362				}
   363				pre := lines[i:j]
   364				i = j
   365	
   366				unindent(pre)
   367	
   368				// put those lines in a pre block
   369				out = append(out, block{opPre, pre})
   370				lastWasHeading = false
   371				continue
   372			}
   373	
   374			if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
   375				isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
   376				// current line is non-blank, surrounded by blank lines
   377				// and the next non-blank line is not indented: this
   378				// might be a heading.
   379				if head := heading(line); head != "" {
   380					close()
   381					out = append(out, block{opHead, []string{head}})
   382					i += 2
   383					lastWasHeading = true
   384					continue
   385				}
   386			}
   387	
   388			// open paragraph
   389			lastWasBlank = false
   390			lastWasHeading = false
   391			para = append(para, lines[i])
   392			i++
   393		}
   394		close()
   395	
   396		return out
   397	}
   398	
   399	// ToText prepares comment text for presentation in textual output.
   400	// It wraps paragraphs of text to width or fewer Unicode code points
   401	// and then prefixes each line with the indent. In preformatted sections
   402	// (such as program text), it prefixes each non-blank line with preIndent.
   403	func ToText(w io.Writer, text string, indent, preIndent string, width int) {
   404		l := lineWrapper{
   405			out:    w,
   406			width:  width,
   407			indent: indent,
   408		}
   409		for _, b := range blocks(text) {
   410			switch b.op {
   411			case opPara:
   412				// l.write will add leading newline if required
   413				for _, line := range b.lines {
   414					l.write(line)
   415				}
   416				l.flush()
   417			case opHead:
   418				w.Write(nl)
   419				for _, line := range b.lines {
   420					l.write(line + "\n")
   421				}
   422				l.flush()
   423			case opPre:
   424				w.Write(nl)
   425				for _, line := range b.lines {
   426					if isBlank(line) {
   427						w.Write([]byte("\n"))
   428					} else {
   429						w.Write([]byte(preIndent))
   430						w.Write([]byte(line))
   431					}
   432				}
   433			}
   434		}
   435	}
   436	
   437	type lineWrapper struct {
   438		out       io.Writer
   439		printed   bool
   440		width     int
   441		indent    string
   442		n         int
   443		pendSpace int
   444	}
   445	
   446	var nl = []byte("\n")
   447	var space = []byte(" ")
   448	
   449	func (l *lineWrapper) write(text string) {
   450		if l.n == 0 && l.printed {
   451			l.out.Write(nl) // blank line before new paragraph
   452		}
   453		l.printed = true
   454	
   455		for _, f := range strings.Fields(text) {
   456			w := utf8.RuneCountInString(f)
   457			// wrap if line is too long
   458			if l.n > 0 && l.n+l.pendSpace+w > l.width {
   459				l.out.Write(nl)
   460				l.n = 0
   461				l.pendSpace = 0
   462			}
   463			if l.n == 0 {
   464				l.out.Write([]byte(l.indent))
   465			}
   466			l.out.Write(space[:l.pendSpace])
   467			l.out.Write([]byte(f))
   468			l.n += l.pendSpace + w
   469			l.pendSpace = 1
   470		}
   471	}
   472	
   473	func (l *lineWrapper) flush() {
   474		if l.n == 0 {
   475			return
   476		}
   477		l.out.Write(nl)
   478		l.pendSpace = 0
   479		l.n = 0
   480	}
   481	

View as plain text