...
Run Format

Source file src/net/mail/message.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	/*
     6	Package mail implements parsing of mail messages.
     7	
     8	For the most part, this package follows the syntax as specified by RFC 5322 and
     9	extended by RFC 6532.
    10	Notable divergences:
    11		* Obsolete address formats are not parsed, including addresses with
    12		  embedded route information.
    13		* Group addresses are not parsed.
    14		* The full range of spacing (the CFWS syntax element) is not supported,
    15		  such as breaking addresses across lines.
    16		* No unicode normalization is performed.
    17	*/
    18	package mail
    19	
    20	import (
    21		"bufio"
    22		"bytes"
    23		"errors"
    24		"fmt"
    25		"io"
    26		"log"
    27		"mime"
    28		"net/textproto"
    29		"strings"
    30		"time"
    31		"unicode/utf8"
    32	)
    33	
    34	var debug = debugT(false)
    35	
    36	type debugT bool
    37	
    38	func (d debugT) Printf(format string, args ...interface{}) {
    39		if d {
    40			log.Printf(format, args...)
    41		}
    42	}
    43	
    44	// A Message represents a parsed mail message.
    45	type Message struct {
    46		Header Header
    47		Body   io.Reader
    48	}
    49	
    50	// ReadMessage reads a message from r.
    51	// The headers are parsed, and the body of the message will be available
    52	// for reading from r.
    53	func ReadMessage(r io.Reader) (msg *Message, err error) {
    54		tp := textproto.NewReader(bufio.NewReader(r))
    55	
    56		hdr, err := tp.ReadMIMEHeader()
    57		if err != nil {
    58			return nil, err
    59		}
    60	
    61		return &Message{
    62			Header: Header(hdr),
    63			Body:   tp.R,
    64		}, nil
    65	}
    66	
    67	// Layouts suitable for passing to time.Parse.
    68	// These are tried in order.
    69	var dateLayouts []string
    70	
    71	func init() {
    72		// Generate layouts based on RFC 5322, section 3.3.
    73	
    74		dows := [...]string{"", "Mon, "}   // day-of-week
    75		days := [...]string{"2", "02"}     // day = 1*2DIGIT
    76		years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
    77		seconds := [...]string{":05", ""}  // second
    78		// "-0700 (MST)" is not in RFC 5322, but is common.
    79		zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
    80	
    81		for _, dow := range dows {
    82			for _, day := range days {
    83				for _, year := range years {
    84					for _, second := range seconds {
    85						for _, zone := range zones {
    86							s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    87							dateLayouts = append(dateLayouts, s)
    88						}
    89					}
    90				}
    91			}
    92		}
    93	}
    94	
    95	// ParseDate parses an RFC 5322 date string.
    96	func ParseDate(date string) (time.Time, error) {
    97		for _, layout := range dateLayouts {
    98			t, err := time.Parse(layout, date)
    99			if err == nil {
   100				return t, nil
   101			}
   102		}
   103		return time.Time{}, errors.New("mail: header could not be parsed")
   104	}
   105	
   106	// A Header represents the key-value pairs in a mail message header.
   107	type Header map[string][]string
   108	
   109	// Get gets the first value associated with the given key.
   110	// It is case insensitive; CanonicalMIMEHeaderKey is used
   111	// to canonicalize the provided key.
   112	// If there are no values associated with the key, Get returns "".
   113	// To access multiple values of a key, or to use non-canonical keys,
   114	// access the map directly.
   115	func (h Header) Get(key string) string {
   116		return textproto.MIMEHeader(h).Get(key)
   117	}
   118	
   119	var ErrHeaderNotPresent = errors.New("mail: header not in message")
   120	
   121	// Date parses the Date header field.
   122	func (h Header) Date() (time.Time, error) {
   123		hdr := h.Get("Date")
   124		if hdr == "" {
   125			return time.Time{}, ErrHeaderNotPresent
   126		}
   127		return ParseDate(hdr)
   128	}
   129	
   130	// AddressList parses the named header field as a list of addresses.
   131	func (h Header) AddressList(key string) ([]*Address, error) {
   132		hdr := h.Get(key)
   133		if hdr == "" {
   134			return nil, ErrHeaderNotPresent
   135		}
   136		return ParseAddressList(hdr)
   137	}
   138	
   139	// Address represents a single mail address.
   140	// An address such as "Barry Gibbs <bg@example.com>" is represented
   141	// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
   142	type Address struct {
   143		Name    string // Proper name; may be empty.
   144		Address string // user@domain
   145	}
   146	
   147	// Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
   148	func ParseAddress(address string) (*Address, error) {
   149		return (&addrParser{s: address}).parseSingleAddress()
   150	}
   151	
   152	// ParseAddressList parses the given string as a list of addresses.
   153	func ParseAddressList(list string) ([]*Address, error) {
   154		return (&addrParser{s: list}).parseAddressList()
   155	}
   156	
   157	// An AddressParser is an RFC 5322 address parser.
   158	type AddressParser struct {
   159		// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
   160		WordDecoder *mime.WordDecoder
   161	}
   162	
   163	// Parse parses a single RFC 5322 address of the
   164	// form "Gogh Fir <gf@example.com>" or "foo@example.com".
   165	func (p *AddressParser) Parse(address string) (*Address, error) {
   166		return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
   167	}
   168	
   169	// ParseList parses the given string as a list of comma-separated addresses
   170	// of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
   171	func (p *AddressParser) ParseList(list string) ([]*Address, error) {
   172		return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
   173	}
   174	
   175	// String formats the address as a valid RFC 5322 address.
   176	// If the address's name contains non-ASCII characters
   177	// the name will be rendered according to RFC 2047.
   178	func (a *Address) String() string {
   179		// Format address local@domain
   180		at := strings.LastIndex(a.Address, "@")
   181		var local, domain string
   182		if at < 0 {
   183			// This is a malformed address ("@" is required in addr-spec);
   184			// treat the whole address as local-part.
   185			local = a.Address
   186		} else {
   187			local, domain = a.Address[:at], a.Address[at+1:]
   188		}
   189	
   190		// Add quotes if needed
   191		quoteLocal := false
   192		for i, r := range local {
   193			if isAtext(r, false) {
   194				continue
   195			}
   196			if r == '.' {
   197				// Dots are okay if they are surrounded by atext.
   198				// We only need to check that the previous byte is
   199				// not a dot, and this isn't the end of the string.
   200				if i > 0 && local[i-1] != '.' && i < len(local)-1 {
   201					continue
   202				}
   203			}
   204			quoteLocal = true
   205			break
   206		}
   207		if quoteLocal {
   208			local = quoteString(local)
   209	
   210		}
   211	
   212		s := "<" + local + "@" + domain + ">"
   213	
   214		if a.Name == "" {
   215			return s
   216		}
   217	
   218		// If every character is printable ASCII, quoting is simple.
   219		allPrintable := true
   220		for _, r := range a.Name {
   221			// isWSP here should actually be isFWS,
   222			// but we don't support folding yet.
   223			if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
   224				allPrintable = false
   225				break
   226			}
   227		}
   228		if allPrintable {
   229			return quoteString(a.Name) + " " + s
   230		}
   231	
   232		// Text in an encoded-word in a display-name must not contain certain
   233		// characters like quotes or parentheses (see RFC 2047 section 5.3).
   234		// When this is the case encode the name using base64 encoding.
   235		if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
   236			return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
   237		}
   238		return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
   239	}
   240	
   241	type addrParser struct {
   242		s   string
   243		dec *mime.WordDecoder // may be nil
   244	}
   245	
   246	func (p *addrParser) parseAddressList() ([]*Address, error) {
   247		var list []*Address
   248		for {
   249			p.skipSpace()
   250			addr, err := p.parseAddress()
   251			if err != nil {
   252				return nil, err
   253			}
   254			list = append(list, addr)
   255	
   256			p.skipSpace()
   257			if p.empty() {
   258				break
   259			}
   260			if !p.consume(',') {
   261				return nil, errors.New("mail: expected comma")
   262			}
   263		}
   264		return list, nil
   265	}
   266	
   267	func (p *addrParser) parseSingleAddress() (*Address, error) {
   268		addr, err := p.parseAddress()
   269		if err != nil {
   270			return nil, err
   271		}
   272		p.skipSpace()
   273		if !p.empty() {
   274			return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
   275		}
   276		return addr, nil
   277	}
   278	
   279	// parseAddress parses a single RFC 5322 address at the start of p.
   280	func (p *addrParser) parseAddress() (addr *Address, err error) {
   281		debug.Printf("parseAddress: %q", p.s)
   282		p.skipSpace()
   283		if p.empty() {
   284			return nil, errors.New("mail: no address")
   285		}
   286	
   287		// address = name-addr / addr-spec
   288		// TODO(dsymonds): Support parsing group address.
   289	
   290		// addr-spec has a more restricted grammar than name-addr,
   291		// so try parsing it first, and fallback to name-addr.
   292		// TODO(dsymonds): Is this really correct?
   293		spec, err := p.consumeAddrSpec()
   294		if err == nil {
   295			return &Address{
   296				Address: spec,
   297			}, err
   298		}
   299		debug.Printf("parseAddress: not an addr-spec: %v", err)
   300		debug.Printf("parseAddress: state is now %q", p.s)
   301	
   302		// display-name
   303		var displayName string
   304		if p.peek() != '<' {
   305			displayName, err = p.consumePhrase()
   306			if err != nil {
   307				return nil, err
   308			}
   309		}
   310		debug.Printf("parseAddress: displayName=%q", displayName)
   311	
   312		// angle-addr = "<" addr-spec ">"
   313		p.skipSpace()
   314		if !p.consume('<') {
   315			return nil, errors.New("mail: no angle-addr")
   316		}
   317		spec, err = p.consumeAddrSpec()
   318		if err != nil {
   319			return nil, err
   320		}
   321		if !p.consume('>') {
   322			return nil, errors.New("mail: unclosed angle-addr")
   323		}
   324		debug.Printf("parseAddress: spec=%q", spec)
   325	
   326		return &Address{
   327			Name:    displayName,
   328			Address: spec,
   329		}, nil
   330	}
   331	
   332	// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   333	func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   334		debug.Printf("consumeAddrSpec: %q", p.s)
   335	
   336		orig := *p
   337		defer func() {
   338			if err != nil {
   339				*p = orig
   340			}
   341		}()
   342	
   343		// local-part = dot-atom / quoted-string
   344		var localPart string
   345		p.skipSpace()
   346		if p.empty() {
   347			return "", errors.New("mail: no addr-spec")
   348		}
   349		if p.peek() == '"' {
   350			// quoted-string
   351			debug.Printf("consumeAddrSpec: parsing quoted-string")
   352			localPart, err = p.consumeQuotedString()
   353			if localPart == "" {
   354				err = errors.New("mail: empty quoted string in addr-spec")
   355			}
   356		} else {
   357			// dot-atom
   358			debug.Printf("consumeAddrSpec: parsing dot-atom")
   359			localPart, err = p.consumeAtom(true, false)
   360		}
   361		if err != nil {
   362			debug.Printf("consumeAddrSpec: failed: %v", err)
   363			return "", err
   364		}
   365	
   366		if !p.consume('@') {
   367			return "", errors.New("mail: missing @ in addr-spec")
   368		}
   369	
   370		// domain = dot-atom / domain-literal
   371		var domain string
   372		p.skipSpace()
   373		if p.empty() {
   374			return "", errors.New("mail: no domain in addr-spec")
   375		}
   376		// TODO(dsymonds): Handle domain-literal
   377		domain, err = p.consumeAtom(true, false)
   378		if err != nil {
   379			return "", err
   380		}
   381	
   382		return localPart + "@" + domain, nil
   383	}
   384	
   385	// consumePhrase parses the RFC 5322 phrase at the start of p.
   386	func (p *addrParser) consumePhrase() (phrase string, err error) {
   387		debug.Printf("consumePhrase: [%s]", p.s)
   388		// phrase = 1*word
   389		var words []string
   390		for {
   391			// word = atom / quoted-string
   392			var word string
   393			p.skipSpace()
   394			if p.empty() {
   395				return "", errors.New("mail: missing phrase")
   396			}
   397			if p.peek() == '"' {
   398				// quoted-string
   399				word, err = p.consumeQuotedString()
   400			} else {
   401				// atom
   402				// We actually parse dot-atom here to be more permissive
   403				// than what RFC 5322 specifies.
   404				word, err = p.consumeAtom(true, true)
   405				if err == nil {
   406					word, err = p.decodeRFC2047Word(word)
   407				}
   408			}
   409	
   410			if err != nil {
   411				break
   412			}
   413			debug.Printf("consumePhrase: consumed %q", word)
   414			words = append(words, word)
   415		}
   416		// Ignore any error if we got at least one word.
   417		if err != nil && len(words) == 0 {
   418			debug.Printf("consumePhrase: hit err: %v", err)
   419			return "", fmt.Errorf("mail: missing word in phrase: %v", err)
   420		}
   421		phrase = strings.Join(words, " ")
   422		return phrase, nil
   423	}
   424	
   425	// consumeQuotedString parses the quoted string at the start of p.
   426	func (p *addrParser) consumeQuotedString() (qs string, err error) {
   427		// Assume first byte is '"'.
   428		i := 1
   429		qsb := make([]rune, 0, 10)
   430	
   431		escaped := false
   432	
   433	Loop:
   434		for {
   435			r, size := utf8.DecodeRuneInString(p.s[i:])
   436	
   437			switch {
   438			case size == 0:
   439				return "", errors.New("mail: unclosed quoted-string")
   440	
   441			case size == 1 && r == utf8.RuneError:
   442				return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
   443	
   444			case escaped:
   445				//  quoted-pair = ("\" (VCHAR / WSP))
   446	
   447				if !isVchar(r) && !isWSP(r) {
   448					return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
   449				}
   450	
   451				qsb = append(qsb, r)
   452				escaped = false
   453	
   454			case isQtext(r) || isWSP(r):
   455				// qtext (printable US-ASCII excluding " and \), or
   456				// FWS (almost; we're ignoring CRLF)
   457				qsb = append(qsb, r)
   458	
   459			case r == '"':
   460				break Loop
   461	
   462			case r == '\\':
   463				escaped = true
   464	
   465			default:
   466				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
   467	
   468			}
   469	
   470			i += size
   471		}
   472		p.s = p.s[i+1:]
   473		return string(qsb), nil
   474	}
   475	
   476	// consumeAtom parses an RFC 5322 atom at the start of p.
   477	// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   478	// If permissive is true, consumeAtom will not fail on
   479	// leading/trailing/double dots in the atom (see golang.org/issue/4938).
   480	func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
   481		i := 0
   482	
   483	Loop:
   484		for {
   485			r, size := utf8.DecodeRuneInString(p.s[i:])
   486	
   487			switch {
   488			case size == 1 && r == utf8.RuneError:
   489				return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
   490	
   491			case size == 0 || !isAtext(r, dot):
   492				break Loop
   493	
   494			default:
   495				i += size
   496	
   497			}
   498		}
   499	
   500		if i == 0 {
   501			return "", errors.New("mail: invalid string")
   502		}
   503		atom, p.s = p.s[:i], p.s[i:]
   504		if !permissive {
   505			if strings.HasPrefix(atom, ".") {
   506				return "", errors.New("mail: leading dot in atom")
   507			}
   508			if strings.Contains(atom, "..") {
   509				return "", errors.New("mail: double dot in atom")
   510			}
   511			if strings.HasSuffix(atom, ".") {
   512				return "", errors.New("mail: trailing dot in atom")
   513			}
   514		}
   515		return atom, nil
   516	}
   517	
   518	func (p *addrParser) consume(c byte) bool {
   519		if p.empty() || p.peek() != c {
   520			return false
   521		}
   522		p.s = p.s[1:]
   523		return true
   524	}
   525	
   526	// skipSpace skips the leading space and tab characters.
   527	func (p *addrParser) skipSpace() {
   528		p.s = strings.TrimLeft(p.s, " \t")
   529	}
   530	
   531	func (p *addrParser) peek() byte {
   532		return p.s[0]
   533	}
   534	
   535	func (p *addrParser) empty() bool {
   536		return p.len() == 0
   537	}
   538	
   539	func (p *addrParser) len() int {
   540		return len(p.s)
   541	}
   542	
   543	func (p *addrParser) decodeRFC2047Word(s string) (string, error) {
   544		if p.dec != nil {
   545			return p.dec.DecodeHeader(s)
   546		}
   547	
   548		dec, err := rfc2047Decoder.Decode(s)
   549		if err == nil {
   550			return dec, nil
   551		}
   552	
   553		if _, ok := err.(charsetError); ok {
   554			return s, err
   555		}
   556	
   557		// Ignore invalid RFC 2047 encoded-word errors.
   558		return s, nil
   559	}
   560	
   561	var rfc2047Decoder = mime.WordDecoder{
   562		CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
   563			return nil, charsetError(charset)
   564		},
   565	}
   566	
   567	type charsetError string
   568	
   569	func (e charsetError) Error() string {
   570		return fmt.Sprintf("charset not supported: %q", string(e))
   571	}
   572	
   573	// isAtext reports whether r is an RFC 5322 atext character.
   574	// If dot is true, period is included.
   575	func isAtext(r rune, dot bool) bool {
   576		switch r {
   577		case '.':
   578			return dot
   579	
   580		case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
   581			return false
   582		}
   583		return isVchar(r)
   584	}
   585	
   586	// isQtext reports whether r is an RFC 5322 qtext character.
   587	func isQtext(r rune) bool {
   588		// Printable US-ASCII, excluding backslash or quote.
   589		if r == '\\' || r == '"' {
   590			return false
   591		}
   592		return isVchar(r)
   593	}
   594	
   595	// quoteString renders a string as an RFC 5322 quoted-string.
   596	func quoteString(s string) string {
   597		var buf bytes.Buffer
   598		buf.WriteByte('"')
   599		for _, r := range s {
   600			if isQtext(r) || isWSP(r) {
   601				buf.WriteRune(r)
   602			} else if isVchar(r) {
   603				buf.WriteByte('\\')
   604				buf.WriteRune(r)
   605			}
   606		}
   607		buf.WriteByte('"')
   608		return buf.String()
   609	}
   610	
   611	// isVchar reports whether r is an RFC 5322 VCHAR character.
   612	func isVchar(r rune) bool {
   613		// Visible (printing) characters.
   614		return '!' <= r && r <= '~' || isMultibyte(r)
   615	}
   616	
   617	// isMultibyte reports whether r is a multi-byte UTF-8 character
   618	// as supported by RFC 6532
   619	func isMultibyte(r rune) bool {
   620		return r >= utf8.RuneSelf
   621	}
   622	
   623	// isWSP reports whether r is a WSP (white space).
   624	// WSP is a space or horizontal tab (RFC 5234 Appendix B).
   625	func isWSP(r rune) bool {
   626		return r == ' ' || r == '\t'
   627	}
   628	

View as plain text