...
Run Format

Source file src/mime/mediatype.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package mime
     6	
     7	import (
     8		"bytes"
     9		"errors"
    10		"fmt"
    11		"sort"
    12		"strings"
    13		"unicode"
    14	)
    15	
    16	// FormatMediaType serializes mediatype t and the parameters
    17	// param as a media type conforming to RFC 2045 and RFC 2616.
    18	// The type and parameter names are written in lower-case.
    19	// When any of the arguments result in a standard violation then
    20	// FormatMediaType returns the empty string.
    21	func FormatMediaType(t string, param map[string]string) string {
    22		var b bytes.Buffer
    23		if slash := strings.Index(t, "/"); slash == -1 {
    24			if !isToken(t) {
    25				return ""
    26			}
    27			b.WriteString(strings.ToLower(t))
    28		} else {
    29			major, sub := t[:slash], t[slash+1:]
    30			if !isToken(major) || !isToken(sub) {
    31				return ""
    32			}
    33			b.WriteString(strings.ToLower(major))
    34			b.WriteByte('/')
    35			b.WriteString(strings.ToLower(sub))
    36		}
    37	
    38		attrs := make([]string, 0, len(param))
    39		for a := range param {
    40			attrs = append(attrs, a)
    41		}
    42		sort.Strings(attrs)
    43	
    44		for _, attribute := range attrs {
    45			value := param[attribute]
    46			b.WriteByte(';')
    47			b.WriteByte(' ')
    48			if !isToken(attribute) {
    49				return ""
    50			}
    51			b.WriteString(strings.ToLower(attribute))
    52			b.WriteByte('=')
    53			if isToken(value) {
    54				b.WriteString(value)
    55				continue
    56			}
    57	
    58			b.WriteByte('"')
    59			offset := 0
    60			for index, character := range value {
    61				if character == '"' || character == '\\' {
    62					b.WriteString(value[offset:index])
    63					offset = index
    64					b.WriteByte('\\')
    65				}
    66				if character&0x80 != 0 {
    67					return ""
    68				}
    69			}
    70			b.WriteString(value[offset:])
    71			b.WriteByte('"')
    72		}
    73		return b.String()
    74	}
    75	
    76	func checkMediaTypeDisposition(s string) error {
    77		typ, rest := consumeToken(s)
    78		if typ == "" {
    79			return errors.New("mime: no media type")
    80		}
    81		if rest == "" {
    82			return nil
    83		}
    84		if !strings.HasPrefix(rest, "/") {
    85			return errors.New("mime: expected slash after first token")
    86		}
    87		subtype, rest := consumeToken(rest[1:])
    88		if subtype == "" {
    89			return errors.New("mime: expected token after slash")
    90		}
    91		if rest != "" {
    92			return errors.New("mime: unexpected content after media subtype")
    93		}
    94		return nil
    95	}
    96	
    97	// ParseMediaType parses a media type value and any optional
    98	// parameters, per RFC 1521.  Media types are the values in
    99	// Content-Type and Content-Disposition headers (RFC 2183).
   100	// On success, ParseMediaType returns the media type converted
   101	// to lowercase and trimmed of white space and a non-nil map.
   102	// The returned map, params, maps from the lowercase
   103	// attribute to the attribute value with its case preserved.
   104	func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
   105		i := strings.Index(v, ";")
   106		if i == -1 {
   107			i = len(v)
   108		}
   109		mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
   110	
   111		err = checkMediaTypeDisposition(mediatype)
   112		if err != nil {
   113			return "", nil, err
   114		}
   115	
   116		params = make(map[string]string)
   117	
   118		// Map of base parameter name -> parameter name -> value
   119		// for parameters containing a '*' character.
   120		// Lazily initialized.
   121		var continuation map[string]map[string]string
   122	
   123		v = v[i:]
   124		for len(v) > 0 {
   125			v = strings.TrimLeftFunc(v, unicode.IsSpace)
   126			if len(v) == 0 {
   127				break
   128			}
   129			key, value, rest := consumeMediaParam(v)
   130			if key == "" {
   131				if strings.TrimSpace(rest) == ";" {
   132					// Ignore trailing semicolons.
   133					// Not an error.
   134					return
   135				}
   136				// Parse error.
   137				return "", nil, errors.New("mime: invalid media parameter")
   138			}
   139	
   140			pmap := params
   141			if idx := strings.Index(key, "*"); idx != -1 {
   142				baseName := key[:idx]
   143				if continuation == nil {
   144					continuation = make(map[string]map[string]string)
   145				}
   146				var ok bool
   147				if pmap, ok = continuation[baseName]; !ok {
   148					continuation[baseName] = make(map[string]string)
   149					pmap = continuation[baseName]
   150				}
   151			}
   152			if _, exists := pmap[key]; exists {
   153				// Duplicate parameter name is bogus.
   154				return "", nil, errors.New("mime: duplicate parameter name")
   155			}
   156			pmap[key] = value
   157			v = rest
   158		}
   159	
   160		// Stitch together any continuations or things with stars
   161		// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   162		var buf bytes.Buffer
   163		for key, pieceMap := range continuation {
   164			singlePartKey := key + "*"
   165			if v, ok := pieceMap[singlePartKey]; ok {
   166				decv := decode2231Enc(v)
   167				params[key] = decv
   168				continue
   169			}
   170	
   171			buf.Reset()
   172			valid := false
   173			for n := 0; ; n++ {
   174				simplePart := fmt.Sprintf("%s*%d", key, n)
   175				if v, ok := pieceMap[simplePart]; ok {
   176					valid = true
   177					buf.WriteString(v)
   178					continue
   179				}
   180				encodedPart := simplePart + "*"
   181				if v, ok := pieceMap[encodedPart]; ok {
   182					valid = true
   183					if n == 0 {
   184						buf.WriteString(decode2231Enc(v))
   185					} else {
   186						decv, _ := percentHexUnescape(v)
   187						buf.WriteString(decv)
   188					}
   189				} else {
   190					break
   191				}
   192			}
   193			if valid {
   194				params[key] = buf.String()
   195			}
   196		}
   197	
   198		return
   199	}
   200	
   201	func decode2231Enc(v string) string {
   202		sv := strings.SplitN(v, "'", 3)
   203		if len(sv) != 3 {
   204			return ""
   205		}
   206		// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   207		// need to decide how to expose it in the API. But I'm not sure
   208		// anybody uses it in practice.
   209		charset := strings.ToLower(sv[0])
   210		if charset != "us-ascii" && charset != "utf-8" {
   211			// TODO: unsupported encoding
   212			return ""
   213		}
   214		encv, _ := percentHexUnescape(sv[2])
   215		return encv
   216	}
   217	
   218	func isNotTokenChar(r rune) bool {
   219		return !isTokenChar(r)
   220	}
   221	
   222	// consumeToken consumes a token from the beginning of provided
   223	// string, per RFC 2045 section 5.1 (referenced from 2183), and return
   224	// the token consumed and the rest of the string. Returns ("", v) on
   225	// failure to consume at least one character.
   226	func consumeToken(v string) (token, rest string) {
   227		notPos := strings.IndexFunc(v, isNotTokenChar)
   228		if notPos == -1 {
   229			return v, ""
   230		}
   231		if notPos == 0 {
   232			return "", v
   233		}
   234		return v[0:notPos], v[notPos:]
   235	}
   236	
   237	// consumeValue consumes a "value" per RFC 2045, where a value is
   238	// either a 'token' or a 'quoted-string'.  On success, consumeValue
   239	// returns the value consumed (and de-quoted/escaped, if a
   240	// quoted-string) and the rest of the string. On failure, returns
   241	// ("", v).
   242	func consumeValue(v string) (value, rest string) {
   243		if v == "" {
   244			return
   245		}
   246		if v[0] != '"' {
   247			return consumeToken(v)
   248		}
   249	
   250		// parse a quoted-string
   251		buffer := new(bytes.Buffer)
   252		for i := 1; i < len(v); i++ {
   253			r := v[i]
   254			if r == '"' {
   255				return buffer.String(), v[i+1:]
   256			}
   257			// When MSIE sends a full file path (in "intranet mode"), it does not
   258			// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
   259			//
   260			// No known MIME generators emit unnecessary backslash escapes
   261			// for simple token characters like numbers and letters.
   262			//
   263			// If we see an unnecessary backslash escape, assume it is from MSIE
   264			// and intended as a literal backslash. This makes Go servers deal better
   265			// with MSIE without affecting the way they handle conforming MIME
   266			// generators.
   267			if r == '\\' && i+1 < len(v) && !isTokenChar(rune(v[i+1])) {
   268				buffer.WriteByte(v[i+1])
   269				i++
   270				continue
   271			}
   272			if r == '\r' || r == '\n' {
   273				return "", v
   274			}
   275			buffer.WriteByte(v[i])
   276		}
   277		// Did not find end quote.
   278		return "", v
   279	}
   280	
   281	func consumeMediaParam(v string) (param, value, rest string) {
   282		rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   283		if !strings.HasPrefix(rest, ";") {
   284			return "", "", v
   285		}
   286	
   287		rest = rest[1:] // consume semicolon
   288		rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   289		param, rest = consumeToken(rest)
   290		param = strings.ToLower(param)
   291		if param == "" {
   292			return "", "", v
   293		}
   294	
   295		rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   296		if !strings.HasPrefix(rest, "=") {
   297			return "", "", v
   298		}
   299		rest = rest[1:] // consume equals sign
   300		rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   301		value, rest2 := consumeValue(rest)
   302		if value == "" && rest2 == rest {
   303			return "", "", v
   304		}
   305		rest = rest2
   306		return param, value, rest
   307	}
   308	
   309	func percentHexUnescape(s string) (string, error) {
   310		// Count %, check that they're well-formed.
   311		percents := 0
   312		for i := 0; i < len(s); {
   313			if s[i] != '%' {
   314				i++
   315				continue
   316			}
   317			percents++
   318			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   319				s = s[i:]
   320				if len(s) > 3 {
   321					s = s[0:3]
   322				}
   323				return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   324			}
   325			i += 3
   326		}
   327		if percents == 0 {
   328			return s, nil
   329		}
   330	
   331		t := make([]byte, len(s)-2*percents)
   332		j := 0
   333		for i := 0; i < len(s); {
   334			switch s[i] {
   335			case '%':
   336				t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   337				j++
   338				i += 3
   339			default:
   340				t[j] = s[i]
   341				j++
   342				i++
   343			}
   344		}
   345		return string(t), nil
   346	}
   347	
   348	func ishex(c byte) bool {
   349		switch {
   350		case '0' <= c && c <= '9':
   351			return true
   352		case 'a' <= c && c <= 'f':
   353			return true
   354		case 'A' <= c && c <= 'F':
   355			return true
   356		}
   357		return false
   358	}
   359	
   360	func unhex(c byte) byte {
   361		switch {
   362		case '0' <= c && c <= '9':
   363			return c - '0'
   364		case 'a' <= c && c <= 'f':
   365			return c - 'a' + 10
   366		case 'A' <= c && c <= 'F':
   367			return c - 'A' + 10
   368		}
   369		return 0
   370	}
   371	

View as plain text