...
Run Format

Source file test/bench/go1/fasta_test.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package go1
     6	
     7	import "runtime"
     8	
     9	// Not a benchmark; input for revcomp.
    10	
    11	var fastabytes = makefasta()
    12	
    13	func makefasta() []byte {
    14		var n int = 25e6
    15		if runtime.GOARCH == "arm" || runtime.GOARCH == "mips" || runtime.GOARCH == "mips64" {
    16			// TODO(dfc) remove this limitation after precise gc.
    17			// A value of 25e6 consumes 465mb of heap on 32bit
    18			// platforms, which is too much for some systems.
    19			// A value of 25e5 produces a memory layout that
    20			// confuses the gc on 32bit platforms. So 25e4 it is.
    21			n = 25e4
    22		}
    23		return fasta(n)
    24	}
    25	
    26	func fasta(n int) []byte {
    27		out := make(fastaBuffer, 0, 11*n)
    28	
    29		iub := []fastaAcid{
    30			{prob: 0.27, sym: 'a'},
    31			{prob: 0.12, sym: 'c'},
    32			{prob: 0.12, sym: 'g'},
    33			{prob: 0.27, sym: 't'},
    34			{prob: 0.02, sym: 'B'},
    35			{prob: 0.02, sym: 'D'},
    36			{prob: 0.02, sym: 'H'},
    37			{prob: 0.02, sym: 'K'},
    38			{prob: 0.02, sym: 'M'},
    39			{prob: 0.02, sym: 'N'},
    40			{prob: 0.02, sym: 'R'},
    41			{prob: 0.02, sym: 'S'},
    42			{prob: 0.02, sym: 'V'},
    43			{prob: 0.02, sym: 'W'},
    44			{prob: 0.02, sym: 'Y'},
    45		}
    46	
    47		homosapiens := []fastaAcid{
    48			{prob: 0.3029549426680, sym: 'a'},
    49			{prob: 0.1979883004921, sym: 'c'},
    50			{prob: 0.1975473066391, sym: 'g'},
    51			{prob: 0.3015094502008, sym: 't'},
    52		}
    53	
    54		alu := []byte(
    55			"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
    56				"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
    57				"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
    58				"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
    59				"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
    60				"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
    61				"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
    62	
    63		out.WriteString(">ONE Homo sapiens alu\n")
    64		fastaRepeat(&out, alu, 2*n)
    65		out.WriteString(">TWO IUB ambiguity codes\n")
    66		fastaRandom(&out, iub, 3*n)
    67		out.WriteString(">THREE Homo sapiens frequency\n")
    68		fastaRandom(&out, homosapiens, 5*n)
    69		return out
    70	}
    71	
    72	type fastaBuffer []byte
    73	
    74	func (b *fastaBuffer) Flush() {
    75		panic("flush")
    76	}
    77	
    78	func (b *fastaBuffer) WriteString(s string) {
    79		p := b.NextWrite(len(s))
    80		copy(p, s)
    81	}
    82	
    83	func (b *fastaBuffer) NextWrite(n int) []byte {
    84		p := *b
    85		if len(p)+n > cap(p) {
    86			b.Flush()
    87			p = *b
    88		}
    89		out := p[len(p) : len(p)+n]
    90		*b = p[:len(p)+n]
    91		return out
    92	}
    93	
    94	const fastaLine = 60
    95	
    96	func fastaRepeat(out *fastaBuffer, alu []byte, n int) {
    97		buf := append(alu, alu...)
    98		off := 0
    99		for n > 0 {
   100			m := n
   101			if m > fastaLine {
   102				m = fastaLine
   103			}
   104			buf1 := out.NextWrite(m + 1)
   105			copy(buf1, buf[off:])
   106			buf1[m] = '\n'
   107			if off += m; off >= len(alu) {
   108				off -= len(alu)
   109			}
   110			n -= m
   111		}
   112	}
   113	
   114	const (
   115		fastaLookupSize          = 4096
   116		fastaLookupScale float64 = fastaLookupSize - 1
   117	)
   118	
   119	var fastaRand uint32 = 42
   120	
   121	type fastaAcid struct {
   122		sym   byte
   123		prob  float64
   124		cprob float64
   125		next  *fastaAcid
   126	}
   127	
   128	func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid {
   129		var lookup [fastaLookupSize]*fastaAcid
   130		var p float64
   131		for i := range acid {
   132			p += acid[i].prob
   133			acid[i].cprob = p * fastaLookupScale
   134			if i > 0 {
   135				acid[i-1].next = &acid[i]
   136			}
   137		}
   138		acid[len(acid)-1].cprob = 1.0 * fastaLookupScale
   139	
   140		j := 0
   141		for i := range lookup {
   142			for acid[j].cprob < float64(i) {
   143				j++
   144			}
   145			lookup[i] = &acid[j]
   146		}
   147	
   148		return &lookup
   149	}
   150	
   151	func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) {
   152		const (
   153			IM = 139968
   154			IA = 3877
   155			IC = 29573
   156		)
   157		lookup := fastaComputeLookup(acid)
   158		for n > 0 {
   159			m := n
   160			if m > fastaLine {
   161				m = fastaLine
   162			}
   163			buf := out.NextWrite(m + 1)
   164			f := fastaLookupScale / IM
   165			myrand := fastaRand
   166			for i := 0; i < m; i++ {
   167				myrand = (myrand*IA + IC) % IM
   168				r := float64(int(myrand)) * f
   169				a := lookup[int(r)]
   170				for a.cprob < r {
   171					a = a.next
   172				}
   173				buf[i] = a.sym
   174			}
   175			fastaRand = myrand
   176			buf[m] = '\n'
   177			n -= m
   178		}
   179	}
   180	

View as plain text