|
|
package main
import ( "bufio" "bytes" "io" "os" "runtime" "runtime/pprof" "sync" )
// lf is the line-feed byte ('\n') that marks the end of each line in the scanned text.
const lf = byte('\n')
func main() { f, _ := os.Create("cprof") pprof.StartCPUProfile(f) defer pprof.StopCPUProfile()
if len(os.Args) != 2 { print("One argument required\n") os.Exit(1) }
f, err := os.Open("locate.txt") if err != nil { panic(err) } defer f.Close()
var needle = os.Args[1]
// To make sure all threads have ended when the program finishes
var wg sync.WaitGroup
// Make a channel and a thread for each CPU core
var cores = runtime.NumCPU() var workchannel = make(chan []byte) for i := 0; i < cores; i++ { go scannerThread(workchannel, []byte(needle), &wg) wg.Add(1) }
var br = bufio.NewReaderSize(f, 1<<25) // reader with 32 MiB buffer
var buf = make([]byte, 1<<20) // 1 MiB buffer for searching
var remainder []byte var nread int
for { nread, err = br.Read(buf) if err != nil && err != io.EOF { panic(err) } if err != io.EOF { // Get the remainder of the last line
remainder, err = br.ReadBytes(byte('\n')) if err != nil && err != io.EOF { panic(err) } }
workchannel <- append(buf[:nread], remainder...)
if err == io.EOF { break } }
close(workchannel) wg.Wait() }
// scannerThread is a search worker. It receives batches of whole lines from
// workchannel until the channel is closed and writes every line that contains
// needle to os.Stdout. wg.Done is called when the worker returns.
//
// Each matching line is written with a single Write call. The methods of
// os.File are safe for concurrent use, but the relative order of lines
// written by different workers is unspecified. A failed write panics, in line
// with how the rest of the program treats I/O errors.
func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) {
	defer wg.Done() // sync up the goroutines when done

	for batch := range workchannel {
		if err := writeMatchingLines(os.Stdout, batch, needle); err != nil {
			panic(err)
		}
	}
}

// writeMatchingLines writes to w every line of batch that contains needle,
// including the terminating line feed when the line has one. A line is
// written once even if needle occurs in it several times. It returns the
// first write error, if any.
func writeMatchingLines(w io.Writer, batch, needle []byte) error {
	// pos is the offset at which the next search starts. All offsets are
	// recomputed per batch, so nothing carries over from a previous batch.
	for pos := 0; pos < len(batch); {
		hit := bytes.Index(batch[pos:], needle)
		if hit < 0 {
			return nil
		}
		hit += pos // make the offset relative to the whole batch

		// The line starts right after the previous line feed, or at offset 0
		// when there is none (LastIndexByte returns -1 in that case).
		start := bytes.LastIndexByte(batch[:hit], '\n') + 1

		// The line ends just past the next line feed, or at the end of the
		// batch when the final line is not terminated.
		end := len(batch)
		if nl := bytes.IndexByte(batch[hit:], '\n'); nl >= 0 {
			end = hit + nl + 1
		}

		if _, err := w.Write(batch[start:end]); err != nil {
			return err
		}

		// Resume after this line so it is not reported twice; end is always
		// greater than pos, so the loop is guaranteed to terminate.
		pos = end
	}
	return nil
}
|