// Command locate-style grep: prints every line of locate.txt (in the current
// directory) that contains the byte sequence given as the single command-line
// argument. The file is split into line-aligned batches that are searched in
// parallel, so the order of the printed lines is not guaranteed.
package main

import (
	"bufio"
	"bytes"
	"io"
	"os"
	"runtime"
	"sync"
)

const (
	// lf is the line separator used to delimit records in the input.
	lf = byte('\n')

	// readerBufSize is the size of the buffered reader wrapped around the file (32 MiB).
	readerBufSize = 1 << 25

	// batchSize is the number of bytes read per batch before the batch is
	// extended to the end of its last line (1 MiB).
	batchSize = 1 << 20

	// remainderSlack is extra capacity reserved in every batch buffer so that
	// appending the tail of the last line normally does not reallocate.
	remainderSlack = 4 << 10
)

// main validates the arguments, opens locate.txt, starts one scanner goroutine
// per CPU core and feeds them line-aligned batches of the file. It panics on
// any I/O error while opening or reading the file.
func main() {
	if len(os.Args) != 2 {
		os.Stderr.WriteString("One argument required\n")
		os.Exit(1)
	}

	f, err := os.Open("locate.txt")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// The needle is only ever read by the workers, so one shared copy is enough.
	needle := []byte(os.Args[1])

	// The WaitGroup makes sure all workers have finished before the program exits.
	var wg sync.WaitGroup
	workchannel := make(chan []byte)
	for i := 0; i < runtime.NumCPU(); i++ {
		// Add must happen before the goroutine starts so that Done can never
		// be observed ahead of the matching Add.
		wg.Add(1)
		go scannerThread(workchannel, needle, &wg)
	}

	br := bufio.NewReaderSize(f, readerBufSize)
	if err := feedBatches(br, batchSize, workchannel); err != nil {
		panic(err)
	}

	close(workchannel)
	wg.Wait()
}

// feedBatches reads r in chunks of about size bytes, extends every chunk up to
// and including the next line feed so that no line is split across batches,
// and sends each non-empty batch on out. It returns nil once r is exhausted
// and any other read error as-is. It does not close out.
func feedBatches(r *bufio.Reader, size int, out chan<- []byte) error {
	for {
		// A fresh buffer per batch: ownership moves to the receiving worker,
		// so the next Read must not write into memory that is still being
		// searched.
		batch := make([]byte, size, size+remainderSlack)

		n, err := r.Read(batch)
		if err != nil && err != io.EOF {
			return err
		}
		batch = batch[:n]

		if err == nil {
			// Complete the last (possibly cut) line of this batch.
			var rest []byte
			rest, err = r.ReadBytes(lf)
			if err != nil && err != io.EOF {
				return err
			}
			batch = append(batch, rest...)
		}

		if len(batch) > 0 {
			out <- batch
		}
		if err == io.EOF {
			return nil
		}
	}
}

// scannerThread receives batches from workchannel until it is closed and
// writes every line containing needle to os.Stdout. wg.Done is called when the
// goroutine returns.
//
// If writing to stdout fails, the error is reported once on stderr and the
// worker keeps draining the channel without searching, so the producer is
// never left blocked on a send.
func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) {
	defer wg.Done()

	failed := false
	for batch := range workchannel {
		if failed {
			continue
		}
		if err := writeMatchingLines(os.Stdout, batch, needle); err != nil {
			os.Stderr.WriteString("write error: " + err.Error() + "\n")
			failed = true
		}
	}
}

// writeMatchingLines writes to w every line of batch that contains needle.
// A line is written together with its trailing line feed when it has one; a
// final line without a line feed is written as-is. Each line is written at
// most once, even if needle occurs in it several times. The first write error
// is returned.
func writeMatchingLines(w io.Writer, batch, needle []byte) error {
	for len(batch) > 0 {
		i := bytes.Index(batch, needle)
		if i == -1 {
			return nil
		}

		// The line starts right after the previous line feed, or at the
		// beginning of the batch when there is none (LastIndexByte gives -1).
		start := bytes.LastIndexByte(batch[:i], lf) + 1

		// The line ends after the next line feed, or at the end of the batch
		// when the last line is not terminated.
		end := len(batch)
		if nl := bytes.IndexByte(batch[i:], lf); nl != -1 {
			end = i + nl + 1
		}

		if _, err := w.Write(batch[start:end]); err != nil {
			return err
		}

		// Drop everything up to the end of the reported line so it is not
		// searched again; end > 0 here, so the loop always makes progress.
		batch = batch[end:]
	}
	return nil
}