rapidgrep/main.go


								package main


								import (

									"bufio"

									"bytes"

									"io"

									"os"

									"runtime"

									"runtime/pprof"

									"sync"

								)


								// lf is the ASCII line feed byte ('\n'), i.e. the line terminator.
								const lf = byte('\n')


								// main searches the file "locate.txt" in the current directory for the
								// byte string given as the single command-line argument. The file is read
								// in batches that always end on a line boundary (or at end of file) and
								// each batch is handed to one of runtime.NumCPU() scannerThread workers.
								//
								// A CPU profile is written to "cprof" on a best-effort basis: if the
								// profile cannot be created the search still runs, with a note on stderr.
								func main() {
									// Validate usage first so a bad invocation neither creates the profile
									// file nor skips deferred cleanup via os.Exit.
									if len(os.Args) != 2 {
										os.Stderr.WriteString("One argument required\n")
										os.Exit(1)
									}
									needle := []byte(os.Args[1])

									// Deferred calls run last-in-first-out, so the profiler is stopped
									// (flushing its data) before the profile file is closed.
									if prof, err := os.Create("cprof"); err != nil {
										os.Stderr.WriteString("cpu profile disabled: " + err.Error() + "\n")
									} else {
										defer prof.Close()
										if err := pprof.StartCPUProfile(prof); err != nil {
											os.Stderr.WriteString("cpu profile disabled: " + err.Error() + "\n")
										} else {
											defer pprof.StopCPUProfile()
										}
									}

									in, err := os.Open("locate.txt")
									if err != nil {
										panic(err)
									}
									defer in.Close()

									// One worker per CPU core, all fed from a single unbuffered channel.
									// The WaitGroup lets main wait for the workers to drain the channel.
									var wg sync.WaitGroup
									workchannel := make(chan []byte)
									for i := 0; i < runtime.NumCPU(); i++ {
										wg.Add(1) // must happen before the goroutine can call Done
										go scannerThread(workchannel, needle, &wg)
									}

									const (
										batchSize = 1 << 20 // bytes requested per read (1 MiB)
										lineSlack = 4 << 10 // spare capacity for the tail of the last line
									)

									br := bufio.NewReaderSize(in, 1<<25) // reader with 32 MiB buffer
									for {
										// Every batch gets its own backing array: ownership passes to the
										// worker that receives it, so it must never be written to again
										// from this goroutine.
										buf := make([]byte, batchSize, batchSize+lineSlack)

										n, readErr := br.Read(buf)
										if readErr != nil && readErr != io.EOF {
											panic(readErr)
										}
										batch := buf[:n]

										if readErr == nil {
											// Extend the batch to the end of the current line so that no
											// line is split across two batches.
											var rest []byte
											rest, readErr = br.ReadBytes('\n')
											if readErr != nil && readErr != io.EOF {
												panic(readErr)
											}
											batch = append(batch, rest...)
										}

										// Nothing is carried over between iterations, so at end of file
										// there is no stale data to resend; empty batches are skipped.
										if len(batch) > 0 {
											workchannel <- batch
										}
										if readErr == io.EOF {
											break
										}
									}

									close(workchannel)
									wg.Wait()
								}


								// scannerThread is a search worker. It receives batches of text from
								// workchannel until the channel is closed, and for every line of a batch
								// that contains needle it writes that whole line (including its trailing
								// line feed, when the line has one) to os.Stdout. A line is written once
								// even if needle occurs in it several times. An empty needle matches
								// every line.
								//
								// wg.Done is called when the worker returns, so the caller must have
								// called wg.Add for it beforehand.
								//
								// Each matching line is emitted with its own Write call; when several
								// workers run at once the relative order of their lines is unspecified.
								func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) {
									defer wg.Done() // Sync up the goroutines when done

									for batch := range workchannel {
										// pos is the offset in this batch where the next search begins.
										// It starts at zero for every batch, so offsets from a previous
										// batch can never leak into this one.
										pos := 0
										for pos < len(batch) {
											i := bytes.Index(batch[pos:], needle)
											if i == -1 {
												break
											}
											hit := pos + i

											// The line starts just after the previous line feed, or at the
											// beginning of the batch (LastIndexByte returns -1, giving 0).
											start := bytes.LastIndexByte(batch[:hit], '\n') + 1

											// The line ends just after the next line feed, or at the end of
											// the batch when the final line has no terminator.
											end := len(batch)
											if j := bytes.IndexByte(batch[hit:], '\n'); j != -1 {
												end = hit + j + 1
											}

											// Write errors are deliberately ignored: returning here would
											// stop this worker from draining the channel and could leave
											// the sender blocked.
											os.Stdout.Write(batch[start:end])

											// Resume after this line so it is not reported twice. end is
											// always greater than hit, so the loop always makes progress.
											pos = end
										}
									}
								}