package main import ( "bytes" "io" "os" "runtime" "sync" ) func main() { // prof, _ := os.Create("cprof") // pprof.StartCPUProfile(prof) // defer pprof.StopCPUProfile() if len(os.Args) != 2 { print("One argument required\n") os.Exit(1) } f, err := NewAsyncLineReader("locate.txt") if err != nil { panic(err) } defer f.Close() var needle = os.Args[1] // To make sure all threads have ended when the program finishes var wg sync.WaitGroup // Make a channel and a thread for each CPU core var threads = runtime.NumCPU() for i := 0; i < threads; i++ { go scannerThread(f, []byte(needle), &wg) wg.Add(1) } wg.Wait() } func scannerThread(r *AsyncLineReader, needle []byte, wg *sync.WaitGroup) { var i, linelen, start, end int var buf = make([]byte, 1<<24) // 1 MiB buffer for searching var err error defer wg.Done() // Sync up the goroutines when done for { linelen, err = r.Read(buf) if err != nil { if err == io.EOF { return } panic(err) } // This loop is for every result found, when there are no more results // it stops. When it runs for the first time the first index function is // executed, which indexes the entire batch. In the following iterations // the second index function is used, which begins searching where the // last result ended so it doesn't get found twice. for i = bytes.Index(buf, needle); i != -1; i = bytes.Index(buf[end:], needle) { start, end = i+end, i+end // needle was found, but where? for { // find the start (line feed of the line before it, or index 0) if buf[start] == lf { start++ // the line feed is from the previous line, so skip it break } else if start == 0 { break // result is at start of file } start-- } for { // find the end (line feed at the end of the line) if buf[end] == lf { end++ // include the line feed in the line // https://stackoverflow.com/questions/26857582/in-a-go-slice-why-does-slohi-end-at-element-hi-1 break } else if end == linelen { break } end++ } // Print the result. Note that stdout is not necessarily // concurrency-safe, so in a real application this would have to be // passed through a channel. os.Stdout.Write(buf[start:end]) // This is to keep track of where the end of the byte array is so we // can avoid index out of bounds panics linelen = linelen - end } end = 0 } }