From 87c6dc267d48f9fd30dd458180a2ed5e75af9399 Mon Sep 17 00:00:00 2001 From: Wim Brand Date: Fri, 18 May 2018 22:52:02 +0200 Subject: [PATCH] Add build script and remove an array index operation --- build.sh | 3 +++ main.go | 40 +++++++++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 13 deletions(-) create mode 100755 build.sh diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..e5462ba --- /dev/null +++ b/build.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +go build -o rapidgrep -ldflags="-s -w" main.go diff --git a/main.go b/main.go index 6b6c1fc..1ee0e5f 100644 --- a/main.go +++ b/main.go @@ -6,12 +6,17 @@ import ( "io" "os" "runtime" + "runtime/pprof" "sync" ) const lf = byte('\n') func main() { + f, _ := os.Create("cprof") + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() + if len(os.Args) != 2 { print("One argument required\n") os.Exit(1) @@ -68,21 +73,28 @@ func main() { func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) { var batch []byte var ok bool - var start, end int - var linelen int - var i int - defer wg.Done() + var i, linelen, start, end int + + defer wg.Done() // Sync up the goroutines when done + for { batch, ok = <-workchannel if !ok { return // channel closed. we're done } - // This loop is for every result found, when there are no more results it stops - for i = bytes.Index(batch, needle); i != -1; i = bytes.Index(batch, needle) { - start, end = i, i + linelen = len(batch) + + // This loop is for every result found, when there are no more results + // it stops. When it runs for the first time the first index function is + // executed, which indexes the entire batch. In the following iterations + // the second index function is used, which begins searching where the + // last result ended so it doesn't get found twice. + for i = bytes.Index(batch, needle); i != -1; i = bytes.Index(batch[end:], needle) { + start, end = i+end, i+end // needle was found, but where? - for { // find the start + + for { // find the start (line feed of the line before it, or index 0) if batch[start] == lf { start++ // the line feed is from the previous line, so skip it break @@ -92,7 +104,7 @@ func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) { start-- } - for { // find the end + for { // find the end (line feed at the end of the line) if batch[end] == lf { end++ // include the line feed in the line // https://stackoverflow.com/questions/26857582/in-a-go-slice-why-does-slohi-end-at-element-hi-1 @@ -103,12 +115,14 @@ func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) { end++ } + // Print the result. Note that stdout is not necessarily + // concurrency-safe, so in a real application this would have to be + // passed through a channel. os.Stdout.Write(batch[start:end]) - // Chop all of the bytes before the result off so it doesn't get - // searched again - batch = batch[end:] - linelen = len(batch) + // This is to keep track of where the end of the byte array is so we + // can avoid index out of bounds panics + linelen = linelen - end } } }