Add build script and remove an array index operation

This commit is contained in:
Wim Brand 2018-05-18 22:52:02 +02:00
parent ef3c887b72
commit 87c6dc267d
2 changed files with 30 additions and 13 deletions

3
build.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Build the rapidgrep binary from main.go.
# The linker flags passed via -ldflags strip data from the output file:
#   -s  omit the symbol table and debug information
#   -w  omit the DWARF symbol table
# Both are used here to produce a smaller executable.
go build -o rapidgrep -ldflags="-s -w" main.go

40
main.go
View File

@ -6,12 +6,17 @@ import (
"io" "io"
"os" "os"
"runtime" "runtime"
"runtime/pprof"
"sync" "sync"
) )
const lf = byte('\n') const lf = byte('\n')
func main() { func main() {
f, _ := os.Create("cprof")
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
if len(os.Args) != 2 { if len(os.Args) != 2 {
print("One argument required\n") print("One argument required\n")
os.Exit(1) os.Exit(1)
@ -68,21 +73,28 @@ func main() {
func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) { func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) {
var batch []byte var batch []byte
var ok bool var ok bool
var start, end int var i, linelen, start, end int
var linelen int
var i int defer wg.Done() // Sync up the goroutines when done
defer wg.Done()
for { for {
batch, ok = <-workchannel batch, ok = <-workchannel
if !ok { if !ok {
return // channel closed. we're done return // channel closed. we're done
} }
// This loop is for every result found, when there are no more results it stops linelen = len(batch)
for i = bytes.Index(batch, needle); i != -1; i = bytes.Index(batch, needle) {
start, end = i, i // This loop is for every result found, when there are no more results
// it stops. When it runs for the first time the first index function is
// executed, which indexes the entire batch. In the following iterations
// the second index function is used, which begins searching where the
// last result ended so it doesn't get found twice.
for i = bytes.Index(batch, needle); i != -1; i = bytes.Index(batch[end:], needle) {
start, end = i+end, i+end
// needle was found, but where? // needle was found, but where?
for { // find the start
for { // find the start (line feed of the line before it, or index 0)
if batch[start] == lf { if batch[start] == lf {
start++ // the line feed is from the previous line, so skip it start++ // the line feed is from the previous line, so skip it
break break
@ -92,7 +104,7 @@ func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) {
start-- start--
} }
for { // find the end for { // find the end (line feed at the end of the line)
if batch[end] == lf { if batch[end] == lf {
end++ // include the line feed in the line end++ // include the line feed in the line
// https://stackoverflow.com/questions/26857582/in-a-go-slice-why-does-slohi-end-at-element-hi-1 // https://stackoverflow.com/questions/26857582/in-a-go-slice-why-does-slohi-end-at-element-hi-1
@ -103,12 +115,14 @@ func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) {
end++ end++
} }
// Print the result. Note that stdout is not necessarily
// concurrency-safe, so in a real application this would have to be
// passed through a channel.
os.Stdout.Write(batch[start:end]) os.Stdout.Write(batch[start:end])
// Chop all of the bytes before the result off so it doesn't get // This is to keep track of where the end of the byte array is so we
// searched again // can avoid index out of bounds panics
batch = batch[end:] linelen = linelen - end
linelen = len(batch)
} }
} }
} }