|
@ -6,12 +6,17 @@ import ( |
|
|
"io" |
|
|
"io" |
|
|
"os" |
|
|
"os" |
|
|
"runtime" |
|
|
"runtime" |
|
|
|
|
|
"runtime/pprof" |
|
|
"sync" |
|
|
"sync" |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
const lf = byte('\n') |
|
|
const lf = byte('\n') |
|
|
|
|
|
|
|
|
func main() { |
|
|
func main() { |
|
|
|
|
|
f, _ := os.Create("cprof") |
|
|
|
|
|
pprof.StartCPUProfile(f) |
|
|
|
|
|
defer pprof.StopCPUProfile() |
|
|
|
|
|
|
|
|
if len(os.Args) != 2 { |
|
|
if len(os.Args) != 2 { |
|
|
print("One argument required\n") |
|
|
print("One argument required\n") |
|
|
os.Exit(1) |
|
|
os.Exit(1) |
|
@ -68,21 +73,28 @@ func main() { |
|
|
func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) { |
|
|
func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) { |
|
|
var batch []byte |
|
|
var batch []byte |
|
|
var ok bool |
|
|
var ok bool |
|
|
var start, end int |
|
|
|
|
|
var linelen int |
|
|
|
|
|
var i int |
|
|
|
|
|
defer wg.Done() |
|
|
|
|
|
|
|
|
var i, linelen, start, end int |
|
|
|
|
|
|
|
|
|
|
|
defer wg.Done() // Sync up the goroutines when done
|
|
|
|
|
|
|
|
|
for { |
|
|
for { |
|
|
batch, ok = <-workchannel |
|
|
batch, ok = <-workchannel |
|
|
if !ok { |
|
|
if !ok { |
|
|
return // channel closed. we're done
|
|
|
return // channel closed. we're done
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// This loop is for every result found, when there are no more results it stops
|
|
|
|
|
|
for i = bytes.Index(batch, needle); i != -1; i = bytes.Index(batch, needle) { |
|
|
|
|
|
start, end = i, i |
|
|
|
|
|
|
|
|
linelen = len(batch) |
|
|
|
|
|
|
|
|
|
|
|
// This loop is for every result found, when there are no more results
|
|
|
|
|
|
// it stops. When it runs for the first time the first index function is
|
|
|
|
|
|
// executed, which indexes the entire batch. In the following iterations
|
|
|
|
|
|
// the second index function is used, which begins searching where the
|
|
|
|
|
|
// last result ended so it doesn't get found twice.
|
|
|
|
|
|
for i = bytes.Index(batch, needle); i != -1; i = bytes.Index(batch[end:], needle) { |
|
|
|
|
|
start, end = i+end, i+end |
|
|
// needle was found, but where?
|
|
|
// needle was found, but where?
|
|
|
for { // find the start
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for { // find the start (line feed of the line before it, or index 0)
|
|
|
if batch[start] == lf { |
|
|
if batch[start] == lf { |
|
|
start++ // the line feed is from the previous line, so skip it
|
|
|
start++ // the line feed is from the previous line, so skip it
|
|
|
break |
|
|
break |
|
@ -92,7 +104,7 @@ func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) { |
|
|
start-- |
|
|
start-- |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
for { // find the end
|
|
|
|
|
|
|
|
|
for { // find the end (line feed at the end of the line)
|
|
|
if batch[end] == lf { |
|
|
if batch[end] == lf { |
|
|
end++ // include the line feed in the line
|
|
|
end++ // include the line feed in the line
|
|
|
// https://stackoverflow.com/questions/26857582/in-a-go-slice-why-does-slohi-end-at-element-hi-1
|
|
|
// https://stackoverflow.com/questions/26857582/in-a-go-slice-why-does-slohi-end-at-element-hi-1
|
|
@ -103,12 +115,14 @@ func scannerThread(workchannel chan []byte, needle []byte, wg *sync.WaitGroup) { |
|
|
end++ |
|
|
end++ |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Print the result. Note that stdout is not necessarily
|
|
|
|
|
|
// concurrency-safe, so in a real application this would have to be
|
|
|
|
|
|
// passed through a channel.
|
|
|
os.Stdout.Write(batch[start:end]) |
|
|
os.Stdout.Write(batch[start:end]) |
|
|
|
|
|
|
|
|
// Chop all of the bytes before the result off so it doesn't get
|
|
|
|
|
|
// searched again
|
|
|
|
|
|
batch = batch[end:] |
|
|
|
|
|
linelen = len(batch) |
|
|
|
|
|
|
|
|
// This is to keep track of where the end of the byte array is so we
|
|
|
|
|
|
// can avoid index out of bounds panics
|
|
|
|
|
|
linelen = linelen - end |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |