Fast multithreaded string searching in large text files.
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
package main
import ( "io" "sync"
"golang.org/x/exp/mmap" )
// Byte values used while trimming a batch to whole lines.
const (
	// lf is the line terminator that batches are cut at.
	lf byte = '\n'
	// null overwrites bytes that are dropped from a batch.
	null byte = 0
)
type AsyncLineReader struct { offset int64 eof bool file *mmap.ReaderAt mu *sync.Mutex remainder []byte }
func NewAsyncLineReader(name string) (*AsyncLineReader, error) { var a = &AsyncLineReader{ offset: 0, eof: false, mu: &sync.Mutex{}, }
var err error a.file, err = mmap.Open(name)
return a, err }
func (a *AsyncLineReader) Read(b []byte) (int, error) { if a.eof { return 0, io.EOF }
a.mu.Lock() defer a.mu.Unlock()
var n, err = a.file.ReadAt(b, a.offset) if err == io.EOF { a.eof = true return 0, err } if n == 0 { return 0, err }
// find the last line feed in the batch
for { n--
if b[n] == lf { // selected character is a line feed, we're done
break } else if n == 0 { break // result is at start of file
}
b[n] = null // Discard this byte so it doesn't get read multiple times
}
a.offset += int64(n)
return n, err }
// Close closes the underlying mapped file and returns the error from its
// Close method. Calling Close on a nil reader, or on a reader whose file was
// never opened, is a no-op that returns nil instead of panicking.
func (a *AsyncLineReader) Close() error {
	if a == nil || a.file == nil {
		return nil
	}
	return a.file.Close()
}
|