Fast multithreaded string searching in large text files.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

72 lines
1.1 KiB

package main
import (
"io"
"sync"
"golang.org/x/exp/mmap"
)
// Byte values the line reader looks for or writes while trimming a chunk.
const (
	// lf is the line-feed byte that terminates a line.
	lf byte = '\n'
	// null is the zero byte written over discarded buffer contents.
	null byte = 0
)
// AsyncLineReader hands out successive chunks of a memory-mapped file through
// its Read method. Read takes mu before touching the mapping, so one reader
// can be shared by several goroutines.
type AsyncLineReader struct {
// offset is the file position passed to ReadAt by the next Read; Read
// advances it by the number of bytes it returns.
offset int64
// eof is set once the underlying file has reported io.EOF.
eof bool
// file is the memory-mapped file opened by NewAsyncLineReader.
file *mmap.ReaderAt
// mu is locked by Read while it reads from file and advances offset.
mu *sync.Mutex
// remainder is not referenced by any code visible in this file.
// NOTE(review): looks unused - confirm before removing.
remainder []byte
}
// NewAsyncLineReader memory-maps the named file and returns a reader
// positioned at its first byte.
//
// If the file cannot be opened, the error from mmap.Open is returned unchanged
// and the returned reader is nil, so a half-initialized reader (one without a
// mapped file) can never reach the caller.
func NewAsyncLineReader(name string) (*AsyncLineReader, error) {
	file, err := mmap.Open(name)
	if err != nil {
		return nil, err
	}
	return &AsyncLineReader{
		file: file,
		mu:   &sync.Mutex{},
	}, nil
}
// Read fills b with the next chunk of the file and reports how many leading
// bytes of b belong to that chunk.
//
// A chunk normally stops just before the last line feed found in the data
// that was read, so a line is never split across two calls; that line feed
// becomes the first byte of the following chunk. Bytes of b after that line
// feed are zeroed so a caller scanning the whole buffer does not see data that
// the next call will deliver again.
//
// Two cases deviate from that rule so the reader always makes progress:
//   - the final chunk of the file is returned whole (nothing follows it, so
//     no line can be split) and the unused tail of b is zeroed;
//   - when the data holds no line feed after its first byte (a line longer
//     than len(b)), everything read is returned and that line is split.
//
// Read returns a zero count and io.EOF once all data has been delivered. Any
// other error from the underlying ReadAt is returned unchanged. The reader's
// mutex is held for the whole call, so concurrent callers each receive a
// distinct chunk.
func (a *AsyncLineReader) Read(b []byte) (int, error) {
	a.mu.Lock()
	defer a.mu.Unlock()

	// Checked under the lock because concurrent Read calls write eof.
	if a.eof {
		return 0, io.EOF
	}

	n, err := a.file.ReadAt(b, a.offset)
	if err == io.EOF {
		// The mapping is exhausted. ReadAt may still have copied the tail
		// of the file into b; deliver it now and report io.EOF next time.
		a.eof = true
		if n == 0 {
			return 0, io.EOF
		}
		// Clear whatever an earlier call left behind the final bytes.
		for i := n; i < len(b); i++ {
			b[i] = null
		}
		a.offset += int64(n)
		return n, nil
	}
	if n == 0 {
		return 0, err
	}

	// Walk backwards to the last line feed in the data just read.
	cut := n - 1
	for cut > 0 && b[cut] != lf {
		cut--
	}
	if cut == 0 {
		// No line boundary past the first byte: cutting here would return
		// zero bytes and never advance, so hand back everything instead.
		a.offset += int64(n)
		return n, err
	}

	// Discard the partial line after the line feed; the next call re-reads
	// it starting from the line feed itself.
	for i := cut + 1; i < n; i++ {
		b[i] = null
	}
	a.offset += int64(cut)
	return cut, err
}
// Close closes the underlying memory-mapped file and returns the error, if
// any, reported by it.
//
// Calling Close on a nil reader, or on one whose file was never opened, is a
// no-op that returns nil instead of panicking on the nil file.
func (a *AsyncLineReader) Close() error {
	if a == nil || a.file == nil {
		return nil
	}
	return a.file.Close()
}