Fast multithreaded string searching in large text files.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

72 lines
1.1 KiB

  1. package main
  2. import (
  3. "io"
  4. "sync"
  5. "golang.org/x/exp/mmap"
  6. )
  7. const lf = byte('\n')
  8. const null = byte(0)
  9. type AsyncLineReader struct {
  10. offset int64
  11. eof bool
  12. file *mmap.ReaderAt
  13. mu *sync.Mutex
  14. remainder []byte
  15. }
  16. func NewAsyncLineReader(name string) (*AsyncLineReader, error) {
  17. var a = &AsyncLineReader{
  18. offset: 0,
  19. eof: false,
  20. mu: &sync.Mutex{},
  21. }
  22. var err error
  23. a.file, err = mmap.Open(name)
  24. return a, err
  25. }
  26. func (a *AsyncLineReader) Read(b []byte) (int, error) {
  27. if a.eof {
  28. return 0, io.EOF
  29. }
  30. a.mu.Lock()
  31. defer a.mu.Unlock()
  32. var n, err = a.file.ReadAt(b, a.offset)
  33. if err == io.EOF {
  34. a.eof = true
  35. return 0, err
  36. }
  37. if n == 0 {
  38. return 0, err
  39. }
  40. // find the last line feed in the batch
  41. for {
  42. n--
  43. if b[n] == lf {
  44. // selected character is a line feed, we're done
  45. break
  46. } else if n == 0 {
  47. break // result is at start of file
  48. }
  49. b[n] = null // Discard this byte so it doesn't get read multiple times
  50. }
  51. a.offset += int64(n)
  52. return n, err
  53. }
  54. func (a *AsyncLineReader) Close() error {
  55. return a.file.Close()
  56. }