fix: scan file buff size

This commit is contained in:
loveuer 2024-03-27 18:09:11 +08:00
parent 8f901f74bd
commit f06782bd9d
5 changed files with 93 additions and 17 deletions

View File

@ -174,22 +174,27 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
close(ch) close(ch)
}() }()
Loop:
for _, query := range queries { for _, query := range queries {
select { for {
case <-c.Done(): select {
return case <-c.Done():
default:
if lines, err = input.ReadData(c, f_limit, query); err != nil {
errCh <- err
return return
} default:
if lines, err = input.ReadData(c, f_limit, query); err != nil {
errCh <- err
return
}
if len(lines) == 0 { logrus.Debugf("executeData: input read_data got lines=%d", len(lines))
input.ResetOffset()
continue
}
ch <- lines if len(lines) == 0 {
input.ResetOffset()
continue Loop
}
ch <- lines
}
} }
} }
}(ctx) }(ctx)
@ -219,6 +224,8 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
return err return err
} }
logrus.Debugf("executeData: output write_data succeed lines=%d", succeed)
if succeed != len(docs) { if succeed != len(docs) {
return fmt.Errorf("cmd.run: got lines=%d, only succeed=%d", len(docs), succeed) return fmt.Errorf("cmd.run: got lines=%d, only succeed=%d", len(docs), succeed)
} }

View File

@ -7,4 +7,7 @@ const (
var ( var (
Debug bool Debug bool
Timeout int Timeout int
// BuffSize is the size in bytes (5 MiB) of the buffer handed to the
// input-file scanner as its initial buffer.
BuffSize = 5 * 1024 * 1024 // 5M
// MaxBuffSize is the upper bound in bytes (100 MiB) passed to the input-file
// scanner's Buffer call, i.e. the longest single line it will accept.
// NOTE(review): the original note ties this value to Elasticsearch's default
// maximum document size — confirm against the cluster configuration.
MaxBuffSize = 100 * 1024 * 1024 // 100M, default elastic_search doc max size
) )

View File

@ -152,8 +152,6 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
return 0, err return 0, err
} }
logrus.Debugf("xes.Write: doc content=%s", string(bs))
if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{ if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{
Action: "index", Action: "index",
Index: c.index, Index: c.index,

View File

@ -1,6 +1,9 @@
package xes package xes
import ( import (
"bufio"
"fmt"
"os"
"testing" "testing"
elastic "github.com/elastic/go-elasticsearch/v7" elastic "github.com/elastic/go-elasticsearch/v7"
@ -37,3 +40,68 @@ func TestGetESMapping(t *testing.T) {
t.Log("get source:", r.String()) t.Log("get source:", r.String())
} }
// TestScanWithInterrupt verifies that a bufio.Scanner can be resumed after the
// caller breaks out of a Scan loop: each subsequent loop must continue with
// the line following the last one consumed, without skipping or repeating.
//
// The fixture holds 15 lines ("line 01" .. "line 15", no trailing newline).
// It is read in three rounds: two rounds interrupted after 6 lines each, and
// a final round that drains the remaining 3 lines.
func TestScanWithInterrupt(t *testing.T) {
	const (
		totalLines = 15
		batchSize  = 6
	)

	// Create the fixture inside a per-test temp dir so nothing is left behind
	// in the working directory, even when the test aborts early.
	f, err := os.CreateTemp(t.TempDir(), "test_scan-*.txt")
	if err != nil {
		t.Fatalf("create fixture file: %v", err)
	}
	filename := f.Name()

	for i := 1; i <= totalLines; i++ {
		// The last line deliberately has no trailing newline.
		sep := "\n"
		if i == totalLines {
			sep = ""
		}
		if _, err = fmt.Fprintf(f, "line %02d%s", i, sep); err != nil {
			f.Close()
			t.Fatalf("write fixture file: %v", err)
		}
	}
	if err = f.Close(); err != nil {
		t.Fatalf("close fixture file: %v", err)
	}

	of, err := os.Open(filename)
	if err != nil {
		t.Fatalf("open fixture file: %v", err)
	}
	defer of.Close()

	scanner := bufio.NewScanner(of)

	// readBatch consumes lines from the shared scanner and stops after limit
	// lines; a limit <= 0 means "read until the scanner is exhausted".
	readBatch := func(limit int) []string {
		var lines []string
		for scanner.Scan() {
			lines = append(lines, scanner.Text())
			if limit > 0 && len(lines) >= limit {
				break
			}
		}
		return lines
	}

	limits := []int{batchSize, batchSize, 0}
	wantSizes := []int{batchSize, batchSize, totalLines - 2*batchSize}

	next := 1 // number of the line the scanner is expected to yield next
	for round, limit := range limits {
		got := readBatch(limit)
		if len(got) != wantSizes[round] {
			t.Fatalf("round %d: got %d lines, want %d", round, len(got), wantSizes[round])
		}
		for idx, text := range got {
			t.Logf("[round: %d][line: %2d] = %s", round, idx, text)
			if want := fmt.Sprintf("line %02d", next); text != want {
				t.Errorf("round %d, line %d: got %q, want %q", round, idx, text, want)
			}
			next++
		}
	}

	// Scan returning false may hide a read error; make sure it was plain EOF.
	if err = scanner.Err(); err != nil {
		t.Fatalf("scan fixture file: %v", err)
	}
}

View File

@ -4,11 +4,11 @@ import (
"bufio" "bufio"
"context" "context"
"encoding/json" "encoding/json"
"github.com/loveuer/esgo2dump/internal/opt"
"io" "io"
"os" "os"
"github.com/loveuer/esgo2dump/internal/interfaces" "github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/sirupsen/logrus"
) )
type client struct { type client struct {
@ -121,8 +121,6 @@ func (c *client) ReadData(ctx context.Context, i int, _ map[string]any) ([]*inte
for c.scanner.Scan() { for c.scanner.Scan() {
line := c.scanner.Text() line := c.scanner.Text()
logrus.Debugf("xfile.Read: line=%s", line)
item := new(interfaces.ESSource) item := new(interfaces.ESSource)
if err = json.Unmarshal([]byte(line), item); err != nil { if err = json.Unmarshal([]byte(line), item); err != nil {
return list, err return list, err
@ -152,6 +150,8 @@ func NewClient(file *os.File, ioType interfaces.IO) (interfaces.DumpIO, error) {
if ioType == interfaces.IOInput { if ioType == interfaces.IOInput {
c.scanner = bufio.NewScanner(c.f) c.scanner = bufio.NewScanner(c.f)
buf := make([]byte, opt.BuffSize)
c.scanner.Buffer(buf, opt.MaxBuffSize)
} }
return c, nil return c, nil