Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
31d3892d89 | |||
16df5b6b7e | |||
eb97f7b0a3 | |||
5acad1096f | |||
76312a0e56 | |||
f06782bd9d | |||
9c4c7f5690 |
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
@ -16,6 +16,9 @@ jobs:
|
||||
- name: checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: fill version
|
||||
# Stamp the ref name into the Version constant. The dots are escaped so the
# pattern only matches a literal dotted version string, not "any character".
run: sed -i -E "s/v[0-9]+\.[0-9]+\.[0-9]+/${{ github.ref_name }}/g" internal/opt/version.go
|
||||
|
||||
- name: install golang
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
@ -52,5 +55,4 @@ jobs:
|
||||
dist/esgo2dump_${{ github.ref_name }}_windows_amd64.exe
|
||||
dist/esgo2dump_${{ github.ref_name }}_windows_arm64.exe
|
||||
dist/esgo2dump_${{ github.ref_name }}_darwin_amd64
|
||||
dist/esgo2dump_${{ github.ref_name }}_darwin_amd64
|
||||
dist/esgo2dump_${{ github.ref_name }}_darwin_arm64
|
2
go.mod
2
go.mod
@ -4,6 +4,7 @@ go 1.18
|
||||
|
||||
require (
|
||||
github.com/elastic/go-elasticsearch/v7 v7.17.10
|
||||
github.com/samber/lo v1.39.0
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/spf13/cobra v1.8.0
|
||||
)
|
||||
@ -12,5 +13,6 @@ require (
|
||||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/stretchr/testify v1.8.4 // indirect
|
||||
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect
|
||||
golang.org/x/sys v0.14.0 // indirect
|
||||
)
|
||||
|
4
go.sum
4
go.sum
@ -9,6 +9,8 @@ github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLf
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
|
||||
github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
|
||||
@ -19,6 +21,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM=
|
||||
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
|
||||
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
|
@ -21,6 +21,8 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9
|
||||
|
||||
esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json
|
||||
|
||||
esgo2dump --input=http://127.0.0.1:9200/some_index --source='id;name;age;address' --output=./data.json
|
||||
|
||||
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'
|
||||
|
||||
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json`,
|
||||
@ -30,18 +32,23 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_
|
||||
f_output string
|
||||
f_limit int
|
||||
f_type string
|
||||
f_source string
|
||||
f_query string
|
||||
|
||||
f_query_file string
|
||||
|
||||
f_version bool
|
||||
)
|
||||
|
||||
func init() {
|
||||
rootCommand.Flags().BoolVar(&opt.Debug, "debug", false, "")
|
||||
rootCommand.Flags().BoolVarP(&f_version, "version", "v", false, "print esgo2dump version")
|
||||
rootCommand.Flags().IntVar(&opt.Timeout, "timeout", 30, "max timeout seconds per operation with limit")
|
||||
|
||||
rootCommand.Flags().StringVarP(&f_input, "input", "i", "", "*required: input file or es url (example :data.json / http://127.0.0.1:9200/my_index)")
|
||||
rootCommand.Flags().StringVarP(&f_output, "output", "o", "output.json", "")
|
||||
rootCommand.Flags().StringVarP(&f_type, "type", "t", "data", "data/mapping/setting")
|
||||
rootCommand.Flags().StringVarP(&f_source, "source", "s", "", "query source, use ';' to separate")
|
||||
rootCommand.Flags().StringVarP(&f_query, "query", "q", "", `query dsl, example: {"bool":{"must":[{"term":{"name":{"value":"some_name"}}}],"must_not":[{"range":{"age":{"gte":18,"lt":60}}}]}}`)
|
||||
rootCommand.Flags().StringVar(&f_query_file, "query_file", "", `query json file (will execute line by line)`)
|
||||
rootCommand.Flags().IntVarP(&f_limit, "limit", "l", 100, "")
|
||||
|
@ -7,11 +7,13 @@ import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/loveuer/esgo2dump/internal/interfaces"
|
||||
"github.com/loveuer/esgo2dump/internal/opt"
|
||||
"github.com/loveuer/esgo2dump/internal/xes"
|
||||
"github.com/loveuer/esgo2dump/internal/xfile"
|
||||
"github.com/samber/lo"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
@ -50,6 +52,11 @@ func run(cmd *cobra.Command, args []string) error {
|
||||
logrus.SetLevel(logrus.DebugLevel)
|
||||
}
|
||||
|
||||
if f_version {
|
||||
logrus.Infof("esgo2dump (Version: %s)", opt.Version)
|
||||
return nil
|
||||
}
|
||||
|
||||
if err = check(cmd); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -71,6 +78,10 @@ func run(cmd *cobra.Command, args []string) error {
|
||||
return fmt.Errorf("with file input, query or query_file can't be supported")
|
||||
}
|
||||
|
||||
if (f_source != "") && ioi.IsFile() {
|
||||
return fmt.Errorf("with file input, source can't be supported")
|
||||
}
|
||||
|
||||
switch f_type {
|
||||
case "data":
|
||||
if err = executeData(cmd.Context(), ioi, ioo); err != nil {
|
||||
@ -117,8 +128,15 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
|
||||
ch = make(chan []*interfaces.ESSource, 1)
|
||||
errCh = make(chan error)
|
||||
queries = make([]map[string]any, 0)
|
||||
sources = make([]string, 0)
|
||||
)
|
||||
|
||||
if f_source != "" {
|
||||
sources = lo.Map(strings.Split(f_source, ";"), func(item string, idx int) string {
|
||||
return strings.TrimSpace(item)
|
||||
})
|
||||
}
|
||||
|
||||
if f_query != "" {
|
||||
query := make(map[string]any)
|
||||
if err = json.Unmarshal([]byte(f_query), &query); err != nil {
|
||||
@ -142,6 +160,7 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
|
||||
}()
|
||||
|
||||
scanner := bufio.NewScanner(qf)
|
||||
scanner.Buffer(make([]byte, 1*1024*1024), 5*1024*1024)
|
||||
lineCount := 1
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
@ -174,24 +193,33 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
|
||||
close(ch)
|
||||
}()
|
||||
|
||||
Loop:
|
||||
for _, query := range queries {
|
||||
for {
|
||||
select {
|
||||
case <-c.Done():
|
||||
return
|
||||
default:
|
||||
if lines, err = input.ReadData(c, f_limit, query); err != nil {
|
||||
if lines, err = input.ReadData(c, f_limit, query, sources); err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
|
||||
logrus.Debugf("executeData: input read_data got lines=%d", len(lines))
|
||||
|
||||
if len(lines) == 0 {
|
||||
input.ResetOffset()
|
||||
continue
|
||||
if query != nil {
|
||||
bs, _ := json.Marshal(query)
|
||||
logrus.Infof("Dump: query_file query=%s read done!!!", string(bs))
|
||||
}
|
||||
continue Loop
|
||||
}
|
||||
|
||||
ch <- lines
|
||||
}
|
||||
}
|
||||
}
|
||||
}(ctx)
|
||||
|
||||
var (
|
||||
@ -219,6 +247,8 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
|
||||
return err
|
||||
}
|
||||
|
||||
logrus.Debugf("executeData: output write_data succeed lines=%d", succeed)
|
||||
|
||||
if succeed != len(docs) {
|
||||
return fmt.Errorf("cmd.run: got lines=%d, only succeed=%d", len(docs), succeed)
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ package interfaces
|
||||
import "context"
|
||||
|
||||
type DumpIO interface {
|
||||
ReadData(context.Context, int, map[string]any) ([]*ESSource, error)
|
||||
ReadData(context.Context, int, map[string]any, []string) ([]*ESSource, error)
|
||||
WriteData(ctx context.Context, docs []*ESSource) (int, error)
|
||||
|
||||
ResetOffset()
|
||||
|
@ -7,4 +7,7 @@ const (
|
||||
var (
|
||||
Debug bool
|
||||
Timeout int
|
||||
|
||||
BuffSize = 5 * 1024 * 1024 // 5M
|
||||
MaxBuffSize = 100 * 1024 * 1024 // 100M, default elastic_search doc max size
|
||||
)
|
||||
|
3
internal/opt/version.go
Normal file
3
internal/opt/version.go
Normal file
@ -0,0 +1,3 @@
|
||||
package opt
|
||||
|
||||
// Version is the release string logged when the --version flag is set.
// The build workflow's "fill version" step rewrites this literal in place
// with the workflow ref name (github.ref_name) via sed, so keep it in the
// plain v<major>.<minor>.<patch> form that the sed pattern looks for.
const Version = "v0.1.2"
|
@ -128,7 +128,26 @@ func (c *client) Close() error {
|
||||
}
|
||||
|
||||
func (c *client) ResetOffset() {
|
||||
defer func() {
|
||||
c.scrollId = ""
|
||||
}()
|
||||
|
||||
bs, _ := json.Marshal(map[string]string{
|
||||
"scroll_id": c.scrollId,
|
||||
})
|
||||
|
||||
rr, err := c.c.ClearScroll(
|
||||
c.c.ClearScroll.WithContext(util.Timeout(3)),
|
||||
c.c.ClearScroll.WithBody(bytes.NewReader(bs)),
|
||||
)
|
||||
if err != nil {
|
||||
logrus.Warnf("ResetOffset: clear scroll id=%s err=%v", c.scrollId, err)
|
||||
return
|
||||
}
|
||||
|
||||
if rr.StatusCode != 200 {
|
||||
logrus.Warnf("ResetOffset: clear scroll id=%s msg=%s", c.scrollId, rr.String())
|
||||
}
|
||||
}
|
||||
func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (int, error) {
|
||||
var (
|
||||
@ -152,8 +171,6 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
|
||||
return 0, err
|
||||
}
|
||||
|
||||
logrus.Debugf("xes.Write: doc content=%s", string(bs))
|
||||
|
||||
if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{
|
||||
Action: "index",
|
||||
Index: c.index,
|
||||
@ -184,7 +201,7 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func (c *client) ReadData(ctx context.Context, i int, query map[string]any) ([]*interfaces.ESSource, error) {
|
||||
func (c *client) ReadData(ctx context.Context, i int, query map[string]any, source []string) ([]*interfaces.ESSource, error) {
|
||||
var (
|
||||
err error
|
||||
resp *esapi.Response
|
||||
@ -197,7 +214,11 @@ func (c *client) ReadData(ctx context.Context, i int, query map[string]any) ([]*
|
||||
c.c.Search.WithIndex(c.index),
|
||||
c.c.Search.WithSize(i),
|
||||
c.c.Search.WithFrom(0),
|
||||
c.c.Search.WithScroll(time.Duration(opt.ScrollDurationSeconds) * time.Second),
|
||||
c.c.Search.WithScroll(time.Duration(opt.Timeout*2) * time.Second),
|
||||
}
|
||||
|
||||
if len(source) > 0 {
|
||||
qs = append(qs, c.c.Search.WithSourceIncludes(source...))
|
||||
}
|
||||
|
||||
if query != nil && len(query) > 0 {
|
||||
@ -225,7 +246,7 @@ func (c *client) ReadData(ctx context.Context, i int, query map[string]any) ([]*
|
||||
|
||||
if resp, err = c.c.Scroll(
|
||||
c.c.Scroll.WithScrollID(c.scrollId),
|
||||
c.c.Scroll.WithScroll(time.Duration(opt.ScrollDurationSeconds)*time.Second),
|
||||
c.c.Scroll.WithScroll(time.Duration(opt.Timeout*2)*time.Second),
|
||||
); err != nil {
|
||||
return result.Hits.Hits, nil
|
||||
}
|
||||
|
@ -1,6 +1,9 @@
|
||||
package xes
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
elastic "github.com/elastic/go-elasticsearch/v7"
|
||||
@ -37,3 +40,68 @@ func TestGetESMapping(t *testing.T) {
|
||||
|
||||
t.Log("get source:", r.String())
|
||||
}
|
||||
|
||||
// TestScanWithInterrupt verifies that a bufio.Scanner keeps its position when
// the caller breaks out of a Scan loop and later resumes scanning.
//
// A 15-line fixture is read in three passes: two passes that stop after six
// lines each, then one pass that drains the rest (three lines).
func TestScanWithInterrupt(t *testing.T) {
	const content = `line 01
line 02
line 03
line 04
line 05
line 06
line 07
line 08
line 09
line 10
line 11
line 12
line 13
line 14
line 15`

	// The fixture lives in a per-test temp dir, so it never collides with a
	// stale file and is removed automatically when the test finishes.
	f, err := os.CreateTemp(t.TempDir(), "test_scan_*.txt")
	if err != nil {
		t.Fatal(1, err)
	}

	if _, err = f.WriteString(content); err != nil {
		f.Close()
		t.Fatal(1, err)
	}

	if err = f.Close(); err != nil {
		t.Fatal(1, err)
	}

	of, err := os.Open(f.Name())
	if err != nil {
		t.Fatal(2, err)
	}
	defer of.Close()

	scanner := bufio.NewScanner(of)

	// readBatch prints and counts scanned lines, stopping after limit lines;
	// a limit of 0 means "read until the scanner is exhausted".
	readBatch := func(limit int) int {
		count := 0
		for scanner.Scan() {
			text := scanner.Text()
			fmt.Printf("[line: %2d] = %s\n", count, text)
			count++

			if limit > 0 && count >= limit {
				break
			}
		}

		return count
	}

	batches := []struct {
		limit int
		want  int
	}{
		{limit: 6, want: 6},
		{limit: 6, want: 6},
		{limit: 0, want: 3},
	}

	for idx, batch := range batches {
		if got := readBatch(batch.limit); got != batch.want {
			t.Errorf("batch %d: scanned %d lines, want %d", idx, got, batch.want)
		}
	}

	if err = scanner.Err(); err != nil {
		t.Fatal(3, err)
	}
}
|
||||
|
@ -7,8 +7,9 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/loveuer/esgo2dump/internal/opt"
|
||||
|
||||
"github.com/loveuer/esgo2dump/internal/interfaces"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type client struct {
|
||||
@ -111,7 +112,7 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func (c *client) ReadData(ctx context.Context, i int, _ map[string]any) ([]*interfaces.ESSource, error) {
|
||||
func (c *client) ReadData(ctx context.Context, i int, _ map[string]any, _ []string) ([]*interfaces.ESSource, error) {
|
||||
var (
|
||||
err error
|
||||
count = 0
|
||||
@ -121,8 +122,6 @@ func (c *client) ReadData(ctx context.Context, i int, _ map[string]any) ([]*inte
|
||||
for c.scanner.Scan() {
|
||||
line := c.scanner.Text()
|
||||
|
||||
logrus.Debugf("xfile.Read: line=%s", line)
|
||||
|
||||
item := new(interfaces.ESSource)
|
||||
if err = json.Unmarshal([]byte(line), item); err != nil {
|
||||
return list, err
|
||||
@ -152,6 +151,8 @@ func NewClient(file *os.File, ioType interfaces.IO) (interfaces.DumpIO, error) {
|
||||
|
||||
if ioType == interfaces.IOInput {
|
||||
c.scanner = bufio.NewScanner(c.f)
|
||||
buf := make([]byte, opt.BuffSize)
|
||||
c.scanner.Buffer(buf, opt.MaxBuffSize)
|
||||
}
|
||||
|
||||
return c, nil
|
||||
|
Reference in New Issue
Block a user