9 Commits

Author SHA1 Message Date
63b9abbc76 feat: add es version 6 support 2024-05-08 23:16:03 +08:00
2bac049856 doc: update doc add source example 2024-05-08 19:15:00 +08:00
31d3892d89 feat: add input(es) source support 2024-05-08 19:02:49 +08:00
16df5b6b7e fix: auto build release file 2024-03-29 15:04:26 +08:00
eb97f7b0a3 feat: add version command 2024-03-29 11:30:37 +08:00
5acad1096f fix: es reset_offset(clear scroll_id) 2024-03-28 16:53:53 +08:00
76312a0e56 Merge remote-tracking branch 'origin/master' 2024-03-28 10:26:02 +08:00
f06782bd9d fix: scan file buff size 2024-03-27 21:37:23 +08:00
9c4c7f5690 wip: debug read large file 2024-03-27 18:09:11 +08:00
15 changed files with 588 additions and 35 deletions

View File

@ -16,6 +16,9 @@ jobs:
- name: checkout repository - name: checkout repository
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: fill version
run: sed -i -E "s/v[0-9]+.[0-9]+.[0-9]+/${{ github.ref_name }}/g" internal/opt/version.go
- name: install golang - name: install golang
uses: actions/setup-go@v4 uses: actions/setup-go@v4
with: with:
@ -52,5 +55,4 @@ jobs:
dist/esgo2dump_${{ github.ref_name }}_windows_amd64.exe dist/esgo2dump_${{ github.ref_name }}_windows_amd64.exe
dist/esgo2dump_${{ github.ref_name }}_windows_arm64.exe dist/esgo2dump_${{ github.ref_name }}_windows_arm64.exe
dist/esgo2dump_${{ github.ref_name }}_darwin_amd64 dist/esgo2dump_${{ github.ref_name }}_darwin_amd64
dist/esgo2dump_${{ github.ref_name }}_darwin_amd64
dist/esgo2dump_${{ github.ref_name }}_darwin_arm64 dist/esgo2dump_${{ github.ref_name }}_darwin_arm64

9
.gitignore vendored
View File

@ -1,9 +1,10 @@
.idea .idea
.vscode .vscode
.DS_Store .DS_Store
data.json *data.json
mapping.json *mapping.json
setting.json *setting.json
output.json *output.json
*test.json
*.txt *.txt
dist dist

3
go.mod
View File

@ -3,7 +3,9 @@ module github.com/loveuer/esgo2dump
go 1.18 go 1.18
require ( require (
github.com/elastic/go-elasticsearch/v6 v6.8.10
github.com/elastic/go-elasticsearch/v7 v7.17.10 github.com/elastic/go-elasticsearch/v7 v7.17.10
github.com/samber/lo v1.39.0
github.com/sirupsen/logrus v1.9.3 github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.8.0 github.com/spf13/cobra v1.8.0
) )
@ -12,5 +14,6 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.8.4 // indirect github.com/stretchr/testify v1.8.4 // indirect
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect
golang.org/x/sys v0.14.0 // indirect golang.org/x/sys v0.14.0 // indirect
) )

6
go.sum
View File

@ -2,6 +2,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/elastic/go-elasticsearch/v6 v6.8.10 h1:2lN0gJ93gMBXvkhwih5xquldszpm8FlUwqG5sPzr6a8=
github.com/elastic/go-elasticsearch/v6 v6.8.10/go.mod h1:UwaDJsD3rWLM5rKNFzv9hgox93HoX8utj1kxD9aFUcI=
github.com/elastic/go-elasticsearch/v7 v7.17.10 h1:TCQ8i4PmIJuBunvBS6bwT2ybzVFxxUhhltAs3Gyu1yo= github.com/elastic/go-elasticsearch/v7 v7.17.10 h1:TCQ8i4PmIJuBunvBS6bwT2ybzVFxxUhhltAs3Gyu1yo=
github.com/elastic/go-elasticsearch/v7 v7.17.10/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4= github.com/elastic/go-elasticsearch/v7 v7.17.10/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@ -9,6 +11,8 @@ github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLf
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
@ -19,6 +23,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM=
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=

View File

@ -19,8 +19,14 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9200/some_index --limit=5000 esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9200/some_index --limit=5000
esgo2dump --input=http://127.0.0.1:9200/some_index --i-version 6 --output=./data.json
esgo2dump --output=http://127.0.0.1:9200/some_index --o-version 6 --input=./data.json
esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --source='id;name;age;address' --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}' esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json`, esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json`,
@ -30,18 +36,27 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_
f_output string f_output string
f_limit int f_limit int
f_type string f_type string
f_source string
f_query string f_query string
f_query_file string f_query_file string
f_version bool
es_iversion, es_oversion string
) )
func init() { func init() {
rootCommand.Flags().BoolVar(&opt.Debug, "debug", false, "") rootCommand.Flags().BoolVar(&opt.Debug, "debug", false, "")
rootCommand.Flags().BoolVarP(&f_version, "version", "v", false, "print esgo2dump version")
rootCommand.Flags().IntVar(&opt.Timeout, "timeout", 30, "max timeout seconds per operation with limit") rootCommand.Flags().IntVar(&opt.Timeout, "timeout", 30, "max timeout seconds per operation with limit")
rootCommand.Flags().StringVarP(&f_input, "input", "i", "", "*required: input file or es url (example :data.json / http://127.0.0.1:9200/my_index)") rootCommand.Flags().StringVarP(&f_input, "input", "i", "", "*required: input file or es url (example :data.json / http://127.0.0.1:9200/my_index)")
rootCommand.Flags().StringVarP(&f_output, "output", "o", "output.json", "") rootCommand.Flags().StringVarP(&f_output, "output", "o", "output.json", "")
rootCommand.Flags().StringVar(&es_iversion, "i-version", "7", "input(es) version")
rootCommand.Flags().StringVar(&es_oversion, "o-version", "7", "output(es) version")
rootCommand.Flags().StringVarP(&f_type, "type", "t", "data", "data/mapping/setting") rootCommand.Flags().StringVarP(&f_type, "type", "t", "data", "data/mapping/setting")
rootCommand.Flags().StringVarP(&f_source, "source", "s", "", "query source, use ';' to separate")
rootCommand.Flags().StringVarP(&f_query, "query", "q", "", `query dsl, example: {"bool":{"must":[{"term":{"name":{"value":"some_name"}}}],"must_not":[{"range":{"age":{"gte":18,"lt":60}}}]}}`) rootCommand.Flags().StringVarP(&f_query, "query", "q", "", `query dsl, example: {"bool":{"must":[{"term":{"name":{"value":"some_name"}}}],"must_not":[{"range":{"age":{"gte":18,"lt":60}}}]}}`)
rootCommand.Flags().StringVar(&f_query_file, "query_file", "", `query json file (will execute line by line)`) rootCommand.Flags().StringVar(&f_query_file, "query_file", "", `query json file (will execute line by line)`)
rootCommand.Flags().IntVarP(&f_limit, "limit", "l", 100, "") rootCommand.Flags().IntVarP(&f_limit, "limit", "l", 100, "")

View File

@ -4,14 +4,17 @@ import (
"bufio" "bufio"
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"net/url" "net/url"
"os" "os"
"strings"
"github.com/loveuer/esgo2dump/internal/interfaces" "github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/loveuer/esgo2dump/internal/opt" "github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/xes" "github.com/loveuer/esgo2dump/internal/xes"
"github.com/loveuer/esgo2dump/internal/xfile" "github.com/loveuer/esgo2dump/internal/xfile"
"github.com/samber/lo"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
@ -48,17 +51,24 @@ func run(cmd *cobra.Command, args []string) error {
if opt.Debug { if opt.Debug {
logrus.SetLevel(logrus.DebugLevel) logrus.SetLevel(logrus.DebugLevel)
logrus.SetReportCaller(true)
logrus.SetFormatter(&logrus.TextFormatter{})
}
if f_version {
logrus.Infof("esgo2dump (Version: %s)", opt.Version)
return nil
} }
if err = check(cmd); err != nil { if err = check(cmd); err != nil {
return err return err
} }
if ioi, err = newIO(f_input, interfaces.IOInput); err != nil { if ioi, err = newIO(f_input, interfaces.IOInput, es_iversion); err != nil {
return err return err
} }
if ioo, err = newIO(f_output, interfaces.IOOutput); err != nil { if ioo, err = newIO(f_output, interfaces.IOOutput, es_oversion); err != nil {
return err return err
} }
@ -71,6 +81,10 @@ func run(cmd *cobra.Command, args []string) error {
return fmt.Errorf("with file input, query or query_file can't be supported") return fmt.Errorf("with file input, query or query_file can't be supported")
} }
if (f_source != "") && ioi.IsFile() {
return fmt.Errorf("with file input, source can't be supported")
}
switch f_type { switch f_type {
case "data": case "data":
if err = executeData(cmd.Context(), ioi, ioo); err != nil { if err = executeData(cmd.Context(), ioi, ioo); err != nil {
@ -117,8 +131,15 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
ch = make(chan []*interfaces.ESSource, 1) ch = make(chan []*interfaces.ESSource, 1)
errCh = make(chan error) errCh = make(chan error)
queries = make([]map[string]any, 0) queries = make([]map[string]any, 0)
sources = make([]string, 0)
) )
if f_source != "" {
sources = lo.Map(strings.Split(f_source, ";"), func(item string, idx int) string {
return strings.TrimSpace(item)
})
}
if f_query != "" { if f_query != "" {
query := make(map[string]any) query := make(map[string]any)
if err = json.Unmarshal([]byte(f_query), &query); err != nil { if err = json.Unmarshal([]byte(f_query), &query); err != nil {
@ -142,6 +163,7 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
}() }()
scanner := bufio.NewScanner(qf) scanner := bufio.NewScanner(qf)
scanner.Buffer(make([]byte, 1*1024*1024), 5*1024*1024)
lineCount := 1 lineCount := 1
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()
@ -174,24 +196,33 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
close(ch) close(ch)
}() }()
Loop:
for _, query := range queries { for _, query := range queries {
for {
select { select {
case <-c.Done(): case <-c.Done():
return return
default: default:
if lines, err = input.ReadData(c, f_limit, query); err != nil { if lines, err = input.ReadData(c, f_limit, query, sources); err != nil {
errCh <- err errCh <- err
return return
} }
logrus.Debugf("executeData: input read_data got lines=%d", len(lines))
if len(lines) == 0 { if len(lines) == 0 {
input.ResetOffset() input.ResetOffset()
continue if query != nil {
bs, _ := json.Marshal(query)
logrus.Infof("Dump: query_file query=%s read done!!!", string(bs))
}
continue Loop
} }
ch <- lines ch <- lines
} }
} }
}
}(ctx) }(ctx)
var ( var (
@ -219,6 +250,8 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
return err return err
} }
logrus.Debugf("executeData: output write_data succeed lines=%d", succeed)
if succeed != len(docs) { if succeed != len(docs) {
return fmt.Errorf("cmd.run: got lines=%d, only succeed=%d", len(docs), succeed) return fmt.Errorf("cmd.run: got lines=%d, only succeed=%d", len(docs), succeed)
} }
@ -230,7 +263,7 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
} }
} }
func newIO(source string, ioType interfaces.IO) (interfaces.DumpIO, error) { func newIO(source string, ioType interfaces.IO, esv string) (interfaces.DumpIO, error) {
var ( var (
err error err error
iurl *url.URL iurl *url.URL
@ -262,9 +295,18 @@ func newIO(source string, ioType interfaces.IO) (interfaces.DumpIO, error) {
} }
} }
logrus.Debugf("newIO.%s: source as url=%+v", ioType.Code(), *iurl) logrus.Debugf("newIO.%s: source as url=%+v version=%s", ioType.Code(), *iurl, esv)
switch esv {
case "7":
return xes.NewClient(iurl, ioType) return xes.NewClient(iurl, ioType)
case "6":
return xes.NewClientV6(iurl, ioType)
case "8":
return nil, errors.New("es version 8 comming soon")
default:
return nil, fmt.Errorf("unknown es version=%s", esv)
}
ClientByFile: ClientByFile:
if ioType == interfaces.IOOutput { if ioType == interfaces.IOOutput {

View File

@ -3,7 +3,7 @@ package interfaces
import "context" import "context"
type DumpIO interface { type DumpIO interface {
ReadData(context.Context, int, map[string]any) ([]*ESSource, error) ReadData(context.Context, int, map[string]any, []string) ([]*ESSource, error)
WriteData(ctx context.Context, docs []*ESSource) (int, error) WriteData(ctx context.Context, docs []*ESSource) (int, error)
ResetOffset() ResetOffset()

View File

@ -31,3 +31,20 @@ type ESMapping map[string]struct {
Properties map[string]any `json:"properties"` Properties map[string]any `json:"properties"`
} `json:"mappings"` } `json:"mappings"`
} }
type ESResponseV6 struct {
ScrollId string `json:"_scroll_id"`
Took int `json:"took"`
TimedOut bool `json:"timed_out"`
Shards struct {
Total int `json:"total"`
Successful int `json:"successful"`
Skipped int `json:"skipped"`
Failed int `json:"failed"`
} `json:"_shards"`
Hits struct {
Total int `json:"total"`
MaxScore float64 `json:"max_score"`
Hits []*ESSource `json:"hits"`
} `json:"hits"`
}

View File

@ -7,4 +7,7 @@ const (
var ( var (
Debug bool Debug bool
Timeout int Timeout int
BuffSize = 5 * 1024 * 1024 // 5M
MaxBuffSize = 100 * 1024 * 1024 // 100M, default elastic_search doc max size
) )

3
internal/opt/version.go Normal file
View File

@ -0,0 +1,3 @@
package opt
const Version = "v0.1.2"

View File

@ -128,7 +128,26 @@ func (c *client) Close() error {
} }
func (c *client) ResetOffset() { func (c *client) ResetOffset() {
defer func() {
c.scrollId = "" c.scrollId = ""
}()
bs, _ := json.Marshal(map[string]string{
"scroll_id": c.scrollId,
})
rr, err := c.c.ClearScroll(
c.c.ClearScroll.WithContext(util.Timeout(3)),
c.c.ClearScroll.WithBody(bytes.NewReader(bs)),
)
if err != nil {
logrus.Warnf("ResetOffset: clear scroll id=%s err=%v", c.scrollId, err)
return
}
if rr.StatusCode != 200 {
logrus.Warnf("ResetOffset: clear scroll id=%s msg=%s", c.scrollId, rr.String())
}
} }
func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (int, error) { func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (int, error) {
var ( var (
@ -152,8 +171,6 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
return 0, err return 0, err
} }
logrus.Debugf("xes.Write: doc content=%s", string(bs))
if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{ if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{
Action: "index", Action: "index",
Index: c.index, Index: c.index,
@ -184,7 +201,7 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
return count, nil return count, nil
} }
func (c *client) ReadData(ctx context.Context, i int, query map[string]any) ([]*interfaces.ESSource, error) { func (c *client) ReadData(ctx context.Context, i int, query map[string]any, source []string) ([]*interfaces.ESSource, error) {
var ( var (
err error err error
resp *esapi.Response resp *esapi.Response
@ -197,7 +214,11 @@ func (c *client) ReadData(ctx context.Context, i int, query map[string]any) ([]*
c.c.Search.WithIndex(c.index), c.c.Search.WithIndex(c.index),
c.c.Search.WithSize(i), c.c.Search.WithSize(i),
c.c.Search.WithFrom(0), c.c.Search.WithFrom(0),
c.c.Search.WithScroll(time.Duration(opt.ScrollDurationSeconds) * time.Second), c.c.Search.WithScroll(time.Duration(opt.Timeout*2) * time.Second),
}
if len(source) > 0 {
qs = append(qs, c.c.Search.WithSourceIncludes(source...))
} }
if query != nil && len(query) > 0 { if query != nil && len(query) > 0 {
@ -225,7 +246,7 @@ func (c *client) ReadData(ctx context.Context, i int, query map[string]any) ([]*
if resp, err = c.c.Scroll( if resp, err = c.c.Scroll(
c.c.Scroll.WithScrollID(c.scrollId), c.c.Scroll.WithScrollID(c.scrollId),
c.c.Scroll.WithScroll(time.Duration(opt.ScrollDurationSeconds)*time.Second), c.c.Scroll.WithScroll(time.Duration(opt.Timeout*2)*time.Second),
); err != nil { ); err != nil {
return result.Hits.Hits, nil return result.Hits.Hits, nil
} }

364
internal/xes/xes6.go Normal file
View File

@ -0,0 +1,364 @@
package xes
import (
"bytes"
"context"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/url"
"strings"
"time"
elastic "github.com/elastic/go-elasticsearch/v6"
"github.com/elastic/go-elasticsearch/v6/esapi"
"github.com/elastic/go-elasticsearch/v6/esutil"
"github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/util"
"github.com/sirupsen/logrus"
)
func NewClientV6(url *url.URL, iot interfaces.IO) (interfaces.DumpIO, error) {
var (
address = fmt.Sprintf("%s://%s", url.Scheme, url.Host)
urlIndex = strings.TrimPrefix(url.Path, "/")
urlUsername string
urlPassword string
errCh = make(chan error)
cliCh = make(chan *elastic.Client)
)
if url.User != nil {
urlUsername = url.User.Username()
if p, ok := url.User.Password(); ok {
urlPassword = p
}
}
logrus.Debugf("xes.NewClient: endpoint=%s index=%s (username=%s password=%s)", address, urlIndex, urlUsername, urlPassword)
if urlIndex == "" {
return nil, fmt.Errorf("please specify index name: (like => http://127.0.0.1:9200/my_index)")
}
ncFunc := func(endpoints []string, username, password, index string) {
var (
err error
cli *elastic.Client
infoResp *esapi.Response
)
if cli, err = elastic.NewClient(
elastic.Config{
Addresses: endpoints,
Username: username,
Password: password,
CACert: nil,
RetryOnStatus: []int{429},
MaxRetries: 3,
RetryBackoff: nil,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DialContext: (&net.Dialer{Timeout: 10 * time.Second}).DialContext,
},
},
); err != nil {
logrus.Debugf("xes.NewClient: elastic new client with endpont=%s err=%v", endpoints, err)
errCh <- err
return
}
if infoResp, err = cli.Info(); err != nil {
logrus.Debugf("xes.NewClient: ping err=%v", err)
errCh <- err
return
}
if infoResp.StatusCode != 200 {
err = fmt.Errorf("info xes status=%d", infoResp.StatusCode)
logrus.Debugf("xes.NewClient: status err=%v", err)
errCh <- err
return
}
cliCh <- cli
}
go ncFunc([]string{address}, urlUsername, urlPassword, urlIndex)
select {
case <-util.Timeout(10).Done():
return nil, fmt.Errorf("dial es=%s err=%v", address, context.DeadlineExceeded)
case c := <-cliCh:
return &clientv6{c: c, index: urlIndex, iot: iot}, nil
case e := <-errCh:
return nil, e
}
}
type clientv6 struct {
c *elastic.Client
iot interfaces.IO
index string
scrollId string
}
func (c *clientv6) checkResponse(r *esapi.Response) error {
if r.StatusCode == 200 {
return nil
}
return fmt.Errorf("status=%d msg=%s", r.StatusCode, r.String())
}
func (c *clientv6) IOType() interfaces.IO {
return c.iot
}
func (c *clientv6) IsFile() bool {
return false
}
func (c *clientv6) Close() error {
return nil
}
func (c *clientv6) ResetOffset() {
defer func() {
c.scrollId = ""
}()
bs, _ := json.Marshal(map[string]string{
"scroll_id": c.scrollId,
})
rr, err := c.c.ClearScroll(
c.c.ClearScroll.WithContext(util.Timeout(3)),
c.c.ClearScroll.WithBody(bytes.NewReader(bs)),
)
if err != nil {
logrus.Warnf("ResetOffset: clear scroll id=%s err=%v", c.scrollId, err)
return
}
if rr.StatusCode != 200 {
logrus.Warnf("ResetOffset: clear scroll id=%s msg=%s", c.scrollId, rr.String())
}
}
func (c *clientv6) WriteData(ctx context.Context, docs []*interfaces.ESSource) (int, error) {
var (
err error
indexer esutil.BulkIndexer
count int
be error
)
if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
Client: c.c,
Index: c.index,
Refresh: "",
DocumentType: "_doc",
}); err != nil {
return 0, err
}
for _, doc := range docs {
var bs []byte
if bs, err = json.Marshal(doc.Content); err != nil {
return 0, err
}
logrus.WithField("raw", string(bs)).Debug()
if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{
Action: "index",
Index: c.index,
DocumentID: doc.DocId,
Body: bytes.NewReader(bs),
OnFailure: func(ctx context.Context, item esutil.BulkIndexerItem, item2 esutil.BulkIndexerResponseItem, bulkErr error) {
be = bulkErr
},
}); err != nil {
return 0, err
}
count++
}
if err = indexer.Close(util.TimeoutCtx(ctx, opt.Timeout)); err != nil {
return 0, err
}
if be != nil {
return 0, be
}
stats := indexer.Stats()
if stats.NumFailed > 0 {
return count, fmt.Errorf("write to xes failed_count=%d bulk_count=%d", stats.NumFailed, count)
}
return count, nil
}
func (c *clientv6) ReadData(ctx context.Context, i int, query map[string]any, source []string) ([]*interfaces.ESSource, error) {
var (
err error
resp *esapi.Response
result = new(interfaces.ESResponseV6)
bs []byte
)
if c.scrollId == "" {
qs := []func(*esapi.SearchRequest){
c.c.Search.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
c.c.Search.WithIndex(c.index),
c.c.Search.WithSize(i),
c.c.Search.WithFrom(0),
c.c.Search.WithScroll(time.Duration(opt.Timeout*2) * time.Second),
}
if len(source) > 0 {
qs = append(qs, c.c.Search.WithSourceIncludes(source...))
}
if query != nil && len(query) > 0 {
queryBs, _ := json.Marshal(map[string]any{"query": query})
qs = append(qs, c.c.Search.WithBody(bytes.NewReader(queryBs)))
}
if resp, err = c.c.Search(qs...); err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return nil, fmt.Errorf(resp.String())
}
if bs, err = io.ReadAll(resp.Body); err != nil {
return nil, err
}
if err = json.Unmarshal(bs, result); err != nil {
logrus.
WithField("err", err.Error()).
WithField("raw", string(bs)).
Debug()
return nil, err
}
c.scrollId = result.ScrollId
return result.Hits.Hits, nil
}
if resp, err = c.c.Scroll(
c.c.Scroll.WithScrollID(c.scrollId),
c.c.Scroll.WithScroll(time.Duration(opt.Timeout*2)*time.Second),
); err != nil {
return result.Hits.Hits, nil
}
decoder := json.NewDecoder(resp.Body)
if err = decoder.Decode(result); err != nil {
return nil, err
}
return result.Hits.Hits, nil
}
func (c *clientv6) ReadMapping(ctx context.Context) (map[string]any, error) {
r, err := c.c.Indices.GetMapping(
c.c.Indices.GetMapping.WithIndex(c.index),
)
if err != nil {
return nil, err
}
if r.StatusCode != 200 {
return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
}
m := make(map[string]any)
decoder := json.NewDecoder(r.Body)
if err = decoder.Decode(&m); err != nil {
return nil, err
}
return m, nil
}
func (c *clientv6) WriteMapping(ctx context.Context, m map[string]any) error {
var (
err error
bs []byte
result *esapi.Response
)
for idxKey := range m {
if bs, err = json.Marshal(m[idxKey]); err != nil {
return err
}
if result, err = c.c.Indices.Create(
c.index,
c.c.Indices.Create.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
c.c.Indices.Create.WithBody(bytes.NewReader(bs)),
); err != nil {
return err
}
if err = c.checkResponse(result); err != nil {
return err
}
}
return nil
}
func (c *clientv6) ReadSetting(ctx context.Context) (map[string]any, error) {
r, err := c.c.Indices.GetSettings(
c.c.Indices.GetSettings.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
c.c.Indices.GetSettings.WithIndex(c.index),
)
if err != nil {
return nil, err
}
if r.StatusCode != 200 {
return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
}
m := make(map[string]any)
decoder := json.NewDecoder(r.Body)
if err = decoder.Decode(&m); err != nil {
return nil, err
}
return m, nil
}
func (c *clientv6) WriteSetting(ctx context.Context, m map[string]any) error {
var (
err error
bs []byte
result *esapi.Response
)
if bs, err = json.Marshal(m); err != nil {
return err
}
if result, err = c.c.Indices.PutSettings(
bytes.NewReader(bs),
c.c.Indices.PutSettings.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
); err != nil {
return err
}
return c.checkResponse(result)
}

View File

@ -1,6 +1,9 @@
package xes package xes
import ( import (
"bufio"
"fmt"
"os"
"testing" "testing"
elastic "github.com/elastic/go-elasticsearch/v7" elastic "github.com/elastic/go-elasticsearch/v7"
@ -37,3 +40,68 @@ func TestGetESMapping(t *testing.T) {
t.Log("get source:", r.String()) t.Log("get source:", r.String())
} }
func TestScanWithInterrupt(t *testing.T) {
filename := "test_scan.txt"
f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
if err != nil {
t.Error(1, err)
return
}
defer func() {
os.Remove(filename)
}()
f.WriteString(`line 01
line 02
line 03
line 04
line 05
line 06
line 07
line 08
line 09
line 10
line 11
line 12
line 13
line 14
line 15`)
f.Close()
of, err := os.Open(filename)
if err != nil {
t.Error(2, err)
return
}
scanner := bufio.NewScanner(of)
count := 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
if count > 5 {
break
}
}
count = 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
if count > 5 {
break
}
}
count = 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
}
}

View File

@ -7,8 +7,9 @@ import (
"io" "io"
"os" "os"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/interfaces" "github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/sirupsen/logrus"
) )
type client struct { type client struct {
@ -111,7 +112,7 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
return count, nil return count, nil
} }
func (c *client) ReadData(ctx context.Context, i int, _ map[string]any) ([]*interfaces.ESSource, error) { func (c *client) ReadData(ctx context.Context, i int, _ map[string]any, _ []string) ([]*interfaces.ESSource, error) {
var ( var (
err error err error
count = 0 count = 0
@ -121,8 +122,6 @@ func (c *client) ReadData(ctx context.Context, i int, _ map[string]any) ([]*inte
for c.scanner.Scan() { for c.scanner.Scan() {
line := c.scanner.Text() line := c.scanner.Text()
logrus.Debugf("xfile.Read: line=%s", line)
item := new(interfaces.ESSource) item := new(interfaces.ESSource)
if err = json.Unmarshal([]byte(line), item); err != nil { if err = json.Unmarshal([]byte(line), item); err != nil {
return list, err return list, err
@ -152,6 +151,8 @@ func NewClient(file *os.File, ioType interfaces.IO) (interfaces.DumpIO, error) {
if ioType == interfaces.IOInput { if ioType == interfaces.IOInput {
c.scanner = bufio.NewScanner(c.f) c.scanner = bufio.NewScanner(c.f)
buf := make([]byte, opt.BuffSize)
c.scanner.Buffer(buf, opt.MaxBuffSize)
} }
return c, nil return c, nil

View File

@ -26,11 +26,17 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9200/some_index --limit=5000 esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9200/some_index --limit=5000
esgo2dump --input=http://127.0.0.1:9200/some_index --i-version 6 --output=./data.json
esgo2dump --output=http://127.0.0.1:9200/some_index --o-version 6 --input=./data.json
esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --source='id;name;age;address;phones' --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}' esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json`,
``` ```
- example_queries.json - example_queries.json
@ -49,3 +55,4 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_
- [x] auto create index with mapping - [x] auto create index with mapping
- [ ] auto create index with mapping,setting - [ ] auto create index with mapping,setting
- [ ] support es8 - [ ] support es8
- [x] support es6