1 Commits

Author SHA1 Message Date
loveuer
34104bdef6 fix: size 0 bug
fix: huge index can't sort by _id, fall back to scroll_id
refactor: restructure some files
2024-12-13 19:00:24 +08:00
11 changed files with 103 additions and 146 deletions

View File

@@ -16,6 +16,9 @@ jobs:
- name: checkout repository - name: checkout repository
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: fill version
run: sed -i -E "s/v[0-9]+.[0-9]+.[0-9]+/${{ github.ref_name }}/g" internal/opt/version.go
- name: install golang - name: install golang
uses: actions/setup-go@v4 uses: actions/setup-go@v4
with: with:

6
.gitignore vendored
View File

@@ -1,7 +1,11 @@
.idea .idea
.vscode .vscode
.DS_Store .DS_Store
*.json *data.json
*mapping.json
*setting.json
*output.json
*test.json
*.txt *.txt
dist dist
xtest xtest

View File

@@ -57,7 +57,6 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_
func init() { func init() {
rootCommand.PersistentFlags().BoolVar(&opt.Cfg.Debug, "debug", false, "") rootCommand.PersistentFlags().BoolVar(&opt.Cfg.Debug, "debug", false, "")
rootCommand.PersistentFlags().BoolVar(&opt.Cfg.Dev, "dev", false, "") rootCommand.PersistentFlags().BoolVar(&opt.Cfg.Dev, "dev", false, "")
rootCommand.PersistentFlags().BoolVar(&opt.Cfg.DisablePing, "disable-ping", false, "")
rootCommand.PersistentFlags().BoolVarP(&opt.Cfg.Args.Version, "version", "v", false, "print esgo2dump version") rootCommand.PersistentFlags().BoolVarP(&opt.Cfg.Args.Version, "version", "v", false, "print esgo2dump version")
rootCommand.Flags().IntVar(&opt.Cfg.Args.Timeout, "timeout", 30, "max timeout seconds per operation with limit") rootCommand.Flags().IntVar(&opt.Cfg.Args.Timeout, "timeout", 30, "max timeout seconds per operation with limit")

View File

@@ -60,14 +60,10 @@ func run(cmd *cobra.Command, args []string) error {
return err return err
} }
log.Debug("init: new input io success!")
if ioo, err = newIO(opt.Cfg.Args.Output, interfaces.IOOutput, es_oversion); err != nil { if ioo, err = newIO(opt.Cfg.Args.Output, interfaces.IOOutput, es_oversion); err != nil {
return err return err
} }
log.Debug("init: new output io success!")
defer func() { defer func() {
_ = ioi.Close() _ = ioi.Close()
_ = ioo.Close() _ = ioo.Close()
@@ -190,10 +186,10 @@ func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
wg = sync.WaitGroup{} wg = sync.WaitGroup{}
) )
wg.Add(1)
go func() { go func() {
wg.Add(1)
if err = output.WriteData(ctx, wch); err != nil { if err = output.WriteData(ctx, wch); err != nil {
log.Fatal("Dump: write data err: %s", err.Error()) e2ch <- err
} }
wg.Done() wg.Done()
@@ -214,26 +210,18 @@ Loop:
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return ctx.Err()
case err, ok = <-ech: case err, ok = <-ech:
if !ok { if err != nil {
log.Debug("pipe: read io closed") return err
continue Loop
} }
log.Debug("pipe: got err from read io, err = %s", err.Error())
return err continue Loop
case err, ok = <-e2ch: case err, _ = <-e2ch:
if !ok {
log.Debug("pipe: write io closed")
continue Loop
}
log.Debug("pipe: got err from write io, err = %s", err.Error())
return err return err
case docs, ok = <-dch: case docs, ok = <-dch:
if !ok || len(docs) == 0 { if !ok || len(docs) == 0 {
continue Loop continue Loop
} }
log.Debug("pipe: got %d docs from read io", len(docs))
wch <- docs wch <- docs
} }
} }
@@ -241,7 +229,6 @@ Loop:
close(wch) close(wch)
log.Debug("pipe: wait for all io closed")
wg.Wait() wg.Wait()
return nil return nil
@@ -281,7 +268,7 @@ func newIO(source string, ioType interfaces.IO, esv string) (interfaces.DumpIO,
switch esv { switch esv {
case "7": case "7":
return xes.NewClient(source, ioType) return xes.NewClient(iurl, ioType)
case "6": case "6":
return xes.NewClientV6(iurl, ioType) return xes.NewClientV6(iurl, ioType)
case "8": case "8":

View File

@@ -15,10 +15,9 @@ type args struct {
} }
type config struct { type config struct {
Debug bool `json:"-"` Debug bool `json:"-"`
Dev bool `json:"-"` Dev bool `json:"-"`
DisablePing bool `json:"-"` Args args `json:"-"`
Args args `json:"-"`
} }
var Cfg = &config{} var Cfg = &config{}

View File

@@ -32,27 +32,22 @@ func (c *client) WriteData(ctx context.Context, docsCh <-chan []*model.ESSource)
return es7.WriteData(ctx, c.client, c.index, docsCh, c) return es7.WriteData(ctx, c.client, c.index, docsCh, c)
} }
func NewClient(uri string, iot interfaces.IO) (interfaces.DumpIO, error) { func NewClient(url *url.URL, iot interfaces.IO) (interfaces.DumpIO, error) {
var ( var (
cli *elastic.Client urlIndex = strings.TrimPrefix(url.Path, "/")
err error cli *elastic.Client
ins *url.URL err error
index string
) )
if ins, err = url.Parse(uri); err != nil { if urlIndex == "" {
return nil, err
}
if index = strings.TrimSpace(strings.TrimPrefix(ins.Path, "/")); index == "" {
return nil, fmt.Errorf("please specify index name: (like => http://127.0.0.1:9200/my_index)") return nil, fmt.Errorf("please specify index name: (like => http://127.0.0.1:9200/my_index)")
} }
if cli, err = es7.NewClient(context.TODO(), uri, es7.Config{DisablePing: opt.Cfg.DisablePing}); err != nil { if cli, err = es7.NewClient(context.TODO(), url); err != nil {
return nil, err return nil, err
} }
return &client{client: cli, iot: iot, index: index}, nil return &client{client: cli, iot: iot, index: urlIndex}, nil
} }
func (c *client) checkResponse(r *esapi.Response) error { func (c *client) checkResponse(r *esapi.Response) error {

View File

@@ -119,7 +119,6 @@ func (c *client) ReadData(ctx context.Context, size int, _ map[string]any, _ []s
dch = make(chan []*model.ESSource) dch = make(chan []*model.ESSource)
ech = make(chan error) ech = make(chan error)
ready = make(chan bool) ready = make(chan bool)
total = 0
) )
go func(ctx context.Context) { go func(ctx context.Context) {
@@ -145,7 +144,6 @@ func (c *client) ReadData(ctx context.Context, size int, _ map[string]any, _ []s
list = append(list, item) list = append(list, item)
count++ count++
total++
if count >= size { if count >= size {
dch <- list dch <- list
@@ -164,8 +162,6 @@ func (c *client) ReadData(ctx context.Context, size int, _ map[string]any, _ []s
if err = c.scanner.Err(); err != nil { if err = c.scanner.Err(); err != nil {
ech <- err ech <- err
} }
log.Debug("read: read file succeed! total=%d", total)
}(ctx) }(ctx)
<-ready <-ready

View File

@@ -32,6 +32,7 @@ func ReadData(ctx context.Context, client *elastic.Client, index string, size, m
defer func() { defer func() {
close(dataCh) close(dataCh)
close(errCh)
if scrollId != "" { if scrollId != "" {
bs, _ := json.Marshal(map[string]string{ bs, _ := json.Marshal(map[string]string{

View File

@@ -7,7 +7,6 @@ import (
"net" "net"
"net/http" "net/http"
"net/url" "net/url"
"strconv"
"strings" "strings"
"time" "time"
@@ -17,89 +16,78 @@ import (
"github.com/samber/lo" "github.com/samber/lo"
) )
// Deprecated. use uri query: http://<username>:<password>@example.com:port?ping=false&... func NewClient(ctx context.Context, url *url.URL) (*elastic.Client, error) {
type Config struct {
DisablePing bool
}
type UriConfig struct {
Ping bool `json:"ping"`
Sniff bool `json:"sniff"`
}
// NewClient
// new esv7 client
// uri example:
// - http://127.0.0.1:9200
// - https://<username>:<password>@node1.dev:9200,node2.dev:19200,node3.dev:29200
func NewClient(ctx context.Context, uri string, configs ...Config) (*elastic.Client, error) {
var ( var (
err error err error
username string urlUsername string
password string urlPassword string
client *elastic.Client client *elastic.Client
ins *url.URL errCh = make(chan error)
) cliCh = make(chan *elastic.Client)
endpoints = lo.Map(
if ins, err = url.Parse(uri); err != nil { strings.Split(url.Host, ","),
return nil, err func(item string, index int) string {
} return fmt.Sprintf("%s://%s", url.Scheme, item)
cfg := Config{}
if len(configs) > 0 {
cfg = configs[0]
}
endpoints := lo.Map(
strings.Split(ins.Host, ","),
func(item string, index int) string {
return fmt.Sprintf("%s://%s", ins.Scheme, item)
},
)
if ins.User != nil {
username = ins.User.Username()
password, _ = ins.User.Password()
}
query := ins.Query()
cfg2 := &UriConfig{}
cfg2.Ping, _ = strconv.ParseBool(query.Get("ping"))
cfg2.Sniff, _ = strconv.ParseBool(query.Get("sniff"))
if client, err = elastic.NewClient(
elastic.Config{
Addresses: endpoints,
Username: username,
Password: password,
CACert: nil,
RetryOnStatus: []int{429},
MaxRetries: 3,
RetryBackoff: nil,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DialContext: (&net.Dialer{Timeout: 10 * time.Second}).DialContext,
}, },
DiscoverNodesOnStart: cfg2.Sniff, )
}, )
); err != nil {
if url.User != nil {
urlUsername = url.User.Username()
if p, ok := url.User.Password(); ok {
urlPassword = p
}
}
ncFunc := func(endpoints []string, username, password string) {
var (
err error
cli *elastic.Client
infoResp *esapi.Response
)
if cli, err = elastic.NewClient(
elastic.Config{
Addresses: endpoints,
Username: username,
Password: password,
CACert: nil,
RetryOnStatus: []int{429},
MaxRetries: 3,
RetryBackoff: nil,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DialContext: (&net.Dialer{Timeout: 10 * time.Second}).DialContext,
},
},
); err != nil {
errCh <- err
return
}
if infoResp, err = cli.Info(); err != nil {
errCh <- err
return
}
if infoResp.StatusCode != 200 {
err = fmt.Errorf("info es7 status=%d", infoResp.StatusCode)
errCh <- err
return
}
cliCh <- cli
}
go ncFunc(endpoints, urlUsername, urlPassword)
timeout := tool.TimeoutCtx(ctx, 10)
select {
case <-timeout.Done():
return nil, fmt.Errorf("dial es=%v err=%v", endpoints, context.DeadlineExceeded)
case client = <-cliCh:
return client, nil
case err = <-errCh:
return nil, err return nil, err
} }
// Deprecated.
cfg.DisablePing = cfg.DisablePing || cfg2.Ping
if cfg.DisablePing {
var res *esapi.Response
if res, err = client.Ping(client.Ping.WithContext(tool.TimeoutCtx(ctx, 5))); err != nil {
return nil, err
}
if res.StatusCode != 200 {
err = fmt.Errorf("ping es server response: %s", res.String())
return nil, err
}
}
return client, nil
} }

View File

@@ -1,6 +1,7 @@
package es7 package es7
import ( import (
"net/url"
"testing" "testing"
"github.com/loveuer/esgo2dump/internal/tool" "github.com/loveuer/esgo2dump/internal/tool"
@@ -8,8 +9,9 @@ import (
func TestNewClient(t *testing.T) { func TestNewClient(t *testing.T) {
uri := "http://es1.dev:9200,es2.dev:9200" uri := "http://es1.dev:9200,es2.dev:9200"
ins, _ := url.Parse(uri)
c, err := NewClient(tool.Timeout(5), uri) c, err := NewClient(tool.Timeout(5), ins)
if err != nil { if err != nil {
t.Fatal(err.Error()) t.Fatal(err.Error())
} }

View File

@@ -35,28 +35,11 @@ func WriteData(ctx context.Context, client *elastic.Client, index string, docsCh
count := 0 count := 0
if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{ if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
NumWorkers: 0, Client: client,
FlushBytes: 0, Index: index,
FlushInterval: 0, ErrorTrace: true,
Client: client,
Decoder: nil,
OnError: func(ctx context.Context, err error) { OnError: func(ctx context.Context, err error) {
log.Error("es7.writer: on error log, err = %s", err.Error())
}, },
Index: index,
ErrorTrace: true,
FilterPath: []string{},
Header: map[string][]string{},
Human: false,
Pipeline: "",
Pretty: false,
Refresh: "",
Routing: "",
Source: []string{},
SourceExcludes: []string{},
SourceIncludes: []string{},
Timeout: 0,
WaitForActiveShards: "",
}); err != nil { }); err != nil {
return err return err
} }