5 Commits

Author SHA1 Message Date
dde92f2e59 feat: support multi endpoints(format: scheme://user:passwd@ip1:port1,ip2:port2...) 2024-11-14 11:07:16 +08:00
ebb4365135 fix: when max not set, dump size negative 2024-07-22 10:13:12 +08:00
beb2ca4cf4 Merge remote-tracking branch 'origin/master' 2024-07-22 09:31:51 +08:00
246f919bc2 Merge pull request #4 from CaiCandong/bugfix-es6
Bugfix: the response  elasticsearch6 and 7 are a little different
2024-07-21 17:25:26 +08:00
d30233204f bugfix 2024-07-21 13:27:08 +08:00
11 changed files with 80 additions and 35 deletions

View File

@ -34,7 +34,7 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_
f_input string
f_output string
f_limit int
f_limit uint64
f_type string
f_source string
f_sort string
@ -61,7 +61,7 @@ func init() {
rootCommand.Flags().StringVar(&f_sort, "sort", "", "sort, <field>:<direction> format, for example: time:desc or name:asc")
rootCommand.Flags().StringVarP(&f_query, "query", "q", "", `query dsl, example: {"bool":{"must":[{"term":{"name":{"value":"some_name"}}}],"must_not":[{"range":{"age":{"gte":18,"lt":60}}}]}}`)
rootCommand.Flags().StringVar(&f_query_file, "query_file", "", `query json file (will execute line by line)`)
rootCommand.Flags().IntVarP(&f_limit, "limit", "l", 100, "")
rootCommand.Flags().Uint64VarP(&f_limit, "limit", "l", 100, "")
}
func Start(ctx context.Context) error {

View File

@ -6,7 +6,7 @@ import (
)
type DumpIO interface {
ReadData(ctx context.Context, size int, query map[string]any, includeFields []string, sort []string) (<-chan []*model.ESSource, <-chan error)
ReadData(ctx context.Context, size uint64, query map[string]any, includeFields []string, sort []string) (<-chan []*model.ESSource, <-chan error)
WriteData(ctx context.Context, docsCh <-chan []*model.ESSource) error
ReadMapping(context.Context) (map[string]any, error)

View File

@ -135,7 +135,7 @@ func (c *clientv6) Close() error {
return nil
}
func (c *clientv6) ReadData(ctx context.Context, size int, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
func (c *clientv6) ReadData(ctx context.Context, size uint64, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
dch, ech := es6.ReadData(ctx, c.client, c.index, size, 0, query, source, sort)
return dch, ech

View File

@ -70,7 +70,7 @@ func (c *client) Close() error {
return nil
}
func (c *client) ReadData(ctx context.Context, size int, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
func (c *client) ReadData(ctx context.Context, size uint64, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
dch, ech := es7.ReadDataV2(ctx, c.client, c.index, size, 0, query, source, sort)
return dch, ech

View File

@ -110,10 +110,10 @@ func (c *client) IsFile() bool {
return true
}
func (c *client) ReadData(ctx context.Context, size int, _ map[string]any, _ []string, _ []string) (<-chan []*model.ESSource, <-chan error) {
func (c *client) ReadData(ctx context.Context, size uint64, _ map[string]any, _ []string, _ []string) (<-chan []*model.ESSource, <-chan error) {
var (
err error
count = 0
count uint64 = 0
list = make([]*model.ESSource, 0, size)
dch = make(chan []*model.ESSource)
ech = make(chan error)

View File

@ -7,7 +7,24 @@ type ESSource struct {
Sort []any `json:"sort"`
}
type ESResponse struct {
// ESResponseV6 is the JSON shape that search responses are decoded into for
// the v6 client. Hits.Total is decoded as a plain integer.
// NOTE(review): presumably Hits.Total is the field whose shape differs from
// the v7 response — confirm against ESResponseV7.
type ESResponseV6 struct {
// ScrollId holds the "_scroll_id" value of the response.
ScrollId string `json:"_scroll_id"`
// Took holds the "took" value of the response.
Took int `json:"took"`
// TimedOut holds the "timed_out" flag of the response.
TimedOut bool `json:"timed_out"`
// Shards holds the per-shard counters found under "_shards".
Shards struct {
Total int `json:"total"`
Successful int `json:"successful"`
Skipped int `json:"skipped"`
Failed int `json:"failed"`
} `json:"_shards"`
// Hits holds the "hits" envelope: the total count, the max score and the
// returned documents.
Hits struct {
// Total is decoded from a bare JSON number.
Total int `json:"total"`
MaxScore float64 `json:"max_score"`
// Hits is the list of documents returned in this response.
Hits []*ESSource `json:"hits"`
} `json:"hits"`
}
type ESResponseV7 struct {
ScrollId string `json:"_scroll_id"`
Took int `json:"took"`
TimedOut bool `json:"timed_out"`

View File

@ -53,7 +53,8 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_
- [x] es to file
- [x] es to es
- [x] auto create index with mapping
- [x] support es6
- [ ] [Feature Request #1](https://github.com/loveuer/esgo2dump/issues/1): Supports more than 10,000 lines of query_file
- [ ] args: split_size (auto split json output file)
- [ ] auto create index with mapping,setting
- [x] support es6
- [ ] support es8

View File

@ -14,7 +14,7 @@ import (
"time"
)
func ReadData(ctx context.Context, client *elastic.Client, index string, size, max int, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
func ReadData(ctx context.Context, client *elastic.Client, index string, size, max uint64, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
var (
dataCh = make(chan []*model.ESSource)
errCh = make(chan error)
@ -24,9 +24,9 @@ func ReadData(ctx context.Context, client *elastic.Client, index string, size, m
var (
err error
resp *esapi.Response
result = new(model.ESResponse)
result = new(model.ESResponseV6)
scrollId string
total int
total uint64
)
defer func() {
@ -63,7 +63,7 @@ func ReadData(ctx context.Context, client *elastic.Client, index string, size, m
qs := []func(*esapi.SearchRequest){
client.Search.WithContext(util.TimeoutCtx(ctx, 20)),
client.Search.WithIndex(index),
client.Search.WithSize(size),
client.Search.WithSize(int(size)),
client.Search.WithFrom(0),
client.Search.WithScroll(time.Duration(120) * time.Second),
}
@ -106,9 +106,9 @@ func ReadData(ctx context.Context, client *elastic.Client, index string, size, m
scrollId = result.ScrollId
dataCh <- result.Hits.Hits
total += len(result.Hits.Hits)
total += uint64(len(result.Hits.Hits))
if len(result.Hits.Hits) < size || (max > 0 && total >= max) {
if uint64(len(result.Hits.Hits)) < size || (max > 0 && total >= max) {
return
}
@ -121,7 +121,7 @@ func ReadData(ctx context.Context, client *elastic.Client, index string, size, m
return
}
result = new(model.ESResponse)
result = new(model.ESResponseV6)
decoder = json.NewDecoder(resp.Body)
if err = decoder.Decode(result); err != nil {
@ -135,9 +135,9 @@ func ReadData(ctx context.Context, client *elastic.Client, index string, size, m
}
dataCh <- result.Hits.Hits
total += len(result.Hits.Hits)
total += uint64(len(result.Hits.Hits))
if len(result.Hits.Hits) < size || (max > 0 && total >= max) {
if uint64(len(result.Hits.Hits)) < size || (max > 0 && total >= max) {
break
}
}

View File

@ -7,9 +7,11 @@ import (
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/loveuer/esgo2dump/internal/util"
"github.com/samber/lo"
"net"
"net/http"
"net/url"
"strings"
"time"
)
@ -21,7 +23,12 @@ func NewClient(ctx context.Context, url *url.URL) (*elastic.Client, error) {
client *elastic.Client
errCh = make(chan error)
cliCh = make(chan *elastic.Client)
address = fmt.Sprintf("%s://%s", url.Scheme, url.Host)
endpoints = lo.Map(
strings.Split(url.Host, ","),
func(item string, index int) string {
return fmt.Sprintf("%s://%s", url.Scheme, item)
},
)
)
if url.User != nil {
@ -71,12 +78,12 @@ func NewClient(ctx context.Context, url *url.URL) (*elastic.Client, error) {
cliCh <- cli
}
go ncFunc([]string{address}, urlUsername, urlPassword)
go ncFunc(endpoints, urlUsername, urlPassword)
timeout := util.TimeoutCtx(ctx, 10)
select {
case <-timeout.Done():
return nil, fmt.Errorf("dial es=%s err=%v", address, context.DeadlineExceeded)
return nil, fmt.Errorf("dial es=%v err=%v", endpoints, context.DeadlineExceeded)
case client = <-cliCh:
return client, nil
case err = <-errCh:

20
xes/es7/client_test.go Normal file
View File

@ -0,0 +1,20 @@
package es7
import (
"github.com/loveuer/esgo2dump/internal/util"
"net/url"
"testing"
)
// TestNewClient checks that NewClient accepts a URL whose host part lists
// several comma-separated endpoints.
//
// NOTE(review): this hands es1.dev/es2.dev to NewClient, so it presumably
// needs those hosts to be reachable — confirm before relying on it in CI.
func TestNewClient(t *testing.T) {
	const uri = "http://es1.dev:9200,es2.dev:9200"

	ins, err := url.Parse(uri)
	if err != nil {
		// Fail here instead of passing a nil *url.URL on to NewClient, which
		// reads the URL's Scheme and Host.
		t.Fatalf("parse %q: %v", uri, err)
	}

	c, err := NewClient(util.Timeout(5), ins)
	if err != nil {
		t.Fatal(err)
	}

	t.Log("success!!!")
	_ = c
}

View File

@ -28,7 +28,7 @@ func ReadData(ctx context.Context, client *elastic.Client, index string, size, m
var (
err error
resp *esapi.Response
result = new(model.ESResponse)
result = new(model.ESResponseV7)
scrollId string
total int
)
@ -125,7 +125,7 @@ func ReadData(ctx context.Context, client *elastic.Client, index string, size, m
return
}
result = new(model.ESResponse)
result = new(model.ESResponseV7)
decoder = json.NewDecoder(resp.Body)
if err = decoder.Decode(result); err != nil {
@ -159,7 +159,7 @@ func ReadDataV2(
ctx context.Context,
client *elastic.Client,
index string,
size, max int,
size, max uint64,
query map[string]any,
source []string,
sort []string,
@ -175,7 +175,7 @@ func ReadDataV2(
bs []byte
resp *esapi.Response
searchAfter = make([]any, 0)
total = 0
total uint64 = 0
body = make(map[string]any)
qs []func(request *esapi.SearchRequest)
)
@ -203,7 +203,7 @@ func ReadDataV2(
qs = []func(*esapi.SearchRequest){
client.Search.WithContext(util.TimeoutCtx(ctx, 30)),
client.Search.WithIndex(index),
client.Search.WithSize(util.Min(size, max-total)),
client.Search.WithSize(int(util.Min(size, max-total))),
client.Search.WithSort(sorts...),
}
@ -232,7 +232,7 @@ func ReadDataV2(
return
}
var result = new(model.ESResponse)
var result = new(model.ESResponseV7)
decoder := json.NewDecoder(resp.Body)
if err = decoder.Decode(result); err != nil {
errCh <- err
@ -245,9 +245,9 @@ func ReadDataV2(
}
dataCh <- result.Hits.Hits
total += len(result.Hits.Hits)
total += uint64(len(result.Hits.Hits))
if len(result.Hits.Hits) < size || (max > 0 && total >= max) {
if uint64(len(result.Hits.Hits)) < size || (max > 0 && total >= max) {
break
}