9 Commits

11 changed files with 386 additions and 102 deletions

View File

@@ -45,6 +45,7 @@ jobs:
with:
repo_token: "${{ secrets.GITHUB_TOKEN }}"
title: "Release_${{ github.ref_name }}"
prerelease: false
files: |
dist/esgo2dump_${{ github.ref_name }}_linux_amd64
dist/esgo2dump_${{ github.ref_name }}_linux_arm64

go.mod
View File

@@ -1,4 +1,4 @@
module esgo2dump
module github.com/loveuer/esgo2dump
go 1.18

View File

@@ -2,7 +2,8 @@ package cmd
import (
"context"
"esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/spf13/cobra"
)
@@ -20,7 +21,9 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9
esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'`,
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json`,
}
f_input string
@@ -28,16 +31,19 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query=
f_limit int
f_type string
f_query string
f_query_file string
)
func init() {
rootCommand.Flags().BoolVar(&opt.Debug, "debug", false, "")
rootCommand.Flags().IntVar(&opt.Timeout, "timeout", 30, "max timeout seconds per operation with limit")
rootCommand.Flags().StringVarP(&f_input, "input", "i", "http://127.0.0.1:9200/my_index", "")
rootCommand.Flags().StringVarP(&f_input, "input", "i", "", "*required: input file or es url (example :data.json / http://127.0.0.1:9200/my_index)")
rootCommand.Flags().StringVarP(&f_output, "output", "o", "output.json", "")
rootCommand.Flags().StringVarP(&f_type, "type", "t", "data", "data/mapping/setting")
rootCommand.Flags().StringVarP(&f_query, "query", "q", "", `query dsl, example: {"bool":{"must":[{"term":{"name":{"value":"some_name"}}}],"must_not":[{"range":{"age":{"gte":18,"lt":60}}}]}}`)
rootCommand.Flags().StringVar(&f_query_file, "query_file", "", `query json file (will execute line by line)`)
rootCommand.Flags().IntVarP(&f_limit, "limit", "l", 100, "")
}
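
As a reference for the new flag, `--query_file` takes newline-delimited JSON: one query DSL object per line, matching the scanner loop added to `executeData` further down. A minimal standalone sketch of that parsing, assuming a hypothetical `loadQueries` helper and file name:

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"os"
)

// loadQueries reads one JSON query object per line, the format --query_file expects.
func loadQueries(path string) ([]map[string]any, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	// allow long single-line queries, same idea as the 1MB/5MB buffer in executeData
	scanner.Buffer(make([]byte, 1024*1024), 5*1024*1024)

	queries := make([]map[string]any, 0)
	line := 1
	for scanner.Scan() {
		q := make(map[string]any)
		if err := json.Unmarshal(scanner.Bytes(), &q); err != nil {
			return nil, fmt.Errorf("query file line=%d invalid err=%v", line, err)
		}
		queries = append(queries, q)
		line++
	}
	return queries, scanner.Err()
}

func main() {
	qs, err := loadQueries("my_queries.json") // hypothetical file name
	fmt.Println("queries:", len(qs), "err:", err)
}
```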

View File

@@ -1,21 +1,44 @@
package cmd
import (
"bufio"
"context"
"encoding/json"
"errors"
"esgo2dump/internal/interfaces"
"esgo2dump/internal/opt"
"esgo2dump/internal/xes"
"esgo2dump/internal/xfile"
"fmt"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"io"
"net/url"
"os"
"github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/xes"
"github.com/loveuer/esgo2dump/internal/xfile"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
func check(cmd *cobra.Command) error {
if f_input == "" {
return cmd.Help()
//return fmt.Errorf("must specify input(example: data.json/http://127.0.0.1:9200/my_index)")
}
if f_limit == 0 || f_limit > 10000 {
return fmt.Errorf("invalid limit(1 - 10000)")
}
if f_query != "" && f_query_file != "" {
return fmt.Errorf("cannot specify both query and query_file at the same time")
}
switch f_type {
case "data", "mapping", "setting":
default:
return fmt.Errorf("unknown type=%s", f_type)
}
return nil
}
func run(cmd *cobra.Command, args []string) error {
var (
err error
@@ -27,10 +50,8 @@ func run(cmd *cobra.Command, args []string) error {
logrus.SetLevel(logrus.DebugLevel)
}
switch f_type {
case "data", "mapping", "setting":
default:
return fmt.Errorf("unknown type=%s", f_type)
if err = check(cmd); err != nil {
return err
}
if ioi, err = newIO(f_input, interfaces.IOInput); err != nil {
@@ -46,23 +67,45 @@ func run(cmd *cobra.Command, args []string) error {
_ = ioo.Close()
}()
if (f_query_file != "" || f_query != "") && ioi.IsFile() {
return fmt.Errorf("with file input, query or query_file can't be supported")
}
switch f_type {
case "data":
return executeData(cmd.Context(), ioi, ioo)
if err = executeData(cmd.Context(), ioi, ioo); err != nil {
return err
}
logrus.Info("Dump: write data succeed!!!")
return nil
case "mapping":
var mapping map[string]any
if mapping, err = ioi.ReadMapping(cmd.Context()); err != nil {
return err
}
return ioo.WriteMapping(cmd.Context(), mapping)
if err = ioo.WriteMapping(cmd.Context(), mapping); err != nil {
return err
}
logrus.Info("Dump: write mapping succeed!!!")
return nil
case "setting":
var setting map[string]any
if setting, err = ioi.ReadSetting(cmd.Context()); err != nil {
return err
}
return ioo.WriteSetting(cmd.Context(), setting)
if err = ioo.WriteSetting(cmd.Context(), setting); err != nil {
return err
}
logrus.Info("Dump: write setting succeed!!!")
return nil
default:
return fmt.Errorf("unknown type=%s", f_type)
}
@@ -71,34 +114,131 @@ func run(cmd *cobra.Command, args []string) error {
func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
var (
err error
lines []*interfaces.ESSource
ch = make(chan []*interfaces.ESSource, 1)
errCh = make(chan error)
queries = make([]map[string]any, 0)
)
if f_query != "" {
query := make(map[string]any)
if err = json.Unmarshal([]byte(f_query), &query); err != nil {
return fmt.Errorf("invalid query err=%v", err)
}
queries = append(queries, query)
}
if f_query_file != "" {
var (
qf *os.File
)
if qf, err = os.Open(f_query_file); err != nil {
return fmt.Errorf("open query_file err=%v", err)
}
defer func() {
_ = qf.Close()
}()
scanner := bufio.NewScanner(qf)
scanner.Buffer(make([]byte, 1*1024*1024), 5*1024*1024)
lineCount := 1
for scanner.Scan() {
line := scanner.Text()
oq := make(map[string]any)
if err = json.Unmarshal([]byte(line), &oq); err != nil {
return fmt.Errorf("query file line=%d invalid err=%v", lineCount, err)
}
queries = append(queries, oq)
if len(queries) > 10000 {
return fmt.Errorf("query_file support max lines=%d", 10000)
}
lineCount++
}
}
if len(queries) == 0 {
queries = append(queries, nil)
}
go func(c context.Context) {
var (
lines []*interfaces.ESSource
)
defer func() {
close(ch)
}()
Loop:
for _, query := range queries {
for {
select {
case <-c.Done():
return
default:
if lines, err = input.ReadData(c, f_limit, query); err != nil {
errCh <- err
return
}
logrus.Debugf("executeData: input read_data got lines=%d", len(lines))
if len(lines) == 0 {
input.ResetOffset()
if query != nil {
bs, _ := json.Marshal(query)
logrus.Infof("Dump: query_file query=%s read done!!!", string(bs))
}
continue Loop
}
ch <- lines
}
}
}
}(ctx)
var (
succeed int
total int
docs []*interfaces.ESSource
ok bool
)
for {
select {
case <-ctx.Done():
case err = <-errCh:
return err
case docs, ok = <-ch:
if !ok {
return err
}
if lines, err = input.ReadData(ctx, f_limit); err != nil {
if errors.Is(err, io.EOF) {
if len(docs) == 0 {
return nil
}
return err
if succeed, err = output.WriteData(ctx, docs); err != nil {
return err
}
logrus.Debugf("executeData: output write_data succeed lines=%d", succeed)
if succeed != len(docs) {
return fmt.Errorf("cmd.run: got lines=%d, only succeed=%d", len(docs), succeed)
}
total += succeed
logrus.Infof("Dump: succeed=%d total=%d docs succeed!!!", succeed, total)
}
if len(lines) == 0 {
return nil
}
if succeed, err = output.WriteData(ctx, lines); err != nil {
return err
}
if succeed != len(lines) {
return fmt.Errorf("cmd.run: got lines=%d, only succeed=%d", len(lines), succeed)
}
logrus.Infof("Dump: %d docs succeed!!!", succeed)
}
}
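
The reworked `executeData` above is a producer/consumer split: one goroutine pages through every query and feeds a channel, while the main loop writes batches until the channel closes, an error arrives, or the context is cancelled. A stripped-down sketch of that control flow, with the `DumpIO` calls replaced by hypothetical `readPage`/`writePage` functions (an illustration, not the PR's exact code):

```go
package sketch // illustration only

import "context"

// pump mirrors the reader/writer split in executeData: a goroutine drains each
// query page by page into ch, while the caller writes pages until ch closes,
// an error arrives, or the context is cancelled.
func pump(
	ctx context.Context,
	queries []map[string]any,
	readPage func(context.Context, map[string]any) ([]string, error),
	writePage func(context.Context, []string) (int, error),
) error {
	ch := make(chan []string, 1)
	errCh := make(chan error, 1)

	go func() {
		defer close(ch)
		for _, q := range queries { // a nil query means "dump everything"
			for {
				docs, err := readPage(ctx, q)
				if err != nil {
					errCh <- err
					return
				}
				if len(docs) == 0 { // this query is exhausted, move to the next one
					break
				}
				select {
				case ch <- docs:
				case <-ctx.Done():
					return
				}
			}
		}
	}()

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case err := <-errCh:
			return err
		case docs, ok := <-ch:
			if !ok {
				return nil // producer finished every query
			}
			if _, err := writePage(ctx, docs); err != nil {
				return err
			}
		}
	}
}
```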
@@ -136,7 +276,7 @@ func newIO(source string, ioType interfaces.IO) (interfaces.DumpIO, error) {
logrus.Debugf("newIO.%s: source as url=%+v", ioType.Code(), *iurl)
return xes.NewClient(iurl, ioType, qm)
return xes.NewClient(iurl, ioType)
ClientByFile:
if ioType == interfaces.IOOutput {

View File

@@ -3,9 +3,11 @@ package interfaces
import "context"
type DumpIO interface {
ReadData(context.Context, int) ([]*ESSource, error)
ReadData(context.Context, int, map[string]any) ([]*ESSource, error)
WriteData(ctx context.Context, docs []*ESSource) (int, error)
ResetOffset()
ReadMapping(context.Context) (map[string]any, error)
WriteMapping(context.Context, map[string]any) error
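
`ReadData` now takes the query per call, and the new `ResetOffset` lets the caller rewind the source between queries (for Elasticsearch it clears the scroll). A minimal sketch of how a caller is expected to drive that contract; the `drainQuery` helper is illustrative and assumes it sits alongside the `interfaces` package types:

```go
package interfaces // sketch only

import "context"

// drainQuery reads pages for a single query until an empty page signals the
// end, then resets the source so the next query starts from offset zero.
func drainQuery(ctx context.Context, src DumpIO, limit int, query map[string]any) ([]*ESSource, error) {
	all := make([]*ESSource, 0)
	for {
		page, err := src.ReadData(ctx, limit, query)
		if err != nil {
			return all, err
		}
		if len(page) == 0 {
			src.ResetOffset()
			return all, nil
		}
		all = append(all, page...)
	}
}
```

In `executeData` this is essentially what the reader goroutine does for every entry parsed from `--query_file`.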

View File

@@ -7,4 +7,7 @@ const (
var (
Debug bool
Timeout int
BuffSize = 5 * 1024 * 1024 // 5M
MaxBuffSize = 100 * 1024 * 1024 // 100M, default elastic_search doc max size
)
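
These two sizes exist so the file reader's `bufio.Scanner` can hold one dumped document per line, up to the 100MB default Elasticsearch limit mentioned in the comment. A small sketch of how they are meant to be consumed (the `newScanner` helper is hypothetical; the real wiring is in the xfile change below):

```go
package xfile // sketch of how the sizes are consumed

import (
	"bufio"
	"os"

	"github.com/loveuer/esgo2dump/internal/opt"
)

// newScanner starts with a BuffSize buffer and allows growth up to MaxBuffSize,
// so a single dumped JSON document per line can be read back in.
func newScanner(f *os.File) *bufio.Scanner {
	s := bufio.NewScanner(f)
	s.Buffer(make([]byte, opt.BuffSize), opt.MaxBuffSize)
	return s
}
```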

View File

@@ -5,84 +5,106 @@ import (
"context"
"crypto/tls"
"encoding/json"
"esgo2dump/internal/interfaces"
"esgo2dump/internal/opt"
"esgo2dump/internal/util"
"fmt"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/elastic/go-elasticsearch/v7/esutil"
"github.com/sirupsen/logrus"
"net"
"net/http"
"net/url"
"strings"
"time"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/elastic/go-elasticsearch/v7/esutil"
"github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/util"
"github.com/sirupsen/logrus"
)
func NewClient(url *url.URL, iot interfaces.IO, qm map[string]any) (interfaces.DumpIO, error) {
func NewClient(url *url.URL, iot interfaces.IO) (interfaces.DumpIO, error) {
var (
err error
endpoint = fmt.Sprintf("%s://%s", url.Scheme, url.Host)
c *elastic.Client
infoResp *esapi.Response
index = strings.TrimPrefix(url.Path, "/")
username string
password string
address = fmt.Sprintf("%s://%s", url.Scheme, url.Host)
urlIndex = strings.TrimPrefix(url.Path, "/")
urlUsername string
urlPassword string
errCh = make(chan error)
cliCh = make(chan *elastic.Client)
)
if url.User != nil {
username = url.User.Username()
urlUsername = url.User.Username()
if p, ok := url.User.Password(); ok {
password = p
urlPassword = p
}
}
logrus.Debugf("xes.NewClient: endpoint=%s index=%s (username=%s password=%s)", endpoint, index, username, password)
logrus.Debugf("xes.NewClient: endpoint=%s index=%s (username=%s password=%s)", address, urlIndex, urlUsername, urlPassword)
if index == "" {
if urlIndex == "" {
return nil, fmt.Errorf("please specify index name: (like => http://127.0.0.1:9200/my_index)")
}
if c, err = elastic.NewClient(
elastic.Config{
Addresses: []string{endpoint},
Username: username,
Password: password,
CACert: nil,
RetryOnStatus: []int{429},
MaxRetries: 3,
RetryBackoff: nil,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DialContext: (&net.Dialer{Timeout: 5 * time.Second}).DialContext,
ncFunc := func(endpoints []string, username, password, index string) {
var (
err error
cli *elastic.Client
infoResp *esapi.Response
)
if cli, err = elastic.NewClient(
elastic.Config{
Addresses: endpoints,
Username: username,
Password: password,
CACert: nil,
RetryOnStatus: []int{429},
MaxRetries: 3,
RetryBackoff: nil,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DialContext: (&net.Dialer{Timeout: 10 * time.Second}).DialContext,
},
},
},
); err != nil {
logrus.Debugf("xes.NewClient: elastic new client with endpont=%s err=%v", endpoint, err)
return nil, err
); err != nil {
logrus.Debugf("xes.NewClient: elastic new client with endpont=%s err=%v", endpoints, err)
errCh <- err
return
}
if infoResp, err = cli.Info(); err != nil {
logrus.Debugf("xes.NewClient: ping err=%v", err)
errCh <- err
return
}
if infoResp.StatusCode != 200 {
err = fmt.Errorf("info xes status=%d", infoResp.StatusCode)
logrus.Debugf("xes.NewClient: status err=%v", err)
errCh <- err
return
}
cliCh <- cli
}
if infoResp, err = c.Info(); err != nil {
logrus.Debugf("xes.NewClient: ping err=%v", err)
return nil, err
}
go ncFunc([]string{address}, urlUsername, urlPassword, urlIndex)
if infoResp.StatusCode != 200 {
return nil, fmt.Errorf("info xes status=%d", infoResp.StatusCode)
select {
case <-util.Timeout(10).Done():
return nil, fmt.Errorf("dial es=%s err=%v", address, context.DeadlineExceeded)
case c := <-cliCh:
return &client{c: c, index: urlIndex, iot: iot}, nil
case e := <-errCh:
return nil, e
}
return &client{c: c, index: index, queryMap: qm, iot: iot}, nil
}
type client struct {
c *elastic.Client
iot interfaces.IO
index string
from int
scrollId string
queryMap map[string]any
}
func (c *client) checkResponse(r *esapi.Response) error {
@@ -105,11 +127,34 @@ func (c *client) Close() error {
return nil
}
func (c *client) ResetOffset() {
defer func() {
c.scrollId = ""
}()
bs, _ := json.Marshal(map[string]string{
"scroll_id": c.scrollId,
})
rr, err := c.c.ClearScroll(
c.c.ClearScroll.WithContext(util.Timeout(3)),
c.c.ClearScroll.WithBody(bytes.NewReader(bs)),
)
if err != nil {
logrus.Warnf("ResetOffset: clear scroll id=%s err=%v", c.scrollId, err)
return
}
if rr.StatusCode != 200 {
logrus.Warnf("ResetOffset: clear scroll id=%s msg=%s", c.scrollId, rr.String())
}
}
func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (int, error) {
var (
err error
indexer esutil.BulkIndexer
count int
be error
)
if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
Client: c.c,
@@ -126,13 +171,14 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
return 0, err
}
logrus.Debugf("xes.Write: doc content=%s", string(bs))
if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{
Action: "index",
Index: c.index,
DocumentID: doc.DocId,
Body: bytes.NewReader(bs),
OnFailure: func(ctx context.Context, item esutil.BulkIndexerItem, item2 esutil.BulkIndexerResponseItem, bulkErr error) {
be = bulkErr
},
}); err != nil {
return 0, err
}
@@ -143,15 +189,19 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
return 0, err
}
if be != nil {
return 0, be
}
stats := indexer.Stats()
if stats.NumFailed > 0 {
return count, fmt.Errorf("write to xes failed=%d", stats.NumFailed)
return count, fmt.Errorf("write to xes failed_count=%d bulk_count=%d", stats.NumFailed, count)
}
return count, nil
}
func (c *client) ReadData(ctx context.Context, i int) ([]*interfaces.ESSource, error) {
func (c *client) ReadData(ctx context.Context, i int, query map[string]any) ([]*interfaces.ESSource, error) {
var (
err error
resp *esapi.Response
@@ -164,11 +214,11 @@ func (c *client) ReadData(ctx context.Context, i int) ([]*interfaces.ESSource, e
c.c.Search.WithIndex(c.index),
c.c.Search.WithSize(i),
c.c.Search.WithFrom(0),
c.c.Search.WithScroll(time.Duration(opt.ScrollDurationSeconds) * time.Second),
c.c.Search.WithScroll(time.Duration(opt.Timeout*2) * time.Second),
}
if len(c.queryMap) > 0 {
queryBs, _ := json.Marshal(map[string]any{"query": c.queryMap})
if query != nil && len(query) > 0 {
queryBs, _ := json.Marshal(map[string]any{"query": query})
qs = append(qs, c.c.Search.WithBody(bytes.NewReader(queryBs)))
}
@@ -192,7 +242,7 @@ func (c *client) ReadData(ctx context.Context, i int) ([]*interfaces.ESSource, e
if resp, err = c.c.Scroll(
c.c.Scroll.WithScrollID(c.scrollId),
c.c.Scroll.WithScroll(time.Duration(opt.ScrollDurationSeconds)*time.Second),
c.c.Scroll.WithScroll(time.Duration(opt.Timeout*2)*time.Second),
); err != nil {
return result.Hits.Hits, nil
}
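
The `NewClient` rewrite above moves the dial and `Info()` ping into a goroutine and races the result against a roughly ten-second deadline, so an unreachable cluster fails fast instead of hanging. The same pattern in isolation, with `dial` as a hypothetical stand-in for building the client and pinging it (a sketch, not the PR's exact code):

```go
package xes // sketch of the connect-with-timeout pattern

import (
	"context"
	"fmt"
	"time"

	elastic "github.com/elastic/go-elasticsearch/v7"
)

// connectWithTimeout runs the blocking dial in a goroutine and selects on
// success, failure, or the deadline, mirroring the cliCh/errCh logic above.
func connectWithTimeout(dial func() (*elastic.Client, error), timeout time.Duration) (*elastic.Client, error) {
	cliCh := make(chan *elastic.Client, 1)
	errCh := make(chan error, 1)

	go func() {
		cli, err := dial()
		if err != nil {
			errCh <- err
			return
		}
		cliCh <- cli
	}()

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	select {
	case <-ctx.Done():
		return nil, fmt.Errorf("dial es err=%v", context.DeadlineExceeded)
	case cli := <-cliCh:
		return cli, nil
	case err := <-errCh:
		return nil, err
	}
}
```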

View File

@@ -1,9 +1,13 @@
package xes
import (
"esgo2dump/internal/util"
elastic "github.com/elastic/go-elasticsearch/v7"
"bufio"
"fmt"
"os"
"testing"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/loveuer/esgo2dump/internal/util"
)
func TestGetESMapping(t *testing.T) {
@@ -36,3 +40,68 @@ func TestGetESMapping(t *testing.T) {
t.Log("get source:", r.String())
}
func TestScanWithInterrupt(t *testing.T) {
filename := "test_scan.txt"
f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
if err != nil {
t.Error(1, err)
return
}
defer func() {
os.Remove(filename)
}()
f.WriteString(`line 01
line 02
line 03
line 04
line 05
line 06
line 07
line 08
line 09
line 10
line 11
line 12
line 13
line 14
line 15`)
f.Close()
of, err := os.Open(filename)
if err != nil {
t.Error(2, err)
return
}
scanner := bufio.NewScanner(of)
count := 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
if count > 5 {
break
}
}
count = 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
if count > 5 {
break
}
}
count = 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
}
}

View File

@@ -4,10 +4,11 @@ import (
"bufio"
"context"
"encoding/json"
"esgo2dump/internal/interfaces"
"github.com/sirupsen/logrus"
"github.com/loveuer/esgo2dump/internal/opt"
"io"
"os"
"github.com/loveuer/esgo2dump/internal/interfaces"
)
type client struct {
@@ -84,6 +85,8 @@ func (c *client) IsFile() bool {
return true
}
func (c *client) ResetOffset() {}
func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (int, error) {
var (
err error
@@ -108,7 +111,7 @@ func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (in
return count, nil
}
func (c *client) ReadData(ctx context.Context, i int) ([]*interfaces.ESSource, error) {
func (c *client) ReadData(ctx context.Context, i int, _ map[string]any) ([]*interfaces.ESSource, error) {
var (
err error
count = 0
@@ -118,8 +121,6 @@ func (c *client) ReadData(ctx context.Context, i int) ([]*interfaces.ESSource, e
for c.scanner.Scan() {
line := c.scanner.Text()
logrus.Debugf("xfile.Read: line=%s", line)
item := new(interfaces.ESSource)
if err = json.Unmarshal([]byte(line), item); err != nil {
return list, err
@@ -149,6 +150,8 @@ func NewClient(file *os.File, ioType interfaces.IO) (interfaces.DumpIO, error) {
if ioType == interfaces.IOInput {
c.scanner = bufio.NewScanner(c.f)
buf := make([]byte, opt.BuffSize)
c.scanner.Buffer(buf, opt.MaxBuffSize)
}
return c, nil

View File

@@ -2,10 +2,11 @@ package main
import (
"context"
"esgo2dump/internal/cmd"
"os/signal"
"syscall"
"github.com/loveuer/esgo2dump/internal/cmd"
"github.com/sirupsen/logrus"
)

View File

@@ -28,7 +28,16 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9
esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'`,
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json
```
- example_queries.json
```json
{"bool":{"should":[{"term":{"user_id":{"value":"123"}}},{"term":{"user_id":{"value":"456"}}}]}}
{"bool":{"should":[{"term":{"user_id":{"value":"abc"}}},{"term":{"user_id":{"value":"def"}}}]}}
{"bool":{"should":[{"term":{"user_id":{"value":"ABC"}}},{"term":{"user_id":{"value":"DEF"}}}]}}
```
### roadmap