feat: 🎉 完成基本功能

This commit is contained in:
loveuer
2024-03-22 18:05:47 +08:00
commit b9bb06867d
16 changed files with 965 additions and 0 deletions

46
internal/cmd/cmd.go Normal file
View File

@ -0,0 +1,46 @@
package cmd
import (
"context"
"esgo2dump/internal/opt"
"github.com/spf13/cobra"
)
// Package-level CLI state: the root cobra command and the variables that the
// command-line flags are bound to (the bindings are registered in init).
var (
// rootCommand is the top-level command; run is invoked as its RunE handler.
// SilenceUsage and SilenceErrors are both enabled on it.
rootCommand = &cobra.Command{
Use: "esgo2dump",
Short: "esgo2dump is alternative to elasticdump",
SilenceUsage: true,
SilenceErrors: true,
RunE: run,
Example: `
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9200/some_index --limit=5000
esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'`,
}
// f_input holds --input/-i: the source handed to newIO as the input side.
f_input string
// f_output holds --output/-o: the source handed to newIO as the output side.
f_output string
// f_limit holds --limit/-l: the batch size passed to ReadData by executeData.
f_limit int
// f_type holds --type/-t: run accepts "data", "mapping" or "setting".
f_type string
// f_query holds --query/-q: a JSON object that newIO parses for an input URL.
f_query string
)
// init binds the command-line flags of rootCommand to their backing
// variables. Every flag carries a usage string so that --help describes it.
func init() {
	flags := rootCommand.Flags()

	flags.BoolVar(&opt.Debug, "debug", false, "enable debug level logging")
	flags.IntVar(&opt.Timeout, "timeout", 30, "max timeout seconds per operation with limit")

	flags.StringVarP(&f_input, "input", "i", "http://127.0.0.1:9200/my_index", "source to read from: elasticsearch index url (http/https) or file path")
	flags.StringVarP(&f_output, "output", "o", "output.json", "destination to write to: elasticsearch index url (http/https) or path of a file that does not exist yet")
	flags.StringVarP(&f_type, "type", "t", "data", "data/mapping/setting")
	flags.StringVarP(&f_query, "query", "q", "", `query dsl, example: {"bool":{"must":[{"term":{"name":{"value":"some_name"}}}],"must_not":[{"range":{"age":{"gte":18,"lt":60}}}]}}`)
	flags.IntVarP(&f_limit, "limit", "l", 100, "max number of docs read and written per batch")
}
// Start executes rootCommand with ctx and returns the error that
// ExecuteContext reports.
func Start(ctx context.Context) error {
return rootCommand.ExecuteContext(ctx)
}

153
internal/cmd/run.go Normal file
View File

@ -0,0 +1,153 @@
package cmd
import (
"context"
"encoding/json"
"errors"
"esgo2dump/internal/interfaces"
"esgo2dump/internal/opt"
"esgo2dump/internal/xes"
"esgo2dump/internal/xfile"
"fmt"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"io"
"net/url"
"os"
)
// run is the RunE handler of the root command. It validates --type, opens
// the input and output endpoints and copies data, mapping or settings from
// the input to the output.
//
// Each endpoint is closed as soon as it has been opened successfully, so the
// input is released even when opening the output fails. A failure to close
// the output is returned when no earlier error occurred, because for a file
// output it can mean the dump was not fully written.
func run(cmd *cobra.Command, args []string) (err error) {
	if opt.Debug {
		logrus.SetLevel(logrus.DebugLevel)
	}

	// Reject unknown types before any endpoint is touched.
	switch f_type {
	case "data", "mapping", "setting":
	default:
		return fmt.Errorf("unknown type=%s", f_type)
	}

	var ioi, ioo interfaces.DumpIO

	if ioi, err = newIO(f_input, interfaces.IOInput); err != nil {
		return err
	}
	// The input is only read from, so its close error carries no information.
	defer func() { _ = ioi.Close() }()

	if ioo, err = newIO(f_output, interfaces.IOOutput); err != nil {
		return err
	}
	defer func() {
		if closeErr := ioo.Close(); err == nil {
			err = closeErr
		}
	}()

	ctx := cmd.Context()

	switch f_type {
	case "data":
		return executeData(ctx, ioi, ioo)
	case "mapping":
		mapping, readErr := ioi.ReadMapping(ctx)
		if readErr != nil {
			return readErr
		}

		return ioo.WriteMapping(ctx, mapping)
	case "setting":
		setting, readErr := ioi.ReadSetting(ctx)
		if readErr != nil {
			return readErr
		}

		return ioo.WriteSetting(ctx, setting)
	default:
		return fmt.Errorf("unknown type=%s", f_type)
	}
}
// executeData pumps documents from input to output in batches of f_limit.
//
// The loop ends successfully when the input reports io.EOF or hands back an
// empty batch. Any other read error, any write error, and a write that
// stored fewer documents than were read all abort the dump with an error.
func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
	for {
		batch, readErr := input.ReadData(ctx, f_limit)

		switch {
		case readErr == nil:
			// fall through to the write below
		case errors.Is(readErr, io.EOF):
			return nil
		default:
			return readErr
		}

		if len(batch) == 0 {
			return nil
		}

		written, writeErr := output.WriteData(ctx, batch)
		if writeErr != nil {
			return writeErr
		}

		if written != len(batch) {
			return fmt.Errorf("cmd.run: got lines=%d, only succeed=%d", len(batch), written)
		}

		logrus.Infof("Dump: %d docs succeed!!!", written)
	}
}
// newIO builds the DumpIO endpoint described by source.
//
// A source that parses as an http or https URL with a non-empty host becomes
// an elasticsearch client; everything else is treated as a file path.
func newIO(source string, ioType interfaces.IO) (interfaces.DumpIO, error) {
	iurl, err := url.Parse(source)

	switch {
	case err != nil:
		logrus.Debugf("newIO.%s: url parse source err=%v", ioType.Code(), err)
	case iurl.Scheme != "http" && iurl.Scheme != "https":
		logrus.Debugf("newIO.%s: url scheme=%s invalid", ioType.Code(), iurl.Scheme)
	case iurl.Host == "":
		logrus.Debugf("newIO.%s: url host empty", ioType.Code())
	default:
		return newESIO(iurl, ioType)
	}

	return newFileIO(source, ioType)
}

// newESIO creates the elasticsearch endpoint for iurl. The --query flag is
// only parsed for the input side; it must be a JSON object.
func newESIO(iurl *url.URL, ioType interfaces.IO) (interfaces.DumpIO, error) {
	qm := make(map[string]any)

	if ioType == interfaces.IOInput && f_query != "" {
		if err := json.Unmarshal([]byte(f_query), &qm); err != nil {
			logrus.Debugf("newIO.%s: query=%s invalid to map[string]any", ioType.Code(), f_query)
			return nil, fmt.Errorf("invalid query err=%w", err)
		}
	}

	// Redacted hides the password so that credentials never reach the log.
	logrus.Debugf("newIO.%s: source as url=%s", ioType.Code(), iurl.Redacted())

	return xes.NewClient(iurl, ioType, qm)
}

// newFileIO opens path as a file endpoint.
//
// An input file is opened read-only and must already exist; it is never
// created. An output file must not exist yet: O_EXCL makes the existence
// check and the creation one atomic step.
func newFileIO(path string, ioType interfaces.IO) (interfaces.DumpIO, error) {
	var (
		err  error
		file *os.File
	)

	logrus.Debugf("newIO.%s: source as file=%s", ioType.Code(), path)

	if ioType == interfaces.IOOutput {
		file, err = os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
		if errors.Is(err, os.ErrExist) {
			return nil, fmt.Errorf("output_file=%s already exist", path)
		}
	} else {
		file, err = os.Open(path)
	}

	if err != nil {
		return nil, err
	}

	client, err := xfile.NewClient(file, ioType)
	if err != nil {
		// Do not leak the descriptor when the client cannot be built.
		_ = file.Close()
		return nil, err
	}

	return client, nil
}

View File

@ -0,0 +1,19 @@
package interfaces
import "context"
// DumpIO is the contract shared by every dump endpoint. The same value can
// act as the source or as the destination of a dump; IOType reports which.
type DumpIO interface {
// ReadData returns the next batch of documents; the int is the batch size
// requested by the caller.
ReadData(context.Context, int) ([]*ESSource, error)
// WriteData stores docs and returns how many documents were written.
WriteData(ctx context.Context, docs []*ESSource) (int, error)
// ReadMapping returns the mapping definition as a generic JSON object.
ReadMapping(context.Context) (map[string]any, error)
// WriteMapping stores a mapping definition obtained from ReadMapping.
WriteMapping(context.Context, map[string]any) error
// ReadSetting returns the settings as a generic JSON object.
ReadSetting(ctx context.Context) (map[string]any, error)
// WriteSetting stores settings obtained from ReadSetting.
WriteSetting(context.Context, map[string]any) error
// Close releases the resources held by the endpoint.
Close() error
// IOType reports whether the endpoint was created as input or output.
IOType() IO
// IsFile reports whether the endpoint is backed by a file.
IsFile() bool
}

View File

@ -0,0 +1,27 @@
package interfaces
// IO tells which side of a dump an endpoint plays.
type IO int64

// The two sides of a dump.
const (
	IOInput IO = iota
	IOOutput
)

// Code returns the lower-case name of the side: "input", "output", or
// "unknown" for any other value.
func (side IO) Code() string {
	if side == IOInput {
		return "input"
	}

	if side == IOOutput {
		return "output"
	}

	return "unknown"
}
// DataType enumerates the kinds of content a dump can transfer.
// NOTE(review): nothing in the code visible here references these constants;
// the command selects the kind through the --type string instead.
type DataType int64
// The values follow declaration order starting at zero.
const (
DataTypeData DataType = iota
DataTypeMapping
DataTypeSetting
)

View File

@ -0,0 +1,33 @@
package interfaces
// ESSource is one document as it travels through a dump. The JSON tags use
// the field names of an elasticsearch search hit: _id, _index and _source.
type ESSource struct {
// DocId is the document id ("_id").
DocId string `json:"_id"`
// Index is the name of the index the document was read from ("_index").
Index string `json:"_index"`
// Content is the document body ("_source").
Content map[string]any `json:"_source"`
}
// ESResponse is the decoding target for the JSON body of a search or scroll
// response. Only ScrollId and Hits.Hits are consumed by the code in this
// project that is visible here.
type ESResponse struct {
// ScrollId identifies the scroll to continue on the next request.
ScrollId string `json:"_scroll_id"`
Took int `json:"took"`
TimedOut bool `json:"timed_out"`
// Shards mirrors the "_shards" object of the response.
Shards struct {
Total int `json:"total"`
Successful int `json:"successful"`
Skipped int `json:"skipped"`
Failed int `json:"failed"`
} `json:"_shards"`
// Hits mirrors the "hits" object; Hits.Hits holds the documents.
Hits struct {
Total struct {
Value int `json:"value"`
Relation string `json:"relation"`
} `json:"total"`
MaxScore float64 `json:"max_score"`
Hits []*ESSource `json:"hits"`
} `json:"hits"`
}
// ESMapping describes a mapping document: a map whose values contain a
// "mappings" object with its "properties".
// NOTE(review): the map keys are presumably index names, and no code visible
// here uses this type - confirm before relying on it.
type ESMapping map[string]struct {
Mappings struct {
Properties map[string]any `json:"properties"`
} `json:"mappings"`
}

10
internal/opt/var.go Normal file
View File

@ -0,0 +1,10 @@
package opt
const (
// ScrollDurationSeconds is the scroll keep-alive in seconds (10 minutes)
// that the elasticsearch client sends with search and scroll requests.
ScrollDurationSeconds = 10 * 60
)
// Options that the command-line flags write into.
var (
// Debug is bound to --debug; run switches logrus to debug level when set.
Debug bool
// Timeout is bound to --timeout and is interpreted as a number of seconds.
Timeout int
)

28
internal/util/ctx.go Normal file
View File

@ -0,0 +1,28 @@
package util
import (
"context"
"time"
)
func Timeout(seconds ...int) context.Context {
second := 30
if len(seconds) > 0 && seconds[0] > 0 {
second = seconds[0]
}
ctx, _ := context.WithTimeout(context.Background(), time.Duration(second)*time.Second)
return ctx
}
func TimeoutCtx(ctx context.Context, seconds ...int) context.Context {
second := 30
if len(seconds) > 0 && seconds[0] > 0 {
second = seconds[0]
}
timeout, _ := context.WithTimeout(ctx, time.Duration(second)*time.Second)
return timeout
}

295
internal/xes/xes.go Normal file
View File

@ -0,0 +1,295 @@
package xes
import (
"bytes"
"context"
"crypto/tls"
"encoding/json"
"esgo2dump/internal/interfaces"
"esgo2dump/internal/opt"
"esgo2dump/internal/util"
"fmt"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/elastic/go-elasticsearch/v7/esutil"
"github.com/sirupsen/logrus"
"net/http"
"net/url"
"strings"
"time"
)
// NewClient connects to the elasticsearch server named by url and returns a
// DumpIO bound to the index taken from the URL path.
//
// Credentials embedded in the URL are used for basic authentication. The
// password is masked in the debug log. qm is the query object applied when
// the client reads data. The server is probed once with an Info request; a
// transport error or a non-200 status fails the construction.
func NewClient(url *url.URL, iot interfaces.IO, qm map[string]any) (interfaces.DumpIO, error) {
	var (
		err      error
		endpoint = fmt.Sprintf("%s://%s", url.Scheme, url.Host)
		c        *elastic.Client
		infoResp *esapi.Response
		index    = strings.TrimPrefix(url.Path, "/")
		username string
		password string
	)

	if url.User != nil {
		username = url.User.Username()
		password, _ = url.User.Password()
	}

	// Only reveal whether a password was supplied, never its value.
	maskedPassword := ""
	if password != "" {
		maskedPassword = "******"
	}

	logrus.Debugf("xes.NewClient: endpoint=%s index=%s (username=%s password=%s)", endpoint, index, username, maskedPassword)

	if index == "" {
		return nil, fmt.Errorf("please specify index name: (like => http://127.0.0.1:9200/my_index)")
	}

	if c, err = elastic.NewClient(
		elastic.Config{
			Addresses:     []string{endpoint},
			Username:      username,
			Password:      password,
			CACert:        nil,
			RetryOnStatus: []int{429},
			MaxRetries:    3,
			RetryBackoff:  nil,
			Transport: &http.Transport{
				// NOTE(review): certificate verification is disabled for every
				// https endpoint; consider making this opt-in.
				TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
			},
		},
	); err != nil {
		logrus.Debugf("xes.NewClient: elastic new client with endpont=%s err=%v", endpoint, err)
		return nil, err
	}

	if infoResp, err = c.Info(); err != nil {
		logrus.Debugf("xes.NewClient: ping err=%v", err)
		return nil, err
	}
	// The body is not needed, but it must be closed to release the connection.
	defer infoResp.Body.Close()

	if infoResp.StatusCode != 200 {
		return nil, fmt.Errorf("info xes status=%d", infoResp.StatusCode)
	}

	return &client{c: c, index: index, queryMap: qm, iot: iot}, nil
}
// client is the elasticsearch-backed implementation of interfaces.DumpIO.
type client struct {
// c is the underlying elasticsearch client.
c *elastic.Client
// iot records whether this endpoint is the input or the output.
iot interfaces.IO
// index is the index name every request of this client targets.
index string
// from is not referenced by any method visible here.
from int
// scrollId is empty until the first search; ReadData stores the scroll id
// returned by that search and continues the scroll with it afterwards.
scrollId string
// queryMap is the optional query object sent with the first search.
queryMap map[string]any
}
// checkResponse turns any status other than 200 into an error that carries
// the status code and the textual form of the response.
func (c *client) checkResponse(r *esapi.Response) error {
	if r.StatusCode != 200 {
		return fmt.Errorf("status=%d msg=%s", r.StatusCode, r.String())
	}

	return nil
}
// IOType returns the side (input or output) this client was created for.
func (c *client) IOType() interfaces.IO {
return c.iot
}
// IsFile always returns false: this endpoint is an elasticsearch server.
func (c *client) IsFile() bool {
return false
}
// Close does nothing and always returns nil.
func (c *client) Close() error {
return nil
}
// WriteData indexes docs into the client's index through a bulk indexer and
// returns the number of documents that were queued.
//
// Every document is sent with action "index" under its original id, with
// its Content as the body. The indexer is closed (which flushes it) under a
// timeout of opt.Timeout seconds. If the indexer reports failed items, the
// queued count is returned together with an error.
//
// Queueing uses ctx, so a cancelled dump stops instead of blocking on a full
// indexer queue.
func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (int, error) {
	var (
		err     error
		indexer esutil.BulkIndexer
		count   int
	)

	if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
		Client:  c.c,
		Index:   c.index,
		Refresh: "",
	}); err != nil {
		return 0, err
	}

	for _, doc := range docs {
		var bs []byte

		if bs, err = json.Marshal(doc.Content); err != nil {
			return 0, err
		}

		logrus.Debugf("xes.Write: doc content=%s", bs)

		if err = indexer.Add(ctx, esutil.BulkIndexerItem{
			Action:     "index",
			Index:      c.index,
			DocumentID: doc.DocId,
			Body:       bytes.NewReader(bs),
		}); err != nil {
			return 0, err
		}

		count++
	}

	if err = indexer.Close(util.TimeoutCtx(ctx, opt.Timeout)); err != nil {
		return 0, err
	}

	if stats := indexer.Stats(); stats.NumFailed > 0 {
		return count, fmt.Errorf("write to xes failed=%d", stats.NumFailed)
	}

	return count, nil
}
// ReadData returns the next batch of at most i documents from the index.
//
// The first call opens a scroll with a search request, applying the
// configured query if there is one. Later calls continue that scroll. An
// empty result means the scroll is exhausted.
//
// Transport errors and non-200 responses are returned as errors on both
// paths, so a failing scroll request can no longer look like the end of the
// data. Every response body is closed.
func (c *client) ReadData(ctx context.Context, i int) ([]*interfaces.ESSource, error) {
	var (
		err       error
		resp      *esapi.Response
		result    = new(interfaces.ESResponse)
		keepAlive = time.Duration(opt.ScrollDurationSeconds) * time.Second
	)

	if c.scrollId == "" {
		qs := []func(*esapi.SearchRequest){
			c.c.Search.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
			c.c.Search.WithIndex(c.index),
			c.c.Search.WithSize(i),
			c.c.Search.WithFrom(0),
			c.c.Search.WithScroll(keepAlive),
		}

		if len(c.queryMap) > 0 {
			var queryBs []byte

			if queryBs, err = json.Marshal(map[string]any{"query": c.queryMap}); err != nil {
				return nil, err
			}

			qs = append(qs, c.c.Search.WithBody(bytes.NewReader(queryBs)))
		}

		resp, err = c.c.Search(qs...)
	} else {
		resp, err = c.c.Scroll(
			c.c.Scroll.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
			c.c.Scroll.WithScrollID(c.scrollId),
			c.c.Scroll.WithScroll(keepAlive),
		)
	}

	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		// Use a constant format so that '%' in the response is not
		// interpreted as a formatting verb.
		return nil, fmt.Errorf("%s", resp.String())
	}

	if err = json.NewDecoder(resp.Body).Decode(result); err != nil {
		return nil, err
	}

	// Follow the scroll id from the latest response when one is present.
	if result.ScrollId != "" {
		c.scrollId = result.ScrollId
	}

	return result.Hits.Hits, nil
}
// ReadMapping fetches the mapping of the client's index and returns the
// decoded JSON response. The request is bound to ctx with a timeout of
// opt.Timeout seconds, and a status other than 200 is returned as an error.
func (c *client) ReadMapping(ctx context.Context) (map[string]any, error) {
	r, err := c.c.Indices.GetMapping(
		c.c.Indices.GetMapping.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
		c.c.Indices.GetMapping.WithIndex(c.index),
	)
	if err != nil {
		return nil, err
	}
	// Release the connection once the body has been consumed.
	defer r.Body.Close()

	if r.StatusCode != 200 {
		return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
	}

	m := make(map[string]any)
	if err = json.NewDecoder(r.Body).Decode(&m); err != nil {
		return nil, err
	}

	return m, nil
}
// WriteMapping creates the client's index once per entry of m, using the
// entry's value as the request body. The first failing request aborts the
// loop, and every response body is closed.
//
// NOTE(review): every entry targets the same index, so a map with more than
// one entry will presumably fail on the second create request - confirm the
// intended handling of multi-index mappings.
func (c *client) WriteMapping(ctx context.Context, m map[string]any) error {
	for _, definition := range m {
		bs, err := json.Marshal(definition)
		if err != nil {
			return err
		}

		result, err := c.c.Indices.Create(
			c.index,
			c.c.Indices.Create.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
			c.c.Indices.Create.WithBody(bytes.NewReader(bs)),
		)
		if err != nil {
			return err
		}

		err = c.checkResponse(result)
		// Close explicitly: a defer would keep bodies open until the loop ends.
		_ = result.Body.Close()

		if err != nil {
			return err
		}
	}

	return nil
}
// ReadSetting fetches the settings of the client's index and returns the
// decoded JSON response. The request is bound to ctx with a timeout of
// opt.Timeout seconds, and a status other than 200 is returned as an error.
func (c *client) ReadSetting(ctx context.Context) (map[string]any, error) {
	r, err := c.c.Indices.GetSettings(
		c.c.Indices.GetSettings.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
		c.c.Indices.GetSettings.WithIndex(c.index),
	)
	if err != nil {
		return nil, err
	}
	// Release the connection once the body has been consumed.
	defer r.Body.Close()

	if r.StatusCode != 200 {
		return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
	}

	m := make(map[string]any)
	if err = json.NewDecoder(r.Body).Decode(&m); err != nil {
		return nil, err
	}

	return m, nil
}
// WriteSetting sends m as the body of a put-settings request for the
// client's index. The request names the index explicitly so that it cannot
// apply to other indices of the cluster. A status other than 200 is returned
// as an error.
//
// NOTE(review): m is sent as received. If it is the raw output of
// ReadSetting it is still wrapped in the source index name, which the server
// presumably rejects - confirm whether it must be unwrapped first.
func (c *client) WriteSetting(ctx context.Context, m map[string]any) error {
	bs, err := json.Marshal(m)
	if err != nil {
		return err
	}

	result, err := c.c.Indices.PutSettings(
		bytes.NewReader(bs),
		c.c.Indices.PutSettings.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
		c.c.Indices.PutSettings.WithIndex(c.index),
	)
	if err != nil {
		return err
	}
	defer result.Body.Close()

	return c.checkResponse(result)
}

38
internal/xes/xes_test.go Normal file
View File

@ -0,0 +1,38 @@
package xes
import (
"esgo2dump/internal/util"
elastic "github.com/elastic/go-elasticsearch/v7"
"testing"
)
// TestGetESMapping talks to an elasticsearch server at 127.0.0.1:9200: it
// requests the server info and then the mapping of "some_index", logging
// both responses. Each step that returns an error fails the test at once.
func TestGetESMapping(t *testing.T) {
	const (
		endpoint = "http://127.0.0.1:9200"
		index    = "some_index"
	)

	cli, err := elastic.NewClient(elastic.Config{Addresses: []string{endpoint}})
	if err != nil {
		t.Fatal(1, err)
	}

	info, err := cli.Info(cli.Info.WithContext(util.Timeout(5)))
	if err != nil {
		t.Fatal(2, err)
	}

	t.Log("info:", info.String())

	mapping, err := cli.Indices.GetMapping(cli.Indices.GetMapping.WithIndex(index))
	if err != nil {
		t.Fatal(3, err)
	}

	t.Log("get source:", mapping.String())
}

155
internal/xfile/xfile.go Normal file
View File

@ -0,0 +1,155 @@
package xfile
import (
"bufio"
"context"
"encoding/json"
"esgo2dump/internal/interfaces"
"github.com/sirupsen/logrus"
"io"
"os"
)
// client is the file-backed implementation of interfaces.DumpIO.
type client struct {
// f is the file all reads and writes go to.
f *os.File
// iot records whether this endpoint is the input or the output.
iot interfaces.IO
// scanner reads f line by line; NewClient only sets it for an input.
scanner *bufio.Scanner
}
// ReadMapping reads the rest of the file and decodes it as one JSON object.
func (c *client) ReadMapping(ctx context.Context) (map[string]any, error) {
	raw, readErr := io.ReadAll(c.f)
	if readErr != nil {
		return nil, readErr
	}

	mapping := map[string]any{}

	if decodeErr := json.Unmarshal(raw, &mapping); decodeErr != nil {
		return nil, decodeErr
	}

	return mapping, nil
}
// ReadSetting reads the rest of the file and decodes it as one JSON object.
func (c *client) ReadSetting(ctx context.Context) (map[string]any, error) {
	raw, readErr := io.ReadAll(c.f)
	if readErr != nil {
		return nil, readErr
	}

	setting := map[string]any{}

	if decodeErr := json.Unmarshal(raw, &setting); decodeErr != nil {
		return nil, decodeErr
	}

	return setting, nil
}
// WriteMapping encodes m as JSON and writes it to the file in one call.
func (c *client) WriteMapping(ctx context.Context, m map[string]any) error {
	encoded, err := json.Marshal(m)
	if err == nil {
		_, err = c.f.Write(encoded)
	}

	return err
}
// WriteSetting encodes m as JSON and writes it to the file in one call.
func (c *client) WriteSetting(ctx context.Context, m map[string]any) error {
	encoded, err := json.Marshal(m)
	if err == nil {
		_, err = c.f.Write(encoded)
	}

	return err
}
// IOType returns the side (input or output) this client was created for.
func (c *client) IOType() interfaces.IO {
return c.iot
}
// IsFile always returns true: this endpoint is a file.
func (c *client) IsFile() bool {
return true
}
// WriteData appends docs to the file, one JSON document per line, and
// returns how many documents were written. On the first encoding or write
// error it stops and returns the count reached so far with that error.
func (c *client) WriteData(ctx context.Context, docs []*interfaces.ESSource) (int, error) {
	// Encode emits the JSON form followed by '\n' in a single Write.
	lineWriter := json.NewEncoder(c.f)

	written := 0
	for idx := range docs {
		if encodeErr := lineWriter.Encode(docs[idx]); encodeErr != nil {
			return written, encodeErr
		}

		written++
	}

	return written, nil
}
// ReadData reads up to i documents from the file, one JSON document per
// line, and returns them. An empty result means the file is exhausted.
//
// Empty lines are skipped, so a stray blank line (for instance at the end of
// a hand-edited file) does not abort the dump. A line that is not valid JSON
// or a scanner error stops reading; the documents decoded so far are
// returned together with the error.
func (c *client) ReadData(ctx context.Context, i int) ([]*interfaces.ESSource, error) {
	var (
		err   error
		count = 0
		list  = make([]*interfaces.ESSource, 0, i)
	)

	for c.scanner.Scan() {
		// Bytes is only valid until the next Scan; it is fully decoded
		// before the loop advances.
		line := c.scanner.Bytes()
		if len(line) == 0 {
			continue
		}

		logrus.Debugf("xfile.Read: line=%s", line)

		item := new(interfaces.ESSource)
		if err = json.Unmarshal(line, item); err != nil {
			return list, err
		}

		list = append(list, item)
		count++

		if count >= i {
			break
		}
	}

	if err = c.scanner.Err(); err != nil {
		return list, err
	}

	return list, nil
}
// Close closes the underlying file and returns the error from that call.
func (c *client) Close() error {
return c.f.Close()
}
// NewClient wraps file in a file-backed DumpIO for the given side.
//
// For an input a line scanner is attached. Its maximum line length is raised
// from the bufio default of 64 KiB to 64 MiB, because each line holds a
// whole document and a longer line would otherwise end the dump with
// bufio.ErrTooLong. The buffer starts small and only grows when needed.
func NewClient(file *os.File, ioType interfaces.IO) (interfaces.DumpIO, error) {
	const (
		initialLineBytes = 64 * 1024
		maxLineBytes     = 64 * 1024 * 1024
	)

	c := &client{f: file, iot: ioType}

	if ioType == interfaces.IOInput {
		c.scanner = bufio.NewScanner(c.f)
		c.scanner.Buffer(make([]byte, 0, initialLineBytes), maxLineBytes)
	}

	return c, nil
}