2024-03-22 18:05:47 +08:00
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
2024-03-27 17:44:01 +08:00
|
|
|
"bufio"
|
2024-03-22 18:05:47 +08:00
|
|
|
"context"
|
|
|
|
"encoding/json"
|
2024-05-08 23:14:06 +08:00
|
|
|
"errors"
|
2024-03-22 18:05:47 +08:00
|
|
|
"fmt"
|
|
|
|
"net/url"
|
|
|
|
"os"
|
2024-05-08 19:02:49 +08:00
|
|
|
"strings"
|
2024-06-21 17:09:06 +08:00
|
|
|
"sync"
|
2024-03-26 17:23:10 +08:00
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
"github.com/loveuer/esgo2dump/model"
|
|
|
|
"github.com/loveuer/nf/nft/log"
|
|
|
|
|
2024-03-26 17:23:10 +08:00
|
|
|
"github.com/loveuer/esgo2dump/internal/interfaces"
|
|
|
|
"github.com/loveuer/esgo2dump/internal/opt"
|
|
|
|
"github.com/loveuer/esgo2dump/internal/xes"
|
|
|
|
"github.com/loveuer/esgo2dump/internal/xfile"
|
2024-05-08 19:02:49 +08:00
|
|
|
"github.com/samber/lo"
|
2024-03-26 17:23:10 +08:00
|
|
|
"github.com/spf13/cobra"
|
2024-03-22 18:05:47 +08:00
|
|
|
)
|
|
|
|
|
2024-03-27 17:44:01 +08:00
|
|
|
func check(cmd *cobra.Command) error {
|
2024-12-13 15:01:40 +08:00
|
|
|
if opt.Cfg.Args.Input == "" {
|
2024-03-27 17:44:01 +08:00
|
|
|
return cmd.Help()
|
2024-12-13 15:01:40 +08:00
|
|
|
// return fmt.Errorf("must specify input(example: data.json/http://127.0.0.1:9200/my_index)")
|
2024-03-27 17:44:01 +08:00
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if opt.Cfg.Args.Limit == 0 || opt.Cfg.Args.Limit > 10000 {
|
2024-03-27 17:44:01 +08:00
|
|
|
return fmt.Errorf("invalid limit(1 - 10000)")
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if opt.Cfg.Args.Query != "" && opt.Cfg.Args.QueryFile != "" {
|
2024-03-27 17:44:01 +08:00
|
|
|
return fmt.Errorf("cannot specify both query and query_file at the same time")
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
switch opt.Cfg.Args.Type {
|
2024-03-27 17:44:01 +08:00
|
|
|
case "data", "mapping", "setting":
|
|
|
|
default:
|
2024-12-13 15:01:40 +08:00
|
|
|
return fmt.Errorf("unknown type=%s", opt.Cfg.Args.Type)
|
2024-03-27 17:44:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-03-22 18:05:47 +08:00
|
|
|
func run(cmd *cobra.Command, args []string) error {
|
|
|
|
var (
|
|
|
|
err error
|
|
|
|
ioi interfaces.DumpIO
|
|
|
|
ioo interfaces.DumpIO
|
|
|
|
)
|
|
|
|
|
2024-03-27 17:44:01 +08:00
|
|
|
if err = check(cmd); err != nil {
|
|
|
|
return err
|
2024-03-22 18:05:47 +08:00
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if ioi, err = newIO(opt.Cfg.Args.Input, interfaces.IOInput, es_iversion); err != nil {
|
2024-03-22 18:05:47 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if ioo, err = newIO(opt.Cfg.Args.Output, interfaces.IOOutput, es_oversion); err != nil {
|
2024-03-22 18:05:47 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
_ = ioi.Close()
|
|
|
|
_ = ioo.Close()
|
|
|
|
}()
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if (opt.Cfg.Args.Query != "" || opt.Cfg.Args.QueryFile != "") && ioi.IsFile() {
|
2024-03-27 17:44:01 +08:00
|
|
|
return fmt.Errorf("with file input, query or query_file can't be supported")
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if (opt.Cfg.Args.Source != "") && ioi.IsFile() {
|
2024-05-08 19:02:49 +08:00
|
|
|
return fmt.Errorf("with file input, source can't be supported")
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
switch opt.Cfg.Args.Type {
|
2024-03-22 18:05:47 +08:00
|
|
|
case "data":
|
2024-03-27 17:44:01 +08:00
|
|
|
if err = executeData(cmd.Context(), ioi, ioo); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-05-10 21:40:02 +08:00
|
|
|
log.Info("Dump: write data succeed!!!")
|
2024-03-27 17:44:01 +08:00
|
|
|
|
|
|
|
return nil
|
2024-03-22 18:05:47 +08:00
|
|
|
case "mapping":
|
|
|
|
var mapping map[string]any
|
|
|
|
if mapping, err = ioi.ReadMapping(cmd.Context()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-03-26 17:31:38 +08:00
|
|
|
if err = ioo.WriteMapping(cmd.Context(), mapping); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-05-10 21:40:02 +08:00
|
|
|
log.Info("Dump: write mapping succeed!!!")
|
2024-03-26 17:31:38 +08:00
|
|
|
|
|
|
|
return nil
|
2024-03-22 18:05:47 +08:00
|
|
|
case "setting":
|
|
|
|
var setting map[string]any
|
|
|
|
if setting, err = ioi.ReadSetting(cmd.Context()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-03-26 17:31:38 +08:00
|
|
|
if err = ioo.WriteSetting(cmd.Context(), setting); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-05-10 21:40:02 +08:00
|
|
|
log.Info("Dump: write setting succeed!!!")
|
2024-03-26 17:31:38 +08:00
|
|
|
|
|
|
|
return nil
|
2024-03-22 18:05:47 +08:00
|
|
|
default:
|
2024-12-13 15:01:40 +08:00
|
|
|
return fmt.Errorf("unknown type=%s", opt.Cfg.Args.Type)
|
2024-03-22 18:05:47 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
|
|
|
|
var (
|
2024-03-27 17:44:01 +08:00
|
|
|
err error
|
|
|
|
queries = make([]map[string]any, 0)
|
2024-05-08 19:02:49 +08:00
|
|
|
sources = make([]string, 0)
|
2024-03-26 21:05:37 +08:00
|
|
|
)
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if opt.Cfg.Args.Source != "" {
|
|
|
|
sources = lo.Map(strings.Split(opt.Cfg.Args.Source, ";"), func(item string, idx int) string {
|
2024-05-08 19:02:49 +08:00
|
|
|
return strings.TrimSpace(item)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if opt.Cfg.Args.Query != "" {
|
2024-03-27 17:44:01 +08:00
|
|
|
query := make(map[string]any)
|
2024-12-13 15:01:40 +08:00
|
|
|
if err = json.Unmarshal([]byte(opt.Cfg.Args.Query), &query); err != nil {
|
2024-03-27 17:44:01 +08:00
|
|
|
return fmt.Errorf("invalid query err=%v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
queries = append(queries, query)
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if opt.Cfg.Args.QueryFile != "" {
|
|
|
|
var qf *os.File
|
2024-03-27 17:44:01 +08:00
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if qf, err = os.Open(opt.Cfg.Args.QueryFile); err != nil {
|
2024-03-27 17:44:01 +08:00
|
|
|
return fmt.Errorf("open query_file err=%v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
_ = qf.Close()
|
|
|
|
}()
|
|
|
|
|
|
|
|
scanner := bufio.NewScanner(qf)
|
2024-03-28 16:53:53 +08:00
|
|
|
scanner.Buffer(make([]byte, 1*1024*1024), 5*1024*1024)
|
2024-03-27 17:44:01 +08:00
|
|
|
lineCount := 1
|
|
|
|
for scanner.Scan() {
|
|
|
|
line := scanner.Text()
|
|
|
|
oq := make(map[string]any)
|
|
|
|
if err = json.Unmarshal([]byte(line), &oq); err != nil {
|
|
|
|
return fmt.Errorf("query file line=%d invalid err=%v", lineCount, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
queries = append(queries, oq)
|
|
|
|
|
|
|
|
if len(queries) > 10000 {
|
|
|
|
return fmt.Errorf("query_file support max lines=%d", 10000)
|
|
|
|
}
|
|
|
|
|
|
|
|
lineCount++
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(queries) == 0 {
|
|
|
|
queries = append(queries, nil)
|
|
|
|
}
|
|
|
|
|
2024-03-26 21:05:37 +08:00
|
|
|
var (
|
2024-05-24 17:27:52 +08:00
|
|
|
ok bool
|
|
|
|
docs []*model.ESSource
|
|
|
|
dch <-chan []*model.ESSource
|
|
|
|
ech <-chan error
|
2024-05-20 16:11:50 +08:00
|
|
|
|
2024-05-24 17:27:52 +08:00
|
|
|
e2ch = make(chan error)
|
|
|
|
wch = make(chan []*model.ESSource)
|
2024-06-21 17:09:06 +08:00
|
|
|
wg = sync.WaitGroup{}
|
2024-03-22 18:05:47 +08:00
|
|
|
)
|
|
|
|
|
2024-05-24 17:27:52 +08:00
|
|
|
go func() {
|
2024-06-21 17:09:06 +08:00
|
|
|
wg.Add(1)
|
2024-05-24 17:27:52 +08:00
|
|
|
if err = output.WriteData(ctx, wch); err != nil {
|
|
|
|
e2ch <- err
|
|
|
|
}
|
2024-06-21 17:09:06 +08:00
|
|
|
|
|
|
|
wg.Done()
|
2024-05-24 17:27:52 +08:00
|
|
|
}()
|
|
|
|
|
|
|
|
log.Info("Query: got queries=%d", len(queries))
|
|
|
|
|
2024-05-20 16:11:50 +08:00
|
|
|
Loop:
|
2024-12-13 15:01:40 +08:00
|
|
|
for queryIdx, query := range queries {
|
2024-07-15 14:07:43 +08:00
|
|
|
bs, _ := json.Marshal(query)
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
log.Debug("Query[%d]: %s", queryIdx, string(bs))
|
2024-07-15 14:07:43 +08:00
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
dch, ech = input.ReadData(ctx, opt.Cfg.Args.Limit, query, sources, []string{opt.Cfg.Args.Sort})
|
2024-05-24 17:27:52 +08:00
|
|
|
|
2024-05-20 16:11:50 +08:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
2024-05-24 17:27:52 +08:00
|
|
|
case err, ok = <-ech:
|
|
|
|
if err != nil {
|
2024-05-20 16:11:50 +08:00
|
|
|
return err
|
|
|
|
}
|
2024-03-27 18:09:11 +08:00
|
|
|
|
2024-05-24 17:27:52 +08:00
|
|
|
continue Loop
|
|
|
|
case err, _ = <-e2ch:
|
|
|
|
return err
|
|
|
|
case docs, ok = <-dch:
|
|
|
|
if !ok || len(docs) == 0 {
|
|
|
|
continue Loop
|
2024-05-20 16:11:50 +08:00
|
|
|
}
|
2024-03-22 18:05:47 +08:00
|
|
|
|
2024-05-24 17:27:52 +08:00
|
|
|
wch <- docs
|
2024-05-20 16:11:50 +08:00
|
|
|
}
|
2024-03-22 18:05:47 +08:00
|
|
|
}
|
|
|
|
}
|
2024-05-20 16:11:50 +08:00
|
|
|
|
2024-06-21 17:09:06 +08:00
|
|
|
close(wch)
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
|
2024-05-20 16:11:50 +08:00
|
|
|
return nil
|
2024-03-22 18:05:47 +08:00
|
|
|
}
|
|
|
|
|
2024-05-08 23:14:06 +08:00
|
|
|
func newIO(source string, ioType interfaces.IO, esv string) (interfaces.DumpIO, error) {
|
2024-03-22 18:05:47 +08:00
|
|
|
var (
|
|
|
|
err error
|
|
|
|
iurl *url.URL
|
|
|
|
file *os.File
|
|
|
|
qm = make(map[string]any)
|
|
|
|
)
|
|
|
|
|
2024-05-31 13:48:33 +08:00
|
|
|
log.Debug("action=%s, type=%s, source=%s, es_version=%s", "new_io", ioType.Code(), source, esv)
|
2024-03-22 18:05:47 +08:00
|
|
|
|
|
|
|
if iurl, err = url.Parse(source); err != nil {
|
2024-05-31 13:48:33 +08:00
|
|
|
log.Debug("action=%s, type=%s, source=%s, err=%s", "new_io url parse err", ioType.Code(), source, err.Error())
|
2024-03-22 18:05:47 +08:00
|
|
|
goto ClientByFile
|
|
|
|
}
|
|
|
|
|
|
|
|
if !(iurl.Scheme == "http" || iurl.Scheme == "https") {
|
2024-05-31 13:48:33 +08:00
|
|
|
log.Debug("action=%s, type=%s, source=%s, scheme=%s", "new_io url scheme error", ioType.Code(), source, iurl.Scheme)
|
2024-03-22 18:05:47 +08:00
|
|
|
goto ClientByFile
|
|
|
|
}
|
|
|
|
|
|
|
|
if iurl.Host == "" {
|
2024-05-31 13:48:33 +08:00
|
|
|
log.Debug("action=%s, type=%s, source=%s", "new_io url host empty", ioType.Code(), source)
|
2024-03-22 18:05:47 +08:00
|
|
|
goto ClientByFile
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if ioType == interfaces.IOInput && opt.Cfg.Args.Query != "" {
|
|
|
|
if err = json.Unmarshal([]byte(opt.Cfg.Args.Query), &qm); err != nil {
|
|
|
|
log.Debug("action=%s, type=%s, source=%s, query=%s", "new_io query string invalid", ioType.Code(), source, opt.Cfg.Args.Query)
|
2024-03-22 18:05:47 +08:00
|
|
|
return nil, fmt.Errorf("invalid query err=%v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-08 23:14:06 +08:00
|
|
|
switch esv {
|
|
|
|
case "7":
|
|
|
|
return xes.NewClient(iurl, ioType)
|
|
|
|
case "6":
|
|
|
|
return xes.NewClientV6(iurl, ioType)
|
|
|
|
case "8":
|
2024-05-10 21:40:02 +08:00
|
|
|
return nil, errors.New("es version 8 coming soon")
|
2024-05-08 23:14:06 +08:00
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("unknown es version=%s", esv)
|
|
|
|
}
|
2024-03-22 18:05:47 +08:00
|
|
|
|
|
|
|
ClientByFile:
|
|
|
|
if ioType == interfaces.IOOutput {
|
|
|
|
if _, err = os.Stat(source); !os.IsNotExist(err) {
|
|
|
|
return nil, fmt.Errorf("output_file=%s already exist", source)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-12-13 15:01:40 +08:00
|
|
|
if file, err = os.OpenFile(source, os.O_CREATE|os.O_RDWR, 0o644); err != nil {
|
2024-03-22 18:05:47 +08:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return xfile.NewClient(file, ioType)
|
|
|
|
}
|