Compare commits

...

13 Commits

Author SHA1 Message Date
loveuer
4257cd3668 refactor: project code arch; feat: cancel operation anytime 2025-02-08 11:42:56 +08:00
loveuer
b7b9c14e79 fix: write error stack bug 2025-01-22 17:29:12 +08:00
Peaches
b247b4fea8
feat: add query config(sniff). (#5) 2025-01-20 13:10:46 +08:00
loveuer
a574118649 update: add flag(disable-ping) 2025-01-02 21:38:30 -08:00
loveuer
75a62dce73 fix: size 0 bug
fix: huge index can't sort _id, back use scroll_id
refac: some files arch
2024-12-13 19:11:09 +08:00
loveuer
dde92f2e59 feat: support multi endpoints(format: scheme://user:passwd@ip1:port1,ip2:port2...) 2024-11-14 11:07:16 +08:00
loveuer
ebb4365135 fix: when max not set, dump size negative 2024-07-22 10:13:12 +08:00
loveuer
beb2ca4cf4 Merge remote-tracking branch 'origin/master' 2024-07-22 09:31:51 +08:00
loveuer
246f919bc2
Merge pull request #4 from CaiCandong/bugfix-es6
Bugfix: the response  elasticsearch6 and 7 are a little different
2024-07-21 17:25:26 +08:00
caicandong
d30233204f bugfix 2024-07-21 13:27:08 +08:00
loveuer
af8bb64366 feat: es7 read data(scroll => search_after) 2024-07-15 14:17:12 +08:00
loveuer
0aee33d553 feat: es7 read data(scroll => search_after) 2024-07-15 14:07:43 +08:00
loveuer
059550898e fix: lost last write 2024-06-21 17:09:06 +08:00
41 changed files with 1245 additions and 1677 deletions

View File

@ -16,32 +16,28 @@ jobs:
- name: checkout repository
uses: actions/checkout@v4
- name: fill version
run: sed -i -E "s/v[0-9]+.[0-9]+.[0-9]+/${{ github.ref_name }}/g" internal/opt/version.go
- name: install golang
uses: actions/setup-go@v4
with:
go-version: '1.18'
- name: build linux amd64
run: CGO_ENABLE=0 GOOS=linux GOARCH=amd64 go build -ldflags='-s -w' -o dist/esgo2dump_${{ github.ref_name }}_linux_amd64 .
run: CGO_ENABLE=0 GOOS=linux GOARCH=amd64 go build -ldflags='-s -w -X github.com/loveuer/esgo2dump/internal/opt.Version="${{ github.ref_name }}"' -o dist/esgo2dump_${{ github.ref_name }}_linux_amd64 .
- name: build linux arm64
run: CGO_ENABLE=0 GOOS=linux GOARCH=arm64 go build -ldflags='-s -w' -o dist/esgo2dump_${{ github.ref_name }}_linux_arm64 .
run: CGO_ENABLE=0 GOOS=linux GOARCH=arm64 go build -ldflags='-s -w -X github.com/loveuer/esgo2dump/internal/opt.Version="${{ github.ref_name }}"' -o dist/esgo2dump_${{ github.ref_name }}_linux_arm64 .
- name: build windows amd64
run: CGO_ENABLE=0 GOOS=windows GOARCH=amd64 go build -ldflags='-s -w' -o dist/esgo2dump_${{ github.ref_name }}_windows_amd64.exe .
run: CGO_ENABLE=0 GOOS=windows GOARCH=amd64 go build -ldflags='-s -w -X github.com/loveuer/esgo2dump/internal/opt.Version="${{ github.ref_name }}"' -o dist/esgo2dump_${{ github.ref_name }}_windows_amd64.exe .
- name: build windows arm64
run: CGO_ENABLE=0 GOOS=windows GOARCH=arm64 go build -ldflags='-s -w' -o dist/esgo2dump_${{ github.ref_name }}_windows_arm64.exe .
run: CGO_ENABLE=0 GOOS=windows GOARCH=arm64 go build -ldflags='-s -w -X github.com/loveuer/esgo2dump/internal/opt.Version="${{ github.ref_name }}"' -o dist/esgo2dump_${{ github.ref_name }}_windows_arm64.exe .
- name: build darwin amd64
run: CGO_ENABLE=0 GOOS=darwin GOARCH=amd64 go build -ldflags='-s -w' -o dist/esgo2dump_${{ github.ref_name }}_darwin_amd64 .
run: CGO_ENABLE=0 GOOS=darwin GOARCH=amd64 go build -ldflags='-s -w -X github.com/loveuer/esgo2dump/internal/opt.Version="${{ github.ref_name }}"' -o dist/esgo2dump_${{ github.ref_name }}_darwin_amd64 .
- name: build darwin arm64
run: CGO_ENABLE=0 GOOS=darwin GOARCH=arm64 go build -ldflags='-s -w' -o dist/esgo2dump_${{ github.ref_name }}_darwin_arm64 .
run: CGO_ENABLE=0 GOOS=darwin GOARCH=arm64 go build -ldflags='-s -w -X github.com/loveuer/esgo2dump/internal/opt.Version="${{ github.ref_name }}"' -o dist/esgo2dump_${{ github.ref_name }}_darwin_arm64 .
- name: run upx
uses: crazy-max/ghaction-upx@v3

6
.gitignore vendored
View File

@ -1,11 +1,7 @@
.idea
.vscode
.DS_Store
*data.json
*mapping.json
*setting.json
*output.json
*test.json
*.json
*.txt
dist
xtest

11
go.mod
View File

@ -5,16 +5,21 @@ go 1.18
require (
github.com/elastic/go-elasticsearch/v6 v6.8.10
github.com/elastic/go-elasticsearch/v7 v7.17.10
github.com/fatih/color v1.16.0
github.com/fatih/color v1.17.0
github.com/go-resty/resty/v2 v2.16.5
github.com/jedib0t/go-pretty/v6 v6.6.4
github.com/samber/lo v1.39.0
github.com/spf13/cobra v1.8.0
github.com/spf13/cobra v1.8.1
)
require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect
golang.org/x/sys v0.14.0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/sys v0.28.0 // indirect
)

29
go.sum
View File

@ -1,29 +1,44 @@
github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/elastic/go-elasticsearch/v6 v6.8.10 h1:2lN0gJ93gMBXvkhwih5xquldszpm8FlUwqG5sPzr6a8=
github.com/elastic/go-elasticsearch/v6 v6.8.10/go.mod h1:UwaDJsD3rWLM5rKNFzv9hgox93HoX8utj1kxD9aFUcI=
github.com/elastic/go-elasticsearch/v7 v7.17.10 h1:TCQ8i4PmIJuBunvBS6bwT2ybzVFxxUhhltAs3Gyu1yo=
github.com/elastic/go-elasticsearch/v7 v7.17.10/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4=
github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI=
github.com/go-resty/resty/v2 v2.16.5 h1:hBKqmWrr7uRc3euHVqmh1HTHcKn99Smr7o5spptdhTM=
github.com/go-resty/resty/v2 v2.16.5/go.mod h1:hkJtXbA2iKHzJheXYvQ8snQES5ZLGKMwQ07xAwp/fiA=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jedib0t/go-pretty/v6 v6.6.4 h1:B51RjA+Sytv0C0Je7PHGDXZBF2JpS5dZEWWRueBLP6U=
github.com/jedib0t/go-pretty/v6 v6.6.4/go.mod h1:zbn98qrYlh95FIhwwsbIip0LYpwSG8SUOScs+v9/t0E=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM=
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -1,69 +0,0 @@
package cmd
import (
"context"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/spf13/cobra"
)
var (
rootCommand = &cobra.Command{
Use: "esgo2dump",
Short: "esgo2dump is alternative to elasticdump",
SilenceUsage: true,
SilenceErrors: true,
RunE: run,
Example: `
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9200/some_index --limit=5000
esgo2dump --input=http://127.0.0.1:9200/some_index --i-version 6 --output=./data.json
esgo2dump --output=http://127.0.0.1:9200/some_index --o-version 6 --input=./data.json
esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --source='id;name;age;address' --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json`,
}
f_input string
f_output string
f_limit int
f_type string
f_source string
f_sort string
f_query string
f_query_file string
f_version bool
es_iversion, es_oversion string
)
func init() {
rootCommand.Flags().BoolVar(&opt.Debug, "debug", false, "")
rootCommand.Flags().BoolVarP(&f_version, "version", "v", false, "print esgo2dump version")
rootCommand.Flags().IntVar(&opt.Timeout, "timeout", 30, "max timeout seconds per operation with limit")
rootCommand.Flags().StringVarP(&f_input, "input", "i", "", "*required: input file or es url (example :data.json / http://127.0.0.1:9200/my_index)")
rootCommand.Flags().StringVarP(&f_output, "output", "o", "output.json", "")
rootCommand.Flags().StringVar(&es_iversion, "i-version", "7", "input(es) version")
rootCommand.Flags().StringVar(&es_oversion, "o-version", "7", "output(es) version")
rootCommand.Flags().StringVarP(&f_type, "type", "t", "data", "data/mapping/setting")
rootCommand.Flags().StringVarP(&f_source, "source", "s", "", "query source, use ';' to separate")
rootCommand.Flags().StringVar(&f_sort, "sort", "", "sort, <field>:<direction> format, for example: time:desc or name:asc")
rootCommand.Flags().StringVarP(&f_query, "query", "q", "", `query dsl, example: {"bool":{"must":[{"term":{"name":{"value":"some_name"}}}],"must_not":[{"range":{"age":{"gte":18,"lt":60}}}]}}`)
rootCommand.Flags().StringVar(&f_query_file, "query_file", "", `query json file (will execute line by line)`)
rootCommand.Flags().IntVarP(&f_limit, "limit", "l", 100, "")
}
func Start(ctx context.Context) error {
return rootCommand.ExecuteContext(ctx)
}

9
internal/cmd/init.go Normal file
View File

@ -0,0 +1,9 @@
package cmd
import "time"
func init() {
time.Local = time.FixedZone("CST", 8*3600)
initRoot()
}

60
internal/cmd/root.go Normal file
View File

@ -0,0 +1,60 @@
package cmd
import (
"context"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/spf13/cobra"
)
const (
example = `
esgo2dump -i https://<user>:<password>@<es_node1_host>:<es_node1_port>,<es_node2_host>:<es_node2_port>/some_index?ping=false&sniff=false -o ./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=http://192.168.1.1:9200/some_index --limit=5000
esgo2dump --input=https://username:password@127.0.0.1:9200/some_index --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --source='id;name;age;address' --output=./data.json
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query='{"match": {"name": "some_name"}}'
esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_file=my_queries.json`
)
var rootCommand = &cobra.Command{
Use: "esgo2dump",
Short: "esgo2dump is alternative to elasticdump",
Example: example,
SilenceUsage: true,
SilenceErrors: true,
PreRunE: preRun,
RunE: run,
}
func initRoot(cmds ...*cobra.Command) *cobra.Command {
rootCommand.PersistentFlags().BoolVar(&opt.Cfg.Debug, "debug", false, "")
rootCommand.PersistentFlags().BoolVar(&opt.Cfg.Dev, "dev", false, "")
rootCommand.PersistentFlags().BoolVar(&opt.Cfg.DisablePing, "disable-ping", false, "")
rootCommand.PersistentFlags().BoolVarP(&opt.Cfg.Args.Version, "version", "v", false, "print esgo2dump version")
// rootCommand.Flags().IntVar(&opt.Cfg.Args.Timeout, "timeout", 30, "max timeout seconds per operation with limit")
rootCommand.Flags().StringVarP(&opt.Cfg.Args.Input, "input", "i", "", "*required: input file or es url (example :data.json / http://127.0.0.1:9200/my_index)")
rootCommand.Flags().StringVarP(&opt.Cfg.Args.Output, "output", "o", "output.json", "")
rootCommand.Flags().StringVarP(&opt.Cfg.Args.Type, "type", "t", "data", "data/mapping/setting")
rootCommand.Flags().StringVar(&opt.Cfg.Args.Field, "field", "", "query include field, use ',' to separate")
rootCommand.Flags().StringVar(&opt.Cfg.Args.Sort, "sort", "", "sort, <field>:<direction> format, for example: time:desc or name:asc, user ',' to separate")
rootCommand.Flags().StringVar(&opt.Cfg.Args.Query, "query", "", `query dsl, example: {"bool":{"must":[{"term":{"name":{"value":"some_name"}}}],"must_not":[{"range":{"age":{"gte":18,"lt":60}}}]}}`)
rootCommand.Flags().StringVar(&opt.Cfg.Args.QueryFile, "query_file", "", `query json file (will execute line by line)`)
rootCommand.Flags().IntVar(&opt.Cfg.Args.Limit, "limit", 100, "")
rootCommand.Flags().IntVar(&opt.Cfg.Args.Max, "max", 0, "max dump records")
rootCommand.AddCommand(cmds...)
return rootCommand
}
func Run(ctx context.Context) error {
return rootCommand.ExecuteContext(ctx)
}

74
internal/cmd/root.run.go Normal file
View File

@ -0,0 +1,74 @@
package cmd
import (
"fmt"
"github.com/loveuer/esgo2dump/internal/core"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/tool"
"github.com/loveuer/esgo2dump/pkg/log"
"github.com/loveuer/esgo2dump/pkg/model"
"github.com/spf13/cobra"
"os"
)
func preRun(cmd *cobra.Command, args []string) error {
if opt.Cfg.Debug {
log.SetLogLevel(log.LogLevelDebug)
}
if opt.Cfg.Args.Version {
fmt.Printf("esgo2dump version: %s\n", opt.Version)
os.Exit(0)
}
if opt.Cfg.Debug {
tool.TablePrinter(opt.Cfg)
}
// check args
if opt.Cfg.Args.Input == "" {
return cmd.Help()
}
if opt.Cfg.Args.Limit == 0 || opt.Cfg.Args.Limit > 10000 {
return fmt.Errorf("invalid limit(1 - 10000)")
}
if opt.Cfg.Args.Query != "" && opt.Cfg.Args.QueryFile != "" {
return fmt.Errorf("cannot specify both query and query_file at the same time")
}
switch opt.Cfg.Args.Type {
case "data", "mapping", "setting":
default:
return fmt.Errorf("unknown type=%s", opt.Cfg.Args.Type)
}
return nil
}
func run(cmd *cobra.Command, args []string) error {
var (
err error
input model.IO[map[string]any]
output model.IO[map[string]any]
)
if input, err = core.NewIO(cmd.Context(), opt.Cfg.Args.Input, model.Input); err != nil {
return err
}
if output, err = core.NewIO(cmd.Context(), opt.Cfg.Args.Output, model.Output); err != nil {
return err
}
switch opt.Cfg.Args.Type {
case "data":
return core.RunData(cmd, input, output)
case "mapping":
return core.RunMapping(cmd, input, output)
case "setting":
return core.RunSetting(cmd, input, output)
}
return fmt.Errorf("unknown args: type = %s", opt.Cfg.Args.Type)
}

View File

@ -1,294 +0,0 @@
package cmd
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"github.com/loveuer/esgo2dump/log"
"github.com/loveuer/esgo2dump/model"
"net/url"
"os"
"strings"
"github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/xes"
"github.com/loveuer/esgo2dump/internal/xfile"
"github.com/samber/lo"
"github.com/spf13/cobra"
)
func check(cmd *cobra.Command) error {
if f_input == "" {
return cmd.Help()
//return fmt.Errorf("must specify input(example: data.json/http://127.0.0.1:9200/my_index)")
}
if f_limit == 0 || f_limit > 10000 {
return fmt.Errorf("invalid limit(1 - 10000)")
}
if f_query != "" && f_query_file != "" {
return fmt.Errorf("cannot specify both query and query_file at the same time")
}
switch f_type {
case "data", "mapping", "setting":
default:
return fmt.Errorf("unknown type=%s", f_type)
}
return nil
}
func run(cmd *cobra.Command, args []string) error {
var (
err error
ioi interfaces.DumpIO
ioo interfaces.DumpIO
)
if opt.Debug {
log.SetLogLevel(log.LogLevelDebug)
}
if f_version {
fmt.Printf("esgo2dump (Version: %s)\n", opt.Version)
os.Exit(0)
}
if err = check(cmd); err != nil {
return err
}
if ioi, err = newIO(f_input, interfaces.IOInput, es_iversion); err != nil {
return err
}
if ioo, err = newIO(f_output, interfaces.IOOutput, es_oversion); err != nil {
return err
}
defer func() {
_ = ioi.Close()
_ = ioo.Close()
}()
if (f_query_file != "" || f_query != "") && ioi.IsFile() {
return fmt.Errorf("with file input, query or query_file can't be supported")
}
if (f_source != "") && ioi.IsFile() {
return fmt.Errorf("with file input, source can't be supported")
}
switch f_type {
case "data":
if err = executeData(cmd.Context(), ioi, ioo); err != nil {
return err
}
log.Info("Dump: write data succeed!!!")
return nil
case "mapping":
var mapping map[string]any
if mapping, err = ioi.ReadMapping(cmd.Context()); err != nil {
return err
}
if err = ioo.WriteMapping(cmd.Context(), mapping); err != nil {
return err
}
log.Info("Dump: write mapping succeed!!!")
return nil
case "setting":
var setting map[string]any
if setting, err = ioi.ReadSetting(cmd.Context()); err != nil {
return err
}
if err = ioo.WriteSetting(cmd.Context(), setting); err != nil {
return err
}
log.Info("Dump: write setting succeed!!!")
return nil
default:
return fmt.Errorf("unknown type=%s", f_type)
}
}
func executeData(ctx context.Context, input, output interfaces.DumpIO) error {
var (
err error
queries = make([]map[string]any, 0)
sources = make([]string, 0)
)
if f_source != "" {
sources = lo.Map(strings.Split(f_source, ";"), func(item string, idx int) string {
return strings.TrimSpace(item)
})
}
if f_query != "" {
query := make(map[string]any)
if err = json.Unmarshal([]byte(f_query), &query); err != nil {
return fmt.Errorf("invalid query err=%v", err)
}
queries = append(queries, query)
}
if f_query_file != "" {
var (
qf *os.File
)
if qf, err = os.Open(f_query_file); err != nil {
return fmt.Errorf("open query_file err=%v", err)
}
defer func() {
_ = qf.Close()
}()
scanner := bufio.NewScanner(qf)
scanner.Buffer(make([]byte, 1*1024*1024), 5*1024*1024)
lineCount := 1
for scanner.Scan() {
line := scanner.Text()
oq := make(map[string]any)
if err = json.Unmarshal([]byte(line), &oq); err != nil {
return fmt.Errorf("query file line=%d invalid err=%v", lineCount, err)
}
queries = append(queries, oq)
if len(queries) > 10000 {
return fmt.Errorf("query_file support max lines=%d", 10000)
}
lineCount++
}
}
if len(queries) == 0 {
queries = append(queries, nil)
}
var (
ok bool
docs []*model.ESSource
dch <-chan []*model.ESSource
ech <-chan error
e2ch = make(chan error)
wch = make(chan []*model.ESSource)
)
go func() {
defer func() {
close(wch)
close(e2ch)
}()
if err = output.WriteData(ctx, wch); err != nil {
e2ch <- err
}
}()
log.Info("Query: got queries=%d", len(queries))
Loop:
for _, query := range queries {
dch, ech = input.ReadData(ctx, f_limit, query, sources, []string{f_sort})
for {
select {
case <-ctx.Done():
return ctx.Err()
case err, ok = <-ech:
if err != nil {
return err
}
continue Loop
case err, _ = <-e2ch:
return err
case docs, ok = <-dch:
if !ok || len(docs) == 0 {
continue Loop
}
wch <- docs
}
}
}
return nil
}
func newIO(source string, ioType interfaces.IO, esv string) (interfaces.DumpIO, error) {
var (
err error
iurl *url.URL
file *os.File
qm = make(map[string]any)
)
log.Debug("action=%s, type=%s, source=%s, es_version=%s", "new_io", ioType.Code(), source, esv)
if iurl, err = url.Parse(source); err != nil {
log.Debug("action=%s, type=%s, source=%s, err=%s", "new_io url parse err", ioType.Code(), source, err.Error())
goto ClientByFile
}
if !(iurl.Scheme == "http" || iurl.Scheme == "https") {
log.Debug("action=%s, type=%s, source=%s, scheme=%s", "new_io url scheme error", ioType.Code(), source, iurl.Scheme)
goto ClientByFile
}
if iurl.Host == "" {
log.Debug("action=%s, type=%s, source=%s", "new_io url host empty", ioType.Code(), source)
goto ClientByFile
}
if ioType == interfaces.IOInput && f_query != "" {
if err = json.Unmarshal([]byte(f_query), &qm); err != nil {
log.Debug("action=%s, type=%s, source=%s, query=%s", "new_io query string invalid", ioType.Code(), source, f_query)
return nil, fmt.Errorf("invalid query err=%v", err)
}
}
switch esv {
case "7":
return xes.NewClient(iurl, ioType)
case "6":
return xes.NewClientV6(iurl, ioType)
case "8":
return nil, errors.New("es version 8 coming soon")
default:
return nil, fmt.Errorf("unknown es version=%s", esv)
}
ClientByFile:
if ioType == interfaces.IOOutput {
if _, err = os.Stat(source); !os.IsNotExist(err) {
return nil, fmt.Errorf("output_file=%s already exist", source)
}
}
if file, err = os.OpenFile(source, os.O_CREATE|os.O_RDWR, 0644); err != nil {
return nil, err
}
return xfile.NewClient(file, ioType)
}

81
internal/core/io.go Normal file
View File

@ -0,0 +1,81 @@
package core
import (
"context"
"encoding/json"
"fmt"
elastic7 "github.com/elastic/go-elasticsearch/v7"
"github.com/go-resty/resty/v2"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/tool"
"github.com/loveuer/esgo2dump/internal/xfile"
"github.com/loveuer/esgo2dump/pkg/log"
"github.com/loveuer/esgo2dump/pkg/model"
"github.com/loveuer/esgo2dump/xes/es7"
"net/url"
"strings"
)
func NewIO(ctx context.Context, uri string, ioType model.IOType) (model.IO[map[string]any], error) {
type Version struct {
Name string
Version struct {
Number string `json:"number"`
} `json:"version"`
}
var (
err error
target *url.URL
rr *resty.Response
v Version
)
if target, err = url.Parse(uri); err != nil {
log.Debug("parse uri failed, type = %s, uri = %s, err = %s", ioType, uri, err.Error())
return xfile.NewClient(uri, ioType)
}
if err = tool.ValidScheme(target.Scheme); err != nil {
log.Debug("uri scheme check failed, type = %s, uri = %s", ioType, uri)
return xfile.NewClient(uri, ioType)
}
// elastic uri
index := strings.TrimPrefix(target.Path, "/")
if index == "" {
return nil, fmt.Errorf("uri invalid without index(path)")
}
log.Debug("%s uri es index = %s", ioType, index)
versionURL := fmt.Sprintf("%s://%s", target.Scheme, strings.Split(target.Host, ",")[0])
log.Debug("%s version url = %s", ioType, versionURL)
if rr, err = opt.HttpClient.R().Get(versionURL); err != nil {
log.Debug("get uri es version failed, type = %s, uri = %s, version_url = %s, err = %s", ioType, uri, versionURL, err.Error())
}
if err = json.Unmarshal(rr.Body(), &v); err != nil {
log.Debug("decode uri es version failed, type = %s, uri = %s, version_url = %s, err = %s", ioType, uri, versionURL, err.Error())
return nil, err
}
log.Debug("%s uri es version = %s", ioType, v.Version.Number)
mainVersion := strings.Split(v.Version.Number, ".")[0]
switch mainVersion {
case "8":
case "7":
var client *elastic7.Client
if client, err = es7.NewClient(ctx, uri); err != nil {
return nil, err
}
return es7.NewStreamer(ctx, client, index)
case "6":
default:
return nil, fmt.Errorf("es version not supported yet: %s", mainVersion)
}
return nil, nil
}

125
internal/core/run.data.go Normal file
View File

@ -0,0 +1,125 @@
package core
import (
"bufio"
"encoding/json"
"fmt"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/tool"
"github.com/loveuer/esgo2dump/pkg/log"
"github.com/loveuer/esgo2dump/pkg/model"
"github.com/samber/lo"
"github.com/spf13/cobra"
"os"
"strings"
"sync"
)
func RunData(cmd *cobra.Command, input, output model.IO[map[string]any]) error {
var (
err error
// query chan
qc = make(chan map[string]any)
// error chan
ec = make(chan error)
// done chan
wc = &sync.WaitGroup{}
total = 0
)
wc.Add(1)
go func() {
var (
wroteCount = 0
items []map[string]any
)
defer wc.Done()
for query := range qc {
for {
limit := tool.CalculateLimit(opt.Cfg.Args.Limit, total, opt.Cfg.Args.Max)
log.Debug("one-step dump: arg.limit = %d, total = %d, arg.max = %d, calculate.limit = %d", opt.Cfg.Args.Limit, total, opt.Cfg.Args.Max, limit)
if limit == 0 {
break
}
if items, err = input.ReadData(
cmd.Context(),
limit,
query,
lo.Filter(strings.Split(opt.Cfg.Args.Field, ","), func(x string, _ int) bool { return x != "" }),
lo.Filter(strings.Split(opt.Cfg.Args.Sort, ","), func(x string, _ int) bool { return x != "" }),
); err != nil {
ec <- err
return
}
if len(items) == 0 {
break
}
if wroteCount, err = output.WriteData(cmd.Context(), items); err != nil {
ec <- err
return
}
total += wroteCount
if wroteCount != len(items) {
ec <- fmt.Errorf("got items %d, but wrote %d", len(items), wroteCount)
return
}
log.Info("Dump: dump data success = %d total = %d", wroteCount, total)
}
}
}()
switch {
case opt.Cfg.Args.QueryFile != "":
var (
// query file
qf *os.File
queryCount = 0
)
if qf, err = os.Open(opt.Cfg.Args.QueryFile); err != nil {
return err
}
scanner := bufio.NewScanner(qf)
for scanner.Scan() {
queryCount++
qm := make(map[string]any)
if err = json.Unmarshal(scanner.Bytes(), &qm); err != nil {
return err
}
qc <- qm
log.Debug("Dump: queries[%06d]", queryCount)
}
case opt.Cfg.Args.Query != "":
var (
qm = make(map[string]any)
)
if err = json.Unmarshal([]byte(opt.Cfg.Args.Query), &qm); err != nil {
return err
}
qc <- qm
default:
qc <- nil
}
// close query chan to stop trans_io_goroutine
close(qc)
wc.Wait()
log.Info("Dump: dump all data success, total = %d", total)
return nil
}

View File

@ -0,0 +1,19 @@
package core
import (
"github.com/loveuer/esgo2dump/pkg/model"
"github.com/spf13/cobra"
)
func RunMapping(cmd *cobra.Command, input model.IO[map[string]any], output model.IO[map[string]any]) error {
mapping, err := input.ReadMapping(cmd.Context())
if err != nil {
return err
}
if err = output.WriteMapping(cmd.Context(), mapping); err != nil {
return err
}
return nil
}

View File

@ -0,0 +1,19 @@
package core
import (
"github.com/loveuer/esgo2dump/pkg/model"
"github.com/spf13/cobra"
)
func RunSetting(cmd *cobra.Command, input model.IO[map[string]any], output model.IO[map[string]any]) error {
setting, err := input.ReadSetting(cmd.Context())
if err != nil {
return err
}
if err = output.WriteSetting(cmd.Context(), setting); err != nil {
return err
}
return nil
}

View File

@ -1,22 +0,0 @@
package interfaces
import (
"context"
"github.com/loveuer/esgo2dump/model"
)
type DumpIO interface {
ReadData(ctx context.Context, size int, query map[string]any, includeFields []string, sort []string) (<-chan []*model.ESSource, <-chan error)
WriteData(ctx context.Context, docsCh <-chan []*model.ESSource) error
ReadMapping(context.Context) (map[string]any, error)
WriteMapping(context.Context, map[string]any) error
ReadSetting(ctx context.Context) (map[string]any, error)
WriteSetting(context.Context, map[string]any) error
Close() error
IOType() IO
IsFile() bool
}

View File

@ -1,27 +0,0 @@
package interfaces
type IO int64
const (
IOInput IO = iota
IOOutput
)
func (io IO) Code() string {
switch io {
case IOInput:
return "input"
case IOOutput:
return "output"
default:
return "unknown"
}
}
type DataType int64
const (
DataTypeData DataType = iota
DataTypeMapping
DataTypeSetting
)

24
internal/opt/opt.go Normal file
View File

@ -0,0 +1,24 @@
package opt
type args struct {
Version bool
Input string
Output string
Limit int
Max int
Type string
Timeout int
Field string
Sort string
Query string
QueryFile string
}
type config struct {
Debug bool `json:"-"`
Dev bool `json:"-"`
DisablePing bool `json:"-"`
Args args `json:"-"`
}
var Cfg = &config{}

View File

@ -1,13 +1,22 @@
package opt
import (
"crypto/tls"
"github.com/go-resty/resty/v2"
)
const (
ScrollDurationSeconds = 10 * 60
DefaultSize = 100
)
var (
Debug bool
Version = "vx.x.x"
Timeout int
BuffSize = 5 * 1024 * 1024 // 5M
MaxBuffSize = 100 * 1024 * 1024 // 100M, default elastic_search doc max size
HttpClient = resty.New().SetTLSClientConfig(&tls.Config{InsecureSkipVerify: true})
)

View File

@ -1,3 +0,0 @@
package opt
const Version = "v0.2.1"

View File

@ -1,4 +1,4 @@
package util
package tool
import (
"context"

21
internal/tool/min.go Normal file
View File

@ -0,0 +1,21 @@
package tool
func Min[T ~string | ~int | ~int64 | ~uint64 | ~float64 | ~float32 | ~int32 | ~uint32 | ~int16 | ~uint16 | ~int8 | ~uint8](a, b T) T {
if a <= b {
return a
}
return b
}
func CalculateLimit(limit, total, max int) int {
if max == 0 {
return limit
}
if max-total > 0 {
return Min(max-total, limit)
}
return 0
}

125
internal/tool/table.go Normal file
View File

@ -0,0 +1,125 @@
package tool
import (
"encoding/json"
"fmt"
"io"
"os"
"reflect"
"strings"
"github.com/jedib0t/go-pretty/v6/table"
"github.com/loveuer/esgo2dump/pkg/log"
)
func TablePrinter(data any, writers ...io.Writer) {
var w io.Writer = os.Stdout
if len(writers) > 0 && writers[0] != nil {
w = writers[0]
}
t := table.NewWriter()
structPrinter(t, "", data)
_, _ = fmt.Fprintln(w, t.Render())
}
func structPrinter(w table.Writer, prefix string, item any) {
Start:
rv := reflect.ValueOf(item)
if rv.IsZero() {
return
}
for rv.Type().Kind() == reflect.Pointer {
rv = rv.Elem()
}
switch rv.Type().Kind() {
case reflect.Invalid,
reflect.Uintptr,
reflect.Chan,
reflect.Func,
reflect.UnsafePointer:
case reflect.Bool,
reflect.Int,
reflect.Int8,
reflect.Int16,
reflect.Int32,
reflect.Int64,
reflect.Uint,
reflect.Uint8,
reflect.Uint16,
reflect.Uint32,
reflect.Uint64,
reflect.Float32,
reflect.Float64,
reflect.Complex64,
reflect.Complex128,
reflect.Interface:
w.AppendRow(table.Row{strings.TrimPrefix(prefix, "."), rv.Interface()})
case reflect.String:
val := rv.String()
if len(val) <= 160 {
w.AppendRow(table.Row{strings.TrimPrefix(prefix, "."), val})
return
}
w.AppendRow(table.Row{strings.TrimPrefix(prefix, "."), val[0:64] + "..." + val[len(val)-64:]})
case reflect.Array, reflect.Slice:
for i := 0; i < rv.Len(); i++ {
p := strings.Join([]string{prefix, fmt.Sprintf("[%d]", i)}, ".")
structPrinter(w, p, rv.Index(i).Interface())
}
case reflect.Map:
for _, k := range rv.MapKeys() {
structPrinter(w, fmt.Sprintf("%s.{%v}", prefix, k), rv.MapIndex(k).Interface())
}
case reflect.Pointer:
goto Start
case reflect.Struct:
for i := 0; i < rv.NumField(); i++ {
p := fmt.Sprintf("%s.%s", prefix, rv.Type().Field(i).Name)
field := rv.Field(i)
// log.Debug("TablePrinter: prefix: %s, field: %v", p, rv.Field(i))
if !field.CanInterface() {
return
}
structPrinter(w, p, field.Interface())
}
}
}
func TableMapPrinter(data []byte) {
m := make(map[string]any)
if err := json.Unmarshal(data, &m); err != nil {
log.Warn(err.Error())
return
}
t := table.NewWriter()
addRow(t, "", m)
fmt.Println(t.Render())
}
func addRow(w table.Writer, prefix string, m any) {
rv := reflect.ValueOf(m)
switch rv.Type().Kind() {
case reflect.Map:
for _, k := range rv.MapKeys() {
key := k.String()
if prefix != "" {
key = strings.Join([]string{prefix, k.String()}, ".")
}
addRow(w, key, rv.MapIndex(k).Interface())
}
case reflect.Slice, reflect.Array:
for i := 0; i < rv.Len(); i++ {
addRow(w, fmt.Sprintf("%s[%d]", prefix, i), rv.Index(i).Interface())
}
default:
w.AppendRow(table.Row{prefix, m})
}
}

15
internal/tool/validate.go Normal file
View File

@ -0,0 +1,15 @@
package tool
import (
"fmt"
"strings"
)
func ValidScheme(scheme string) error {
switch strings.ToLower(scheme) {
case "http", "https":
return nil
default:
return fmt.Errorf("invalid scheme: %s", scheme)
}
}

View File

@ -1,233 +0,0 @@
package xes
import (
"bytes"
"context"
"crypto/tls"
"encoding/json"
"fmt"
"github.com/loveuer/esgo2dump/log"
"github.com/loveuer/esgo2dump/model"
"github.com/loveuer/esgo2dump/xes/es6"
"net"
"net/http"
"net/url"
"strings"
"time"
elastic "github.com/elastic/go-elasticsearch/v6"
"github.com/elastic/go-elasticsearch/v6/esapi"
"github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/util"
)
func NewClientV6(url *url.URL, iot interfaces.IO) (interfaces.DumpIO, error) {
var (
address = fmt.Sprintf("%s://%s", url.Scheme, url.Host)
urlIndex = strings.TrimPrefix(url.Path, "/")
urlUsername string
urlPassword string
errCh = make(chan error)
cliCh = make(chan *elastic.Client)
)
if url.User != nil {
urlUsername = url.User.Username()
if p, ok := url.User.Password(); ok {
urlPassword = p
}
}
log.Debug("action=%s, endpoint=%s, index=%s, username=%s, password=%s", "new es client v6", address, urlIndex, urlUsername, urlPassword)
if urlIndex == "" {
return nil, fmt.Errorf("please specify index name: (like => http://127.0.0.1:9200/my_index)")
}
ncFunc := func(endpoints []string, username, password, index string) {
var (
err error
cli *elastic.Client
infoResp *esapi.Response
)
if cli, err = elastic.NewClient(
elastic.Config{
Addresses: endpoints,
Username: username,
Password: password,
CACert: nil,
RetryOnStatus: []int{429},
MaxRetries: 3,
RetryBackoff: nil,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DialContext: (&net.Dialer{Timeout: 10 * time.Second}).DialContext,
},
},
); err != nil {
log.Debug("action=%s, endpoints=%v, err=%s", "new es client v6 error", endpoints, err.Error())
errCh <- err
return
}
if infoResp, err = cli.Info(); err != nil {
log.Debug("action=%s, endpoints=%v, err=%s", "new es client v6 info error", endpoints, err.Error())
errCh <- err
return
}
if infoResp.StatusCode != 200 {
err = fmt.Errorf("info xes status=%d", infoResp.StatusCode)
log.Debug("action=%s, endpoints=%v, err=%s", "es client v6 ping status error", endpoints, err.Error())
errCh <- err
return
}
cliCh <- cli
}
go ncFunc([]string{address}, urlUsername, urlPassword, urlIndex)
select {
case <-util.Timeout(10).Done():
return nil, fmt.Errorf("dial es=%s err=%v", address, context.DeadlineExceeded)
case c := <-cliCh:
return &clientv6{client: c, index: urlIndex, iot: iot}, nil
case e := <-errCh:
return nil, e
}
}
type clientv6 struct {
client *elastic.Client
iot interfaces.IO
index string
}
func (c *clientv6) Info(msg string, data ...any) {
log.Info(msg, data...)
}
func (c *clientv6) WriteData(ctx context.Context, docsCh <-chan []*model.ESSource) error {
return es6.WriteData(ctx, c.client, c.index, docsCh, c)
}
func (c *clientv6) checkResponse(r *esapi.Response) error {
if r.StatusCode == 200 {
return nil
}
return fmt.Errorf("status=%d msg=%s", r.StatusCode, r.String())
}
func (c *clientv6) IOType() interfaces.IO {
return c.iot
}
func (c *clientv6) IsFile() bool {
return false
}
func (c *clientv6) Close() error {
return nil
}
func (c *clientv6) ReadData(ctx context.Context, size int, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
dch, ech := es6.ReadData(ctx, c.client, c.index, size, 0, query, source, sort)
return dch, ech
}
func (c *clientv6) ReadMapping(ctx context.Context) (map[string]any, error) {
r, err := c.client.Indices.GetMapping(
c.client.Indices.GetMapping.WithIndex(c.index),
)
if err != nil {
return nil, err
}
if r.StatusCode != 200 {
return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
}
m := make(map[string]any)
decoder := json.NewDecoder(r.Body)
if err = decoder.Decode(&m); err != nil {
return nil, err
}
return m, nil
}
func (c *clientv6) WriteMapping(ctx context.Context, m map[string]any) error {
var (
err error
bs []byte
result *esapi.Response
)
for idxKey := range m {
if bs, err = json.Marshal(m[idxKey]); err != nil {
return err
}
if result, err = c.client.Indices.Create(
c.index,
c.client.Indices.Create.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
c.client.Indices.Create.WithBody(bytes.NewReader(bs)),
); err != nil {
return err
}
if err = c.checkResponse(result); err != nil {
return err
}
}
return nil
}
func (c *clientv6) ReadSetting(ctx context.Context) (map[string]any, error) {
r, err := c.client.Indices.GetSettings(
c.client.Indices.GetSettings.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
c.client.Indices.GetSettings.WithIndex(c.index),
)
if err != nil {
return nil, err
}
if r.StatusCode != 200 {
return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
}
m := make(map[string]any)
decoder := json.NewDecoder(r.Body)
if err = decoder.Decode(&m); err != nil {
return nil, err
}
return m, nil
}
func (c *clientv6) WriteSetting(ctx context.Context, m map[string]any) error {
var (
err error
bs []byte
result *esapi.Response
)
if bs, err = json.Marshal(m); err != nil {
return err
}
if result, err = c.client.Indices.PutSettings(
bytes.NewReader(bs),
c.client.Indices.PutSettings.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
); err != nil {
return err
}
return c.checkResponse(result)
}

View File

@ -1,223 +0,0 @@
package xes
import (
"bytes"
"context"
"encoding/json"
"fmt"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/internal/util"
"github.com/loveuer/esgo2dump/log"
"github.com/loveuer/esgo2dump/model"
"github.com/loveuer/esgo2dump/xes/es7"
"net/url"
"strings"
)
type client struct {
client *elastic.Client
iot interfaces.IO
index string
}
func (c *client) Info(msg string, data ...any) {
log.Info(msg, data...)
}
func (c *client) WriteData(ctx context.Context, docsCh <-chan []*model.ESSource) error {
return es7.WriteData(ctx, c.client, c.index, docsCh, c)
}
func NewClient(url *url.URL, iot interfaces.IO) (interfaces.DumpIO, error) {
var (
urlIndex = strings.TrimPrefix(url.Path, "/")
cli *elastic.Client
err error
)
if urlIndex == "" {
return nil, fmt.Errorf("please specify index name: (like => http://127.0.0.1:9200/my_index)")
}
if cli, err = es7.NewClient(context.TODO(), url); err != nil {
return nil, err
}
return &client{client: cli, iot: iot, index: urlIndex}, nil
}
func (c *client) checkResponse(r *esapi.Response) error {
if r.StatusCode == 200 {
return nil
}
return fmt.Errorf("status=%d msg=%s", r.StatusCode, r.String())
}
func (c *client) IOType() interfaces.IO {
return c.iot
}
func (c *client) IsFile() bool {
return false
}
func (c *client) Close() error {
return nil
}
//func (c *client) WriteData(ctx context.Context, docs []*model.ESSource) (int, error) {
// var (
// err error
// indexer esutil.BulkIndexer
// count int
// be error
// )
// if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
// Client: c.client,
// Index: c.index,
// ErrorTrace: true,
// OnError: func(ctx context.Context, err error) {
//
// },
// }); err != nil {
// return 0, err
// }
//
// for _, doc := range docs {
// var bs []byte
//
// if bs, err = json.Marshal(doc.Content); err != nil {
// return 0, err
// }
//
// if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{
// Action: "index",
// Index: c.index,
// DocumentID: doc.DocId,
// Body: bytes.NewReader(bs),
// OnFailure: func(ctx context.Context, item esutil.BulkIndexerItem, item2 esutil.BulkIndexerResponseItem, bulkErr error) {
// be = bulkErr
// },
// }); err != nil {
// return 0, err
// }
// count++
// }
//
// if err = indexer.Close(util.TimeoutCtx(ctx, opt.Timeout)); err != nil {
// return 0, err
// }
//
// if be != nil {
// return 0, be
// }
//
// stats := indexer.Stats()
// if stats.NumFailed > 0 {
// return count, fmt.Errorf("write to xes failed_count=%d bulk_count=%d", stats.NumFailed, count)
// }
//
// return count, nil
//}
func (c *client) ReadData(ctx context.Context, size int, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
dch, ech := es7.ReadData(ctx, c.client, c.index, size, 0, query, source, sort)
return dch, ech
}
func (c *client) ReadMapping(ctx context.Context) (map[string]any, error) {
r, err := c.client.Indices.GetMapping(
c.client.Indices.GetMapping.WithIndex(c.index),
)
if err != nil {
return nil, err
}
if r.StatusCode != 200 {
return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
}
m := make(map[string]any)
decoder := json.NewDecoder(r.Body)
if err = decoder.Decode(&m); err != nil {
return nil, err
}
return m, nil
}
func (c *client) WriteMapping(ctx context.Context, m map[string]any) error {
var (
err error
bs []byte
result *esapi.Response
)
for idxKey := range m {
if bs, err = json.Marshal(m[idxKey]); err != nil {
return err
}
if result, err = c.client.Indices.Create(
c.index,
c.client.Indices.Create.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
c.client.Indices.Create.WithBody(bytes.NewReader(bs)),
); err != nil {
return err
}
if err = c.checkResponse(result); err != nil {
return err
}
}
return nil
}
func (c *client) ReadSetting(ctx context.Context) (map[string]any, error) {
r, err := c.client.Indices.GetSettings(
c.client.Indices.GetSettings.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
c.client.Indices.GetSettings.WithIndex(c.index),
)
if err != nil {
return nil, err
}
if r.StatusCode != 200 {
return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
}
m := make(map[string]any)
decoder := json.NewDecoder(r.Body)
if err = decoder.Decode(&m); err != nil {
return nil, err
}
return m, nil
}
func (c *client) WriteSetting(ctx context.Context, m map[string]any) error {
var (
err error
bs []byte
result *esapi.Response
)
if bs, err = json.Marshal(m); err != nil {
return err
}
if result, err = c.client.Indices.PutSettings(
bytes.NewReader(bs),
c.client.Indices.PutSettings.WithContext(util.TimeoutCtx(ctx, opt.Timeout)),
); err != nil {
return err
}
return c.checkResponse(result)
}

View File

@ -1,107 +0,0 @@
package xes
import (
"bufio"
"fmt"
"os"
"testing"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/loveuer/esgo2dump/internal/util"
)
func TestGetESMapping(t *testing.T) {
endpoint := "http://127.0.0.1:9200"
index := "some_index"
cli, err := elastic.NewClient(elastic.Config{
Addresses: []string{endpoint},
})
if err != nil {
t.Error(1, err)
return
}
resp, err := cli.Info(cli.Info.WithContext(util.Timeout(5)))
if err != nil {
t.Error(2, err)
return
}
t.Log("info:", resp.String())
r, err := cli.Indices.GetMapping(
cli.Indices.GetMapping.WithIndex(index),
)
if err != nil {
t.Error(3, err)
return
}
t.Log("get source:", r.String())
}
func TestScanWithInterrupt(t *testing.T) {
filename := "test_scan.txt"
f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
if err != nil {
t.Error(1, err)
return
}
defer func() {
os.Remove(filename)
}()
f.WriteString(`line 01
line 02
line 03
line 04
line 05
line 06
line 07
line 08
line 09
line 10
line 11
line 12
line 13
line 14
line 15`)
f.Close()
of, err := os.Open(filename)
if err != nil {
t.Error(2, err)
return
}
scanner := bufio.NewScanner(of)
count := 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
if count > 5 {
break
}
}
count = 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
if count > 5 {
break
}
}
count = 0
for scanner.Scan() {
text := scanner.Text()
fmt.Printf("[line: %2d] = %s\n", count, text)
count++
}
}

View File

@ -4,42 +4,74 @@ import (
"bufio"
"context"
"encoding/json"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/log"
"github.com/loveuer/esgo2dump/model"
"fmt"
"io"
"os"
"github.com/loveuer/esgo2dump/internal/interfaces"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/pkg/log"
"github.com/loveuer/esgo2dump/pkg/model"
)
type client struct {
info os.FileInfo
f *os.File
iot interfaces.IO
scanner *bufio.Scanner
}
func (c *client) WriteData(ctx context.Context, docsCh <-chan []*model.ESSource) error {
func (c *client) ReadData(ctx context.Context, limit int, query map[string]any, fields []string, sort []string) ([]map[string]any, error) {
if len(query) != 0 {
return nil, fmt.Errorf("file with query is unsupported")
}
if len(sort) != 0 {
return nil, fmt.Errorf("file with sort is unsupported")
}
list := make([]map[string]any, 0, limit)
for c.scanner.Scan() {
line := c.scanner.Bytes()
item := make(map[string]any)
if err := json.Unmarshal(line, &item); err != nil {
return nil, err
}
if len(fields) > 0 {
// todo: pick fields
}
list = append(list, item)
if len(list) >= limit {
return list, nil
}
}
return list, nil
}
func (c *client) WriteData(ctx context.Context, items []map[string]any) (int, error) {
total := 0
for line := range docsCh {
for _, doc := range line {
bs, err := json.Marshal(doc)
for _, item := range items {
bs, err := json.Marshal(item)
if err != nil {
return err
return total, err
}
if _, err = c.f.Write(append(bs, '\n')); err != nil {
return err
if _, err = c.f.Write(bs); err != nil {
return total, err
}
total++
if _, err = c.f.WriteString("\n"); err != nil {
return total, err
}
}
count := len(line)
total += count
log.Info("Dump: succeed=%d total=%d docs succeed!!!", count, total)
}
return nil
return total, nil
}
func (c *client) ReadMapping(ctx context.Context) (map[string]any, error) {
@ -61,6 +93,17 @@ func (c *client) ReadMapping(ctx context.Context) (map[string]any, error) {
return m, nil
}
func (c *client) WriteMapping(ctx context.Context, mapping map[string]any) error {
bs, err := json.Marshal(mapping)
if err != nil {
return err
}
_, err = c.f.Write(bs)
return err
}
func (c *client) ReadSetting(ctx context.Context) (map[string]any, error) {
var (
err error
@ -80,8 +123,8 @@ func (c *client) ReadSetting(ctx context.Context) (map[string]any, error) {
return m, nil
}
func (c *client) WriteMapping(ctx context.Context, m map[string]any) error {
bs, err := json.Marshal(m)
func (c *client) WriteSetting(ctx context.Context, setting map[string]any) error {
bs, err := json.Marshal(setting)
if err != nil {
return err
}
@ -91,95 +134,45 @@ func (c *client) WriteMapping(ctx context.Context, m map[string]any) error {
return err
}
func (c *client) WriteSetting(ctx context.Context, m map[string]any) error {
bs, err := json.Marshal(m)
if err != nil {
return err
}
_, err = c.f.Write(bs)
return err
}
func (c *client) IOType() interfaces.IO {
return c.iot
}
func (c *client) IsFile() bool {
return true
}
func (c *client) ReadData(ctx context.Context, size int, _ map[string]any, _ []string, _ []string) (<-chan []*model.ESSource, <-chan error) {
func NewClient(path string, t model.IOType) (model.IO[map[string]any], error) {
var (
info os.FileInfo
err error
count = 0
list = make([]*model.ESSource, 0, size)
dch = make(chan []*model.ESSource)
ech = make(chan error)
ready = make(chan bool)
f *os.File
)
go func(ctx context.Context) {
defer func() {
close(dch)
close(ech)
}()
switch t {
case model.Input:
if info, err = os.Stat(path); err != nil {
return nil, err
}
ready <- true
log.Debug("input file: %s, size: %d", path, info.Size())
for c.scanner.Scan() {
select {
case <-ctx.Done():
return
if f, err = os.Open(path); err != nil {
return nil, err
}
case model.Output:
if info, err = os.Stat(path); err == nil {
return nil, fmt.Errorf("file already exists: %s", path)
}
if !os.IsNotExist(err) {
return nil, err
}
if f, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC|os.O_APPEND, 0o644); err != nil {
return nil, err
}
default:
item := new(model.ESSource)
line := c.scanner.Bytes()
if err = json.Unmarshal(line, item); err != nil {
ech <- err
return
return nil, fmt.Errorf("unknown type: %s", t)
}
list = append(list, item)
count++
if count >= size {
dch <- list
list = list[:0]
count = 0
}
}
}
if len(list) > 0 {
dch <- list
list = list[:0]
count = 0
}
if err = c.scanner.Err(); err != nil {
ech <- err
}
}(ctx)
<-ready
return dch, ech
}
func (c *client) Close() error {
return c.f.Close()
}
func NewClient(file *os.File, ioType interfaces.IO) (interfaces.DumpIO, error) {
c := &client{f: file, iot: ioType}
if ioType == interfaces.IOInput {
c.scanner = bufio.NewScanner(c.f)
c := &client{f: f, info: info}
buf := make([]byte, opt.BuffSize)
c.scanner.Buffer(buf, opt.MaxBuffSize)
}
scanner := bufio.NewScanner(c.f)
scanner.Buffer(buf, opt.MaxBuffSize)
c.scanner = scanner
return c, nil
}

11
main.go
View File

@ -2,19 +2,24 @@ package main
import (
"context"
"github.com/loveuer/esgo2dump/log"
"os/signal"
"syscall"
"github.com/loveuer/esgo2dump/pkg/log"
"github.com/loveuer/esgo2dump/internal/cmd"
)
func main() {
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
defer cancel()
if err := cmd.Start(ctx); err != nil {
go func() {
<-ctx.Done()
log.Fatal(ctx.Err().Error())
}()
if err := cmd.Run(ctx); err != nil {
log.Error(err.Error())
return
}

View File

@ -1,27 +0,0 @@
package model
type ESSource struct {
DocId string `json:"_id"`
Index string `json:"_index"`
Content map[string]any `json:"_source"`
}
type ESResponse struct {
ScrollId string `json:"_scroll_id"`
Took int `json:"took"`
TimedOut bool `json:"timed_out"`
Shards struct {
Total int `json:"total"`
Successful int `json:"successful"`
Skipped int `json:"skipped"`
Failed int `json:"failed"`
} `json:"_shards"`
Hits struct {
Total struct {
Value int `json:"value"`
Relation string `json:"relation"`
} `json:"total"`
MaxScore float64 `json:"max_score"`
Hits []*ESSource `json:"hits"`
} `json:"hits"`
}

View File

@ -21,7 +21,7 @@ var (
os.Exit(1)
}
defaultLogger = &logger{
DefaultLogger = &logger{
Mutex: sync.Mutex{},
timeFormat: "2006-01-02T15:04:05",
writer: os.Stdout,
@ -36,32 +36,32 @@ var (
)
func SetTimeFormat(format string) {
defaultLogger.SetTimeFormat(format)
DefaultLogger.SetTimeFormat(format)
}
func SetLogLevel(level LogLevel) {
defaultLogger.SetLogLevel(level)
DefaultLogger.SetLogLevel(level)
}
func Debug(msg string, data ...any) {
defaultLogger.Debug(msg, data...)
DefaultLogger.Debug(msg, data...)
}
func Info(msg string, data ...any) {
defaultLogger.Info(msg, data...)
DefaultLogger.Info(msg, data...)
}
func Warn(msg string, data ...any) {
defaultLogger.Warn(msg, data...)
DefaultLogger.Warn(msg, data...)
}
func Error(msg string, data ...any) {
defaultLogger.Error(msg, data...)
DefaultLogger.Error(msg, data...)
}
func Panic(msg string, data ...any) {
defaultLogger.Panic(msg, data...)
DefaultLogger.Panic(msg, data...)
}
func Fatal(msg string, data ...any) {
defaultLogger.Fatal(msg, data...)
DefaultLogger.Fatal(msg, data...)
}

45
pkg/model/es.go Normal file
View File

@ -0,0 +1,45 @@
package model
type ESSource[T any] struct {
DocId string `json:"_id"`
Index string `json:"_index"`
Content T `json:"_source"`
Sort []any `json:"sort"`
}
type ESResponseV6[T any] struct {
ScrollId string `json:"_scroll_id"`
Took int `json:"took"`
TimedOut bool `json:"timed_out"`
Shards struct {
Total int `json:"total"`
Successful int `json:"successful"`
Skipped int `json:"skipped"`
Failed int `json:"failed"`
} `json:"_shards"`
Hits struct {
Total int `json:"total"`
MaxScore float64 `json:"max_score"`
Hits []*ESSource[T] `json:"hits"`
} `json:"hits"`
}
type ESResponseV7[T any] struct {
ScrollId string `json:"_scroll_id"`
Took int `json:"took"`
TimedOut bool `json:"timed_out"`
Shards struct {
Total int `json:"total"`
Successful int `json:"successful"`
Skipped int `json:"skipped"`
Failed int `json:"failed"`
} `json:"_shards"`
Hits struct {
Total struct {
Value int `json:"value"`
Relation string `json:"relation"`
} `json:"total"`
MaxScore float64 `json:"max_score"`
Hits []*ESSource[T] `json:"hits"`
} `json:"hits"`
}

19
pkg/model/io.go Normal file
View File

@ -0,0 +1,19 @@
package model
import "context"
type IOType string
const (
Input IOType = "input"
Output IOType = "output"
)
type IO[T any] interface {
ReadData(ctx context.Context, limit int, query map[string]any, fields []string, sort []string) ([]T, error)
WriteData(ctx context.Context, items []T) (int, error)
ReadMapping(ctx context.Context) (map[string]any, error)
WriteMapping(ctx context.Context, mapping map[string]any) error
ReadSetting(ctx context.Context) (map[string]any, error)
WriteSetting(ctx context.Context, setting map[string]any) error
}

View File

@ -53,7 +53,8 @@ esgo2dump --input=http://127.0.0.1:9200/some_index --output=./data.json --query_
- [x] es to file
- [x] es to es
- [x] auto create index with mapping
- [x] support es6
- [ ] [Feature Request #1](https://github.com/loveuer/esgo2dump/issues/1): Supports more than 10,000 lines of query_file
- [ ] args: split_size (auto split json output file)
- [ ] auto create index with mapping,setting
- [x] support es6
- [ ] support es8

View File

@ -4,13 +4,14 @@ import (
"context"
"crypto/tls"
"fmt"
elastic "github.com/elastic/go-elasticsearch/v6"
"github.com/elastic/go-elasticsearch/v6/esapi"
"github.com/loveuer/esgo2dump/internal/util"
"net"
"net/http"
"net/url"
"time"
elastic "github.com/elastic/go-elasticsearch/v6"
"github.com/elastic/go-elasticsearch/v6/esapi"
"github.com/loveuer/esgo2dump/internal/tool"
)
func NewClient(ctx context.Context, url *url.URL) (*elastic.Client, error) {
@ -72,7 +73,7 @@ func NewClient(ctx context.Context, url *url.URL) (*elastic.Client, error) {
}
go ncFunc([]string{address}, urlUsername, urlPassword)
timeout := util.TimeoutCtx(ctx, 10)
timeout := tool.TimeoutCtx(ctx, 10)
select {
case <-timeout.Done():

View File

@ -1,147 +0,0 @@
package es6
import (
"bytes"
"context"
"encoding/json"
"fmt"
elastic "github.com/elastic/go-elasticsearch/v6"
"github.com/elastic/go-elasticsearch/v6/esapi"
"github.com/loveuer/esgo2dump/internal/util"
"github.com/loveuer/esgo2dump/log"
"github.com/loveuer/esgo2dump/model"
"github.com/samber/lo"
"time"
)
func ReadData(ctx context.Context, client *elastic.Client, index string, size, max int, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
var (
dataCh = make(chan []*model.ESSource)
errCh = make(chan error)
)
go func() {
var (
err error
resp *esapi.Response
result = new(model.ESResponse)
scrollId string
total int
)
defer func() {
close(dataCh)
close(errCh)
if scrollId != "" {
bs, _ := json.Marshal(map[string]string{
"scroll_id": scrollId,
})
var (
rr *esapi.Response
)
if rr, err = client.ClearScroll(
client.ClearScroll.WithContext(util.Timeout(3)),
client.ClearScroll.WithBody(bytes.NewReader(bs)),
); err != nil {
log.Warn("clear scroll id=%s err=%v", scrollId, err)
return
}
if rr.StatusCode != 200 {
log.Warn("clear scroll id=%s status=%d msg=%s", scrollId, rr.StatusCode, rr.String())
}
}
}()
if client == nil {
errCh <- fmt.Errorf("client is nil")
}
qs := []func(*esapi.SearchRequest){
client.Search.WithContext(util.TimeoutCtx(ctx, 20)),
client.Search.WithIndex(index),
client.Search.WithSize(size),
client.Search.WithFrom(0),
client.Search.WithScroll(time.Duration(120) * time.Second),
}
if len(source) > 0 {
qs = append(qs, client.Search.WithSourceIncludes(source...))
}
if len(sort) > 0 {
sorts := lo.Filter(sort, func(item string, index int) bool {
return item != ""
})
if len(sorts) > 0 {
qs = append(qs, client.Search.WithSort(sorts...))
}
}
if query != nil && len(query) > 0 {
queryBs, _ := json.Marshal(map[string]any{"query": query})
qs = append(qs, client.Search.WithBody(bytes.NewReader(queryBs)))
}
if resp, err = client.Search(qs...); err != nil {
errCh <- err
return
}
if resp.StatusCode != 200 {
errCh <- fmt.Errorf("resp status=%d, resp=%s", resp.StatusCode, resp.String())
return
}
decoder := json.NewDecoder(resp.Body)
if err = decoder.Decode(result); err != nil {
errCh <- err
return
}
scrollId = result.ScrollId
dataCh <- result.Hits.Hits
total += len(result.Hits.Hits)
if len(result.Hits.Hits) < size || (max > 0 && total >= max) {
return
}
for {
if resp, err = client.Scroll(
client.Scroll.WithScrollID(scrollId),
client.Scroll.WithScroll(time.Duration(120)*time.Second),
); err != nil {
errCh <- err
return
}
result = new(model.ESResponse)
decoder = json.NewDecoder(resp.Body)
if err = decoder.Decode(result); err != nil {
errCh <- err
return
}
if resp.StatusCode != 200 {
errCh <- fmt.Errorf("resp status=%d, resp=%s", resp.StatusCode, resp.String())
return
}
dataCh <- result.Hits.Hits
total += len(result.Hits.Hits)
if len(result.Hits.Hits) < size || (max > 0 && total >= max) {
break
}
}
}()
return dataCh, errCh
}

View File

@ -1,85 +0,0 @@
package es6
import (
"bytes"
"context"
"encoding/json"
"fmt"
elastic "github.com/elastic/go-elasticsearch/v6"
"github.com/elastic/go-elasticsearch/v6/esutil"
"github.com/loveuer/esgo2dump/log"
"github.com/loveuer/esgo2dump/model"
)
func WriteData(ctx context.Context, client *elastic.Client, index string, docsCh <-chan []*model.ESSource, logs ...log.WroteLogger) error {
var (
err error
indexer esutil.BulkIndexer
total = 0
)
for {
select {
case <-ctx.Done():
return ctx.Err()
case docs, ok := <-docsCh:
if !ok {
return nil
}
if len(docs) == 0 {
continue
}
count := 0
if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
Client: client,
Index: index,
ErrorTrace: true,
OnError: func(ctx context.Context, err error) {
},
}); err != nil {
return err
}
for _, doc := range docs {
var bs []byte
if bs, err = json.Marshal(doc.Content); err != nil {
return err
}
if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{
Action: "index",
Index: index,
DocumentID: doc.DocId,
DocumentType: "_doc",
Body: bytes.NewReader(bs),
OnFailure: func(ctx context.Context, item esutil.BulkIndexerItem, item2 esutil.BulkIndexerResponseItem, bulkErr error) {
},
}); err != nil {
return err
}
count++
}
total += count
if err = indexer.Close(ctx); err != nil {
return err
}
stats := indexer.Stats()
if stats.NumFailed > 0 {
return fmt.Errorf("write to es failed_count=%d bulk_count=%d", stats.NumFailed, count)
}
if len(logs) > 0 && logs[0] != nil {
logs[0].Info("Dump: succeed=%d total=%d docs succeed!!!", count, total)
}
}
}
}

View File

@ -4,41 +4,51 @@ import (
"context"
"crypto/tls"
"fmt"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/loveuer/esgo2dump/internal/util"
"net"
"net/http"
"net/url"
"strings"
"time"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/loveuer/esgo2dump/internal/tool"
"github.com/samber/lo"
)
func NewClient(ctx context.Context, url *url.URL) (*elastic.Client, error) {
// NewClient
// new esv7 client
// uri example:
// - http://127.0.0.1:9200
// - https://<username>:<password>@node1.dev:9200,node2.dev:19200,node3.dev:29200
func NewClient(ctx context.Context, uri string) (*elastic.Client, error) {
var (
err error
urlUsername string
urlPassword string
username string
password string
client *elastic.Client
errCh = make(chan error)
cliCh = make(chan *elastic.Client)
address = fmt.Sprintf("%s://%s", url.Scheme, url.Host)
ins *url.URL
)
if url.User != nil {
urlUsername = url.User.Username()
if p, ok := url.User.Password(); ok {
urlPassword = p
}
if ins, err = url.Parse(uri); err != nil {
return nil, err
}
ncFunc := func(endpoints []string, username, password string) {
var (
err error
cli *elastic.Client
infoResp *esapi.Response
endpoints := lo.Map(
strings.Split(ins.Host, ","),
func(item string, index int) string {
return fmt.Sprintf("%s://%s", ins.Scheme, item)
},
)
if cli, err = elastic.NewClient(
if ins.User != nil {
username = ins.User.Username()
password, _ = ins.User.Password()
}
query := ins.Query()
if client, err = elastic.NewClient(
elastic.Config{
Addresses: endpoints,
Username: username,
@ -51,35 +61,23 @@ func NewClient(ctx context.Context, url *url.URL) (*elastic.Client, error) {
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DialContext: (&net.Dialer{Timeout: 10 * time.Second}).DialContext,
},
DiscoverNodesOnStart: lo.If(query.Get("sniff") == "true", true).Else(false),
},
); err != nil {
errCh <- err
return
return nil, err
}
if infoResp, err = cli.Info(); err != nil {
errCh <- err
return
if query.Get("ping") != "false" {
var res *esapi.Response
if res, err = client.Ping(client.Ping.WithContext(tool.TimeoutCtx(ctx, 5))); err != nil {
return nil, err
}
if infoResp.StatusCode != 200 {
err = fmt.Errorf("info es7 status=%d", infoResp.StatusCode)
errCh <- err
return
}
cliCh <- cli
}
go ncFunc([]string{address}, urlUsername, urlPassword)
timeout := util.TimeoutCtx(ctx, 10)
select {
case <-timeout.Done():
return nil, fmt.Errorf("dial es=%s err=%v", address, context.DeadlineExceeded)
case client = <-cliCh:
return client, nil
case err = <-errCh:
if res.StatusCode != 200 {
err = fmt.Errorf("ping es server response: %s", res.String())
return nil, err
}
}
return client, nil
}

19
xes/es7/client_test.go Normal file
View File

@ -0,0 +1,19 @@
package es7
import (
"testing"
"github.com/loveuer/esgo2dump/internal/tool"
)
func TestNewClient(t *testing.T) {
uri := "http://es1.dev:9200,es2.dev:9200"
c, err := NewClient(tool.Timeout(5), uri)
if err != nil {
t.Fatal(err.Error())
}
t.Log("success!!!")
_ = c
}

View File

@ -5,146 +5,272 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/elastic/go-elasticsearch/v7/esutil"
"github.com/loveuer/esgo2dump/internal/opt"
"github.com/loveuer/esgo2dump/pkg/log"
"time"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/loveuer/esgo2dump/internal/util"
"github.com/loveuer/esgo2dump/log"
"github.com/loveuer/esgo2dump/model"
"github.com/loveuer/esgo2dump/internal/tool"
"github.com/loveuer/esgo2dump/pkg/model"
"github.com/samber/lo"
"time"
)
// ReadData es7 read data
// @param[source]: a list of include fields to extract and return from the _source field.
// @param[sort]: a list of <field>:<direction> pairs.
func ReadData(ctx context.Context, client *elastic.Client, index string, size, max int, query map[string]any, source []string, sort []string) (<-chan []*model.ESSource, <-chan error) {
var (
dataCh = make(chan []*model.ESSource)
errCh = make(chan error)
)
type streamer struct {
ctx context.Context
client *elastic.Client
index string
scroll string
}
go func() {
// ReadData implements model.IO.
func (s *streamer) ReadData(ctx context.Context, limit int, query map[string]any, fields []string, sort []string) ([]map[string]any, error) {
var (
err error
qs []func(*esapi.SearchRequest)
resp *esapi.Response
result = new(model.ESResponse)
scrollId string
total int
result = new(model.ESResponseV7[map[string]any])
)
defer func() {
close(dataCh)
close(errCh)
if limit == 0 {
return nil, nil
}
if scrollId != "" {
bs, _ := json.Marshal(map[string]string{
"scroll_id": scrollId,
})
var (
rr *esapi.Response
)
if rr, err = client.ClearScroll(
client.ClearScroll.WithContext(util.Timeout(3)),
client.ClearScroll.WithBody(bytes.NewReader(bs)),
if s.scroll != "" {
if resp, err = s.client.Scroll(
s.client.Scroll.WithContext(tool.TimeoutCtx(s.ctx)),
s.client.Scroll.WithScrollID(s.scroll),
s.client.Scroll.WithScroll(35*time.Second),
); err != nil {
log.Warn("clear scroll id=%s err=%v", scrollId, err)
return
return nil, err
}
if rr.StatusCode != 200 {
log.Warn("clear scroll id=%s status=%d msg=%s", scrollId, rr.StatusCode, rr.String())
}
}
}()
if client == nil {
errCh <- fmt.Errorf("client is nil")
goto HandleResp
}
qs := []func(*esapi.SearchRequest){
client.Search.WithContext(util.TimeoutCtx(ctx, 20)),
client.Search.WithIndex(index),
client.Search.WithSize(size),
client.Search.WithFrom(0),
client.Search.WithScroll(time.Duration(120) * time.Second),
qs = []func(*esapi.SearchRequest){
s.client.Search.WithContext(tool.TimeoutCtx(s.ctx)),
s.client.Search.WithIndex(s.index),
s.client.Search.WithSize(limit),
s.client.Search.WithScroll(35 * time.Second),
}
if len(source) > 0 {
qs = append(qs, client.Search.WithSourceIncludes(source...))
if len(fields) > 0 {
qs = append(qs, s.client.Search.WithSourceIncludes(fields...))
}
if len(sort) > 0 {
sorts := lo.Filter(sort, func(item string, index int) bool {
return item != ""
})
if len(sorts) > 0 {
qs = append(qs, client.Search.WithSort(sorts...))
}
qs = append(qs, s.client.Search.WithSort(sort...))
}
if query != nil && len(query) > 0 {
queryBs, _ := json.Marshal(map[string]any{"query": query})
qs = append(qs, client.Search.WithBody(bytes.NewReader(queryBs)))
if len(query) > 0 {
queryBs, err := json.Marshal(map[string]any{"query": query})
if err != nil {
return nil, err
}
if resp, err = client.Search(qs...); err != nil {
errCh <- err
return
qs = append(qs, s.client.Search.WithBody(bytes.NewReader(queryBs)))
}
if resp, err = s.client.Search(qs...); err != nil {
return nil, err
}
HandleResp:
if resp.StatusCode != 200 {
errCh <- fmt.Errorf("resp status=%d, resp=%s", resp.StatusCode, resp.String())
return
return nil, fmt.Errorf("resp status=%d, resp=%s", resp.StatusCode, resp.String())
}
decoder := json.NewDecoder(resp.Body)
if err = decoder.Decode(result); err != nil {
errCh <- err
return
if err = json.NewDecoder(resp.Body).Decode(result); err != nil {
return nil, err
}
scrollId = result.ScrollId
s.scroll = result.ScrollId
dataCh <- result.Hits.Hits
total += len(result.Hits.Hits)
if len(result.Hits.Hits) < size || (max > 0 && total >= max) {
return
return lo.Slice(
lo.Map(
result.Hits.Hits,
func(item *model.ESSource[map[string]any], _ int) map[string]any {
return item.Content
},
),
0,
limit,
), nil
}
for {
if resp, err = client.Scroll(
client.Scroll.WithScrollID(scrollId),
client.Scroll.WithScroll(time.Duration(120)*time.Second),
// WriteData implements model.IO.
func (s *streamer) WriteData(ctx context.Context, items []map[string]any) (int, error) {
var (
err error
indexer esutil.BulkIndexer
total int
)
if len(items) == 0 {
return 0, nil
}
count := 0
if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
NumWorkers: 0,
FlushBytes: 0,
FlushInterval: 0,
Client: s.client,
Decoder: nil,
OnError: func(ctx context.Context, err error) {
log.Error("es7.writer: on error log, err = %s", err.Error())
},
Index: s.index,
ErrorTrace: true,
FilterPath: []string{},
Header: map[string][]string{},
Human: false,
Pipeline: "",
Pretty: false,
Refresh: "",
Routing: "",
Source: []string{},
SourceExcludes: []string{},
SourceIncludes: []string{},
Timeout: 0,
WaitForActiveShards: "",
}); err != nil {
return 0, err
}
for _, item := range items {
var bs []byte
if bs, err = json.Marshal(item); err != nil {
return 0, err
}
if err = indexer.Add(context.Background(), esutil.BulkIndexerItem{
Action: "index",
Index: s.index,
Body: bytes.NewReader(bs),
OnFailure: func(ctx context.Context, item esutil.BulkIndexerItem, item2 esutil.BulkIndexerResponseItem, bulkErr error) {
log.Error("es7.writer: on failure err log, err = %s", bulkErr.Error())
},
}); err != nil {
return 0, err
}
count++
}
total += count
if err = indexer.Close(ctx); err != nil {
return 0, err
}
stats := indexer.Stats()
return len(items) - int(stats.NumFailed), nil
}
func (s *streamer) ReadMapping(ctx context.Context) (map[string]any, error) {
r, err := s.client.Indices.GetMapping(
s.client.Indices.GetMapping.WithIndex(s.index),
)
if err != nil {
return nil, err
}
if r.StatusCode != 200 {
return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
}
m := make(map[string]any)
decoder := json.NewDecoder(r.Body)
if err = decoder.Decode(&m); err != nil {
return nil, err
}
return m, nil
}
func (s *streamer) WriteMapping(ctx context.Context, mapping map[string]any) error {
var (
err error
bs []byte
result *esapi.Response
)
for idxKey := range mapping {
if bs, err = json.Marshal(mapping[idxKey]); err != nil {
return err
}
if result, err = s.client.Indices.Create(
s.index,
s.client.Indices.Create.WithContext(tool.TimeoutCtx(ctx, opt.Timeout)),
s.client.Indices.Create.WithBody(bytes.NewReader(bs)),
); err != nil {
errCh <- err
return
return err
}
result = new(model.ESResponse)
decoder = json.NewDecoder(resp.Body)
if err = decoder.Decode(result); err != nil {
errCh <- err
return
}
if resp.StatusCode != 200 {
errCh <- fmt.Errorf("resp status=%d, resp=%s", resp.StatusCode, resp.String())
return
}
dataCh <- result.Hits.Hits
total += len(result.Hits.Hits)
if len(result.Hits.Hits) < size || (max > 0 && total >= max) {
break
if result.StatusCode != 200 {
return fmt.Errorf("status=%d, msg=%s", result.StatusCode, result.String())
}
}
}()
return dataCh, errCh
return nil
}
func (s *streamer) ReadSetting(ctx context.Context) (map[string]any, error) {
r, err := s.client.Indices.GetSettings(
s.client.Indices.GetSettings.WithContext(tool.TimeoutCtx(ctx, opt.Timeout)),
s.client.Indices.GetSettings.WithIndex(s.index),
)
if err != nil {
return nil, err
}
if r.StatusCode != 200 {
return nil, fmt.Errorf("status=%d, msg=%s", r.StatusCode, r.String())
}
m := make(map[string]any)
decoder := json.NewDecoder(r.Body)
if err = decoder.Decode(&m); err != nil {
return nil, err
}
return m, nil
}
func (s *streamer) WriteSetting(ctx context.Context, setting map[string]any) error {
var (
err error
bs []byte
result *esapi.Response
)
if bs, err = json.Marshal(setting); err != nil {
return err
}
if result, err = s.client.Indices.PutSettings(
bytes.NewReader(bs),
s.client.Indices.PutSettings.WithContext(tool.TimeoutCtx(ctx, opt.Timeout)),
); err != nil {
return err
}
if result.StatusCode != 200 {
return fmt.Errorf("status=%d, msg=%s", result.StatusCode, result.String())
}
return nil
}
func NewStreamer(ctx context.Context, client *elastic.Client, index string) (model.IO[map[string]any], error) {
s := &streamer{ctx: ctx, client: client, index: index}
return s, nil
}

View File

@ -5,41 +5,49 @@ import (
"context"
"encoding/json"
"fmt"
elastic "github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esutil"
"github.com/loveuer/esgo2dump/log"
"github.com/loveuer/esgo2dump/model"
"github.com/loveuer/esgo2dump/pkg/log"
"github.com/loveuer/esgo2dump/pkg/model"
)
func WriteData(ctx context.Context, client *elastic.Client, index string, docsCh <-chan []*model.ESSource, logs ...log.WroteLogger) error {
func WriteData[T any](ctx context.Context, client *elastic.Client, index string, docs ...*model.ESSource[T]) error {
var (
err error
indexer esutil.BulkIndexer
total int
)
for {
select {
case <-ctx.Done():
return ctx.Err()
case docs, ok := <-docsCh:
if !ok {
return nil
}
if len(docs) == 0 {
continue
return nil
}
count := 0
if indexer, err = esutil.NewBulkIndexer(esutil.BulkIndexerConfig{
NumWorkers: 0,
FlushBytes: 0,
FlushInterval: 0,
Client: client,
Decoder: nil,
OnError: func(ctx context.Context, err error) {
log.Error("es7.writer: on error log, err = %s", err.Error())
},
Index: index,
ErrorTrace: true,
OnError: func(ctx context.Context, err error) {
},
FilterPath: []string{},
Header: map[string][]string{},
Human: false,
Pipeline: "",
Pretty: false,
Refresh: "",
Routing: "",
Source: []string{},
SourceExcludes: []string{},
SourceIncludes: []string{},
Timeout: 0,
WaitForActiveShards: "",
}); err != nil {
return err
}
@ -57,6 +65,7 @@ func WriteData(ctx context.Context, client *elastic.Client, index string, docsCh
DocumentID: doc.DocId,
Body: bytes.NewReader(bs),
OnFailure: func(ctx context.Context, item esutil.BulkIndexerItem, item2 esutil.BulkIndexerResponseItem, bulkErr error) {
log.Error("es7.writer: on failure err log, err = %s", bulkErr.Error())
},
}); err != nil {
return err
@ -76,9 +85,5 @@ func WriteData(ctx context.Context, client *elastic.Client, index string, docsCh
return fmt.Errorf("write to es failed_count=%d bulk_count=%d", stats.NumFailed, count)
}
if len(logs) > 0 && logs[0] != nil {
logs[0].Info("Dump: succeed=%d total=%d docs succeed!!!", count, total)
}
}
}
return nil
}