// Package imager pulls container images from Docker/OCI registries over
// HTTP(S) and stores them as tar archives.
package imager

import (
	"archive/tar"
	"compress/gzip"
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"yizhisec.com/hsv2/forge/pkg/logger"
)

const (
	// dockerHubRegistry is the host that serves the registry API for Docker Hub.
	dockerHubRegistry = "registry-1.docker.io"
	// defaultTag is used when an image name carries neither a tag nor a digest.
	defaultTag = "latest"
)

// Media types sent in the Accept header of manifest requests.
var (
	// imageManifestTypes are the single-image manifest formats manifestV2 can decode.
	imageManifestTypes = []string{
		"application/vnd.docker.distribution.manifest.v2+json",
		"application/vnd.oci.image.manifest.v1+json",
	}
	// taggedManifestTypes additionally accepts multi-platform indexes, which
	// getManifest resolves to a single image manifest.
	taggedManifestTypes = []string{
		"application/vnd.docker.distribution.manifest.v2+json",
		"application/vnd.oci.image.manifest.v1+json",
		"application/vnd.docker.distribution.manifest.list.v2+json",
		"application/vnd.oci.image.index.v1+json",
	}
)

// PullOption is a functional option for configuring image pull.
type PullOption func(*pullOption)

// pullOption collects every setting understood by PullImage.
type pullOption struct {
	Proxy         string        // http or socks5 proxy
	Rename        string        // pull image and rename it
	PlainHTTP     bool          // use http instead of https
	SkipTLSVerify bool          // skip TLS certificate verification
	Username      string        // registry username for authentication
	Password      string        // registry password for authentication
	RetryTimes    int           // retries after the first attempt, default is 3
	RetryDelay    time.Duration // delay between retries, default is 2 seconds
}

// WithProxy sets the proxy for pulling images.
func WithProxy(proxy string) PullOption {
	return func(o *pullOption) {
		o.Proxy = proxy
	}
}

// WithRename sets a new name for the pulled image.
func WithRename(name string) PullOption {
	return func(o *pullOption) {
		o.Rename = name
	}
}

// WithPlainHTTP enables plain HTTP (no TLS).
func WithPlainHTTP() PullOption {
	return func(o *pullOption) {
		o.PlainHTTP = true
	}
}

// WithSkipTLSVerify skips TLS certificate verification.
func WithSkipTLSVerify() PullOption {
	return func(o *pullOption) {
		o.SkipTLSVerify = true
	}
}

// WithAuth sets authentication credentials.
func WithAuth(username, password string) PullOption {
	return func(o *pullOption) {
		o.Username = username
		o.Password = password
	}
}

// WithRetry sets how many times a failed pull is retried. Negative values are
// treated as zero (a single attempt, no retries).
func WithRetry(times int) PullOption {
	return func(o *pullOption) {
		if times < 0 {
			times = 0
		}
		o.RetryTimes = times
	}
}

// WithRetryDelay sets the delay between retries.
func WithRetryDelay(delay time.Duration) PullOption {
	return func(o *pullOption) {
		o.RetryDelay = delay
	}
}

// manifestV2 represents a Docker manifest v2 schema document; the same fields
// are used to decode OCI image manifests.
type manifestV2 struct {
	SchemaVersion int    `json:"schemaVersion"`
	MediaType     string `json:"mediaType"`
	Config        struct {
		MediaType string `json:"mediaType"`
		Size      int64  `json:"size"`
		Digest    string `json:"digest"`
	} `json:"config"`
	Layers []struct {
		MediaType string `json:"mediaType"`
		Size      int64  `json:"size"`
		Digest    string `json:"digest"`
	} `json:"layers"`
}

// imageReference is an image name split into registry, repository, and tag.
// For digest references Tag holds the digest, which is what gets requested
// from the registry.
type imageReference struct {
	Registry   string
	Repository string
	Tag        string
}

// parseImageReference parses an image name into its components.
//
// The part before the first "/" is treated as a registry host when it is
// "localhost" or contains "." or ":" (host:port); otherwise the image is
// assumed to live on Docker Hub. "docker.io" and "index.docker.io" are mapped
// to the Docker Hub registry host. A missing tag defaults to "latest", and
// single-component Docker Hub repositories get the "library/" prefix.
//
// An error is returned for empty names and for names that yield an empty or
// malformed repository or an empty tag.
func parseImageReference(name string) (*imageReference, error) {
	if strings.TrimSpace(name) == "" {
		return nil, fmt.Errorf("empty image name")
	}

	ref := &imageReference{Registry: dockerHubRegistry, Tag: defaultTag}

	// Split off the registry first so that the ":" of host:port is never
	// mistaken for a tag separator (e.g. localhost:5000/myimage:tag).
	remainder := name
	if slash := strings.Index(name, "/"); slash >= 0 {
		host := name[:slash]
		if host == "localhost" || strings.ContainsAny(host, ".:") {
			ref.Registry = host
			remainder = name[slash+1:]
		}
	}
	switch ref.Registry {
	case "docker.io", "index.docker.io":
		ref.Registry = dockerHubRegistry
	}

	if at := strings.Index(remainder, "@"); at >= 0 {
		// Digest reference: the digest identifies the manifest, so a tag
		// given in front of "@" is dropped.
		ref.Tag = remainder[at+1:]
		remainder = remainder[:at]
		if colon := strings.LastIndex(remainder, ":"); colon > 0 {
			remainder = remainder[:colon]
		}
		ref.Repository = remainder
	} else if colon := strings.LastIndex(remainder, ":"); colon > 0 {
		ref.Repository = remainder[:colon]
		ref.Tag = remainder[colon+1:]
	} else {
		ref.Repository = remainder
	}

	if ref.Repository == "" || ref.Tag == "" || strings.ContainsAny(ref.Repository, ":@ ") {
		return nil, fmt.Errorf("invalid image reference %q", name)
	}

	// Official Docker Hub images live under the implicit "library" namespace.
	if ref.Registry == dockerHubRegistry && !strings.Contains(ref.Repository, "/") {
		ref.Repository = "library/" + ref.Repository
	}

	return ref, nil
}

// countingWriter forwards writes to w and records how many bytes were
// accepted, so PullImage can tell whether a failed attempt left partial output.
type countingWriter struct {
	w io.Writer
	n int64
}

// Write implements io.Writer.
func (c *countingWriter) Write(p []byte) (int, error) {
	n, err := c.w.Write(p)
	c.n += int64(n)
	return n, err
}

// rewindableStore is implemented by stores (for example *os.File) whose
// partial output can be discarded before a retry.
type rewindableStore interface {
	io.Seeker
	Truncate(size int64) error
}

// rewindTarget reports whether store can be rewound and, if so, the offset at
// which this pull's output starts. A nil result means it cannot be rewound.
func rewindTarget(store io.Writer) (rewindableStore, int64) {
	rs, ok := store.(rewindableStore)
	if !ok {
		return nil, 0
	}
	offset, err := rs.Seek(0, io.SeekCurrent)
	if err != nil {
		// The store has the methods but does not support seeking.
		return nil, 0
	}
	return rs, offset
}

// discardPartialOutput drops everything written after offset and positions the
// store back at offset.
func discardPartialOutput(rs rewindableStore, offset int64) error {
	if err := rs.Truncate(offset); err != nil {
		return err
	}
	_, err := rs.Seek(offset, io.SeekStart)
	return err
}

// waitForRetry blocks for delay or until ctx is done, whichever comes first.
func waitForRetry(ctx context.Context, delay time.Duration) error {
	timer := time.NewTimer(delay)
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}

// registryScheme returns the URL scheme used to talk to the registry.
func registryScheme(options *pullOption) string {
	if options.PlainHTTP {
		return "http"
	}
	return "https"
}

// PullImage pulls an OCI image using HTTP requests and saves it to store as a
// tar archive.
//
// A failed attempt is retried up to RetryTimes times (default 3), waiting
// RetryDelay (default 2s) in between. If a failed attempt already wrote data to
// store, that data is discarded before retrying when store supports Seek and
// Truncate; otherwise the pull is aborted, because writing a second archive
// after the partial one would corrupt the output.
//
// The context's error is returned as soon as ctx is done between attempts.
func PullImage(ctx context.Context, name string, store io.Writer, opts ...PullOption) error {
	options := &pullOption{
		RetryTimes: 3,               // default retry times
		RetryDelay: 2 * time.Second, // default retry delay
	}
	for _, opt := range opts {
		if opt != nil {
			opt(options)
		}
	}

	// Log a copy so that the registry password never reaches the log output.
	logged := *options
	if logged.Password != "" {
		logged.Password = "<redacted>"
	}
	logger.Debug("Pulling image: %s with options: %+v", name, &logged)

	ref, err := parseImageReference(name)
	if err != nil {
		return fmt.Errorf("failed to parse image reference: %w", err)
	}
	logger.Debug("Parsed image reference: registry=%s, repository=%s, tag=%s", ref.Registry, ref.Repository, ref.Tag)

	client := createHTTPClient(options)

	out := &countingWriter{w: store}
	rewindable, startOffset := rewindTarget(store)

	// Retry logic for sync/proxy registries.
	var lastErr error
	for attempt := 0; ; attempt++ {
		lastErr = pullOnce(ctx, client, ref, name, out, options)
		if lastErr == nil {
			logger.Info("Successfully pulled image: %s", name)
			return nil
		}
		logger.Debug("Attempt %d failed: %v", attempt+1, lastErr)

		if attempt >= options.RetryTimes {
			break
		}
		if err := ctx.Err(); err != nil {
			return err
		}

		if out.n > 0 {
			if rewindable == nil {
				return fmt.Errorf("%w (not retried: %d bytes were already written and the store cannot be rewound)", lastErr, out.n)
			}
			if err := discardPartialOutput(rewindable, startOffset); err != nil {
				return fmt.Errorf("%w (not retried: discarding partial output failed: %v)", lastErr, err)
			}
			out.n = 0
		}

		logger.Debug("Retry attempt %d/%d for image: %s", attempt+1, options.RetryTimes, name)
		if err := waitForRetry(ctx, options.RetryDelay); err != nil {
			return err
		}
	}

	// All attempts exhausted.
	if options.RetryTimes > 0 {
		return fmt.Errorf("failed to pull image after %d attempts: %w", options.RetryTimes+1, lastErr)
	}
	return lastErr
}

// pullOnce performs a single pull attempt: authenticate, resolve the manifest,
// then download the blobs into store.
func pullOnce(ctx context.Context, client *http.Client, ref *imageReference, name string, store io.Writer, options *pullOption) error {
	token, err := getAuthToken(ctx, client, ref, options)
	if err != nil {
		return fmt.Errorf("failed to get auth token: %w", err)
	}

	manifest, err := getManifest(ctx, client, ref, token, options)
	if err != nil {
		return fmt.Errorf("failed to get manifest: %w", err)
	}
	logger.Debug("Got manifest with %d layers", len(manifest.Layers))

	if err := createImageTar(ctx, client, ref, manifest, token, store, name, options); err != nil {
		return fmt.Errorf("failed to create image tar: %w", err)
	}
	return nil
}

// createHTTPClient creates an HTTP client with the given options.
//
// Without TLS or proxy options the client uses the default transport. When a
// custom transport is needed it starts as a clone of http.DefaultTransport, so
// its timeouts and other defaults are kept. An unparsable proxy URL is ignored
// (and logged), because this function cannot report an error.
func createHTTPClient(options *pullOption) *http.Client {
	client := &http.Client{}
	if !options.SkipTLSVerify && options.Proxy == "" {
		return client
	}

	var transport *http.Transport
	if base, ok := http.DefaultTransport.(*http.Transport); ok {
		transport = base.Clone()
	} else {
		// DefaultTransport was replaced by the application; start from scratch.
		transport = &http.Transport{}
	}

	customized := false
	if options.SkipTLSVerify {
		if transport.TLSClientConfig == nil {
			transport.TLSClientConfig = &tls.Config{}
		}
		transport.TLSClientConfig.InsecureSkipVerify = true
		customized = true
	}
	if options.Proxy != "" {
		proxyURL, err := url.Parse(options.Proxy)
		if err != nil {
			// The value is not logged: proxy URLs may embed credentials.
			logger.Debug("Ignoring proxy option: not a valid URL")
		} else {
			transport.Proxy = http.ProxyURL(proxyURL)
			customized = true
		}
	}

	if customized {
		client.Transport = transport
	}
	return client
}

// getAuthToken probes the registry's /v2/ endpoint and, if the registry
// answers with a Bearer challenge, obtains a token for pulling ref.
//
// An empty token with a nil error means no token is used: either the probe
// returned 200 OK, there was no WWW-Authenticate header, or the challenge was
// not a Bearer challenge.
//
// NOTE(review): registries that only offer Basic authentication end up with an
// empty token, and the configured credentials are not attached to later
// manifest/blob requests.
func getAuthToken(ctx context.Context, client *http.Client, ref *imageReference, options *pullOption) (string, error) {
	apiURL := fmt.Sprintf("%s://%s/v2/", registryScheme(options), ref.Registry)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
	if err != nil {
		return "", err
	}
	if options.Username != "" && options.Password != "" {
		req.SetBasicAuth(options.Username, options.Password)
	}

	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// If 200 OK, no auth needed.
	if resp.StatusCode == http.StatusOK {
		return "", nil
	}

	authHeader := resp.Header.Get("WWW-Authenticate")
	if authHeader == "" {
		return "", nil
	}
	return fetchToken(ctx, client, authHeader, ref, options)
}

// parseChallengeParams parses the comma-separated key=value parameters of a
// WWW-Authenticate challenge. Values may be quoted, and quoted values may
// themselves contain commas (e.g. scope="repository:x:pull,push"). Keys are
// lower-cased; entries without "=" are skipped.
func parseChallengeParams(s string) map[string]string {
	params := make(map[string]string)
	for {
		s = strings.TrimLeft(s, ", \t")
		eq := strings.Index(s, "=")
		if eq < 0 {
			return params
		}
		if comma := strings.Index(s, ","); comma >= 0 && comma < eq {
			// Bare token without a value; skip it.
			s = s[comma+1:]
			continue
		}

		key := strings.ToLower(strings.TrimSpace(s[:eq]))
		s = s[eq+1:]

		var value string
		if strings.HasPrefix(s, "\"") {
			s = s[1:]
			if end := strings.Index(s, "\""); end >= 0 {
				value, s = s[:end], s[end+1:]
			} else {
				value, s = s, "" // unterminated quote: take the rest
			}
		} else if end := strings.Index(s, ","); end >= 0 {
			value, s = strings.TrimSpace(s[:end]), s[end+1:]
		} else {
			value, s = strings.TrimSpace(s), ""
		}

		if key != "" {
			params[key] = value
		}
	}
}

// fetchToken requests a pull token from the auth server named in a Bearer
// challenge.
//
// authHeader is the WWW-Authenticate value, for example:
//
//	Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:library/alpine:pull"
//
// A non-Bearer challenge yields an empty token and a nil error. The scope from
// the challenge is used when present, otherwise "repository:<repo>:pull". Both
// the "token" and "access_token" response fields are accepted.
func fetchToken(ctx context.Context, client *http.Client, authHeader string, ref *imageReference, options *pullOption) (string, error) {
	const bearerPrefix = "Bearer "
	// Auth scheme names are case-insensitive.
	if len(authHeader) < len(bearerPrefix) || !strings.EqualFold(authHeader[:len(bearerPrefix)], bearerPrefix) {
		return "", nil
	}
	params := parseChallengeParams(authHeader[len(bearerPrefix):])

	realm := params["realm"]
	if realm == "" {
		return "", fmt.Errorf("no realm in auth header")
	}

	// Build the token URL through net/url so the query string is well-formed
	// whether or not the realm already has one and whether or not a service
	// was announced.
	tokenURL, err := url.Parse(realm)
	if err != nil {
		return "", fmt.Errorf("invalid realm in auth header: %w", err)
	}
	query := tokenURL.Query()
	if service, ok := params["service"]; ok {
		query.Set("service", service)
	}
	scope := params["scope"]
	if scope == "" {
		scope = fmt.Sprintf("repository:%s:pull", ref.Repository)
	}
	query.Set("scope", scope)
	tokenURL.RawQuery = query.Encode()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, tokenURL.String(), nil)
	if err != nil {
		return "", err
	}
	if options.Username != "" && options.Password != "" {
		req.SetBasicAuth(options.Username, options.Password)
	}

	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("failed to get token: %s", resp.Status)
	}

	var tokenResp struct {
		Token       string `json:"token"`
		AccessToken string `json:"access_token"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&tokenResp); err != nil {
		return "", err
	}

	switch {
	case tokenResp.Token != "":
		return tokenResp.Token, nil
	case tokenResp.AccessToken != "":
		return tokenResp.AccessToken, nil
	default:
		return "", fmt.Errorf("token response contained no token")
	}
}

// fetchManifestBody GETs /v2/<repo>/manifests/<reference> and returns the raw
// document together with the response Content-Type. accept lists the media
// types offered to the registry; what names the document in error messages.
func fetchManifestBody(ctx context.Context, client *http.Client, ref *imageReference, reference, token string, options *pullOption, accept []string, what string) ([]byte, string, error) {
	manifestURL := fmt.Sprintf("%s://%s/v2/%s/manifests/%s", registryScheme(options), ref.Registry, ref.Repository, reference)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, manifestURL, nil)
	if err != nil {
		return nil, "", err
	}
	for _, mediaType := range accept {
		req.Header.Add("Accept", mediaType)
	}
	if token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}

	logger.Debug("Fetching manifest from: %s", manifestURL)
	resp, err := client.Do(req)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best-effort, bounded read: the body is only used for the debug log.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		logger.Debug("Manifest request failed: %s, body: %s", resp.Status, string(snippet))
		return nil, "", fmt.Errorf("failed to get %s: %s", what, resp.Status)
	}

	contentType := resp.Header.Get("Content-Type")
	logger.Debug("Manifest Content-Type: %s", contentType)

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, "", err
	}
	return body, contentType, nil
}

// decodeManifest parses a single-image manifest and rejects documents without
// a config digest, which this package cannot turn into an archive.
func decodeManifest(body []byte) (*manifestV2, error) {
	var manifest manifestV2
	if err := json.Unmarshal(body, &manifest); err != nil {
		return nil, fmt.Errorf("failed to parse manifest: %w", err)
	}
	if manifest.Config.Digest == "" {
		return nil, fmt.Errorf("unsupported manifest (schemaVersion=%d, mediaType=%q): no config digest", manifest.SchemaVersion, manifest.MediaType)
	}
	return &manifest, nil
}

// selectPlatformDigest picks one entry from a manifest list / image index:
// the first linux/amd64 entry, or else the first entry whose architecture and
// OS are both not "unknown".
func selectPlatformDigest(body []byte) (string, error) {
	var manifestList struct {
		SchemaVersion int `json:"schemaVersion"`
		Manifests     []struct {
			Digest   string `json:"digest"`
			Platform struct {
				Architecture string `json:"architecture"`
				OS           string `json:"os"`
			} `json:"platform"`
		} `json:"manifests"`
	}
	if err := json.Unmarshal(body, &manifestList); err != nil {
		return "", fmt.Errorf("failed to parse manifest list: %w", err)
	}
	logger.Debug("Found manifest list with %d manifests", len(manifestList.Manifests))

	for _, m := range manifestList.Manifests {
		logger.Debug("Checking manifest: arch=%s, os=%s, digest=%s", m.Platform.Architecture, m.Platform.OS, m.Digest)
		if m.Platform.Architecture == "amd64" && m.Platform.OS == "linux" {
			return m.Digest, nil
		}
	}

	// Fallback to the first manifest with a known platform.
	for _, m := range manifestList.Manifests {
		if m.Digest != "" && m.Platform.Architecture != "unknown" && m.Platform.OS != "unknown" {
			logger.Debug("Using fallback manifest: arch=%s, os=%s", m.Platform.Architecture, m.Platform.OS)
			return m.Digest, nil
		}
	}

	return "", fmt.Errorf("no suitable manifest found in manifest list")
}

// getManifest fetches the image manifest for ref.Tag. When the registry
// answers with a manifest list or OCI image index, one platform entry is
// selected (see selectPlatformDigest) and that manifest is fetched by digest.
func getManifest(ctx context.Context, client *http.Client, ref *imageReference, token string, options *pullOption) (*manifestV2, error) {
	body, contentType, err := fetchManifestBody(ctx, client, ref, ref.Tag, token, options, taggedManifestTypes, "manifest")
	if err != nil {
		return nil, err
	}

	// Check if it's a manifest list (multi-arch).
	if strings.Contains(contentType, "manifest.list") || strings.Contains(contentType, "image.index") {
		targetDigest, err := selectPlatformDigest(body)
		if err != nil {
			return nil, err
		}
		logger.Debug("Selected manifest digest: %s", targetDigest)
		return getManifestByDigest(ctx, client, ref, targetDigest, token, options)
	}

	manifest, err := decodeManifest(body)
	if err != nil {
		return nil, err
	}
	logger.Debug("Parsed manifest: schemaVersion=%d, config.digest=%s, layers=%d",
		manifest.SchemaVersion, manifest.Config.Digest, len(manifest.Layers))
	return manifest, nil
}

// getManifestByDigest fetches a single-image manifest by its digest.
func getManifestByDigest(ctx context.Context, client *http.Client, ref *imageReference, digest, token string, options *pullOption) (*manifestV2, error) {
	body, _, err := fetchManifestBody(ctx, client, ref, digest, token, options, imageManifestTypes, "manifest by digest")
	if err != nil {
		return nil, err
	}
	return decodeManifest(body)
}

// createImageTar downloads layers and creates a Docker-compatible tar archive
func createImageTar(ctx context.Context, client *http.Client, ref *imageReference, manifest *manifestV2, token string, store io.Writer, imageName string, options *pullOption) error { // Create tar writer tw := tar.NewWriter(store) defer tw.Close() scheme := "https" if options.PlainHTTP { scheme = "http" } // Download config blob logger.Debug("Downloading config blob: %s", manifest.Config.Digest) configBlob, err := downloadBlob(ctx, client, scheme, ref.Registry, ref.Repository, manifest.Config.Digest, token) if err != nil { return fmt.Errorf("failed to download config: %w", err) } // Write config as json file configFileName := strings.TrimPrefix(manifest.Config.Digest, "sha256:") + ".json" if err := writeTarEntry(tw, configFileName, configBlob); err != nil { return err } // Download and write each layer var layerFiles []string for i, layer := range manifest.Layers { logger.Debug("Downloading layer %d/%d: %s", i+1, len(manifest.Layers), layer.Digest) layerBlob, err := downloadBlob(ctx, client, scheme, ref.Registry, ref.Repository, layer.Digest, token) if err != nil { return fmt.Errorf("failed to download layer %s: %w", layer.Digest, err) } layerFileName := strings.TrimPrefix(layer.Digest, "sha256:") + "/layer.tar" layerFiles = append(layerFiles, layerFileName) if err := writeTarEntry(tw, layerFileName, layerBlob); err != nil { return err } } // Create manifest.json for Docker compatibility if options.Rename != "" { imageName = options.Rename } manifestJSON := []map[string]interface{}{ { "Config": configFileName, "RepoTags": []string{imageName}, "Layers": layerFiles, }, } manifestData, err := json.Marshal(manifestJSON) if err != nil { return err } if err := writeTarEntry(tw, "manifest.json", manifestData); err != nil { return err } // Create repositories file (legacy) repositories := make(map[string]map[string]string) repositories[imageName] = map[string]string{ref.Tag: strings.TrimPrefix(manifest.Config.Digest, "sha256:")[:12]} reposData, err := 
json.Marshal(repositories) if err != nil { return err } if err := writeTarEntry(tw, "repositories", reposData); err != nil { return err } return nil } // downloadBlob downloads a blob from registry func downloadBlob(ctx context.Context, client *http.Client, scheme, registry, repository, digest, token string) ([]byte, error) { blobURL := fmt.Sprintf("%s://%s/v2/%s/blobs/%s", scheme, registry, repository, digest) req, err := http.NewRequestWithContext(ctx, "GET", blobURL, nil) if err != nil { return nil, err } if token != "" { req.Header.Set("Authorization", "Bearer "+token) } resp, err := client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("failed to download blob: %s", resp.Status) } // Check if content is gzipped var reader io.Reader = resp.Body if resp.Header.Get("Content-Type") == "application/vnd.docker.image.rootfs.diff.tar.gzip" || strings.Contains(resp.Header.Get("Content-Encoding"), "gzip") { gzReader, err := gzip.NewReader(resp.Body) if err == nil { defer gzReader.Close() reader = gzReader } } return io.ReadAll(reader) } // writeTarEntry writes a file entry to tar archive func writeTarEntry(tw *tar.Writer, name string, data []byte) error { hdr := &tar.Header{ Name: name, Mode: 0644, Size: int64(len(data)), } if err := tw.WriteHeader(hdr); err != nil { return err } if _, err := tw.Write(data); err != nil { return err } return nil }