mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-19 05:37:51 +08:00
### What problem does this PR solve? Implement: minio, s3, oss, azure_sas, azure_spn, gcs, opendal ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
426 lines
11 KiB
Go
426 lines
11 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package storage
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/aws/aws-sdk-go-v2/aws"
|
|
"github.com/aws/aws-sdk-go-v2/config"
|
|
"github.com/aws/aws-sdk-go-v2/credentials"
|
|
"github.com/aws/aws-sdk-go-v2/service/s3"
|
|
"github.com/aws/smithy-go"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// S3Config holds AWS S3 storage configuration. Fields are populated from
// configuration files via mapstructure tags; only AccessKeyID/SecretAccessKey
// and Region are typically required — the rest are optional overrides.
type S3Config struct {
	AccessKeyID      string `mapstructure:"access_key"`        // AWS Access Key ID
	SecretAccessKey  string `mapstructure:"secret_key"`        // AWS Secret Access Key
	SessionToken     string `mapstructure:"session_token"`     // AWS Session Token (optional, for temporary credentials)
	Region           string `mapstructure:"region_name"`       // AWS Region
	EndpointURL      string `mapstructure:"endpoint_url"`      // Custom endpoint (optional, for S3-compatible services)
	SignatureVersion string `mapstructure:"signature_version"` // Signature version (read from config; not applied in connect() — see reviewer note there)
	AddressingStyle  string `mapstructure:"addressing_style"`  // Addressing style (read from config; not applied in connect())
	Bucket           string `mapstructure:"bucket"`            // Default bucket (optional; overrides per-call bucket when set)
	PrefixPath       string `mapstructure:"prefix_path"`       // Path prefix (optional; namespaces keys as prefix/bucket/name)
}
|
|
|
|
// S3Storage implements Storage interface for AWS S3.
// bucket and prefixPath are cached copies of the corresponding config fields;
// when bucket is non-empty it overrides the logical bucket passed to each call.
type S3Storage struct {
	client     *s3.Client // SDK client; rebuilt by reconnect() on errors
	bucket     string     // default physical bucket ("" = use caller-supplied bucket)
	prefixPath string     // key prefix ("" = keys used verbatim)
	config     *S3Config  // retained so reconnect() can rebuild the client
}
|
|
|
|
// NewS3Storage creates a new S3 storage instance
|
|
func NewS3Storage(config *S3Config) (*S3Storage, error) {
|
|
storage := &S3Storage{
|
|
bucket: config.Bucket,
|
|
prefixPath: config.PrefixPath,
|
|
config: config,
|
|
}
|
|
|
|
if err := storage.connect(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return storage, nil
|
|
}
|
|
|
|
func (s *S3Storage) connect() error {
|
|
ctx := context.Background()
|
|
|
|
var opts []func(*config.LoadOptions) error
|
|
|
|
// Configure region
|
|
if s.config.Region != "" {
|
|
opts = append(opts, config.WithRegion(s.config.Region))
|
|
}
|
|
|
|
// Configure credentials if provided
|
|
if s.config.AccessKeyID != "" && s.config.SecretAccessKey != "" {
|
|
creds := credentials.NewStaticCredentialsProvider(
|
|
s.config.AccessKeyID,
|
|
s.config.SecretAccessKey,
|
|
s.config.SessionToken,
|
|
)
|
|
opts = append(opts, config.WithCredentialsProvider(creds))
|
|
}
|
|
|
|
// Load configuration
|
|
cfg, err := config.LoadDefaultConfig(ctx, opts...)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to load AWS config: %w", err)
|
|
}
|
|
|
|
// Create S3 client with custom endpoint if provided
|
|
clientOpts := []func(*s3.Options){}
|
|
if s.config.EndpointURL != "" {
|
|
clientOpts = append(clientOpts, func(o *s3.Options) {
|
|
o.BaseEndpoint = aws.String(s.config.EndpointURL)
|
|
})
|
|
}
|
|
|
|
s.client = s3.NewFromConfig(cfg, clientOpts...)
|
|
return nil
|
|
}
|
|
|
|
func (s *S3Storage) reconnect() {
|
|
if err := s.connect(); err != nil {
|
|
zap.L().Error("Failed to reconnect to S3", zap.Error(err))
|
|
}
|
|
}
|
|
|
|
func (s *S3Storage) resolveBucketAndPath(bucket, fnm string) (string, string) {
|
|
actualBucket := bucket
|
|
if s.bucket != "" {
|
|
actualBucket = s.bucket
|
|
}
|
|
|
|
actualPath := fnm
|
|
if s.prefixPath != "" {
|
|
actualPath = fmt.Sprintf("%s/%s/%s", s.prefixPath, bucket, fnm)
|
|
}
|
|
|
|
return actualBucket, actualPath
|
|
}
|
|
|
|
// Health checks S3 service availability
|
|
func (s *S3Storage) Health() bool {
|
|
bucket := s.bucket
|
|
if bucket == "" {
|
|
bucket = "health-check-bucket"
|
|
}
|
|
|
|
fnm := "txtxtxtxt1"
|
|
if s.prefixPath != "" {
|
|
fnm = fmt.Sprintf("%s/%s", s.prefixPath, fnm)
|
|
}
|
|
binary := []byte("_t@@@1")
|
|
|
|
ctx := context.Background()
|
|
|
|
// Ensure bucket exists
|
|
if !s.BucketExists(bucket) {
|
|
_, err := s.client.CreateBucket(ctx, &s3.CreateBucketInput{
|
|
Bucket: aws.String(bucket),
|
|
})
|
|
if err != nil {
|
|
zap.L().Error("Failed to create bucket for health check", zap.String("bucket", bucket), zap.Error(err))
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Try to upload a test object
|
|
reader := bytes.NewReader(binary)
|
|
_, err := s.client.PutObject(ctx, &s3.PutObjectInput{
|
|
Bucket: aws.String(bucket),
|
|
Key: aws.String(fnm),
|
|
Body: reader,
|
|
})
|
|
|
|
if err != nil {
|
|
zap.L().Error("Health check failed", zap.Error(err))
|
|
return false
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// Put uploads an object to S3
|
|
func (s *S3Storage) Put(bucket, fnm string, binary []byte, tenantID ...string) error {
|
|
bucket, fnm = s.resolveBucketAndPath(bucket, fnm)
|
|
|
|
ctx := context.Background()
|
|
|
|
for i := 0; i < 2; i++ {
|
|
// Ensure bucket exists
|
|
if !s.BucketExists(bucket) {
|
|
_, err := s.client.CreateBucket(ctx, &s3.CreateBucketInput{
|
|
Bucket: aws.String(bucket),
|
|
})
|
|
if err != nil {
|
|
zap.L().Error("Failed to create bucket", zap.String("bucket", bucket), zap.Error(err))
|
|
s.reconnect()
|
|
time.Sleep(time.Second)
|
|
continue
|
|
}
|
|
zap.L().Info("Created bucket", zap.String("bucket", bucket))
|
|
}
|
|
|
|
reader := bytes.NewReader(binary)
|
|
_, err := s.client.PutObject(ctx, &s3.PutObjectInput{
|
|
Bucket: aws.String(bucket),
|
|
Key: aws.String(fnm),
|
|
Body: reader,
|
|
})
|
|
if err != nil {
|
|
zap.L().Error("Failed to put object", zap.String("bucket", bucket), zap.String("key", fnm), zap.Error(err))
|
|
s.reconnect()
|
|
time.Sleep(time.Second)
|
|
continue
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
return fmt.Errorf("failed to put object after retries")
|
|
}
|
|
|
|
// Get retrieves an object from S3
|
|
func (s *S3Storage) Get(bucket, fnm string, tenantID ...string) ([]byte, error) {
|
|
bucket, fnm = s.resolveBucketAndPath(bucket, fnm)
|
|
|
|
ctx := context.Background()
|
|
|
|
for i := 0; i < 2; i++ {
|
|
result, err := s.client.GetObject(ctx, &s3.GetObjectInput{
|
|
Bucket: aws.String(bucket),
|
|
Key: aws.String(fnm),
|
|
})
|
|
if err != nil {
|
|
zap.L().Error("Failed to get object", zap.String("bucket", bucket), zap.String("key", fnm), zap.Error(err))
|
|
s.reconnect()
|
|
time.Sleep(time.Second)
|
|
continue
|
|
}
|
|
defer result.Body.Close()
|
|
|
|
buf := new(bytes.Buffer)
|
|
if _, err := buf.ReadFrom(result.Body); err != nil {
|
|
zap.L().Error("Failed to read object data", zap.String("bucket", bucket), zap.String("key", fnm), zap.Error(err))
|
|
s.reconnect()
|
|
time.Sleep(time.Second)
|
|
continue
|
|
}
|
|
|
|
return buf.Bytes(), nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("failed to get object after retries")
|
|
}
|
|
|
|
// Rm removes an object from S3
|
|
func (s *S3Storage) Rm(bucket, fnm string, tenantID ...string) error {
|
|
bucket, fnm = s.resolveBucketAndPath(bucket, fnm)
|
|
|
|
ctx := context.Background()
|
|
|
|
_, err := s.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
|
Bucket: aws.String(bucket),
|
|
Key: aws.String(fnm),
|
|
})
|
|
if err != nil {
|
|
zap.L().Error("Failed to remove object", zap.String("bucket", bucket), zap.String("key", fnm), zap.Error(err))
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ObjExist checks if an object exists in S3
|
|
func (s *S3Storage) ObjExist(bucket, fnm string, tenantID ...string) bool {
|
|
bucket, fnm = s.resolveBucketAndPath(bucket, fnm)
|
|
|
|
ctx := context.Background()
|
|
|
|
_, err := s.client.HeadObject(ctx, &s3.HeadObjectInput{
|
|
Bucket: aws.String(bucket),
|
|
Key: aws.String(fnm),
|
|
})
|
|
if err != nil {
|
|
if isS3NotFound(err) {
|
|
return false
|
|
}
|
|
return false
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// GetPresignedURL generates a presigned URL for accessing an object
|
|
func (s *S3Storage) GetPresignedURL(bucket, fnm string, expires time.Duration, tenantID ...string) (string, error) {
|
|
bucket, fnm = s.resolveBucketAndPath(bucket, fnm)
|
|
|
|
ctx := context.Background()
|
|
|
|
presignClient := s3.NewPresignClient(s.client)
|
|
|
|
for i := 0; i < 10; i++ {
|
|
req, err := presignClient.PresignGetObject(ctx, &s3.GetObjectInput{
|
|
Bucket: aws.String(bucket),
|
|
Key: aws.String(fnm),
|
|
}, s3.WithPresignExpires(expires))
|
|
if err != nil {
|
|
zap.L().Error("Failed to generate presigned URL", zap.String("bucket", bucket), zap.String("key", fnm), zap.Error(err))
|
|
s.reconnect()
|
|
time.Sleep(time.Second)
|
|
continue
|
|
}
|
|
|
|
return req.URL, nil
|
|
}
|
|
|
|
return "", fmt.Errorf("failed to generate presigned URL after 10 retries")
|
|
}
|
|
|
|
// BucketExists checks if a bucket exists
|
|
func (s *S3Storage) BucketExists(bucket string) bool {
|
|
actualBucket := bucket
|
|
if s.bucket != "" {
|
|
actualBucket = s.bucket
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
_, err := s.client.HeadBucket(ctx, &s3.HeadBucketInput{
|
|
Bucket: aws.String(actualBucket),
|
|
})
|
|
if err != nil {
|
|
zap.L().Debug("Bucket does not exist or error", zap.String("bucket", actualBucket), zap.Error(err))
|
|
return false
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// RemoveBucket removes a bucket and all its objects
|
|
func (s *S3Storage) RemoveBucket(bucket string) error {
|
|
actualBucket := bucket
|
|
if s.bucket != "" {
|
|
actualBucket = s.bucket
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
// Check if bucket exists
|
|
if !s.BucketExists(actualBucket) {
|
|
return nil
|
|
}
|
|
|
|
// List and delete all objects
|
|
listInput := &s3.ListObjectsV2Input{
|
|
Bucket: aws.String(actualBucket),
|
|
}
|
|
|
|
for {
|
|
result, err := s.client.ListObjectsV2(ctx, listInput)
|
|
if err != nil {
|
|
zap.L().Error("Failed to list objects", zap.String("bucket", actualBucket), zap.Error(err))
|
|
return err
|
|
}
|
|
|
|
for _, obj := range result.Contents {
|
|
_, err := s.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
|
Bucket: aws.String(actualBucket),
|
|
Key: obj.Key,
|
|
})
|
|
if err != nil {
|
|
zap.L().Error("Failed to delete object", zap.String("bucket", actualBucket), zap.Error(err))
|
|
}
|
|
}
|
|
|
|
if result.IsTruncated == nil || !*result.IsTruncated {
|
|
break
|
|
}
|
|
listInput.ContinuationToken = result.NextContinuationToken
|
|
}
|
|
|
|
// Delete bucket
|
|
_, err := s.client.DeleteBucket(ctx, &s3.DeleteBucketInput{
|
|
Bucket: aws.String(actualBucket),
|
|
})
|
|
if err != nil {
|
|
zap.L().Error("Failed to delete bucket", zap.String("bucket", actualBucket), zap.Error(err))
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Copy copies an object from source to destination
|
|
func (s *S3Storage) Copy(srcBucket, srcPath, destBucket, destPath string) bool {
|
|
srcBucket, srcPath = s.resolveBucketAndPath(srcBucket, srcPath)
|
|
destBucket, destPath = s.resolveBucketAndPath(destBucket, destPath)
|
|
|
|
ctx := context.Background()
|
|
|
|
copySource := fmt.Sprintf("%s/%s", srcBucket, srcPath)
|
|
|
|
_, err := s.client.CopyObject(ctx, &s3.CopyObjectInput{
|
|
Bucket: aws.String(destBucket),
|
|
Key: aws.String(destPath),
|
|
CopySource: aws.String(copySource),
|
|
})
|
|
if err != nil {
|
|
zap.L().Error("Failed to copy object", zap.String("src", copySource), zap.String("dest", fmt.Sprintf("%s/%s", destBucket, destPath)), zap.Error(err))
|
|
return false
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// Move moves an object from source to destination
|
|
func (s *S3Storage) Move(srcBucket, srcPath, destBucket, destPath string) bool {
|
|
if s.Copy(srcBucket, srcPath, destBucket, destPath) {
|
|
if err := s.Rm(srcBucket, srcPath); err != nil {
|
|
zap.L().Error("Failed to remove source object after copy", zap.String("bucket", srcBucket), zap.String("key", srcPath), zap.Error(err))
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// isNotFound checks if the error is a not found error
|
|
func isS3NotFound(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
var apiErr smithy.APIError
|
|
if errors.As(err, &apiErr) {
|
|
return apiErr.ErrorCode() == "NotFound" || apiErr.ErrorCode() == "404" || apiErr.ErrorCode() == "NoSuchKey"
|
|
}
|
|
return false
|
|
}
|