Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public class S3DataSegmentPusherConfig

public void setBucket(String bucket)
{
this.bucket = bucket;
this.bucket = S3Utils.normalizeBucketName(bucket);
}

public void setBaseKey(String baseKey)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public S3LoadSpec(
{
Preconditions.checkNotNull(bucket);
Preconditions.checkNotNull(key);
this.bucket = bucket;
this.bucket = S3Utils.normalizeBucketName(bucket);
this.key = key;
this.rangeable = rangeable;
this.puller = puller;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
Expand All @@ -63,6 +65,7 @@ public class S3Utils
{
private static final String SCHEME = S3StorageDruidModule.SCHEME;
private static final Joiner JOINER = Joiner.on("/").skipNulls();
private static final Pattern S3_ARN = Pattern.compile("^arn:(aws|aws-cn|aws-us-gov):s3:[a-z0-9-]*:\\d{12}:accesspoint[:/][A-Za-z0-9.-]+$");
private static final Logger log = new Logger(S3Utils.class);

/**
Expand Down Expand Up @@ -129,6 +132,11 @@ public static <T> T retryS3Operation(Task<T> f) throws Exception
return RetryUtils.retry(f, S3RETRY, RetryUtils.DEFAULT_MAX_TRIES);
}

public static boolean isS3Arn(String value)
{
return value != null && S3_ARN.matcher(value).matches();
}

/**
* Retries S3 operations that fail intermittently (due to io-related exceptions, during obtaining credentials, etc).
* Service-level exceptions (access denied, file not found, etc) are not retried.
Expand Down Expand Up @@ -264,6 +272,20 @@ static String grantFullControlHeaderValue(@Nullable Grant grant)
return null;
}

/**
* Normalizes a bucket name or ARN by replacing '/' with ':'.
* Some inputs may provide Access Point ARNs in the form
* arn:aws:s3::<account>:accesspoint/alias.mrap, which should be normalized to
* arn:aws:s3::<account>:accesspoint:alias.mrap.
*/
public static String normalizeBucketName(String bucket)
{
return Optional.ofNullable(bucket)
.filter(S3Utils::isS3Arn)
.map(arn -> arn.replace('/', ':'))
.orElse(bucket);
}

public static String extractS3Key(URI uri)
{
return StringUtils.maybeRemoveLeadingSlash(uri.getPath());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,4 +449,32 @@ public void testUseHttpsDefaultClientConfigSchemelessEndpointReturnsTrue() throw
// Sanity check: default AWSClientConfig protocol is "https"; schemeless URL inherits "https".
Assert.assertTrue(S3Utils.useHttps(new AWSClientConfig(), endpointWith("{\"url\":\"s3.example.com\"}")));
}

@Test
public void testBucketNormalizationForMRAP()
{
String bucket = "arn:aws:s3::123456789123:accesspoint/bucket.mrap";
Assert.assertEquals("arn:aws:s3::123456789123:accesspoint:bucket.mrap", S3Utils.normalizeBucketName(bucket));
}

@Test
public void testBucketNormalizationForRegional()
{
String bucket = "arn:aws:s3:us-east-1:123456789123:accesspoint/bucket";
Assert.assertEquals("arn:aws:s3:us-east-1:123456789123:accesspoint:bucket", S3Utils.normalizeBucketName(bucket));
}

@Test
public void testAlreadyNormalizedBucket()
{
String bucket = "arn:aws:s3::123456789123:accesspoint:bucket.mrap";
Assert.assertEquals(bucket, S3Utils.normalizeBucketName(bucket));
}

@Test
public void testNonS3Bucket()
{
String bucket = "bucket/subpath";
Assert.assertEquals(bucket, S3Utils.normalizeBucketName(bucket));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import java.net.URI;
import java.util.Objects;
import java.util.regex.Pattern;

/**
* Common type for 'bucket' and 'path' concept of cloud objects to allow code sharing between cloud specific
Expand All @@ -46,6 +47,8 @@
*/
public class CloudObjectLocation
{
private static final Pattern S3_ARN = Pattern.compile("^arn:(aws|aws-cn|aws-us-gov):s3:[a-z0-9-]*:\\d{12}:accesspoint[:/][A-Za-z0-9.-]+$");

public static URI validateUriScheme(String scheme, URI uri)
{
if (!scheme.equalsIgnoreCase(uri.getScheme())) {
Expand All @@ -64,8 +67,9 @@ public CloudObjectLocation(@JsonProperty("bucket") String bucket, @JsonProperty(
"bucket name cannot be null. Please verify if bucket name adheres to naming rules");
this.path = Preconditions.checkNotNull(StringUtils.maybeRemoveLeadingSlash(path));
Preconditions.checkArgument(
this.bucket.equals(StringUtils.urlEncode(this.bucket)),
"bucket must follow DNS-compliant naming conventions"
this.bucket.equals(StringUtils.urlEncode(this.bucket)) || isS3Arn(this.bucket),
Comment thread
vivek807 marked this conversation as resolved.
"bucket must follow DNS-compliant naming conventions or be a valid S3 Access Point ARN"
+ " or S3 Multi-Region Access Point ARN"
);
}

Expand All @@ -74,6 +78,11 @@ public CloudObjectLocation(URI uri)
this(uri.getHost() != null ? uri.getHost() : uri.getAuthority(), uri.getPath());
}

private static boolean isS3Arn(String value)
{
return value != null && S3_ARN.matcher(value).matches();
}

/**
* Given a scheme, encode {@link #bucket} and {@link #path} into a {@link URI}.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,44 @@ public void testBucketNameWithUnderscores()
Assertions.assertEquals("test_bucket", s3ValidBucket.getBucket());
Assertions.assertEquals("path/to/path", s3ValidBucket.getPath());
}

@Test
void testMRAP()
{
CloudObjectLocation location = new CloudObjectLocation("arn:aws:s3::123456789123:accesspoint:bucket.mrap", "path/to/path");
Assertions.assertEquals("arn:aws:s3::123456789123:accesspoint:bucket.mrap", location.getBucket());
Assertions.assertEquals("path/to/path", location.getPath());
}

@Test
void testMRAPWithURI()
{
CloudObjectLocation location = new CloudObjectLocation(URI.create("s3://arn:aws:s3::123456789123:accesspoint:bucket.mrap/path/to/path"));
Assertions.assertEquals("arn:aws:s3::123456789123:accesspoint:bucket.mrap", location.getBucket());
Assertions.assertEquals("path/to/path", location.getPath());
}

@Test
void testRegionalARN()
{
CloudObjectLocation location = new CloudObjectLocation("arn:aws:s3:us-east-1:123456789123:accesspoint:bucket", "path/to/path");
Assertions.assertEquals("arn:aws:s3:us-east-1:123456789123:accesspoint:bucket", location.getBucket());
Assertions.assertEquals("path/to/path", location.getPath());
}

@Test
void testRegionARNWithURI()
{
CloudObjectLocation location = new CloudObjectLocation(URI.create("s3://arn:aws:s3:us-east-1:123456789123:accesspoint:bucket/path/to/path"));
Assertions.assertEquals("arn:aws:s3:us-east-1:123456789123:accesspoint:bucket", location.getBucket());
Assertions.assertEquals("path/to/path", location.getPath());
}

@Test
void testMRAPWithPlusInKey()
{
CloudObjectLocation location = new CloudObjectLocation(URI.create("s3://arn:aws:s3::123456789123:accesspoint:bucket.mrap/path/with+plus"));
Assertions.assertEquals("arn:aws:s3::123456789123:accesspoint:bucket.mrap", location.getBucket());
Assertions.assertEquals("path/with+plus", location.getPath());
}
}
Loading