Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public class S3DataSegmentPusherConfig

public void setBucket(String bucket)
{
this.bucket = bucket;
this.bucket = S3Utils.normalizeBucketName(bucket);
}

public void setBaseKey(String baseKey)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public S3LoadSpec(
{
Preconditions.checkNotNull(bucket);
Preconditions.checkNotNull(key);
this.bucket = bucket;
this.bucket = S3Utils.normalizeBucketName(bucket);
this.key = key;
this.rangeable = rangeable;
this.puller = puller;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
Expand All @@ -63,6 +65,7 @@ public class S3Utils
{
private static final String SCHEME = S3StorageDruidModule.SCHEME;
private static final Joiner JOINER = Joiner.on("/").skipNulls();
private static final Pattern S3_ARN = Pattern.compile("^arn:(aws|aws-cn|aws-us-gov):s3:[a-z0-9-]*:\\d{12}:accesspoint[:/][A-Za-z0-9.-]+$");
private static final Logger log = new Logger(S3Utils.class);

/**
Expand Down Expand Up @@ -129,6 +132,11 @@ public static <T> T retryS3Operation(Task<T> f) throws Exception
return RetryUtils.retry(f, S3RETRY, RetryUtils.DEFAULT_MAX_TRIES);
}

public static boolean isS3Arn(String value)
{
return value != null && S3_ARN.matcher(value).matches();
}

/**
* Retries S3 operations that fail intermittently (due to io-related exceptions, during obtaining credentials, etc).
* Service-level exceptions (access denied, file not found, etc) are not retried.
Expand Down Expand Up @@ -264,6 +272,20 @@ static String grantFullControlHeaderValue(@Nullable Grant grant)
return null;
}

/**
* Normalizes a bucket name or ARN by replacing '/' with ':'.
* Some inputs may provide Access Point ARNs in the form
* arn:aws:s3::<account>:accesspoint/alias.mrap, which should be normalized to
* arn:aws:s3::<account>:accesspoint:alias.mrap.
*/
public static String normalizeBucketName(String bucket)
{
return Optional.ofNullable(bucket)
.filter(S3Utils::isS3Arn)
.map(arn -> arn.replace('/', ':'))
.orElse(bucket);
}

public static String extractS3Key(URI uri)
{
return StringUtils.maybeRemoveLeadingSlash(uri.getPath());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,4 +449,32 @@ public void testUseHttpsDefaultClientConfigSchemelessEndpointReturnsTrue() throw
// Sanity check: default AWSClientConfig protocol is "https"; schemeless URL inherits "https".
Assert.assertTrue(S3Utils.useHttps(new AWSClientConfig(), endpointWith("{\"url\":\"s3.example.com\"}")));
}

@Test
public void testBucketNormalizationForMRAP()
{
String bucket = "arn:aws:s3::123456789123:accesspoint/bucket.mrap";
Assert.assertEquals("arn:aws:s3::123456789123:accesspoint:bucket.mrap", S3Utils.normalizeBucketName(bucket));
}

@Test
public void testBucketNormalizationForRegional()
{
String bucket = "arn:aws:s3:us-east-1:123456789123:accesspoint/bucket";
Assert.assertEquals("arn:aws:s3:us-east-1:123456789123:accesspoint:bucket", S3Utils.normalizeBucketName(bucket));
}

@Test
public void testAlreadyNormalizedBucket()
{
String bucket = "arn:aws:s3::123456789123:accesspoint:bucket.mrap";
Assert.assertEquals(bucket, S3Utils.normalizeBucketName(bucket));
}

@Test
public void testNonS3Bucket()
{
String bucket = "bucket/subpath";
Assert.assertEquals(bucket, S3Utils.normalizeBucketName(bucket));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import java.net.URI;
import java.util.Objects;
import java.util.regex.Pattern;

/**
* Common type for 'bucket' and 'path' concept of cloud objects to allow code sharing between cloud specific
Expand All @@ -46,6 +47,8 @@
*/
public class CloudObjectLocation
{
private static final Pattern S3_ARN = Pattern.compile("^arn:(aws|aws-cn|aws-us-gov):s3:[a-z0-9-]*:\\d{12}:accesspoint[:/][A-Za-z0-9.-]+$");

public static URI validateUriScheme(String scheme, URI uri)
{
if (!scheme.equalsIgnoreCase(uri.getScheme())) {
Expand All @@ -60,20 +63,37 @@ public static URI validateUriScheme(String scheme, URI uri)
@JsonCreator
public CloudObjectLocation(@JsonProperty("bucket") String bucket, @JsonProperty("path") String path)
{
this.bucket = Preconditions.checkNotNull(StringUtils.maybeRemoveTrailingSlash(bucket),
this.bucket = Preconditions.checkNotNull(normalizeBucket(bucket),
"bucket name cannot be null. Please verify if bucket name adheres to naming rules");
this.path = Preconditions.checkNotNull(StringUtils.maybeRemoveLeadingSlash(path));
Preconditions.checkArgument(
this.bucket.equals(StringUtils.urlEncode(this.bucket)),
"bucket must follow DNS-compliant naming conventions"
this.bucket.equals(StringUtils.urlEncode(this.bucket)) || isS3Arn(this.bucket),
Comment thread
vivek807 marked this conversation as resolved.
"bucket must follow DNS-compliant naming conventions or be a valid S3 Access Point ARN"
+ " or S3 Multi-Region Access Point ARN"
);
}

private static String normalizeBucket(String bucket)
{
String trimmed = StringUtils.maybeRemoveTrailingSlash(bucket);

if (trimmed != null && isS3Arn(trimmed)) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P2] Handle slash-form MRAP ARNs before URI splitting

This normalization only works when the full ARN is already passed as the bucket string. For a URI like s3://arn:aws:s3::123456789123:accesspoint/bucket.mrap/path, java.net.URI splits the value into authority arn:aws:s3::123456789123:accesspoint and path /bucket.mrap/path, so the bucket passed here does not match S3_ARN and the slash-form MRAP URI remains rejected or parsed with the access point name as part of the key. Please normalize this form before the URI constructor splits bucket/path, or add validation/tests that explicitly reject it.

return trimmed.replace('/', ':');
}

return trimmed;
}

public CloudObjectLocation(URI uri)
{
this(uri.getHost() != null ? uri.getHost() : uri.getAuthority(), uri.getPath());
}

private static boolean isS3Arn(String value)
{
return value != null && S3_ARN.matcher(value).matches();
}

/**
* Given a scheme, encode {@link #bucket} and {@link #path} into a {@link URI}.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,44 @@ public void testBucketNameWithUnderscores()
Assertions.assertEquals("test_bucket", s3ValidBucket.getBucket());
Assertions.assertEquals("path/to/path", s3ValidBucket.getPath());
}

@Test
void testMRAP()
{
CloudObjectLocation location = new CloudObjectLocation("arn:aws:s3::123456789123:accesspoint:bucket.mrap", "path/to/path");
Assertions.assertEquals("arn:aws:s3::123456789123:accesspoint:bucket.mrap", location.getBucket());
Assertions.assertEquals("path/to/path", location.getPath());
}

@Test
void testMRAPWithURI()
{
CloudObjectLocation location = new CloudObjectLocation(URI.create("s3://arn:aws:s3::123456789123:accesspoint:bucket.mrap/path/to/path"));
Assertions.assertEquals("arn:aws:s3::123456789123:accesspoint:bucket.mrap", location.getBucket());
Assertions.assertEquals("path/to/path", location.getPath());
}

@Test
void testRegionalARN()
{
CloudObjectLocation location = new CloudObjectLocation("arn:aws:s3:us-east-1:123456789123:accesspoint:bucket", "path/to/path");
Assertions.assertEquals("arn:aws:s3:us-east-1:123456789123:accesspoint:bucket", location.getBucket());
Assertions.assertEquals("path/to/path", location.getPath());
}

@Test
void testRegionARNWithURI()
{
CloudObjectLocation location = new CloudObjectLocation(URI.create("s3://arn:aws:s3:us-east-1:123456789123:accesspoint:bucket/path/to/path"));
Assertions.assertEquals("arn:aws:s3:us-east-1:123456789123:accesspoint:bucket", location.getBucket());
Assertions.assertEquals("path/to/path", location.getPath());
}

@Test
void testMRAPWithPlusInKey()
{
CloudObjectLocation location = new CloudObjectLocation(URI.create("s3://arn:aws:s3::123456789123:accesspoint:bucket.mrap/path/with+plus"));
Assertions.assertEquals("arn:aws:s3::123456789123:accesspoint:bucket.mrap", location.getBucket());
Assertions.assertEquals("path/with+plus", location.getPath());
}
}
Loading