[Enhancement] Support Azure Workload Identity authentication for Azure Data Lake Storage Gen2 (#62754)

Signed-off-by: Cosmin Constantin Lazar <cosminconstantinlazar@gmail.com>
This commit is contained in:
Cosmin Lazar 2025-09-05 04:27:46 +02:00 committed by GitHub
parent 6b3d00852b
commit 7c9b15732b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 47 additions and 5 deletions

View File

@ -30,6 +30,7 @@ import static com.starrocks.connector.share.credential.CloudConfigurationConstan
import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_ID;
import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_SECRET;
import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TENANT_ID;
import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TOKEN_FILE;
import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_USE_MANAGED_IDENTITY;
import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_SAS_TOKEN;
import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_SHARED_KEY;
@ -99,7 +100,8 @@ public class AzureCloudConfigurationProvider implements CloudConfigurationProvid
properties.getOrDefault(AZURE_ADLS2_SHARED_KEY, ""),
properties.getOrDefault(AZURE_ADLS2_SAS_TOKEN, ""),
properties.getOrDefault(AZURE_ADLS2_OAUTH2_CLIENT_SECRET, ""),
properties.getOrDefault(AZURE_ADLS2_OAUTH2_CLIENT_ENDPOINT, "")
properties.getOrDefault(AZURE_ADLS2_OAUTH2_CLIENT_ENDPOINT, ""),
properties.getOrDefault(AZURE_ADLS2_OAUTH2_TOKEN_FILE, "")
);
if (adls2.validate()) {
return new AzureCloudConfiguration(adls2);

View File

@ -29,6 +29,7 @@ import org.apache.hadoop.fs.adl.AdlConfKeys;
import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys;
import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider;
import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider;
import org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -40,7 +41,7 @@ import java.util.Map;
// For Azure Data Lake Gen1 (adl://)
// We support Managed Service Identity & Service Principal
// For Azure Data Lake Gen2 (abfs:// & abfss://)
// We support Managed Identity & Shared Key & Service Principal
// We support Managed Identity & Shared Key & Service Principal & Workload Identity
abstract class AzureStorageCloudCredential implements CloudCredential {
public static final Logger LOG = LogManager.getLogger(AzureStorageCloudCredential.class);
@ -231,10 +232,11 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
private final String sasToken;
private final String oauth2ClientSecret;
private final String oauth2ClientEndpoint;
private final String oauth2TokenFile;
public AzureADLS2CloudCredential(String endpoint, boolean oauth2ManagedIdentity, String oauth2TenantId, String oauth2ClientId,
String storageAccount, String sharedKey, String sasToken, String oauth2ClientSecret,
String oauth2ClientEndpoint) {
String oauth2ClientEndpoint, String oauth2TokenFile) {
Preconditions.checkNotNull(endpoint);
Preconditions.checkNotNull(oauth2TenantId);
Preconditions.checkNotNull(oauth2ClientId);
@ -243,6 +245,7 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
Preconditions.checkNotNull(sasToken);
Preconditions.checkNotNull(oauth2ClientSecret);
Preconditions.checkNotNull(oauth2ClientEndpoint);
Preconditions.checkNotNull(oauth2TokenFile);
this.endpoint = endpoint;
this.oauth2ManagedIdentity = oauth2ManagedIdentity;
@ -253,6 +256,7 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
this.sasToken = sasToken;
this.oauth2ClientSecret = oauth2ClientSecret;
this.oauth2ClientEndpoint = oauth2ClientEndpoint;
this.oauth2TokenFile = oauth2TokenFile;
tryGenerateConfigurationMap();
}
@ -315,6 +319,18 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
oauth2ClientSecret);
generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT),
oauth2ClientEndpoint);
} else if (!oauth2TokenFile.isEmpty() && !oauth2TenantId.isEmpty() && !oauth2ClientId.isEmpty()) {
generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME),
"OAuth");
generatedConfigurationMap.put(
createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME),
WorkloadIdentityTokenProvider.class.getName());
generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE),
oauth2TokenFile);
generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID),
oauth2ClientId);
generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT),
oauth2TenantId);
}
}
@ -330,6 +346,7 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
", sasToken='" + sasToken + '\'' +
", oauth2ClientSecret='" + oauth2ClientSecret + '\'' +
", oauth2ClientEndpoint='" + oauth2ClientEndpoint + '\'' +
", oauth2TokenFile='" + oauth2TokenFile + '\'' +
'}';
}

View File

@ -83,7 +83,7 @@ public class CloudConfigurationFactoryTest {
"AzureCloudConfiguration{resources='', jars='', hdpuser='', " +
"cred=AzureADLS2CloudCredential{oauth2ManagedIdentity=false, oauth2TenantId='', oauth2ClientId='', " +
"endpoint='account.dfs.core.windows.net', storageAccount='', sharedKey='', " +
"sasToken='sas_token', oauth2ClientSecret='', oauth2ClientEndpoint=''}}",
"sasToken='sas_token', oauth2ClientSecret='', oauth2ClientEndpoint='', oauth2TokenFile=''}}",
cloudConfiguration.toConfString());
map = new HashMap<>();
@ -281,6 +281,7 @@ public class CloudConfigurationFactoryTest {
put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_ENDPOINT, "XX");
put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_SECRET, "XX");
put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_USE_MANAGED_IDENTITY, "XX");
put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TOKEN_FILE, "XX");
}
};
CloudConfiguration cc = CloudConfigurationFactory.buildCloudConfigurationForStorage(map);
@ -294,7 +295,7 @@ public class CloudConfigurationFactoryTest {
"AzureCloudConfiguration{resources='', jars='', hdpuser='', " +
"cred=AzureADLS2CloudCredential{oauth2ManagedIdentity=false, oauth2TenantId='XX', " +
"oauth2ClientId='XX', endpoint='', storageAccount='XX', sharedKey='XX', sasToken='', " +
"oauth2ClientSecret='XX', oauth2ClientEndpoint='XX'}}");
"oauth2ClientSecret='XX', oauth2ClientEndpoint='XX', oauth2TokenFile='XX'}}");
}
@Test
@ -340,6 +341,27 @@ public class CloudConfigurationFactoryTest {
Assertions.assertEquals("client-id", conf.get("fs.azure.account.oauth2.client.id"));
}
/**
 * Verifies the ADLS Gen2 Workload Identity path: when only a client id, a tenant id and a
 * token file are supplied, the resulting cloud configuration is of type AZURE and applies
 * OAuth settings backed by {@code WorkloadIdentityTokenProvider} to the Hadoop configuration.
 */
@Test
public void testAzureADLS2WorkloadIdentity() {
    // A plain HashMap is sufficient here; double-brace initialization would create a
    // needless anonymous subclass.
    Map<String, String> map = new HashMap<>();
    map.put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_ID, "client-id");
    map.put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TENANT_ID, "tenant-id");
    map.put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TOKEN_FILE, "/path/to/token");

    CloudConfiguration cc = CloudConfigurationFactory.buildCloudConfigurationForStorage(map);
    // JUnit convention is (expected, actual); keeping that order makes failure messages correct.
    Assertions.assertEquals(CloudType.AZURE, cc.getCloudType());

    Configuration conf = new Configuration();
    cc.applyToConfiguration(conf);

    Assertions.assertEquals("OAuth", conf.get("fs.azure.account.auth.type"));
    Assertions.assertEquals("org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider",
            conf.get("fs.azure.account.oauth.provider.type"));
    Assertions.assertEquals("tenant-id", conf.get("fs.azure.account.oauth2.msi.tenant"));
    // The client id is written by the same branch as the tenant and token file, so pin it as well.
    Assertions.assertEquals("client-id", conf.get("fs.azure.account.oauth2.client.id"));
    Assertions.assertEquals("/path/to/token", conf.get("fs.azure.account.oauth2.token.file"));
}
@Test
public void testGCPCloudConfiguration() {
Map<String, String> map = new HashMap<String, String>() {

View File

@ -106,6 +106,7 @@ public class CloudConfigurationConstants {
public static final String AZURE_ADLS2_SAS_TOKEN = "azure.adls2.sas_token";
public static final String AZURE_ADLS2_OAUTH2_CLIENT_SECRET = "azure.adls2.oauth2_client_secret";
public static final String AZURE_ADLS2_OAUTH2_CLIENT_ENDPOINT = "azure.adls2.oauth2_client_endpoint";
// Token file for Azure Workload Identity authentication on ADLS Gen2.
// Used together with the OAuth2 client id and tenant id.
public static final String AZURE_ADLS2_OAUTH2_TOKEN_FILE = "azure.adls2.oauth2_token_file";
// Credential for Google Cloud Platform (GCP)
// For Google Cloud Storage (GCS)