From 7c9b15732b3d2164e00d38b72601db1ccbc7b27d Mon Sep 17 00:00:00 2001
From: Cosmin Lazar
Date: Fri, 5 Sep 2025 04:27:46 +0200
Subject: [PATCH] [Enhancement] Support Azure Workload Identity authentication for Azure Data Lake Storage Gen2 (#62754)

Signed-off-by: Cosmin Constantin Lazar
---
 .../AzureCloudConfigurationProvider.java      |  4 ++-
 .../azure/AzureStorageCloudCredential.java    | 21 +++++++++++++--
 .../CloudConfigurationFactoryTest.java        | 26 +++++++++++++++++--
 .../CloudConfigurationConstants.java          |  1 +
 4 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/fe/fe-core/src/main/java/com/starrocks/credential/azure/AzureCloudConfigurationProvider.java b/fe/fe-core/src/main/java/com/starrocks/credential/azure/AzureCloudConfigurationProvider.java
index 9699837c608..b1014220305 100644
--- a/fe/fe-core/src/main/java/com/starrocks/credential/azure/AzureCloudConfigurationProvider.java
+++ b/fe/fe-core/src/main/java/com/starrocks/credential/azure/AzureCloudConfigurationProvider.java
@@ -30,6 +30,7 @@ import static com.starrocks.connector.share.credential.CloudConfigurationConstan
 import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_ID;
 import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_SECRET;
 import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TENANT_ID;
+import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TOKEN_FILE;
 import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_USE_MANAGED_IDENTITY;
 import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_SAS_TOKEN;
 import static com.starrocks.connector.share.credential.CloudConfigurationConstants.AZURE_ADLS2_SHARED_KEY;
@@ -99,7 +100,8 @@ public class AzureCloudConfigurationProvider implements CloudConfigurationProvid
                 properties.getOrDefault(AZURE_ADLS2_SHARED_KEY, ""),
                 properties.getOrDefault(AZURE_ADLS2_SAS_TOKEN, ""),
                 properties.getOrDefault(AZURE_ADLS2_OAUTH2_CLIENT_SECRET, ""),
-                properties.getOrDefault(AZURE_ADLS2_OAUTH2_CLIENT_ENDPOINT, "")
+                properties.getOrDefault(AZURE_ADLS2_OAUTH2_CLIENT_ENDPOINT, ""),
+                properties.getOrDefault(AZURE_ADLS2_OAUTH2_TOKEN_FILE, "")
         );
         if (adls2.validate()) {
             return new AzureCloudConfiguration(adls2);
diff --git a/fe/fe-core/src/main/java/com/starrocks/credential/azure/AzureStorageCloudCredential.java b/fe/fe-core/src/main/java/com/starrocks/credential/azure/AzureStorageCloudCredential.java
index 9d81016570b..cfed1499e25 100644
--- a/fe/fe-core/src/main/java/com/starrocks/credential/azure/AzureStorageCloudCredential.java
+++ b/fe/fe-core/src/main/java/com/starrocks/credential/azure/AzureStorageCloudCredential.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.adl.AdlConfKeys;
 import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys;
 import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider;
 import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider;
+import org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -40,7 +41,7 @@ import java.util.Map;
 // For Azure Data Lake Gen1 (adl://)
 // We support Managed Service Identity & Service Principal
 // For Azure Data Lake Gen2 (abfs:// & abfss://)
-// We support Managed Identity & Shared Key & Service Principal
+// We support Managed Identity & Shared Key & Service Principal & Workload Identity
 abstract class AzureStorageCloudCredential implements CloudCredential {
 
     public static final Logger LOG = LogManager.getLogger(AzureStorageCloudCredential.class);
@@ -231,10 +232,11 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
     private final String sasToken;
     private final String oauth2ClientSecret;
     private final String oauth2ClientEndpoint;
+    private final String oauth2TokenFile;
 
     public AzureADLS2CloudCredential(String endpoint, boolean oauth2ManagedIdentity, String oauth2TenantId, String oauth2ClientId,
                                      String storageAccount, String sharedKey, String sasToken, String oauth2ClientSecret,
-                                     String oauth2ClientEndpoint) {
+                                     String oauth2ClientEndpoint, String oauth2TokenFile) {
         Preconditions.checkNotNull(endpoint);
         Preconditions.checkNotNull(oauth2TenantId);
         Preconditions.checkNotNull(oauth2ClientId);
@@ -243,6 +245,7 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
         Preconditions.checkNotNull(sasToken);
         Preconditions.checkNotNull(oauth2ClientSecret);
         Preconditions.checkNotNull(oauth2ClientEndpoint);
+        Preconditions.checkNotNull(oauth2TokenFile);
 
         this.endpoint = endpoint;
         this.oauth2ManagedIdentity = oauth2ManagedIdentity;
@@ -253,6 +256,7 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
         this.sasToken = sasToken;
         this.oauth2ClientSecret = oauth2ClientSecret;
         this.oauth2ClientEndpoint = oauth2ClientEndpoint;
+        this.oauth2TokenFile = oauth2TokenFile;
 
         tryGenerateConfigurationMap();
     }
@@ -315,6 +319,18 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
                     oauth2ClientSecret);
             generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT),
                     oauth2ClientEndpoint);
+        } else if (!oauth2TokenFile.isEmpty() && !oauth2TenantId.isEmpty() && !oauth2ClientId.isEmpty()) {
+            generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME),
+                    "OAuth");
+            generatedConfigurationMap.put(
+                    createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME),
+                    WorkloadIdentityTokenProvider.class.getName());
+            generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE),
+                    oauth2TokenFile);
+            generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID),
+                    oauth2ClientId);
+            generatedConfigurationMap.put(createConfigKey(ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT),
+                    oauth2TenantId);
         }
     }
 
@@ -330,6 +346,7 @@ class AzureADLS2CloudCredential extends AzureStorageCloudCredential {
                 ", sasToken='" + sasToken + '\'' +
                 ", oauth2ClientSecret='" + oauth2ClientSecret + '\'' +
                 ", oauth2ClientEndpoint='" + oauth2ClientEndpoint + '\'' +
+                ", oauth2TokenFile='" + oauth2TokenFile + '\'' +
                 '}';
     }
 
diff --git a/fe/fe-core/src/test/java/com/starrocks/credential/CloudConfigurationFactoryTest.java b/fe/fe-core/src/test/java/com/starrocks/credential/CloudConfigurationFactoryTest.java
index f87597260ba..2038a41666c 100644
--- a/fe/fe-core/src/test/java/com/starrocks/credential/CloudConfigurationFactoryTest.java
+++ b/fe/fe-core/src/test/java/com/starrocks/credential/CloudConfigurationFactoryTest.java
@@ -83,7 +83,7 @@ public class CloudConfigurationFactoryTest {
                 "AzureCloudConfiguration{resources='', jars='', hdpuser='', " +
                         "cred=AzureADLS2CloudCredential{oauth2ManagedIdentity=false, oauth2TenantId='', oauth2ClientId='', " +
                         "endpoint='account.dfs.core.windows.net', storageAccount='', sharedKey='', " +
-                        "sasToken='sas_token', oauth2ClientSecret='', oauth2ClientEndpoint=''}}",
+                        "sasToken='sas_token', oauth2ClientSecret='', oauth2ClientEndpoint='', oauth2TokenFile=''}}",
                 cloudConfiguration.toConfString());
 
         map = new HashMap<>();
@@ -281,6 +281,7 @@ public class CloudConfigurationFactoryTest {
                 put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_ENDPOINT, "XX");
                 put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_SECRET, "XX");
                 put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_USE_MANAGED_IDENTITY, "XX");
+                put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TOKEN_FILE, "XX");
             }
         };
         CloudConfiguration cc = CloudConfigurationFactory.buildCloudConfigurationForStorage(map);
@@ -294,7 +295,7 @@ public class CloudConfigurationFactoryTest {
                 "AzureCloudConfiguration{resources='', jars='', hdpuser='', " +
                         "cred=AzureADLS2CloudCredential{oauth2ManagedIdentity=false, oauth2TenantId='XX', " +
                         "oauth2ClientId='XX', endpoint='', storageAccount='XX', sharedKey='XX', sasToken='', " +
-                        "oauth2ClientSecret='XX', oauth2ClientEndpoint='XX'}}");
+                        "oauth2ClientSecret='XX', oauth2ClientEndpoint='XX', oauth2TokenFile='XX'}}");
     }
 
     @Test
@@ -340,6 +341,27 @@ public class CloudConfigurationFactoryTest {
         Assertions.assertEquals("client-id", conf.get("fs.azure.account.oauth2.client.id"));
     }
 
+    @Test
+    public void testAzureADLS2WorkloadIdentity() {
+        Map map = new HashMap<>() {
+            {
+                put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_CLIENT_ID, "client-id");
+                put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TENANT_ID, "tenant-id");
+                put(CloudConfigurationConstants.AZURE_ADLS2_OAUTH2_TOKEN_FILE, "/path/to/token");
+            }
+        };
+
+        CloudConfiguration cc = CloudConfigurationFactory.buildCloudConfigurationForStorage(map);
+        Assertions.assertEquals(cc.getCloudType(), CloudType.AZURE);
+        Configuration conf = new Configuration();
+        cc.applyToConfiguration(conf);
+        Assertions.assertEquals("OAuth", conf.get("fs.azure.account.auth.type"));
+        Assertions.assertEquals("org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider",
+                conf.get("fs.azure.account.oauth.provider.type"));
+        Assertions.assertEquals("tenant-id", conf.get("fs.azure.account.oauth2.msi.tenant"));
+        Assertions.assertEquals("/path/to/token", conf.get("fs.azure.account.oauth2.token.file"));
+    }
+
     @Test
     public void testGCPCloudConfiguration() {
         Map map = new HashMap() {
diff --git a/java-extensions/hadoop-ext/src/main/java/com/starrocks/connector/share/credential/CloudConfigurationConstants.java b/java-extensions/hadoop-ext/src/main/java/com/starrocks/connector/share/credential/CloudConfigurationConstants.java
index 79ed683be6c..36993db8836 100644
--- a/java-extensions/hadoop-ext/src/main/java/com/starrocks/connector/share/credential/CloudConfigurationConstants.java
+++ b/java-extensions/hadoop-ext/src/main/java/com/starrocks/connector/share/credential/CloudConfigurationConstants.java
@@ -106,6 +106,7 @@ public class CloudConfigurationConstants {
     public static final String AZURE_ADLS2_SAS_TOKEN = "azure.adls2.sas_token";
     public static final String AZURE_ADLS2_OAUTH2_CLIENT_SECRET = "azure.adls2.oauth2_client_secret";
     public static final String AZURE_ADLS2_OAUTH2_CLIENT_ENDPOINT = "azure.adls2.oauth2_client_endpoint";
+    public static final String AZURE_ADLS2_OAUTH2_TOKEN_FILE = "azure.adls2.oauth2_token_file";
 
     // Credential for Google Cloud Platform (GCP)
     // For Google Cloud Storage (GCS)