-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Description
Operating System
Linux
Version Information
Azure ML studio - Spark serverless compute (Spark 3.4)
Steps to reproduce
Tutorial notebook "1.Develop-feature-set-and-register.ipynb" cannot access sample data
https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-get-started-with-feature-store?view=azureml-api-2&tabs=SDK-track
When reaching cell 12 there will be error related to access and credentials
Expected behavior
The cell should run successfully
Actual behavior
Error with access and authentication
`---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
Cell In[33], line 3
1 # remove the "." in the roor directory path as we need to generate absolute path to read from spark
2 transactions_source_data_path = "wasbs://[email protected]/feature-store-prp/datasources/transactions-source/*.parquet"
----> 3 transactions_src_df = spark.read.parquet(transactions_source_data_path)
5 display(transactions_src_df.head(5))
6 # Note: display(training_df.head(5)) displays the timestamp column in a different format. You can can call transactions_src_df.show() to see correctly formatted value
File /opt/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py:531, in DataFrameReader.parquet(self, *paths, **options)
520 int96RebaseMode = options.get("int96RebaseMode", None)
521 self._set_opts(
522 mergeSchema=mergeSchema,
523 pathGlobFilter=pathGlobFilter,
(...)
528 int96RebaseMode=int96RebaseMode,
529 )
--> 531 return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths)))
File ~/cluster-env/env/lib/python3.10/site-packages/py4j/java_gateway.py:1322, in JavaMember.call(self, *args)
1316 command = proto.CALL_COMMAND_NAME +
1317 self.command_header +
1318 args_command +
1319 proto.END_COMMAND_PART
1321 answer = self.gateway_client.send_command(command)
-> 1322 return_value = get_return_value(
1323 answer, self.gateway_client, self.target_id, self.name)
1325 for temp_arg in temp_args:
1326 if hasattr(temp_arg, "_detach"):
File /opt/spark/python/lib/pyspark.zip/pyspark/errors/exceptions/captured.py:169, in capture_sql_exception..deco(*a, **kw)
167 def deco(*a: Any, **kw: Any) -> Any:
168 try:
--> 169 return f(*a, **kw)
170 except Py4JJavaError as e:
171 converted = convert_exception(e.java_exception)
File ~/cluster-env/env/lib/python3.10/site-packages/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
Py4JJavaError: An error occurred while calling o7576.parquet.
: org.apache.hadoop.fs.azure.AzureException: org.apache.hadoop.fs.azure.AzureException: No credentials found for account azuremlexampledata.blob.core.windows.net in the configuration, and its container data is not accessible using anonymous credentials. Please check if the container exists first. If it is not publicly available, you have to provide account credentials.
at org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.createAzureStorageSession(AzureNativeFileSystemStore.java:1123)
at org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.initialize(AzureNativeFileSystemStore.java:566)
at org.apache.hadoop.fs.azure.NativeAzureFileSystem.initialize(NativeAzureFileSystem.java:1423)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3469)
at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:174)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3574)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3521)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:540)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:365)
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$1(DataSource.scala:725)
at scala.collection.immutable.List.map(List.scala:293)
at org.apache.spark.sql.execution.datasources.DataSource$.checkAndGlobPathIfNecessary(DataSource.scala:723)
at org.apache.spark.sql.execution.datasources.DataSource.checkAndGlobPathIfNecessary(DataSource.scala:552)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:404)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:236)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:219)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:219)
at org.apache.spark.sql.DataFrameReader.parquet(DataFrameReader.scala:575)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.hadoop.fs.azure.AzureException: No credentials found for account azuremlexampledata.blob.core.windows.net in the configuration, and its container data is not accessible using anonymous credentials. Please check if the container exists first. If it is not publicly available, you have to provide account credentials.
at org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.connectUsingAnonymousCredentials(AzureNativeFileSystemStore.java:899)
at org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.createAzureStorageSession(AzureNativeFileSystemStore.java:1118)
... 29 more`
Addition information
No response