
Commit 71a6656

remove conf
1 parent e68ce5a commit 71a6656

5 files changed: +7 additions, -56 deletions


sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 0 additions & 13 deletions
@@ -2584,16 +2584,6 @@ object SQLConf {
       .createWithDefault(
         "org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider")
 
-  val STREAMING_CHECKPOINT_STATE_CREATE_METADATA_DIR_ON_READ =
-    buildConf("spark.sql.streaming.checkpoint.stateCreateMetadataDirOnRead")
-      .internal()
-      .doc(
-        "When true, the state data source reader will create metadata directories if they " +
-        "don't exist. When false, the reader will only read from existing directories.")
-      .version("4.2.0")
-      .booleanConf
-      .createWithDefault(false)
-
   val NUM_STATE_STORE_MAINTENANCE_THREADS =
     buildConf("spark.sql.streaming.stateStore.numStateStoreMaintenanceThreads")
       .internal()
@@ -7098,9 +7088,6 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def stateStoreProviderClass: String = getConf(STATE_STORE_PROVIDER_CLASS)
 
-  def stateStoreCreateMetadataDirOnRead: Boolean =
-    getConf(STREAMING_CHECKPOINT_STATE_CREATE_METADATA_DIR_ON_READ)
-
   def isStateSchemaCheckEnabled: Boolean = getConf(STATE_SCHEMA_CHECK_ENABLED)
 
   def numStateStoreMaintenanceThreads: Int = getConf(NUM_STATE_STORE_MAINTENANCE_THREADS)

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala

Lines changed: 1 addition & 2 deletions
@@ -378,9 +378,8 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging
     val storeId = new StateStoreId(stateCheckpointLocation.toString, sourceOptions.operatorId,
       partitionId, sourceOptions.storeName)
     val providerId = new StateStoreProviderId(storeId, UUID.randomUUID())
-    val createSchemaDir = session.sessionState.conf.stateStoreCreateMetadataDirOnRead
     val manager = new StateSchemaCompatibilityChecker(providerId, hadoopConf,
-      oldSchemaFilePaths = oldSchemaFilePaths, createSchemaDir = createSchemaDir)
+      oldSchemaFilePaths = oldSchemaFilePaths, createSchemaDir = false)
     val stateSchema = manager.readSchemaFile()
 
     if (sourceOptions.internalOnlyReadAllColumnFamilies) {
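
Note: with createSchemaDir hardcoded to false, this read path can no longer create schema directories. For context, a minimal sketch of a batch read that exercises it, assuming an existing checkpoint (the path and option values below are hypothetical):

    // Read operator 0's default state store from an existing checkpoint.
    // The reader only opens existing schema/metadata files; it never calls mkdirs.
    val stateDf = spark.read
      .format("statestore")
      .option("path", "/tmp/checkpoints/agg-query") // hypothetical checkpoint location
      .option("operatorId", "0")                    // defaults to 0 when omitted
      .load()
    stateDf.show() // rows of (key, value, partition_id)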

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StreamStreamJoinStateHelper.scala

Lines changed: 3 additions & 5 deletions
@@ -79,8 +79,6 @@ object StreamStreamJoinStateHelper {
     // KeyToNumValuesType, KeyWithIndexToValueType
     val storeNames = SymmetricHashJoinStateManager.allStateStoreNames(side).toList
 
-    val createSchemaDir = session.sessionState.conf.stateStoreCreateMetadataDirOnRead
-
     val (keySchema, valueSchema) =
       if (!usesVirtualColumnFamilies(
         newHadoopConf, stateCheckpointLocation, operatorId)) {
@@ -97,12 +95,12 @@
         // read the key schema from the keyToNumValues store for the join keys
         val manager = new StateSchemaCompatibilityChecker(
           providerIdForKeyToNumValues, newHadoopConf, oldSchemaFilePaths,
-          createSchemaDir = createSchemaDir)
+          createSchemaDir = false)
         val kSchema = manager.readSchemaFile().head.keySchema
 
         // read the value schema from the keyWithIndexToValue store for the values
         val manager2 = new StateSchemaCompatibilityChecker(providerIdForKeyWithIndexToValue,
-          newHadoopConf, oldSchemaFilePaths, createSchemaDir = createSchemaDir)
+          newHadoopConf, oldSchemaFilePaths, createSchemaDir = false)
         val vSchema = manager2.readSchemaFile().head.valueSchema
 
         (kSchema, vSchema)
@@ -112,7 +110,7 @@
         val providerId = new StateStoreProviderId(storeId, UUID.randomUUID())
 
         val manager = new StateSchemaCompatibilityChecker(
-          providerId, newHadoopConf, oldSchemaFilePaths, createSchemaDir = createSchemaDir)
+          providerId, newHadoopConf, oldSchemaFilePaths, createSchemaDir = false)
         val kSchema = manager.readSchemaFile().find { schema =>
           schema.colFamilyName == storeNames(0)
         }.map(_.keySchema).get
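
Note: for stream-stream joins, a reader typically selects a side rather than naming the underlying stores; the helper above then resolves the key schema from the keyToNumValues store and the value schema from the keyWithIndexToValue store. A sketch with a hypothetical path:

    // Read the left side's join state via the joinSide option ("left" or "right").
    val joinStateDf = spark.read
      .format("statestore")
      .option("path", "/tmp/checkpoints/join-query") // hypothetical
      .option("joinSide", "left")
      .load()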

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/metadata/StateMetadataSource.scala

Lines changed: 1 addition & 3 deletions
@@ -222,11 +222,9 @@ class StateMetadataPartitionReader(
       } else {
         1
       }
-      val createMetadataDir = SparkSession.getActiveSession
-        .map(_.sessionState.conf.stateStoreCreateMetadataDirOnRead).getOrElse(false)
       OperatorStateMetadataReader.createReader(
         operatorIdPath, hadoopConf, operatorStateMetadataVersion, batchId,
-        createMetadataDir = createMetadataDir).read() match {
+        createMetadataDir = false).read() match {
       case Some(metadata) => metadata
       case None => throw StateDataSourceErrors.failedToReadOperatorMetadata(checkpointLocation,
         batchId)
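
Note: the operator metadata path gets the same treatment. A sketch of the state-metadata source that drives StateMetadataPartitionReader, again with a hypothetical checkpoint path:

    // List the operator metadata (operator id/name, state store names, batch id
    // range) recorded in an existing checkpoint; with createMetadataDir = false
    // the read never modifies the checkpoint's directory tree.
    val metaDf = spark.read
      .format("state-metadata")
      .load("/tmp/checkpoints/agg-query") // hypothetical
    metaDf.show()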

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceReadSuite.scala

Lines changed: 2 additions & 33 deletions
@@ -1507,8 +1507,8 @@ abstract class StateDataSourceReadSuite extends StateDataSourceTestBase with Ass
  * Test suite that verifies the state data source reader does not create empty state
  * directories when reading state for all stateful operators.
  *
- * When `spark.sql.streaming.stateStore.createMetadataDirOnRead` is false (the default),
- * the reader should not call mkdirs on the schema metadata path. This is important for
+ * The reader does not create metadata directories (no mkdirs on the schema metadata path).
+ * This is important for
  * Unity Catalog environments where creating directories requires WRITE FILES permission,
  * but reading state should only require READ FILES permission (ES-1722614).
  *
@@ -1667,35 +1667,4 @@ class StateDataSourceNoEmptyDirCreationSuite extends StateDataSourceTestBase {
     )
   }
 
-  test("createMetadataDirOnRead=true recreates deleted state directory") {
-    withSQLConf(
-      SQLConf.STREAMING_CHECKPOINT_STATE_CREATE_METADATA_DIR_ON_READ.key -> "true") {
-      withTempDir { tempDir =>
-        val checkpointPath = tempDir.getAbsolutePath
-        runLargeDataStreamingAggregationQuery(checkpointPath)
-
-        val stateDir = new File(tempDir, "state")
-        assert(stateDir.exists(), "State directory should exist after running the query")
-        Utils.deleteRecursively(stateDir)
-        assert(!stateDir.exists(), "State directory should be deleted")
-
-        // With createMetadataDirOnRead=true, the reader will attempt to create
-        // the _metadata directory, which recreates part of the state directory tree
-        val e5 = intercept[Exception] {
-          spark.read
-            .format("statestore")
-            .option(StateSourceOptions.PATH, checkpointPath)
-            .load()
-            .collect()
-        }
-        assertCauseChainContains(e5,
-          classOf[StateDataSourceReadStateSchemaFailure])
-
-        // The state directory should be recreated (at least partially) because
-        // createMetadataDirOnRead=true causes mkdirs on the schema metadata path
-        assert(stateDir.exists(),
-          "With createMetadataDirOnRead=true, state directory should be recreated")
-      }
-    }
-  }
 }
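
Note: with the conf gone, the suite only needs to assert the no-mkdirs behavior. A hedged sketch of such a check, mirroring the removed test but inverting the final assertion (helpers such as runLargeDataStreamingAggregationQuery and the File/Utils imports are assumed from the surrounding suite):

    test("reading deleted state does not recreate the state directory") {
      withTempDir { tempDir =>
        val checkpointPath = tempDir.getAbsolutePath
        runLargeDataStreamingAggregationQuery(checkpointPath)

        val stateDir = new File(tempDir, "state")
        Utils.deleteRecursively(stateDir)

        // The read fails because the state is gone, but it must not create
        // the schema metadata directories along the way.
        intercept[Exception] {
          spark.read
            .format("statestore")
            .option(StateSourceOptions.PATH, checkpointPath)
            .load()
            .collect()
        }
        assert(!stateDir.exists(), "reader must not recreate the state directory")
      }
    }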
