From 9f34ea5bf2a1388c506a95f847cfee012ce09659 Mon Sep 17 00:00:00 2001 From: Priyesh Karatha Date: Sat, 20 Jun 2026 09:57:46 +0530 Subject: [PATCH 1/2] HDDS-14577. Handle missing metadata dir when updating container state --- .../container/keyvalue/KeyValueContainer.java | 6 ++++ .../keyvalue/TestKeyValueContainer.java | 34 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index 5be6d20a336a..712507f1ff11 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -299,6 +299,12 @@ private void writeToContainerFile(File containerFile, boolean isCreate) throws StorageContainerException { File tempContainerFile = null; try { + // The metadata directory may be absent when writing the UNHEALTHY marker after + // a MISSING_METADATA_DIR corruption is detected. Recreate it so the state can be persisted. + File metadataDir = containerFile.getParentFile(); + if (!metadataDir.exists() && !metadataDir.mkdirs()) { + throw new IOException("Failed to create metadata directory: " + metadataDir); + } tempContainerFile = createTempFile(containerFile); ContainerDataYaml.createContainerFile(containerData, tempContainerFile); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java index 37728cde4e0d..b904c4337e9b 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java @@ -759,6 +759,40 @@ public void testReportOfUnhealthyContainer( assertNotNull(keyValueContainer.getContainerReport()); } + /** + * When a container's metadata directory is missing (MISSING_METADATA_DIR), marking the container + * UNHEALTHY must succeed and persist the state to disk. Previously the call failed with + * StorageContainerException because writeToContainerFile tried to create a temp file inside + * the missing directory. + */ + @ContainerTestVersionInfo.ContainerTest + public void testMarkUnhealthyWithMissingMetadataDir(ContainerTestVersionInfo versionInfo) throws Exception { + init(versionInfo); + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + + // Simulate MISSING_METADATA_DIR: delete the entire metadata directory. + File metadataDir = new File(keyValueContainerData.getMetadataPath()); + assertTrue(metadataDir.exists(), "Metadata dir should exist before corruption"); + FileUtils.deleteDirectory(metadataDir); + assertFalse(metadataDir.exists(), "Metadata dir should be gone after deletion"); + + // markContainerUnhealthy must not throw even though the metadata dir is absent. + keyValueContainer.markContainerUnhealthy(); + + // In-memory state must be UNHEALTHY. + assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY, + keyValueContainer.getContainerState()); + + // The metadata directory must have been recreated and the .container file written. + assertTrue(metadataDir.exists(), "Metadata dir should be recreated by markContainerUnhealthy"); + File containerFile = keyValueContainer.getContainerFile(); + assertTrue(containerFile.exists(), "Container file should exist after markContainerUnhealthy"); + + // The persisted state must be UNHEALTHY. + KeyValueContainerData reloaded = (KeyValueContainerData) ContainerDataYaml.readContainerFile(containerFile); + assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY, reloaded.getState()); + } + @ContainerTestVersionInfo.ContainerTest public void testUpdateContainer(ContainerTestVersionInfo versionInfo) throws Exception { From f20c05b7635b8c14d4ec34474e38f7119421def9 Mon Sep 17 00:00:00 2001 From: Priyesh Karatha Date: Tue, 23 Jun 2026 09:40:55 +0530 Subject: [PATCH 2/2] addressing review comments --- .../container/keyvalue/KeyValueContainer.java | 19 +++++++++----- .../keyvalue/TestKeyValueContainer.java | 26 ++++++++----------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index 712507f1ff11..531e662d7751 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -299,12 +299,6 @@ private void writeToContainerFile(File containerFile, boolean isCreate) throws StorageContainerException { File tempContainerFile = null; try { - // The metadata directory may be absent when writing the UNHEALTHY marker after - // a MISSING_METADATA_DIR corruption is detected. Recreate it so the state can be persisted. - File metadataDir = containerFile.getParentFile(); - if (!metadataDir.exists() && !metadataDir.mkdirs()) { - throw new IOException("Failed to create metadata directory: " + metadataDir); - } tempContainerFile = createTempFile(containerFile); ContainerDataYaml.createContainerFile(containerData, tempContainerFile); @@ -397,7 +391,18 @@ public void markContainerUnhealthy() throws StorageContainerException { writeLock(); final State prevState = containerData.getState(); try { - updateContainerState(UNHEALTHY); + if (!getContainerFile().getParentFile().exists()) { + // Metadata directory is absent (e.g. MISSING_METADATA_DIR detected by scanner). + // Attempting to write the .container file would fail, and writing a partial file + // with only the state field is more harmful than not writing one at all. + // The in-memory UNHEALTHY state is sufficient: SCM will receive it via ICR + // and schedule deletion without requiring a persisted .container file. + containerData.setState(UNHEALTHY); + LOG.debug("Skipping .container file update for container {} with missing metadata directory", + containerData.getContainerID()); + } else { + updateContainerState(UNHEALTHY); + } clearPendingPutBlockCache(); } finally { writeUnlock(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java index b904c4337e9b..54eab570d9e1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java @@ -23,6 +23,7 @@ import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.verifyAllDataChecksumsMatch; import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.CONTAINER_SCHEMA_V3_ENABLED; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_METADATA_DIR; import static org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil.isSameSchemaVersion; import static org.apache.hadoop.ozone.container.replication.CopyContainerCompression.NO_COMPRESSION; import static org.assertj.core.api.Assertions.assertThat; @@ -760,21 +761,20 @@ public void testReportOfUnhealthyContainer( } /** - * When a container's metadata directory is missing (MISSING_METADATA_DIR), marking the container - * UNHEALTHY must succeed and persist the state to disk. Previously the call failed with - * StorageContainerException because writeToContainerFile tried to create a temp file inside - * the missing directory. + * When a container's metadata directory is missing (MISSING_METADATA_DIR detected by the scanner), + * markContainerUnhealthy must succeed without throwing. Writing a partial .container file with only + * the state field would lose other metadata and is more harmful than writing nothing. The in-memory + * UNHEALTHY state is sufficient for SCM to receive it via ICR and schedule deletion. */ @ContainerTestVersionInfo.ContainerTest public void testMarkUnhealthyWithMissingMetadataDir(ContainerTestVersionInfo versionInfo) throws Exception { init(versionInfo); keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); - // Simulate MISSING_METADATA_DIR: delete the entire metadata directory. + // Simulate MISSING_METADATA_DIR using the same corruption helper used in scanner tests. File metadataDir = new File(keyValueContainerData.getMetadataPath()); assertTrue(metadataDir.exists(), "Metadata dir should exist before corruption"); - FileUtils.deleteDirectory(metadataDir); - assertFalse(metadataDir.exists(), "Metadata dir should be gone after deletion"); + MISSING_METADATA_DIR.applyTo(keyValueContainer); // markContainerUnhealthy must not throw even though the metadata dir is absent. keyValueContainer.markContainerUnhealthy(); @@ -783,14 +783,10 @@ public void testMarkUnhealthyWithMissingMetadataDir(ContainerTestVersionInfo ver assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY, keyValueContainer.getContainerState()); - // The metadata directory must have been recreated and the .container file written. - assertTrue(metadataDir.exists(), "Metadata dir should be recreated by markContainerUnhealthy"); - File containerFile = keyValueContainer.getContainerFile(); - assertTrue(containerFile.exists(), "Container file should exist after markContainerUnhealthy"); - - // The persisted state must be UNHEALTHY. - KeyValueContainerData reloaded = (KeyValueContainerData) ContainerDataYaml.readContainerFile(containerFile); - assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY, reloaded.getState()); + // The metadata directory must NOT be recreated; no partial .container file should be written. + assertFalse(metadataDir.exists(), "Metadata dir should not be recreated by markContainerUnhealthy"); + assertFalse(keyValueContainer.getContainerFile().exists(), + "Container file should not be written when metadata dir is missing"); } @ContainerTestVersionInfo.ContainerTest