From a7492351109bc7ffaa9f9406add9a9d61c2b281b Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati <115860222+sreejasahithi@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:17:59 +0530 Subject: [PATCH 1/2] HDDS-15456. Add SCM DB lookup and identify orphan and deleted-but-present containers --- .../container/analyze/AnalyzeSubcommand.java | 141 ++++++++++---- .../analyze/ScmContainerMetadataReader.java | 117 +++++++++++ .../analyze/ContainerAnalyzeTestHelper.java | 37 ++++ .../analyze/TestAnalyzeSubcommand.java | 184 +++++++++++++++++- .../TestScmContainerMetadataReader.java | 90 +++++++++ 5 files changed, 524 insertions(+), 45 deletions(-) create mode 100644 hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ScmContainerMetadataReader.java create mode 100644 hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestScmContainerMetadataReader.java diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java index 12a0c67bd87..9728195d734 100644 --- a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java +++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java @@ -17,12 +17,19 @@ package org.apache.hadoop.ozone.debug.datanode.container.analyze; +import java.io.File; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; import java.util.concurrent.Callable; import org.apache.hadoop.hdds.cli.AbstractSubcommand; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.shell.ListLimitOptions; import picocli.CommandLine; import picocli.CommandLine.Command; @@ -34,65 +41,123 @@ */ @Command( name = "analyze", - description = "Analyze container consistency between on-disk container " + - "directories on this DataNode and SCM metadata. Must be run locally on a DataNode.") + description = { + "Analyze container consistency between on-disk container directories on this DataNode and SCM metadata.", + "Must be run locally on a DataNode.", + "", + "Each reported container occurrence includes a status:", + " MISSING_METADATA: metadata/{containerId}.container does not exist.", + " INVALID_METADATA: metadata file exists but cannot be parsed, or the container ID in the metadata", + " does not match the directory name.", + " VALID: metadata file is present and consistent with the directory." + }) public class AnalyzeSubcommand extends AbstractSubcommand implements Callable { - @CommandLine.Option(names = {"--count"}, - defaultValue = "20", - description = "Number of containers to display") - private int count; + @CommandLine.Mixin + private ListLimitOptions listOptions; + + @CommandLine.Option(names = {"--scm-db"}, + description = "Path to an offline scm.db directory, or its parent metadata directory.") + private File scmDb; @Override public Void call() throws Exception { - if (count < 1) { - throw new IOException("Count must be an integer greater than 0."); - } + listOptions.getLimit(); //This triggers ListLimitOptions validation OzoneConfiguration conf = getOzoneConf(); ContainerScanResult scanResult = ContainerDirectoryScanner.scan(conf); Map> enrichedDuplicates = ContainerDirectoryScanner.enrichDuplicates(scanResult.getDuplicates()); - // TODO: SCM metadata lookup from --scm-db when provided. - // TODO: For each id in scanResult.getSingles().keySet() classified NOT_IN_SCM or DELETED: - // enrichOccurrence(id, scanResult.getSingles().get(id)) and report. - // TODO: For each id in enrichedDuplicates.keySet() classified NOT_IN_SCM or DELETED: - // enrichedDuplicates.get(id) is already enriched — just report. + if (scmDb != null) { + findOrphanAndDeletedButPresentContainers(conf, scanResult, enrichedDuplicates); + } else { + out().println("To identify orphan containers (wrt SCM) and containers that are marked as DELETED in SCM but" + + " exist in the datanode's current directory, provide the SCM database path using the --scm-db option." + ); + } printDuplicates(enrichedDuplicates); printVolumeScanErrors(scanResult.getVolumeScanErrors()); return null; } - private void printDuplicates(Map> duplicates) { - long totalDuplicateIds = duplicates.size(); - out().printf("Number of containers with duplicate container directories on this DataNode: %d%n", totalDuplicateIds); + private void findOrphanAndDeletedButPresentContainers(OzoneConfiguration conf, ContainerScanResult scanResult, + Map> enrichedDuplicates) throws IOException { + Map> enrichedOrphanContainers = new HashMap<>(); + Map> enrichedDeletedButPresent = new HashMap<>(); - if (totalDuplicateIds == 0) { + try (ScmContainerMetadataReader reader = new ScmContainerMetadataReader(conf, scmDb)) { + Set containerIds = new HashSet<>(scanResult.getSingles().keySet()); + containerIds.addAll(enrichedDuplicates.keySet()); + + for (long containerId : containerIds) { + Optional classification = reader.classify(containerId); + if (!classification.isPresent()) { + continue; + } + List occurrences = enrichedDuplicates.get(containerId); + if (occurrences == null) { + String path = scanResult.getSingles().get(containerId); + occurrences = Collections.singletonList(ContainerDirectoryScanner.enrichOccurrence(containerId, path)); + } + if (classification.get() == ScmContainerMetadataReader.ScmContainerClassification.NOT_IN_SCM) { + enrichedOrphanContainers.put(containerId, occurrences); + } else { + enrichedDeletedButPresent.put(containerId, occurrences); + } + } + } + + printContainerOccurrenceReport("Number of orphan containers(wrt SCM) on this DataNode: %d%n", + enrichedOrphanContainers); + printContainerOccurrenceReport("Number of deleted but present containers on this DataNode: %d%n", + enrichedDeletedButPresent); + } + + private void printContainerOccurrenceReport(String countFormat, + Map> containersById) { + long total = containersById.size(); + out().printf(countFormat, total); + if (total == 0) { return; } - if (totalDuplicateIds > count) { - out().printf("Showing first %d:%n", count); + if (!listOptions.isAll()) { + int limit = listOptions.getLimit(); + if (total > limit) { + out().printf("Showing first %d:%n", limit); + } + + containersById.entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .limit(limit) + .forEach(entry -> printContainerEntry(entry.getKey(), entry.getValue())); + } else { + containersById.entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .forEach(entry -> printContainerEntry(entry.getKey(), entry.getValue())); } + } - duplicates.entrySet().stream() - .sorted(Map.Entry.comparingByKey()) - .limit(count) - .forEach(entry -> { - long containerId = entry.getKey(); - List occurrences = entry.getValue(); - out().printf("Container %d (%d occurrences):%n", containerId, occurrences.size()); - for (ContainerDiskOccurrence o : occurrences) { - out().printf(" path=%s%n", o.getContainerPath()); - if (o.isSizeKnown()) { - out().printf(" status=%s size=%d bytes%n", o.getStatus(), o.getSizeBytes()); - } else { - out().printf(" status=%s size=unavailable (failed to compute directory size)%n", - o.getStatus()); - } - out().println(); - } - }); + private void printContainerEntry(long containerId, List occurrences) { + out().printf("Container %d (%d occurrence%s):%n", + containerId, + occurrences.size(), + occurrences.size() == 1 ? "" : "s"); + for (ContainerDiskOccurrence occurrence : occurrences) { + out().printf(" path=%s%n", occurrence.getContainerPath()); + if (occurrence.isSizeKnown()) { + out().printf(" status=%s size=%d bytes%n", occurrence.getStatus(), occurrence.getSizeBytes()); + } else { + out().printf(" status=%s size=unavailable (failed to compute directory size)%n", occurrence.getStatus()); + } + out().println(); + } + } + + private void printDuplicates(Map> duplicates) { + printContainerOccurrenceReport( + "Number of containers with duplicate container directories on this DataNode: %d%n", + duplicates); } private void printVolumeScanErrors(List volumeScanErrors) { diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ScmContainerMetadataReader.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ScmContainerMetadataReader.java new file mode 100644 index 00000000000..d416fd5f04b --- /dev/null +++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ScmContainerMetadataReader.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug.datanode.container.analyze; + +import static org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition.CONTAINERS; + +import java.io.File; +import java.io.IOException; +import java.util.Objects; +import java.util.Optional; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition; +import org.apache.hadoop.hdds.utils.db.CodecException; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; +import org.apache.hadoop.hdds.utils.db.RocksDatabaseException; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.cache.TableCache.CacheType; +import org.apache.hadoop.ozone.OzoneConsts; + +/** + * Read-only lookup of container metadata from {@code scm.db}. + */ +public final class ScmContainerMetadataReader implements AutoCloseable { + + private final DBStore dbStore; + private final Table containerTable; + + public ScmContainerMetadataReader(ConfigurationSource conf, File scmDbPath) + throws IOException { + File scmDbDir = resolveScmDbDirectory(scmDbPath); + try { + this.dbStore = DBStoreBuilder.newBuilder(conf, SCMDBDefinition.get(), scmDbDir.getName(), + scmDbDir.getParentFile().toPath()) + .setOpenReadOnly(true) + .build(); + } catch (RocksDatabaseException e) { + throw new IOException("Failed to open SCM database at " + scmDbDir, e); + } + try { + this.containerTable = CONTAINERS.getTable(dbStore, CacheType.NO_CACHE); + } catch (RocksDatabaseException | CodecException e) { + dbStore.close(); + throw new IOException("Failed to open scm.db containers column family at " + scmDbDir, e); + } + } + + /** + * Classify a container ID against scm.db {@code containers}. + * + * @return {@link Optional#empty()} when the container is present in SCM with a + * non-DELETED lifecycle state + */ + public Optional classify(long containerId) throws IOException { + try { + ContainerInfo info = containerTable.get(ContainerID.valueOf(containerId)); + if (info == null) { + return Optional.of(ScmContainerClassification.NOT_IN_SCM); + } + if (info.isDeleted()) { + return Optional.of(ScmContainerClassification.DELETED); + } + return Optional.empty(); + } catch (RocksDatabaseException | CodecException e) { + throw new IOException("Failed to read container " + containerId + " from scm.db", e); + } + } + + static File resolveScmDbDirectory(File path) throws IOException { + Objects.requireNonNull(path, "scmDbPath"); + File scmDbDir = path; + if (!OzoneConsts.SCM_DB_NAME.equals(path.getName())) { + File child = new File(path, OzoneConsts.SCM_DB_NAME); + if (child.isDirectory()) { + scmDbDir = child; + } + } + if (!scmDbDir.isDirectory()) { + throw new IOException("SCM database directory not found: " + path); + } + return scmDbDir; + } + + @Override + public void close() { + if (dbStore != null) { + dbStore.close(); + } + } + + /** + * SCM-side classification for an on-disk container directory. + */ + enum ScmContainerClassification { + /** No record for this container ID in scm.db {@code containers}. */ + NOT_IN_SCM, + /** Record exists and {@link ContainerInfo} state is DELETED. */ + DELETED + } +} diff --git a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerAnalyzeTestHelper.java b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerAnalyzeTestHelper.java index c9d3e01483d..e5aab1e5c5f 100644 --- a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerAnalyzeTestHelper.java +++ b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ContainerAnalyzeTestHelper.java @@ -21,9 +21,19 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Map; import java.util.UUID; import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.Storage; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; @@ -110,4 +120,31 @@ void corruptVersionFile(File volumeRoot) throws IOException { File versionFile = StorageVolumeUtil.getVersionFile(hddsRoot); Files.write(versionFile.toPath(), new byte[0]); } + + /** + * Creates an offline {@code scm.db} with the given container states. + * + * @return path to the {@code scm.db} directory + */ + File createScmDb(Map containerStates) throws IOException { + Path scmRoot = tempDir.resolve("scm-metadata"); + Files.createDirectories(scmRoot); + DBStore dbStore = DBStoreBuilder.newBuilder(conf, SCMDBDefinition.get(), OzoneConsts.SCM_DB_NAME, scmRoot).build(); + try { + Table containerTable = SCMDBDefinition.CONTAINERS.getTable(dbStore); + for (Map.Entry entry : containerStates.entrySet()) { + long containerId = entry.getKey(); + ContainerInfo containerInfo = new ContainerInfo.Builder() + .setContainerID(containerId) + .setState(entry.getValue()) + .setOwner("test") + .setReplicationConfig(RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE)) + .build(); + containerTable.put(ContainerID.valueOf(containerId), containerInfo); + } + } finally { + dbStore.close(); + } + return scmRoot.resolve(OzoneConsts.SCM_DB_NAME).toFile(); + } } diff --git a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java index 0d3da9f45a7..080c4de9aa0 100644 --- a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java +++ b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.debug.datanode.container.analyze; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.params.provider.Arguments.arguments; import java.io.File; import java.io.PrintWriter; @@ -25,14 +26,21 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.UUID; +import java.util.stream.Stream; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.ozone.debug.OzoneDebug; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import picocli.CommandLine; /** @@ -84,7 +92,7 @@ public void testAnalyzeRespectsCount() throws Exception { } executeAnalyze(volumeRoot1.getAbsolutePath() + "," + volumeRoot2.getAbsolutePath(), - "--count", "2"); + "--length", "2"); String output = outWriter.toString(); assertThat(output).contains("Number of containers with duplicate container directories on this DataNode: 3"); @@ -97,10 +105,10 @@ public void testAnalyzeRespectsCount() throws Exception { @Test public void testAnalyzeInvalidCount() { - executeAnalyze(tempDir.toString(), "--count", "0"); + executeAnalyze(tempDir.toString(), "--length", "0"); String combined = outWriter.toString() + errWriter.toString(); - assertThat(combined).contains("Count must be an integer greater than 0."); + assertThat(combined).contains("List length should be a positive number"); } @Test @@ -164,16 +172,178 @@ public void testAnalyzeDuplicateValidAndInvalidEmptyFile() throws Exception { assertDuplicateReport(volumeRoot1, volumeRoot2, containerId, "INVALID_METADATA"); } + @ParameterizedTest(name = "{0}") + @MethodSource("scmOrphanOrDeletedScenarios") + public void testAnalyzeScmOrphanOrDeletedSingleVolume(String scenarioName, long containerId, + HddsProtos.LifeCycleState scmState, boolean metadataFilePresent, long metadataContainerId, String expectedStatus) + throws Exception { + File volumeRoot = testHelper.formatVolume("volume0"); + testHelper.createContainerDirectory(volumeRoot, containerId, metadataFilePresent, metadataContainerId); + + Map scmContainers = new HashMap<>(); + if (scmState != null) { + scmContainers.put(containerId, scmState); + } + File scmDb = testHelper.createScmDb(scmContainers); + + executeAnalyze(volumeRoot.getAbsolutePath(), "--scm-db", scmDb.getAbsolutePath()); + + String output = outWriter.toString(); + assertScmCounts(output, scmState == null ? 1 : 0, scmState == HddsProtos.LifeCycleState.DELETED ? 1 : 0); + assertThat(output).contains("Container " + containerId + " (1 occurrence):"); + assertOccurrenceStatus(output, volumeRoot, containerId, expectedStatus); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("scmOrphanOrDeletedScenarios") + public void testAnalyzeScmOrphanOrDeletedOnTwoVolumes(String scenarioName, long containerId, + HddsProtos.LifeCycleState scmState, boolean metadataFilePresent, long metadataContainerId, String expectedStatus) + throws Exception { + File volumeRoot1 = testHelper.formatVolume("volume0"); + File volumeRoot2 = testHelper.formatVolume("volume1"); + testHelper.createContainerDirectory(volumeRoot1, containerId, metadataFilePresent, metadataContainerId); + testHelper.createContainerDirectory(volumeRoot2, containerId, metadataFilePresent, metadataContainerId); + + Map scmContainers = new HashMap<>(); + if (scmState != null) { + scmContainers.put(containerId, scmState); + } + File scmDb = testHelper.createScmDb(scmContainers); + + executeAnalyze(volumeRoot1.getAbsolutePath() + "," + volumeRoot2.getAbsolutePath(), + "--scm-db", scmDb.getAbsolutePath()); + + String output = outWriter.toString(); + assertScmCounts(output, scmState == null ? 1 : 0, scmState == HddsProtos.LifeCycleState.DELETED ? 1 : 0); + assertThat(output).contains("Container " + containerId + " (2 occurrences):"); + assertOccurrenceStatus(output, volumeRoot1, containerId, expectedStatus); + assertOccurrenceStatus(output, volumeRoot2, containerId, expectedStatus); + } + + @Test + public void testAnalyzeScmOmitsHealthyContainer() throws Exception { + File volumeRoot = testHelper.formatVolume("volume0"); + long containerId = 8020L; + testHelper.createContainerDirectory(volumeRoot, containerId, true, containerId); + + Map scmContainers = new HashMap<>(); + scmContainers.put(containerId, HddsProtos.LifeCycleState.CLOSED); + File scmDb = testHelper.createScmDb(scmContainers); + + executeAnalyze(volumeRoot.getAbsolutePath(), "--scm-db", scmDb.getAbsolutePath()); + + String output = outWriter.toString(); + assertThat(output).contains("Number of orphan containers(wrt SCM) on this DataNode: 0"); + assertThat(output).contains("Number of deleted but present containers on this DataNode: 0"); + } + + @Test + public void testAnalyzeScmMixedOrphanDeletedHealthy() throws Exception { + File volumeRoot = testHelper.formatVolume("volume0"); + long orphanId = 8101L; + long deletedId = 8102L; + long healthyId = 8103L; + testHelper.createContainerDirectory(volumeRoot, orphanId, true, orphanId); + testHelper.createContainerDirectory(volumeRoot, deletedId, true, deletedId); + testHelper.createContainerDirectory(volumeRoot, healthyId, true, healthyId); + + Map scmContainers = new HashMap<>(); + scmContainers.put(deletedId, HddsProtos.LifeCycleState.DELETED); + scmContainers.put(healthyId, HddsProtos.LifeCycleState.CLOSED); + File scmDb = testHelper.createScmDb(scmContainers); + + executeAnalyze(volumeRoot.getAbsolutePath(), "--scm-db", scmDb.getAbsolutePath()); + + String output = outWriter.toString(); + assertThat(output).contains("Number of orphan containers(wrt SCM) on this DataNode: 1"); + assertThat(output).contains("Number of deleted but present containers on this DataNode: 1"); + assertThat(output).contains("Container " + orphanId + " (1 occurrence):"); + assertOccurrenceStatus(output, volumeRoot, orphanId, "VALID"); + assertThat(output).contains("Container " + deletedId + " (1 occurrence):"); + assertOccurrenceStatus(output, volumeRoot, deletedId, "VALID"); + assertThat(output).doesNotContain("Container " + healthyId); + } + + @Test + public void testAnalyzeScmMixedOrphanDeletedDuplicate() throws Exception { + File volumeRoot1 = testHelper.formatVolume("volume0"); + File volumeRoot2 = testHelper.formatVolume("volume1"); + long orphanId = 8201L; + long deletedId = 8202L; + long duplicateId = 8203L; + testHelper.createContainerDirectory(volumeRoot1, orphanId, true, orphanId); + testHelper.createContainerDirectory(volumeRoot1, deletedId, true, deletedId); + testHelper.createContainerDirectory(volumeRoot1, duplicateId, true, duplicateId); + testHelper.createContainerDirectory(volumeRoot2, duplicateId, true, duplicateId); + + Map scmContainers = new HashMap<>(); + scmContainers.put(deletedId, HddsProtos.LifeCycleState.DELETED); + scmContainers.put(duplicateId, HddsProtos.LifeCycleState.CLOSED); + File scmDb = testHelper.createScmDb(scmContainers); + + executeAnalyze(volumeRoot1.getAbsolutePath() + "," + volumeRoot2.getAbsolutePath(), + "--scm-db", scmDb.getAbsolutePath()); + + String output = outWriter.toString(); + assertThat(output).contains("Number of orphan containers(wrt SCM) on this DataNode: 1"); + assertThat(output).contains("Container " + orphanId + " (1 occurrence):"); + assertOccurrenceStatus(output, volumeRoot1, orphanId, "VALID"); + assertThat(output).contains("Number of deleted but present containers on this DataNode: 1"); + assertThat(output).contains("Container " + deletedId + " (1 occurrence):"); + assertOccurrenceStatus(output, volumeRoot1, deletedId, "VALID"); + assertThat(output).contains("Number of containers with duplicate container directories on this DataNode: 1"); + assertThat(output).contains("Container " + duplicateId + " (2 occurrences):"); + assertOccurrenceStatus(output, volumeRoot1, duplicateId, "VALID"); + assertOccurrenceStatus(output, volumeRoot2, duplicateId, "VALID"); + } + + @Test + public void testAnalyzeWithoutScmDb() throws Exception { + File volumeRoot = testHelper.formatVolume("volume0"); + long containerId = 8301L; + testHelper.createContainerDirectory(volumeRoot, containerId, true, containerId); + + executeAnalyze(volumeRoot.getAbsolutePath()); + + String output = outWriter.toString(); + assertThat(output).contains("provide the SCM database path using the --scm-db option"); + assertThat(output).doesNotContain("Number of orphan containers(wrt SCM) on this DataNode:"); + assertThat(output).doesNotContain("Number of deleted but present containers on this DataNode:"); + assertThat(output).contains("Number of containers with duplicate container directories on this DataNode: 0"); + } + + private static Stream scmOrphanOrDeletedScenarios() { + return Stream.of( + arguments("orphan-valid", 8008L, null, true, 8008L, "VALID"), + arguments("deleted-but-present-valid", 8030L, HddsProtos.LifeCycleState.DELETED, true, 8030L, "VALID"), + arguments("orphan-missing-metadata", 8401L, null, false, 8401L, "MISSING_METADATA"), + arguments("deleted-but-present-missing-metadata", 8402L, HddsProtos.LifeCycleState.DELETED, false, 8402L, + "MISSING_METADATA"), + arguments("orphan-invalid-metadata", 8403L, null, true, 9999L, "INVALID_METADATA"), + arguments("deleted-but-present-invalid-metadata", 8404L, HddsProtos.LifeCycleState.DELETED, true, 9999L, + "INVALID_METADATA")); + } + + private void assertScmCounts(String output, int expectedOrphans, int expectedDeleted) { + assertThat(output).contains( + "Number of orphan containers(wrt SCM) on this DataNode: " + expectedOrphans); + assertThat(output).contains( + "Number of deleted but present containers on this DataNode: " + expectedDeleted); + } + private void assertDuplicateReport(File volumeRoot1, File volumeRoot2, long containerId, String volume2ExpectedStatus) { executeAnalyze(volumeRoot1.getAbsolutePath() + "," + volumeRoot2.getAbsolutePath()); - String path1 = testHelper.containerPath(volumeRoot1, containerId); - String path2 = testHelper.containerPath(volumeRoot2, containerId); String output = outWriter.toString(); assertThat(output).contains("Container " + containerId + " (2 occurrences):"); - assertThat(output).contains("path=" + path1 + "\n status=" + "VALID"); - assertThat(output).contains("path=" + path2 + "\n status=" + volume2ExpectedStatus); + assertOccurrenceStatus(output, volumeRoot1, containerId, "VALID"); + assertOccurrenceStatus(output, volumeRoot2, containerId, volume2ExpectedStatus); + } + + private void assertOccurrenceStatus(String output, File volumeRoot, long containerId, String expectedStatus) { + assertThat(output).contains(String.format("path=%s%n status=%s", + testHelper.containerPath(volumeRoot, containerId), expectedStatus)); } private void executeAnalyze(String datanodeDirs, String... extraArgs) { diff --git a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestScmContainerMetadataReader.java b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestScmContainerMetadataReader.java new file mode 100644 index 00000000000..57b683884f4 --- /dev/null +++ b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestScmContainerMetadataReader.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug.datanode.container.analyze; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.nio.file.Path; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Unit tests for {@link ScmContainerMetadataReader}. + */ +public class TestScmContainerMetadataReader { + + @TempDir + private Path tempDir; + + private OzoneConfiguration conf; + private ContainerAnalyzeTestHelper testHelper; + + @BeforeEach + public void setup() { + conf = new OzoneConfiguration(); + testHelper = new ContainerAnalyzeTestHelper(tempDir, conf, + UUID.randomUUID().toString(), UUID.randomUUID().toString()); + } + + @Test + public void testClassifyNotInScm() throws Exception { + File scmDb = testHelper.createScmDb(Collections.emptyMap()); + try (ScmContainerMetadataReader reader = new ScmContainerMetadataReader(conf, scmDb)) { + Optional result = reader.classify(1001L); + assertTrue(result.isPresent()); + assertEquals(ScmContainerMetadataReader.ScmContainerClassification.NOT_IN_SCM, result.get()); + } + } + + @Test + public void testClassifyDeleted() throws Exception { + Map containers = new HashMap<>(); + containers.put(1002L, HddsProtos.LifeCycleState.DELETED); + File scmDb = testHelper.createScmDb(containers); + + try (ScmContainerMetadataReader reader = new ScmContainerMetadataReader(conf, scmDb)) { + Optional result = reader.classify(1002L); + assertTrue(result.isPresent()); + assertEquals(ScmContainerMetadataReader.ScmContainerClassification.DELETED, result.get()); + } + } + + @Test + public void testClassifyOmitOther() throws Exception { + Map containers = new HashMap<>(); + containers.put(1003L, HddsProtos.LifeCycleState.CLOSED); + containers.put(1004L, HddsProtos.LifeCycleState.OPEN); + File scmDb = testHelper.createScmDb(containers); + + try (ScmContainerMetadataReader reader = new ScmContainerMetadataReader(conf, scmDb)) { + assertFalse(reader.classify(1003L).isPresent()); + assertFalse(reader.classify(1004L).isPresent()); + } + } +} From ca010f823692d05b86beec695f408e26e1f753ef Mon Sep 17 00:00:00 2001 From: Sreeja Chintalapati <115860222+sreejasahithi@users.noreply.github.com> Date: Tue, 23 Jun 2026 19:25:28 +0530 Subject: [PATCH 2/2] Addressed review comments --- .../container/analyze/AnalyzeSubcommand.java | 42 ++++++++++++------- .../analyze/ScmContainerMetadataReader.java | 13 ++++-- .../analyze/TestAnalyzeSubcommand.java | 14 ++++--- .../TestScmContainerMetadataReader.java | 12 +++++- 4 files changed, 56 insertions(+), 25 deletions(-) diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java index 9728195d734..065717c3f2e 100644 --- a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java +++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/AnalyzeSubcommand.java @@ -27,6 +27,7 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.Callable; +import java.util.stream.Stream; import org.apache.hadoop.hdds.cli.AbstractSubcommand; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.shell.ListLimitOptions; @@ -45,11 +46,16 @@ "Analyze container consistency between on-disk container directories on this DataNode and SCM metadata.", "Must be run locally on a DataNode.", "", - "Each reported container occurrence includes a status:", + "Reports:", + " Duplicate container directories: same containerID found on more than one volume.", + " Orphan containers (requires --scm-db): present on disk but not present in SCM metadata.", + " Containers marked DELETED in SCM but present on disk (requires --scm-db).", + "", + "Each reported occurrence includes container directory path(s), size and an on-disk metadata status:", " MISSING_METADATA: metadata/{containerId}.container does not exist.", - " INVALID_METADATA: metadata file exists but cannot be parsed, or the container ID in the metadata", - " does not match the directory name.", - " VALID: metadata file is present and consistent with the directory." + " INVALID_METADATA: metadata file exists but cannot be parsed, or the containerID in the", + " file does not match the directory name.", + " VALID: metadata file is present, parses correctly, and its containerID matches the directory name." }) public class AnalyzeSubcommand extends AbstractSubcommand implements Callable { @CommandLine.Mixin @@ -61,7 +67,7 @@ public class AnalyzeSubcommand extends AbstractSubcommand implements Callable> enrichedDuplicates = @@ -80,6 +86,16 @@ public Void call() throws Exception { return null; } + /** + * Validate CLI options before starting the on-disk DN scan. + * {@link ListLimitOptions#getLimit()} is also called from + * {@link #printContainerOccurrenceReport(String, Map)}, but validating here fails fast + * before the DN volume scan and SCM DB lookup. + */ + private void validateOptions() { + listOptions.getLimit(); + } + private void findOrphanAndDeletedButPresentContainers(OzoneConfiguration conf, ContainerScanResult scanResult, Map> enrichedDuplicates) throws IOException { Map> enrichedOrphanContainers = new HashMap<>(); @@ -109,7 +125,8 @@ private void findOrphanAndDeletedButPresentContainers(OzoneConfiguration conf, C printContainerOccurrenceReport("Number of orphan containers(wrt SCM) on this DataNode: %d%n", enrichedOrphanContainers); - printContainerOccurrenceReport("Number of deleted but present containers on this DataNode: %d%n", + printContainerOccurrenceReport( + "Number of containers marked DELETED in SCM but present on disk on this DataNode: %d%n", enrichedDeletedButPresent); } @@ -121,21 +138,16 @@ private void printContainerOccurrenceReport(String countFormat, return; } + Stream>> stream = + containersById.entrySet().stream().sorted(Map.Entry.comparingByKey()); if (!listOptions.isAll()) { int limit = listOptions.getLimit(); if (total > limit) { out().printf("Showing first %d:%n", limit); } - - containersById.entrySet().stream() - .sorted(Map.Entry.comparingByKey()) - .limit(limit) - .forEach(entry -> printContainerEntry(entry.getKey(), entry.getValue())); - } else { - containersById.entrySet().stream() - .sorted(Map.Entry.comparingByKey()) - .forEach(entry -> printContainerEntry(entry.getKey(), entry.getValue())); + stream = stream.limit(limit); } + stream.forEach(entry -> printContainerEntry(entry.getKey(), entry.getValue())); } private void printContainerEntry(long containerId, List occurrences) { diff --git a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ScmContainerMetadataReader.java b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ScmContainerMetadataReader.java index d416fd5f04b..6c2869299b5 100644 --- a/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ScmContainerMetadataReader.java +++ b/hadoop-ozone/cli-debug/src/main/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/ScmContainerMetadataReader.java @@ -46,9 +46,13 @@ public final class ScmContainerMetadataReader implements AutoCloseable { public ScmContainerMetadataReader(ConfigurationSource conf, File scmDbPath) throws IOException { File scmDbDir = resolveScmDbDirectory(scmDbPath); + File parentDir = scmDbDir.getParentFile(); + if (parentDir == null) { + throw new IOException("SCM database directory has no parent path: " + scmDbDir); + } try { this.dbStore = DBStoreBuilder.newBuilder(conf, SCMDBDefinition.get(), scmDbDir.getName(), - scmDbDir.getParentFile().toPath()) + parentDir.toPath()) .setOpenReadOnly(true) .build(); } catch (RocksDatabaseException e) { @@ -85,9 +89,10 @@ public Optional classify(long containerId) throws IO static File resolveScmDbDirectory(File path) throws IOException { Objects.requireNonNull(path, "scmDbPath"); - File scmDbDir = path; - if (!OzoneConsts.SCM_DB_NAME.equals(path.getName())) { - File child = new File(path, OzoneConsts.SCM_DB_NAME); + File absolutePath = path.getAbsoluteFile(); + File scmDbDir = absolutePath; + if (!OzoneConsts.SCM_DB_NAME.equals(absolutePath.getName())) { + File child = new File(absolutePath, OzoneConsts.SCM_DB_NAME); if (child.isDirectory()) { scmDbDir = child; } diff --git a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java index 080c4de9aa0..54c780b897a 100644 --- a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java +++ b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestAnalyzeSubcommand.java @@ -234,7 +234,8 @@ public void testAnalyzeScmOmitsHealthyContainer() throws Exception { String output = outWriter.toString(); assertThat(output).contains("Number of orphan containers(wrt SCM) on this DataNode: 0"); - assertThat(output).contains("Number of deleted but present containers on this DataNode: 0"); + assertThat(output).contains( + "Number of containers marked DELETED in SCM but present on disk on this DataNode: 0"); } @Test @@ -256,7 +257,8 @@ public void testAnalyzeScmMixedOrphanDeletedHealthy() throws Exception { String output = outWriter.toString(); assertThat(output).contains("Number of orphan containers(wrt SCM) on this DataNode: 1"); - assertThat(output).contains("Number of deleted but present containers on this DataNode: 1"); + assertThat(output).contains( + "Number of containers marked DELETED in SCM but present on disk on this DataNode: 1"); assertThat(output).contains("Container " + orphanId + " (1 occurrence):"); assertOccurrenceStatus(output, volumeRoot, orphanId, "VALID"); assertThat(output).contains("Container " + deletedId + " (1 occurrence):"); @@ -288,7 +290,8 @@ public void testAnalyzeScmMixedOrphanDeletedDuplicate() throws Exception { assertThat(output).contains("Number of orphan containers(wrt SCM) on this DataNode: 1"); assertThat(output).contains("Container " + orphanId + " (1 occurrence):"); assertOccurrenceStatus(output, volumeRoot1, orphanId, "VALID"); - assertThat(output).contains("Number of deleted but present containers on this DataNode: 1"); + assertThat(output).contains( + "Number of containers marked DELETED in SCM but present on disk on this DataNode: 1"); assertThat(output).contains("Container " + deletedId + " (1 occurrence):"); assertOccurrenceStatus(output, volumeRoot1, deletedId, "VALID"); assertThat(output).contains("Number of containers with duplicate container directories on this DataNode: 1"); @@ -308,7 +311,8 @@ public void testAnalyzeWithoutScmDb() throws Exception { String output = outWriter.toString(); assertThat(output).contains("provide the SCM database path using the --scm-db option"); assertThat(output).doesNotContain("Number of orphan containers(wrt SCM) on this DataNode:"); - assertThat(output).doesNotContain("Number of deleted but present containers on this DataNode:"); + assertThat(output).doesNotContain( + "Number of containers marked DELETED in SCM but present on disk on this DataNode:"); assertThat(output).contains("Number of containers with duplicate container directories on this DataNode: 0"); } @@ -328,7 +332,7 @@ private void assertScmCounts(String output, int expectedOrphans, int expectedDel assertThat(output).contains( "Number of orphan containers(wrt SCM) on this DataNode: " + expectedOrphans); assertThat(output).contains( - "Number of deleted but present containers on this DataNode: " + expectedDeleted); + "Number of containers marked DELETED in SCM but present on disk on this DataNode: " + expectedDeleted); } private void assertDuplicateReport(File volumeRoot1, File volumeRoot2, long containerId, diff --git a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestScmContainerMetadataReader.java b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestScmContainerMetadataReader.java index 57b683884f4..78878b1f2c7 100644 --- a/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestScmContainerMetadataReader.java +++ b/hadoop-ozone/cli-debug/src/test/java/org/apache/hadoop/ozone/debug/datanode/container/analyze/TestScmContainerMetadataReader.java @@ -19,6 +19,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.File; @@ -68,7 +69,7 @@ public void testClassifyDeleted() throws Exception { containers.put(1002L, HddsProtos.LifeCycleState.DELETED); File scmDb = testHelper.createScmDb(containers); - try (ScmContainerMetadataReader reader = new ScmContainerMetadataReader(conf, scmDb)) { + try (ScmContainerMetadataReader reader = new ScmContainerMetadataReader(conf, scmDb.getParentFile())) { Optional result = reader.classify(1002L); assertTrue(result.isPresent()); assertEquals(ScmContainerMetadataReader.ScmContainerClassification.DELETED, result.get()); @@ -87,4 +88,13 @@ public void testClassifyOmitOther() throws Exception { assertFalse(reader.classify(1004L).isPresent()); } } + + @Test + public void testResolveScmDbDirectoryReturnsAbsolutePathWithParent() throws Exception { + File scmDb = testHelper.createScmDb(Collections.emptyMap()); + File resolved = ScmContainerMetadataReader.resolveScmDbDirectory(scmDb); + assertTrue(resolved.isAbsolute()); + assertNotNull(resolved.getParentFile()); + assertEquals(scmDb.getAbsolutePath(), resolved.getAbsolutePath()); + } }