diff --git a/fdbclient/include/fdbclient/CommitTransaction.h b/fdbclient/include/fdbclient/CommitTransaction.h index 54173c49a16..c6bea17205f 100644 --- a/fdbclient/include/fdbclient/CommitTransaction.h +++ b/fdbclient/include/fdbclient/CommitTransaction.h @@ -91,7 +91,7 @@ struct MutationRef { CompareAndClear, Reserved_For_SpanContextMessage /* See fdbserver/SpanContextMessage.h */, Reserved_For_OTELSpanContextMessage, - Reserved_For_PartitionMapMessage /* See fdbserver/core/PartitionMapMessage.h */, + Reserved_For_PartitionMapMessage /* See fdbserver/backupworker/PartitionMapMessage.h */, MAX_ATOMIC_OP }; diff --git a/fdbrpc/FlowTransport.cpp b/fdbrpc/FlowTransport.cpp index ca61119ee49..dd4e7139fca 100644 --- a/fdbrpc/FlowTransport.cpp +++ b/fdbrpc/FlowTransport.cpp @@ -35,7 +35,7 @@ #include "fdbrpc/fdbrpc.h" #include "fdbrpc/FailureMonitor.h" -#include "fdbrpc/HealthMonitor.h" +#include "HealthMonitor.h" #include "JsonWebKeySet.h" #include "fdbrpc/genericactors.h" #include "fdbrpc/IPAllowList.h" @@ -762,11 +762,11 @@ Future connectionWriter(Reference self, Reference conn) } } -Future delayedHealthUpdate(NetworkAddress address, bool* tooManyConnectionsClosed) { +Future delayedHealthUpdate(HealthMonitor* healthMonitor, NetworkAddress address, bool* tooManyConnectionsClosed) { double start = now(); while (true) { if (FLOW_KNOBS->HEALTH_MONITOR_MARK_FAILED_UNSTABLE_CONNECTIONS && - FlowTransport::transport().healthMonitor()->tooManyConnectionsClosed(address) && address.isPublic()) { + healthMonitor->tooManyConnectionsClosed(address) && address.isPublic()) { co_await delayJittered(FLOW_KNOBS->MAX_RECONNECTION_TIME * 2.0); } else { if (*tooManyConnectionsClosed) { @@ -774,7 +774,7 @@ Future delayedHealthUpdate(NetworkAddress address, bool* tooManyConnection .detail("Dest", address) .detail("StartTime", start) .detail("TimeElapsed", now() - start) - .detail("ClosedCount", FlowTransport::transport().healthMonitor()->closedConnectionsCount(address)); + .detail("ClosedCount", healthMonitor->closedConnectionsCount(address)); *tooManyConnectionsClosed = false; } IFailureMonitor::failureMonitor().setStatus(address, FailureStatus(false)); @@ -855,7 +855,8 @@ Future connectionKeeper(Reference self, IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false)); } if (self->unsent.empty()) { - delayedHealthUpdateF = delayedHealthUpdate(self->destination, &tooManyConnectionsClosed); + delayedHealthUpdateF = delayedHealthUpdate( + &self->transport->healthMonitor, self->destination, &tooManyConnectionsClosed); auto healthRes = co_await race(delayedHealthUpdateF, self->dataToSend.onTrigger()); if (healthRes.index() == 0) { conn->close(); @@ -897,7 +898,8 @@ Future connectionKeeper(Reference self, try { self->transport->countConnEstablished++; if (!delayedHealthUpdateF.isValid()) - delayedHealthUpdateF = delayedHealthUpdate(self->destination, &tooManyConnectionsClosed); + delayedHealthUpdateF = delayedHealthUpdate( + &self->transport->healthMonitor, self->destination, &tooManyConnectionsClosed); self->connected = true; co_await (connectionWriter(self, conn) || reader || connectionMonitor(self) || self->resetConnection.onTrigger()); @@ -992,15 +994,14 @@ Future connectionKeeper(Reference self, if (conn) { if (self->destination.isPublic() && e.code() == error_code_connection_failed) { - FlowTransport::transport().healthMonitor()->reportPeerClosed(self->destination); + self->transport->healthMonitor.reportPeerClosed(self->destination); if (FLOW_KNOBS->HEALTH_MONITOR_MARK_FAILED_UNSTABLE_CONNECTIONS && - FlowTransport::transport().healthMonitor()->tooManyConnectionsClosed(self->destination) && + self->transport->healthMonitor.tooManyConnectionsClosed(self->destination) && self->destination.isPublic()) { TraceEvent("TooManyConnectionsClosedMarkFailed") .detail("Dest", self->destination) - .detail( - "ClosedCount", - FlowTransport::transport().healthMonitor()->closedConnectionsCount(self->destination)); + .detail("ClosedCount", + self->transport->healthMonitor.closedConnectionsCount(self->destination)); tooManyConnectionsClosed = true; IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(true)); } @@ -2182,8 +2183,8 @@ void FlowTransport::createInstance(bool isClient, g_network->setGlobal(INetwork::enClientFailureMonitor, isClient ? (flowGlobalType)1 : nullptr); } -HealthMonitor* FlowTransport::healthMonitor() { - return &self->healthMonitor; +std::unordered_set FlowTransport::getRecentClosedPeers() { + return self->healthMonitor.getRecentClosedPeers(); } Optional FlowTransport::getPublicKeyByName(StringRef name) const { diff --git a/fdbrpc/HealthMonitor.cpp b/fdbrpc/HealthMonitor.cpp index 452edeec18b..95527e3b643 100644 --- a/fdbrpc/HealthMonitor.cpp +++ b/fdbrpc/HealthMonitor.cpp @@ -18,7 +18,7 @@ * limitations under the License. */ -#include "fdbrpc/HealthMonitor.h" +#include "HealthMonitor.h" void HealthMonitor::reportPeerClosed(const NetworkAddress& peerAddress) { purgeOutdatedHistory(); diff --git a/fdbrpc/include/fdbrpc/HealthMonitor.h b/fdbrpc/HealthMonitor.h similarity index 100% rename from fdbrpc/include/fdbrpc/HealthMonitor.h rename to fdbrpc/HealthMonitor.h diff --git a/fdbrpc/include/fdbrpc/FlowTransport.h b/fdbrpc/include/fdbrpc/FlowTransport.h index 93afeac8a44..87bd2b4d2c2 100644 --- a/fdbrpc/include/fdbrpc/FlowTransport.h +++ b/fdbrpc/include/fdbrpc/FlowTransport.h @@ -24,9 +24,10 @@ #include #include +#include +#include #include "fdbrpc/DDSketch.h" -#include "fdbrpc/HealthMonitor.h" #include "flow/genericactors.actor.h" #include "flow/network.h" #include "flow/FileIdentifier.h" @@ -293,7 +294,8 @@ class FlowTransport : NonCopyable { Endpoint loadedEndpoint(const UID& token); Future loadedDisconnect(); - HealthMonitor* healthMonitor(); + // Returns peers whose recent failed connections have already been evicted from the transport peer map. + std::unordered_set getRecentClosedPeers(); bool currentDeliveryPeerIsTrusted() const; NetworkAddress currentDeliveryPeerAddress() const; diff --git a/fdbserver/backupworker/BackupWorkerRangePartitioned.cpp b/fdbserver/backupworker/BackupWorkerRangePartitioned.cpp index 45a06f8d969..636c4874ef5 100644 --- a/fdbserver/backupworker/BackupWorkerRangePartitioned.cpp +++ b/fdbserver/backupworker/BackupWorkerRangePartitioned.cpp @@ -25,7 +25,7 @@ #include "fdbserver/core/BackupPartitionMap.h" #include "fdbserver/core/BackupProgress.h" #include "fdbserver/core/Knobs.h" -#include "fdbserver/core/PartitionMapMessage.h" +#include "PartitionMapMessage.h" #include "fdbserver/core/WaitFailure.h" #include "fdbserver/logsystem/LogSystem.h" #include "fdbserver/logsystem/LogSystemConsumer.h" diff --git a/fdbserver/core/include/fdbserver/core/PartitionMapMessage.h b/fdbserver/backupworker/PartitionMapMessage.h similarity index 100% rename from fdbserver/core/include/fdbserver/core/PartitionMapMessage.h rename to fdbserver/backupworker/PartitionMapMessage.h diff --git a/fdbserver/tester/KnobProtectiveGroups.cpp b/fdbserver/tester/KnobProtectiveGroups.cpp index 81b7514ca77..ab28e628631 100644 --- a/fdbserver/tester/KnobProtectiveGroups.cpp +++ b/fdbserver/tester/KnobProtectiveGroups.cpp @@ -18,7 +18,7 @@ * limitations under the License. */ -#include "fdbserver/tester/KnobProtectiveGroups.h" +#include "KnobProtectiveGroups.h" #include diff --git a/fdbserver/tester/include/fdbserver/tester/KnobProtectiveGroups.h b/fdbserver/tester/KnobProtectiveGroups.h similarity index 71% rename from fdbserver/tester/include/fdbserver/tester/KnobProtectiveGroups.h rename to fdbserver/tester/KnobProtectiveGroups.h index 4ae331a254c..d20acd83088 100644 --- a/fdbserver/tester/include/fdbserver/tester/KnobProtectiveGroups.h +++ b/fdbserver/tester/KnobProtectiveGroups.h @@ -21,29 +21,9 @@ #ifndef FDBSERVER_TESTER_KNOBPROTECTIVEGROUPS_H #define FDBSERVER_TESTER_KNOBPROTECTIVEGROUPS_H -#include -#include +#include "fdbserver/tester/KnobKeyValuePairs.h" -#include "flow/Knobs.h" - -// A list of knob key value pairs -class KnobKeyValuePairs { -public: - using container_t = std::unordered_map; - -private: - // Here the knob value is directly stored, unlike KnobValue, for simplicity - container_t knobs; - -public: - // Sets a value for a given knob - void set(const std::string& name, const ParsedKnobValue value); - - // Gets a list of knobs for given type - const container_t& getKnobs() const; -}; - -// For knobs, temporarily change the values, the original values will be recovered +// For knobs, temporarily change the values, the original values will be recovered. class KnobProtectiveGroup { KnobKeyValuePairs overriddenKnobs; KnobKeyValuePairs originalKnobs; diff --git a/fdbserver/tester/include/fdbserver/tester/KnobKeyValuePairs.h b/fdbserver/tester/include/fdbserver/tester/KnobKeyValuePairs.h new file mode 100644 index 00000000000..ec54a8ee564 --- /dev/null +++ b/fdbserver/tester/include/fdbserver/tester/KnobKeyValuePairs.h @@ -0,0 +1,47 @@ +/* + * KnobKeyValuePairs.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2026 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FDBSERVER_TESTER_KNOBKEYVALUEPAIRS_H +#define FDBSERVER_TESTER_KNOBKEYVALUEPAIRS_H +#pragma once + +#include +#include + +#include "flow/Knobs.h" + +// A list of knob key value pairs. +class KnobKeyValuePairs { +public: + using container_t = std::unordered_map; + +private: + // Here the knob value is directly stored, unlike KnobValue, for simplicity. + container_t knobs; + +public: + // Sets a value for a given knob. + void set(const std::string& name, const ParsedKnobValue value); + + // Gets a list of knobs for given type. + const container_t& getKnobs() const; +}; + +#endif // FDBSERVER_TESTER_KNOBKEYVALUEPAIRS_H diff --git a/fdbserver/tester/include/fdbserver/tester/WorkloadUtils.h b/fdbserver/tester/include/fdbserver/tester/WorkloadUtils.h index 5d2206674a7..841208665ef 100644 --- a/fdbserver/tester/include/fdbserver/tester/WorkloadUtils.h +++ b/fdbserver/tester/include/fdbserver/tester/WorkloadUtils.h @@ -29,7 +29,7 @@ #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/core/FDBSimulationPolicy.h" -#include "fdbserver/tester/KnobProtectiveGroups.h" +#include "fdbserver/tester/KnobKeyValuePairs.h" #include "fdbserver/core/TesterInterface.h" #include "fdbserver/tester/workloads.h" #include "fdbrpc/PerfMetric.h" diff --git a/fdbserver/tester/test.cpp b/fdbserver/tester/test.cpp index e7104e93957..7dcc9e6ff8e 100644 --- a/fdbserver/tester/test.cpp +++ b/fdbserver/tester/test.cpp @@ -43,7 +43,7 @@ #include "fdbserver/core/QuietDatabase.h" #include "fdbserver/core/WorkerInterface.actor.h" #include "fdbserver/core/FDBSimulationPolicy.h" -#include "fdbserver/tester/KnobProtectiveGroups.h" +#include "KnobProtectiveGroups.h" #include "ConsistencyChecker.h" #include "DatabaseMaintenance.h" #include "TestSpecParser.h" diff --git a/fdbserver/worker/worker.actor.cpp b/fdbserver/worker/worker.actor.cpp index 50bf37daf58..a0082d516a8 100644 --- a/fdbserver/worker/worker.actor.cpp +++ b/fdbserver/worker/worker.actor.cpp @@ -1251,7 +1251,7 @@ UpdateWorkerHealthRequest doPeerHealthCheck(const WorkerInterface& interf, // part of the transaction sub system. // Note that we don't need to calculate recovered peer in this case since all the recently closed peers are // considered permanently closed peers. - for (const auto& address : FlowTransport::transport().healthMonitor()->getRecentClosedPeers()) { + for (const auto& address : FlowTransport::transport().getRecentClosedPeers()) { if (allPeers.find(address) != allPeers.end()) { // We have checked this peer in the above for loop. continue;