diff --git a/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h b/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h index b01e0db168..bcd13661c6 100644 --- a/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h +++ b/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h @@ -13,6 +13,8 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h" +#include + namespace psr { /// Default implementation of EdgeFunctionSingletonCache. @@ -53,6 +55,7 @@ class DefaultEdgeFunctionSingletonCacheImpl void erase(const EdgeFunctionTy &EF) noexcept override { Cache.erase(&EF); } template + requires std::constructible_from [[nodiscard]] EdgeFunction createEdgeFunction(ArgTys &&...Args) { return CachedEdgeFunction{ EdgeFunctionTy{std::forward(Args)...}, this}; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h index 7688177425..9ed5419ced 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h @@ -10,6 +10,7 @@ #ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_FIELDSENSALLOCSITESAWAREIFDSPROBLEM_H #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_FIELDSENSALLOCSITESAWAREIFDSPROBLEM_H +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" #include "phasar/Domain/BinaryDomain.h" @@ -19,8 +20,12 @@ #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/Compressor.h" +#include "phasar/Utils/Fn.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/MapUtils.h" +#include "phasar/Utils/SmallArraySet.h" +#include "phasar/Utils/StrongTypeDef.h" +#include "phasar/Utils/TableWrappers.h" #include "phasar/Utils/TypeTraits.h" #include "phasar/Utils/TypedVector.h" #include "phasar/Utils/Utilities.h" @@ -28,6 +33,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Operator.h" @@ -37,21 +43,16 @@ #include #include -namespace psr::cfl_fieldsens { - /// \file /// Implements field-sensitivity after the paper "Boosting the performance /// of alias-aware IFDS analysis with CFL-based environment transformers" by Li /// et al. -// NOLINTNEXTLINE(performance-enum-size) -enum class FieldStringNodeId : uint32_t { - None = 0, -}; +PHASAR_STRONG_TYPEDEF(psr::cfl_fieldsens, uint32_t, FieldStringNodeId, + None = 0); -[[nodiscard]] inline llvm::hash_code hash_value(FieldStringNodeId NId) { - return llvm::hash_value(std::underlying_type_t(NId)); -} +PHASAR_STRONG_TYPEDEF(psr::cfl_fieldsens, uint32_t, KillSetId, Empty = 0); +namespace psr::cfl_fieldsens { struct FieldStringNode { FieldStringNodeId Next{}; @@ -93,6 +94,8 @@ namespace cfl_fieldsens { /// Interns the Store- and Load field-strings class FieldStringManager { public: + static constexpr int32_t TopOffset = INT32_MIN; + FieldStringManager(); [[nodiscard]] FieldStringNodeId intern(FieldStringNode Nod) { @@ -127,44 +130,75 @@ class FieldStringManager { return Depth[NId]; } + [[nodiscard]] KillSetId internKills(SmallArraySet &&Kills) { + return KillsCompressor.getOrInsert(std::move(Kills)); + } + + [[nodiscard]] KillSetId addKill(KillSetId KS, int32_t Offs) { + if (Offs == TopOffset || KillsCompressor[KS].contains(Offs)) { + return KS; + } + + auto Kills = KillsCompressor[KS]; + Kills.insert(Offs); + return KillsCompressor.getOrInsert(std::move(Kills)); + } + + [[nodiscard]] bool isKilledBy(KillSetId KS, int32_t Offs) const { + if (Offs == TopOffset || KS == KillSetId::Empty) { + return false; + } + if (!KillsCompressor.inbounds(KS)) [[unlikely]] { + return false; + } + + return KillsCompressor[KS].contains(Offs); + } + + [[nodiscard]] const auto &kills(KillSetId KS) const { + return KillsCompressor[KS]; + } + + void reserve(size_t ExpectedCapacity) { + NodeCompressor.reserve(ExpectedCapacity); + Depth.reserve(ExpectedCapacity); + } + private: Compressor NodeCompressor{}; TypedVector Depth{}; + Compressor, KillSetId> KillsCompressor{}; }; /// A single CFL Field-Access String consisting of: gep, loads, kills, and /// stores struct AccessPath { - static constexpr int32_t TopOffset = INT32_MIN; + static constexpr int32_t TopOffset = FieldStringManager::TopOffset; FieldStringNodeId Loads{}; FieldStringNodeId Stores{}; - llvm::SmallDenseSet Kills{}; + KillSetId Kills{}; // Add an offset for pending GEPs; INT32_MIN is Top - int32_t Offset = {0}; - int32_t EmptyTombstone = 0; + int32_t Offset{}; [[nodiscard]] bool empty() const noexcept { return Loads == FieldStringNodeId::None && - Stores == FieldStringNodeId::None && Kills.empty() && Offset == 0; - } - - [[nodiscard]] bool kills(int32_t Off) const { - return Off != TopOffset && Kills.contains(Off); + Stores == FieldStringNodeId::None && Kills == KillSetId::Empty && + Offset == 0; } [[nodiscard]] constexpr bool - operator==(const AccessPath &Other) const noexcept { - return EmptyTombstone == Other.EmptyTombstone && Loads == Other.Loads && - Stores == Other.Stores && Kills == Other.Kills; - } + operator==(const AccessPath &Other) const noexcept = default; - bool operator!=(const AccessPath &Other) const noexcept { - return !(*this == Other); + friend constexpr size_t hash_value(const AccessPath &FieldString) noexcept { + size_t HC = 37; + HC = HC * 31 + size_t(FieldString.Loads); + HC = HC * 31 + size_t(FieldString.Stores); + HC = HC * 31 + size_t(FieldString.Kills); + HC = HC * 31 + size_t(FieldString.Offset); + return HC; } - friend size_t hash_value(const AccessPath &FieldString) noexcept; - friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const AccessPath &FieldString); @@ -174,32 +208,27 @@ struct AccessPath { struct AccessPathDMI { static AccessPath getEmptyKey() { AccessPath Ret{}; - Ret.EmptyTombstone = 1; + Ret.Loads = FieldStringNodeId(UINT32_MAX); return Ret; } static AccessPath getTombstoneKey() { AccessPath Ret{}; - Ret.EmptyTombstone = 2; + Ret.Loads = FieldStringNodeId(UINT32_MAX); + Ret.Stores = FieldStringNodeId(UINT32_MAX); return Ret; } - static auto getHashValue(const AccessPath &FieldString) noexcept { + static auto getHashValue(AccessPath FieldString) noexcept { return hash_value(FieldString); } - static bool isEqual(const AccessPath &L, const AccessPath &R) noexcept { - if (L.EmptyTombstone != R.EmptyTombstone) { - return false; - } - if (L.EmptyTombstone) { - return true; - } - return L == R; - } + static bool isEqual(AccessPath L, AccessPath R) noexcept { return L == R; } }; /// An edge-value consisting of a set if CFL field access strings. struct IFDSEdgeValue { + using container_type = llvm::SmallDenseSet; + [[clang::require_explicit_initialization]] FieldStringManager *Mgr{}; - llvm::SmallDenseSet Paths; + container_type Paths; static constexpr llvm::StringLiteral LogCategory = "IFDSEdgeValue"; @@ -214,7 +243,7 @@ struct IFDSEdgeValue { return !(*this == Other); } - [[nodiscard]] friend auto hash_value(const IFDSEdgeValue EV) { + [[nodiscard]] friend auto hash_value(const IFDSEdgeValue &EV) { return llvm::hash_combine_range(EV.Paths.begin(), EV.Paths.end()); } @@ -363,18 +392,78 @@ bool filterFieldSensFacts( return true; } +struct CFLFieldSensEdgeFunctionImpl { + using l_t = LatticeDomain; + [[clang::require_explicit_initialization]] IFDSEdgeValue Transform; + [[clang::require_explicit_initialization]] uint8_t DepthKLimit{}; + + bool operator==(const CFLFieldSensEdgeFunctionImpl &Other) const noexcept { + assert(DepthKLimit == Other.DepthKLimit); + return Transform == Other.Transform; + } + + friend auto hash_value(const CFLFieldSensEdgeFunctionImpl &EF) noexcept { + return hash_value(EF.Transform); + } + + [[nodiscard]] static auto from(IFDSEdgeValue &&Txn, uint8_t DepthKLimit) { + return CFLFieldSensEdgeFunctionImpl{ + .Transform = std::move(Txn), + .DepthKLimit = DepthKLimit, + }; + } + + [[nodiscard]] static auto from(AccessPath Txn, FieldStringManager &Mgr, + uint8_t DepthKLimit) { + return CFLFieldSensEdgeFunctionImpl{ + .Transform = {.Mgr = &Mgr, .Paths = {Txn}}, + .DepthKLimit = DepthKLimit, + }; + } + + [[nodiscard]] static auto fromEpsilon(uint8_t DepthKLimit, + FieldStringManager &Mgr) { + return CFLFieldSensEdgeFunctionImpl{ + .Transform = IFDSEdgeValue::epsilon(&Mgr), + .DepthKLimit = DepthKLimit, + }; + } +}; + +struct CFLFieldSensEdgeFunction { + using l_t = LatticeDomain; + [[clang::require_explicit_initialization]] const CFLFieldSensEdgeFunctionImpl + *Impl{}; + + [[nodiscard]] l_t computeTarget(l_t Source) const { + assert(Impl != nullptr); + Source.onValue(fn<&IFDSEdgeValue::applyTransforms>, Impl->Transform, + Impl->DepthKLimit); + return Source; + } + + constexpr friend bool + operator==(CFLFieldSensEdgeFunction L, + CFLFieldSensEdgeFunction R) noexcept = default; + + friend auto hash_value(CFLFieldSensEdgeFunction EF) noexcept { + return llvm::hash_value(EF.Impl); + } + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + CFLFieldSensEdgeFunction EF); +}; + } // namespace cfl_fieldsens /// An IFDS-Problem adaptor that makes any field-insensitive IFDS analysis -/// field-sensitive. Just wrap your IFDS problem with -/// FieldSensAllocSitesAwareIFDSProblem and use the IterativeIDESolver instead -/// of the IFDSSolver. +/// field-sensitive. Just wrap your IFDS problem with CFLFieldSensIFDSProblem +/// and use the IterativeIDESolver instead of the IFDSSolver. /// /// The only thing to change in your usual IFDS problem is not to kill data-flow /// facts when only parts of the fields should be killed. This is now handled by -/// the FieldSensAllocSitesAwareIFDSProblem. For that, provide a -/// FieldSensAllocSitesAwareIFDSProblemConfig with a proper KillsAt -/// implementation. +/// the CFLFieldSensIFDSProblem. For that, provide a CFLFieldSensIFDSProblem +/// with a proper KillsAt implementation. class CFLFieldSensIFDSProblem : public IDETabulationProblem { using Base = IDETabulationProblem; @@ -426,7 +515,10 @@ class CFLFieldSensIFDSProblem : Base(assertNotNull(UserProblem).getProjectIRDB(), assertNotNull(UserProblem).getEntryPoints(), UserProblem->getZeroValue()), - UserProblem(UserProblem), Config(std::move(Config)) {} + UserProblem(UserProblem), Config(std::move(Config)) { + Mgr.reserve(UserProblem->getProjectIRDB()->getNumInstructions()); + regCounters(); + } /// Constructs an IDETabulationProblem with the usual arguments, forwarded /// from UserProblem and tries to automatically derive the config from @@ -487,6 +579,10 @@ class CFLFieldSensIFDSProblem uint8_t DepthKLimit, const llvm::DataLayout &DL); + EdgeFunction getLoadEdgeFunction(d_t CurrNode, d_t PointerOp, + uint8_t DepthKLimit, + const llvm::DataLayout &DL); + EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) override; @@ -516,10 +612,25 @@ class CFLFieldSensIFDSProblem [[nodiscard]] const auto &base() const noexcept { return *UserProblem; } private: + using EFConstPtr = const cfl_fieldsens::CFLFieldSensEdgeFunctionImpl *; + using EFResultPtr = llvm::PointerIntPair; + + [[nodiscard]] EdgeFunction + makeEF(cfl_fieldsens::CFLFieldSensEdgeFunctionImpl &&EF); + [[nodiscard]] EFResultPtr + makeEFPtr(cfl_fieldsens::CFLFieldSensEdgeFunctionImpl &&EF); + + static void regCounters() noexcept; + IFDSTabulationProblem *UserProblem{}; cfl_fieldsens::FieldStringManager Mgr{}; cfl_fieldsens::IFDSProblemConfig Config{}; + UnorderedSet EFInternCache{}; + + llvm::DenseMap, EFResultPtr> ExtendCache{}; + llvm::DenseMap, EFResultPtr> CombineCache{}; + uint8_t DepthKLimit = 5; // Original from the paper }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index 9841805160..0a5c9c9998 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -374,6 +374,12 @@ getPointerIndicesOfType(const llvm::DIType *Ty, const llvm::DataLayout &DL); getPointerIndicesOfType(const llvm::DIType *Ty, const llvm::DataLayout &DL, PointerIndicesCache &PIC); +[[nodiscard]] bool walkLoadChainTo(const llvm::Value *Start, + const llvm::Value *Target, + const llvm::DataLayout &DL, + uint32_t MaxDepth, + llvm::function_ref OnDeref); + /** * Retrieves String annotation value as per * diff --git a/include/phasar/Utils/Compressor.h b/include/phasar/Utils/Compressor.h index ed14f676dd..a7d4fc1893 100644 --- a/include/phasar/Utils/Compressor.h +++ b/include/phasar/Utils/Compressor.h @@ -224,6 +224,9 @@ class Compressor { assert(Elem != nullptr); if constexpr (has_llvm_dense_map_info) { return llvm::DenseMapInfo::getHashValue(*Elem); + } else if constexpr (is_llvm_hashable_v) { + using llvm::hash_value; + return hash_value(*Elem); } else { return std::hash{}(*Elem); } diff --git a/include/phasar/Utils/SmallArraySet.h b/include/phasar/Utils/SmallArraySet.h new file mode 100644 index 0000000000..9022c6c1c8 --- /dev/null +++ b/include/phasar/Utils/SmallArraySet.h @@ -0,0 +1,359 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Compiler.h" + +#include +#include +#include +#include +#include +#include + +namespace psr { + +/// \brief Simple set container similar to the upcoming std::flat_set but only +/// uses sorting+binary-search, once the size exceeds a fixed threshold. Below +/// the threshold unique-insertion + lookup is handled via linear search. +/// +/// Should work well, if the expected size is small. +/// Use the static methods fromSorted() and fromSortedUniqued(), to speedup +/// construction,. if you can make some assumptions about the incoming data. +template ::value> +class SmallArraySet : private llvm::SmallVector { +public: + using typename llvm::SmallVector::value_type; + using typename llvm::SmallVector::iterator; + using typename llvm::SmallVector::const_iterator; + using typename llvm::SmallVector::const_reference; + using typename llvm::SmallVector::reference; + using typename llvm::SmallVector::const_pointer; + using typename llvm::SmallVector::pointer; + using typename llvm::SmallVector::difference_type; + + static constexpr size_t LinearThreshold = std::max(1, 64 / sizeof(T)); + + SmallArraySet() noexcept = default; + SmallArraySet(std::initializer_list IList) + : llvm::SmallVector(IList) { + psr::sortUnique(base()); + } + + explicit SmallArraySet(llvm::ArrayRef Elems) + : llvm::SmallVector(Elems) { + psr::sortUnique(base()); + } + + [[nodiscard]] static SmallArraySet + fromSorted(llvm::SmallVectorImpl &&Vec) { + assert(std::ranges::is_sorted(Vec) && "Vec is not sorted"); + SmallArraySet Ret(std::move(Vec), /*PreSortedAndUniqued=*/std::true_type{}); + + Ret.base().erase(std::unique(begin(), end()), end()); + + return Ret; + } + + [[nodiscard]] static SmallArraySet + fromSortedUniqued(llvm::SmallVectorImpl &&Vec) { + assert(std::ranges::is_sorted(Vec) && "Vec is not sorted"); + assert(std::ranges::adjacent_find(Vec) == Vec.end() && + "Vec is not uniqued"); + + SmallArraySet Ret(std::move(Vec), /*PreSortedAndUniqued=*/std::true_type{}); + return Ret; + } + + // Note: default the special member functions to explicitly make the move ctor + // noexcept + SmallArraySet(const SmallArraySet &) = default; + SmallArraySet(SmallArraySet &&) noexcept = default; + SmallArraySet &operator=(const SmallArraySet &) = default; + SmallArraySet &operator=(SmallArraySet &&) noexcept = default; + ~SmallArraySet() = default; + + [[nodiscard]] operator llvm::ArrayRef() const noexcept { return base(); } + + using llvm::SmallVector::begin; + using llvm::SmallVector::end; + using llvm::SmallVector::size; + using llvm::SmallVector::empty; + using llvm::SmallVector::reserve; + using llvm::SmallVector::operator==; + using llvm::SmallVector::operator!=; + + // Without inline, clang may not inline this, even in O3 + // NOLINTNEXTLINE(readability-redundant-inline-specifier) + inline bool insert(ByConstRef Val) { + const auto Size = this->size(); + const auto Begin = this->begin(); + const auto End = this->end(); + + if (Size < LinearThreshold) [[likely]] { + if (std::find(Begin, End, Val) != End) { + return false; + } + + this->push_back(Val); + + if (Size + 1 == LinearThreshold) [[unlikely]] { + std::ranges::sort(base()); + } + + return true; + } + + return insertImpl(Val); + } + + // Assume, the new Val is not in the set yet + void insertUnique(T Val) { + assert(!contains(Val)); + if (this->size() < LinearThreshold || Val > this->back()) { + this->push_back(std::move(Val)); + return; + } + insertImpl(Val); + } + + void insert(const SmallArraySet &Other) { + const auto Size = size(); + const auto OtherSize = Other.size(); + + llvm::ArrayRef OtherArr = Other; + + if (OtherSize <= Size && Size + OtherSize <= LinearThreshold) { + size_t I = 0; + for (; I != OtherSize; ++I) { + const auto End = this->end(); + ByConstRef Val = OtherArr[I]; + if (std::find(this->begin(), End, Val) != End) { + continue; + } + if (this->size() == LinearThreshold) { + break; + } + this->push_back(Val); + } + if (I == OtherSize) { + if (this->size() == LinearThreshold) [[unlikely]] { + std::ranges::sort(base()); + } + return; + } + + OtherArr = OtherArr.slice(I); + } + + insertImpl(OtherArr); + } + + void insertAll(auto &&Range) { + size_t EstimatedRngSize = SIZE_MAX; + if constexpr (requires() { + { Range.size() } -> std::convertible_to; + }) { + EstimatedRngSize = Range.size(); + } + + if (size() < LinearThreshold && EstimatedRngSize < LinearThreshold) { + for (auto &&Elem : Range) { + insert(PSR_FWD(Elem)); + } + return; + } + + this->append(llvm::adl_begin(Range), llvm::adl_end(Range)); + psr::sortUnique(base()); + } + + [[nodiscard]] SmallArraySet setUnion(const SmallArraySet &Other) const { + + const auto Size = size(); + const auto OtherSize = Other.size(); + if (std::min(Size, OtherSize) < LinearThreshold) { + const auto ThisSmaller = Size < OtherSize; + const auto &Smaller = ThisSmaller ? *this : Other; + const auto &Larger = ThisSmaller ? Other : *this; + auto Merged = Larger; + Merged.insert(Smaller); + return Merged; + } + + SmallArraySet Merged{}; + Merged.resize_for_overwrite(size() + Other.size()); + auto LastOut = std::set_union(begin(), end(), Other.begin(), Other.end(), + Merged.begin()); + Merged.base().erase(LastOut, Merged.end()); + return Merged; + } + + bool intersectWith(const SmallArraySet &Other) { + const auto Size = size(); + const auto OtherSize = Other.size(); + + if (Size < LinearThreshold || OtherSize > Size) { + auto It = std::ranges::remove_if( + base(), [&Other](const auto &Elem) { return !Other.contains(Elem); }); + base().erase(It.begin(), It.end()); + return Size != size(); + } + + // TODO: Optimize + + SmallArraySet Merged{}; + Merged.resize_for_overwrite(std::min(Size, OtherSize)); + + auto [Unused1, Unused2, LastOut] = + std::ranges::set_intersection(*this, Other, Merged.begin()); + Merged.base().erase(LastOut, Merged.end()); + *this = std::move(Merged); + return Size != size(); + } + + [[nodiscard]] SmallArraySet + setIntersection(const SmallArraySet &Other) const { + const auto Size = size(); + const auto OtherSize = Other.size(); + + if (std::min(Size, OtherSize) < LinearThreshold) { + auto Ret = Size < OtherSize ? *this : Other; + const auto &Larger = Size < OtherSize ? Other : *this; + auto It = std::ranges::remove_if( + Ret, [&Larger](const auto &Elem) { return !Larger.contains(Elem); }); + Ret.base().erase(It.begin(), It.end()); + return Ret; + } + + SmallArraySet Merged{}; + Merged.resize_for_overwrite(std::min(Size, OtherSize)); + + auto [Unused1, Unused2, LastOut] = + std::ranges::set_intersection(*this, Other, Merged.begin()); + Merged.base().erase(LastOut, Merged.end()); + return Merged; + } + + [[nodiscard]] bool contains(ByConstRef Val) const noexcept { + if (this->size() < LinearThreshold) { + return llvm::is_contained(base(), Val); + } + + return std::ranges::binary_search(base(), Val); + } + + void sort() { + if (size() >= LinearThreshold) { + // Always sorted + return; + } + + std::ranges::sort(base()); + } + + LLVM_ATTRIBUTE_ALWAYS_INLINE void foreach ( + std::invocable auto Handler) const { + for (const auto &Elem : base()) { + std::invoke(Handler, Elem); + } + } + + [[nodiscard]] friend auto hash_value(const SmallArraySet &Set) noexcept { + if (Set.size() < LinearThreshold) { + if constexpr (std::is_trivially_copyable_v) { + T Arr[LinearThreshold]{}; + memcpy(&Arr, Set.data(), Set.size_in_bytes()); + std::ranges::sort(Arr, Arr + Set.size()); + return llvm::hash_combine_range(Arr, Arr + Set.size()); + } else { + // Some reduction that ignores the order + return std::transform_reduce(Set.begin(), Set.end(), 37, + std::bit_xor<>{}, [](ByConstRef Val) { + using llvm::hash_value; + return hash_value(Val); + }); + } + } else { + return llvm::hash_combine_range(Set.begin(), Set.end()); + } + } + + [[nodiscard]] bool operator==(const SmallArraySet &Other) const noexcept { + if (size() != Other.size()) { + return false; + } + + if (size() < LinearThreshold) { + if (hash_value(*this) != hash_value(Other)) { + // Some pre-check to avoid the quadratic loop. + // XXX: Need to measure, whether this actually helps + return false; + } + + for (const auto &Val : Other) { + if (!contains(Val)) { + return false; + } + } + return true; + } + + return std::equal(begin(), end(), Other.begin()); + } + +private: + explicit SmallArraySet(llvm::SmallVectorImpl &&Elems, + std::true_type /*PreSortedAndUniqued*/) + : llvm::SmallVector(std::move(Elems)) {} + + bool insertImpl(ByConstRef Val) { + const auto It = std::ranges::lower_bound(base(), Val); + if (It != this->end() && *It == Val) { + return false; + } + + base().insert(It, Val); + return true; + } + + void insertImpl(llvm::ArrayRef OtherArr) { + if (OtherArr.size() < 3) { + // For small sizes, it is probably better to insert the elements + // one-by-one, instead of using std::sort. + // TODO: Find good threshold + + for (const auto &Val : OtherArr) { + insert(Val); + } + return; + } + + this->append(OtherArr.begin(), OtherArr.end()); + psr::sortUnique(base()); + } + + [[nodiscard]] constexpr auto &base() noexcept { + return static_cast &>(*this); + } + [[nodiscard]] constexpr const auto &base() const noexcept { + return static_cast &>(*this); + } +}; +} // namespace psr diff --git a/include/phasar/Utils/TableWrappers.h b/include/phasar/Utils/TableWrappers.h index d836a762f7..860e8dd822 100644 --- a/include/phasar/Utils/TableWrappers.h +++ b/include/phasar/Utils/TableWrappers.h @@ -51,6 +51,9 @@ template struct Hasher { size_t operator()(ByConstRef Key) const noexcept { if constexpr (has_getHashCode::value) { return Key.getHashCode(); + } else if constexpr (is_llvm_hashable_v) { + using llvm::hash_value; + return hash_value(Key); } else { return std::hash{}(Key); } @@ -396,7 +399,7 @@ template class UnorderedSet { void clear() noexcept(std::is_nothrow_default_constructible_v) { - std::unordered_set Empty{}; + std::unordered_set> Empty{}; swap(Set, Empty); } @@ -443,17 +446,7 @@ template class UnorderedSet { } private: - struct Hasher { - size_t operator()(ByConstRef Key) const noexcept { - if constexpr (has_getHashCode::value) { - return Key.getHashCode(); - } else { - return std::hash{}(Key); - } - } - }; - - std::unordered_set Set; + std::unordered_set> Set; }; template class DenseTable1d { diff --git a/include/phasar/Utils/Utilities.h b/include/phasar/Utils/Utilities.h index 0b534246eb..24422d96bf 100644 --- a/include/phasar/Utils/Utilities.h +++ b/include/phasar/Utils/Utilities.h @@ -17,6 +17,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -135,7 +136,12 @@ void intersectWith(ContainerTy &Dest, const OtherContainerTy &Src) { if (Src.count(*It)) { ++It; } else { - It = Dest.erase(It); + if constexpr (std::is_void_v) { + auto OldIt = It++; + Dest.erase(OldIt); + } else { + It = Dest.erase(It); + } } } } @@ -305,18 +311,6 @@ struct SecondFn { } }; -template -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const std::optional &Opt) { - if (Opt) { - OS << *Opt; - } else { - OS << ""; - } - - return OS; -} - template requires(!std::is_pointer_v) LLVM_ATTRIBUTE_ALWAYS_INLINE T &assertNotNull(T &Value) { @@ -347,6 +341,24 @@ template void assertAllNotNull([[maybe_unused]] const T &Range) { } } +template > +constexpr void sortUnique(auto &Range, CompareFn Cmp = {}) { + auto It = llvm::adl_begin(Range); + auto End = llvm::adl_end(Range); + std::sort(It, End, std::move(Cmp)); + Range.erase(std::unique(It, End), End); +} + +/// \brief Similar to std::minmax, but returns by value +template +[[nodiscard]] constexpr std::pair minmaxVal(T First, T Second) noexcept { + if (std::less{}(Second, First)) { + return {std::move(Second), std::move(First)}; + } + + return {std::move(First), std::move(Second)}; +} + } // namespace psr #endif diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp index baff7f6a87..f02934c5db 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp @@ -3,15 +3,15 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" #include "phasar/Domain/LatticeDomain.h" -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/Utils/Fn.h" +#include "phasar/Utils/Lazy.h" #include "phasar/Utils/Logger.h" +#include "phasar/Utils/PAMMMacros.h" #include "phasar/Utils/Printer.h" +#include "phasar/Utils/Utilities.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DataLayout.h" @@ -24,9 +24,6 @@ #include #include -#include -#include -#include #include #include @@ -38,6 +35,8 @@ FieldStringManager::FieldStringManager() { NodeCompressor.insertDummy( FieldStringNode{.Next = FieldStringNodeId::None, .Offset = 0}); Depth.push_back(0); + // Empty kill-set has index 0 + KillsCompressor.getOrInsert({}); } llvm::SmallVector @@ -78,65 +77,6 @@ constexpr static int32_t addOffsets(int32_t L, int32_t R) noexcept { return Sum; } -struct CFLFieldSensEdgeFunction { - using l_t = LatticeDomain; - [[clang::require_explicit_initialization]] IFDSEdgeValue Transform; - [[clang::require_explicit_initialization]] uint8_t DepthKLimit{}; - - [[nodiscard]] l_t computeTarget(l_t Source) const { - Source.onValue(fn<&IFDSEdgeValue::applyTransforms>, Transform, DepthKLimit); - return Source; - } - - static EdgeFunction - compose(EdgeFunctionRef /*This*/, - const EdgeFunction & /*SecondFunction*/) { - llvm::report_fatal_error("Use extend() instead!"); - } - - static EdgeFunction - join(EdgeFunctionRef /*This*/, - const EdgeFunction & /*OtherFunction*/) { - llvm::report_fatal_error("Use combine() instead!"); - } - - bool operator==(const CFLFieldSensEdgeFunction &Other) const noexcept { - assert(DepthKLimit == Other.DepthKLimit); - return Transform == Other.Transform; - } - - friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const CFLFieldSensEdgeFunction &EF) { - return OS << "Txn[" << EF.Transform << ']'; - } - - [[nodiscard]] static auto from(IFDSEdgeValue &&Txn, uint8_t DepthKLimit) { - return CFLFieldSensEdgeFunction{ - .Transform = std::move(Txn), - .DepthKLimit = DepthKLimit, - }; - } - - [[nodiscard]] static auto from(AccessPath &&Txn, FieldStringManager &Mgr, - uint8_t DepthKLimit) { - // Avoid initializer_list as it prevents moving - auto Ret = CFLFieldSensEdgeFunction{ - .Transform = {.Mgr = &Mgr, .Paths = {}}, - .DepthKLimit = DepthKLimit, - }; - Ret.Transform.Paths.insert(std::move(Txn)); - return Ret; - } - - [[nodiscard]] static auto fromEpsilon(uint8_t DepthKLimit, - FieldStringManager &Mgr) { - return CFLFieldSensEdgeFunction{ - .Transform = IFDSEdgeValue::epsilon(&Mgr), - .DepthKLimit = DepthKLimit, - }; - } -}; - [[nodiscard]] std::string storesToString(const AccessPath &AP, const FieldStringManager &Mgr) { std::string Ret; @@ -153,69 +93,72 @@ struct CFLFieldSensEdgeFunction { [[nodiscard]] auto applyOneGepAndStore(FieldStringManager &Mgr, AccessPath &F, int32_t Field, uint8_t DepthKLimit) { if (Mgr.depth(F.Stores) == DepthKLimit) { - // TODO: Optimize: + // XXX: Optimize: auto Full = Mgr.getFullFieldString(F.Stores); Full.erase(Full.begin()); F.Stores = Mgr.fromFullFieldString(Full); } - F.Stores = Mgr.prepend(std::exchange(F.Offset, 0) + Field, F.Stores); + F.Stores = Mgr.prepend(Field, F.Stores); return std::true_type{}; } // Returns whether to retain F [[nodiscard]] auto applyOneGepAndLoad(FieldStringManager &Mgr, AccessPath &F, int32_t Field, uint8_t DepthKLimit) { - auto Offs = F.Offset + Field; if (F.Stores == FieldStringNodeId::None) { + auto Offs = F.Offset + Field; - if (F.kills(Offs)) { + if (Mgr.isKilledBy(F.Kills, Offs)) { return false; } F.Offset = 0; - // TODO: Is this application of k-limiting correct here? // cf. Section 4.2.3 "K-Limiting" in the paper if (Mgr.depth(F.Loads) == DepthKLimit) { return true; } F.Loads = Mgr.prepend(Offs, F.Loads); - F.Kills.clear(); + F.Kills = KillSetId::Empty; return true; } auto StoresHead = Mgr[F.Stores]; - if (StoresHead.Offset != Offs && StoresHead.Offset != AccessPath::TopOffset) { + if (StoresHead.Offset != Field && + StoresHead.Offset != AccessPath::TopOffset) { return false; } - assert(StoresHead.Offset == Offs || + assert(StoresHead.Offset == Field || StoresHead.Offset == AccessPath::TopOffset); - F.Offset = 0; F.Stores = StoresHead.Next; - // llvm::errs() << "> pop_back\n"; return true; } [[nodiscard]] auto applyOneGepAndKill(FieldStringManager &Mgr, AccessPath &F, int32_t Field, uint8_t /*DepthKLimit*/) { - auto Offs = addOffsets(F.Offset, Field); - if (Offs == AccessPath::TopOffset) { + if (Field == AccessPath::TopOffset) { // We cannot kill Top return true; } if (F.Stores == FieldStringNodeId::None) { - F.Kills.insert(Offs); + auto Offs = addOffsets(F.Offset, Field); + if (Offs == AccessPath::TopOffset) { + // We cannot kill Top + return true; + } + + F.Kills = Mgr.addKill(F.Kills, Offs); PHASAR_LOG_LEVEL_CAT(DEBUG, IFDSEdgeValue::LogCategory, "> add K" << Offs); return true; } auto StoresHead = Mgr[F.Stores]; - if (StoresHead.Offset == Offs) { + if (StoresHead.Offset == Field) { PHASAR_LOG_LEVEL_CAT(DEBUG, IFDSEdgeValue::LogCategory, "> Kill " << storesToString(F, Mgr)); return false; @@ -224,7 +167,7 @@ struct CFLFieldSensEdgeFunction { PHASAR_LOG_LEVEL_CAT(DEBUG, IFDSEdgeValue::LogCategory, "> Retain " << storesToString(F, Mgr)); - assert(StoresHead.Offset != Offs); + assert(StoresHead.Offset != Field); return true; } @@ -240,49 +183,38 @@ struct CFLFieldSensEdgeFunction { return std::true_type{}; } -void applyTransform(IFDSEdgeValue &EV, const AccessPath &Txn, - uint8_t DepthKLimit) { - - if (EV.Paths.empty() || Txn.empty()) { - // Nothing to be done here - return; - } - if (EV.isEpsilon()) { - EV.Paths.clear(); - EV.Paths.insert(Txn); - return; - } - - auto Save = std::exchange(EV.Paths, {}); - EV.Paths.reserve(Save.size()); - +static void applyTransformImpl(const IFDSEdgeValue::container_type &EV, + IFDSEdgeValue::container_type &Into, + FieldStringManager &Mgr, const AccessPath &Txn, + uint8_t DepthKLimit) { const auto TxnOffset = Txn.Offset; - const auto TxnLoads = EV.Mgr->getFullFieldString(Txn.Loads); - const auto TxnStores = EV.Mgr->getFullFieldString(Txn.Stores); + const auto TxnLoads = Mgr.getFullFieldString(Txn.Loads); + const auto TxnStores = Mgr.getFullFieldString(Txn.Stores); + const auto Kills = Mgr.kills(Txn.Kills); // safety copy - for (const auto &F : Save) { + for (const auto &F : EV) { auto Copy = F; bool Retain = [&] { if (TxnOffset) { - if (!applyOneGep(*EV.Mgr, Copy, TxnOffset, DepthKLimit)) { + if (!applyOneGep(Mgr, Copy, TxnOffset, DepthKLimit)) { return false; } } for (auto Ld : TxnLoads) { - if (!applyOneGepAndLoad(*EV.Mgr, Copy, Ld, DepthKLimit)) { + if (!applyOneGepAndLoad(Mgr, Copy, Ld, DepthKLimit)) { return false; } } - for (auto Kl : Txn.Kills) { - if (!applyOneGepAndKill(*EV.Mgr, Copy, Kl, DepthKLimit)) { + for (auto Kl : Kills) { + if (!applyOneGepAndKill(Mgr, Copy, Kl, DepthKLimit)) { return false; } } for (auto St : TxnStores) { - if (!applyOneGepAndStore(*EV.Mgr, Copy, St, DepthKLimit)) { + if (!applyOneGepAndStore(Mgr, Copy, St, DepthKLimit)) { return false; } } @@ -291,10 +223,58 @@ void applyTransform(IFDSEdgeValue &EV, const AccessPath &Txn, }(); if (Retain) { - EV.Paths.insert(std::move(Copy)); + Into.insert(Copy); } } } + +void applyTransform(IFDSEdgeValue &EV, const AccessPath &Txn, + uint8_t DepthKLimit) { + if (EV.Paths.empty() || Txn.empty()) { + // Nothing to be done here + return; + } + if (EV.isEpsilon()) { + EV.Paths.clear(); + EV.Paths.insert(Txn); + return; + } + + auto Save = std::exchange(EV.Paths, {}); + EV.Paths.reserve(Save.size()); + + applyTransformImpl(Save, EV.Paths, *EV.Mgr, Txn, DepthKLimit); +} + +void applyTransformInto(const IFDSEdgeValue &EV, IFDSEdgeValue &Into, + const AccessPath &Txn, uint8_t DepthKLimit) { + assert(&EV != &Into); + assert(EV.Mgr == Into.Mgr); + if (EV.Paths.empty() || Txn.empty()) { + // Nothing to be done here + return; + } + if (EV.isEpsilon()) { + Into.Paths.insert(Txn); + return; + } + + applyTransformImpl(EV.Paths, Into.Paths, *EV.Mgr, Txn, DepthKLimit); +} + +static auto &printOffset(llvm::raw_ostream &OS, int32_t Offset, + bool WithSign = false) { + + if (WithSign && (Offset > 0 || Offset == AccessPath::TopOffset)) { + OS << '+'; + } + if (Offset == AccessPath::TopOffset) { + OS << 'T'; + } else { + OS << Offset; + } + return OS; +} } // namespace void IFDSEdgeValue::applyTransforms(const IFDSEdgeValue &Txns, @@ -324,9 +304,7 @@ void IFDSEdgeValue::applyTransforms(const IFDSEdgeValue &Txns, for (++It; It != End; ++It) { if (!It->empty()) { - auto Tmp = *this; - applyTransform(Tmp, *It, DepthKLimit); - Ret.Paths.insert(Tmp.Paths.begin(), Tmp.Paths.end()); + applyTransformInto(*this, Ret, *It, DepthKLimit); } else { Ret.Paths.insert(Paths.begin(), Paths.end()); } @@ -335,13 +313,6 @@ void IFDSEdgeValue::applyTransforms(const IFDSEdgeValue &Txns, *this = std::move(Ret); } -size_t psr::cfl_fieldsens::hash_value(const AccessPath &FieldString) noexcept { - // Xor does not care about the order - auto HCK = std::reduce(FieldString.Kills.begin(), FieldString.Kills.end(), 0, - std::bit_xor<>{}); - return llvm::hash_combine(FieldString.Loads, FieldString.Stores, HCK); -} - llvm::raw_ostream & psr::cfl_fieldsens::operator<<(llvm::raw_ostream &OS, const AccessPath &FieldString) { @@ -350,23 +321,19 @@ psr::cfl_fieldsens::operator<<(llvm::raw_ostream &OS, } if (FieldString.Offset) { - if (FieldString.Offset > 0) { - OS << '+'; - } - - OS << FieldString.Offset << '.'; + printOffset(OS, FieldString.Offset, true) << '.'; } if (FieldString.Loads != FieldStringNodeId::None) { OS << "L#" << uint32_t(FieldString.Loads) << '.'; } - for (auto Kl : FieldString.Kills) { - OS << 'K' << Kl << '.'; + if (FieldString.Kills != KillSetId::Empty) { + OS << "K#" << uint32_t(FieldString.Kills) << '.'; } - if (FieldString.Loads != FieldStringNodeId::None) { - OS << "S#" << uint32_t(FieldString.Loads) << '.'; + if (FieldString.Stores != FieldStringNodeId::None) { + OS << "S#" << uint32_t(FieldString.Stores) << '.'; } return OS; @@ -380,23 +347,19 @@ void AccessPath::print(llvm::raw_ostream &OS, } if (Offset != 0) { - if (Offset > 0) { - OS << '+'; - } - - OS << Offset << '.'; + printOffset(OS, Offset, true) << '.'; } for (auto Ld : Mgr.getFullFieldString(Loads)) { - OS << 'L' << Ld << '.'; + printOffset(OS << 'L', Ld) << '.'; } - for (auto Kl : Kills) { - OS << 'K' << Kl << '.'; + for (auto Kl : Mgr.kills(Kills)) { + printOffset(OS << 'K', Kl) << '.'; } for (auto St : Mgr.getFullFieldString(Stores)) { - OS << 'S' << St << '.'; + printOffset(OS << 'S', St) << '.'; } } @@ -434,6 +397,22 @@ cfl_fieldsens::makeInitialSeeds( return {std::move(Ret)}; } +llvm::raw_ostream &cfl_fieldsens::operator<<(llvm::raw_ostream &OS, + CFLFieldSensEdgeFunction EF) { + return OS << "Txn[" << EF.Impl->Transform << ']'; +} + +EdgeFunction CFLFieldSensIFDSProblem::makeEF( + cfl_fieldsens::CFLFieldSensEdgeFunctionImpl &&EF) { + auto It = EFInternCache.insert(std::move(EF)); + return CFLFieldSensEdgeFunction{&*It.first}; +} +auto CFLFieldSensIFDSProblem::makeEFPtr( + cfl_fieldsens::CFLFieldSensEdgeFunctionImpl &&EF) -> EFResultPtr { + auto It = EFInternCache.insert(std::move(EF)); + return EFResultPtr{&*It.first}; +} + auto CFLFieldSensIFDSProblem::getStoreEdgeFunction(d_t CurrNode, d_t SuccNode, d_t PointerOp, d_t ValueOp, uint8_t DepthKLimit, @@ -441,51 +420,74 @@ auto CFLFieldSensIFDSProblem::getStoreEdgeFunction(d_t CurrNode, d_t SuccNode, -> EdgeFunction { auto [BasePtr, Offset] = getBaseAndOffset(PointerOp, DL); - // TODO;: How to deal with BasePtr? - - auto [BaseBasePtr, - BaseOffset] = [&]() -> std::pair { - if (BasePtr != SuccNode && llvm::isa(BasePtr)) { - return getBaseAndOffset( - llvm::cast(BasePtr)->getPointerOperand(), DL); - } - - return {nullptr, INT32_MIN}; - }(); - if (CurrNode == SuccNode && - (BasePtr == CurrNode || BaseBasePtr == CurrNode)) { + // Trace the pointer chain from BasePtr toward SuccNode. DerefOffsets[0] is + // the outermost GEP offset (closest to SuccNode). The Stores chain is built + // with the outermost offset as HEAD so applyOneGepAndLoad matches in + // traversal order (outermost first, matching the actual memory access + // sequence). + llvm::SmallVector DerefOffsets; + const bool FoundSuccNode = walkLoadChainTo( + BasePtr, SuccNode, DL, DepthKLimit, [&](int64_t ByteOffset) { + DerefOffsets.push_back(ByteOffset != INT64_MIN ? int32_t(ByteOffset) + : AccessPath::TopOffset); + }); + + if (CurrNode == SuccNode && FoundSuccNode) { // Kill - AccessPath FieldString{}; - FieldString.Kills.insert(Offset); - return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, - DepthKLimit); + FieldString.Kills = Mgr.addKill(FieldString.Kills, Offset); + return makeEF( + CFLFieldSensEdgeFunctionImpl::from(FieldString, Mgr, DepthKLimit)); } - if (ValueOp == CurrNode && CurrNode != SuccNode) { - // Store + // Also match when ValueOp is a zero-offset GEP of CurrNode (e.g. the -O0 + // arraydecay pattern where `%arraydecay = gep arr, 0, 0` is stored but the + // tainted fact is `arr` itself). + const auto *ValueBase = ValueOp->stripPointerCastsAndAliases(); + const bool IsValueCurrNode = ValueOp == CurrNode || ValueBase == CurrNode; + if (IsValueCurrNode && CurrNode != SuccNode && FoundSuccNode) { + // Store: prepend innermost first so the outermost becomes the HEAD. AccessPath FieldString{}; - if (BasePtr != SuccNode && llvm::isa(BasePtr)) { - // This is a hack, to be more correct with field-insensitive alias - // information - - if (BaseBasePtr == SuccNode) { - // push before Offset, or after? - FieldString.Stores = Mgr.prepend(BaseOffset, FieldString.Stores); - } - } - FieldString.Stores = Mgr.prepend(Offset, FieldString.Stores); - - return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, - DepthKLimit); + for (int32_t DerefOffset : llvm::reverse(DerefOffsets)) { + FieldString.Stores = Mgr.prepend(DerefOffset, FieldString.Stores); + } + return makeEF( + CFLFieldSensEdgeFunctionImpl::from(FieldString, Mgr, DepthKLimit)); } // unaffected by the store return EdgeIdentity{}; } +auto CFLFieldSensIFDSProblem::getLoadEdgeFunction(d_t CurrNode, d_t PointerOp, + uint8_t DepthKLimit, + const llvm::DataLayout &DL) + -> EdgeFunction { + + const auto *ZeroOffsBase = PointerOp->stripPointerCastsAndAliases(); + if (CurrNode == PointerOp || CurrNode == ZeroOffsBase) { + // Note: Offsets handled in GEP below + AccessPath FieldString{}; + FieldString.Loads = Mgr.prepend(/*Offset*/ 0, FieldString.Loads); + return makeEF( + CFLFieldSensEdgeFunctionImpl::from(FieldString, Mgr, DepthKLimit)); + } + + // In case CurrNode!=PointerOp, we are filtering llvm values for allocation + // sites. GEPs are no alloc-sites! + // => we must handle the offsetting here in the load, without relying it being + // handled in the GEP-EF + + auto [BasePtr, Offset] = getBaseAndOffset(ZeroOffsBase, DL); + + AccessPath FieldString{}; + FieldString.Loads = Mgr.prepend(Offset, FieldString.Stores); + return makeEF( + CFLFieldSensEdgeFunctionImpl::from(FieldString, Mgr, DepthKLimit)); +} + auto CFLFieldSensIFDSProblem::getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { @@ -499,7 +501,7 @@ auto CFLFieldSensIFDSProblem::getNormalEdgeFunction(n_t Curr, d_t CurrNode, if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); + return makeEF(CFLFieldSensEdgeFunctionImpl::fromEpsilon(DepthKLimit, Mgr)); } if (const auto *Store = llvm::dyn_cast(Curr)) { @@ -511,17 +513,9 @@ auto CFLFieldSensIFDSProblem::getNormalEdgeFunction(n_t Curr, d_t CurrNode, if (Curr == SuccNode) { if (const auto *Load = llvm::dyn_cast(Curr)) { - // Load - - auto [BasePtr, Offset] = getBaseAndOffset( - Load->getPointerOperand(), IRDB->getModule()->getDataLayout()); - - // TODO;: How to deal with BasePtr? - - AccessPath FieldString{}; - FieldString.Loads = Mgr.prepend(Offset, FieldString.Loads); - return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, - DepthKLimit); + return getLoadEdgeFunction(CurrNode, Load->getPointerOperand(), + DepthKLimit, + IRDB->getModule()->getDataLayout()); } if (const auto *Gep = llvm::dyn_cast(Curr)) { @@ -530,8 +524,8 @@ auto CFLFieldSensIFDSProblem::getNormalEdgeFunction(n_t Curr, d_t CurrNode, AccessPath FieldString{}; FieldString.Offset = OffsVal; - return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, - DepthKLimit); + return makeEF( + CFLFieldSensEdgeFunctionImpl::from(FieldString, Mgr, DepthKLimit)); } } @@ -552,7 +546,7 @@ auto CFLFieldSensIFDSProblem::getCallEdgeFunction(n_t CallSite, d_t SrcNode, if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); + return makeEF(CFLFieldSensEdgeFunctionImpl::fromEpsilon(DepthKLimit, Mgr)); } // This is naturally identity @@ -572,7 +566,7 @@ auto CFLFieldSensIFDSProblem::getReturnEdgeFunction( if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); + return makeEF(CFLFieldSensEdgeFunctionImpl::fromEpsilon(DepthKLimit, Mgr)); } return EdgeIdentity{}; @@ -602,7 +596,7 @@ auto CFLFieldSensIFDSProblem::getCallToRetEdgeFunction( if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); + return makeEF(CFLFieldSensEdgeFunctionImpl::fromEpsilon(DepthKLimit, Mgr)); } // This naturally identity @@ -629,16 +623,16 @@ auto CFLFieldSensIFDSProblem::getSummaryEdgeFunction(n_t Curr, d_t CurrNode, << *KillOffs); AccessPath FieldString{}; - FieldString.Kills.insert(*KillOffs); - return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, - DepthKLimit); + FieldString.Kills = Mgr.addKill(FieldString.Kills, *KillOffs); + return makeEF( + CFLFieldSensEdgeFunctionImpl::from(FieldString, Mgr, DepthKLimit)); } } if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); + return makeEF(CFLFieldSensEdgeFunctionImpl::fromEpsilon(DepthKLimit, Mgr)); } // TODO: Is that correct? -- We may need to handle field-indirections here @@ -647,17 +641,19 @@ auto CFLFieldSensIFDSProblem::getSummaryEdgeFunction(n_t Curr, d_t CurrNode, } static void klimitPaths(auto &Paths, FieldStringManager &Mgr) { - llvm::SmallDenseMap, 2, AccessPathDMI> ToInsert; + ToInsert.reserve(Paths.size()); // retained across .clear() calls below + + // Merge stores for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { auto It = IIt++; if (It->Stores != FieldStringNodeId::None) { AccessPath Approx = *It; auto StoresHead = Mgr[Approx.Stores]; Approx.Stores = Mgr.prepend(AccessPath::TopOffset, StoresHead.Next); - ToInsert[std::move(Approx)].push_back(*It); + ToInsert[Approx].push_back(*It); Paths.erase(It); } } @@ -668,9 +664,130 @@ static void klimitPaths(auto &Paths, FieldStringManager &Mgr) { Paths.insert(OrigPaths.begin(), OrigPaths.end()); } } + + // Merge geps + ToInsert.clear(); + for (const AccessPath &AP : Paths) { + auto NoOffs = AP; + NoOffs.Offset = AccessPath::TopOffset; + ToInsert[NoOffs].push_back(AP); + } + Paths.clear(); + for (auto &&[Approx, OrigPaths] : ToInsert) { + if (OrigPaths.size() > 2) { + Paths.insert(Approx); + } else { + Paths.insert(OrigPaths.begin(), OrigPaths.end()); + } + } + + // Merge loads + ToInsert.clear(); + for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { + auto It = IIt++; + if (It->Loads != FieldStringNodeId::None) { + AccessPath Approx = *It; + auto LoadsHead = Mgr[Approx.Loads]; + Approx.Loads = Mgr.prepend(AccessPath::TopOffset, LoadsHead.Next); + ToInsert[Approx].push_back(*It); + Paths.erase(It); + } + } + for (auto &&[Approx, OrigPaths] : ToInsert) { + if (OrigPaths.size() > 2) { + Paths.insert(Approx); + } else { + Paths.insert(OrigPaths.begin(), OrigPaths.end()); + } + } + + // Merge Kills + ToInsert.clear(); + for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { + auto It = IIt++; + + AccessPath Approx = *It; + Approx.Kills = {}; + ToInsert[Approx].push_back(*It); + Paths.erase(It); + } + for (auto &&[Approx, OrigPaths] : ToInsert) { + if (OrigPaths.size() > 2) { + auto KillSet = Mgr.kills(OrigPaths.front().Kills); + for (const auto &AP : llvm::drop_begin(OrigPaths)) { + KillSet.intersectWith(Mgr.kills(AP.Kills)); + } + + auto ApproxMut = Approx; + ApproxMut.Kills = Mgr.internKills(std::move(KillSet)); + Paths.insert(std::move(ApproxMut)); + } else { + Paths.insert(OrigPaths.begin(), OrigPaths.end()); + } + } } static constexpr ptrdiff_t BreadthKLimit = 5; +static constexpr ptrdiff_t WidenKLimit = 128; + +static constexpr unsigned AllEFPtrId = 0; +static constexpr unsigned AllBottomId = 1; +static constexpr unsigned AllTopId = 2; + +[[nodiscard]] static llvm::PointerIntPair +allBotPtr() noexcept { + return {nullptr, AllBottomId}; +} + +[[nodiscard]] static llvm::PointerIntPair +allTopPtr() noexcept { + return {nullptr, AllTopId}; +} + +[[nodiscard]] static EdgeFunction +getResultEF(llvm::PointerIntPair + Ptr) noexcept { + PAMM_GET_INSTANCE; + switch (Ptr.getInt()) { + [[likely]] case AllEFPtrId: + INC_COUNTER("getResultEF Ptr", 1, Full); + assert(Ptr.getPointer() != nullptr); + assert(Ptr.getPointer() == Ptr.getOpaqueValue() && + "Zero-tag does not pollute the alignment bits"); + return CFLFieldSensEdgeFunction{ + static_cast( + Ptr.getOpaqueValue())}; + case AllBottomId: + INC_COUNTER("getResultEF Bot", 1, Full); + return AllBottom{}; + case AllTopId: + INC_COUNTER("getResultEF Top", 1, Full); + return AllTop{}; + default: + llvm_unreachable("All valid tags should be handled explicitly"); + } +} + +void CFLFieldSensIFDSProblem::regCounters() noexcept { + PAMM_GET_INSTANCE; + + REG_COUNTER("ExtendCache Refs", 0, Full); + REG_COUNTER("ExtendCache Misses", 0, Full); + + REG_COUNTER("CombineCache Refs", 0, Full); + REG_COUNTER("CombineCache Misses", 0, Full); + REG_COUNTER("Combine CallsTotal", 0, Full); + REG_COUNTER("Combine LIdentity", 0, Full); + REG_COUNTER("Combine LIdentitySlow", 0, Full); + REG_COUNTER("Combine RIdentity", 0, Full); + REG_COUNTER("Combine RIdentitySlow", 0, Full); + + REG_COUNTER("getResultEF Top", 0, Full); + REG_COUNTER("getResultEF Bot", 0, Full); + REG_COUNTER("getResultEF Ptr", 0, Full); +} auto CFLFieldSensIFDSProblem::extend(const EdgeFunction &L, const EdgeFunction &R) @@ -683,38 +800,50 @@ auto CFLFieldSensIFDSProblem::extend(const EdgeFunction &L, const auto *FldSensL = L.dyn_cast(); const auto *FldSensR = R.dyn_cast(); - if (FldSensL && FldSensR) { - if (FldSensR->Transform.isEpsilon()) { - return L; - } + if (!FldSensL || !FldSensR) { + llvm::report_fatal_error("[CFLFieldSensIFDSProblem::extend]: " + "Unexpected edge functions: " + + llvm::Twine(to_string(L)) + " EXTEND " + + llvm::Twine(to_string(R))); + } - if (FldSensL->Transform.Paths.empty()) { - return L; - } + if (FldSensR->Impl->Transform.isEpsilon()) { + return L; + } - auto Txn = FldSensL->Transform; - Txn.applyTransforms(FldSensR->Transform, DepthKLimit); + PAMM_GET_INSTANCE; - if (Txn.Paths.empty()) { - return AllTop{}; - } + INC_COUNTER("ExtendCache Refs", 1, Full); - if (Txn.Paths.size() > BreadthKLimit) { - klimitPaths(Txn.Paths, Mgr); - } - return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); - } + auto [It, Inserted] = ExtendCache.try_emplace( + std::pair{FldSensL->Impl, FldSensR->Impl}, lazy{[&]() -> EFResultPtr { + INC_COUNTER("ExtendCache Misses", 1, Full); + + auto Txn = FldSensL->Impl->Transform; + Txn.applyTransforms(FldSensR->Impl->Transform, DepthKLimit); + + if (Txn.Paths.empty()) { + return allTopPtr(); + } + + if (Txn.Paths.size() > BreadthKLimit) { + klimitPaths(Txn.Paths, Mgr); + if (Txn.Paths.size() > WidenKLimit) { + return allBotPtr(); + } + } + + return makeEFPtr( + CFLFieldSensEdgeFunctionImpl::from(std::move(Txn), DepthKLimit)); + }}); - llvm::report_fatal_error("[FieldSensAllocSitesAwareIFDSProblem::extend]: " - "Unexpected edge functions: " + - llvm::Twine(to_string(L)) + " EXTEND " + - llvm::Twine(to_string(R))); + return getResultEF(It->second); }(); - // if (!L.isa>() && !R.isa>()) { - PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, - "EXTEND " << L << " X " << R << " ==> " << Ret); - // } + if (!L.isa>() && !R.isa>()) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "EXTEND " << L << " X " << R << " ==> " << Ret); + } return Ret; } @@ -722,70 +851,92 @@ auto CFLFieldSensIFDSProblem::extend(const EdgeFunction &L, auto CFLFieldSensIFDSProblem::combine(const EdgeFunction &L, const EdgeFunction &R) -> EdgeFunction { + if (auto Dflt = defaultJoinOrNullNoId(L, R)) { + return Dflt; + } auto Ret = [&]() -> EdgeFunction { - if (auto Dflt = defaultJoinOrNullNoId(L, R)) { - return Dflt; - } + PAMM_GET_INSTANCE; + INC_COUNTER("Combine CallsTotal", 1, Full); const auto *FldSensL = L.dyn_cast(); const auto *FldSensR = R.dyn_cast(); - if (FldSensL) { if (FldSensR) { - // A complicated way of expressing set-union of LPaths and RPaths. - // Reason being that we don't want to unnecessarily copy the sets. - // Rather, we like just incrementing the ref-count of L or R if somehow - // possible. - - const auto &LPaths = FldSensL->Transform.Paths; - const auto &RPaths = FldSensR->Transform.Paths; - const auto LeftSz = LPaths.size(); - const auto RightSz = RPaths.size(); - const auto LeftSmaller = LeftSz < RightSz; - - if (LeftSz && RightSz) { - const auto &Larger = LeftSmaller ? RPaths : LPaths; - const auto &Smaller = LeftSmaller ? LPaths : RPaths; - - auto It = Smaller.begin(); - const auto End = Smaller.end(); - - for (; It != End; ++It) { - if (!Larger.contains(*It)) { - auto Union = Larger; - Union.insert(It, End); - - if (Union.size() > BreadthKLimit) { - klimitPaths(Union, Mgr); + + INC_COUNTER("CombineCache Refs", 1, Full); + auto [CacheIt, CacheInserted] = CombineCache.try_emplace( + psr::minmaxVal(FldSensL->Impl, FldSensR->Impl), + lazy{[this, FldSensL{*FldSensL}, + FldSensR{*FldSensR}]() -> EFResultPtr { + PAMM_GET_INSTANCE; + INC_COUNTER("CombineCache Misses", 1, Full); + + // A complicated way of expressing set-union of LPaths and RPaths. + // Reason being that we don't want to unnecessarily copy the sets. + // Rather, we like just incrementing the ref-count of L or R if + // somehow possible. + + const auto &RPaths = FldSensR.Impl->Transform.Paths; + const auto &LPaths = FldSensL.Impl->Transform.Paths; + const auto LeftSz = LPaths.size(); + const auto RightSz = RPaths.size(); + const auto LeftSmaller = LeftSz < RightSz; + + if (LeftSz && RightSz) { + const auto &Larger = LeftSmaller ? RPaths : LPaths; + const auto &Smaller = LeftSmaller ? LPaths : RPaths; + + auto It = Smaller.begin(); + const auto End = Smaller.end(); + + for (; It != End; ++It) { + if (!Larger.contains(*It)) { + auto Union = Larger; + Union.insert(It, End); + + // NOTE: No k-limit in combine()!!! Otherwise, we may loose + // monotonicity of the lattice! + + if (Union.size() > WidenKLimit) { + return allBotPtr(); + } + + return makeEFPtr(CFLFieldSensEdgeFunctionImpl::from( + IFDSEdgeValue{.Mgr = &Mgr, .Paths = std::move(Union)}, + DepthKLimit)); + } + } } - return CFLFieldSensEdgeFunction::from( - IFDSEdgeValue{.Mgr = &Mgr, .Paths = std::move(Union)}, - DepthKLimit); - } - } - } + return EFResultPtr{LeftSmaller ? FldSensR.Impl : FldSensL.Impl}; + }}); - return LeftSmaller ? R : L; + return getResultEF(CacheIt->second); } if (R.isa>()) { - if (FldSensL->Transform.Paths.contains(AccessPath{})) { + INC_COUNTER("Combine RIdentity", 1, Full); + if (FldSensL->Impl->Transform.Paths.contains(AccessPath{})) { return L; } - auto Txn = FldSensL->Transform; + INC_COUNTER("Combine RIdentitySlow", 1, Full); + auto Txn = FldSensL->Impl->Transform; Txn.Paths.insert(AccessPath{}); - return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); + return makeEF( + CFLFieldSensEdgeFunctionImpl::from(std::move(Txn), DepthKLimit)); } } else if (FldSensR && L.isa>()) { - if (FldSensR->Transform.Paths.contains(AccessPath{})) { + INC_COUNTER("Combine LIdentity", 1, Full); + if (FldSensR->Impl->Transform.Paths.contains(AccessPath{})) { return R; } - auto Txn = FldSensR->Transform; + INC_COUNTER("Combine LIdentitySlow", 1, Full); + auto Txn = FldSensR->Impl->Transform; Txn.Paths.insert(AccessPath{}); - return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); + return makeEF( + CFLFieldSensEdgeFunctionImpl::from(std::move(Txn), DepthKLimit)); } llvm::errs() << "COMBINE " << L << " X " << R << " ==> AllBottom\n"; @@ -793,8 +944,12 @@ auto CFLFieldSensIFDSProblem::combine(const EdgeFunction &L, return AllBottom{}; }(); - PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, - "COMBINE " << L << " X " << R << " ==> " << Ret); + if (L != R) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "COMBINE " << L << " X " << R << " ==> " << Ret + << "; Ret==L: " << (Ret == L) + << "; Ret==R: " << (Ret == R)); + } return Ret; } diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index 5abdf627c3..f431e0a19b 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -729,6 +729,60 @@ psr::getPointerIndicesOfType(const llvm::DIType *Ty, return std::move(getPointerIndicesOfType(Ty, DL, PIC)); } +bool psr::walkLoadChainTo(const llvm::Value *Start, const llvm::Value *Target, + const llvm::DataLayout &DL, uint32_t MaxDepth, + llvm::function_ref OnDeref) { + const llvm::Value *Cur = Start; + for (unsigned Depth = 0; Depth < MaxDepth && Cur != Target; ++Depth) { + const auto *LI = llvm::dyn_cast(Cur); + if (!LI) { + break; + } + + llvm::APInt Offset(64, 0); + const auto *Stripped = + LI->getPointerOperand()->stripAndAccumulateConstantOffsets( + DL, Offset, /*AllowNonInbounds=*/true); + const auto *Base = Stripped->stripPointerCastsAndAliases(); + int64_t ByteOffset = llvm::isa(Stripped) + ? INT64_MIN // non-constant GEP + : Offset.getSExtValue(); + + // -O0 mem2reg artifact: clang copies every argument/local into an alloca + // and re-loads it at each use. If the alloca has exactly one store and + // that store holds Target, the load is a transparent SSA copy -- not a + // real dereference of Target. Skipping OnDeref here keeps the indirection + // depth consistent with post-mem2reg IR (where the alloca disappears). + // Require Offset==0 and no other stores to avoid spurious kills when the + // alloca is reassigned to a different pointer. + if (const auto *AI = llvm::dyn_cast(Base); + AI && Offset.isZero()) { + bool HasTargetStore = false; + bool HasOtherStore = false; + for (const auto *U : AI->users()) { + const auto *SI = llvm::dyn_cast(U); + if (!SI || SI->getPointerOperand() != AI) { + continue; + } + if (SI->getValueOperand() == Target) { + HasTargetStore = true; + } else { + HasOtherStore = true; + break; + } + } + if (HasTargetStore && !HasOtherStore) { + Cur = Target; + break; + } + } + + OnDeref(ByteOffset); + Cur = Base; + } + return Cur == Target; +} + llvm::StringRef psr::getVarAnnotationIntrinsicName(const llvm::CallInst *CallInst) { const int KPointerGlobalStringIdx = 1; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index a372b56d6e..6f9bdc7ff2 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -6,15 +6,17 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" #include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" #include "phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/Utils/Logger.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Instruction.h" +#include "llvm/Support/ErrorHandling.h" #include "SrcCodeLocationEntry.h" #include "TestConfig.h" @@ -119,7 +121,7 @@ class ExampleTaintAnalysis : public psr::DefaultAllocSitesAwareIFDSProblem { } if (Leak.contains(Source)) { - Leaks[CS] = Source; + Leaks[CS].insert(Source); } if (Kill.contains(Source)) { @@ -130,7 +132,7 @@ class ExampleTaintAnalysis : public psr::DefaultAllocSitesAwareIFDSProblem { }); } - llvm::DenseMap Leaks{}; + llvm::DenseMap> Leaks{}; private: const psr::LLVMTaintConfig *Config{}; @@ -138,13 +140,34 @@ class ExampleTaintAnalysis : public psr::DefaultAllocSitesAwareIFDSProblem { using namespace psr::unittest; -class CFLFieldSensTest : public ::testing::Test { -protected: - static constexpr auto PathToLLFiles = PHASAR_BUILD_SUBFOLDER("xtaint/"); - const std::vector EntryPoints = {"main"}; +template +T makeIFDSTA(const psr::LLVMProjectIRDB *IRDB, psr::LLVMAliasInfoRef AS, + const psr::LLVMTaintConfig *TC); + +template <> +psr::IFDSTaintAnalysis +makeIFDSTA(const psr::LLVMProjectIRDB *IRDB, + psr::LLVMAliasInfoRef AS, + const psr::LLVMTaintConfig *TC) { + return psr::IFDSTaintAnalysis(IRDB, AS, TC, {"main"}, + /*TaintMainArgs=*/false, + /*EnableStrongUpdateStore=*/false); +} - using TaintSetT = std::set; +template <> +ExampleTaintAnalysis +makeIFDSTA(const psr::LLVMProjectIRDB *IRDB, + psr::LLVMAliasInfoRef AS, + const psr::LLVMTaintConfig *TC) { + return ExampleTaintAnalysis(IRDB, AS, TC, {"main"}); +} +using TaintSetT = std::set; +static constexpr auto PathToLLFiles = PHASAR_BUILD_SUBFOLDER("xtaint/"); +const std::vector EntryPoints = {"main"}; + +template class CFLFieldSensTest : public ::testing::Test { +protected: void run(const llvm::Twine &IRFileName, const std::map &GroundTruth, bool ShouldDumpResults = false) { @@ -156,7 +179,7 @@ class CFLFieldSensTest : public ::testing::Test { psr::LLVMAliasSet BaseAS(&IRDB); psr::FilteredLLVMAliasSet AS(&BaseAS); psr::LLVMTaintConfig TC(IRDB); - ExampleTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}); + auto TaintProblem = makeIFDSTA(&IRDB, &AS, &TC); psr::CFLFieldSensIFDSProblem FsTaintProblem(&TaintProblem); @@ -167,16 +190,16 @@ class CFLFieldSensTest : public ::testing::Test { Solver.solve(); auto Results = Solver.getSolverResults(); - // auto Results = psr::solveIDEProblem(FsTaintProblem, ICFG); - // Results.dumpResults(ICFG); - std::map> ComputedLeaks; for (auto IIt = TaintProblem.Leaks.begin(), End = TaintProblem.Leaks.end(); IIt != End;) { auto It = IIt++; - const auto &[LeakInst, LeakFact] = *It; + const auto &[LeakInst, LeakFacts] = *It; + + ASSERT_EQ(LeakFacts.size(), 1); + const auto *LeakFact = *LeakFacts.begin(); const auto &Res = Results.resultAt(LeakInst, LeakFact); if (const auto *FieldStrings = Res.getValueOrNull()) { @@ -199,70 +222,74 @@ class CFLFieldSensTest : public ::testing::Test { } }; -TEST_F(CFLFieldSensTest, Basic_01) { +using ProblemTypes = + ::testing::Types; +TYPED_TEST_SUITE(CFLFieldSensTest, ProblemTypes); + +TYPED_TEST(CFLFieldSensTest, Basic_01) { std::map GroundTruth = { {LineColFun{8, 3, "main"}, {LineColFunOp{8, 9, "main", llvm::Instruction::Load}}}, }; - run({PathToLLFiles + "xtaint01_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint01_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_02) { +TYPED_TEST(CFLFieldSensTest, Basic_02) { std::map GroundTruth = { {LineColFun{9, 3, "main"}, {LineColFunOp{9, 9, "main", llvm::Instruction::Load}}}, }; - run({PathToLLFiles + "xtaint02_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint02_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_03) { +TYPED_TEST(CFLFieldSensTest, Basic_03) { std::map GroundTruth = { {LineColFun{10, 3, "main"}, {LineColFunOp{10, 9, "main", llvm::Instruction::Load}}}, }; - run({PathToLLFiles + "xtaint03_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint03_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_04) { +TYPED_TEST(CFLFieldSensTest, Basic_04) { auto Call = LineColFun{6, 3, "_Z3barPi"}; std::map GroundTruth = { {Call, {OperandOf{0, Call}}}, }; - run({PathToLLFiles + "xtaint04_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint04_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_06) { +TYPED_TEST(CFLFieldSensTest, Basic_06) { std::map GroundTruth = { // no leaks expected }; - run({PathToLLFiles + "xtaint06_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint06_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_09_1) { +TYPED_TEST(CFLFieldSensTest, Basic_09_1) { std::map GroundTruth = { {LineColFun{14, 3, "main"}, {LineColFun{14, 8, "main"}}}, }; - run({PathToLLFiles + "xtaint09_1_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint09_1_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_09) { +TYPED_TEST(CFLFieldSensTest, Basic_09) { auto SinkCall = LineColFun{16, 3, "main"}; std::map GroundTruth = { {SinkCall, {OperandOf{0, SinkCall}}}, }; - run({PathToLLFiles + "xtaint09_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint09_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_12) { +TYPED_TEST(CFLFieldSensTest, Basic_12) { std::map GroundTruth = { {LineColFun{19, 3, "main"}, {LineColFun{19, 8, "main"}}}, }; @@ -270,76 +297,72 @@ TEST_F(CFLFieldSensTest, Basic_12) { // We sanitize an alias - since we don't have must-alias relations, we cannot // kill aliases at all - run({PathToLLFiles + "xtaint12_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint12_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_13) { +TYPED_TEST(CFLFieldSensTest, Basic_13) { std::map GroundTruth = { {LineColFun{17, 3, "main"}, {LineColFun{17, 8, "main"}}}, }; - run({PathToLLFiles + "xtaint13_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint13_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_14) { +TYPED_TEST(CFLFieldSensTest, Basic_14) { std::map GroundTruth = { {LineColFun{24, 3, "main"}, {LineColFun{24, 8, "main"}}}, }; - run({PathToLLFiles + "xtaint14_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint14_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_16) { +TYPED_TEST(CFLFieldSensTest, Basic_16) { std::map GroundTruth = { {LineColFun{13, 3, "main"}, {LineColFun{13, 8, "main"}}}, }; - run({PathToLLFiles + "xtaint16_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint16_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_17) { +TYPED_TEST(CFLFieldSensTest, Basic_17) { std::map GroundTruth = { {LineColFun{17, 3, "main"}, {LineColFun{17, 8, "main"}}}, }; - run({PathToLLFiles + "xtaint17_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint17_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_18) { +TYPED_TEST(CFLFieldSensTest, Basic_18) { std::map GroundTruth = { // no leaks expected }; - run({PathToLLFiles + "xtaint18_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint18_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_20) { +TYPED_TEST(CFLFieldSensTest, Basic_20) { std::map GroundTruth = { {LineColFun{12, 3, "main"}, {LineColFun{6, 7, "main"}}}, {LineColFun{13, 3, "main"}, {LineColFun{13, 8, "main"}}}, }; - run({PathToLLFiles + "xtaint20_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint20_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_22) { +TYPED_TEST(CFLFieldSensTest, Basic_22) { std::map GroundTruth = { {LineColFun{9, 5, "main"}, {LineColFun{9, 11, "main"}}}, }; - // psr::Logger::initializeStderrLogger( - // psr::SeverityLevel::DEBUG, - // psr::FieldSensAllocSitesAwareIFDSProblem::LogCategory.str()); - - run({PathToLLFiles + "xtaint22_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint22_cpp_dbg.ll"}, GroundTruth); } -TEST_F(CFLFieldSensTest, Basic_23) { +TYPED_TEST(CFLFieldSensTest, Basic_23) { std::map GroundTruth = { {LineColFun{17, 5, "main"}, {LineColFun{17, 11, "main"}}}, }; - run({PathToLLFiles + "xtaint23_cpp_dbg.ll"}, GroundTruth); + this->run({PathToLLFiles + "xtaint23_cpp_dbg.ll"}, GroundTruth); } } // namespace