diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index dc2a08a3da55a..87152110aa7fd 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -804,6 +804,9 @@ product(bool, IncrementalInlineForceCleanup, false, DIAGNOSTIC, \ "do cleanup after every iteration of incremental inlining") \ \ + product(bool, IncrementalInlineVector, true, DIAGNOSTIC, \ + "Inline fallback implementation of failed vector intrinsics") \ + \ product(intx, LiveNodeCountInliningCutoff, 40000, \ "max number of live nodes in a method") \ range(0, max_juint / 8) \ diff --git a/src/hotspot/share/opto/callGenerator.cpp b/src/hotspot/share/opto/callGenerator.cpp index d0b48982b0f09..3da3f05b89e10 100644 --- a/src/hotspot/share/opto/callGenerator.cpp +++ b/src/hotspot/share/opto/callGenerator.cpp @@ -445,6 +445,31 @@ CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* cal return cg; } +class LateInlineVectorCallGenerator : public LateInlineCallGenerator { /* Late-inline call generator for a vector intrinsic that also carries a fallback generator. */ + private: + CallGenerator* _fallback_cg; /* fallback generator; the constructor asserts it is non-null and is_parse() */ + + public: + LateInlineVectorCallGenerator(ciMethod* method, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg) : + LateInlineCallGenerator(method, intrinsic_cg), _fallback_cg(fallback_cg) { + assert(_fallback_cg != nullptr && _fallback_cg->is_parse(), ""); + } + + virtual bool is_vector_late_inline() const { return true; } + + virtual JVMState* generate(JVMState* jvms) { /* Runs the base class generate(), then queues a late-inline generator for the fallback (given call_node() via with_call_node) on the Compile vector late-inline list. */ + JVMState* new_jvms = LateInlineCallGenerator::generate(jvms); + CallGenerator* fallback = CallGenerator::for_late_inline(method(), _fallback_cg)->with_call_node(call_node()); + Compile::current()->add_vector_late_inline(fallback); + return new_jvms; /* result of the base class generate(), returned unchanged */ + } +}; + +CallGenerator* CallGenerator::for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg) { /* Factory for LateInlineVectorCallGenerator. */ + return new LateInlineVectorCallGenerator(m, intrinsic_cg, fallback_cg); +} + + // Allow inlining decisions to be delayed class 
LateInlineVirtualCallGenerator : public VirtualCallGenerator { private: diff --git a/src/hotspot/share/opto/callGenerator.hpp b/src/hotspot/share/opto/callGenerator.hpp index 75ba6f709c07c..2dd102b9e0601 100644 --- a/src/hotspot/share/opto/callGenerator.hpp +++ b/src/hotspot/share/opto/callGenerator.hpp @@ -75,6 +75,7 @@ class CallGenerator : public ArenaObj { // same but for method handle calls virtual bool is_mh_late_inline() const { return false; } virtual bool is_string_late_inline() const { return false; } + virtual bool is_vector_late_inline() const { return false; } /* default answer; LateInlineVectorCallGenerator overrides this to return true */ virtual bool is_boxing_late_inline() const { return false; } virtual bool is_vector_reboxing_late_inline() const { return false; } virtual bool is_virtual_late_inline() const { return false; } @@ -142,6 +143,7 @@ class CallGenerator : public ArenaObj { static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg); static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const); static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg); + static CallGenerator* for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg); /* returns a new LateInlineVectorCallGenerator built from the intrinsic and fallback generators */ static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg); static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg); static CallGenerator* for_late_inline_virtual(ciMethod* m, int vtable_index, float expected_uses); diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index e7dc57524ebd8..b8c03a188e55f 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -416,6 +416,7 @@ void Compile::remove_useless_node(Node* dead) { remove_useless_late_inlines( &_late_inlines, dead); remove_useless_late_inlines( &_string_late_inlines, dead); remove_useless_late_inlines( &_boxing_late_inlines, dead); + remove_useless_late_inlines( &_vector_late_inlines, dead); /* same helper call as for the other late-inline lists, now also covering the vector fallback list */ 
remove_useless_late_inlines(&_vector_reboxing_late_inlines, dead); if (dead->is_CallStaticJava()) { @@ -480,6 +481,7 @@ void Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis remove_useless_late_inlines( &_late_inlines, useful); remove_useless_late_inlines( &_string_late_inlines, useful); remove_useless_late_inlines( &_boxing_late_inlines, useful); + remove_useless_late_inlines( &_vector_late_inlines, useful); /* same helper call as for the other late-inline lists, now also covering the vector fallback list */ remove_useless_late_inlines(&_vector_reboxing_late_inlines, useful); DEBUG_ONLY(verify_graph_edges(true /*check for no_dead_code*/, root_and_safepoints);) } @@ -694,6 +696,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci, _string_late_inlines(comp_arena(), 2, 0, nullptr), _boxing_late_inlines(comp_arena(), 2, 0, nullptr), _vector_reboxing_late_inlines(comp_arena(), 2, 0, nullptr), + _vector_late_inlines(comp_arena(), 2, 0, nullptr), /* constructed with the same arguments as the neighbouring late-inline lists */ _late_inlines_pos(0), _has_mh_late_inlines(false), _oom(false), @@ -2173,6 +2176,25 @@ void Compile::shuffle_late_inlines() { shuffle_array(*C, _late_inlines); } +void Compile::process_vector_late_inlines() { /* Hands every queued vector fallback generator to add_late_inline() after clearing the generator cached on its call node, then empties the queue. */ + for (int i = 0; i < _vector_late_inlines.length(); i++) { + CallGenerator* cg = _vector_late_inlines.at(i); + + // When a vector intrinsic fails, set_generator(cg) caches the + // LateInlineVectorCallGenerator on the call node to allow retries + // if IGVN optimizes the call node's inputs. If the call node is not + // on the IGVN worklist when cleanup runs, CallStaticJavaNode::Ideal + // does not fire and the cached generator persists. Once _late_inlines + // drains and we commit to the fallback here, clear the stale generator + // to prevent a subsequent IGVN pass from re-registering the intrinsic + // attempt into _late_inlines alongside the fallback, which would create + // duplicate call_node entries. 
+ cg->call_node()->as_CallJava()->set_generator(nullptr); + add_late_inline(cg); /* the fallback generator now goes through add_late_inline() like any other late inline */ + } + _vector_late_inlines.clear(); /* every queued entry has been handed over above */ +} + // Perform incremental inlining until bound on number of live nodes is reached void Compile::inline_incrementally(PhaseIterGVN& igvn) { TracePhase tp(_t_incrInline); @@ -2230,6 +2252,10 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) { print_method(PHASE_INCREMENTAL_INLINE_STEP, 3); if (failing()) return; + + if (_late_inlines.length() == 0) { /* fallbacks are only released once the regular late-inline list is empty */ + process_vector_late_inlines(); + } } igvn_worklist()->ensure_empty(); // should be done with igvn @@ -4585,6 +4611,9 @@ void Compile::log_inline_id(CallGenerator* cg) { } void Compile::log_inline_failure(const char* msg) { + if (inline_printer()->is_suspended()) { /* while the inline printer is suspended, return before the inline_fail() logging below */ + return; + } if (C->log() != nullptr) { C->log()->inline_fail(msg); } diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index 3c2e1c641195b..d079e95aeccfc 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -480,6 +480,7 @@ class Compile : public Phase { GrowableArray _boxing_late_inlines; // same but for boxing operations GrowableArray _vector_reboxing_late_inlines; // same but for vector reboxing operations + GrowableArray _vector_late_inlines; // inline fallback implementation for failed intrinsics int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining) bool _has_mh_late_inlines; // Can there still be a method handle late inlining pending? 
@@ -508,6 +509,12 @@ class Compile : public Phase { InlinePrinter _inline_printer; public: + + void add_vector_late_inline(CallGenerator* cg) { /* appends cg to _vector_late_inlines */ + _vector_late_inlines.push(cg); + } + void process_vector_late_inlines(); /* defined in compile.cpp */ + void* barrier_set_state() const { return _barrier_set_state; } InlinePrinter* inline_printer() { return &_inline_printer; } diff --git a/src/hotspot/share/opto/doCall.cpp b/src/hotspot/share/opto/doCall.cpp index 90454408a430f..b44e65bb4ecbd 100644 --- a/src/hotspot/share/opto/doCall.cpp +++ b/src/hotspot/share/opto/doCall.cpp @@ -166,6 +166,21 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool cg_intrinsic = cg; cg = nullptr; } else if (IncrementalInline && should_delay_vector_inlining(callee, jvms)) { + if (IncrementalInlineVector && allow_inline) { + // Try to late inline fallback implementation if intrinsification attempt fails. + CallGenerator* fallback_cg; + { + InlinePrinterSuspendScope guard(C->inline_printer()); /* suspends the inline printer until the end of this scope, so the nested call_generator() query records nothing */ + fallback_cg = call_generator(callee, vtable_index, call_does_dispatch, jvms, + true /*allow_inline*/, prof_factor, + speculative_receiver_type, false /*allow_intrinsics*/); + } + if (fallback_cg != nullptr && fallback_cg->is_parse()) { /* same condition the LateInlineVectorCallGenerator constructor asserts */ + return CallGenerator::for_vector_late_inline(callee, cg, fallback_cg); + } + // Fallback not inlineable by regular heuristics; fall through. + } + // Don't try to inline fallback implementation. 
return CallGenerator::for_late_inline(callee, cg); } else { return cg; diff --git a/src/hotspot/share/opto/printinlining.cpp b/src/hotspot/share/opto/printinlining.cpp index 06d14a7f3af27..18b4118356848 100644 --- a/src/hotspot/share/opto/printinlining.cpp +++ b/src/hotspot/share/opto/printinlining.cpp @@ -35,6 +35,9 @@ outputStream* InlinePrinter::record(ciMethod* callee, JVMState* state, InliningR if (!is_enabled()) { return &_nullStream; } + if (is_suspended()) { /* while suspended, hand back the null stream exactly as the disabled case above does */ + return &_nullStream; + } outputStream* stream = locate(state, callee)->add(result); if (msg != nullptr) { stream->print("%s", msg); diff --git a/src/hotspot/share/opto/printinlining.hpp b/src/hotspot/share/opto/printinlining.hpp index e331593ec0e6a..e45f5075a8d30 100644 --- a/src/hotspot/share/opto/printinlining.hpp +++ b/src/hotspot/share/opto/printinlining.hpp @@ -114,6 +114,8 @@ class InlinePrinter { Compile* C; + uint _suspend_depth; /* count of suspend() calls not yet matched by resume(); starts at 0 in the constructor */ + // In case print inline is disabled, this null stream is returned from ::record() nullStream _nullStream; @@ -126,7 +128,7 @@ class InlinePrinter { IPInlineSite _root{nullptr, 0}; public: - InlinePrinter(Compile* compile) : C(compile) {} + InlinePrinter(Compile* compile) : C(compile), _suspend_depth(0) {} // Saves the result of an inline attempt of method at state. // An optional string message with more details that is copied to the stream for this attempt. Pointer is not captured. @@ -136,6 +138,18 @@ class InlinePrinter { // Prints all collected inlining information to the given output stream. 
void print_on(outputStream* tty) const; + + bool is_suspended() const { return _suspend_depth > 0; } /* true while at least one suspend() has not been matched by resume() */ + void suspend() { _suspend_depth++; } /* calls nest: each one raises the depth by one */ + void resume() { assert(_suspend_depth > 0, "unbalanced resume"); _suspend_depth--; } +}; + +class InlinePrinterSuspendScope : public StackObj { /* Scope guard: the constructor calls suspend() on the given printer and the destructor calls resume(). */ + private: + InlinePrinter* const _printer; /* printer to resume in the destructor; dereferenced without a null check */ + public: + InlinePrinterSuspendScope(InlinePrinter* printer) : _printer(printer) { _printer->suspend(); } + ~InlinePrinterSuspendScope() { _printer->resume(); } }; #endif // PRINTINLINING_HPP diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java index c6329c70f6594..9b1a95dfb1440 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java +++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java @@ -40,7 +40,8 @@ */ public class TestVectorTest { public static void main(String[] args) { - TestFramework.runWithFlags("--add-modules=jdk.incubator.vector"); + TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", + "-XX:-IncrementalInlineVector"); /* extra flag: IncrementalInlineVector switched off (-XX:-) */ } @DontInline diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorCompareWithZeroTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorCompareWithZeroTest.java index 26e159fb768d0..0b1bfd2c80209 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/VectorCompareWithZeroTest.java +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorCompareWithZeroTest.java @@ -253,7 +253,8 @@ public static void testLongVectorUnsignedCondition() { public static void main(String[] args) { TestFramework testFramework = new TestFramework(); testFramework.setDefaultWarmup(10000) - .addFlags("--add-modules=jdk.incubator.vector") + .addFlags("--add-modules=jdk.incubator.vector", + "-XX:-IncrementalInlineVector") /* extra flag: IncrementalInlineVector switched off (-XX:-) */ .addFlags("-XX:UseSVE=0") .start(); } diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java index 
9a2f440ea8227..fe7095fc5691c 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java @@ -1747,7 +1747,8 @@ public static void testMaskedCompareMaskNotNegative() { public static void main(String[] args) { TestFramework testFramework = new TestFramework(); testFramework.setDefaultWarmup(10000) - .addFlags("--add-modules=jdk.incubator.vector") + .addFlags("--add-modules=jdk.incubator.vector", + "-XX:-IncrementalInlineVector") /* extra flag: IncrementalInlineVector switched off (-XX:-) */ .start(); } }