diff --git a/make/common/native/Flags.gmk b/make/common/native/Flags.gmk index efb4c08e74c..6353b490654 100644 --- a/make/common/native/Flags.gmk +++ b/make/common/native/Flags.gmk @@ -234,6 +234,9 @@ define SetupLinkerFlags ifeq ($(call isTargetOs, macosx), true) $1_EXTRA_LDFLAGS += -Wl,-object_path_lto,$$($1_OBJECT_DIR)/$$($1_NAME)_lto_helper.o endif + ifeq ($(TOOLCHAIN_TYPE), microsoft) + $1_EXTRA_LDFLAGS += -LTCGOUT:$$($1_OBJECT_DIR)/$$($1_NAME).iobj + endif endif $1_EXTRA_LDFLAGS += $$($1_LDFLAGS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LDFLAGS_$(OPENJDK_TARGET_OS)) \ diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index 842784d1a29..78ef121bd29 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -346,8 +346,14 @@ source %{ } bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { - // Only SVE has partial vector operations - if (UseSVE == 0) { + // 1. Only SVE requires partial vector operations. + // 2. The vector size in bytes must be smaller than MaxVectorSize. + // 3. Predicated vectors have a mask input, which guarantees that + // out-of-bounds lanes remain inactive. + int length_in_bytes = vt->length_in_bytes(); + if (UseSVE == 0 || + length_in_bytes == MaxVectorSize || + node->is_predicated_vector()) { return false; } @@ -370,21 +376,22 @@ source %{ return !node->in(1)->is_Con(); case Op_LoadVector: case Op_StoreVector: - // We use NEON load/store instructions if the vector length is <= 128 bits. - return vt->length_in_bytes() > 16; case Op_AddReductionVI: case Op_AddReductionVL: - // We may prefer using NEON instructions rather than SVE partial operations. - return !VM_Version::use_neon_for_vector(vt->length_in_bytes()); + // For these ops, we prefer using NEON instructions rather than SVE + // predicated instructions for better performance. + return !VM_Version::use_neon_for_vector(length_in_bytes); case Op_MinReductionV: case Op_MaxReductionV: - // For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON - // instructions rather than SVE partial operations. + // For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON + // instructions rather than SVE predicated instructions for + // better performance. return vt->element_basic_type() == T_LONG || - !VM_Version::use_neon_for_vector(vt->length_in_bytes()); + !VM_Version::use_neon_for_vector(length_in_bytes); default: - // For other ops whose vector size is smaller than the max vector size, a - // full-sized unpredicated operation does not impact the final vector result. + // For other ops whose vector size is smaller than the max vector + // size, a full-sized unpredicated operation does not impact the + // vector result. return false; } } diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index dff82ce95ac..66dc22c3758 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -336,8 +336,14 @@ source %{ } bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { - // Only SVE has partial vector operations - if (UseSVE == 0) { + // 1. Only SVE requires partial vector operations. + // 2. The vector size in bytes must be smaller than MaxVectorSize. + // 3. Predicated vectors have a mask input, which guarantees that + // out-of-bounds lanes remain inactive. 
+ int length_in_bytes = vt->length_in_bytes(); + if (UseSVE == 0 || + length_in_bytes == MaxVectorSize || + node->is_predicated_vector()) { return false; } @@ -360,21 +366,22 @@ source %{ return !node->in(1)->is_Con(); case Op_LoadVector: case Op_StoreVector: - // We use NEON load/store instructions if the vector length is <= 128 bits. - return vt->length_in_bytes() > 16; case Op_AddReductionVI: case Op_AddReductionVL: - // We may prefer using NEON instructions rather than SVE partial operations. - return !VM_Version::use_neon_for_vector(vt->length_in_bytes()); + // For these ops, we prefer using NEON instructions rather than SVE + // predicated instructions for better performance. + return !VM_Version::use_neon_for_vector(length_in_bytes); case Op_MinReductionV: case Op_MaxReductionV: - // For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON - // instructions rather than SVE partial operations. + // For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON + // instructions rather than SVE predicated instructions for + // better performance. return vt->element_basic_type() == T_LONG || - !VM_Version::use_neon_for_vector(vt->length_in_bytes()); + !VM_Version::use_neon_for_vector(length_in_bytes); default: - // For other ops whose vector size is smaller than the max vector size, a - // full-sized unpredicated operation does not impact the final vector result. + // For other ops whose vector size is smaller than the max vector + // size, a full-sized unpredicated operation does not impact the + // vector result. return false; } } diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index ceedb4f1063..2ccc755be3c 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -5379,7 +5379,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { assert (UseCompressedClassPointers, "should only be used for compressed headers"); assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); int index = oop_recorder()->find_index(k); - assert(! Universe::heap()->is_in(k), "should not be an oop"); InstructionMark im(this); RelocationHolder rspec = metadata_Relocation::spec(index); diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index aa00609094e..2a0a9149bb3 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -6335,8 +6335,36 @@ instruct loadConD_Ex(regD dst, immD src) %{ // Prefetch instructions. // Must be safe to execute with invalid address (cannot fault). +// Special prefetch versions which use the dcbz instruction. 
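The dcbz-based rules that follow below match PrefetchAllocation to dcbz, a PowerPC instruction that zeroes an entire cache line without first reading it from memory; since the JVM must zero newly allocated memory anyway, this "prefetch" also does useful work. As a rough illustration of the path being exercised — a minimal sketch in which AllocatePrefetchStyle is the real HotSpot flag, but the demo class and its loop bounds are made up for the example:

    // Run on PPC with: java -XX:AllocatePrefetchStyle=3 AllocPrefetchDemo
    // Style 3 selects the zeroing prefetch matched by the prefetch_alloc_zero
    // rules below.
    public class AllocPrefetchDemo {
        public static void main(String[] args) {
            long sink = 0;
            for (int i = 0; i < 10_000_000; i++) {
                // A hot allocation loop: on this path C2 emits a
                // PrefetchAllocation node a fixed distance ahead of the TLAB top.
                byte[] b = new byte[64];
                sink += b.length;
            }
            System.out.println(sink);
        }
    }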
+instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{ + match(PrefetchAllocation (AddP mem src)); + predicate(AllocatePrefetchStyle == 3); + ins_cost(MEMORY_REF_COST); + + format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %} + size(4); + ins_encode %{ + __ dcbz($src$$Register, $mem$$base$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{ + match(PrefetchAllocation mem); + predicate(AllocatePrefetchStyle == 3); + ins_cost(MEMORY_REF_COST); + + format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %} + size(4); + ins_encode %{ + __ dcbz($mem$$base$$Register); + %} + ins_pipe(pipe_class_memory); +%} + instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{ match(PrefetchAllocation (AddP mem src)); + predicate(AllocatePrefetchStyle != 3); ins_cost(MEMORY_REF_COST); format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %} @@ -6349,6 +6377,7 @@ instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{ instruct prefetch_alloc_no_offset(indirectMemory mem) %{ match(PrefetchAllocation mem); + predicate(AllocatePrefetchStyle != 3); ins_cost(MEMORY_REF_COST); format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %} diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 7a8496ae42b..8ffd54fd7ee 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -4933,7 +4933,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { assert (UseCompressedClassPointers, "should only be used for compressed headers"); assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder"); int index = oop_recorder()->find_index(k); - assert(!Universe::heap()->is_in(k), "should not be an oop"); narrowKlass nk = CompressedKlassPointers::encode(k); relocate(metadata_Relocation::spec(index), [&] { diff --git a/src/hotspot/share/gc/serial/serialHeap.cpp b/src/hotspot/share/gc/serial/serialHeap.cpp index 932c06b8109..104924c1cad 100644 --- a/src/hotspot/share/gc/serial/serialHeap.cpp +++ b/src/hotspot/share/gc/serial/serialHeap.cpp @@ -630,6 +630,14 @@ bool SerialHeap::requires_barriers(stackChunkOop obj) const { // Returns "TRUE" iff "p" points into the committed areas of the heap. bool SerialHeap::is_in(const void* p) const { + // precondition + verify_not_in_native_if_java_thread(); + + if (!is_in_reserved(p)) { + // If it's not even in reserved. 
+ return false; + } + return _young_gen->is_in(p) || _old_gen->is_in(p); } @@ -797,3 +805,12 @@ void SerialHeap::gc_epilogue(bool full) { MetaspaceCounters::update_performance_counters(); }; + +#ifdef ASSERT +void SerialHeap::verify_not_in_native_if_java_thread() { + if (Thread::current()->is_Java_thread()) { + JavaThread* thread = JavaThread::current(); + assert(thread->thread_state() != _thread_in_native, "precondition"); + } +} +#endif diff --git a/src/hotspot/share/gc/serial/serialHeap.hpp b/src/hotspot/share/gc/serial/serialHeap.hpp index ee016173c2a..f5286179abf 100644 --- a/src/hotspot/share/gc/serial/serialHeap.hpp +++ b/src/hotspot/share/gc/serial/serialHeap.hpp @@ -111,6 +111,8 @@ class SerialHeap : public CollectedHeap { void print_tracing_info() const override; void stop() override {}; + static void verify_not_in_native_if_java_thread() NOT_DEBUG_RETURN; + public: // Returns JNI_OK on success jint initialize() override; diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp index 2181e089663..9635ed4d0cb 100644 --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp @@ -37,6 +37,7 @@ #include "utilities/copy.hpp" size_t ThreadLocalAllocBuffer::_max_size = 0; +int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0; unsigned int ThreadLocalAllocBuffer::_target_refills = 0; ThreadLocalAllocBuffer::ThreadLocalAllocBuffer() : @@ -224,6 +225,30 @@ void ThreadLocalAllocBuffer::startup_initialization() { // abort during VM initialization. _target_refills = MAX2(_target_refills, 2U); +#ifdef COMPILER2 + // If the C2 compiler is present, extra space is needed at the end of + // TLABs, otherwise prefetching instructions generated by the C2 + // compiler will fault (due to accessing memory outside of heap). + // The amount of space is the max of the number of lines to + // prefetch for array and for instance allocations. (Extra space must be + // reserved to accommodate both types of allocations.) + // + // Only SPARC-specific BIS instructions are known to fault. (Those + // instructions are generated if AllocatePrefetchStyle==3 and + // AllocatePrefetchInstr==1). To be on the safe side, however, + // extra space is reserved for all combinations of + // AllocatePrefetchStyle and AllocatePrefetchInstr. + // + // If the C2 compiler is not present, no space is reserved. + + // +1 for rounding up to next cache line, +1 to be safe + if (CompilerConfig::is_c2_or_jvmci_compiler_enabled()) { + int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2; + _reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) / + (int)HeapWordSize; + } +#endif + // During jvm startup, the main thread is initialized // before the heap is initialized. So reinitialize it now. 
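To make the reservation arithmetic above concrete, here is a self-contained sketch that mirrors the C++ computation; the flag values are plausible samples (in a real VM they are CPU-dependent), not values prescribed by this patch:

    public class TlabReserveDemo {
        public static void main(String[] args) {
            int allocatePrefetchLines = 3;          // sample -XX:AllocatePrefetchLines
            int allocateInstancePrefetchLines = 1;  // sample -XX:AllocateInstancePrefetchLines
            int allocatePrefetchStepSize = 64;      // sample -XX:AllocatePrefetchStepSize
            int allocatePrefetchDistance = 192;     // sample -XX:AllocatePrefetchDistance
            int heapWordSize = 8;                   // 64-bit heap words

            // Mirrors the code above: +1 line for rounding up, +1 to be safe.
            int lines = Math.max(allocatePrefetchLines, allocateInstancePrefetchLines) + 2;
            int reserve = (allocatePrefetchDistance + allocatePrefetchStepSize * lines) / heapWordSize;
            // (192 + 64 * 5) / 8 = 64 heap words kept free at the end of each TLAB.
            System.out.println("reserve_for_allocation_prefetch = " + reserve + " words");
        }
    }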
   guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
@@ -429,7 +454,8 @@ void ThreadLocalAllocStats::publish() {
 }
 
 size_t ThreadLocalAllocBuffer::end_reserve() {
-  return CollectedHeap::lab_alignment_reserve();
+  size_t reserve_size = CollectedHeap::lab_alignment_reserve();
+  return MAX2(reserve_size, (size_t)_reserve_for_allocation_prefetch);
 }
 
 const HeapWord* ThreadLocalAllocBuffer::start_relaxed() const {
diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
index b64fa8d6ad1..59979646395 100644
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
@@ -58,6 +58,7 @@ class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
   size_t _allocated_before_last_gc; // total bytes allocated up until the last gc
 
   static size_t _max_size;                        // maximum size of any TLAB
+  static int _reserve_for_allocation_prefetch;    // Reserve at the end of the TLAB
   static unsigned _target_refills;                // expected number of refills between GCs
 
   unsigned _number_of_refills;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
index 2b5bc766a46..7db478a781a 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
@@ -128,8 +128,8 @@ class ShenandoahBarrierSet: public BarrierSet {
   void write_ref_array(HeapWord* start, size_t count);
 
 private:
-  template <class T>
-  inline void arraycopy_marking(T* dst, size_t count);
+  template <bool IS_GENERATIONAL, class T>
+  void arraycopy_marking(T* dst, size_t count);
   template <class T>
   inline void arraycopy_evacuation(T* src, size_t count);
   template <class T>
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
index adeea8ebf96..199256ca31b 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
@@ -429,7 +429,11 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
     // If marking old or young, we must evaluate the SATB barrier. This will be the only
     // action if we are not marking old. If we are marking old, we must still evaluate the
     // load reference barrier for a young collection.
-    arraycopy_marking(dst, count);
+    if (_heap->mode()->is_generational()) {
+      arraycopy_marking<true>(dst, count);
+    } else {
+      arraycopy_marking<false>(dst, count);
+    }
   }
 
   if ((gc_state & ShenandoahHeap::EVACUATION) != 0) {
@@ -441,11 +445,12 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
 }
 
-template <class T>
+template <bool IS_GENERATIONAL, class T>
 void ShenandoahBarrierSet::arraycopy_marking(T* dst, size_t count) {
   assert(_heap->is_concurrent_mark_in_progress(), "only during marking");
   if (ShenandoahSATBBarrier) {
-    if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast<HeapWord*>(dst))) {
+    if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast<HeapWord*>(dst)) ||
+        (IS_GENERATIONAL && _heap->heap_region_containing(dst)->is_old() && _heap->is_concurrent_young_mark_in_progress())) {
       arraycopy_work<T, false, false, true>(dst, count);
     }
   }
diff --git a/src/hotspot/share/opto/macro.cpp b/src/hotspot/share/opto/macro.cpp
index 6f2171bbd75..90602bc2b35 100644
--- a/src/hotspot/share/opto/macro.cpp
+++ b/src/hotspot/share/opto/macro.cpp
@@ -1914,7 +1914,8 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
       transform_later(cache_adr);
       cache_adr = new CastP2XNode(needgc_false, cache_adr);
       transform_later(cache_adr);
-      // Address is aligned to execute prefetch to the beginning of cache line size.
+      // Address is aligned to execute prefetch to the beginning of cache line size
+      // (it is important when BIS instruction is used on SPARC as prefetch).
       Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
       cache_adr = new AndXNode(cache_adr, mask);
       transform_later(cache_adr);
diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp
index ca13d0166a1..a071cff9e3c 100644
--- a/src/hotspot/share/opto/matcher.hpp
+++ b/src/hotspot/share/opto/matcher.hpp
@@ -329,6 +329,10 @@ class Matcher : public PhaseTransform {
   static bool match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt);
 
+  // Determines if a vector operation needs to be implemented partially, with a mask
+  // ensuring that only the lanes in range [0, vector_length) are processed. This applies
+  // to operations whose vector length is less than the hardware-supported maximum
+  // vector length. Returns true if the operation requires masking, false otherwise.
   static bool vector_needs_partial_operations(Node* node, const TypeVect* vt);
 
   static bool vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen);
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
index 57b94205e5e..271dc901dcb 100644
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@@ -936,28 +936,26 @@ bool VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(int op
   }
 }
 
+// Idealize vector operations whose vector size is less than the hardware-supported
+// max vector size. Generate a vector mask for the operation. Lanes with indices
+// within the vector length are set to true, while the remaining lanes are set to
+// false. Returns the corresponding masked vector node.
+static Node* ideal_partial_operations(PhaseGVN* phase, Node* node, const TypeVect* vt) { + if (!Matcher::vector_needs_partial_operations(node, vt)) { + return nullptr; + } -Node* VectorNode::try_to_gen_masked_vector(PhaseGVN* gvn, Node* node, const TypeVect* vt) { int vopc = node->Opcode(); uint vlen = vt->length(); BasicType bt = vt->element_basic_type(); + assert(Matcher::match_rule_supported_vector_masked(vopc, vlen, bt), + "The masked feature is required for the vector operation"); + assert(Matcher::match_rule_supported_vector(Op_VectorMaskGen, vlen, bt), + "'VectorMaskGen' is required to generate a vector mask"); - // Predicated vectors do not need to add another mask input - if (node->is_predicated_vector() || !Matcher::has_predicated_vectors() || - !Matcher::match_rule_supported_vector_masked(vopc, vlen, bt) || - !Matcher::match_rule_supported_vector(Op_VectorMaskGen, vlen, bt)) { - return nullptr; - } - - Node* mask = nullptr; - // Generate a vector mask for vector operation whose vector length is lower than the - // hardware supported max vector length. - if (vt->length_in_bytes() < (uint)MaxVectorSize) { - Node* length = gvn->transform(new ConvI2LNode(gvn->makecon(TypeInt::make(vlen)))); - mask = gvn->transform(VectorMaskGenNode::make(length, bt, vlen)); - } else { - return nullptr; - } + // Generate a vector mask, with lanes inside of the vector length set to true. + Node* length = phase->transform(new ConvI2LNode(phase->makecon(TypeInt::make(vlen)))); + Node* mask = phase->transform(VectorMaskGenNode::make(length, bt, vlen)); // Generate the related masked op for vector load/store/load_gather/store_scatter. // Or append the mask to the vector op's input list by default. @@ -1037,8 +1035,9 @@ bool VectorNode::should_swap_inputs_to_help_global_value_numbering() { } Node* VectorNode::Ideal(PhaseGVN* phase, bool can_reshape) { - if (Matcher::vector_needs_partial_operations(this, vect_type())) { - return try_to_gen_masked_vector(phase, this, vect_type()); + Node* n = ideal_partial_operations(phase, this, vect_type()); + if (n != nullptr) { + return n; } // Sort inputs of commutative non-predicated vector operations to help value numbering. 
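The mask built by ideal_partial_operations has the same lane shape that the Vector API exposes at the Java level: lanes below the operation's vector length are active, hardware lanes beyond it are inactive. A small sketch of that shape using the public API (an analogy only, not the compiler's internal representation; run with --add-modules jdk.incubator.vector):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorSpecies;

    public class PartialMaskDemo {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_128;

        public static void main(String[] args) {
            // Lanes [0, 3) set, the rest clear -- the same shape as the mask
            // VectorMaskGen produces for a partial operation of length 3.
            VectorMask<Integer> m = SPECIES.indexInRange(0, 3);
            System.out.println(m);
        }
    }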
@@ -1119,9 +1118,9 @@ LoadVectorNode* LoadVectorNode::make(int opc, Node* ctl, Node* mem, } Node* LoadVectorNode::Ideal(PhaseGVN* phase, bool can_reshape) { - const TypeVect* vt = vect_type(); - if (Matcher::vector_needs_partial_operations(this, vt)) { - return VectorNode::try_to_gen_masked_vector(phase, this, vt); + Node* n = ideal_partial_operations(phase, this, vect_type()); + if (n != nullptr) { + return n; } return LoadNode::Ideal(phase, can_reshape); } @@ -1133,9 +1132,9 @@ StoreVectorNode* StoreVectorNode::make(int opc, Node* ctl, Node* mem, Node* adr, } Node* StoreVectorNode::Ideal(PhaseGVN* phase, bool can_reshape) { - const TypeVect* vt = vect_type(); - if (Matcher::vector_needs_partial_operations(this, vt)) { - return VectorNode::try_to_gen_masked_vector(phase, this, vt); + Node* n = ideal_partial_operations(phase, this, vect_type()); + if (n != nullptr) { + return n; } return StoreNode::Ideal(phase, can_reshape); } @@ -1411,11 +1410,11 @@ ReductionNode* ReductionNode::make(int opc, Node* ctrl, Node* n1, Node* n2, Basi } Node* ReductionNode::Ideal(PhaseGVN* phase, bool can_reshape) { - const TypeVect* vt = vect_type(); - if (Matcher::vector_needs_partial_operations(this, vt)) { - return VectorNode::try_to_gen_masked_vector(phase, this, vt); + Node* n = ideal_partial_operations(phase, this, vect_type()); + if (n != nullptr) { + return n; } - return nullptr; + return Node::Ideal(phase, can_reshape); } // Convert fromLong to maskAll if the input sets or unsets all lanes. @@ -1893,11 +1892,11 @@ Node* VectorMaskOpNode::make(Node* mask, const Type* ty, int mopc) { } Node* VectorMaskOpNode::Ideal(PhaseGVN* phase, bool can_reshape) { - const TypeVect* vt = vect_type(); - if (Matcher::vector_needs_partial_operations(this, vt)) { - return VectorNode::try_to_gen_masked_vector(phase, this, vt); + Node* n = ideal_partial_operations(phase, this, vect_type()); + if (n != nullptr) { + return n; } - return nullptr; + return TypeNode::Ideal(phase, can_reshape); } Node* VectorMaskCastNode::Identity(PhaseGVN* phase) { diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 427aeff53fc..dc7aa13cf36 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -117,7 +117,6 @@ class VectorNode : public TypeNode { static bool is_vector_bitwise_not_pattern(Node* n); static Node* degenerate_vector_rotate(Node* n1, Node* n2, bool is_rotate_left, int vlen, BasicType bt, PhaseGVN* phase); - static Node* try_to_gen_masked_vector(PhaseGVN* gvn, Node* node, const TypeVect* vt); // [Start, end) half-open range defining which operands are vectors static void vector_operands(Node* n, uint* start, uint* end); diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index 25a99c2d758..85a2ef8b507 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -353,6 +353,7 @@ nonstatic_field(ThreadLocalAllocBuffer, _pf_top, HeapWord*) \ nonstatic_field(ThreadLocalAllocBuffer, _desired_size, size_t) \ nonstatic_field(ThreadLocalAllocBuffer, _refill_waste_limit, size_t) \ + static_field(ThreadLocalAllocBuffer, _reserve_for_allocation_prefetch, int) \ static_field(ThreadLocalAllocBuffer, _target_refills, unsigned) \ nonstatic_field(ThreadLocalAllocBuffer, _number_of_refills, unsigned) \ nonstatic_field(ThreadLocalAllocBuffer, _refill_waste, unsigned) \ diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java index e23e63806bd..1dc67330d3d 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ThreadLocalAllocBuffer.java @@ -76,9 +76,10 @@ private long alignmentReserve() { private long endReserve() { long labAlignmentReserve = VM.getVM().getLabAlignmentReserve(); + long reserveForAllocationPrefetch = VM.getVM().getReserveForAllocationPrefetch(); long heapWordSize = VM.getVM().getHeapWordSize(); - return labAlignmentReserve * heapWordSize; + return Math.max(labAlignmentReserve, reserveForAllocationPrefetch) * heapWordSize; } /** Support for iteration over heap -- not sure how this will diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java index 1607563150a..dc27a4fc59e 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java @@ -123,6 +123,7 @@ public class VM { private int invocationEntryBCI; private ReversePtrs revPtrs; private VMRegImpl vmregImpl; + private int reserveForAllocationPrefetch; private int labAlignmentReserve; // System.getProperties from debuggee VM @@ -446,6 +447,8 @@ private VM(TypeDataBase db, JVMDebugger debugger, boolean isBigEndian) { boolType = (CIntegerType) db.lookupType("bool"); Type threadLocalAllocBuffer = db.lookupType("ThreadLocalAllocBuffer"); + CIntegerField reserveForAllocationPrefetchField = threadLocalAllocBuffer.getCIntegerField("_reserve_for_allocation_prefetch"); + reserveForAllocationPrefetch = (int)reserveForAllocationPrefetchField.getCInteger(intType); Type collectedHeap = db.lookupType("CollectedHeap"); CIntegerField labAlignmentReserveField = collectedHeap.getCIntegerField("_lab_alignment_reserve"); @@ -912,6 +915,10 @@ public String getVMInternalInfo() { return vmInternalInfo; } + public int getReserveForAllocationPrefetch() { + return reserveForAllocationPrefetch; + } + public int getLabAlignmentReserve() { return labAlignmentReserve; } diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt index 6c3d907961d..ddc6e55dc05 100644 --- a/test/hotspot/jtreg/ProblemList.txt +++ b/test/hotspot/jtreg/ProblemList.txt @@ -187,9 +187,3 @@ vmTestbase/nsk/jdwp/ThreadReference/ForceEarlyReturn/forceEarlyReturn001/forceEa vmTestbase/nsk/monitoring/ThreadMXBean/ThreadInfo/Multi/Multi005/TestDescription.java 8076494 windows-x64 vmTestbase/nsk/monitoring/ThreadMXBean/findMonitorDeadlockedThreads/find006/TestDescription.java 8310144 macosx-aarch64 - -vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded001/TestDescription.java 8373022 generic-all -vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded002/TestDescription.java 8373022 generic-all -vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded003/TestDescription.java 8373022 generic-all -vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded004/TestDescription.java 8373022 generic-all -vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded005/TestDescription.java 8373022 generic-all diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java index c8ee5e730fa..445fef5e55a 100644 --- 
a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java
@@ -68,6 +68,15 @@ public class TestFloat16ScalarOperations {
     private static final Float16 RANDOM4 = Float16.valueOf(genF.next());
     private static final Float16 RANDOM5 = Float16.valueOf(genF.next());
 
+    // We have to ensure that the constants are not special values that lead the operations to
+    // constant fold. For example "x + 0" could constant fold to "x", so we need to ensure
+    // that the add constant is not zero.
+    private static Generator<Float> genSmallRangeF = G.uniformFloats(0.1f, 0.9f);
+    private static final Float16 RANDOM_CON_ADD = Float16.valueOf(genSmallRangeF.next());
+    private static final Float16 RANDOM_CON_SUB = Float16.valueOf(genSmallRangeF.next());
+    private static final Float16 RANDOM_CON_MUL = Float16.valueOf(genSmallRangeF.next());
+    private static final Float16 RANDOM_CON_DIV = Float16.valueOf(genSmallRangeF.next());
+
     private static Float16 RANDOM1_VAR = RANDOM1;
     private static Float16 RANDOM2_VAR = RANDOM2;
     private static Float16 RANDOM3_VAR = RANDOM3;
@@ -435,10 +444,10 @@ public void checkExactFP16ConstantPatterns(short actual) {
     @Warmup(10000)
     public short testRandomFP16ConstantPatternSet1() {
         short res = 0;
-        res += Float.floatToFloat16(RANDOM1_VAR.floatValue() + RANDOM2.floatValue());
-        res += Float.floatToFloat16(RANDOM2_VAR.floatValue() - RANDOM3.floatValue());
-        res += Float.floatToFloat16(RANDOM3_VAR.floatValue() * RANDOM4.floatValue());
-        res += Float.floatToFloat16(RANDOM4_VAR.floatValue() / RANDOM5.floatValue());
+        res += Float.floatToFloat16(RANDOM1_VAR.floatValue() + RANDOM_CON_ADD.floatValue());
+        res += Float.floatToFloat16(RANDOM2_VAR.floatValue() - RANDOM_CON_SUB.floatValue());
+        res += Float.floatToFloat16(RANDOM3_VAR.floatValue() * RANDOM_CON_MUL.floatValue());
+        res += Float.floatToFloat16(RANDOM4_VAR.floatValue() / RANDOM_CON_DIV.floatValue());
         return res;
     }
 
@@ -456,10 +465,10 @@ public void checkRandomFP16ConstantPatternSet1(short actual) {
     @Warmup(10000)
     public short testRandomFP16ConstantPatternSet2() {
         short res = 0;
-        res += Float.floatToFloat16(RANDOM2.floatValue() + RANDOM1_VAR.floatValue());
-        res += Float.floatToFloat16(RANDOM3.floatValue() - RANDOM2_VAR.floatValue());
-        res += Float.floatToFloat16(RANDOM4.floatValue() * RANDOM3_VAR.floatValue());
-        res += Float.floatToFloat16(RANDOM5.floatValue() / RANDOM4_VAR.floatValue());
+        res += Float.floatToFloat16(RANDOM_CON_ADD.floatValue() + RANDOM1_VAR.floatValue());
+        res += Float.floatToFloat16(RANDOM_CON_SUB.floatValue() - RANDOM2_VAR.floatValue());
+        res += Float.floatToFloat16(RANDOM_CON_MUL.floatValue() * RANDOM3_VAR.floatValue());
+        res += Float.floatToFloat16(RANDOM_CON_DIV.floatValue() / RANDOM4_VAR.floatValue());
         return res;
     }
 
diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
index 85595b9b632..a9d7426b2e8 100644
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@@ -1448,6 +1448,16 @@ public class IRNode {
         beforeMatchingNameRegex(VECTOR_MASK_LANE_IS_SET, "ExtractUB");
     }
 
+    public static final String VECTOR_MASK_GEN = PREFIX + "VECTOR_MASK_GEN" + POSTFIX;
+    static {
+        beforeMatchingNameRegex(VECTOR_MASK_GEN, "VectorMaskGen");
+    }
+
+    public static final String VECTOR_MASK_FIRST_TRUE = PREFIX + "VECTOR_MASK_FIRST_TRUE" + POSTFIX;
+    static {
+        beforeMatchingNameRegex(VECTOR_MASK_FIRST_TRUE, "VectorMaskFirstTrue");
+    }
+
     // Can only be used if avx512_vnni is available.
     public static final String MUL_ADD_VS2VI_VNNI = PREFIX + "MUL_ADD_VS2VI_VNNI" + POSTFIX;
     static {
diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorLoadStoreOptimization.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorLoadStoreOptimization.java
new file mode 100644
index 00000000000..c603f450d0c
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorLoadStoreOptimization.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.vectorapi;
+
+import compiler.lib.generators.*;
+import compiler.lib.ir_framework.*;
+import jdk.incubator.vector.*;
+import jdk.test.lib.Asserts;
+
+/**
+ * @test 8371603
+ * @key randomness
+ * @library /test/lib /
+ * @summary Test the missing optimization issues for vector load/store caused by JDK-8286941
+ * @modules jdk.incubator.vector
+ *
+ * @run driver ${test.main.class}
+ */
+public class TestVectorLoadStoreOptimization {
+    private static final int LENGTH = 1024;
+    private static final Generators random = Generators.G;
+
+    private static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;
+
+    private static int[] a;
+
+    static {
+        a = new int[LENGTH];
+        random.fill(random.ints(), a);
+    }
+
+    // Test that "LoadVectorNode::Ideal()" calls "LoadNode::Ideal()" as expected,
+    // which sees the previous stores that go to the same position independently,
+    // and optimizes out the load with matched store values.
+    @Test
+    @IR(counts = { IRNode.LOAD_VECTOR_I, "1" },
+        applyIfCPUFeatureOr = {"asimd", "true", "avx", "true", "rvv", "true"})
+    public static void testLoadVector() {
+        IntVector v1 = IntVector.fromArray(SPECIES, a, 0);
+        v1.intoArray(a, SPECIES.length());
+        v1.intoArray(a, 2 * SPECIES.length());
+        // The second load vector equals the first one and should be optimized
+        // out by "LoadNode::Ideal()".
+        IntVector v2 = IntVector.fromArray(SPECIES, a, SPECIES.length());
+        v2.intoArray(a, 3 * SPECIES.length());
+    }
+
+    @Check(test = "testLoadVector")
+    public static void testLoadVectorVerify() {
+        for (int i = SPECIES.length(); i < 4 * SPECIES.length(); i += SPECIES.length()) {
+            for (int j = 0; j < SPECIES.length(); j++) {
+                Asserts.assertEquals(a[i + j], a[j]);
+            }
+        }
+    }
+
+    // Test that "StoreVectorNode::Ideal()" calls "StoreNode::Ideal()" as expected,
+    // which can get rid of previous stores that go to the same position.
+ @Test + @IR(counts = { IRNode.STORE_VECTOR, "1" }, + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true", "rvv", "true"}) + public static void testStoreVector() { + IntVector v1 = IntVector.fromArray(SPECIES, a, 0 * SPECIES.length()); + IntVector v2 = IntVector.fromArray(SPECIES, a, 1 * SPECIES.length()); + // Useless store to same position as below, which should be optimized out by + // "StoreNode::Ideal()". + v1.intoArray(a, 3 * SPECIES.length()); + v2.intoArray(a, 3 * SPECIES.length()); + } + + @Check(test = "testStoreVector") + public static void testStoreVectorVerify() { + for (int i = 3 * SPECIES.length(); i < 4 * SPECIES.length(); i++) { + Asserts.assertEquals(a[i], a[i - 2 * SPECIES.length()]); + } + } + + public static void main(String[] args) { + TestFramework testFramework = new TestFramework(); + testFramework.setDefaultWarmup(10000) + .addFlags("--add-modules=jdk.incubator.vector") + .start(); + } +} \ No newline at end of file diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorOperationsWithPartialSize.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorOperationsWithPartialSize.java new file mode 100644 index 00000000000..6fd20b7e2fb --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorOperationsWithPartialSize.java @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+
+package compiler.vectorapi;
+
+import compiler.lib.generators.*;
+import compiler.lib.ir_framework.*;
+import jdk.incubator.vector.*;
+import jdk.test.lib.Asserts;
+
+/**
+ * @test 8371603
+ * @key randomness
+ * @library /test/lib /
+ * @summary Test vector operations with vector size less than MaxVectorSize
+ * @modules jdk.incubator.vector
+ *
+ * @run driver ${test.main.class}
+ */
+
+public class TestVectorOperationsWithPartialSize {
+    private static final int SIZE = 1024;
+    private static final Generators random = Generators.G;
+
+    private static final VectorSpecies<Integer> ISPEC_128 = IntVector.SPECIES_128;
+    private static final VectorSpecies<Long> LSPEC_128 = LongVector.SPECIES_128;
+    private static final VectorSpecies<Float> FSPEC_128 = FloatVector.SPECIES_128;
+    private static final VectorSpecies<Double> DSPEC_128 = DoubleVector.SPECIES_128;
+    private static final VectorSpecies<Integer> ISPEC_256 = IntVector.SPECIES_256;
+    private static final VectorSpecies<Long> LSPEC_256 = LongVector.SPECIES_256;
+
+    private static int[] ia;
+    private static int[] ib;
+    private static long[] la;
+    private static long[] lb;
+    private static float[] fa;
+    private static float[] fb;
+    private static double[] da;
+    private static double[] db;
+    private static boolean[] m;
+    private static boolean[] mr;
+    private static int[] indices;
+
+    static {
+        ia = new int[SIZE];
+        ib = new int[SIZE];
+        la = new long[SIZE];
+        lb = new long[SIZE];
+        fa = new float[SIZE];
+        fb = new float[SIZE];
+        da = new double[SIZE];
+        db = new double[SIZE];
+        m = new boolean[SIZE];
+        mr = new boolean[SIZE];
+        indices = new int[SIZE];
+
+        random.fill(random.ints(), ia);
+        random.fill(random.longs(), la);
+        random.fill(random.floats(), fa);
+        random.fill(random.doubles(), da);
+        random.fill(random.uniformInts(0, ISPEC_128.length()), indices);
+        for (int i = 0; i < SIZE; i++) {
+            m[i] = i % 2 == 0;
+        }
+    }
+
+    // ================ Load/Store/Gather/Scatter Tests ==================
+
+    private static void verifyLoadStore(int[] expected, int[] actual, int vlen) {
+        for (int i = 0; i < vlen; i++) {
+            Asserts.assertEquals(expected[i], actual[i]);
+        }
+    }
+
+    private static void verifyLoadGatherStoreScatter(int[] expected, int[] actual, int[] indices, int vlen) {
+        for (int i = 0; i < vlen; i++) {
+            Asserts.assertEquals(expected[indices[i]], actual[indices[i]]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
+                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "1",
+                  IRNode.STORE_VECTOR, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
+    public void testLoadStore_128() {
+        IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
+        v.intoArray(ib, 0);
+        verifyLoadStore(ia, ib, ISPEC_128.length());
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.LOAD_VECTOR_MASKED, "1",
+                  IRNode.STORE_VECTOR_MASKED, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=64"})
+    public void testLoadStore_256() {
+        IntVector v = IntVector.fromArray(ISPEC_256, ia, 0);
+        v.intoArray(ib, 0);
+        verifyLoadStore(ia, ib, ISPEC_256.length());
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.LOAD_VECTOR_GATHER_MASKED, "1",
+                  IRNode.STORE_VECTOR_SCATTER_MASKED, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
+    public void testLoadGatherStoreScatter_128() {
+        IntVector v = IntVector.fromArray(ISPEC_128, ia, 0, indices, 0);
+        v.intoArray(ib, 0, indices, 0);
+        verifyLoadGatherStoreScatter(ia, ib, indices, ISPEC_128.length());
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.LOAD_VECTOR_GATHER_MASKED, "1",
+                  IRNode.STORE_VECTOR_SCATTER_MASKED, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=64"})
+    public void testLoadGatherStoreScatter_256() {
+        IntVector v = IntVector.fromArray(ISPEC_256, ia, 0, indices, 0);
+        v.intoArray(ib, 0, indices, 0);
+        verifyLoadGatherStoreScatter(ia, ib, indices, ISPEC_256.length());
+    }
+
+    // ===================== Reduction Tests - Add =====================
+
+    interface binOpInt {
+        int apply(int a, int b);
+    }
+
+    interface binOpLong {
+        long apply(long a, long b);
+    }
+
+    private static int reduceLanes(int init, int[] arr, int vlen, binOpInt f) {
+        int result = init;
+        for (int i = 0; i < vlen; i++) {
+            result = f.apply(arr[i], result);
+        }
+        return result;
+    }
+
+    private static long reduceLanes(long init, long[] arr, int vlen, binOpLong f) {
+        long result = init;
+        for (int i = 0; i < vlen; i++) {
+            result = f.apply(arr[i], result);
+        }
+        return result;
+    }
+
+    // Reduction add operations with integer types are implemented with NEON SIMD instructions
+    // when the vector size is less than or equal to 128-bit.
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
+                  IRNode.ADD_REDUCTION_VI, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
+    public int testAddReductionInt_128() {
+        IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
+        int result = v.reduceLanes(VectorOperators.ADD);
+        Asserts.assertEquals(reduceLanes(0, ia, ISPEC_128.length(), (a, b) -> (a + b)), result);
+        return result;
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.ADD_REDUCTION_VI, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=64"})
+    public int testAddReductionInt_256() {
+        IntVector v = IntVector.fromArray(ISPEC_256, ia, 0);
+        int result = v.reduceLanes(VectorOperators.ADD);
+        Asserts.assertEquals(reduceLanes(0, ia, ISPEC_256.length(), (a, b) -> (a + b)), result);
+        return result;
+    }
+
+    // Reduction add operations with long types are implemented with NEON SIMD instructions
+    // when the vector size is less than or equal to 128-bit.
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
+                  IRNode.ADD_REDUCTION_VL, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
+    public long testAddReductionLong_128() {
+        LongVector v = LongVector.fromArray(LSPEC_128, la, 0);
+        long result = v.reduceLanes(VectorOperators.ADD);
+        Asserts.assertEquals(reduceLanes(0L, la, LSPEC_128.length(), (a, b) -> (a + b)), result);
+        return result;
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.ADD_REDUCTION_VL, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=64"})
+    public long testAddReductionLong_256() {
+        LongVector v = LongVector.fromArray(LSPEC_256, la, 0);
+        long result = v.reduceLanes(VectorOperators.ADD);
+        Asserts.assertEquals(reduceLanes(0L, la, LSPEC_256.length(), (a, b) -> (a + b)), result);
+        return result;
+    }
+
+    private static void verifyAddReductionFloat(float actual, float[] arr, int vlen) {
+        float expected = 0.0f;
+        for (int i = 0; i < vlen; i++) {
+            expected += arr[i];
+        }
+        // Floating point addition reduction ops may introduce rounding errors.
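The ulp-based tolerance used here is needed because a vector reduction combines lanes in a different order than the sequential loop, and floating-point addition is not associative. A stand-alone sketch of the effect, with values picked to make the difference visible, before the verification code continues below:

    public class FpOrderDemo {
        public static void main(String[] args) {
            float[] v = {1e8f, 1.0f, -1e8f, 1.0f};

            // Sequential order: ((0 + 1e8) + 1) absorbs the 1, then -1e8, then +1.
            float seq = 0.0f;
            for (float x : v) {
                seq += x;
            }

            // Pairwise order, like a tree-shaped lane reduction:
            float pair = (v[0] + v[2]) + (v[1] + v[3]);

            System.out.println(seq + " vs " + pair);  // prints 1.0 vs 2.0
        }
    }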
+ float ROUNDING_ERROR_FACTOR_ADD = 10.0f; + float tolerance = Math.ulp(expected) * ROUNDING_ERROR_FACTOR_ADD; + if (Math.abs(expected - actual) > tolerance) { + throw new RuntimeException( + "assertEqualsWithTolerance" + + ": expected " + expected + " but was " + actual + + " (tolerance: " + tolerance + ", diff: " + Math.abs(expected - actual) + ")" + ); + } + } + + private static void verifyAddReductionDouble(double actual, double[] arr, int vlen) { + double expected = 0.0; + for (int i = 0; i < vlen; i++) { + expected += arr[i]; + } + // Floating point addition reduction ops may introduce rounding errors. + double ROUNDING_ERROR_FACTOR_ADD = 10.0; + double tolerance = Math.ulp(expected) * ROUNDING_ERROR_FACTOR_ADD; + if (Math.abs(expected - actual) > tolerance) { + throw new RuntimeException( + "assertEqualsWithTolerance" + + ": expected " + expected + " but was " + actual + + " (tolerance: " + tolerance + ", diff: " + Math.abs(expected - actual) + ")" + ); + } + } + + @Test + @IR(counts = {IRNode.VECTOR_MASK_GEN, "1", + IRNode.ADD_REDUCTION_VF, "1"}, + applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"}) + public float testAddReductionFloat() { + FloatVector v = FloatVector.fromArray(FSPEC_128, fa, 0); + float result = v.reduceLanes(VectorOperators.ADD); + verifyAddReductionFloat(result, fa, FSPEC_128.length()); + return result; + } + + + @Test + @IR(counts = {IRNode.VECTOR_MASK_GEN, "1", + IRNode.ADD_REDUCTION_VD, "1"}, + applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"}) + public double testAddReductionDouble() { + DoubleVector v = DoubleVector.fromArray(DSPEC_128, da, 0); + double result = v.reduceLanes(VectorOperators.ADD); + verifyAddReductionDouble(result, da, DSPEC_128.length()); + return result; + } + + // ============== Reduction Tests - Logical ============== + + @Test + @IR(counts = {IRNode.VECTOR_MASK_GEN, "1", + IRNode.AND_REDUCTION_V, "1"}, + applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"}) + public int testAndReduction() { + IntVector v = IntVector.fromArray(ISPEC_128, ia, 0); + int result = v.reduceLanes(VectorOperators.AND); + Asserts.assertEquals(reduceLanes(-1, ia, ISPEC_128.length(), (a, b) -> (a & b)), result); + return result; + } + + @Test + @IR(counts = {IRNode.VECTOR_MASK_GEN, "1", + IRNode.OR_REDUCTION_V, "1"}, + applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"}) + public int testOrReduction() { + IntVector v = IntVector.fromArray(ISPEC_128, ia, 0); + int result = v.reduceLanes(VectorOperators.OR); + Asserts.assertEquals(reduceLanes(0, ia, ISPEC_128.length(), (a, b) -> (a | b)), result); + return result; + } + + @Test + @IR(counts = {IRNode.VECTOR_MASK_GEN, "1", + IRNode.XOR_REDUCTION_V, "1"}, + applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"}) + public int testXorReduction() { + IntVector v = IntVector.fromArray(ISPEC_128, ia, 0); + int result = v.reduceLanes(VectorOperators.XOR); + Asserts.assertEquals(reduceLanes(0, ia, ISPEC_128.length(), (a, b) -> (a ^ b)), result); + return result; + } + + // ===================== Reduction Tests - Min/Max ===================== + + // Reduction min operations with non-long types are implemented with NEON SIMD instructions + // when the vector size is less than or equal to 128-bit. 
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
+                  IRNode.MIN_REDUCTION_V, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
+    public int testMinReductionInt_128() {
+        IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
+        int result = v.reduceLanes(VectorOperators.MIN);
+        Asserts.assertEquals(reduceLanes(Integer.MAX_VALUE, ia, ISPEC_128.length(), (a, b) -> Math.min(a, b)), result);
+        return result;
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.MIN_REDUCTION_V, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 64"})
+    public int testMinReductionInt_256() {
+        IntVector v = IntVector.fromArray(ISPEC_256, ia, 0);
+        int result = v.reduceLanes(VectorOperators.MIN);
+        Asserts.assertEquals(reduceLanes(Integer.MAX_VALUE, ia, ISPEC_256.length(), (a, b) -> Math.min(a, b)), result);
+        return result;
+    }
+
+    // Reduction max operations with non-long types are implemented with NEON SIMD instructions
+    // when the vector size is less than or equal to 128-bit.
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
+                  IRNode.MAX_REDUCTION_V, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
+    public int testMaxReductionInt_128() {
+        IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
+        int result = v.reduceLanes(VectorOperators.MAX);
+        Asserts.assertEquals(reduceLanes(Integer.MIN_VALUE, ia, ISPEC_128.length(), (a, b) -> Math.max(a, b)), result);
+        return result;
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.MAX_REDUCTION_V, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 64"})
+    public int testMaxReductionInt_256() {
+        IntVector v = IntVector.fromArray(ISPEC_256, ia, 0);
+        int result = v.reduceLanes(VectorOperators.MAX);
+        Asserts.assertEquals(reduceLanes(Integer.MIN_VALUE, ia, ISPEC_256.length(), (a, b) -> Math.max(a, b)), result);
+        return result;
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.MIN_REDUCTION_V, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
+    public static long testMinReductionLong() {
+        LongVector v = LongVector.fromArray(LSPEC_128, la, 0);
+        long result = v.reduceLanes(VectorOperators.MIN);
+        Asserts.assertEquals(reduceLanes(Long.MAX_VALUE, la, LSPEC_128.length(), (a, b) -> Math.min(a, b)), result);
+        return result;
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.MAX_REDUCTION_V, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
+    public static long testMaxReductionLong() {
+        LongVector v = LongVector.fromArray(LSPEC_128, la, 0);
+        long result = v.reduceLanes(VectorOperators.MAX);
+        Asserts.assertEquals(reduceLanes(Long.MIN_VALUE, la, LSPEC_128.length(), (a, b) -> Math.max(a, b)), result);
+        return result;
+    }
+
+    // ====================== VectorMask Tests ======================
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.VECTOR_LOAD_MASK, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
+    public static void testLoadMask() {
+        VectorMask<Integer> vm = VectorMask.fromArray(ISPEC_128, m, 0);
+        vm.not().intoArray(mr, 0);
+        // Verify that the mask is loaded correctly.
+        for (int i = 0; i < ISPEC_128.length(); i++) {
+            Asserts.assertEquals(!m[i], mr[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.VECTOR_MASK_CMP, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
+    public static void testVectorMaskCmp() {
+        IntVector v1 = IntVector.fromArray(ISPEC_128, ia, 0);
+        IntVector v2 = IntVector.fromArray(ISPEC_128, ib, 0);
+        VectorMask<Integer> vm = v1.compare(VectorOperators.LT, v2);
+        vm.intoArray(mr, 0);
+        // Verify that the mask is generated correctly.
+        for (int i = 0; i < ISPEC_128.length(); i++) {
+            Asserts.assertEquals(ia[i] < ib[i], mr[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
+                  IRNode.VECTOR_MASK_FIRST_TRUE, "1"},
+        applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
+    public static int testFirstTrue() {
+        VectorMask<Integer> vm = ISPEC_128.maskAll(false);
+        int result = vm.firstTrue();
+        // The result is the vector length if no lane is true.
+        // This is the default behavior of the firstTrue method.
+        Asserts.assertEquals(ISPEC_128.length(), result);
+        return result;
+    }
+
+    public static void main(String[] args) {
+        TestFramework testFramework = new TestFramework();
+        testFramework.setDefaultWarmup(10000)
+                     .addFlags("--add-modules=jdk.incubator.vector")
+                     .start();
+    }
+}
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded001.java b/test/hotspot/jtreg/vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded001.java
index a684c03e67a..5fbb4d2444e 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded001.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/monitoring/MemoryPoolMBean/isUsageThresholdExceeded/isexceeded001.java
@@ -92,7 +92,8 @@ public static int run(String[] argv, PrintStream out) {
             // but cannot assume this affects the pool we are testing.
             b = new byte[INCREMENT];
 
-            isExceeded = monitor.isUsageThresholdExceeded(pool);
+            // Ensure the observation of isExceeded is sticky to match peakUsage.
+            isExceeded = isExceeded || monitor.isUsageThresholdExceeded(pool);
             log.display("  Allocated heap. isExceeded = " + isExceeded);
 
             // Fetch usage information: use peak usage in comparisons below, in case usage went up and then down.
diff --git a/test/jdk/ProblemList.txt b/test/jdk/ProblemList.txt
index 72a248408ac..caa9ed76c70 100644
--- a/test/jdk/ProblemList.txt
+++ b/test/jdk/ProblemList.txt
@@ -500,6 +500,7 @@ java/awt/GraphicsDevice/DisplayModes/UnknownRefrshRateTest.java 8286436 macosx-a
 java/awt/image/multiresolution/MultiresolutionIconTest.java 8291979 linux-x64,windows-all
 java/awt/event/SequencedEvent/MultipleContextsFunctionalTest.java 8305061 macosx-x64
 sun/java2d/DirectX/OnScreenRenderingResizeTest/OnScreenRenderingResizeTest.java 8301177 linux-x64
+sun/awt/image/bug8038000.java 8373065 generic-all
 
 # Several tests which fail on some hidpi systems/macosx12-aarch64 system
 java/awt/Window/8159168/SetShapeTest.java 8274106 macosx-aarch64
diff --git a/test/jdk/java/lang/management/MemoryMXBean/MemoryManagement.java b/test/jdk/java/lang/management/MemoryMXBean/MemoryManagement.java
index b136b724b71..f6c7446d1f3 100644
--- a/test/jdk/java/lang/management/MemoryMXBean/MemoryManagement.java
+++ b/test/jdk/java/lang/management/MemoryMXBean/MemoryManagement.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,7 +49,7 @@
  *
  * @modules jdk.management
  *
- * @run main/timeout=600 MemoryManagement
+ * @run main/othervm/timeout=600 -Xmn8m MemoryManagement
  */
 
 import java.lang.management.*;
@@ -58,6 +58,10 @@ import javax.management.openmbean.CompositeData;
 
 public class MemoryManagement {
+
+    private static final int YOUNG_GEN_SIZE = 8 * 1024 * 1024; // Must match -Xmn set on the @run line
+    private static final int NUM_CHUNKS = 2;
+
     private static final MemoryMXBean mm = ManagementFactory.getMemoryMXBean();
     private static final List<MemoryPoolMXBean> pools = Collections.synchronizedList(ManagementFactory.getMemoryPoolMXBeans());
@@ -66,9 +70,6 @@ public class MemoryManagement {
     private static volatile MemoryPoolMXBean mpool = null;
     private static volatile boolean trace = false;
    private static volatile boolean testFailed = false;
-    private static final int NUM_CHUNKS = 2;
-    // Must match -Xmn set on the @run line
-    private static final int YOUNG_GEN_SIZE = 8 * 1024 * 1024;
     private static volatile long chunkSize;
     private static volatile int listenerInvoked = 0;
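The MemoryManagement change pins the young generation with -Xmn8m and keeps the Java constant in sync, so the chunk size derived from it stays predictable. A hypothetical sketch of the sizing relationship the test depends on (the real test also consults pool usage at run time):

    public class ChunkSizeDemo {
        // Must match -Xmn8m on the @run line, exactly as in the test above.
        private static final int YOUNG_GEN_SIZE = 8 * 1024 * 1024;
        private static final int NUM_CHUNKS = 2;

        public static void main(String[] args) {
            // Sizing chunks so that NUM_CHUNKS allocations fill the young
            // generation forces allocation behavior the test can predict.
            long chunkSize = YOUNG_GEN_SIZE / NUM_CHUNKS;
            System.out.println("chunkSize = " + chunkSize + " bytes");
        }
    }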