Skip to content

Commit d9a8ce5

Browse files
authored
Merge branch 'openjdk:master' into backport-mrserb-762423d6-master
2 parents 3bb11c3 + 3a6b4ef commit d9a8ce5

File tree

93 files changed

+3854
-470
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+3854
-470
lines changed

src/hotspot/cpu/ppc/stubGenerator_ppc.cpp

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,177 @@ class StubGenerator: public StubCodeGenerator {
734734
return start;
735735
}
736736

737+
// Computes the Galois/Counter Mode (GCM) product and reduction.
738+
//
739+
// This function performs polynomial multiplication of the subkey H with
740+
// the current GHASH state using vectorized polynomial multiplication (`vpmsumd`).
741+
// The subkey H is supplied in three forms: its lower half, its higher half,
// and the half-swapped H used for the middle (cross) product.
742+
// The multiplication results are reduced using `vConstC2` to stay within GF(2^128).
743+
// The final computed value is stored back into `vState`.
744+
static void computeGCMProduct(MacroAssembler* _masm,
745+
VectorRegister vLowerH, VectorRegister vH, VectorRegister vHigherH,
746+
VectorRegister vConstC2, VectorRegister vZero, VectorRegister vState,
747+
VectorRegister vLowProduct, VectorRegister vMidProduct, VectorRegister vHighProduct,
748+
VectorRegister vReducedLow, VectorRegister vTmp8, VectorRegister vTmp9,
749+
VectorRegister vCombinedResult, VectorRegister vSwappedH) {
750+
__ vxor(vH, vH, vState);
751+
__ vpmsumd(vLowProduct, vLowerH, vH); // L : Lower Half of subkey H
752+
__ vpmsumd(vMidProduct, vSwappedH, vH); // M : Combined halves of subkey H
753+
__ vpmsumd(vHighProduct, vHigherH, vH); // H : Higher Half of subkey H
754+
__ vpmsumd(vReducedLow, vLowProduct, vConstC2); // Reduction
755+
__ vsldoi(vTmp8, vMidProduct, vZero, 8); // mL : Extract the lower 64 bits of M
756+
__ vsldoi(vTmp9, vZero, vMidProduct, 8); // mH : Extract the higher 64 bits of M
757+
__ vxor(vLowProduct, vLowProduct, vTmp8); // LL + mL : Partial result for lower half
758+
__ vxor(vHighProduct, vHighProduct, vTmp9); // HH + mH : Partial result for upper half
759+
__ vsldoi(vLowProduct, vLowProduct, vLowProduct, 8); // Swap
760+
__ vxor(vLowProduct, vLowProduct, vReducedLow);
761+
__ vsldoi(vCombinedResult, vLowProduct, vLowProduct, 8); // Swap
762+
__ vpmsumd(vLowProduct, vLowProduct, vConstC2); // Reduction using constant
763+
__ vxor(vCombinedResult, vCombinedResult, vHighProduct); // Combine reduced Low & High products
764+
__ vxor(vState, vLowProduct, vCombinedResult);
765+
}
766+
767+
// Generate stub for ghash process blocks.
768+
//
769+
// Arguments for generated stub:
770+
// state: R3_ARG1 (long[] state)
771+
// subkeyH: R4_ARG2 (long[] subH)
772+
// data: R5_ARG3 (byte[] data)
773+
// blocks: R6_ARG4 (number of 16-byte blocks to process)
774+
//
775+
// The polynomials are processed in bit-reflected order for efficiency reasons.
776+
// This optimization leverages the structure of the Galois field arithmetic
777+
// to minimize the number of bit manipulations required during multiplication.
778+
// For an explanation of how this works, refer to:
779+
// Vinodh Gopal, Erdinc Ozturk, Wajdi Feghali, Jim Guilford, Gil Wolrich,
780+
// Martin Dixon. "Optimized Galois-Counter-Mode Implementation on Intel®
781+
// Architecture Processor"
782+
// http://web.archive.org/web/20130609111954/http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/communications-ia-galois-counter-mode-paper.pdf
783+
//
784+
//
785+
address generate_ghash_processBlocks() {
786+
StubCodeMark mark(this, "StubRoutines", "ghash");
787+
address start = __ function_entry();
788+
789+
// Registers for parameters
790+
Register state = R3_ARG1; // long[] state
791+
Register subkeyH = R4_ARG2; // long[] subH
792+
Register data = R5_ARG3; // byte[] data
793+
Register blocks = R6_ARG4;
794+
Register temp1 = R8;
795+
// Vector Registers
796+
VectorRegister vZero = VR0;
797+
VectorRegister vH = VR1;
798+
VectorRegister vLowerH = VR2;
799+
VectorRegister vHigherH = VR3;
800+
VectorRegister vLowProduct = VR4;
801+
VectorRegister vMidProduct = VR5;
802+
VectorRegister vHighProduct = VR6;
803+
VectorRegister vReducedLow = VR7;
804+
VectorRegister vTmp8 = VR8;
805+
VectorRegister vTmp9 = VR9;
806+
VectorRegister vTmp10 = VR10;
807+
VectorRegister vSwappedH = VR11;
808+
VectorRegister vTmp12 = VR12;
809+
VectorRegister loadOrder = VR13;
810+
VectorRegister vHigh = VR14;
811+
VectorRegister vLow = VR15;
812+
VectorRegister vState = VR16;
813+
VectorRegister vPerm = VR17;
814+
VectorRegister vCombinedResult = VR18;
815+
VectorRegister vConstC2 = VR19;
816+
817+
__ li(temp1, 0xc2);
818+
__ sldi(temp1, temp1, 56);
819+
__ vspltisb(vZero, 0);
820+
__ mtvrd(vConstC2, temp1);
821+
__ lxvd2x(vH->to_vsr(), subkeyH);
822+
__ lxvd2x(vState->to_vsr(), state);
823+
// Operations to obtain the lower and higher halves of subkey H.
824+
__ vspltisb(vReducedLow, 1);
825+
__ vspltisb(vTmp10, 7);
826+
__ vsldoi(vTmp8, vZero, vReducedLow, 1); // 0x1
827+
__ vor(vTmp8, vConstC2, vTmp8); // 0xC2...1
828+
__ vsplt(vTmp9, 0, vH); // MSB of H
829+
__ vsl(vH, vH, vReducedLow); // H <<= 1
830+
__ vsrab(vTmp9, vTmp9, vTmp10);
831+
__ vand(vTmp9, vTmp9, vTmp8); // Carry
832+
__ vxor(vTmp10, vH, vTmp9);
833+
__ vsldoi(vConstC2, vZero, vConstC2, 8);
834+
__ vsldoi(vSwappedH, vTmp10, vTmp10, 8); // swap Lower and Higher Halves of subkey H
835+
__ vsldoi(vLowerH, vZero, vSwappedH, 8); // H.L
836+
__ vsldoi(vHigherH, vSwappedH, vZero, 8); // H.H
837+
#ifdef ASSERT
838+
__ cmpwi(CCR0, blocks, 0); // Compare 'blocks' (R6_ARG4) with zero
839+
__ asm_assert_ne("blocks should NOT be zero");
840+
#endif
841+
__ clrldi(blocks, blocks, 32);
842+
__ mtctr(blocks);
843+
__ lvsl(loadOrder, temp1);
844+
#ifdef VM_LITTLE_ENDIAN
845+
__ vspltisb(vTmp12, 0xf);
846+
__ vxor(loadOrder, loadOrder, vTmp12);
847+
#define LE_swap_bytes(x) __ vec_perm(x, x, x, loadOrder)
848+
#else
849+
#define LE_swap_bytes(x)
850+
#endif
851+
852+
// This code performs Karatsuba multiplication in Galois fields to compute the GHASH operation.
853+
//
854+
// The Karatsuba method breaks the multiplication of two 128-bit numbers into smaller parts,
855+
// performing three multiplications of 64-bit halves (each yielding a 128-bit product) and combining the results efficiently.
856+
//
857+
// (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
858+
// (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
859+
//
860+
// Inputs:
861+
// - vH: The data vector (state), containing both B0 (lower half) and B1 (higher half).
862+
// - vLowerH: Lower half of the subkey H (A0).
863+
// - vHigherH: Higher half of the subkey H (A1).
864+
// - vConstC2: Constant used for reduction (for final processing).
865+
//
866+
// References:
867+
// Shay Gueron, Michael E. Kounavis.
868+
// "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the GCM Mode"
869+
// https://web.archive.org/web/20110609115824/https://software.intel.com/file/24918
870+
//
871+
Label L_aligned_loop, L_store, L_unaligned_loop, L_initialize_unaligned_loop;
872+
__ andi(temp1, data, 15);
873+
__ cmpwi(CCR0, temp1, 0);
874+
__ bne(CCR0, L_initialize_unaligned_loop);
875+
876+
__ bind(L_aligned_loop);
877+
__ lvx(vH, temp1, data);
878+
LE_swap_bytes(vH);
879+
computeGCMProduct(_masm, vLowerH, vH, vHigherH, vConstC2, vZero, vState,
880+
vLowProduct, vMidProduct, vHighProduct, vReducedLow, vTmp8, vTmp9, vCombinedResult, vSwappedH);
881+
__ addi(data, data, 16);
882+
__ bdnz(L_aligned_loop);
883+
__ b(L_store);
884+
885+
__ bind(L_initialize_unaligned_loop);
886+
__ li(temp1, 0);
887+
__ lvsl(vPerm, temp1, data);
888+
__ lvx(vHigh, temp1, data);
889+
#ifdef VM_LITTLE_ENDIAN
890+
__ vspltisb(vTmp12, -1);
891+
__ vxor(vPerm, vPerm, vTmp12);
892+
#endif
893+
__ bind(L_unaligned_loop);
894+
__ addi(data, data, 16);
895+
__ lvx(vLow, temp1, data);
896+
__ vec_perm(vH, vHigh, vLow, vPerm);
897+
computeGCMProduct(_masm, vLowerH, vH, vHigherH, vConstC2, vZero, vState,
898+
vLowProduct, vMidProduct, vHighProduct, vReducedLow, vTmp8, vTmp9, vCombinedResult, vSwappedH);
899+
__ vmr(vHigh, vLow);
900+
__ bdnz(L_unaligned_loop);
901+
902+
__ bind(L_store);
903+
__ stxvd2x(vState->to_vsr(), state);
904+
__ blr();
905+
906+
return start;
907+
}
737908
// -XX:+OptimizeFill : convert fill/copy loops into intrinsic
738909
//
739910
// The code is implemented(ported from sparc) as we believe it benefits JVM98, however
@@ -4851,6 +5022,10 @@ class StubGenerator: public StubCodeGenerator {
48515022
StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();
48525023
}
48535024

5025+
if (UseGHASHIntrinsics) {
5026+
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
5027+
}
5028+
48545029
if (UseAESIntrinsics) {
48555030
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
48565031
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();

src/hotspot/cpu/ppc/vm_version_ppc.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,14 @@ void VM_Version::initialize() {
319319
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
320320
}
321321

322-
if (UseGHASHIntrinsics) {
323-
warning("GHASH intrinsics are not available on this CPU");
322+
if (VM_Version::has_vsx()) {
323+
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
324+
UseGHASHIntrinsics = true;
325+
}
326+
} else if (UseGHASHIntrinsics) {
327+
if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
328+
warning("GHASH intrinsics are not available on this CPU");
329+
}
324330
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
325331
}
326332

src/hotspot/share/cds/classListParser.cpp

Lines changed: 31 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "jvm.h"
4343
#include "logging/log.hpp"
4444
#include "logging/logTag.hpp"
45+
#include "memory/oopFactory.hpp"
4546
#include "memory/resourceArea.hpp"
4647
#include "oops/constantPool.hpp"
4748
#include "runtime/atomic.hpp"
@@ -97,6 +98,12 @@ ClassListParser::~ClassListParser() {
9798
_instance = nullptr;
9899
}
99100

101+
int ClassListParser::parse_classlist(const char* classlist_path, ParseMode parse_mode, TRAPS) {
102+
UnregisteredClasses::initialize(CHECK_0);
103+
ClassListParser parser(classlist_path, parse_mode);
104+
return parser.parse(THREAD); // returns the number of classes loaded.
105+
}
106+
100107
int ClassListParser::parse(TRAPS) {
101108
int class_count = 0;
102109

@@ -390,6 +397,19 @@ bool ClassListParser::parse_uint_option(const char* option_name, int* value) {
390397
return false;
391398
}
392399

400+
objArrayOop ClassListParser::get_specified_interfaces(TRAPS) {
401+
const int n = _interfaces->length();
402+
if (n == 0) {
403+
return nullptr;
404+
} else {
405+
objArrayOop array = oopFactory::new_objArray(vmClasses::Class_klass(), n, CHECK_NULL);
406+
for (int i = 0; i < n; i++) {
407+
array->obj_at_put(i, lookup_class_by_id(_interfaces->at(i))->java_mirror());
408+
}
409+
return array;
410+
}
411+
}
412+
393413
void ClassListParser::print_specified_interfaces() {
394414
const int n = _interfaces->length();
395415
jio_fprintf(defaultStream::error_stream(), "Currently specified interfaces[%d] = {\n", n);
@@ -474,7 +494,17 @@ InstanceKlass* ClassListParser::load_class_from_source(Symbol* class_name, TRAPS
474494

475495
ResourceMark rm;
476496
char * source_path = os::strdup_check_oom(ClassLoader::uri_to_path(_source));
477-
InstanceKlass* k = UnregisteredClasses::load_class(class_name, source_path, CHECK_NULL);
497+
InstanceKlass* specified_super = lookup_class_by_id(_super);
498+
Handle super_class(THREAD, specified_super->java_mirror());
499+
objArrayOop r = get_specified_interfaces(CHECK_NULL);
500+
objArrayHandle interfaces(THREAD, r);
501+
InstanceKlass* k = UnregisteredClasses::load_class(class_name, source_path,
502+
super_class, interfaces, CHECK_NULL);
503+
if (k->java_super() != specified_super) {
504+
error("The specified super class %s (id %d) does not match actual super class %s",
505+
specified_super->external_name(), _super,
506+
k->java_super()->external_name());
507+
}
478508
if (k->local_interfaces()->length() != _interfaces->length()) {
479509
print_specified_interfaces();
480510
print_actual_interfaces(k);
@@ -682,46 +712,3 @@ InstanceKlass* ClassListParser::lookup_class_by_id(int id) {
682712
assert(*klass_ptr != nullptr, "must be");
683713
return *klass_ptr;
684714
}
685-
686-
687-
InstanceKlass* ClassListParser::lookup_super_for_current_class(Symbol* super_name) {
688-
if (!is_loading_from_source()) {
689-
return nullptr;
690-
}
691-
692-
InstanceKlass* k = lookup_class_by_id(super());
693-
if (super_name != k->name()) {
694-
error("The specified super class %s (id %d) does not match actual super class %s",
695-
k->name()->as_klass_external_name(), super(),
696-
super_name->as_klass_external_name());
697-
}
698-
return k;
699-
}
700-
701-
InstanceKlass* ClassListParser::lookup_interface_for_current_class(Symbol* interface_name) {
702-
if (!is_loading_from_source()) {
703-
return nullptr;
704-
}
705-
706-
const int n = _interfaces->length();
707-
if (n == 0) {
708-
error("Class %s implements the interface %s, but no interface has been specified in the input line",
709-
_class_name, interface_name->as_klass_external_name());
710-
ShouldNotReachHere();
711-
}
712-
713-
int i;
714-
for (i=0; i<n; i++) {
715-
InstanceKlass* k = lookup_class_by_id(_interfaces->at(i));
716-
if (interface_name == k->name()) {
717-
return k;
718-
}
719-
}
720-
721-
// interface_name is not specified by the "interfaces:" keyword.
722-
print_specified_interfaces();
723-
error("The interface %s implemented by class %s does not match any of the specified interface IDs",
724-
interface_name->as_klass_external_name(), _class_name);
725-
ShouldNotReachHere();
726-
return nullptr;
727-
}

src/hotspot/share/cds/classListParser.hpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -134,12 +134,10 @@ class ClassListParser : public StackObj {
134134

135135
ClassListParser(const char* file, ParseMode _parse_mode);
136136
~ClassListParser();
137+
objArrayOop get_specified_interfaces(TRAPS);
137138

138139
public:
139-
static int parse_classlist(const char* classlist_path, ParseMode parse_mode, TRAPS) {
140-
ClassListParser parser(classlist_path, parse_mode);
141-
return parser.parse(THREAD); // returns the number of classes loaded.
142-
}
140+
static int parse_classlist(const char* classlist_path, ParseMode parse_mode, TRAPS);
143141

144142
static bool is_parsing_thread();
145143
static ClassListParser* instance() {
@@ -192,11 +190,6 @@ class ClassListParser : public StackObj {
192190

193191
bool lambda_form_line() { return _lambda_form_line; }
194192

195-
// Look up the super or interface of the current class being loaded
196-
// (in this->load_current_class()).
197-
InstanceKlass* lookup_super_for_current_class(Symbol* super_name);
198-
InstanceKlass* lookup_interface_for_current_class(Symbol* interface_name);
199-
200193
static void populate_cds_indy_info(const constantPoolHandle &pool, int cp_index, CDSIndyInfo* cii, TRAPS);
201194
};
202195
#endif // SHARE_CDS_CLASSLISTPARSER_HPP

0 commit comments

Comments
 (0)