@@ -749,6 +749,12 @@ static void initBitsProgramPointer(Transform &t) {
749749 assert (bits_program_pointer == t.tgt .bitsPointers ());
750750}
751751
752+ static uint64_t aligned_alloc_size (uint64_t size, unsigned align) {
753+ if (size <= align)
754+ return align;
755+ return add_saturate (size, align - 1 );
756+ }
757+
752758static void calculateAndInitConstants (Transform &t) {
753759 if (!bits_program_pointer)
754760 initBitsProgramPointer (t);
@@ -757,6 +763,7 @@ static void calculateAndInitConstants(Transform &t) {
757763 const auto &globals_src = t.src .getGlobalVars ();
758764 num_globals_src = globals_src.size ();
759765 unsigned num_globals = num_globals_src;
766+ uint64_t glb_alloc_aligned_size = 0 ;
760767
761768 heap_block_alignment = 8 ;
762769
@@ -765,6 +772,9 @@ static void calculateAndInitConstants(Transform &t) {
765772 for (auto GV : globals_src) {
766773 if (GV->isConst ())
767774 ++num_consts_src;
775+ glb_alloc_aligned_size
776+ = add_saturate (glb_alloc_aligned_size,
777+ aligned_alloc_size (GV->size (), GV->getAlignment ()));
768778 }
769779
770780 for (auto GVT : globals_tgt) {
@@ -773,6 +783,9 @@ static void calculateAndInitConstants(Transform &t) {
773783 if (I == globals_src.end ()) {
774784 ++num_globals;
775785 }
786+ glb_alloc_aligned_size
787+ = add_saturate (glb_alloc_aligned_size,
788+ aligned_alloc_size (GVT->size (), GVT->getAlignment ()));
776789 }
777790
778791 num_ptrinputs = 0 ;
@@ -795,7 +808,6 @@ static void calculateAndInitConstants(Transform &t) {
795808 num_locals_tgt = 0 ;
796809 uint64_t max_gep_src = 0 , max_gep_tgt = 0 ;
797810 uint64_t max_alloc_size = 0 ;
798- uint64_t max_aligned_size = 0 ;
799811 uint64_t max_access_size = 0 ;
800812 uint64_t min_global_size = UINT64_MAX;
801813
@@ -820,13 +832,18 @@ static void calculateAndInitConstants(Transform &t) {
820832
821833 // Mininum access size (in bytes)
822834 uint64_t min_access_size = 8 ;
835+ uint64_t loc_src_alloc_aligned_size = 0 ;
836+ uint64_t loc_tgt_alloc_aligned_size = 0 ;
823837 unsigned min_vect_elem_sz = 0 ;
824838 bool does_mem_access = false ;
825839 bool has_ptr_load = false ;
826840
827841 for (auto fn : { &t.src , &t.tgt }) {
828- unsigned &cur_num_locals = fn == &t.src ? num_locals_src : num_locals_tgt;
829- uint64_t &cur_max_gep = fn == &t.src ? max_gep_src : max_gep_tgt;
842+ bool is_src = fn == &t.src ;
843+ unsigned &cur_num_locals = is_src ? num_locals_src : num_locals_tgt;
844+ uint64_t &cur_max_gep = is_src ? max_gep_src : max_gep_tgt;
845+ uint64_t &loc_alloc_aligned_size
846+ = is_src ? loc_src_alloc_aligned_size : loc_tgt_alloc_aligned_size;
830847
831848 for (auto &v : fn->getInputs ()) {
832849 auto *i = dynamic_cast <const Input *>(&v);
@@ -889,8 +906,9 @@ static void calculateAndInitConstants(Transform &t) {
889906
890907 if (auto *mi = dynamic_cast <const MemInstr *>(&i)) {
891908 auto [alloc, align] = mi->getMaxAllocSize ();
892- max_alloc_size = max (max_alloc_size, alloc);
893- max_aligned_size = max (max_aligned_size, add_saturate (alloc, align-1 ));
909+ max_alloc_size = max (max_alloc_size, alloc);
910+ loc_alloc_aligned_size = add_saturate (loc_alloc_aligned_size,
911+ aligned_alloc_size (alloc, align));
894912 max_access_size = max (max_access_size, mi->getMaxAccessSize ());
895913 cur_max_gep = add_saturate (cur_max_gep, mi->getMaxGEPOffset ());
896914 has_free |= mi->canFree ();
@@ -916,7 +934,7 @@ static void calculateAndInitConstants(Transform &t) {
916934
917935 } else if (isCast (ConversionOp::Int2Ptr, i) ||
918936 isCast (ConversionOp::Ptr2Int, i)) {
919- max_alloc_size = max_access_size = cur_max_gep = max_aligned_size
937+ max_alloc_size = max_access_size = cur_max_gep = loc_alloc_aligned_size
920938 = UINT64_MAX;
921939 has_int2ptr |= isCast (ConversionOp::Int2Ptr, i) != nullptr ;
922940 has_ptr2int |= isCast (ConversionOp::Ptr2Int, i) != nullptr ;
@@ -998,20 +1016,24 @@ static void calculateAndInitConstants(Transform &t) {
9981016 // counterexamples more readable
9991017 // Allow an extra bit for the sign
10001018 auto max_geps
1001- = ilog2_ceil (add_saturate (max (max_gep_src, max_gep_tgt), max_access_size),
1002- true ) + 1 ;
1019+ = bit_width (add_saturate (max (max_gep_src, max_gep_tgt), max_access_size))
1020+ + 1 ;
10031021 bits_for_offset = min (round_up (max_geps, 4 ), (uint64_t )t.src .bitsPtrOffset ());
10041022 bits_for_offset = min (bits_for_offset, config::max_offset_bits);
10051023 bits_for_offset = min (bits_for_offset, bits_program_pointer);
10061024
10071025 // ASSUMPTION: programs can only allocate up to half of address space
10081026 // so the first bit of size is always zero.
10091027 // We need this assumption to support negative offsets.
1010- bits_size_t = ilog2_ceil (max_alloc_size, true );
1028+ bits_size_t = bit_width (max_alloc_size);
10111029 bits_size_t = min (max (bits_for_offset, bits_size_t ), bits_program_pointer-1 );
10121030
10131031 // +1 because the pointer after the object must be valid (can't overflow)
1014- bits_ptr_address = ilog2_ceil (add_saturate (max_aligned_size, 1 ), true );
1032+ uint64_t loc_alloc_aligned_size
1033+ = max (loc_src_alloc_aligned_size, loc_tgt_alloc_aligned_size);
1034+ bits_ptr_address
1035+ = add_saturate (
1036+ bit_width (max (glb_alloc_aligned_size, loc_alloc_aligned_size)), 1 );
10151037
10161038 // as an (unsound) optimization, we fix the first bit of the addr for
10171039 // local/non-local if both exist (to reduce axiom fml size)
@@ -1044,7 +1066,8 @@ static void calculateAndInitConstants(Transform &t) {
10441066 << " \n bits_ptr_address: " << bits_ptr_address
10451067 << " \n bits_program_pointer: " << bits_program_pointer
10461068 << " \n max_alloc_size: " << max_alloc_size
1047- << " \n max_aligned_size: " << max_aligned_size
1069+ << " \n glb_alloc_aligned_size: " << glb_alloc_aligned_size
1070+ << " \n loc_alloc_aligned_size: " << loc_alloc_aligned_size
10481071 << " \n min_access_size: " << min_access_size
10491072 << " \n max_access_size: " << max_access_size
10501073 << " \n bits_byte: " << bits_byte
0 commit comments