Skip to content

Commit bd33ece

Browse files
committed
win32/vmem.h change emulated calloc() to OS/CRT's native calloc()
Most OSes/libcs have an optimization where calloc() sometimes, or most of the time, does not call memset() in userland, wasting CPU, to zeroize brand new memory blocks/pages obtained fresh from the kernel. The larger the calloc() allocation is, the higher the chance the memory blocks will be obtained fresh from the kernel. MS CRT's calloc() is a wrapper function that is either thin or heavy (personal opinions), and ultimately forwards to HeapAlloc(hSecretUBHandle,HEAP_ZERO_MEMORY,size). Whether [email protected] has or doesn't have the "don't memset(,0,) fresh kernel pages" optimization, this author doesn't know and it is irrelevant. WinPerl did its part, to take advantage of the optimization if it exists inside Microsoft's closed source OS. Historically perlhost.h/vmem.h was perl5xx.dll emulating calloc() because this area of the interp is "unfinished business" from the late 1990s where Win95 and "Win32s Runtime" on Win 3.11 WFW OS compatibility was critical for WinPerl. WinNT Kernel Win OSes have always been POSIX-like or actually Unix SVR1 1983 compatible from the start (and remained compatible with POSIX/SVR1 1983 until WSL 1). The alternate, never used memory allocator in vmem.h doesn't have a Calloc() method, so the nextgen and current "native kernel32.dll malloc()" code couldn't implement a Calloc() method. The DIY malloc() impl doesn't have a Calloc() because in 1993-1997-ish, VirtualAlloc, VirtualProtect, VirtualFree, couldn't be used in WinPerl for some reason lost to time. This author's Win95 Kernel32.dll file exports all 3 functions and they are not stubs that only do "return STATUS_NOT_IMPLEMENTED;". do_crt_invalid_parameter() was added so the DIY allocator behaves like the native MS CRT calloc() behaves. perlhost.h's design concept is that the library can be copy pasted without modification to the PHP and Python interps, something like that. Therefore perlhost.h and vmem.h aren't allowed to be aware of the Perl C API. So no croak()/die()/die_noperl(). 
-split off the very cold "Free to wrong pool" panic branch into its own function. Less "dead" machine code for the CPU to skip around in the perf critical VMemNL::Free() call. VC 2022 -O1 LTO inlined the DispatchWrongPool() method against our wishes, so override VC 2022's and GCC's inline criteria. We do not want inlining here. -move 2 of the void* writes out of the CS lock inside PerlMemSharedMalloc() PerlMemMalloc and PerlMemParseMalloc and the Calloc()s, they are writes of constants to a new mem block and not reads/writes to the head (VMem*) object, or the first block hanging off the VMem* LL, so it's not needed to mutex lock those 2 writes -m_lRefCount assignment moved to the top of VMem::VMem so CC doesn't need to save var this around fn call InitializeCriticalSection in this function -change return NULL; to return ptr; better codegen on MSVC 2022, since optimizer doesn't realize var ptr is a free 0x0 value after false test and instead emits xor RAX, RAX; -reorder the VMem struct so VMemNL m_VMem (the per-my_perl pool) is at the front
1 parent 09a0707 commit bd33ece

File tree

3 files changed

+190
-42
lines changed

3 files changed

+190
-42
lines changed

iperlsys.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,12 @@ struct IPerlLIOInfo
800800
Interface for perl memory allocation
801801
*/
802802

803+
/* let CPAN conditionally know if these brand new macros are available:
804+
PerlMem_calloc PerlMemShared_calloc PerlMemParse_calloc
805+
note, we must always define this macro, regardless if the build config
806+
is using these vtables, or this file NOOPs itself to the OS's libc */
807+
#define PERL_IMPLICIT_SYS_HAS_CALLOC 1
808+
803809
#if defined(PERL_IMPLICIT_SYS)
804810

805811
/* IPerlMem */

win32/perlhost.h

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,7 @@ class CPerlHost
6666
inline void* Malloc(size_t size) { return m_VMem.Malloc(size); };
6767
inline void* Realloc(void* ptr, size_t size) { return m_VMem.Realloc(ptr, size); };
6868
inline void Free(void* ptr) { m_VMem.Free(ptr); };
69-
inline void* Calloc(size_t num, size_t size)
70-
{
71-
size_t count = num*size;
72-
void* lpVoid = Malloc(count);
73-
if (lpVoid)
74-
lpVoid = memset(lpVoid, 0, count);
75-
return lpVoid;
76-
};
69+
inline void* Calloc(size_t num, size_t size) { return m_VMem.Calloc(num, size); }
7770
inline void GetLock(void) { m_VMem.GetLock(); };
7871
inline void FreeLock(void) { m_VMem.FreeLock(); };
7972
inline int IsLocked(void) { return m_VMem.IsLocked(); };
@@ -107,11 +100,11 @@ class CPerlHost
107100
};
108101
inline void* CallocShared(size_t num, size_t size)
109102
{
110-
size_t count = num*size;
111-
void* lpVoid = MallocShared(count);
112-
if (lpVoid)
113-
lpVoid = memset(lpVoid, 0, count);
114-
return lpVoid;
103+
void *result;
104+
GetLockShared();
105+
result = m_pVMemShared->Calloc(num, size);
106+
FreeLockShared();
107+
return result;
115108
};
116109

117110
/* IPerlMemParse */
@@ -124,14 +117,7 @@ class CPerlHost
124117
inline void* MallocParse(size_t size) { return m_pVMemParse->Malloc(size); };
125118
inline void* ReallocParse(void* ptr, size_t size) { return m_pVMemParse->Realloc(ptr, size); };
126119
inline void FreeParse(void* ptr) { m_pVMemParse->Free(ptr); };
127-
inline void* CallocParse(size_t num, size_t size)
128-
{
129-
size_t count = num*size;
130-
void* lpVoid = MallocParse(count);
131-
if (lpVoid)
132-
lpVoid = memset(lpVoid, 0, count);
133-
return lpVoid;
134-
};
120+
inline void* CallocParse(size_t num, size_t size){ return m_pVMemParse->Calloc(num, size); };
135121

136122
/* IPerlEnv */
137123
char *Getenv(const char *varname);
@@ -188,6 +174,18 @@ class CPerlHost
188174
inline VMem* GetMemParse(void) { m_pVMemParse->AddRef(); return m_pVMemParse; };
189175
inline VDir* GetDir(void) { return &m_vDir; };
190176

177+
public:
178+
179+
inline char* MapPathA(const char *pInName) { return m_vDir.MapPathA(pInName); };
180+
inline WCHAR* MapPathW(const WCHAR *pInName) { return m_vDir.MapPathW(pInName); };
181+
inline operator VDir* () { return GetDir(); };
182+
183+
protected:
184+
185+
VMemNL m_VMem; /* make this 1st member of CPerlHost* struct, highest use */
186+
VMem* m_pVMemShared;
187+
VMem* m_pVMemParse;
188+
191189
public:
192190

193191
const struct IPerlMem* m_pHostperlMem;
@@ -200,14 +198,7 @@ class CPerlHost
200198
const struct IPerlSock* m_pHostperlSock;
201199
const struct IPerlProc* m_pHostperlProc;
202200

203-
inline char* MapPathA(const char *pInName) { return m_vDir.MapPathA(pInName); };
204-
inline WCHAR* MapPathW(const WCHAR *pInName) { return m_vDir.MapPathW(pInName); };
205-
inline operator VDir* () { return GetDir(); };
206201
protected:
207-
VMemNL m_VMem;
208-
VMem* m_pVMemShared;
209-
VMem* m_pVMemParse;
210-
211202
LPSTR* m_lppEnvList;
212203
DWORD m_dwEnvCount;
213204
BOOL m_bTopLevel; // is this a toplevel host?

win32/vmem.h

Lines changed: 165 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
// #define _USE_BUDDY_BLOCKS
2828

2929
// #define _DEBUG_MEM
30+
31+
static void * do_crt_invalid_parameter(void);
32+
3033
#ifdef _DEBUG_MEM
3134
#define ASSERT(f) if(!(f)) DebugBreak();
3235

@@ -98,6 +101,14 @@ inline void MEMODSlx(char *str, long x)
98101
*/
99102

100103
#ifdef _USE_LINKED_LIST
104+
105+
# if defined(__GNUC__)
106+
# define VMEM_FORCE_NOINLINE __attribute__((__noinline__))
107+
# elif defined(_MSC_VER)
108+
# define VMEM_FORCE_NOINLINE __declspec(noinline)
109+
# else
110+
# error "Unknown C compiler family type"
111+
#endif
101112
class VMemNL; /* NL = no locks */
102113
class VMem;
103114

@@ -140,6 +151,7 @@ class VMemNL
140151
void* Malloc(size_t size);
141152
void* Realloc(void* pMem, size_t size);
142153
void Free(void* pMem);
154+
void* Calloc(size_t num, size_t size);
143155
void GetLock(void);
144156
void FreeLock(void);
145157
int IsLocked(void);
@@ -151,15 +163,26 @@ class VMemNL
151163
return TRUE;
152164
};
153165

166+
#ifdef _USE_LINKED_LIST
167+
VMEM_FORCE_NOINLINE void* DispatchWrongPool(PMEMORY_BLOCK_HEADER ptr);
168+
/* Retval is NULL. Encourages CC to see a better ABI match, and maybe do
169+
a tailcall in ::Realloc(), which would be the same as a real __noreturn
170+
decl or ~0-4 CPU ops bigger. */
171+
#endif
172+
154173
protected:
155174
#ifdef _USE_LINKED_LIST
175+
/* prep work that can be done outside of the CS lock */
176+
/* Pre-link initialization of a new block header: points the block's pPrev
   at this pool's m_Dummy list head and records this pool as the block's
   owner. Only the new block is written here; the pool's list itself is not
   modified until LinkBlock() inserts the block. */
void PrepLinkBlock(PMEMORY_BLOCK_HEADER ptr)
177+
{ /* these 2 of 3 ptrs are pseudo-const addrs into our VMem* obj */
178+
ptr->pPrev = &m_Dummy; /* LL termination sentinel */
179+
ptr->u.owner_nl = this;
180+
}
156181
void LinkBlock(PMEMORY_BLOCK_HEADER ptr)
157182
{
158183
PMEMORY_BLOCK_HEADER next = m_Dummy.pNext;
159184
m_Dummy.pNext = ptr;
160-
ptr->pPrev = &m_Dummy;
161185
ptr->pNext = next;
162-
ptr->u.owner_nl = this;
163186
next->pPrev = ptr;
164187
}
165188
void UnlinkBlock(PMEMORY_BLOCK_HEADER ptr)
@@ -182,6 +205,9 @@ class VMem : public VMemNL {
182205
CRITICAL_SECTION m_cs; // access lock
183206
#endif
184207
volatile long m_lRefCount; // number of current users
208+
#ifdef _USE_LINKED_LIST
209+
VMEM_FORCE_NOINLINE void* DispatchWrongPool(PMEMORY_BLOCK_HEADER ptr);
210+
#endif
185211

186212
public:
187213
VMem();
@@ -190,6 +216,7 @@ class VMem : public VMemNL {
190216
void* Malloc(size_t size);
191217
void* Realloc(void* pMem, size_t size);
192218
void Free(void* pMem);
219+
void* Calloc(size_t num, size_t size);
193220
void GetLock(void);
194221
void FreeLock(void);
195222
inline int IsLocked(void);
@@ -200,6 +227,9 @@ class VMem : public VMemNL {
200227
VMemNL::VMemNL(void)
201228
{
202229
#ifdef _USE_LINKED_LIST
230+
/* addr &m_Dummy happens to be (void*)&m_Dummy == (void*)(VMem*)this
231+
the offset of member m_Dummy inside struct VMem {} is 0x00, and therefore
232+
no U8 offset byte is present in machine code. */
203233
m_Dummy.pNext = m_Dummy.pPrev = &m_Dummy;
204234
m_Dummy.u.owner_nl = this;
205235
#endif
@@ -208,10 +238,10 @@ VMemNL::VMemNL(void)
208238

209239
VMem::VMem(void)
210240
{
241+
m_lRefCount = 1;
211242
#ifdef _USE_LINKED_LIST
212243
InitializeCriticalSection(&m_cs);
213244
#endif _USE_LINKED_LIST
214-
m_lRefCount = 1;
215245
return;
216246
}
217247

@@ -252,8 +282,9 @@ void* VMemNL::Malloc(size_t size)
252282

253283
PMEMORY_BLOCK_HEADER ptr = (PMEMORY_BLOCK_HEADER)malloc(size+sizeof(MEMORY_BLOCK_HEADER));
254284
if (!ptr) {
255-
return NULL;
285+
return ptr; /* NULL */
256286
}
287+
PrepLinkBlock(ptr);
257288
GetLock();
258289
LinkBlock(ptr);
259290
FreeLock();
@@ -282,6 +313,7 @@ void* VMemNL::Realloc(void* pMem, size_t size)
282313
FreeLock();
283314
return NULL;
284315
}
316+
PrepLinkBlock(ptr);
285317
LinkBlock(ptr);
286318
FreeLock();
287319

@@ -297,16 +329,7 @@ void VMemNL::Free(void* pMem)
297329
if (pMem) {
298330
PMEMORY_BLOCK_HEADER ptr = (PMEMORY_BLOCK_HEADER)(((char*)pMem)-sizeof(MEMORY_BLOCK_HEADER));
299331
if (ptr->u.owner_nl != this) {
300-
if (ptr->u.owner_nl) {
301-
#if 1
302-
int *nowhere = NULL;
303-
Perl_warn_nocontext("Free to wrong pool %p not %p",this,ptr->u.owner_nl);
304-
*nowhere = 0; /* this segfault is deliberate,
305-
so you can see the stack trace */
306-
#else
307-
ptr->u.owner_nl->Free(pMem);
308-
#endif
309-
}
332+
DispatchWrongPool(ptr);
310333
return;
311334
}
312335
GetLock();
@@ -331,6 +354,47 @@ Win32 fixes-vmem.h hack to handle free-by-wrong-thread after eval "".
331354
#endif
332355
}
333356

357+
/*
 * Allocate a zero-filled block able to hold num elements of size bytes each
 * and (in the linked-list build) register it with this pool.
 *
 * Returns a pointer to the usable memory just past the block header, or NULL
 * if the underlying calloc() fails. If num * size (plus the header and a
 * 0x20 byte safety margin) cannot be represented in a size_t the request is
 * routed to do_crt_invalid_parameter() and its result is returned.
 */
void* VMemNL::Calloc(size_t num, size_t size)
{
#ifdef _USE_LINKED_LIST
    PMEMORY_BLOCK_HEADER ptr;
    size_t payload;

    if (num == 0 || size == 0) {
        /* Genuine zero-byte request: hand out a header-only block.
           The factors are tested instead of the product on purpose: two
           non-zero factors whose product wraps around to exactly 0 are an
           overflow, not an empty request, and must reach the check below. */
        payload = 0;
    }
    else if (size > (((size_t)0) - (0x20 + sizeof(MEMORY_BLOCK_HEADER))) / num) {
        /* num is non-zero here, so the division is safe */
        return do_crt_invalid_parameter();
    }
    else {
        payload = num * size;   /* proven above not to overflow */
    }

    /* one calloc() covers header + payload so the CRT/OS can skip the
       memset() for pages that arrive already zeroed */
    ptr = (PMEMORY_BLOCK_HEADER)calloc(1, payload + sizeof(MEMORY_BLOCK_HEADER));
    if (ptr == NULL)
        return ptr;
    PrepLinkBlock(ptr);     /* writes that touch only the new block */
    GetLock();
    LinkBlock(ptr);
    FreeLock();
    return (ptr+1);
#else
    return calloc(num, size);
#endif
}
379+
380+
#ifdef _USE_LINKED_LIST
381+
/*
 * Cold path of Free(): called when a block's recorded owner is not this pool.
 * If the block has an owner at all, a warning is printed and the process is
 * crashed on purpose so the stack trace shows who freed to the wrong pool.
 * A block with no owner is silently ignored. Always returns NULL.
 */
void* VMemNL::DispatchWrongPool(PMEMORY_BLOCK_HEADER ptr)
{
    if (ptr->u.owner_nl) {
#if 1
        /* volatile: a store through a plain NULL pointer is undefined
           behaviour that an optimizer may delete or turn into something
           else; the volatile access keeps the intended fault in place */
        volatile int *nowhere = NULL;
        Perl_warn_nocontext("Free to wrong pool %p not %p",
                            (void*)this, (void*)ptr->u.owner_nl);
        *nowhere = 0;   /* this segfault is deliberate,
                           so you can see the stack trace */
#else
        void *pMem = (ptr+1); /* recreate the ptr interp was using */
        ptr->u.owner_nl->Free(pMem);
#endif
    }
    return NULL;
}
396+
#endif /*_USE_LINKED_LIST*/
397+
334398
#endif
335399

336400
#undef VMemNL
@@ -556,6 +620,7 @@ class VMem
556620
void* Malloc(size_t size);
557621
void* Realloc(void* pMem, size_t size);
558622
void Free(void* pMem);
623+
void* Calloc(size_t num, size_t size);
559624
void GetLock(void);
560625
void FreeLock(void);
561626
inline int IsLocked(void);
@@ -735,6 +800,26 @@ void VMem::Init(void)
735800
m_lAllocSize = lAllocStart;
736801
}
737802

803+
void* Calloc(size_t num, size_t size)
804+
{
805+
void * ptr;
806+
size_t totalsize = num * size;
807+
if (totalsize == 0) { /* UCRT converts 0*0 to 1 */
808+
char * pv = Malloc(1); /* and passes 1 to HeapAlloc */
809+
if (pv)
810+
pv[0] = 0xFE; /* don't '\0' it, instead poison it (WinPerl invented) */
811+
ptr = (void*)pv; /* ask for 0 bytes? you get 0 bytes! no '\0' for you */
812+
} /* this overflow check is supposedly the same one a real MS CRT uses */
813+
else if (!(( ((size_t)0) - 0x20) / num >= size))
814+
return do_crt_invalid_parameter();
815+
else {
816+
ptr = Malloc(totalsize);
817+
if (ptr)
818+
ptr = memset(ptr, 0, totalsize);
819+
}
820+
return ptr;
821+
}
822+
738823
void* VMem::Malloc(size_t size)
739824
{
740825
WALKHEAP();
@@ -1398,6 +1483,72 @@ void VMem::WalkHeap(int complete)
13981483

13991484
#endif /* _USE_MSVCRT_MEM_ALLOC */
14001485

1486+
1487+
#ifndef STATUS_DLL_NOT_FOUND
1488+
# define STATUS_DLL_NOT_FOUND 0xC0000135
1489+
#endif
1490+
1491+
#ifndef STATUS_PROCEDURE_NOT_FOUND
1492+
# define STATUS_PROCEDURE_NOT_FOUND 0xC000007A
1493+
#endif
1494+
1495+
typedef void(__cdecl * inv_arg_t)(void);
1496+
typedef void(__cdecl * inv_arg_noinfo_t)(void);
1497+
1498+
/* Emulate CRT's UI behavior upon failure. In real world testing on various
1499+
MS CRT versions & ages _invalid_parameter() and _invalid_parameter_noinfo()
1500+
are no_return'es/SEGV'es but maybe the very rare "checked" or debug MS CRTs
1501+
allow resuming execution. */
1502+
1503+
static void *
1504+
do_crt_invalid_parameter(void)
1505+
{
1506+
static inv_arg_t g_inv_arg = NULL;
1507+
static inv_arg_noinfo_t g_inv_arg_noinfo = NULL;
1508+
inv_arg_t inv_arg;
1509+
1510+
errno = ENOMEM;
1511+
inv_arg = g_inv_arg;
1512+
if (!inv_arg) {
1513+
inv_arg_noinfo_t inv_arg_noinfo = g_inv_arg_noinfo;
1514+
if (!inv_arg_noinfo) {
1515+
char ** ppv = _sys_errlist; /* get a ptr into the CRT's .rdata */
1516+
HMODULE h;
1517+
BOOL r = GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, (LPCSTR)ppv, &h);
1518+
if (r) {
1519+
inv_arg = (inv_arg_t)GetProcAddress(h, "_invalid_parameter");
1520+
if (!inv_arg) {
1521+
inv_arg_noinfo =
1522+
(inv_arg_noinfo_t)GetProcAddress(h, "_invalid_parameter_noinfo");
1523+
if (!inv_arg_noinfo) {
1524+
/* 0 = continuable, 0 = no args array ptr in arg 4 */
1525+
RaiseException(STATUS_PROCEDURE_NOT_FOUND, 0, 0, NULL);
1526+
}
1527+
else {
1528+
g_inv_arg_noinfo = inv_arg_noinfo;
1529+
goto do_inv_arg_noinfo;
1530+
}
1531+
}
1532+
else {
1533+
g_inv_arg = inv_arg;
1534+
goto do_inv_arg;
1535+
}
1536+
}
1537+
else /* throw a SEGV-style GUI popup, crash code says its not a SEGV */
1538+
RaiseException(STATUS_DLL_NOT_FOUND, 0, 0, NULL);
1539+
}
1540+
else {
1541+
do_inv_arg_noinfo:
1542+
inv_arg_noinfo();
1543+
}
1544+
}
1545+
else {
1546+
do_inv_arg:
1547+
inv_arg();
1548+
}
1549+
return NULL;
1550+
}
1551+
14011552
#define ___VMEM_H_INC___
14021553

14031554
#endif /* ___VMEM_H_INC___ */

0 commit comments

Comments
 (0)