Skip to content

Commit 82b75f9

Browse files
authored
Disable the old QCDOC qalloc by default and copy utility functions from memory.c
1. qalloc() appears to have been a special routine written for the PPC440-based QCDOC supercomputer(s) from around 2005; its source does not seem to be readily available. So switch the "#if 1" in the code to "#if 0" so that it relies on standard malloc() by default. 2. Utility functions like get_num_procs and get_num_threads, which were added to the "normally" used memory.c in the meantime, were still missing here.
1 parent 7887c45 commit 82b75f9

File tree

1 file changed

+311
-10
lines changed

1 file changed

+311
-10
lines changed

driver/others/memory_qalloc.c

Lines changed: 311 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,29 +38,37 @@
3838

3939
#include <stdio.h>
4040
#include "common.h"
41+
#ifdef OS_LINUX
42+
#include <sys/sysinfo.h>
43+
#include <sched.h>
44+
#include <errno.h>
45+
#include <linux/unistd.h>
46+
#include <sys/syscall.h>
47+
#include <sys/time.h>
48+
#include <sys/resource.h>
49+
#endif
4150

42-
#ifndef SMP
43-
#define blas_cpu_number 1
44-
#else
45-
46-
int blas_cpu_number = 1;
47-
48-
int blas_get_cpu_number(void){
51+
#ifdef OS_HAIKU
52+
#include <unistd.h>
53+
#endif
4954

50-
return blas_cpu_number;
51-
}
55+
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN)
56+
#include <sys/sysctl.h>
57+
#include <sys/resource.h>
5258
#endif
5359

60+
5461
#define FIXED_PAGESIZE 4096
5562

63+
5664
void *sa = NULL;
5765
void *sb = NULL;
5866
static double static_buffer[BUFFER_SIZE/sizeof(double)];
5967

6068
void *blas_memory_alloc(int numproc){
6169

6270
if (sa == NULL){
63-
#if 1
71+
#if 0
6472
sa = (void *)qalloc(QFAST, BUFFER_SIZE);
6573
#else
6674
sa = (void *)malloc(BUFFER_SIZE);
@@ -75,3 +83,296 @@ void blas_memory_free(void *free_area){
7583
return;
7684
}
7785

86+
87+
88+
extern void openblas_warning(int verbose, const char * msg);
89+
90+
#ifndef SMP
91+
92+
#define blas_cpu_number 1
93+
#define blas_num_threads 1
94+
95+
/* Dummy Function */
96+
/*
 * Non-SMP stub: the processor count is fixed at one.
 */
int goto_get_num_procs(void) {
  return 1;
}

/*
 * Non-SMP stub: the requested thread count is accepted and ignored.
 */
void goto_set_num_threads(int num_threads) {
  (void)num_threads; /* intentionally unused */
}
98+
99+
#else
100+
101+
#if defined(OS_LINUX) || defined(OS_SUNOS)
102+
#ifndef NO_AFFINITY
103+
int get_num_procs(void);
104+
#else
105+
/*
 * Report the number of processors (variant compiled when NO_AFFINITY is
 * defined on Linux or SunOS).
 *
 * The configured processor count is read once through sysconf() and cached
 * in a function-local static.  Platforms other than Linux return that count.
 * On Linux this variant always reports a single processor: the
 * sched_getaffinity()-based count existed here only inside a comment and was
 * never compiled, so it has been dropped together with the locals that only
 * that commented-out code referenced.
 * NOTE(review): if an affinity-aware count is wanted on Linux, restore it
 * from memory.c -- confirm against that file.
 */
int get_num_procs(void) {
  static int nums = 0;

  if (!nums) {
    nums = sysconf(_SC_NPROCESSORS_CONF);
  }

#if !defined(OS_LINUX)
  return nums;
#else
  return 1;
#endif
}
176+
#endif
177+
#endif
178+
179+
#ifdef OS_ANDROID
180+
/*
 * Report the number of configured processors (Android build).
 *
 * sysconf(_SC_NPROCESSORS_CONF) is queried on the first call and the result
 * is cached.  sysconf() returns -1 on failure; a non-positive result is
 * reported as one processor so callers never see a zero or negative count.
 */
int get_num_procs(void) {
  static int nums = 0;

  if (nums == 0) {
    long configured = sysconf(_SC_NPROCESSORS_CONF);

    nums = (configured > 0) ? (int)configured : 1;
  }
  return nums;
}
185+
#endif
186+
187+
#ifdef OS_HAIKU
188+
/*
 * Report the number of configured processors (Haiku build).
 *
 * The value comes from sysconf(_SC_NPROCESSORS_CONF) and is cached after the
 * first call.  A failed or non-positive query is reported as one processor
 * rather than being cached as -1 or 0.
 */
int get_num_procs(void) {
  static int nums = 0;

  if (nums == 0) {
    long configured = sysconf(_SC_NPROCESSORS_CONF);

    nums = (configured > 0) ? (int)configured : 1;
  }
  return nums;
}
193+
#endif
194+
195+
#ifdef OS_AIX
196+
/*
 * Report the number of configured processors (AIX build).
 *
 * The value comes from sysconf(_SC_NPROCESSORS_CONF) and is cached after the
 * first call.  A failed or non-positive query is reported as one processor
 * rather than being cached as -1 or 0.
 */
int get_num_procs(void) {
  static int nums = 0;

  if (nums == 0) {
    long configured = sysconf(_SC_NPROCESSORS_CONF);

    nums = (configured > 0) ? (int)configured : 1;
  }
  return nums;
}
201+
#endif
202+
203+
#ifdef OS_WINDOWS
204+
205+
int get_num_procs(void) {
206+
207+
static int nums = 0;
208+
209+
if (nums == 0) {
210+
211+
SYSTEM_INFO sysinfo;
212+
213+
GetSystemInfo(&sysinfo);
214+
215+
nums = sysinfo.dwNumberOfProcessors;
216+
}
217+
218+
return nums;
219+
}
220+
221+
#endif
222+
223+
#if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY)
224+
225+
/*
 * Report the number of processors (FreeBSD/OpenBSD/NetBSD/DragonFly build).
 *
 * The count is read from the { CTL_HW, HW_NCPU } sysctl on the first call and
 * cached.  If sysctl() fails or reports a non-positive value, one processor
 * is reported instead of leaving the count at zero.
 */
int get_num_procs(void) {
  static int nums = 0;

  if (nums == 0) {
    int mib[2] = { CTL_HW, HW_NCPU };
    int count = 0;
    size_t len = sizeof(count);

    if (sysctl(mib, 2, &count, &len, NULL, 0) != 0 || count < 1) {
      count = 1;
    }
    nums = count;
  }

  return nums;
}
241+
242+
#endif
243+
244+
#if defined(OS_DARWIN)
245+
/*
 * Report the number of physical processors (Darwin build).
 *
 * The count is read from the "hw.physicalcpu" sysctl on the first call and
 * cached.  If sysctlbyname() fails or reports a non-positive value, one
 * processor is reported instead of leaving the count at zero.
 */
int get_num_procs(void) {
  static int nums = 0;

  if (nums == 0) {
    int count = 0;
    size_t len = sizeof(count);

    if (sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0) != 0 || count < 1) {
      count = 1;
    }
    nums = count;
  }
  return nums;
}
254+
/*
255+
void set_stack_limit(int limitMB){
256+
int result=0;
257+
struct rlimit rl;
258+
rlim_t StackSize;
259+
260+
StackSize=limitMB*1024*1024;
261+
result=getrlimit(RLIMIT_STACK, &rl);
262+
if(result==0){
263+
if(rl.rlim_cur < StackSize){
264+
rl.rlim_cur=StackSize;
265+
result=setrlimit(RLIMIT_STACK, &rl);
266+
if(result !=0){
267+
fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
268+
}
269+
}
270+
}
271+
}
272+
*/
273+
#endif
274+
275+
276+
/*
277+
OpenBLAS uses the number of CPU cores for multithreading.
278+
It can be set by openblas_set_num_threads(int num_threads);
279+
*/
280+
int blas_cpu_number = 0;
281+
/*
282+
The numbers of threads in the thread pool.
283+
This value is equal to or larger than blas_cpu_number, which means some threads may be asleep.
284+
*/
285+
int blas_num_threads = 0;
286+
287+
int goto_get_num_procs (void) {
288+
return blas_cpu_number;
289+
}
290+
291+
/*
 * Install blas_thread_shutdown as the pthread_atfork() "prepare" handler.
 *
 * This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
 * built with "make USE_OPENMP=0".
 * Hanging can still happen when OpenBLAS is built against the libgomp
 * implementation of OpenMP. The problem is tracked at:
 *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
 * In the meantime build with USE_OPENMP=0 or link against another
 * implementation of OpenMP.
 *
 * The registration is compiled only when SMP_SERVER is defined and the
 * target is neither Android nor Windows without Cygwin; otherwise this
 * function does nothing.  A registration failure is reported through
 * openblas_warning() and is not fatal.
 */
void openblas_fork_handler(void)
{
#if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER)
  int err;

  err = pthread_atfork((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL);
  if (err != 0) {
    openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n");
  }
#endif
}
307+
308+
extern int openblas_num_threads_env();
309+
extern int openblas_goto_num_threads_env();
310+
extern int openblas_omp_num_threads_env();
311+
312+
int blas_get_cpu_number(void){
313+
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
314+
int max_num;
315+
#endif
316+
int blas_goto_num = 0;
317+
int blas_omp_num = 0;
318+
319+
if (blas_num_threads) return blas_num_threads;
320+
321+
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
322+
max_num = get_num_procs();
323+
#endif
324+
325+
// blas_goto_num = 0;
326+
#ifndef USE_OPENMP
327+
blas_goto_num=openblas_num_threads_env();
328+
if (blas_goto_num < 0) blas_goto_num = 0;
329+
330+
if (blas_goto_num == 0) {
331+
blas_goto_num=openblas_goto_num_threads_env();
332+
if (blas_goto_num < 0) blas_goto_num = 0;
333+
}
334+
335+
#endif
336+
337+
// blas_omp_num = 0;
338+
blas_omp_num=openblas_omp_num_threads_env();
339+
if (blas_omp_num < 0) blas_omp_num = 0;
340+
341+
if (blas_goto_num > 0) blas_num_threads = blas_goto_num;
342+
else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
343+
else blas_num_threads = MAX_CPU_NUMBER;
344+
345+
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
346+
if (blas_num_threads > max_num) blas_num_threads = max_num;
347+
#endif
348+
349+
if (blas_num_threads > MAX_CPU_NUMBER) blas_num_threads = MAX_CPU_NUMBER;
350+
351+
#ifdef DEBUG
352+
printf( "Adjusted number of threads : %3d\n", blas_num_threads);
353+
#endif
354+
355+
blas_cpu_number = blas_num_threads;
356+
357+
return blas_num_threads;
358+
}
359+
#endif
360+
361+
362+
/*
 * Return the processor count: get_num_procs() in SMP builds, otherwise 1.
 */
int openblas_get_num_procs(void)
{
#ifdef SMP
  return get_num_procs();
#else
  return 1;
#endif
}
369+
370+
/*
 * Return the number of threads in use.
 *
 * SMP builds call blas_get_cpu_number() first so that blas_cpu_number is
 * initialized if needed, then return blas_cpu_number.  Non-SMP builds
 * always return 1.
 */
int openblas_get_num_threads(void)
{
#ifdef SMP
  /* Make sure blas_cpu_number has been initialized before it is read. */
  blas_get_cpu_number();
  return blas_cpu_number;
#else
  return 1;
#endif
}

0 commit comments

Comments
 (0)