Skip to content

Commit 6e9cbe3

Browse files
committed
hint processing should not open files
Move the opening of files out of hint processing and into the open routines. This corresponds to MPICH commits 92f1c69f0de8 and 22a77dceda11; see https://trac.mpich.org/projects/mpich/ticket/2261. Ref: #158. Signed-off-by: Pavan Balaji <[email protected]>
1 parent 377bad1 commit 6e9cbe3

File tree

2 files changed

+123
-106
lines changed

2 files changed

+123
-106
lines changed

ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c

Lines changed: 20 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
2020
char *value;
2121
int flag;
2222
ADIO_Offset stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1;
23-
struct lov_user_md lum = { 0 };
24-
int err, myrank, fd_sys, perm, amode, old_mask;
23+
int err, myrank;
2524
static char myname[] = "ADIOI_LUSTRE_SETINFO";
2625

2726
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
@@ -47,18 +46,25 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
4746
/* striping information */
4847
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
4948
value, &flag);
50-
if (flag)
49+
if (flag) {
50+
ADIOI_Info_set(fd->info, "striping_unit", value);
5151
str_unit=atoll(value);
52+
}
5253

5354
ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
5455
value, &flag);
55-
if (flag)
56+
if (flag) {
57+
ADIOI_Info_set(fd->info, "striping_factor", value);
5658
str_factor=atoll(value);
59+
}
5760

5861
ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
5962
MPI_MAX_INFO_VAL, value, &flag);
60-
if (flag)
63+
if (flag) {
64+
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
6165
start_iodev=atoll(value);
66+
}
67+
6268

6369
/* direct read and write */
6470
ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
@@ -84,72 +90,20 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
8490
}
8591
MPI_Bcast(stripe_val, 3, MPI_OFFSET, 0, fd->comm);
8692

93+
/* do not open file in hint processing. Open file in open routines,
94+
* where we can better deal with the EXCL flag. Continue to check that the
95+
* "all processors set a value" condition holds. */
8796
if (stripe_val[0] != str_factor
8897
|| stripe_val[1] != str_unit
8998
|| stripe_val[2] != start_iodev) {
90-
FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
91-
"-striping_factor:striping_unit:start_iodevice "
92-
"need to be identical across all processes\n");
93-
MPI_Abort(MPI_COMM_WORLD, 1);
94-
} else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
95-
/* if user has specified striping info, process 0 tries to set it */
96-
if (!myrank) {
97-
if (fd->perm == ADIO_PERM_NULL) {
98-
old_mask = umask(022);
99-
umask(old_mask);
100-
perm = old_mask ^ 0666;
101-
}
102-
else perm = fd->perm;
103-
104-
amode = 0;
105-
if (fd->access_mode & ADIO_CREATE)
106-
amode = amode | O_CREAT;
107-
if (fd->access_mode & ADIO_RDONLY)
108-
amode = amode | O_RDONLY;
109-
if (fd->access_mode & ADIO_WRONLY)
110-
amode = amode | O_WRONLY;
111-
if (fd->access_mode & ADIO_RDWR)
112-
amode = amode | O_RDWR;
113-
if (fd->access_mode & ADIO_EXCL)
114-
amode = amode | O_EXCL;
115-
116-
/* we need to create file so ensure this is set */
117-
amode = amode | O_LOV_DELAY_CREATE | O_CREAT;
118-
119-
fd_sys = open(fd->filename, amode, perm);
120-
if (fd_sys == -1) {
121-
if (errno != EEXIST)
122-
fprintf(stderr,
123-
"Failure to open file %s %d %d\n",strerror(errno), amode, perm);
124-
} else {
125-
lum.lmm_magic = LOV_USER_MAGIC;
126-
lum.lmm_pattern = 0;
127-
lum.lmm_stripe_size = str_unit;
128-
/* crude check for overflow of lustre internal datatypes.
129-
* Silently cap to large value if user provides a value
130-
* larger than lustre supports */
131-
if (lum.lmm_stripe_size != str_unit) {
132-
lum.lmm_stripe_size = UINT_MAX;
133-
}
134-
lum.lmm_stripe_count = str_factor;
135-
if ( lum.lmm_stripe_count != str_factor) {
136-
lum.lmm_stripe_count = USHRT_MAX;
137-
}
138-
lum.lmm_stripe_offset = start_iodev;
139-
if (lum.lmm_stripe_offset != start_iodev) {
140-
lum.lmm_stripe_offset = USHRT_MAX;
141-
}
142-
143-
err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
144-
if (err == -1 && errno != EEXIST) {
145-
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
146-
}
147-
close(fd_sys);
148-
}
149-
} /* End of striping parameters validation */
99+
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME("ADIOI_LUSTRE_SetInfo",
100+
"str_factor or str_unit or start_iodev",
101+
error_code);
102+
ADIOI_Free(value);
103+
return;
150104
}
151-
MPI_Barrier(fd->comm);
152105
}
106+
153107
/* get other hint */
154108
if (users_info != MPI_INFO_NULL) {
155109
/* CO: IO Clients/OST,

ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c

Lines changed: 103 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,17 @@
1818
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
1919
{
2020
int perm, old_mask, amode, amode_direct;
21-
int lumlen;
21+
int lumlen, myrank, flag, set_layout=0, err;
2222
struct lov_user_md *lum = NULL;
2323
char *value;
24+
ADIO_Offset str_factor = -1, str_unit=0, start_iodev=-1;
2425

2526
#if defined(MPICH) || !defined(PRINT_ERR_MSG)
2627
static char myname[] = "ADIOI_LUSTRE_OPEN";
2728
#endif
2829

30+
MPI_Comm_rank(fd->comm, &myrank);
31+
2932
if (fd->perm == ADIO_PERM_NULL) {
3033
old_mask = umask(022);
3134
umask(old_mask);
@@ -47,46 +50,103 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
4750

4851
amode_direct = amode | O_DIRECT;
4952

50-
fd->fd_sys = open(fd->filename, amode|O_CREAT, perm);
51-
52-
if (fd->fd_sys != -1) {
53-
int err;
54-
55-
/* get file striping information and set it in info */
56-
/* odd malloc here because lov_user_md contains some fixed data and
57-
* then a list of 'lmm_objects' representing stripe */
58-
lumlen = sizeof(struct lov_user_md) +
59-
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
60-
/* furthermore, Pascal Deveze reports that, even though we pass a
61-
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
62-
* struct are uninitialized, the call can give an error. calloc in case
63-
* there are other members that must be initialized and in case
64-
* lov_user_md struct changes in future */
65-
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
66-
lum->lmm_magic = LOV_USER_MAGIC;
67-
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
68-
if (!err) {
69-
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
70-
71-
fd->hints->striping_unit = lum->lmm_stripe_size;
72-
sprintf(value, "%d", lum->lmm_stripe_size);
73-
ADIOI_Info_set(fd->info, "striping_unit", value);
74-
75-
fd->hints->striping_factor = lum->lmm_stripe_count;
76-
sprintf(value, "%d", lum->lmm_stripe_count);
77-
ADIOI_Info_set(fd->info, "striping_factor", value);
78-
79-
fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
80-
sprintf(value, "%d", lum->lmm_stripe_offset);
81-
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
82-
83-
ADIOI_Free(value);
84-
}
85-
ADIOI_Free(lum);
86-
87-
if (fd->access_mode & ADIO_APPEND)
88-
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
8953
}
54+
/* odd length here because lov_user_md contains some fixed data and
55+
* then a list of 'lmm_objects' representing stripe */
56+
lumlen = sizeof(struct lov_user_md) +
57+
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
58+
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
59+
60+
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
61+
/* we already validated in LUSTRE_SetInfo that these are going to be the same */
62+
if (fd->info != MPI_INFO_NULL) {
63+
/* striping information */
64+
ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
65+
value, &flag);
66+
if (flag)
67+
str_unit=atoll(value);
68+
69+
ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
70+
value, &flag);
71+
if (flag)
72+
str_factor=atoll(value);
73+
74+
ADIOI_Info_get(fd->info, "romio_lustre_start_iodevice",
75+
MPI_MAX_INFO_VAL, value, &flag);
76+
if (flag)
77+
start_iodev=atoll(value);
78+
}
79+
if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0))
80+
set_layout = 1;
81+
82+
/* if hints were set, we need to delay creation of any lustre objects.
83+
* However, if we open the file with O_LOV_DELAY_CREATE and don't call the
84+
* follow-up ioctl, subsequent writes will fail */
85+
if (myrank == 0 && set_layout)
86+
amode = amode | O_LOV_DELAY_CREATE;
87+
88+
fd->fd_sys = open(fd->filename, amode, perm);
89+
if (fd->fd_sys == -1) goto fn_exit;
90+
91+
/* we can only set these hints on new files */
92+
/* It was strange and buggy to open the file in the hint path. Instead,
93+
* we'll apply the file tunings at open time */
94+
if ((amode & O_CREAT) && set_layout ) {
95+
/* if user has specified striping info, process 0 tries to set it */
96+
if (!myrank) {
97+
lum->lmm_magic = LOV_USER_MAGIC;
98+
lum->lmm_pattern = 0;
99+
/* crude check for overflow of lustre internal datatypes.
100+
* Silently cap to large value if user provides a value
101+
* larger than lustre supports */
102+
if (str_unit > UINT_MAX)
103+
lum->lmm_stripe_size = UINT_MAX;
104+
else
105+
lum->lmm_stripe_size = str_unit;
106+
107+
if (str_factor > USHRT_MAX)
108+
lum->lmm_stripe_count = USHRT_MAX;
109+
else
110+
lum->lmm_stripe_count = str_factor;
111+
112+
if (start_iodev > USHRT_MAX)
113+
lum->lmm_stripe_offset = USHRT_MAX;
114+
else
115+
lum->lmm_stripe_offset = start_iodev;
116+
err = ioctl(fd->fd_sys, LL_IOC_LOV_SETSTRIPE, lum);
117+
if (err == -1 && errno != EEXIST) {
118+
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
119+
/* not a fatal error, but user might care to know */
120+
}
121+
} /* End of striping parameters validation */
122+
}
123+
124+
/* Pascal Deveze reports that, even though we pass a
125+
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
126+
* struct are uninitialized, the call can give an error. zero it out in case
127+
* there are other members that must be initialized and in case
128+
* lov_user_md struct changes in future */
129+
memset(lum, 0, lumlen);
130+
lum->lmm_magic = LOV_USER_MAGIC;
131+
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
132+
if (!err) {
133+
134+
fd->hints->striping_unit = lum->lmm_stripe_size;
135+
sprintf(value, "%d", lum->lmm_stripe_size);
136+
ADIOI_Info_set(fd->info, "striping_unit", value);
137+
138+
fd->hints->striping_factor = lum->lmm_stripe_count;
139+
sprintf(value, "%d", lum->lmm_stripe_count);
140+
ADIOI_Info_set(fd->info, "striping_factor", value);
141+
142+
fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
143+
sprintf(value, "%d", lum->lmm_stripe_offset);
144+
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
145+
146+
}
147+
148+
if (fd->access_mode & ADIO_APPEND)
149+
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
90150

91151
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
92152
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
@@ -101,6 +161,9 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
101161
fd->direct_write = fd->direct_read = 0;
102162
}
103163
}
164+
fn_exit:
165+
ADIOI_Free(lum);
166+
ADIOI_Free(value);
104167

105168
/* --BEGIN ERROR HANDLING-- */
106169
if (fd->fd_sys == -1 || ((fd->fd_direct == -1) &&

0 commit comments

Comments
 (0)