Skip to content

Commit 8649a9f

Browse files
committed
Merge pull request #757 from roblatham00/lustre-excl-open-fix
hint processing should not open files
2 parents f18988c + 6e9cbe3 commit 8649a9f

File tree

2 files changed

+123
-106
lines changed

2 files changed

+123
-106
lines changed

ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c

Lines changed: 20 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
2020
char *value;
2121
int flag;
2222
ADIO_Offset stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1;
23-
struct lov_user_md lum = { 0 };
24-
int err, myrank, fd_sys, perm, amode, old_mask;
23+
int err, myrank;
2524
static char myname[] = "ADIOI_LUSTRE_SETINFO";
2625

2726
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
@@ -47,18 +46,25 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
4746
/* striping information */
4847
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
4948
value, &flag);
50-
if (flag)
49+
if (flag) {
50+
ADIOI_Info_set(fd->info, "striping_unit", value);
5151
str_unit=atoll(value);
52+
}
5253

5354
ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
5455
value, &flag);
55-
if (flag)
56+
if (flag) {
57+
ADIOI_Info_set(fd->info, "striping_factor", value);
5658
str_factor=atoll(value);
59+
}
5760

5861
ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
5962
MPI_MAX_INFO_VAL, value, &flag);
60-
if (flag)
63+
if (flag) {
64+
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
6165
start_iodev=atoll(value);
66+
}
67+
6268

6369
/* direct read and write */
6470
ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
@@ -84,72 +90,20 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
8490
}
8591
MPI_Bcast(stripe_val, 3, MPI_OFFSET, 0, fd->comm);
8692

93+
/* do not open file in hint processing. Open file in open routines,
94+
* where we can better deal with the EXCL flag. Continue to check that the
95+
* "all processors set a value" condition holds. */
8796
if (stripe_val[0] != str_factor
8897
|| stripe_val[1] != str_unit
8998
|| stripe_val[2] != start_iodev) {
90-
FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
91-
"-striping_factor:striping_unit:start_iodevice "
92-
"need to be identical across all processes\n");
93-
MPI_Abort(MPI_COMM_WORLD, 1);
94-
} else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
95-
/* if user has specified striping info, process 0 tries to set it */
96-
if (!myrank) {
97-
if (fd->perm == ADIO_PERM_NULL) {
98-
old_mask = umask(022);
99-
umask(old_mask);
100-
perm = old_mask ^ 0666;
101-
}
102-
else perm = fd->perm;
103-
104-
amode = 0;
105-
if (fd->access_mode & ADIO_CREATE)
106-
amode = amode | O_CREAT;
107-
if (fd->access_mode & ADIO_RDONLY)
108-
amode = amode | O_RDONLY;
109-
if (fd->access_mode & ADIO_WRONLY)
110-
amode = amode | O_WRONLY;
111-
if (fd->access_mode & ADIO_RDWR)
112-
amode = amode | O_RDWR;
113-
if (fd->access_mode & ADIO_EXCL)
114-
amode = amode | O_EXCL;
115-
116-
/* we need to create file so ensure this is set */
117-
amode = amode | O_LOV_DELAY_CREATE | O_CREAT;
118-
119-
fd_sys = open(fd->filename, amode, perm);
120-
if (fd_sys == -1) {
121-
if (errno != EEXIST)
122-
fprintf(stderr,
123-
"Failure to open file %s %d %d\n",strerror(errno), amode, perm);
124-
} else {
125-
lum.lmm_magic = LOV_USER_MAGIC;
126-
lum.lmm_pattern = 0;
127-
lum.lmm_stripe_size = str_unit;
128-
/* crude check for overflow of lustre internal datatypes.
129-
* Silently cap to large value if user provides a value
130-
* larger than lustre supports */
131-
if (lum.lmm_stripe_size != str_unit) {
132-
lum.lmm_stripe_size = UINT_MAX;
133-
}
134-
lum.lmm_stripe_count = str_factor;
135-
if ( lum.lmm_stripe_count != str_factor) {
136-
lum.lmm_stripe_count = USHRT_MAX;
137-
}
138-
lum.lmm_stripe_offset = start_iodev;
139-
if (lum.lmm_stripe_offset != start_iodev) {
140-
lum.lmm_stripe_offset = USHRT_MAX;
141-
}
142-
143-
err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
144-
if (err == -1 && errno != EEXIST) {
145-
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
146-
}
147-
close(fd_sys);
148-
}
149-
} /* End of striping parameters validation */
99+
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME("ADIOI_LUSTRE_SetInfo",
100+
"str_factor or str_unit or start_iodev",
101+
error_code);
102+
ADIOI_Free(value);
103+
return;
150104
}
151-
MPI_Barrier(fd->comm);
152105
}
106+
153107
/* get other hint */
154108
if (users_info != MPI_INFO_NULL) {
155109
/* CO: IO Clients/OST,

ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c

Lines changed: 103 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,17 @@
1818
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
1919
{
2020
int perm, old_mask, amode, amode_direct;
21-
int lumlen;
21+
int lumlen, myrank, flag, set_layout=0, err;
2222
struct lov_user_md *lum = NULL;
2323
char *value;
24+
ADIO_Offset str_factor = -1, str_unit=0, start_iodev=-1;
2425

2526
#if defined(MPICH) || !defined(PRINT_ERR_MSG)
2627
static char myname[] = "ADIOI_LUSTRE_OPEN";
2728
#endif
2829

30+
MPI_Comm_rank(fd->comm, &myrank);
31+
2932
if (fd->perm == ADIO_PERM_NULL) {
3033
old_mask = umask(022);
3134
umask(old_mask);
@@ -47,46 +50,103 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
4750

4851
amode_direct = amode | O_DIRECT;
4952

50-
fd->fd_sys = open(fd->filename, amode|O_CREAT, perm);
51-
52-
if (fd->fd_sys != -1) {
53-
int err;
54-
55-
/* get file striping information and set it in info */
56-
/* odd malloc here because lov_user_md contains some fixed data and
57-
* then a list of 'lmm_objects' representing stripe */
58-
lumlen = sizeof(struct lov_user_md) +
59-
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
60-
/* furthermore, Pascal Deveze reports that, even though we pass a
61-
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
62-
* struct are uninitialized, the call can give an error. calloc in case
63-
* there are other members that must be initialized and in case
64-
* lov_user_md struct changes in future */
65-
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
66-
lum->lmm_magic = LOV_USER_MAGIC;
67-
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
68-
if (!err) {
69-
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
70-
71-
fd->hints->striping_unit = lum->lmm_stripe_size;
72-
sprintf(value, "%d", lum->lmm_stripe_size);
73-
ADIOI_Info_set(fd->info, "striping_unit", value);
74-
75-
fd->hints->striping_factor = lum->lmm_stripe_count;
76-
sprintf(value, "%d", lum->lmm_stripe_count);
77-
ADIOI_Info_set(fd->info, "striping_factor", value);
78-
79-
fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
80-
sprintf(value, "%d", lum->lmm_stripe_offset);
81-
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
82-
83-
ADIOI_Free(value);
84-
}
85-
ADIOI_Free(lum);
86-
87-
if (fd->access_mode & ADIO_APPEND)
88-
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
8953
}
54+
/* odd length here because lov_user_md contains some fixed data and
55+
* then a list of 'lmm_objects' representing stripe */
56+
lumlen = sizeof(struct lov_user_md) +
57+
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
58+
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
59+
60+
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
61+
/* we already validated in LUSTRE_SetInfo that these are going to be the same */
62+
if (fd->info != MPI_INFO_NULL) {
63+
/* striping information */
64+
ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
65+
value, &flag);
66+
if (flag)
67+
str_unit=atoll(value);
68+
69+
ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
70+
value, &flag);
71+
if (flag)
72+
str_factor=atoll(value);
73+
74+
ADIOI_Info_get(fd->info, "romio_lustre_start_iodevice",
75+
MPI_MAX_INFO_VAL, value, &flag);
76+
if (flag)
77+
start_iodev=atoll(value);
78+
}
79+
if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0))
80+
set_layout = 1;
81+
82+
/* if hints were set, we need to delay creation of any lustre objects.
83+
* However, if we open the file with O_LOV_DELAY_CREATE and don't call the
84+
* follow-up ioctl, subsequent writes will fail */
85+
if (myrank == 0 && set_layout)
86+
amode = amode | O_LOV_DELAY_CREATE;
87+
88+
fd->fd_sys = open(fd->filename, amode, perm);
89+
if (fd->fd_sys == -1) goto fn_exit;
90+
91+
/* we can only set these hints on new files */
92+
/* It was strange and buggy to open the file in the hint path. Instead,
93+
* we'll apply the file tunings at open time */
94+
if ((amode & O_CREAT) && set_layout ) {
95+
/* if user has specified striping info, process 0 tries to set it */
96+
if (!myrank) {
97+
lum->lmm_magic = LOV_USER_MAGIC;
98+
lum->lmm_pattern = 0;
99+
/* crude check for overflow of lustre internal datatypes.
100+
* Silently cap to large value if user provides a value
101+
* larger than lustre supports */
102+
if (str_unit > UINT_MAX)
103+
lum->lmm_stripe_size = UINT_MAX;
104+
else
105+
lum->lmm_stripe_size = str_unit;
106+
107+
if (str_factor > USHRT_MAX)
108+
lum->lmm_stripe_count = USHRT_MAX;
109+
else
110+
lum->lmm_stripe_count = str_factor;
111+
112+
if (start_iodev > USHRT_MAX)
113+
lum->lmm_stripe_offset = USHRT_MAX;
114+
else
115+
lum->lmm_stripe_offset = start_iodev;
116+
err = ioctl(fd->fd_sys, LL_IOC_LOV_SETSTRIPE, lum);
117+
if (err == -1 && errno != EEXIST) {
118+
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
119+
/* not a fatal error, but user might care to know */
120+
}
121+
} /* End of striping parameters validation */
122+
}
123+
124+
/* Pascal Deveze reports that, even though we pass a
125+
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
126+
* struct are uninitialized, the call can give an error. zero it out in case
127+
* there are other members that must be initialized and in case
128+
* lov_user_md struct changes in future */
129+
memset(lum, 0, lumlen);
130+
lum->lmm_magic = LOV_USER_MAGIC;
131+
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
132+
if (!err) {
133+
134+
fd->hints->striping_unit = lum->lmm_stripe_size;
135+
sprintf(value, "%d", lum->lmm_stripe_size);
136+
ADIOI_Info_set(fd->info, "striping_unit", value);
137+
138+
fd->hints->striping_factor = lum->lmm_stripe_count;
139+
sprintf(value, "%d", lum->lmm_stripe_count);
140+
ADIOI_Info_set(fd->info, "striping_factor", value);
141+
142+
fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
143+
sprintf(value, "%d", lum->lmm_stripe_offset);
144+
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
145+
146+
}
147+
148+
if (fd->access_mode & ADIO_APPEND)
149+
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
90150

91151
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
92152
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
@@ -101,6 +161,9 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
101161
fd->direct_write = fd->direct_read = 0;
102162
}
103163
}
164+
fn_exit:
165+
ADIOI_Free(lum);
166+
ADIOI_Free(value);
104167

105168
/* --BEGIN ERROR HANDLING-- */
106169
if (fd->fd_sys == -1 || ((fd->fd_direct == -1) &&

0 commit comments

Comments
 (0)