Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit 2058f03

Browse files
committed
Merge pull request #511 from hppritcha/topic/cherry_pick_6e9cbe39
hint processing should not open files
2 parents a1400db + 5d73456 commit 2058f03

File tree

2 files changed

+123
-107
lines changed

2 files changed

+123
-107
lines changed

ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c

Lines changed: 20 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
2020
char *value;
2121
int flag;
2222
ADIO_Offset stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1;
23-
struct lov_user_md lum = { 0 };
24-
int err, myrank, fd_sys, perm, amode, old_mask;
23+
int err, myrank;
2524
static char myname[] = "ADIOI_LUSTRE_SETINFO";
2625

2726
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
@@ -47,18 +46,25 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
4746
/* striping information */
4847
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
4948
value, &flag);
50-
if (flag)
49+
if (flag) {
50+
ADIOI_Info_set(fd->info, "striping_unit", value);
5151
str_unit=atoll(value);
52+
}
5253

5354
ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
5455
value, &flag);
55-
if (flag)
56+
if (flag) {
57+
ADIOI_Info_set(fd->info, "striping_factor", value);
5658
str_factor=atoll(value);
59+
}
5760

5861
ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
5962
MPI_MAX_INFO_VAL, value, &flag);
60-
if (flag)
63+
if (flag) {
64+
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
6165
start_iodev=atoll(value);
66+
}
67+
6268

6369
/* direct read and write */
6470
ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
@@ -84,72 +90,20 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
8490
}
8591
MPI_Bcast(stripe_val, 3, MPI_OFFSET, 0, fd->comm);
8692

93+
/* do not open file in hint processing. Open file in open routines,
94+
* where we can better deal with the EXCL flag. Continue to check that the
95+
* "all processors set a value" condition holds. */
8796
if (stripe_val[0] != str_factor
8897
|| stripe_val[1] != str_unit
8998
|| stripe_val[2] != start_iodev) {
90-
FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
91-
"-striping_factor:striping_unit:start_iodevice "
92-
"need to be identical across all processes\n");
93-
MPI_Abort(MPI_COMM_WORLD, 1);
94-
} else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
95-
/* if user has specified striping info, process 0 tries to set it */
96-
if (!myrank) {
97-
if (fd->perm == ADIO_PERM_NULL) {
98-
old_mask = umask(022);
99-
umask(old_mask);
100-
perm = old_mask ^ 0666;
101-
}
102-
else perm = fd->perm;
103-
104-
amode = 0;
105-
if (fd->access_mode & ADIO_CREATE)
106-
amode = amode | O_CREAT;
107-
if (fd->access_mode & ADIO_RDONLY)
108-
amode = amode | O_RDONLY;
109-
if (fd->access_mode & ADIO_WRONLY)
110-
amode = amode | O_WRONLY;
111-
if (fd->access_mode & ADIO_RDWR)
112-
amode = amode | O_RDWR;
113-
if (fd->access_mode & ADIO_EXCL)
114-
amode = amode | O_EXCL;
115-
116-
/* we need to create file so ensure this is set */
117-
amode = amode | O_LOV_DELAY_CREATE | O_CREAT;
118-
119-
fd_sys = open(fd->filename, amode, perm);
120-
if (fd_sys == -1) {
121-
if (errno != EEXIST)
122-
fprintf(stderr,
123-
"Failure to open file %s %d %d\n",strerror(errno), amode, perm);
124-
} else {
125-
lum.lmm_magic = LOV_USER_MAGIC;
126-
lum.lmm_pattern = 0;
127-
lum.lmm_stripe_size = str_unit;
128-
/* crude check for overflow of lustre internal datatypes.
129-
* Silently cap to large value if user provides a value
130-
* larger than lustre supports */
131-
if (lum.lmm_stripe_size != str_unit) {
132-
lum.lmm_stripe_size = UINT_MAX;
133-
}
134-
lum.lmm_stripe_count = str_factor;
135-
if ( lum.lmm_stripe_count != str_factor) {
136-
lum.lmm_stripe_count = USHRT_MAX;
137-
}
138-
lum.lmm_stripe_offset = start_iodev;
139-
if (lum.lmm_stripe_offset != start_iodev) {
140-
lum.lmm_stripe_offset = USHRT_MAX;
141-
}
142-
143-
err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
144-
if (err == -1 && errno != EEXIST) {
145-
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
146-
}
147-
close(fd_sys);
148-
}
149-
} /* End of striping parameters validation */
99+
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME("ADIOI_LUSTRE_SetInfo",
100+
"str_factor or str_unit or start_iodev",
101+
error_code);
102+
ADIOI_Free(value);
103+
return;
150104
}
151-
MPI_Barrier(fd->comm);
152105
}
106+
153107
/* get other hint */
154108
if (users_info != MPI_INFO_NULL) {
155109
/* CO: IO Clients/OST,

ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c

Lines changed: 103 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,17 @@
1818
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
1919
{
2020
int perm, old_mask, amode, amode_direct;
21-
int lumlen;
21+
int lumlen, myrank, flag, set_layout=0, err;
2222
struct lov_user_md *lum = NULL;
2323
char *value;
24+
ADIO_Offset str_factor = -1, str_unit=0, start_iodev=-1;
2425

2526
#if defined(MPICH) || !defined(PRINT_ERR_MSG)
2627
static char myname[] = "ADIOI_LUSTRE_OPEN";
2728
#endif
2829

30+
MPI_Comm_rank(fd->comm, &myrank);
31+
2932
if (fd->perm == ADIO_PERM_NULL) {
3033
old_mask = umask(022);
3134
umask(old_mask);
@@ -47,46 +50,102 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
4750

4851
amode_direct = amode | O_DIRECT;
4952

50-
fd->fd_sys = open(fd->filename, amode|O_CREAT, perm);
51-
52-
if (fd->fd_sys != -1) {
53-
int err;
54-
55-
/* get file striping information and set it in info */
56-
/* odd malloc here because lov_user_md contains some fixed data and
57-
* then a list of 'lmm_objects' representing stripe */
58-
lumlen = sizeof(struct lov_user_md) +
59-
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
60-
/* furthermore, Pascal Deveze reports that, even though we pass a
61-
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
62-
* struct are uninitialized, the call can give an error. calloc in case
63-
* there are other members that must be initialized and in case
64-
* lov_user_md struct changes in future */
65-
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
66-
lum->lmm_magic = LOV_USER_MAGIC;
67-
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
68-
if (!err) {
69-
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
70-
71-
fd->hints->striping_unit = lum->lmm_stripe_size;
72-
sprintf(value, "%d", lum->lmm_stripe_size);
73-
ADIOI_Info_set(fd->info, "striping_unit", value);
74-
75-
fd->hints->striping_factor = lum->lmm_stripe_count;
76-
sprintf(value, "%d", lum->lmm_stripe_count);
77-
ADIOI_Info_set(fd->info, "striping_factor", value);
78-
79-
fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
80-
sprintf(value, "%d", lum->lmm_stripe_offset);
81-
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
82-
83-
ADIOI_Free(value);
84-
}
85-
ADIOI_Free(lum);
86-
87-
if (fd->access_mode & ADIO_APPEND)
88-
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
89-
}
53+
/* odd length here because lov_user_md contains some fixed data and
54+
* then a list of 'lmm_objects' representing stripe */
55+
lumlen = sizeof(struct lov_user_md) +
56+
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
57+
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
58+
59+
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
60+
/* we already validated in LUSTRE_SetInfo that these are going to be the same */
61+
if (fd->info != MPI_INFO_NULL) {
62+
/* striping information */
63+
ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
64+
value, &flag);
65+
if (flag)
66+
str_unit=atoll(value);
67+
68+
ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
69+
value, &flag);
70+
if (flag)
71+
str_factor=atoll(value);
72+
73+
ADIOI_Info_get(fd->info, "romio_lustre_start_iodevice",
74+
MPI_MAX_INFO_VAL, value, &flag);
75+
if (flag)
76+
start_iodev=atoll(value);
77+
}
78+
if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0))
79+
set_layout = 1;
80+
81+
/* if hints were set, we need to delay creation of any lustre objects.
82+
* However, if we open the file with O_LOV_DELAY_CREATE and don't call the
83+
* follow-up ioctl, subsequent writes will fail */
84+
if (myrank == 0 && set_layout)
85+
amode = amode | O_LOV_DELAY_CREATE;
86+
87+
fd->fd_sys = open(fd->filename, amode, perm);
88+
if (fd->fd_sys == -1) goto fn_exit;
89+
90+
/* we can only set these hints on new files */
91+
/* It was strange and buggy to open the file in the hint path. Instead,
92+
* we'll apply the file tunings at open time */
93+
if ((amode & O_CREAT) && set_layout ) {
94+
/* if user has specified striping info, process 0 tries to set it */
95+
if (!myrank) {
96+
lum->lmm_magic = LOV_USER_MAGIC;
97+
lum->lmm_pattern = 0;
98+
/* crude check for overflow of lustre internal datatypes.
99+
* Silently cap to large value if user provides a value
100+
* larger than lustre supports */
101+
if (str_unit > UINT_MAX)
102+
lum->lmm_stripe_size = UINT_MAX;
103+
else
104+
lum->lmm_stripe_size = str_unit;
105+
106+
if (str_factor > USHRT_MAX)
107+
lum->lmm_stripe_count = USHRT_MAX;
108+
else
109+
lum->lmm_stripe_count = str_factor;
110+
111+
if (start_iodev > USHRT_MAX)
112+
lum->lmm_stripe_offset = USHRT_MAX;
113+
else
114+
lum->lmm_stripe_offset = start_iodev;
115+
err = ioctl(fd->fd_sys, LL_IOC_LOV_SETSTRIPE, lum);
116+
if (err == -1 && errno != EEXIST) {
117+
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
118+
/* not a fatal error, but user might care to know */
119+
}
120+
} /* End of striping parameters validation */
121+
}
122+
123+
/* Pascal Deveze reports that, even though we pass a
124+
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
125+
* struct are uninitialized, the call can give an error. zero it out in case
126+
* there are other members that must be initialized and in case
127+
* lov_user_md struct changes in future */
128+
memset(lum, 0, lumlen);
129+
lum->lmm_magic = LOV_USER_MAGIC;
130+
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
131+
if (!err) {
132+
133+
fd->hints->striping_unit = lum->lmm_stripe_size;
134+
sprintf(value, "%d", lum->lmm_stripe_size);
135+
ADIOI_Info_set(fd->info, "striping_unit", value);
136+
137+
fd->hints->striping_factor = lum->lmm_stripe_count;
138+
sprintf(value, "%d", lum->lmm_stripe_count);
139+
ADIOI_Info_set(fd->info, "striping_factor", value);
140+
141+
fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
142+
sprintf(value, "%d", lum->lmm_stripe_offset);
143+
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
144+
145+
}
146+
147+
if (fd->access_mode & ADIO_APPEND)
148+
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
90149

91150
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
92151
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
@@ -101,6 +160,9 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
101160
fd->direct_write = fd->direct_read = 0;
102161
}
103162
}
163+
fn_exit:
164+
ADIOI_Free(lum);
165+
ADIOI_Free(value);
104166

105167
/* --BEGIN ERROR HANDLING-- */
106168
if (fd->fd_sys == -1 || ((fd->fd_direct == -1) &&

0 commit comments

Comments
 (0)