Skip to content

Commit 19fe5ca

Browse files
committed
io/ompio: next step in code-reorganization
- move the sort_iovec operations to fcoll/base
- move set_view_internal to common/ompio
- move set_file_default to common/ompio
- remove io_ompio_sort, not used anymore.
1 parent cb00866 commit 19fe5ca

17 files changed

+518
-595
lines changed

ompi/mca/common/ompio/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ headers = \
2525
sources = \
2626
common_ompio_print_queue.c \
2727
common_ompio_file_open.c \
28+
common_ompio_file_view.c \
2829
common_ompio_file_read.c \
2930
common_ompio_file_write.c
3031

ompi/mca/common/ompio/common_ompio.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,10 @@ OMPI_DECLSPEC int mca_common_ompio_file_close (mca_io_ompio_file_t *ompio_fh);
8282
OMPI_DECLSPEC int mca_common_ompio_file_get_size (mca_io_ompio_file_t *ompio_fh, OMPI_MPI_OFFSET_TYPE *size);
8383
OMPI_DECLSPEC int mca_common_ompio_file_get_position (mca_io_ompio_file_t *fh,OMPI_MPI_OFFSET_TYPE *offset);
8484
OMPI_DECLSPEC int mca_common_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset);
85-
85+
OMPI_DECLSPEC int mca_common_ompio_set_file_defaults (mca_io_ompio_file_t *fh);
86+
OMPI_DECLSPEC int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE disp,
87+
ompi_datatype_t *etype, ompi_datatype_t *filetype, const char *datarep,
88+
ompi_info_t *info);
8689

8790

8891
#endif /* MCA_COMMON_OMPIO_H */

ompi/mca/common/ompio/common_ompio_file_open.c

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm,
9191
ompio_fh->f_info = info;
9292
ompio_fh->f_atomicity = 0;
9393

94-
ompi_io_ompio_set_file_defaults (ompio_fh);
94+
mca_common_ompio_set_file_defaults (ompio_fh);
9595
ompio_fh->f_filename = filename;
9696

9797
ompio_fh->f_split_coll_req = NULL;
@@ -105,9 +105,6 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm,
105105
ompio_fh->f_decode_datatype=ompi_io_ompio_decode_datatype;
106106
ompio_fh->f_generate_current_file_view=ompi_io_ompio_generate_current_file_view;
107107

108-
ompio_fh->f_sort=ompi_io_ompio_sort;
109-
ompio_fh->f_sort_iovec=ompi_io_ompio_sort_iovec;
110-
111108
ompio_fh->f_get_num_aggregators=mca_io_ompio_get_num_aggregators;
112109
ompio_fh->f_get_bytes_per_agg=mca_io_ompio_get_bytes_per_agg;
113110
ompio_fh->f_set_aggregator_props=mca_io_ompio_set_aggregator_props;
@@ -377,4 +374,78 @@ int mca_common_ompio_file_get_position (mca_io_ompio_file_t *fh,
377374
return OMPI_SUCCESS;
378375
}
379376

377+
/*
 * Put an ompio file handle into its default state.
 *
 * The I/O array, permission, flags, offsets, grouping and aggregator
 * fields of fh are reset, the default file view (displacement 0, etype and
 * filetype both MPI_BYTE, "native" data representation) is installed via
 * mca_common_ompio_set_view(), and fh->f_iov_type is created and committed
 * as a struct datatype made of two longs: one at the start of a decoded
 * iovec entry and one at the offset of its iov_len member.
 *
 * fh->f_info is passed on to mca_common_ompio_set_view(), so it has to be
 * set by the caller before this function is invoked.
 *
 * @param fh  file handle to initialize
 *
 * @return OMPI_SUCCESS on success, OMPI_ERROR if fh is NULL, otherwise the
 *         error code reported by mca_common_ompio_set_view(),
 *         ompi_datatype_create_struct() or ompi_datatype_commit().
 */
int mca_common_ompio_set_file_defaults (mca_io_ompio_file_t *fh)
{
    ompi_datatype_t *types[2];
    int blocklen[2] = {1, 1};
    OPAL_PTRDIFF_TYPE d[2];
    int ret;

    if (NULL == fh) {
        return OMPI_ERROR;
    }

    fh->f_io_array = NULL;
    fh->f_perm = OMPIO_PERM_NULL;
    fh->f_flags = 0;
    fh->f_bytes_per_agg = mca_io_ompio_bytes_per_agg;
    /* Released and re-created by mca_common_ompio_set_view() below. */
    fh->f_datarep = strdup ("native");

    fh->f_offset = 0;
    fh->f_disp = 0;
    fh->f_position_in_file_view = 0;
    fh->f_index_in_file_view = 0;
    fh->f_total_bytes = 0;

    fh->f_init_procs_per_group = -1;
    fh->f_init_procs_in_group = NULL;

    fh->f_procs_per_group = -1;
    fh->f_procs_in_group = NULL;

    fh->f_init_num_aggrs = -1;
    fh->f_init_aggr_list = NULL;

    /* Default file view */
    fh->f_iov_type = MPI_DATATYPE_NULL;
    fh->f_stripe_size = mca_io_ompio_bytes_per_agg;
    /* Decoded iovec of the file view. These must be NULL before the call
     * below, because mca_common_ompio_set_view() destroys/frees whatever
     * non-NULL values it finds in them. */
    fh->f_decoded_iov = NULL;
    fh->f_etype = NULL;
    fh->f_filetype = NULL;
    fh->f_orig_filetype = NULL;

    ret = mca_common_ompio_set_view (fh,
                                     0,
                                     &ompi_mpi_byte.dt,
                                     &ompi_mpi_byte.dt,
                                     "native",
                                     fh->f_info);
    if (OMPI_SUCCESS != ret) {
        /* Without a view, fh->f_decoded_iov cannot be relied upon for the
         * address arithmetic below; report the failure to the caller. */
        return ret;
    }

    /* Create a derived datatype describing one entry of the decoded iovec:
     * displacements are taken relative to the start of the entry. */
    types[0] = &ompi_mpi_long.dt;
    types[1] = &ompi_mpi_long.dt;

    d[0] = 0;
    d[1] = (OPAL_PTRDIFF_TYPE) &fh->f_decoded_iov[0].iov_len
         - (OPAL_PTRDIFF_TYPE) fh->f_decoded_iov;

    ret = ompi_datatype_create_struct (2,
                                       blocklen,
                                       d,
                                       types,
                                       &fh->f_iov_type);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    return ompi_datatype_commit (&fh->f_iov_type);
}
450+
380451

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
/*
2+
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3+
* University Research and Technology
4+
* Corporation. All rights reserved.
5+
* Copyright (c) 2004-2005 The University of Tennessee and The University
6+
* of Tennessee Research Foundation. All rights
7+
* reserved.
8+
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* University of Stuttgart. All rights reserved.
10+
* Copyright (c) 2004-2005 The Regents of the University of California.
11+
* All rights reserved.
12+
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
13+
* $COPYRIGHT$
14+
*
15+
* Additional copyrights may follow
16+
*
17+
* $HEADER$
18+
*/
19+
20+
#include "ompi_config.h"
21+
#include "opal/datatype/opal_convertor.h"
22+
#include "ompi/datatype/ompi_datatype.h"
23+
#include <stdlib.h>
24+
#include <stdio.h>
25+
26+
#include "common_ompio.h"
27+
#include "ompi/mca/fcoll/base/base.h"
28+
29+
static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *);
30+
static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype );
31+
/*
 * Produce a datatype handle that can be stored in a file handle.
 *
 * Predefined datatypes are handed back unchanged (no copy is made).
 * Any other datatype is duplicated with ompi_datatype_duplicate() and the
 * copy is tagged with MPI_COMBINER_DUP, recording oldtype as its single
 * datatype argument.
 *
 * @param oldtype  datatype to duplicate
 * @param newtype  out: oldtype itself if predefined, otherwise the new
 *                 duplicate; left untouched on failure
 *
 * @return OMPI_SUCCESS, or MPI_ERR_INTERN if the duplication failed.
 */
static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype )
{
    /* Must start out as NULL: the failure path below inspects it, and the
     * duplicate call is not guaranteed to have written it when it fails. */
    ompi_datatype_t *type = NULL;

    if( ompi_datatype_is_predefined(oldtype) ) {
        *newtype = oldtype;
        return OMPI_SUCCESS;
    }

    if ( OMPI_SUCCESS != ompi_datatype_duplicate (oldtype, &type)){
        /* Only release a handle that was actually produced; destroying an
         * uninitialized pointer would be undefined behaviour. */
        if ( NULL != type ) {
            ompi_datatype_destroy (&type);
        }
        return MPI_ERR_INTERN;
    }

    ompi_datatype_set_args( type, 0, NULL, 0, NULL, 1, &oldtype, MPI_COMBINER_DUP );

    *newtype = type;
    return OMPI_SUCCESS;
}
49+
50+
51+
int mca_common_ompio_set_view (mca_io_ompio_file_t *fh,
52+
OMPI_MPI_OFFSET_TYPE disp,
53+
ompi_datatype_t *etype,
54+
ompi_datatype_t *filetype,
55+
const char *datarep,
56+
ompi_info_t *info)
57+
{
58+
59+
size_t max_data = 0;
60+
int i;
61+
int num_groups = 0;
62+
mca_io_ompio_contg *contg_groups;
63+
64+
size_t ftype_size;
65+
OPAL_PTRDIFF_TYPE ftype_extent, lb, ub;
66+
ompi_datatype_t *newfiletype;
67+
68+
if ( NULL != fh->f_etype ) {
69+
ompi_datatype_destroy (&fh->f_etype);
70+
}
71+
if ( NULL != fh->f_filetype ) {
72+
ompi_datatype_destroy (&fh->f_filetype);
73+
}
74+
if ( NULL != fh->f_orig_filetype ) {
75+
ompi_datatype_destroy (&fh->f_orig_filetype);
76+
}
77+
if (NULL != fh->f_decoded_iov) {
78+
free (fh->f_decoded_iov);
79+
fh->f_decoded_iov = NULL;
80+
}
81+
82+
if (NULL != fh->f_datarep) {
83+
free (fh->f_datarep);
84+
fh->f_datarep = NULL;
85+
}
86+
87+
/* Reset the flags first */
88+
fh->f_flags = 0;
89+
90+
fh->f_flags |= OMPIO_FILE_VIEW_IS_SET;
91+
fh->f_datarep = strdup (datarep);
92+
datatype_duplicate (filetype, &fh->f_orig_filetype );
93+
94+
opal_datatype_get_extent(&filetype->super, &lb, &ftype_extent);
95+
opal_datatype_type_size (&filetype->super, &ftype_size);
96+
97+
if ( etype == filetype &&
98+
ompi_datatype_is_predefined (filetype ) &&
99+
ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){
100+
ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE,
101+
&ompi_mpi_byte.dt,
102+
&newfiletype);
103+
ompi_datatype_commit (&newfiletype);
104+
}
105+
else {
106+
newfiletype = filetype;
107+
}
108+
109+
fh->f_iov_count = 0;
110+
fh->f_disp = disp;
111+
fh->f_offset = disp;
112+
fh->f_total_bytes = 0;
113+
fh->f_index_in_file_view=0;
114+
fh->f_position_in_file_view=0;
115+
116+
ompi_io_ompio_decode_datatype (fh,
117+
newfiletype,
118+
1,
119+
NULL,
120+
&max_data,
121+
&fh->f_decoded_iov,
122+
&fh->f_iov_count);
123+
124+
opal_datatype_get_extent(&newfiletype->super, &lb, &fh->f_view_extent);
125+
opal_datatype_type_ub (&newfiletype->super, &ub);
126+
opal_datatype_type_size (&etype->super, &fh->f_etype_size);
127+
opal_datatype_type_size (&newfiletype->super, &fh->f_view_size);
128+
datatype_duplicate (etype, &fh->f_etype);
129+
// This file type is our own representation. The original is stored
130+
// in orig_file type, No need to set args on this one.
131+
ompi_datatype_duplicate (newfiletype, &fh->f_filetype);
132+
133+
fh->f_cc_size = get_contiguous_chunk_size (fh);
134+
135+
if (opal_datatype_is_contiguous_memory_layout(&etype->super,1)) {
136+
if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1) &&
137+
fh->f_view_extent == (OPAL_PTRDIFF_TYPE)fh->f_view_size ) {
138+
fh->f_flags |= OMPIO_CONTIGUOUS_FVIEW;
139+
}
140+
}
141+
142+
contg_groups = (mca_io_ompio_contg*) calloc ( 1, fh->f_size * sizeof(mca_io_ompio_contg));
143+
if (NULL == contg_groups) {
144+
opal_output (1, "OUT OF MEMORY\n");
145+
return OMPI_ERR_OUT_OF_RESOURCE;
146+
}
147+
for( i = 0; i < fh->f_size; i++){
148+
contg_groups[i].procs_in_contg_group = (int*)calloc (1,fh->f_size * sizeof(int));
149+
if(NULL == contg_groups[i].procs_in_contg_group){
150+
int j;
151+
opal_output (1, "OUT OF MEMORY\n");
152+
for(j=0; j<i; j++) {
153+
free(contg_groups[j].procs_in_contg_group);
154+
}
155+
free(contg_groups);
156+
return OMPI_ERR_OUT_OF_RESOURCE;
157+
}
158+
}
159+
160+
if ( SIMPLE != mca_io_ompio_grouping_option ) {
161+
if( OMPI_SUCCESS != mca_io_ompio_fview_based_grouping(fh,
162+
&num_groups,
163+
contg_groups)){
164+
opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_fview_based_grouping failed\n");
165+
free(contg_groups);
166+
return OMPI_ERROR;
167+
}
168+
}
169+
else {
170+
if( OMPI_SUCCESS != mca_io_ompio_simple_grouping(fh,
171+
&num_groups,
172+
contg_groups)){
173+
opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_simple_grouping failed\n");
174+
free(contg_groups);
175+
return OMPI_ERROR;
176+
}
177+
}
178+
179+
180+
if ( OMPI_SUCCESS != mca_io_ompio_finalize_initial_grouping(fh,
181+
num_groups,
182+
contg_groups) ){
183+
opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_finalize_initial_grouping failed\n");
184+
free(contg_groups);
185+
return OMPI_ERROR;
186+
}
187+
for( i = 0; i < fh->f_size; i++){
188+
free(contg_groups[i].procs_in_contg_group);
189+
}
190+
free(contg_groups);
191+
192+
if ( etype == filetype &&
193+
ompi_datatype_is_predefined (filetype ) &&
194+
ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){
195+
ompi_datatype_destroy ( &newfiletype );
196+
}
197+
198+
199+
if (OMPI_SUCCESS != mca_fcoll_base_file_select (fh, NULL)) {
200+
opal_output(1, "mca_common_ompio_set_view: mca_fcoll_base_file_select() failed\n");
201+
return OMPI_ERROR;
202+
}
203+
204+
return OMPI_SUCCESS;
205+
}
206+
207+
OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh)
{
    /*
    ** Returns the average size of a contiguous section of the file view,
    ** averaged first over the sections of this process and then over all
    ** fh->f_size processes of fh->f_comm.
    **
    ** Three values are summed across the communicator with one allreduce:
    **   [0] local average section length
    **   [1] local number of sections
    **   [2] local irregularity flag (1 if two neighbouring sections of this
    **       process differ in length, 0 otherwise)
    **
    ** A view is called uniform when every process has the same number of
    ** contiguous sections and every section has exactly the same size.
    ** The code that would turn that into OMPIO_UNIFORM_FVIEW is compiled
    ** out below, so the flag value is reduced but not acted upon.
    */
    OMPI_MPI_OFFSET_TYPE local_stats[3]  = {0,0,0};
    OMPI_MPI_OFFSET_TYPE summed_stats[3] = {0,0,0};
    const int n_sections = (int)fh->f_iov_count;
    int irregular = 0;
    int k;

    /* Total number of bytes described by the local view. */
    for (k = 0; k < n_sections; k++) {
        local_stats[0] += fh->f_decoded_iov[k].iov_len;
    }

    /* Stop at the first pair of neighbours with different lengths. */
    for (k = 1; k < n_sections && 0 == irregular; k++) {
        if (fh->f_decoded_iov[k].iov_len != fh->f_decoded_iov[k-1].iov_len) {
            irregular = 1;
        }
    }

    if ( 0 != fh->f_iov_count ) {
        local_stats[0] = local_stats[0]/fh->f_iov_count;
    }
    local_stats[1] = (OMPI_MPI_OFFSET_TYPE) fh->f_iov_count;
    local_stats[2] = (OMPI_MPI_OFFSET_TYPE) irregular;

    fh->f_comm->c_coll.coll_allreduce (local_stats,
                                       summed_stats,
                                       3,
                                       OMPI_OFFSET_DATATYPE,
                                       MPI_SUM,
                                       fh->f_comm,
                                       fh->f_comm->c_coll.coll_allreduce_module);

    /* Turn the sums into per-process averages. */
    summed_stats[0] = summed_stats[0]/fh->f_size;
    summed_stats[1] = summed_stats[1]/fh->f_size;

#if 0
    /* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. */
    int global_uniform=0;

    if ( summed_stats[0] == local_stats[0] &&
         summed_stats[1] == local_stats[1] &&
         0 == local_stats[2]               &&
         0 == summed_stats[2] ) {
        irregular = 0;
    }
    else {
        irregular = 1;
    }

    /* second confirmation round to see whether all processes agree
    ** on having a uniform file view or not
    */
    fh->f_comm->c_coll.coll_allreduce (&irregular,
                                       &global_uniform,
                                       1,
                                       MPI_INT,
                                       MPI_MAX,
                                       fh->f_comm,
                                       fh->f_comm->c_coll.coll_allreduce_module);

    if ( 0 == global_uniform  ){
        /* yes, everybody agrees on having a uniform file view */
        fh->f_flags |= OMPIO_UNIFORM_FVIEW;
    }
#endif

    return summed_stats[0];
}
278+
279+

0 commit comments

Comments
 (0)