Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 113 additions & 13 deletions darshan-runtime/lib/darshan-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,12 @@ static int darshan_deflate_buffer(
static void darshan_core_cleanup(
struct darshan_core_runtime* core);
static void darshan_core_fork_child_cb(void);
/* custom MPI reduction operators for (sec, nsec) time tuples; these are
 * only defined and used under HAVE_MPI, so the prototypes are guarded the
 * same way to keep non-MPI builds from referencing MPI_Datatype
 */
#ifdef HAVE_MPI
static void darshan_core_reduce_min_time(
    void* in_time_v, void* inout_time_v,
    int *len, MPI_Datatype *datatype);
static void darshan_core_reduce_max_time(
    void* in_time_v, void* inout_time_v,
    int *len, MPI_Datatype *datatype);
#endif

#define DARSHAN_WARN(__err_str, ...) do { \
darshan_core_fprintf(stderr, "darshan_library_warning: " \
Expand Down Expand Up @@ -197,6 +203,7 @@ void darshan_core_initialize(int argc, char **argv)
int jobid;
int ret;
int i;
struct timespec start_ts;

/* setup darshan runtime if darshan is enabled and hasn't been initialized already */
if (__darshan_core != NULL || getenv("DARSHAN_DISABLE"))
Expand Down Expand Up @@ -309,7 +316,9 @@ void darshan_core_initialize(int argc, char **argv)

/* set known job-level metadata fields for the log file */
init_core->log_job_p->uid = getuid();
init_core->log_job_p->start_time = time(NULL);
clock_gettime(CLOCK_REALTIME, &start_ts);
init_core->log_job_p->start_time_sec = (int64_t)start_ts.tv_sec;
init_core->log_job_p->start_time_nsec = (int64_t)start_ts.tv_nsec;
init_core->log_job_p->nprocs = nprocs;
init_core->log_job_p->jobid = (int64_t)jobid;

Expand Down Expand Up @@ -390,6 +399,7 @@ void darshan_core_shutdown(int write_log)
{
struct darshan_core_runtime *final_core;
double start_log_time;
struct timespec end_ts;
int internal_timing_flag;
double open1 = 0, open2 = 0;
double job1 = 0, job2 = 0;
Expand All @@ -408,6 +418,8 @@ void darshan_core_shutdown(int write_log)
int i;
int ret;
#ifdef HAVE_MPI
MPI_Datatype ts_type;
MPI_Op ts_max_op, ts_min_op;
darshan_record_id *shared_recs = NULL;
darshan_record_id *mod_shared_recs = NULL;
int shared_rec_cnt = 0;
Expand Down Expand Up @@ -442,7 +454,9 @@ void darshan_core_shutdown(int write_log)
PMPI_Barrier(final_core->mpi_comm);
#endif
start_log_time = darshan_core_wtime_absolute();
final_core->log_job_p->end_time = time(NULL);
clock_gettime(CLOCK_REALTIME, &end_ts);
final_core->log_job_p->end_time_sec = (int64_t)end_ts.tv_sec;
final_core->log_job_p->end_time_nsec = (int64_t)end_ts.tv_nsec;

internal_timing_flag = final_core->config.internal_timing_flag;

Expand Down Expand Up @@ -476,22 +490,30 @@ void darshan_core_shutdown(int write_log)
MPI_INT, MPI_SUM, final_core->mpi_comm);

/* reduce to report first start and last end time across all ranks at rank 0 */
/* NOTE: custom MPI max/min reduction operators required for sec/nsec time tuples */
PMPI_Type_contiguous(2, MPI_INT64_T, &ts_type);
PMPI_Type_commit(&ts_type);
PMPI_Op_create(darshan_core_reduce_min_time, 1, &ts_min_op);
PMPI_Op_create(darshan_core_reduce_max_time, 1, &ts_max_op);
if(my_rank == 0)
{
PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->start_time,
1, MPI_INT64_T, MPI_MIN, 0, final_core->mpi_comm);
PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->end_time,
1, MPI_INT64_T, MPI_MAX, 0, final_core->mpi_comm);
PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->start_time_sec,
1, ts_type, ts_min_op, 0, final_core->mpi_comm);
PMPI_Reduce(MPI_IN_PLACE, &final_core->log_job_p->end_time_sec,
1, ts_type, ts_max_op, 0, final_core->mpi_comm);
}
else
{
PMPI_Reduce(&final_core->log_job_p->start_time,
&final_core->log_job_p->start_time,
1, MPI_INT64_T, MPI_MIN, 0, final_core->mpi_comm);
PMPI_Reduce(&final_core->log_job_p->end_time,
&final_core->log_job_p->end_time,
1, MPI_INT64_T, MPI_MAX, 0, final_core->mpi_comm);
PMPI_Reduce(&final_core->log_job_p->start_time_sec,
&final_core->log_job_p->start_time_sec,
1, ts_type, ts_min_op, 0, final_core->mpi_comm);
PMPI_Reduce(&final_core->log_job_p->end_time_sec,
&final_core->log_job_p->end_time_sec,
1, ts_type, ts_max_op, 0, final_core->mpi_comm);
}
PMPI_Type_free(&ts_type);
PMPI_Op_free(&ts_min_op);
PMPI_Op_free(&ts_max_op);

/* get a list of records which are shared across all processes */
darshan_get_shared_records(final_core, &shared_recs, &shared_rec_cnt);
Expand Down Expand Up @@ -1486,7 +1508,6 @@ static void darshan_get_logfile_name(

jobid = core->log_job_p->jobid;
pid = core->pid;
start_time = core->log_job_p->start_time;

/* first, check if user specifies a complete logpath to use */
user_logfile_name = getenv("DARSHAN_LOGFILE");
Expand Down Expand Up @@ -1514,6 +1535,7 @@ static void darshan_get_logfile_name(
logmod = darshan_hash((void*)hname,strlen(hname),hlevel);

/* use human readable start time format in log filename */
start_time = (time_t)core->log_job_p->start_time_sec;
start_tm = localtime(&start_time);

if(core->config.log_path_byenv)
Expand Down Expand Up @@ -2262,6 +2284,84 @@ static int darshan_core_name_is_excluded(const char *name, darshan_module_id mod
return(0);
}

#ifdef HAVE_MPI
/* user-defined MPI reduction callback: element-wise minimum of timestamps
 *
 * Both buffers are treated as arrays of *len timestamps, each stored as two
 * consecutive int64_t values (seconds followed by nanoseconds). For every
 * element, the earlier of the two timestamps is left in inout_time_v.
 * Seconds are compared first; nanoseconds only break ties.
 *
 * in_time_v:    incoming timestamps (read only)
 * inout_time_v: local timestamps, overwritten with the result
 * len:          number of (sec, nsec) tuples in each buffer
 * datatype:     unused
 */
static void darshan_core_reduce_min_time(void* in_time_v, void* inout_time_v,
    int *len, MPI_Datatype *datatype)
{
    const int64_t *src = in_time_v;
    int64_t *dst = inout_time_v;
    int remaining;

    /* each tuple occupies two int64_t slots: [0] = sec, [1] = nsec */
    for(remaining = *len; remaining > 0; remaining--, src += 2, dst += 2)
    {
        int src_is_earlier =
            (src[0] < dst[0]) ||
            ((src[0] == dst[0]) && (src[1] < dst[1]));

        /* keep the existing value unless the incoming one is strictly earlier */
        if(src_is_earlier)
        {
            dst[0] = src[0];
            dst[1] = src[1];
        }
    }

    return;
}

/* user-defined MPI reduction callback: element-wise maximum of timestamps
 *
 * Both buffers are treated as arrays of *len timestamps, each stored as two
 * consecutive int64_t values (seconds followed by nanoseconds). For every
 * element, the later of the two timestamps is left in inout_time_v.
 * Seconds are compared first; nanoseconds only break ties.
 *
 * in_time_v:    incoming timestamps (read only)
 * inout_time_v: local timestamps, overwritten with the result
 * len:          number of (sec, nsec) tuples in each buffer
 * datatype:     unused
 */
static void darshan_core_reduce_max_time(void* in_time_v, void* inout_time_v,
    int *len, MPI_Datatype *datatype)
{
    const int64_t *src = in_time_v;
    int64_t *dst = inout_time_v;
    int remaining;

    /* each tuple occupies two int64_t slots: [0] = sec, [1] = nsec */
    for(remaining = *len; remaining > 0; remaining--, src += 2, dst += 2)
    {
        int src_is_later =
            (src[0] > dst[0]) ||
            ((src[0] == dst[0]) && (src[1] > dst[1]));

        /* keep the existing value unless the incoming one is strictly later */
        if(src_is_later)
        {
            dst[0] = src[0];
            dst[1] = src[1];
        }
    }

    return;
}
#endif

/* crude benchmarking hook into darshan-core to benchmark Darshan
* shutdown overhead using a variety of application I/O workloads
*/
Expand Down
6 changes: 1 addition & 5 deletions darshan-util/darshan-analyzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,7 @@ int process_log(const char *fname, double *io_ratio, int *used_mpio, int *used_p
if (file->mod_map[DARSHAN_PNETCDF_FILE_MOD].len > 0 || file->mod_map[DARSHAN_PNETCDF_VAR_MOD].len > 0)
*used_pnet += 1;

total_job_time = (double)job.end_time - (double)job.start_time;
if (total_job_time < 1.0)
{
total_job_time = 1.0;
}
darshan_log_get_job_runtime(file, job, &total_job_time);

if (f_count > 0)
{
Expand Down
14 changes: 7 additions & 7 deletions darshan-util/darshan-diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,16 @@ int main(int argc, char *argv[])

if (job1.uid != job2.uid)
print_int64_diff("# uid:", job1.uid, job2.uid);
if (job1.start_time != job2.start_time)
print_int64_diff("# start_time:", job1.start_time, job2.start_time);
if (job1.end_time != job2.end_time)
print_int64_diff("# end_time:", job1.end_time, job2.end_time);
if (job1.start_time_sec != job2.start_time_sec)
print_int64_diff("# start_time:", job1.start_time_sec, job2.start_time_sec);
if (job1.end_time_sec != job2.end_time_sec)
print_int64_diff("# end_time:", job1.end_time_sec, job2.end_time_sec);
if (job1.nprocs != job2.nprocs)
print_int64_diff("# nprocs:", job1.nprocs, job2.nprocs);
if ((job1.end_time-job1.start_time) != (job2.end_time - job2.start_time))
if ((job1.end_time_sec-job1.start_time_sec) != (job2.end_time_sec-job2.start_time_sec))
print_int64_diff("# run time:",
(int64_t)(job1.end_time - job1.start_time + 1),
(int64_t)(job2.end_time - job2.start_time + 1));
(int64_t)(job1.end_time_sec - job1.start_time_sec + 1),
(int64_t)(job2.end_time_sec - job2.start_time_sec + 1));

/* get hash of record ids to file names for each log */
ret = darshan_log_get_namehash(file1, &name_hash1);
Expand Down
15 changes: 7 additions & 8 deletions darshan-util/darshan-dxt-parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ int main(int argc, char **argv)
int mount_count;
struct darshan_mnt_info *mnt_data_array;
time_t tmp_time = 0;
int64_t run_time = 0;
double run_time;
char *token;
char *save;
char buffer[DARSHAN_JOB_METADATA_LEN];
Expand Down Expand Up @@ -107,17 +107,16 @@ int main(int argc, char **argv)
printf("# exe: %s\n", tmp_string);
printf("# uid: %" PRId64 "\n", job.uid);
printf("# jobid: %" PRId64 "\n", job.jobid);
printf("# start_time: %" PRId64 "\n", job.start_time);
tmp_time += job.start_time;
printf("# start_time: %" PRId64 "\n", job.start_time_sec);
tmp_time += job.start_time_sec;
printf("# start_time_asci: %s", ctime(&tmp_time));
printf("# end_time: %" PRId64 "\n", job.end_time);
printf("# end_time: %" PRId64 "\n", job.end_time_sec);
tmp_time = 0;
tmp_time += job.end_time;
tmp_time += job.end_time_sec;
printf("# end_time_asci: %s", ctime(&tmp_time));
printf("# nprocs: %" PRId64 "\n", job.nprocs);
if (job.end_time >= job.start_time)
run_time = job.end_time - job.start_time + 1;
printf("# run time: %" PRId64 "\n", run_time);
darshan_log_get_job_runtime(fd, job, &run_time);
printf("# run time: %.4lf\n", run_time);
for (token = strtok_r(job.metadata, "\n", &save);
token != NULL;
token = strtok_r(NULL, "\n", &save))
Expand Down
74 changes: 69 additions & 5 deletions darshan-util/darshan-logutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,11 @@ darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type
int darshan_log_get_job(darshan_fd fd, struct darshan_job *job)
{
struct darshan_fd_int_state *state;
int log_ver_maj, log_ver_min;
char job_buf[DARSHAN_JOB_RECORD_SIZE] = {0};
int job_buf_sz = DARSHAN_JOB_RECORD_SIZE;
char *trailing_data;
int trailing_data_size;
int ret;

if(!fd)
Expand All @@ -283,6 +286,14 @@ int darshan_log_get_job(darshan_fd fd, struct darshan_job *job)
assert(state);
assert(fd->job_map.len > 0 && fd->job_map.off > 0);

/* get major/minor version numbers */
ret = darshan_log_get_format_version(fd->version, &log_ver_maj, &log_ver_min);
if(ret < 0)
{
fprintf(stderr, "Error: unable to parse log file format version.\n");
return(-1);
}

/* read the compressed job data from the log file */
ret = darshan_log_dzread(fd, DARSHAN_JOB_REGION_ID, job_buf, job_buf_sz);
if(ret <= (int)sizeof(*job))
Expand All @@ -291,24 +302,54 @@ int darshan_log_get_job(darshan_fd fd, struct darshan_job *job)
return(-1);
}

memcpy(job, job_buf, sizeof(*job));
/* NOTE: job definition changed to include start/end time nsecs in ver 3.41 */
if(((log_ver_maj == 3) && (log_ver_min >= 41)) || (log_ver_maj > 3))
{
memcpy(job, job_buf, sizeof(*job));
trailing_data = &job_buf[sizeof(*job)];
trailing_data_size = DARSHAN_EXE_LEN+1;
}
else
{
/* backwards compatibility with prior versions of the Darshan job struct,
* which does not have fields for start_time_nsec or
* end_time_nsec
*/
int64_t *tmp_ptr = (int64_t *)job_buf;
job->uid = *(tmp_ptr++);
job->start_time_sec = *(tmp_ptr++);
job->start_time_nsec = 0;
job->end_time_sec = *(tmp_ptr++);
job->end_time_nsec = 0;
job->nprocs = *(tmp_ptr++);
job->jobid = *(tmp_ptr++);
memcpy(job->metadata, tmp_ptr, DARSHAN_JOB_METADATA_LEN);
trailing_data = (char *)tmp_ptr + DARSHAN_JOB_METADATA_LEN;
trailing_data_size = DARSHAN_EXE_LEN+1+(2*sizeof(int64_t));
}

if(fd->swap_flag)
{
/* swap bytes if necessary */
DARSHAN_BSWAP64(&job->uid);
DARSHAN_BSWAP64(&job->start_time);
DARSHAN_BSWAP64(&job->end_time);
DARSHAN_BSWAP64(&job->start_time_sec);
DARSHAN_BSWAP64(&job->end_time_sec);
/* don't byte swap fields explicitly set during up-conversion */
if(((log_ver_maj == 3) && (log_ver_min >= 41)) || (log_ver_maj > 3))
{
DARSHAN_BSWAP64(&job->start_time_nsec);
DARSHAN_BSWAP64(&job->end_time_nsec);
}
DARSHAN_BSWAP64(&job->nprocs);
DARSHAN_BSWAP64(&job->jobid);
}

/* save trailing exe & mount information, so it can be retrieved later */
if(!(state->exe_mnt_data))
state->exe_mnt_data = malloc(DARSHAN_EXE_LEN+1);
state->exe_mnt_data = malloc(trailing_data_size);
if(!(state->exe_mnt_data))
return(-1);
memcpy(state->exe_mnt_data, &job_buf[sizeof(*job)], DARSHAN_EXE_LEN+1);
memcpy(state->exe_mnt_data, trailing_data, trailing_data_size);

return(0);
}
Expand Down Expand Up @@ -893,6 +934,29 @@ char *darshan_log_get_lib_version(void)
return darshan_util_lib_ver;
}

/* darshan_log_get_job_runtime()
 *
 * Compute the runtime of a job, in seconds, from the start/end timestamps
 * stored in its job record.
 *
 * fd:      log file handle; its version string selects the calculation
 * job:     job record holding the start/end timestamps
 * runtime: output; set to 0 on any failure after the NULL check
 *
 * For log format versions >= 3.41 the result uses the sec/nsec tuples.
 * For older versions only whole seconds are used, and 1 is added to the
 * difference.
 *
 * returns 0 on success, -1 if an argument is NULL or the log format
 * version cannot be parsed
 */
int darshan_log_get_job_runtime(darshan_fd fd, struct darshan_job job, double *runtime)
{
    int log_ver_maj, log_ver_min;
    int ret;

    if(!runtime)
        return(-1);

    /* give callers that ignore the return value a defined result */
    *runtime = 0;

    if(!fd)
        return(-1);

    /* get major/minor version numbers */
    ret = darshan_log_get_format_version(fd->version, &log_ver_maj, &log_ver_min);
    if(ret < 0)
    {
        fprintf(stderr, "Error: unable to parse log file format version.\n");
        return(-1);
    }

    if(((log_ver_maj == 3) && (log_ver_min >= 41)) || (log_ver_maj > 3))
    {
        /* subtract the integer components before converting to double;
         * adding the nsec fraction to an epoch-sized second count first
         * would discard the low-order digits of the fraction
         */
        *runtime = (double)(job.end_time_sec - job.start_time_sec) +
            ((double)(job.end_time_nsec - job.start_time_nsec) / 1e9);
    }
    else
    {
        *runtime = (double)(job.end_time_sec - job.start_time_sec + 1);
    }

    return(0);
}

/********************************************************
* internal helper functions *
********************************************************/
Expand Down
1 change: 1 addition & 0 deletions darshan-util/darshan-logutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ int darshan_log_put_mod(darshan_fd fd, darshan_module_id mod_id,
void darshan_log_close(darshan_fd file);
void darshan_log_print_version_warnings(const char *version_string);
char *darshan_log_get_lib_version(void);
int darshan_log_get_job_runtime(darshan_fd fd, struct darshan_job job, double *runtime);
void darshan_log_get_modules(darshan_fd fd, struct darshan_mod_info **mods,
int* count);
void darshan_log_get_name_records(darshan_fd fd,
Expand Down
Loading