Skip to content

Commit d3dd5d4

Browse files
authored
Merge pull request #961 from zoziha/buffer-1
Improve text file reading performance
2 parents 07fce84 + 4cf8c21 commit d3dd5d4

File tree

3 files changed

+128
-27
lines changed

3 files changed

+128
-27
lines changed

src/fpm_filesystem.F90

Lines changed: 55 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
!!
33
module fpm_filesystem
44
use,intrinsic :: iso_fortran_env, only : stdin=>input_unit, stdout=>output_unit, stderr=>error_unit
5+
use,intrinsic :: iso_c_binding, only: c_new_line
56
use fpm_environment, only: get_os_type, &
67
OS_UNKNOWN, OS_LINUX, OS_MACOS, OS_WINDOWS, &
78
OS_CYGWIN, OS_SOLARIS, OS_FREEBSD, OS_OPENBSD
89
use fpm_environment, only: separator, get_env, os_is_unix
9-
use fpm_strings, only: f_string, replace, string_t, split, dilate, str_begins_with_str
10+
use fpm_strings, only: f_string, replace, string_t, split, split_first_last, dilate, str_begins_with_str
1011
use iso_c_binding, only: c_char, c_ptr, c_int, c_null_char, c_associated, c_f_pointer
1112
use fpm_error, only : fpm_stop, error_t, fatal_error
1213
implicit none
@@ -50,6 +51,8 @@ end function c_is_dir
5051
end interface
5152
#endif
5253

54+
character(*), parameter :: eol = new_line('a') !! End of line
55+
5356
contains
5457

5558
!> Extract filename from path with/without suffix
@@ -302,37 +305,71 @@ integer function number_of_rows(s) result(nrows)
302305
end function number_of_rows
303306

304307
!> read lines into an array of TYPE(STRING_T) variables expanding tabs
305-
function read_lines_expanded(fh) result(lines)
306-
integer, intent(in) :: fh
308+
function read_lines_expanded(filename) result(lines)
309+
character(len=*), intent(in) :: filename
307310
type(string_t), allocatable :: lines(:)
308311

309312
integer :: i
310-
integer :: iostat
311-
character(len=:),allocatable :: line_buffer_read
313+
character(len=:), allocatable :: content
314+
integer, allocatable :: first(:), last(:)
315+
316+
content = read_text_file(filename)
317+
if (len(content) == 0) then
318+
allocate (lines(0))
319+
return
320+
end if
321+
322+
call split_first_last(content, eol, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows)
312323

313-
allocate(lines(number_of_rows(fh)))
314-
do i = 1, size(lines)
315-
call getline(fh, line_buffer_read, iostat)
316-
lines(i)%s = dilate(line_buffer_read)
324+
! allocate lines from file content string
325+
allocate (lines(size(first)))
326+
do i = 1, size(first)
327+
allocate(lines(i)%s, source=dilate(content(first(i):last(i))))
317328
end do
318329

319330
end function read_lines_expanded
320331

321332
!> read lines into an array of TYPE(STRING_T) variables
322-
function read_lines(fh) result(lines)
323-
integer, intent(in) :: fh
333+
function read_lines(filename) result(lines)
334+
character(len=*), intent(in) :: filename
324335
type(string_t), allocatable :: lines(:)
325336

326337
integer :: i
327-
integer :: iostat
338+
character(len=:), allocatable :: content
339+
integer, allocatable :: first(:), last(:)
328340

329-
allocate(lines(number_of_rows(fh)))
330-
do i = 1, size(lines)
331-
call getline(fh, lines(i)%s, iostat)
341+
content = read_text_file(filename)
342+
if (len(content) == 0) then
343+
allocate (lines(0))
344+
return
345+
end if
346+
347+
call split_first_last(content, eol, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows)
348+
349+
! allocate lines from file content string
350+
allocate (lines(size(first)))
351+
do i = 1, size(first)
352+
allocate(lines(i)%s, source=content(first(i):last(i)))
332353
end do
333354

334355
end function read_lines
335356

357+
!> read text file into a string
358+
function read_text_file(filename) result(string)
359+
character(len=*), intent(in) :: filename
360+
character(len=:), allocatable :: string
361+
integer :: fh, length
362+
363+
open (newunit=fh, file=filename, status='old', action='read', &
364+
access='stream', form='unformatted')
365+
inquire (fh, size=length)
366+
allocate (character(len=length) :: string)
367+
if (length == 0) return
368+
read (fh) string
369+
close (fh)
370+
371+
end function read_text_file
372+
336373
!> Create a directory. Create subdirectories as needed
337374
subroutine mkdir(dir, echo)
338375
character(len=*), intent(in) :: dir
@@ -480,9 +517,8 @@ recursive subroutine list_files(dir, files, recurse)
480517
call fpm_stop(2,'*list_files*:directory listing failed')
481518
end if
482519

483-
open (newunit=fh, file=temp_file, status='old')
484-
files = read_lines(fh)
485-
close(fh,status="delete")
520+
files = read_lines(temp_file)
521+
call delete_file(temp_file)
486522

487523
do i=1,size(files)
488524
files(i)%s = join_path(dir,files(i)%s)
@@ -678,7 +714,7 @@ subroutine getline(unit, line, iostat, iomsg)
678714
!> Error message
679715
character(len=:), allocatable, optional :: iomsg
680716

681-
integer, parameter :: BUFFER_SIZE = 32768
717+
integer, parameter :: BUFFER_SIZE = 1024
682718
character(len=BUFFER_SIZE) :: buffer
683719
character(len=256) :: msg
684720
integer :: size

src/fpm_source_parsing.f90

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,7 @@ function parse_f_source(f_filename,error) result(f_source)
8282

8383
f_source%file_name = f_filename
8484

85-
open(newunit=fh,file=f_filename,status='old')
86-
file_lines = read_lines_expanded(fh)
87-
close(fh)
85+
file_lines = read_lines_expanded(f_filename)
8886

8987
! for efficiency in parsing make a lowercase left-adjusted copy of the file
9088
! Need a copy because INCLUDE (and #include) file arguments are case-sensitive
@@ -427,9 +425,7 @@ function parse_c_source(c_filename,error) result(c_source)
427425
allocate(c_source%modules_provided(0))
428426
allocate(c_source%parent_modules(0))
429427

430-
open(newunit=fh,file=c_filename,status='old')
431-
file_lines = read_lines(fh)
432-
close(fh)
428+
file_lines = read_lines(c_filename)
433429

434430
! Ignore empty files, returned as FPM_UNIT_UNKNOWN
435431
if (len_trim(file_lines) < 1) then

src/fpm_strings.f90

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
!! - [[LOWER]] Changes a string to lowercase over optional specified column range
1414
!!### Parsing and joining
1515
!! - [[SPLIT]] parse string on delimiter characters and store tokens into an allocatable array
16+
!! - [[SPLIT_FIRST_LAST]] Computes the first and last indices of tokens in input string, delimited by the characters in set,
17+
!! and stores them into first and last output arrays.
1618
!! - [[STRING_CAT]] Concatenate an array of **type(string_t)** into a single **CHARACTER** variable
1719
!! - [[JOIN]] append an array of **CHARACTER** variables into a single **CHARACTER** variable
1820
!!### Testing
@@ -40,7 +42,7 @@ module fpm_strings
4042
implicit none
4143

4244
private
43-
public :: f_string, lower, upper, split, str_ends_with, string_t, str_begins_with_str
45+
public :: f_string, lower, upper, split, split_first_last, str_ends_with, string_t, str_begins_with_str
4446
public :: to_fortran_name, is_fortran_name
4547
public :: string_array_contains, string_cat, len_trim, operator(.in.), fnv_1a
4648
public :: replace, resize, str, join, glob
@@ -518,6 +520,73 @@ subroutine split(input_line,array,delimiters,order,nulls)
518520
enddo
519521
end subroutine split
520522

523+
!! Author: Milan Curcic
524+
!! Computes the first and last indices of tokens in input string, delimited
525+
!! by the characters in set, and stores them into first and last output
526+
!! arrays.
527+
pure subroutine split_first_last(string, set, first, last)
528+
character(*), intent(in) :: string
529+
character(*), intent(in) :: set
530+
integer, allocatable, intent(out) :: first(:)
531+
integer, allocatable, intent(out) :: last(:)
532+
533+
integer, dimension(len(string) + 1) :: istart, iend
534+
integer :: p, n, slen
535+
536+
slen = len(string)
537+
538+
n = 0
539+
if (slen > 0) then
540+
p = 0
541+
do while (p < slen)
542+
n = n + 1
543+
istart(n) = min(p + 1, slen)
544+
call split_pos(string, set, p)
545+
iend(n) = p - 1
546+
end do
547+
end if
548+
549+
first = istart(:n)
550+
last = iend(:n)
551+
552+
end subroutine split_first_last
553+
554+
!! Author: Milan Curcic
555+
!! If back is absent, computes the leftmost token delimiter in string whose
556+
!! position is > pos. If back is present and true, computes the rightmost
557+
!! token delimiter in string whose position is < pos. The result is stored
558+
!! in pos.
559+
pure subroutine split_pos(string, set, pos, back)
560+
character(*), intent(in) :: string
561+
character(*), intent(in) :: set
562+
integer, intent(in out) :: pos
563+
logical, intent(in), optional :: back
564+
565+
logical :: backward
566+
integer :: result_pos, bound
567+
568+
if (len(string) == 0) then
569+
pos = 1
570+
return
571+
end if
572+
573+
!TODO use optval when implemented in stdlib
574+
!backward = optval(back, .false.)
575+
backward = .false.
576+
if (present(back)) backward = back
577+
578+
if (backward) then
579+
bound = min(len(string), max(pos - 1, 0))
580+
result_pos = scan(string(:bound), set, back=.true.)
581+
else
582+
result_pos = scan(string(min(pos + 1, len(string)):), set) + pos
583+
if (result_pos < pos + 1) result_pos = len(string) + 1
584+
end if
585+
586+
pos = result_pos
587+
588+
end subroutine split_pos
589+
521590
!> Returns string with characters in charset replaced with target_char.
522591
pure function replace(string, charset, target_char) result(res)
523592
character(*), intent(in) :: string
@@ -1371,7 +1440,7 @@ subroutine remove_newline_characters(string)
13711440

13721441
integer :: feed,length
13731442

1374-
character(*), parameter :: CRLF = new_line('a')//achar(13)
1443+
character(*), parameter :: CRLF = achar(13)//new_line('a')
13751444
character(*), parameter :: SPACE = ' '
13761445

13771446
call remove_characters_in_set(string%s,set=CRLF,replace_with=SPACE)

0 commit comments

Comments
 (0)