@@ -557,26 +557,31 @@ function pmapreduce_commutative(fmap::Function,freduce::Function,iterators::Tupl
557
557
hostnames = gethostnames (procs_used);
558
558
nodes = nodenames (hostnames);
559
559
procid_rank1_on_node = [procs_used[findfirst (isequal (node),hostnames)] for node in nodes];
560
+ Nnodes_reduction = length (procid_rank1_on_node)
560
561
561
562
nprocs_node_dict = nprocs_node (procs_used)
562
563
node_channels = Dict (
563
- node=> RemoteChannel (()-> Channel {Any} (nprocs_node_dict[node]),procid_node)
564
+ node=> (
565
+ out = RemoteChannel (()-> Channel {Any} (nprocs_node_dict[node]),procid_node),
566
+ err = RemoteChannel (()-> Channel {Bool} (nprocs_node_dict[node]),procid_node),
567
+ )
564
568
for (node,procid_node) in zip (nodes,procid_rank1_on_node))
565
569
566
570
# Worker at which the final reduction takes place
567
571
p_final = first (procid_rank1_on_node)
568
572
569
573
finalnode_reducechannel = RemoteChannel (()-> Channel {Any} (length (procid_rank1_on_node)),p_final)
570
-
571
- Ntasks_total = num_workers + length (procid_rank1_on_node) + 1
574
+ finalnode_errorchannel = RemoteChannel (()-> Channel {Bool} (length (procid_rank1_on_node)),p_final)
572
575
573
576
result_channel = RemoteChannel (()-> Channel {Any} (1 ))
577
+ error_channel = RemoteChannel (()-> Channel {Bool} (1 ))
574
578
575
579
# Run the function on each processor and compute the reduction at each node
576
580
@sync for (rank,(p,node)) in enumerate (zip (procs_used,hostnames))
577
581
@async begin
578
582
579
- eachnode_reducechannel = node_channels[node]
583
+ eachnode_reducechannel = node_channels[node]. out
584
+ eachnode_errorchannel = node_channels[node]. err
580
585
581
586
np_node = nprocs_node_dict[node]
582
587
@@ -586,10 +591,13 @@ function pmapreduce_commutative(fmap::Function,freduce::Function,iterators::Tupl
586
591
try
587
592
res = fmap (iterable_on_proc,args... ;kwargs... )
588
593
put! (eachnode_reducechannel,res)
594
+ put! (eachnode_errorchannel,false )
589
595
catch e
590
- throwRemoteException (e)
596
+ put! (eachnode_errorchannel,true )
597
+ rethrow ()
591
598
finally
592
599
if p ∉ procid_rank1_on_node
600
+ finalize (eachnode_errorchannel)
593
601
finalize (eachnode_reducechannel)
594
602
end
595
603
end
@@ -598,13 +606,21 @@ function pmapreduce_commutative(fmap::Function,freduce::Function,iterators::Tupl
598
606
@async if p in procid_rank1_on_node
599
607
@spawnat p begin
600
608
try
601
- res = freduce (take! (eachnode_reducechannel) for i= 1 : np_node)
602
- put! (finalnode_reducechannel,res)
609
+ anyerror = any (take! (eachnode_errorchannel) for i= 1 : np_node)
610
+ if ! anyerror
611
+ res = freduce (take! (eachnode_reducechannel) for i= 1 : np_node)
612
+ put! (finalnode_reducechannel,res)
613
+ put! (finalnode_errorchannel,false )
614
+ else
615
+ put! (finalnode_errorchannel,true )
616
+ end
603
617
catch e
604
- throwRemoteException (e)
618
+ put! (finalnode_errorchannel,true )
619
+ rethrow ()
605
620
finally
606
621
finalize (eachnode_reducechannel)
607
622
if p != p_final
623
+ finalize (finalnode_errorchannel)
608
624
finalize (finalnode_reducechannel)
609
625
end
610
626
end
@@ -614,25 +630,37 @@ function pmapreduce_commutative(fmap::Function,freduce::Function,iterators::Tupl
614
630
@async if p == p_final
615
631
@spawnat p begin
616
632
try
617
- res = freduce (take! (finalnode_reducechannel)
618
- for i= 1 : length (procid_rank1_on_node))
619
-
620
- put! (result_channel,res)
633
+ anyerror = any (take! (finalnode_errorchannel) for i= 1 : Nnodes_reduction)
634
+ if ! anyerror
635
+ res = freduce (take! (finalnode_reducechannel) for i= 1 : Nnodes_reduction)
636
+ put! (result_channel,res)
637
+ put! (error_channel,false )
638
+ else
639
+ put! (error_channel,true )
640
+ end
621
641
catch e
622
- throwRemoteException (e)
642
+ put! (error_channel,true )
643
+ rethrow ()
623
644
finally
645
+ finalize (finalnode_errorchannel)
624
646
finalize (finalnode_reducechannel)
625
647
626
648
if p != result_channel. where
627
649
finalize (result_channel)
628
650
end
651
+ if p != error_channel. where
652
+ finalize (error_channel)
653
+ end
629
654
end
630
655
end
631
656
end
632
657
end
633
658
end
634
659
635
- take! (result_channel)
660
+ anyerror = take! (error_channel)
661
+ if ! anyerror
662
+ return take! (result_channel)
663
+ end
636
664
end
637
665
638
666
function pmapreduce_commutative (fmap:: Function ,freduce:: Function ,
@@ -672,24 +700,31 @@ function pmapreduce(fmap::Function,freduce::Function,iterable::Tuple,args...;kwa
672
700
hostnames = gethostnames (procs_used);
673
701
nodes = nodenames (hostnames);
674
702
procid_rank1_on_node = [procs_used[findfirst (isequal (node),hostnames)] for node in nodes];
703
+ Nnodes_reduction = length (procid_rank1_on_node)
675
704
676
705
nprocs_node_dict = nprocs_node (procs_used)
677
706
node_channels = Dict (
678
- node=> RemoteChannel (()-> Channel {pval} (nprocs_node_dict[node]),procid_node)
707
+ node=> (
708
+ out = RemoteChannel (()-> Channel {Any} (nprocs_node_dict[node]),procid_node),
709
+ err = RemoteChannel (()-> Channel {Bool} (nprocs_node_dict[node]),procid_node),
710
+ )
679
711
for (node,procid_node) in zip (nodes,procid_rank1_on_node))
680
712
681
713
# Worker at which the final reduction takes place
682
714
p_final = first (procid_rank1_on_node)
683
715
684
716
finalnode_reducechannel = RemoteChannel (()-> Channel {pval} (length (procid_rank1_on_node)),p_final)
717
+ finalnode_errorchannel = RemoteChannel (()-> Channel {Bool} (length (procid_rank1_on_node)),p_final)
685
718
686
719
result_channel = RemoteChannel (()-> Channel {Any} (1 ))
720
+ error_channel = RemoteChannel (()-> Channel {Bool} (1 ))
687
721
688
722
# Run the function on each processor and compute the reduction at each node
689
723
@sync for (rank,(p,node)) in enumerate (zip (procs_used,hostnames))
690
724
@async begin
691
725
692
- eachnode_reducechannel = node_channels[node]
726
+ eachnode_reducechannel = node_channels[node]. out
727
+ eachnode_errorchannel = node_channels[node]. err
693
728
694
729
np_node = nprocs_node_dict[node]
695
730
@@ -698,10 +733,13 @@ function pmapreduce(fmap::Function,freduce::Function,iterable::Tuple,args...;kwa
698
733
try
699
734
res = pval (p,fmap (iterable_on_proc,args... ;kwargs... ))
700
735
put! (eachnode_reducechannel,res)
736
+ put! (eachnode_errorchannel,false )
701
737
catch e
702
- throwRemoteException (e)
738
+ put! (eachnode_errorchannel,true )
739
+ rethrow ()
703
740
finally
704
741
if p ∉ procid_rank1_on_node
742
+ finalize (eachnode_errorchannel)
705
743
finalize (eachnode_reducechannel)
706
744
end
707
745
end
@@ -710,15 +748,24 @@ function pmapreduce(fmap::Function,freduce::Function,iterable::Tuple,args...;kwa
710
748
@async if p in procid_rank1_on_node
711
749
@spawnat p begin
712
750
try
713
- vals = [take! (eachnode_reducechannel) for i= 1 : np_node]
714
- sort! (vals,by= x-> x. p)
715
- res = pval (p,freduce (v. parent for v in vals))
716
- put! (finalnode_reducechannel,res)
751
+ anyerror = any (take! (eachnode_errorchannel) for i= 1 : np_node)
752
+ if ! anyerror
753
+ vals = [take! (eachnode_reducechannel) for i= 1 : np_node]
754
+ sort! (vals,by= x-> x. p)
755
+ res = pval (p,freduce (v. parent for v in vals))
756
+ put! (finalnode_reducechannel,res)
757
+ put! (finalnode_errorchannel,false )
758
+ else
759
+ put! (finalnode_errorchannel,true )
760
+ end
717
761
catch e
718
- throwRemoteException (e)
762
+ put! (finalnode_errorchannel,true )
763
+ rethrow ()
719
764
finally
765
+ finalize (eachnode_errorchannel)
720
766
finalize (eachnode_reducechannel)
721
767
if p != p_final
768
+ finalize (finalnode_errorchannel)
722
769
finalize (finalnode_reducechannel)
723
770
end
724
771
end
@@ -728,24 +775,38 @@ function pmapreduce(fmap::Function,freduce::Function,iterable::Tuple,args...;kwa
728
775
@async if p == p_final
729
776
@spawnat p begin
730
777
try
731
- vals = [take! (finalnode_reducechannel) for i= 1 : length (procid_rank1_on_node)]
732
- sort! (vals,by= x-> x. p)
733
- res = freduce (v. parent for v in vals)
734
- put! (result_channel,res)
778
+ anyerror = any (take! (finalnode_errorchannel) for i= 1 : Nnodes_reduction)
779
+ if ! anyerror
780
+ vals = [take! (finalnode_reducechannel) for i= 1 : Nnodes_reduction]
781
+ sort! (vals,by= x-> x. p)
782
+ res = freduce (v. parent for v in vals)
783
+ put! (result_channel,res)
784
+ put! (error_channel,false )
785
+ else
786
+ put! (error_channel,true )
787
+ end
735
788
catch e
736
- throwRemoteException (e)
789
+ put! (error_channel,true )
790
+ rethrow ()
737
791
finally
792
+ finalize (finalnode_errorchannel)
738
793
finalize (finalnode_reducechannel)
739
794
if p != result_channel. where
740
795
finalize (result_channel)
741
796
end
797
+ if p != error_channel. where
798
+ finalize (error_channel)
799
+ end
742
800
end
743
801
end
744
802
end
745
803
end
746
804
end
747
805
748
- take! (result_channel)
806
+ anyerror = take! (error_channel)
807
+ if ! anyerror
808
+ return take! (result_channel)
809
+ end
749
810
end
750
811
751
812
function pmapreduce (fmap:: Function ,freduce:: Function ,
0 commit comments