1515 group_by_indices ,
1616 sanitize_strand_vector ,
1717 wrapper_follow_precede ,
18- wrapper_nearest ,
1918)
2019
2120__author__ = "jkanche"
@@ -2279,10 +2278,10 @@ def find_overlaps(
22792278
22802279 all_s_hits , all_q_hits = find_overlaps_groups (
22812280 self ._ranges .get_start ().astype (np .int32 ),
2282- self ._ranges .get_end ().astype (np .int32 ) + 1 ,
2281+ self ._ranges .get_end_exclusive ().astype (np .int32 ),
22832282 [s .astype (np .int32 ) for s in self_groups ],
22842283 query ._ranges .get_start ().astype (np .int32 ),
2285- query ._ranges .get_end ().astype (np .int32 ) + 1 ,
2284+ query ._ranges .get_end_exclusive ().astype (np .int32 ),
22862285 [q .astype (np .int32 ) for q in query_groups ],
22872286 query_type ,
22882287 select ,
@@ -2309,10 +2308,10 @@ def find_overlaps(
23092308
23102309 all_q_hits , all_s_hits = find_overlaps_groups (
23112310 query ._ranges .get_start ().astype (np .int32 ),
2312- query ._ranges .get_end ().astype (np .int32 ) + 1 ,
2311+ query ._ranges .get_end_exclusive ().astype (np .int32 ),
23132312 [q .astype (np .int32 ) for q in query_groups ],
23142313 self ._ranges .get_start ().astype (np .int32 ),
2315- self ._ranges .get_end ().astype (np .int32 ) + 1 ,
2314+ self ._ranges .get_end_exclusive ().astype (np .int32 ),
23162315 [s .astype (np .int32 ) for s in self_groups ],
23172316 query_type ,
23182317 select ,
@@ -2461,6 +2460,7 @@ def nearest(
24612460 select : Literal ["all" , "arbitrary" ] = "arbitrary" ,
24622461 ignore_strand : bool = False ,
24632462 num_threads : int = 1 ,
2463+ adjacent_equals_overlap : bool = True ,
24642464 ) -> Union [np .ndarray , BiocFrame ]:
24652465 """Search nearest positions both upstream and downstream that overlap with each range in ``query``.
24662466
@@ -2479,6 +2479,16 @@ def nearest(
24792479 Number of threads to use.
24802480 Defaults to 1.
24812481
2482+ adjacent_equals_overlap:
2483+ Whether to consider immediately-adjacent subject intervals to be
2484+ equally "nearest" to the query as an overlapping subject interval.
2485+
2486+ If true, both overlapping and immediately-adjacent subject intervals
2487+ (i.e., a gap of zero) will be reported in matches.
2488+
2489+ Otherwise, immediately-adjacent subjects will only be reported if
2490+ overlapping subjects are not present.
2491+
24822492 Returns:
24832493 If select="arbitrary":
24842494 A numpy array of integers with length matching query, containing indices
@@ -2493,36 +2503,37 @@ def nearest(
24932503 if not isinstance (query , GenomicRanges ):
24942504 raise TypeError ("'query' is not a `GenomicRanges` object." )
24952505
2496- effective_threads = min ( num_threads , cpu_count ()) if num_threads > 0 else cpu_count ()
2506+ from iranges . lib_iranges import nearest_groups
24972507
24982508 self_groups , query_groups = self ._get_query_common_groups (query )
24992509
2500- tasks = [
2501- (
2502- s_group ,
2503- q_group ,
2504- self ._ranges ,
2505- query ._ranges ,
2506- self ._strand ,
2507- query ._strand ,
2508- ignore_strand ,
2509- )
2510- for s_group , q_group in zip (self_groups , query_groups )
2511- ]
2510+ all_q_hits , all_s_hits = nearest_groups (
2511+ self ._ranges .get_start ().astype (np .int32 ),
2512+ self ._ranges .get_end_exclusive ().astype (np .int32 ),
2513+ [s .astype (np .int32 ) for s in self_groups ],
2514+ query ._ranges .get_start ().astype (np .int32 ),
2515+ query ._ranges .get_end_exclusive ().astype (np .int32 ),
2516+ [q .astype (np .int32 ) for q in query_groups ],
2517+ select ,
2518+ num_threads ,
2519+ adjacent_equals_overlap ,
2520+ )
25122521
2513- if effective_threads == 1 or len (self_groups ) <= 1 :
2514- results = [wrapper_nearest (task ) for task in tasks ]
2515- else :
2516- with Pool (processes = effective_threads ) as pool :
2517- results = pool .map (wrapper_nearest , tasks )
2522+ if ignore_strand is False :
2523+ s_strands = self ._strand [all_s_hits ]
2524+ q_strands = query ._strand [all_q_hits ]
25182525
2519- if results :
2520- all_qhits_list , all_shits_list = zip (* results )
2521- else :
2522- all_qhits_list , all_shits_list = [], []
2526+ mask = s_strands == q_strands
2527+ # to allow '*' with any strand from query
2528+ mask [s_strands == 0 ] = True
2529+ mask [q_strands == 0 ] = True
2530+ all_q_hits = all_q_hits [mask ]
2531+ all_s_hits = all_s_hits [mask ]
25232532
2524- final_qhits = np .concatenate (all_qhits_list ) if all_qhits_list else np .array ([], dtype = np .int32 )
2525- final_shits = np .concatenate (all_shits_list ) if all_shits_list else np .array ([], dtype = np .int32 )
2533+ order = np .argsort (all_q_hits , stable = True )
2534+
2535+ final_qhits = all_q_hits [order ]
2536+ final_shits = all_s_hits [order ]
25262537
25272538 if select == "arbitrary" :
25282539 ret_result = np .full (len (query ), None )
0 commit comments