38
38
import copy
39
39
import functools
40
40
import inspect
41
+ import itertools
41
42
import json
42
43
import sys
43
44
53
54
# pylint: disable=E0611,W0404
54
55
if sys .version_info >= (3 , 0 ):
55
56
basestring = (bytes , str ) # pylint: disable=C0103,W0622
56
- from itertools import zip_longest
57
- else :
58
- from itertools import izip_longest as zip_longest
59
57
60
58
61
59
class JsonPatchException (Exception ):
@@ -282,15 +280,15 @@ def compare_values(path, value, other):
282
280
if value == other :
283
281
return
284
282
if isinstance (value , dict ) and isinstance (other , dict ):
285
- for operation in compare_dict (path , value , other ):
283
+ for operation in compare_dicts (path , value , other ):
286
284
yield operation
287
285
elif isinstance (value , list ) and isinstance (other , list ):
288
- for operation in compare_list (path , value , other ):
286
+ for operation in compare_lists (path , value , other ):
289
287
yield operation
290
288
else :
291
289
yield {'op' : 'replace' , 'path' : '/' .join (path ), 'value' : other }
292
290
293
- def compare_dict (path , src , dst ):
291
+ def compare_dicts (path , src , dst ):
294
292
for key in src :
295
293
if key not in dst :
296
294
yield {'op' : 'remove' , 'path' : '/' .join (path + [key ])}
@@ -304,23 +302,10 @@ def compare_dict(path, src, dst):
304
302
'path' : '/' .join (path + [key ]),
305
303
'value' : dst [key ]}
306
304
307
- def compare_list (path , src , dst ):
308
- lsrc , ldst = len (src ), len (dst )
309
- for idx in range (min (lsrc , ldst )):
310
- current = path + [str (idx )]
311
- for operation in compare_values (current , src [idx ], dst [idx ]):
312
- yield operation
313
- if lsrc < ldst :
314
- for idx in range (lsrc , ldst ):
315
- current = path + [str (idx )]
316
- yield {'op' : 'add' ,
317
- 'path' : '/' .join (current ),
318
- 'value' : dst [idx ]}
319
- elif lsrc > ldst :
320
- for idx in reversed (range (ldst , lsrc )):
321
- yield {'op' : 'remove' , 'path' : '/' .join (path + [str (idx )])}
305
+ def compare_lists (path , src , dst ):
306
+ return _compare_lists (path , src , dst )
322
307
323
- return cls (list (compare_dict (['' ], src , dst )))
308
+ return cls (list (compare_dicts (['' ], src , dst )))
324
309
325
310
def to_string (self ):
326
311
"""Returns patch set as JSON string."""
@@ -527,3 +512,230 @@ def apply(self, obj):
527
512
}).apply (obj )
528
513
529
514
return obj
515
+
516
+
517
def _compare_lists(path, src, dst):
    """Build the JSON patch operations that turn list `src` into list `dst`.

    The work is a three stage pipeline: split both lists around their
    common runs, turn the leftover ranges into raw ``remove``/``add``
    operations, then fold matching pairs into ``replace``/``move``.

    Returns the generator produced by :func:`_optimize`.
    """
    left, right = _split_by_common_seq(src, dst)
    raw_operations = _compare(path, src, dst, left, right)
    return _optimize(raw_operations)
520
+
521
+
522
+ def _longest_common_subseq (src , dst ):
523
+ """Returns pair of ranges of longest common subsequence for the `src`
524
+ and `dst` lists.
525
+
526
+ >>> src = [1, 2, 3, 4]
527
+ >>> dst = [0, 1, 2, 3, 5]
528
+ >>> # The longest common subsequence for these lists is [1, 2, 3]
529
+ ... # which is located at (0, 3) index range for src list and (1, 4) for
530
+ ... # dst one. Tuple of these ranges we should get back.
531
+ ... assert ((0, 3), (1, 4)) == _longest_common_subseq(src, dst)
532
+ """
533
+ lsrc , ldst = len (src ), len (dst )
534
+ drange = list (range (ldst ))
535
+ matrix = [[0 ] * ldst for _ in range (lsrc )]
536
+ z = 0 # length of the longest subsequence
537
+ range_src , range_dst = None , None
538
+ for i , j in itertools .product (range (lsrc ), drange ):
539
+ if src [i ] == dst [j ]:
540
+ if i == 0 or j == 0 :
541
+ matrix [i ][j ] = 1
542
+ else :
543
+ matrix [i ][j ] = matrix [i - 1 ][j - 1 ] + 1
544
+ if matrix [i ][j ] > z :
545
+ z = matrix [i ][j ]
546
+ if matrix [i ][j ] == z :
547
+ range_src = (i - z + 1 , i + 1 )
548
+ range_dst = (j - z + 1 , j + 1 )
549
+ else :
550
+ matrix [i ][j ] = 0
551
+ return range_src , range_dst
552
+
553
+
554
+ def _split_by_common_seq (src , dst , bx = (0 , - 1 ), by = (0 , - 1 )):
555
+ """Recursively splits the `dst` list onto two parts: left and right.
556
+ The left part contains differences on left from common subsequence,
557
+ same as the right part by for other side.
558
+
559
+ To easily understand the process let's take two lists: [0, 1, 2, 3] as
560
+ `src` and [1, 2, 4, 5] for `dst`. If we've tried to generate the binary tree
561
+ where nodes are common subsequence for both lists, leaves on the left
562
+ side are subsequence for `src` list and leaves on the right one for `dst`,
563
+ our tree would looks like::
564
+
565
+ [1, 2]
566
+ / \
567
+ [0] []
568
+ / \
569
+ [3] [4, 5]
570
+
571
+ This function generate the similar structure as flat tree, but without
572
+ nodes with common subsequences - since we're don't need them - only with
573
+ left and right leaves::
574
+
575
+ []
576
+ / \
577
+ [0] []
578
+ / \
579
+ [3] [4, 5]
580
+
581
+ The `bx` is the absolute range for currently processed subsequence of
582
+ `src` list. The `by` means the same, but for the `dst` list.
583
+ """
584
+ # Prevent useless comparisons in future
585
+ bx = bx if bx [0 ] != bx [1 ] else None
586
+ by = by if by [0 ] != by [1 ] else None
587
+
588
+ if not src :
589
+ return [None , by ]
590
+ elif not dst :
591
+ return [bx , None ]
592
+
593
+ # note that these ranges are relative for processed sublists
594
+ x , y = _longest_common_subseq (src , dst )
595
+
596
+ if x is None or y is None : # no more any common subsequence
597
+ return [bx , by ]
598
+
599
+ return [_split_by_common_seq (src [:x [0 ]], dst [:y [0 ]],
600
+ (bx [0 ], bx [0 ] + x [0 ]),
601
+ (by [0 ], by [0 ] + y [0 ])),
602
+ _split_by_common_seq (src [x [1 ]:], dst [y [1 ]:],
603
+ (bx [0 ] + x [1 ], bx [0 ] + len (src )),
604
+ (bx [0 ] + y [1 ], bx [0 ] + len (dst )))]
605
+
606
+
607
def _compare(path, src, dst, left, right):
    """Yield only the operations produced by :func:`_compare_with_shift`.

    The comparison is started with an index shift of zero and the shift
    value paired with every operation is discarded.
    """
    pairs = _compare_with_shift(path, src, dst, left, right, 0)
    for pair in pairs:
        yield pair[0]
611
+
612
+
613
+ def _compare_with_shift (path , src , dst , left , right , shift ):
614
+ """Recursively compares differences from `left` and `right` sides
615
+ from common subsequences.
616
+
617
+ The `shift` parameter is used to store index shift which caused
618
+ by ``add`` and ``remove`` operations.
619
+
620
+ Yields JSON patch operations and list index shift.
621
+ """
622
+ if isinstance (left , list ):
623
+ for item , shift in _compare_with_shift (path , src , dst , * left ,
624
+ shift = shift ):
625
+ yield item , shift
626
+ elif left is not None :
627
+ for item , shift in _compare_left (path , src , left , shift ):
628
+ yield item , shift
629
+
630
+ if isinstance (right , list ):
631
+ for item , shift in _compare_with_shift (path , src , dst , * right ,
632
+ shift = shift ):
633
+ yield item , shift
634
+ elif right is not None :
635
+ for item , shift in _compare_right (path , dst , right , shift ):
636
+ yield item , shift
637
+
638
+
639
+ def _compare_left (path , src , left , shift ):
640
+ """Yields JSON patch ``remove`` operations for elements that are only
641
+ exists in the `src` list."""
642
+ start , end = left
643
+ if end == - 1 :
644
+ end = len (src )
645
+ # we need to `remove` elements from list tail to not deal with index shift
646
+ for idx in reversed (range (start + shift , end + shift )):
647
+ current = path + [str (idx )]
648
+ yield (
649
+ {'op' : 'remove' ,
650
+ # yes, there should be any value field, but we'll use it
651
+ # to apply `move` optimization a bit later and will remove
652
+ # it in _optimize function.
653
+ 'value' : src [idx - shift ],
654
+ 'path' : '/' .join (current )},
655
+ shift - 1
656
+ )
657
+ shift -= 1
658
+
659
+
660
+ def _compare_right (path , dst , right , shift ):
661
+ """Yields JSON patch ``add`` operations for elements that are only
662
+ exists in the `dst` list"""
663
+ start , end = right
664
+ if end == - 1 :
665
+ end = len (dst )
666
+ for idx in range (start , end ):
667
+ current = path + [str (idx )]
668
+ yield (
669
+ {'op' : 'add' , 'path' : '/' .join (current ), 'value' : dst [idx ]},
670
+ shift + 1
671
+ )
672
+ shift += 1
673
+
674
+
675
+ def _optimize (operations ):
676
+ """Optimizes operations which was produced by lists comparison.
677
+
678
+ Actually it does two kinds of optimizations:
679
+
680
+ 1. Seeks pair of ``remove`` and ``add`` operations against the same path
681
+ and replaces them with ``replace`` operation.
682
+ 2. Seeks pair of ``remove`` and ``add`` operations for the same value
683
+ and replaces them with ``move`` operation.
684
+ """
685
+ result = []
686
+ ops_by_path = {}
687
+ ops_by_value = {}
688
+ add_remove = set (['add' , 'remove' ])
689
+ for item in operations :
690
+ # could we apply "move" optimization for dict values?
691
+ hashable_value = not isinstance (item ['value' ], (dict , list ))
692
+ if item ['path' ] in ops_by_path :
693
+ _optimize_using_replace (ops_by_path [item ['path' ]], item )
694
+ continue
695
+ if hashable_value and item ['value' ] in ops_by_value :
696
+ prev_item = ops_by_value [item ['value' ]]
697
+ # ensure that we processing pair of add-remove ops
698
+ if set ([item ['op' ], prev_item ['op' ]]) == add_remove :
699
+ _optimize_using_move (prev_item , item )
700
+ ops_by_value .pop (item ['value' ])
701
+ continue
702
+ result .append (item )
703
+ ops_by_path [item ['path' ]] = item
704
+ if hashable_value :
705
+ ops_by_value [item ['value' ]] = item
706
+
707
+ # cleanup
708
+ ops_by_path .clear ()
709
+ ops_by_value .clear ()
710
+ for item in result :
711
+ if item ['op' ] == 'remove' :
712
+ item .pop ('value' ) # strip our hack
713
+ yield item
714
+
715
+
716
+ def _optimize_using_replace (prev , cur ):
717
+ """Optimises JSON patch by using ``replace`` operation instead of
718
+ ``remove`` and ``add`` against the same path."""
719
+ prev ['op' ] = 'replace'
720
+ if cur ['op' ] == 'add' :
721
+ prev ['value' ] = cur ['value' ]
722
+
723
+
724
+ def _optimize_using_move (prev_item , item ):
725
+ """Optimises JSON patch by using ``move`` operation instead of
726
+ ``remove` and ``add`` against the different paths but for the same value."""
727
+ prev_item ['op' ] = 'move'
728
+ move_from , move_to = [
729
+ (item ['path' ], prev_item ['path' ]),
730
+ (prev_item ['path' ], item ['path' ]),
731
+ ][item ['op' ] == 'add' ]
732
+ if item ['op' ] == 'add' : # first was remove then add
733
+ prev_item ['from' ] = move_from
734
+ prev_item ['path' ] = move_to
735
+ else : # first was add then remove
736
+ head , move_from = move_from .rsplit ('/' , 1 )
737
+ # since add operation was first it incremented
738
+ # overall index shift value. we have to fix this
739
+ move_from = int (move_from ) - 1
740
+ prev_item ['from' ] = head + '/%d' % move_from
741
+ prev_item ['path' ] = move_to
0 commit comments