11import csv
22import json
33import os
4- import tempfile
54from collections import defaultdict
65from datetime import datetime
76from itertools import groupby
87
9- from mergin import ClientError
10- from mergin .merginproject import MerginProject , pygeodiff
11- from mergin .utils import int_version
8+ from . import ClientError
9+ from .merginproject import MerginProject , pygeodiff
10+ from .utils import int_version
1211
# QGIS API is optional: geometry measurements (length/area) are computed
# only when it is importable; otherwise the report omits those values.
try:
    from qgis.core import (
        QgsGeometry,
        QgsDistanceArea,
        QgsCoordinateReferenceSystem,
        QgsCoordinateTransformContext,
        QgsWkbTypes,
    )

    has_qgis = True
except ImportError:
    has_qgis = False
@@ -58,12 +59,14 @@ def qgs_geom_from_wkb(geom):
5859
5960class ChangesetReportEntry :
6061 """ Derivative of geodiff ChangesetEntry suitable for further processing/reporting """
61- def __init__ (self , changeset_entry , geom_idx , geom ):
62+
63+ def __init__ (self , changeset_entry , geom_idx , geom , qgs_distance_area = None ):
6264 self .table = changeset_entry .table .name
6365 self .geom_type = geom ["type" ]
6466 self .crs = "EPSG:" + geom ["srs_id" ]
6567 self .length = None
6668 self .area = None
69+ self .dim = 0
6770
6871 if changeset_entry .operation == changeset_entry .OP_DELETE :
6972 self .operation = "delete"
@@ -75,14 +78,12 @@ def __init__(self, changeset_entry, geom_idx, geom):
7578 self .operation = "unknown"
7679
7780 # only calculate geom properties when qgis api is available
78- if not has_qgis :
81+ if not qgs_distance_area :
7982 return
8083
81- d = QgsDistanceArea ()
82- d .setEllipsoid ('WGS84' )
8384 crs = QgsCoordinateReferenceSystem ()
8485 crs .createFromString (self .crs )
85- d .setSourceCrs (crs , QgsCoordinateTransformContext ())
86+ qgs_distance_area .setSourceCrs (crs , QgsCoordinateTransformContext ())
8687
8788 if hasattr (changeset_entry , "old_values" ):
8889 old_wkb = changeset_entry .old_values [geom_idx ]
@@ -111,65 +112,94 @@ def __init__(self, changeset_entry, geom_idx, geom):
111112 elif self .operation == "insert" :
112113 qgs_geom = qgs_geom_from_wkb (new_wkb )
113114
114- dim = QgsWkbTypes .wkbDimensions (qgs_geom .wkbType ())
115- if dim == 1 :
116- self .length = d .measureLength (qgs_geom )
115+ self . dim = QgsWkbTypes .wkbDimensions (qgs_geom .wkbType ())
116+ if self . dim == 1 :
117+ self .length = qgs_distance_area .measureLength (qgs_geom )
117118 if updated_qgs_geom :
118- self .length = d .measureLength (updated_qgs_geom ) - self .length
119- elif dim == 2 :
120- self .length = d .measurePerimeter (qgs_geom )
121- self .area = d .measureArea (qgs_geom )
119+ self .length = qgs_distance_area .measureLength (updated_qgs_geom ) - self .length
120+ elif self . dim == 2 :
121+ self .length = qgs_distance_area .measurePerimeter (qgs_geom )
122+ self .area = qgs_distance_area .measureArea (qgs_geom )
122123 if updated_qgs_geom :
123- self .length = d .measurePerimeter (updated_qgs_geom ) - self .length
124- self .area = d .measureArea (updated_qgs_geom ) - self .area
125-
126-
127- class ChangesetReport :
128- """ Report (aggregate) from geopackage changeset (diff file) """
129- def __init__ (self , changeset_reader , schema ):
130- self .cr = changeset_reader
131- self .schema = schema
132- self .entries = []
133- # let's iterate through reader and populate entries
134- for entry in self .cr :
135- schema_table = next ((t for t in schema if t ["table" ] == entry .table .name ), None )
136- # get geometry index in both gpkg schema and diffs values
137- geom_idx = next ((index for (index , col ) in enumerate (schema_table ["columns" ]) if col ["type" ] == "geometry" ),
138- None )
139- if geom_idx is None :
140- continue
124+ self .length = qgs_distance_area .measurePerimeter (updated_qgs_geom ) - self .length
125+ self .area = qgs_distance_area .measureArea (updated_qgs_geom ) - self .area
126+
127+
def changeset_report(changeset_reader, schema):
    """ Parse Geodiff changeset reader and create report from it.

    Aggregates individual entries based on common table, operation and geometry type.
    If QGIS API is available, then lengths and areas of individual entries are summed.

    :Example:

    >>> geodiff.schema("sqlite", "", "/tmp/base.gpkg", "/tmp/base-schema.json")
    >>> with open("/tmp/base-schema.json", 'r') as sf:
            schema = json.load(sf).get("geodiff_schema")
    >>> cr = geodiff.read_changeset("/tmp/base.gpkg-diff")
    >>> changeset_report(cr, schema)
    [{"table": "Lines", "operation": "insert", "length": 1.234, "area": 0.0, "count": 3}]

    Args:
        changeset_reader (pygeodiff.ChangesetReader): changeset reader from geodiff changeset (diff file)
        schema: geopackage schema with list of tables (from full .gpkg file)
    Returns:
        list of dict of aggregated records
    """
    entries = []
    records = []

    # a single QgsDistanceArea is shared by all entries; each entry sets its own source CRS on it
    if has_qgis:
        distance_area = QgsDistanceArea()
        distance_area.setEllipsoid('WGS84')
    else:
        distance_area = None

    # let's iterate through reader and populate entries
    for entry in changeset_reader:
        schema_table = next((t for t in schema if t["table"] == entry.table.name), None)
        if schema_table is None:
            # table is not present in the schema - nothing we can report about it
            continue
        # get geometry index in both gpkg schema and diffs values
        geom_idx = next((index for (index, col) in enumerate(schema_table["columns"]) if col["type"] == "geometry"),
                        None)
        if geom_idx is None:
            continue

        geom_col = schema_table["columns"][geom_idx]["geometry"]
        entries.append(ChangesetReportEntry(entry, geom_idx, geom_col, distance_area))

    # create a map of entries grouped by tables within single .gpkg file
    tables = defaultdict(list)
    for report_entry in entries:
        tables[report_entry.table].append(report_entry)

    # iterate through all tables and aggregate changes by operation type (e.g. insert) and geometry type (e.g. point)
    # for example 3 point features inserted in 'Points' table would be single row with count 3
    for table, table_entries in tables.items():
        # groupby only merges *consecutive* items with equal keys, so the
        # entries must be sorted by the same key first - otherwise identical
        # (operation, geom type) pairs would produce several duplicate rows
        keyfunc = lambda i: (i.operation, i.geom_type)
        for key, group in groupby(sorted(table_entries, key=keyfunc), keyfunc):
            values = list(group)
            # sum lengths and areas only if it makes sense (valid dimension)
            area = sum(entry.area for entry in values if entry.area) if values[0].dim == 2 else None
            length = sum(entry.length for entry in values if entry.length) if values[0].dim > 0 else None
            records.append({
                "table": table,
                "operation": key[0],
                "length": length,
                "area": area,
                "count": len(values)
            })
    return records
168191
169192
170193def create_report (mc , directory , since , to , out_file ):
171194 """ Creates report from geodiff changesets for a range of project versions in CSV format.
172195
196+ Report is created for all .gpkg files and all tables within where updates were done using Geodiff lib.
197+ Changeset contains operation (insert/update/delete) and geometry properties like length/perimeter and area.
198+ Each row is an aggregate of the features modified by the same operation and of the same geometry type and contains
199+ these values: "file", "table", "author", "date", "time", "version", "operation", "length", "area", "count"
200+
201+ No filtering and grouping is done here, this is left for third-party office software to use pivot table functionality.
202+
173203 Args:
174204 mc (MerginClient): MerginClient instance.
175205 directory (str): local project directory (must already exist).
@@ -209,12 +239,12 @@ def create_report(mc, directory, since, to, out_file):
209239 mc .download_file_diffs (directory , f ["path" ], history_keys )
210240
211241 # download full gpkg in "to" version to analyze its schema to determine which col is geometry
212- full_gpkg = os . path . join ( mp .meta_dir , ".cache" , f ["path" ])
242+ full_gpkg = mp .fpath_cache ( f ["path" ], version = to )
213243 if not os .path .exists (full_gpkg ):
214244 mc .download_file (directory , f ["path" ], full_gpkg , to )
215245
216246 # get gpkg schema
217- schema_file = full_gpkg + '-schema' # geodiff writes schema into a file
247+ schema_file = full_gpkg + '-schema.json ' # geodiff writes schema into a file
218248 if not os .path .exists (schema_file ):
219249 mp .geodiff .schema ("sqlite" , "" , full_gpkg , schema_file )
220250 with open (schema_file , 'r' ) as sf :
@@ -226,16 +256,13 @@ def create_report(mc, directory, since, to, out_file):
226256 if f ['history' ][version ]["change" ] == "updated" :
227257 warnings .append (f"Missing diff: { f ['path' ]} was overwritten in { version } - broken diff history" )
228258 else :
229- warnings .append (f"Missin diff: { f ['path' ]} was { f ['history' ][version ]['change' ]} in { version } " )
259+ warnings .append (f"Missing diff: { f ['path' ]} was { f ['history' ][version ]['change' ]} in { version } " )
230260 continue
231261
232- v_diff_file = os .path .join (mp .meta_dir , '.cache' ,
233- version + "-" + f ['history' ][version ]['diff' ]['path' ])
234-
262+ v_diff_file = mp .fpath_cache (f ['history' ][version ]['diff' ]['path' ], version = version )
235263 version_data = versions_map [version ]
236264 cr = mp .geodiff .read_changeset (v_diff_file )
237- rep = ChangesetReport (cr , schema )
238- report = rep .report ()
265+ report = changeset_report (cr , schema )
239266 # append version info to changeset info
240267 dt = datetime .fromisoformat (version_data ["created" ].rstrip ("Z" ))
241268 version_fields = {
0 commit comments