pySBOL3/sbol3/document.py at 9222b6d5250b505b6c9f3733e1cc727601960d8d · SynBioDex/pySBOL3 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
from __future__ import annotations

import collections
import logging
import os
import posixpath
# import typing for typing.Sequence, which we don't want to confuse
# with sbol3.Sequence
import typing as pytyping
import warnings
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Union

import pyshacl
import rdflib

from .constants import (JSONLD, NTRIPLES, OM_NS, PROV_NS, RDF_TYPE, RDF_XML,
                        SBOL3_NS, SBOL_IDENTIFIED, SBOL_LOGGER_NAME,
                        SBOL_NAMESPACE, SBOL_TOP_LEVEL, SORTED_NTRIPLES,
                        TURTLE)
from .custom import CustomIdentified, CustomTopLevel
from .error import SBOLError
from .identified import Identified
from .object import BUILDER_REGISTER, SBOLObject
from .property_base import SingletonProperty
from .toplevel import TopLevel
from .validation import ValidationReport

# Prefix -> namespace URI bindings that every Document starts out with.
# Document.__init__ and Document.clear each take a copy of this mapping,
# so per-document bindings never modify these defaults.
_default_bindings = {
    'sbol': SBOL3_NS,
    'prov': PROV_NS,
    'om': OM_NS,
    # Should others like SO, SBO, and CHEBI be added?
}


def data_path(path: str) -> str:
    """Resolve ``path`` relative to the directory containing this module.

    :param path: A path relative to the module's installation directory
    :return: ``path`` joined onto the real (symlink-resolved) directory
             that holds this source file
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(module_dir, path)


class Document:

    @staticmethod
    def register_builder(type_uri: str,
                         builder: Callable[[str, str], Identified]) -> None:
        """A builder function will be called with an identity and a
        keyword argument type_uri.

        builder(identity_uri: str, type_uri: str = None) -> SBOLObject
        """
        Document._uri_type_map[type_uri] = builder

    # Registry mapping RDF type URIs to the builder callables used to
    # construct entities from RDF triples. This is the same object as
    # BUILDER_REGISTER (no copy is made), so builders added through
    # Document.register_builder are stored in that shared mapping.
    _uri_type_map: Dict[str, Callable[[str, str], Identified]] = BUILDER_REGISTER

    @staticmethod
    def open(location: Union[Path, str], file_format: str = None) -> Document:
        """Create a new Document and populate it by reading ``location``.

        :param location: Where to read the RDF from; passed to ``read``
        :param file_format: The RDF serialization format, or ``None`` to
                            let ``read`` guess it from ``location``
        :return: The newly created and loaded Document
        """
        document = Document()
        document.read(location, file_format=file_format)
        return document

    def __init__(self):
        """Create an empty Document holding only the default prefix bindings."""
        # The TopLevel objects contained in this document.
        self.objects: List[TopLevel] = []
        # Non-TopLevel objects that are not otherwise linked into the
        # object hierarchy.
        self.orphans: List[Identified] = []
        # Triples that are not recognized as SBOL. They are kept here so
        # that they survive a read/write round trip.
        self._other_rdf = rdflib.Graph()
        # Start from a private copy of the default prefix bindings.
        self._namespaces: Dict[str, str] = _default_bindings.copy()
        self.logger = logging.getLogger(SBOL_LOGGER_NAME)

    def __str__(self):
        """
        Produce a string representation of the Document.

        This is the text returned by ``summary()``.

        :return: A string representation of the Document.
        """
        return self.summary()

    def __len__(self):
        """
        Get the number of top level objects in the Document.

        Delegates to ``size()``, so ``len(doc)`` and ``doc.size()``
        always agree.

        :return: The value returned by ``size()``.
        """
        return self.size()

    def __contains__(self, item):
        """Report whether ``item`` is one of this document's top level objects.

        Only ``self.objects`` is consulted; child objects nested below a
        top level object are not searched.

        :param item: The object to look for
        :return: ``True`` if ``item`` is in ``self.objects``
        """
        return item in self.objects

    def __iter__(self):
        """Iterate over the top level objects in this document.

        >>> import sbol3
        >>> doc = sbol3.Document()
        >>> doc.read('some_path.ttl')
        >>> for top_level in doc:
        >>>     print(top_level.identity)

        :return: An iterator over the top level objects
        """
        tmp_list = list(self.objects)
        return iter(tmp_list)

    def _build_extension_object(self, identity: str, sbol_type: str,
                                types: List[str]) -> Optional[Identified]:
        """Build an extension (custom) object. See Section 6.11 of the spec.

        The first of ``types`` that has a registered builder is used to
        construct the object. If none of them has a builder, a generic
        CustomIdentified or CustomTopLevel is built using the first
        entry of ``types`` as its type URI.

        :param identity: The identity URI of the object to build
        :param sbol_type: The object's SBOL type; must be SBOL_IDENTIFIED
                          or SBOL_TOP_LEVEL
        :param types: The object's remaining (non-SBOL) rdf:type URIs
        :return: The object produced by the selected builder
        :raises SBOLError: If ``sbol_type`` is not an extension base type
                           or if ``types`` is empty
        """
        custom_types = {
            SBOL_IDENTIFIED: CustomIdentified,
            SBOL_TOP_LEVEL: CustomTopLevel
        }
        if sbol_type not in custom_types:
            msg = f'{identity} has SBOL type {sbol_type} which is not one of'
            msg += f' {custom_types.keys()}. (See Section 6.11)'
            raise SBOLError(msg)
        if not types:
            # Without an additional type there is nothing to build the
            # extension object from. Report that instead of failing with
            # an IndexError on types[0] below.
            msg = f'{identity} has SBOL type {sbol_type} but no additional'
            msg += ' rdf:type. (See Section 6.11)'
            raise SBOLError(msg)
        # Look for a builder associated with one of the rdf:types.
        for type_uri in types:
            try:
                builder = self._uri_type_map[type_uri]
            except KeyError:
                # Use the SBOL logger rather than the root-level
                # logging.warning(), which would implicitly configure
                # the root logger of the host application.
                self.logger.warning('No builder found for %s', type_uri)
                continue
            return builder(identity=identity, type_uri=type_uri)
        # None of the rdf:types have a builder, use the sbol_type's builder
        fallback_builder = custom_types[sbol_type]
        return fallback_builder(identity=identity, type_uri=types[0])

    def _build_object(self, identity: str, types: List[str]) -> Optional[Identified]:
        """Build an object from an identity and its list of RDF types.

        * If there is no SBOL type, the entity is ignored.
        * If there is one SBOL type and it is TopLevel or Identified, the
          entity is an extension object and the other types are used to
          build it.
        * Otherwise the builder registered for the SBOL type is used.

        ``types`` is not modified.

        :param identity: The identity URI of the entity
        :param types: All of the entity's rdf:type URIs
        :return: The newly built object, or ``None`` if the entity has no
                 SBOL type
        :raises SBOLError: If the entity has more than one SBOL type, or
                           its SBOL type has no registered builder
        """
        sbol_types = [t for t in types if t.startswith(SBOL3_NS)]
        if not sbol_types:
            # There are no SBOL types in the list. Ignore this entity.
            # Its triples will be stored in self._other_rdf later in the
            # load process.
            return None
        if len(sbol_types) > 1:
            # SBOL 3.0.1 Section 5.4: "an object MUST have no more than one
            # rdfType property in the 'http://sbols.org/v3#' namespace"
            msg = f'{identity} has more than one rdfType property in the'
            msg += f' {SBOL3_NS} namespace.'
            raise SBOLError(msg)
        sbol_type = sbol_types[0]
        if sbol_type in (SBOL_IDENTIFIED, SBOL_TOP_LEVEL):
            # Build an extension object from the remaining types. A new
            # list is created so the caller's list is left untouched.
            other_types = [t for t in types if t != sbol_type]
            result = self._build_extension_object(identity, sbol_type,
                                                  other_types)
        else:
            try:
                builder = self._uri_type_map[sbol_type]
            except KeyError as exc:
                # Log via the SBOL logger, not the root logger
                self.logger.warning('No builder found for %s', sbol_type)
                raise SBOLError(f'Unknown type {sbol_type}') from exc
            result = builder(identity=identity, type_uri=sbol_type)
        # Fix https://github.com/SynBioDex/pySBOL3/issues/264
        if isinstance(result, TopLevel):
            # Ensure namespace is not set. It should get set later in the
            # build process. This avoids setting it when the file is invalid
            # and the object has no namespace in the file.
            result.clear_property(SBOL_NAMESPACE)
        # End of fix for https://github.com/SynBioDex/pySBOL3/issues/264
        return result

    def _parse_objects(self, graph: rdflib.Graph) -> Dict[str, SBOLObject]:
        """Create an object for every typed subject in ``graph``.

        The rdf:type values of each subject are gathered first, then
        ``_build_object`` is asked to build each subject from its types.
        Subjects for which nothing is built are left out of the result.

        :param graph: The RDF graph being loaded
        :return: A dictionary mapping identity to the newly built object
        """
        types_by_identity: Dict[str, List[str]] = collections.defaultdict(list)
        type_triples = graph.triples((None, rdflib.RDF.type, None))
        for subject, _, rdf_type in type_triples:
            types_by_identity[str(subject)].append(str(rdf_type))
        # Build an object for each identity from its collected types
        built = {}
        for identity, types in types_by_identity.items():
            obj = self._build_object(identity, types)
            if not obj:
                continue
            obj.document = self
            built[obj.identity] = obj
        return built

    @staticmethod
    def _parse_attributes(objects, graph) -> dict[str, Identified]:
        """Populate the built objects from the triples in ``graph``.

        Triples whose subject is not a key of ``objects`` are skipped.
        For the others, the value is stored either as an owned (child)
        object or as a property value, depending on the predicate.

        :param objects: Dictionary of identity to object, as built by
                        ``_parse_objects``
        :param graph: The RDF graph being loaded
        :return: A dictionary of identity to object for every object that
                 was assigned as a child of another object. It is used
                 to speed up the later search for orphans.
        """
        owned = {}
        for subject, predicate, value in graph.triples((None, None, None)):
            subject_uri = str(subject)
            if subject_uri not in objects:
                # Subject is not an SBOL object, skip it
                continue
            owner = objects[subject_uri]
            predicate_uri = str(predicate)
            if predicate_uri in owner._owned_objects:
                child_uri = str(value)
                child = objects[child_uri]
                owner._owned_objects[predicate_uri].append(child)
                # Remember that this object has an owner
                owned[child_uri] = child
                continue
            if predicate_uri == RDF_TYPE:
                # The main type(s) were already stored when the object
                # was built. Keep those entries and avoid duplicates.
                if value not in owner._properties[predicate_uri]:
                    owner._properties[predicate_uri].append(value)
                continue
            owner._properties[predicate_uri].append(value)
        return owned

    @staticmethod
    def _clean_up_singletons(objects: Dict[str, SBOLObject]):
        """Trim singleton properties to a single value after a load.

        While an SBOL file is read, values are appended to the property
        stores without regard to whether a store backs a singleton or a
        list property. Builders may also have placed a default value in
        a store before the file's value was appended. For each
        SingletonProperty found among an object's attributes, a store
        holding more than one value is cut down to its last value, which
        is the most recently appended one.

        :param objects: Dictionary of identity to loaded object
        """
        for obj in objects.values():
            for attr in vars(obj).values():
                if not isinstance(attr, SingletonProperty):
                    continue
                prop_uri = attr.property_uri
                store = attr._storage()
                if len(store[prop_uri]) <= 1:
                    continue
                store[prop_uri] = store[prop_uri][-1:]

    def clear(self) -> None:
        """Reset this document to an empty state.

        Removes the top level objects, the orphans and the stored
        non-SBOL triples, and restores the default prefix bindings.
        The orphans and non-SBOL triples must be reset as well because
        ``graph()`` serializes them; leaving them behind would make a
        cleared document still write out stale content.
        """
        self.objects.clear()
        self.orphans = []
        self._other_rdf = rdflib.Graph()
        self._namespaces = _default_bindings.copy()

    def _parse_graph(self, graph: rdflib.Graph) -> None:
        """Load the contents of an RDF graph into this document.

        Builds the objects, fills in their attributes, and then replaces
        this document's objects, orphans and non-SBOL triples with what
        was found in ``graph``. The graph's prefix bindings are added to
        this document's bindings.

        Note that ``graph`` is modified (triples of the loaded objects
        are removed from it) and is then retained by this document.

        :param graph: The parsed RDF graph to load
        """
        objects = self._parse_objects(graph)
        child_objects = self._parse_attributes(objects, graph)
        self._clean_up_singletons(objects)
        # Validate all the objects
        # TODO: Where does this belong? Is this automatic?
        #       Or should a user invoke validate?
        # for obj in objects.values():
        #     obj.validate()

        # Extract the top_levels to a dictionary to speed up the search
        # for orphans below.
        top_levels = {uri: obj for uri, obj in objects.items()
                      if isinstance(obj, TopLevel)}
        # Store the TopLevel objects in the Document. This must happen
        # before the orphan search because self.find() below searches
        # self.objects.
        self.objects = list(top_levels.values())
        # Gather Orphans for future writing.
        # These are expected to be non-TopLevel annotation objects whose owners
        # have no custom implementation (i.e. no builder registered). These objects
        # will be written out as part of Document.write_string()
        self.orphans = []
        for uri, obj in objects.items():
            # Top levels and objects already assigned to an owner are
            # known not to be orphans, so skip the more expensive find().
            if uri in top_levels or uri in child_objects:
                continue
            found = self.find(uri)
            if found:
                continue
            self.orphans.append(obj)
        # Store the namespaces in the Document for later use
        for prefix, uri in graph.namespaces():
            self.bind(prefix, uri)
        # Remove the triples for every object we have loaded, leaving
        # the non-SBOL triples for round tripping
        # See https://github.com/SynBioDex/pySBOL3/issues/96
        for uri in objects:
            graph.remove((rdflib.URIRef(uri), None, None))
        # Now tuck away the graph for use in Document.write_string()
        self._other_rdf = graph

    def _guess_format(self, fpath: str):
        """Guess the RDF format of a file from its path.

        :param fpath: The path of the file
        :return: Whatever ``rdflib.util.guess_format`` returns for
                 ``fpath``, except that 'nt' is replaced by 'nt11'
        """
        guessed = rdflib.util.guess_format(fpath)
        # Prefer the N-Triples 1.1 format over plain 'nt'
        # See https://github.com/RDFLib/rdflib/issues/1376
        # See https://github.com/RDFLib/rdflib/issues/1377
        return 'nt11' if guessed == 'nt' else guessed

    @staticmethod
    def file_extension(file_format: str) -> str:
        """Look up the standard file extension for a file format.

        :param file_format: The format of the file
        :return: A file extension, including the leading '.'
        :raises ValueError: If ``file_format`` is not a known format
        """
        # Valid file formats and the standard extension of each
        extension = {
            SORTED_NTRIPLES: '.nt',
            NTRIPLES: '.nt',
            JSONLD: '.json',
            RDF_XML: '.xml',
            TURTLE: '.ttl'
        }.get(file_format)
        if extension is None:
            raise ValueError('Provided file format is not a valid one.')
        return extension

    # Formats: 'n3', 'nt', 'turtle', 'xml'
    def read(self, location: Union[Path, str], file_format: str = None) -> None:
        """Read RDF from ``location`` and load it into this document.

        :param location: Where to read from; converted to ``str`` and
                         handed to ``rdflib.Graph.parse``
        :param file_format: The RDF format. If ``None``, the format is
                            guessed from ``location``.
        :raises ValueError: If no format was given and none can be guessed
        """
        source = str(location)  # normalize location to a string
        if file_format is not None:
            rdf_format = file_format
        else:
            rdf_format = self._guess_format(source)
        if rdf_format is None:
            raise ValueError('Unable to determine file format')
        # Sorted N-Triples is parsed as ordinary N-Triples
        if rdf_format == SORTED_NTRIPLES:
            rdf_format = NTRIPLES
        graph = rdflib.Graph()
        graph.parse(source, format=rdf_format)
        return self._parse_graph(graph)

    # Formats: 'n3', 'nt', 'turtle', 'xml'
    def read_string(self, data: str, file_format: str) -> None:
        """Parse RDF from a string and load it into this document.

        :param data: The serialized RDF
        :param file_format: The RDF format of ``data``
        """
        # TODO: clear the document, this isn't append
        # Sorted N-Triples is parsed as ordinary N-Triples
        rdf_format = NTRIPLES if file_format == SORTED_NTRIPLES else file_format
        graph = rdflib.Graph()
        graph.parse(data=data, format=rdf_format)
        return self._parse_graph(graph)

    def _add(self, obj: TopLevel) -> TopLevel:
        """Add a single TopLevel object to the document.

        :param obj: The object to add
        :return: ``obj``
        :raises TypeError: If ``obj`` is not a TopLevel
        :raises ValueError: If ``find`` already locates something for
                            the object's identity in this document
        """
        if not isinstance(obj, TopLevel):
            raise TypeError(f'Expected TopLevel instance, {type(obj).__name__} found')
        if self.find(obj.identity) is not None:
            raise ValueError(f'An entity with identity "{obj.identity}"'
                             ' already exists in document')
        self.objects.append(obj)
        # Point every object in the tree rooted at the new TopLevel
        # back at this document
        obj.traverse(lambda node: setattr(node, 'document', self))
        return obj

    def _add_all(self, objects: pytyping.Sequence[TopLevel]) -> pytyping.Sequence[TopLevel]:
        """Add several TopLevel objects to the document.

        All objects are type checked before any of them is added, so a
        type error never leaves the document partially updated.

        :param objects: The objects to add. Any iterable is accepted,
                        including one-shot iterators.
        :return: The ``objects`` argument, as passed in
        :raises TypeError: If any object is not a TopLevel
        """
        # Materialize the input once. `add` accepts any iterable, and a
        # one-shot iterator would otherwise be exhausted by the type
        # check below, leaving nothing for the second loop to add.
        candidates = list(objects)

        # Perform type check of all objects.
        # We do this to avoid finding out part way through that an
        # object can't be added. That would leave the document in an
        # unknown state.
        for obj in candidates:
            if not isinstance(obj, TopLevel):
                if isinstance(obj, Identified):
                    raise TypeError(f'{obj.identity} is not a TopLevel object')
                else:
                    raise TypeError(f'{repr(obj)} is not a TopLevel object')

        # Dispatch to Document._add to add the individual objects
        for obj in candidates:
            self._add(obj)
        # return the passed argument
        return objects

    def add(self,
            objects: Union[TopLevel, pytyping.Sequence[TopLevel]]) -> Union[TopLevel, pytyping.Sequence[TopLevel]]:
        """Add one TopLevel object, or an iterable of them, to the document.

        :param objects: A TopLevel or an iterable of TopLevel objects
        :return: What ``_add`` or ``_add_all`` returns for ``objects``
        :raises TypeError: If ``objects`` is neither a TopLevel nor iterable
        """
        # A single TopLevel is handed straight to _add
        if isinstance(objects, TopLevel):
            return self._add(objects)
        # Anything else must be iterable.
        #
        # Note: Python documentation for collections.abc says "The only
        # reliable way to determine whether an object is iterable is to
        # call iter(obj)." `iter` will raise TypeError if the object is
        # not iterable
        try:
            iter(objects)
        except TypeError:
            raise TypeError('argument must be either TopLevel or Iterable')
        return self._add_all(objects)

    def _find_in_objects(self, search_string: str) -> Optional[Identified]:
        # TODO: implement recursive search
        for obj in self.objects:
            # TODO: needs an object.find(search_string) method on ... Identified?
            result = obj.find(search_string)
            if result is not None:
                return result
        return None

    def find(self, search_string: str) -> Optional[Identified]:
        """Find an object by identity URI or by display_id.

        :param search_string: Either an identity URI or a display_id
        :type search_string: str
        :returns: The named object or ``None`` if no object was found

        """
        for obj in self.objects:
            if obj.identity == search_string:
                return obj
            if obj.display_id and obj.display_id == search_string:
                return obj
        return self._find_in_objects(search_string)

    def join_lines(self, lines: List[Union[bytes, str]]) -> Union[bytes, str]:
        """Join a list of lines, each terminated by a newline.

        The type of the first line decides the type of the result:
        bytes lines give bytes, str lines give str. An empty list gives
        an empty str. If the first line is neither bytes nor str,
        ``None`` is returned.
        """
        if not lines:
            return ''
        first_type = type(lines[0])
        if first_type is bytes:
            # rdflib 5
            newline = b'\n'
        elif first_type is str:
            # rdflib 6
            newline = '\n'
        else:
            return None
        return newline.join(lines) + newline

    def write_string(self, file_format: str) -> str:
        """Serialize this document to a string.

        :param file_format: The RDF format to serialize to
        :return: The serialized document; bytes produced by the
                 serializer are decoded to str
        """
        graph = self.graph()
        if file_format in (NTRIPLES, SORTED_NTRIPLES):
            # RDFlib puts in an extra blank line, so N-Triples output is
            # rebuilt line by line with the blank lines dropped.
            nt_text = graph.serialize(format=NTRIPLES)
            lines = list(filter(None, nt_text.splitlines()))
            if file_format == SORTED_NTRIPLES:
                lines = sorted(lines)
            result = self.join_lines(lines)
        elif file_format == JSONLD:
            # Hand the document's prefix bindings to the serializer
            context = {f'@{prefix}': uri for prefix, uri in self._namespaces.items()}
            result = graph.serialize(format=file_format, context=context)
        else:
            result = graph.serialize(format=file_format)
        return result.decode() if type(result) is bytes else result

    def write(self, fpath: Union[Path, str], file_format: str = None) -> None:
        """Write the document to file.

        If file_format is None the desired format is guessed from the
        extension of fpath. If file_format cannot be guessed a ValueError
        is raised.

        The file is written as UTF-8 regardless of the platform's default
        encoding.

        :param fpath: The path of the file to write
        :param file_format: The RDF format, or ``None`` to guess it
        :raises ValueError: If no format was given and none can be guessed
        """
        _fpath = str(fpath)  # normalize fpath to a string
        if file_format is None:
            file_format = self._guess_format(_fpath)
        if file_format is None:
            raise ValueError('Unable to determine file format')
        # Serialize before opening the file so that a serialization
        # failure does not leave behind a truncated, empty file.
        text = self.write_string(file_format)
        # Be explicit about the encoding: the platform default is not
        # UTF-8 everywhere, and non-ASCII content would otherwise fail
        # to encode or be written in a locale-specific encoding.
        with open(_fpath, 'w', encoding='utf-8') as outfile:
            outfile.write(text)

    def graph(self) -> rdflib.Graph:
        """Convert document to an RDF Graph.

        A new graph is built on every call, so the returned graph is a
        snapshot of the document and will not be updated by subsequent
        changes to the document.

        :return: A graph holding the prefix bindings, the serialized
                 orphans and top level objects, and the stored non-SBOL
                 triples
        """
        rdf_graph = rdflib.Graph()
        for prefix, uri in self._namespaces.items():
            rdf_graph.bind(prefix, uri)
        # Orphans are serialized first, then the top level objects
        for entity in [*self.orphans, *self.objects]:
            entity.serialize(rdf_graph)
        # Add the non-SBOL RDF triples into the generated graph
        rdf_graph += self._other_rdf
        return rdf_graph

    def bind(self, prefix: str, uri: str) -> None:
        """Bind a prefix to an RDF namespace in the written RDF document.

        These prefixes make the written RDF easier for humans to read.
        These prefixes do not change the semantic meaning of the RDF
        document in any way.
        """
        # Remove any prefix referencing the given URI
        if uri in self._namespaces.values():
            for k, v in list(self._namespaces.items()):
                if v == uri:
                    del self._namespaces[k]
        self._namespaces[prefix] = uri

    def addNamespace(self, namespace: str, prefix: str) -> None:
        """Document.addNamespace is deprecated. Replace with Document.bind.

        Document.addNamespace existed in pySBOL2 and was commonly used.

        Document.addNamespace(namespace, prefix) should now be
        Document.bind(prefix, namespace). Note the change of argument
        order.
        """
        warnings.warn('Use Document.bind() instead', DeprecationWarning)
        self.bind(prefix, namespace)

    def parse_shacl_graph(self, shacl_graph: rdflib.Graph,
                          report: ValidationReport) -> ValidationReport:
        """Copy SHACL violations and warnings into a pySBOL3
        validation report.

        Results with any other severity are not added to the report.

        :param shacl_graph: The output graph from pyshacl
        :type shacl_graph: rdflib.Graph
        :param report: The ValidationReport to be populated
        :type report: ValidationReport
        :return: report
        :rtype: ValidationReport
        """
        sh = rdflib.Namespace('http://www.w3.org/ns/shacl#')
        shacl_reports = shacl_graph.subjects(rdflib.RDF.type,
                                             sh.ValidationReport)
        for shacl_report in shacl_reports:
            for result in shacl_graph.objects(shacl_report, sh.result):
                focus_node = shacl_graph.value(result, sh.focusNode)
                path = shacl_graph.value(result, sh.resultPath)
                text = shacl_graph.value(result, sh.resultMessage)
                severity = shacl_graph.value(result, sh.resultSeverity)
                message = f'{path}: {text}'
                if severity == sh.Violation:
                    report.addError(focus_node, None, message)
                elif severity == sh.Warning:
                    report.addWarning(focus_node, None, message)
        return report

    def validate_shacl(self,
                       report: Optional[ValidationReport] = None
                       ) -> ValidationReport:
        """Validate this document using SHACL rules.

        The document is converted to an RDF graph, the SBOL3 shapes file
        is parsed into that same graph, and pyshacl is run over it with
        no separate shapes graph.

        :param report: A report to add to; a new one is created if
                       ``None``
        :return: The report
        """
        if report is None:
            report = ValidationReport()
        # Save to RDF, then load the shapes into the same graph
        data_graph = self.graph()
        shapes_path = data_path(os.path.join('rdf', 'sbol3-shapes.ttl'))
        data_graph.parse(shapes_path, format='ttl')
        conforms, results_graph, _ = pyshacl.validate(data_graph=data_graph,
                                                      shacl_graph=None,
                                                      ont_graph=None,
                                                      inference=None,
                                                      abort_on_first=False,
                                                      meta_shacl=False,
                                                      advanced=True,
                                                      debug=False)
        if not conforms:
            self.parse_shacl_graph(results_graph, report)
        return report

    def validate(self, report: ValidationReport = None) -> ValidationReport:
        """Validate all objects in this document.

        Each top level object's ``validate`` is called with the report,
        followed by SHACL validation of the whole document.

        :param report: A report to add to; a new one is created if
                       ``None``
        :return: The report
        """
        report = ValidationReport() if report is None else report
        for top_level in self.objects:
            top_level.validate(report)
        self.validate_shacl(report)
        return report

    def find_all(self, predicate: Callable[[Identified], bool]) -> List[Identified]:
        """Executes a predicate on every object in the document tree,
        gathering the list of objects to which the predicate returns true.
        """
        result: List[Identified] = []

        def wrapped_filter(visited: Identified):
            if predicate(visited):
                result.append(visited)
        self.traverse(wrapped_filter)
        return result

    def accept(self, visitor: Any) -> Any:
        """Invokes `visit_document` on `visitor` with `self` as the only
        argument.

        Only the document itself is passed to the visitor; this method
        does not descend into the document's objects.

        :param visitor: The visitor instance
        :type visitor: Any
        :raises AttributeError: If visitor lacks a visit_document method
        :return: Whatever `visitor.visit_document` returns
        :rtype: Any

        """
        return visitor.visit_document(self)

    def traverse(self, func: Callable[[Identified], None]):
        """Enable a traversal of the entire object hierarchy contained
        in this document.
        """
        for obj in self.objects:
            obj.traverse(func)

    def builder(self, type_uri: str) -> Callable[[str, str], Identified]:
        """Lookup up the builder callable for the given type_uri.

        The builder must have been previously registered under this
        type_uri via Document.register_builder().

        :raises: ValueError if the type_uri does not have an associated
                 builder.
        """
        try:
            return self._uri_type_map[type_uri]
        except KeyError:
            raise ValueError(f'No builder for {type_uri}')

    def summary(self):
        """
        Produce a string representation of the Document.
        :return: A string representation of the Document.
        """
        summary = ''
        col_size = 30
        total_core_objects = 0

        type_list = [obj.type_uri for obj in self.objects]
        type_set = list(set(type_list))
        type_set.sort()
        for obj_type in type_set:
            property_name = obj_type[obj_type.rfind('#')+1:]
            obj_count = len([x for x in type_list if x == obj_type])
            total_core_objects += obj_count
            summary += property_name
            summary += '.' * (col_size-len(property_name))
            summary += str(obj_count) + '\n'

        # TODO: Is there a pySBOL equivalent to Annotation Objects?
        # summary += 'Annotation Objects'
        # summary += '.' * (col_size-18)
        # summary += str(self.size() - total_core_objects) + '\n'
        summary += '---\n'
        summary += 'Total: '
        summary += '.' * (col_size-5)
        summary += str(self.size()) + '\n'
        return summary

    def size(self):
        """
        Get the number of top level objects in the Document.

        Only the entries of ``self.objects`` are counted; child objects
        and orphans are not included.

        :return: The number of top level objects in the Document.
        """
        return len(self.objects)

    def remove(self, objects: Iterable[TopLevel]):
        """Remove the given top level objects from this document.

        All objects are checked before any of them is removed, so a
        failed check leaves the document unchanged. Removal is delegated
        to each object's ``remove_from_document`` method.

        :param objects: The top level objects to remove
        :raises ValueError: If an object is not a TopLevel, or is not one
                            of this document's top level objects
        """
        objects_to_remove = []
        for obj in objects:
            if not isinstance(obj, TopLevel):
                raise ValueError(f'{obj!r} is not a TopLevel object')
            if obj not in self.objects:
                raise ValueError(f'{obj.identity} is not in this document')
            objects_to_remove.append(obj)
        # Now do the removal of each top level object and all of its children
        for obj in objects_to_remove:
            obj.remove_from_document()

    def remove_object(self, top_level: TopLevel):
        """Removes the given TopLevel from this document. No referential
        integrity is updated, and the TopLevel object is not informed
        that it has been removed, so it may still have a pointer to this
        document. No errors are raised and no value is returned.

        N.B. You probably want to use `remove` instead of `remove_object`.

        :param top_level: An object to remove
        :return: Nothing
        """
        try:
            self.objects.remove(top_level)
        except ValueError:
            pass

    def migrate(self, top_levels: Iterable[TopLevel]) -> Any:
        """Migrate objects to this document.

        No effort is made to maintain referential integrity. The
        burden of referential integrity lies with the caller of this
        method.

        All objects are type checked before any of them is moved.

        :param top_levels: The top levels to migrate to this document
        :return: Nothing
        :raises ValueError: If any object is not a TopLevel
        """
        objects = []
        for top_level in top_levels:
            if not isinstance(top_level, TopLevel):
                # The offending object may not have an identity at all,
                # so fall back to its repr rather than letting an
                # AttributeError mask the intended ValueError.
                name = getattr(top_level, 'identity', repr(top_level))
                raise ValueError(f"Object {name} is not a TopLevel object")
            objects.append(top_level)
        # Remove each object from its former document if it has one
        for obj in objects:
            obj.remove_from_document()
        # Add each object to this document
        self.add(objects)

    @staticmethod
    def change_object_namespace(top_levels: Iterable[TopLevel],
                                new_namespace: str,
                                update_references: Iterable[TopLevel] = None
                                ) -> Any:
        """Change the namespace of all TopLevel objects in `top_levels` to
        new_namespace, regardless of the previous value, while
        maintaining referential integrity among all the top level
        objects in top_levels, including their dependents. The
        namespace change is "in place". No new objects are allocated.

        Note: this operation can result in an invalid Document if the
        change in namespace creates a naming collision. This method
        does not check for this case either before or after the
        operation. It is up to the caller to decide whether this
        operation is safe.

        :param top_levels: objects to change
        :param new_namespace: new namespace for objects
        :param update_references: objects that should have their references
                                  updated without changing their namespace
        :return: Nothing
        :raises ValueError: if any element of top_levels is not a TopLevel
        """
        # Validate the objects, compute each new identity, and build a
        # map of old identity to the object that currently has it
        objects = []
        identity_map = {}
        for top_level in top_levels:
            if not isinstance(top_level, TopLevel):
                raise ValueError(f'{top_level.identity} is not a TopLevel')
            # if top_level not in self:
            #     raise ValueError(f'{top_level.identity} not in this document')
            # Formulate the new identity: the new namespace followed by
            # the object's existing path and display id
            _, path, display_id = top_level.split_identity()
            new_identity = posixpath.join(new_namespace, path, display_id)
            # Keyed by the identity the object has before the change
            identity_map[top_level.identity] = top_level
            objects.append((top_level, new_identity))
        # Now change the object identities, and then remap the referenced objects
        for top_level, new_identity in objects:
            top_level.namespace = new_namespace
            top_level.set_identity(new_identity)
            top_level.update_all_dependents(identity_map)
        # Now update any TopLevels in the update_references group. These are
        # objects that may have references to the objects that had their
        # namespace changed.
        if update_references is not None:
            for top_level in update_references:
                top_level.update_all_dependents(identity_map)
        return None

    def clone(self) -> List[TopLevel]:
        """Clone the top level objects in this document.

        :return: A list of cloned TopLevel objects
        """
        return [tl.clone() for tl in self]

    def copy(self) -> 'Document':
        """Make a copy of this document.

        A new, empty Document is created and the module level ``copy``
        function is used to copy this document's top level objects
        into it.

        :return: A new document containing a new set of objects
                 that are identical to the original objects.
        """
        duplicate = Document()
        copy(self, into_document=duplicate)
        return duplicate


def copy(top_levels: Iterable[TopLevel],
         into_namespace: Optional[str] = None,
         into_document: Optional[Document] = None) -> List[TopLevel]:
    """Copy SBOL objects, optionally changing their namespace and
    optionally adding them to a document. Referential integrity among
    the group of provided TopLevel objects is maintained.

    If `into_namespace` is provided, the newly created objects will have
    the provided namespace and will maintain the rest of their
    identities, including the local path and display ID.

    If `into_document` is provided, the newly created objects will be
    added to the provided Document.

    :param top_levels: Top Level objects to be copied
    :param into_namespace: A namespace to be given to the new objects
    :param into_document: A document to which the newly created objects
                         will be added
    :return: A list of the newly created objects
    :raises ValueError: If any of `top_levels` is not a TopLevel
    """
    # Check every object before cloning anything
    objects = []
    for top_level in top_levels:
        if not isinstance(top_level, TopLevel):
            # The offending object may not have an identity at all, so
            # fall back to its repr rather than letting an
            # AttributeError mask the intended ValueError.
            name = getattr(top_level, 'identity', repr(top_level))
            raise ValueError(f"Object {name} is not a TopLevel object")
        objects.append(top_level)
    clones = [tl.clone() for tl in objects]
    if into_namespace is not None:
        Document.change_object_namespace(clones, into_namespace)
    if into_document is not None:
        into_document.add(clones)
    return clones