Skip to content

Commit 34c3a4a

Browse files
committed
Merge branch 'develop' of https://github.com/nltk/nltk into develop
1 parent 253dd3a commit 34c3a4a

File tree

6 files changed: +98 additions, -50 deletions

nltk/collections.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
# For license information, see LICENSE.TXT
77

88
import bisect
9-
109
from functools import total_ordering
1110
from itertools import chain, islice
1211

nltk/corpus/reader/wordnet.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -589,12 +589,13 @@ def closure(self, rel, depth=-1):
589589
590590
>>> dog = wn.synset('dog.n.01')
591591
>>> hyp = lambda s:sorted(s.hypernyms())
592-
>>> print(sorted(dog.closure(hyp)))
593-
[Synset('animal.n.01'), Synset('canine.n.02'), Synset('carnivore.n.01'),\
594-
Synset('chordate.n.01'), Synset('domestic_animal.n.01'), Synset('entity.n.01'),\
595-
Synset('living_thing.n.01'), Synset('mammal.n.01'), Synset('object.n.01'),\
596-
Synset('organism.n.01'), Synset('physical_entity.n.01'), Synset('placental.n.01'),\
597-
Synset('vertebrate.n.01'), Synset('whole.n.02')]
592+
>>> print(list(dog.closure(hyp)))
593+
[Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'),\
594+
Synset('animal.n.01'), Synset('placental.n.01'), Synset('organism.n.01'),\
595+
Synset('mammal.n.01'), Synset('living_thing.n.01'), Synset('vertebrate.n.01'),\
596+
Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'),\
597+
Synset('physical_entity.n.01'), Synset('entity.n.01')]
598+
598599
UserWarning: Discarded redundant search for Synset('animal.n.01') at depth 7
599600
"""
600601

nltk/data.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@
5252
from zlib import Z_FINISH as FLUSH
5353

5454
from nltk import grammar, sem
55-
from nltk.compat import add_py3_data, py3_data
5655
from nltk.internals import deprecated
5756

5857
textwrap_indent = functools.partial(textwrap.indent, prefix=" ")
@@ -300,7 +299,6 @@ class FileSystemPathPointer(PathPointer, str):
300299
directly via a given absolute path.
301300
"""
302301

303-
@py3_data
304302
def __init__(self, _path):
305303
"""
306304
Create a new path pointer for the given absolute path.
@@ -349,7 +347,6 @@ class BufferedGzipFile(GzipFile):
349347
Python versions.
350348
"""
351349

352-
@py3_data
353350
def __init__(
354351
self, filename=None, mode=None, compresslevel=9, fileobj=None, **kwargs
355352
):
@@ -382,7 +379,6 @@ class ZipFilePathPointer(PathPointer):
382379
which can be accessed by reading that zipfile.
383380
"""
384381

385-
@py3_data
386382
def __init__(self, zipfile, entry=""):
387383
"""
388384
Create a new path pointer pointing at the specified entry
@@ -791,7 +787,6 @@ def load(
791787
:param encoding: the encoding of the input; only used for text formats.
792788
"""
793789
resource_url = normalize_resource_url(resource_url)
794-
resource_url = add_py3_data(resource_url)
795790

796791
# Determine the format of the resource.
797792
if format == "auto":
@@ -818,7 +813,6 @@ def load(
818813
print(f"<<Using cached copy of {resource_url}>>")
819814
return resource_val
820815

821-
resource_url = normalize_resource_url(resource_url)
822816
protocol, path_ = split_resource_url(resource_url)
823817

824818
if path_[-7:] == ".pickle":
@@ -979,7 +973,7 @@ def _open(resource_url):
979973

980974

981975
class LazyLoader:
982-
@py3_data
976+
983977
def __init__(self, _path):
984978
self._path = _path
985979

@@ -1020,7 +1014,6 @@ class OpenOnDemandZipFile(zipfile.ZipFile):
10201014
read-only (i.e. ``write()`` and ``writestr()`` are disabled.
10211015
"""
10221016

1023-
@py3_data
10241017
def __init__(self, filename):
10251018
if not isinstance(filename, str):
10261019
raise TypeError("ReopenableZipFile filename must be a string")
@@ -1077,7 +1070,6 @@ class SeekableUnicodeStreamReader:
10771070

10781071
DEBUG = True # : If true, then perform extra sanity checks.
10791072

1080-
@py3_data
10811073
def __init__(self, stream, encoding, errors="strict"):
10821074
# Rewind the stream to its beginning.
10831075
stream.seek(0)

nltk/sem/boxer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -530,11 +530,15 @@ def _handle_time_expression(self, arg):
530530
else:
531531
return None
532532
self.assertToken(self.token(), ")")
533+
534+
def func_gen(x):
535+
return lambda sent_index, word_indices: x
536+
533537
return [
534538
lambda sent_index, word_indices: BoxerPred(
535539
self.discourse_id, sent_index, word_indices, arg, tok, "n", 0
536540
)
537-
] + [lambda sent_index, word_indices: cond for cond in conds]
541+
] + [func_gen(cond) for cond in conds]
538542

539543
def _handle_date(self, arg):
540544
# []: (+), []:'XXXX', [1004]:'04', []:'XX'

nltk/test/unit/test_wordnet.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_hyperhyponyms(self):
4545
S("brunch.n.01"),
4646
S("buffet.n.02"),
4747
]
48-
self.assertEqual(sorted(S("meal.n.1").hyponyms()[:5]), first_five_meal_hypo)
48+
self.assertEqual(sorted(S("meal.n.1").hyponyms())[:5], first_five_meal_hypo)
4949
self.assertEqual(S("Austen.n.1").instance_hypernyms(), [S("writer.n.01")])
5050
first_five_composer_hypo = [
5151
S("ambrose.n.01"),
@@ -55,14 +55,14 @@ def test_hyperhyponyms(self):
5555
S("beethoven.n.01"),
5656
]
5757
self.assertEqual(
58-
S("composer.n.1").instance_hyponyms()[:5], first_five_composer_hypo
58+
sorted(S("composer.n.1").instance_hyponyms())[:5], first_five_composer_hypo
5959
)
6060

6161
# Test root hyper-/hyponyms
6262
self.assertEqual(S("person.n.01").root_hypernyms(), [S("entity.n.01")])
6363
self.assertEqual(S("sail.v.01").root_hypernyms(), [S("travel.v.01")])
6464
self.assertEqual(
65-
S("fall.v.12").root_hypernyms(), [S("act.v.01"), S("fall.v.17")]
65+
sorted(S("fall.v.12").root_hypernyms()), [S("act.v.01"), S("fall.v.17")]
6666
)
6767

6868
def test_derivationally_related_forms(self):
@@ -84,24 +84,25 @@ def test_derivationally_related_forms(self):
8484
def test_meronyms_holonyms(self):
8585
# Test meronyms, holonyms.
8686
self.assertEqual(
87-
S("dog.n.01").member_holonyms(), [S("canis.n.01"), S("pack.n.06")]
87+
sorted(S("dog.n.01").member_holonyms()), [S("canis.n.01"), S("pack.n.06")]
8888
)
8989
self.assertEqual(S("dog.n.01").part_meronyms(), [S("flag.n.07")])
9090

9191
self.assertEqual(S("faculty.n.2").member_meronyms(), [S("professor.n.01")])
9292
self.assertEqual(S("copilot.n.1").member_holonyms(), [S("crew.n.01")])
9393

9494
self.assertEqual(
95-
S("table.n.2").part_meronyms(),
95+
sorted(S("table.n.2").part_meronyms()),
9696
[S("leg.n.03"), S("tabletop.n.01"), S("tableware.n.01")],
9797
)
9898
self.assertEqual(S("course.n.7").part_holonyms(), [S("meal.n.01")])
9999

100100
self.assertEqual(
101-
S("water.n.1").substance_meronyms(), [S("hydrogen.n.01"), S("oxygen.n.01")]
101+
sorted(S("water.n.1").substance_meronyms()),
102+
[S("hydrogen.n.01"), S("oxygen.n.01")],
102103
)
103104
self.assertEqual(
104-
S("gin.n.1").substance_holonyms(),
105+
sorted(S("gin.n.1").substance_holonyms()),
105106
[
106107
S("gin_and_it.n.01"),
107108
S("gin_and_tonic.n.01"),
@@ -123,7 +124,7 @@ def test_misc_relations(self):
123124
# Test misc relations.
124125
self.assertEqual(S("snore.v.1").entailments(), [S("sleep.v.01")])
125126
self.assertEqual(
126-
S("heavy.a.1").similar_tos(),
127+
sorted(S("heavy.a.1").similar_tos()),
127128
[
128129
S("dense.s.03"),
129130
S("doughy.s.01"),
@@ -162,10 +163,14 @@ def test_domains(self):
162163
def test_in_topic_domains(self):
163164
# Test in domains.
164165
self.assertEqual(
165-
S("computer_science.n.01").in_topic_domains()[0], S("access.n.05")
166+
sorted(S("computer_science.n.01").in_topic_domains())[0], S("access.n.05")
167+
)
168+
self.assertEqual(
169+
sorted(S("germany.n.01").in_region_domains())[23], S("trillion.n.02")
170+
)
171+
self.assertEqual(
172+
sorted(S("slang.n.02").in_usage_domains())[1], S("airhead.n.01")
166173
)
167-
self.assertEqual(S("germany.n.01").in_region_domains()[23], S("trillion.n.02"))
168-
self.assertEqual(S("slang.n.02").in_usage_domains()[1], S("airhead.n.01"))
169174

170175
def test_wordnet_similarities(self):
171176
# Path based similarities.

nltk/util.py

Lines changed: 69 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,13 @@ def breadth_first(tree, children=iter, maxdepth=-1):
257257

258258

259259
def edge_closure(tree, children=iter, maxdepth=-1, verbose=False):
260-
"""Yield the edges of a graph in breadth-first order,
260+
"""
261+
:param tree: the tree root
262+
:param children: a function taking as argument a tree node
263+
:param maxdepth: to limit the search depth
264+
:param verbose: to print warnings when cycles are discarded
265+
266+
Yield the edges of a graph in breadth-first order,
261267
discarding eventual cycles.
262268
The first argument should be the start node;
263269
children should be a function taking as argument a graph node
@@ -295,13 +301,13 @@ def edge_closure(tree, children=iter, maxdepth=-1, verbose=False):
295301
def edges2dot(edges, shapes=None, attr=None):
296302
"""
297303
:param edges: the set (or list) of edges of a directed graph.
298-
299-
:return dot_string: a representation of 'edges' as a string in the DOT
300-
graph language, which can be converted to an image by the 'dot' program
301-
from the Graphviz package, or nltk.parse.dependencygraph.dot2img(dot_string).
302-
303304
:param shapes: dictionary of strings that trigger a specified shape.
304305
:param attr: dictionary with global graph attributes
306+
:return: a representation of 'edges' as a string in the DOT graph language.
307+
308+
Returns dot_string: a representation of 'edges' as a string in the DOT
309+
graph language, which can be converted to an image by the 'dot' program
310+
from the Graphviz package, or nltk.parse.dependencygraph.dot2img(dot_string).
305311
306312
>>> import nltk
307313
>>> from nltk.util import edges2dot
@@ -337,8 +343,12 @@ def edges2dot(edges, shapes=None, attr=None):
337343

338344
def unweighted_minimum_spanning_digraph(tree, children=iter, shapes=None, attr=None):
339345
"""
346+
:param tree: the tree root
347+
:param children: a function taking as argument a tree node
348+
:param shapes: dictionary of strings that trigger a specified shape.
349+
:param attr: dictionary with global graph attributes
340350
341-
Build a Minimum Spanning Tree (MST) of an unweighted graph,
351+
Build a Minimum Spanning Tree (MST) of an unweighted graph,
342352
by traversing the nodes of a tree in breadth-first order,
343353
discarding eventual cycles.
344354
@@ -378,7 +388,15 @@ def unweighted_minimum_spanning_digraph(tree, children=iter, shapes=None, attr=N
378388

379389

380390
def acyclic_breadth_first(tree, children=iter, maxdepth=-1, verbose=False):
381-
"""Traverse the nodes of a tree in breadth-first order,
391+
"""
392+
:param tree: the tree root
393+
:param children: a function taking as argument a tree node
394+
:param maxdepth: to limit the search depth
395+
:param verbose: to print warnings when cycles are discarded
396+
:return: the tree in breadth-first order
397+
398+
Adapted from breadth_first() above, to discard cycles.
399+
Traverse the nodes of a tree in breadth-first order,
382400
discarding eventual cycles.
383401
384402
The first argument should be the tree root;
@@ -389,32 +407,41 @@ def acyclic_breadth_first(tree, children=iter, maxdepth=-1, verbose=False):
389407
queue = deque([(tree, 0)])
390408
while queue:
391409
node, depth = queue.popleft()
410+
if node in traversed:
411+
continue
392412
yield node
393413
traversed.add(node)
394414
if depth != maxdepth:
395415
try:
396416
for child in children(node):
397417
if child not in traversed:
398418
queue.append((child, depth + 1))
399-
else:
400-
if verbose:
401-
warnings.warn(
402-
"Discarded redundant search for {} at depth {}".format(
403-
child, depth + 1
404-
),
405-
stacklevel=2,
406-
)
419+
elif verbose:
420+
warnings.warn(
421+
"Discarded redundant search for {} at depth {}".format(
422+
child, depth + 1
423+
),
424+
stacklevel=2,
425+
)
407426
except TypeError:
408427
pass
409428

410429

411430
def acyclic_depth_first(
412431
tree, children=iter, depth=-1, cut_mark=None, traversed=None, verbose=False
413432
):
414-
"""Traverse the nodes of a tree in depth-first order,
433+
"""
434+
:param tree: the tree root
435+
:param children: a function taking as argument a tree node
436+
:param depth: the maximum depth of the search
437+
:param cut_mark: the mark to add when cycles are truncated
438+
:param traversed: the set of traversed nodes
439+
:param verbose: to print warnings when cycles are discarded
440+
:return: the tree in depth-first order
441+
442+
Traverse the nodes of a tree in depth-first order,
415443
discarding eventual cycles within any branch,
416444
adding cut_mark (when specified) if cycles were truncated.
417-
418445
The first argument should be the tree root;
419446
children should be a function taking as argument a tree node
420447
and returning an iterator of the node's children.
@@ -476,7 +503,17 @@ def acyclic_depth_first(
476503
def acyclic_branches_depth_first(
477504
tree, children=iter, depth=-1, cut_mark=None, traversed=None, verbose=False
478505
):
479-
"""Traverse the nodes of a tree in depth-first order,
506+
"""
507+
:param tree: the tree root
508+
:param children: a function taking as argument a tree node
509+
:param depth: the maximum depth of the search
510+
:param cut_mark: the mark to add when cycles are truncated
511+
:param traversed: the set of traversed nodes
512+
:param verbose: to print warnings when cycles are discarded
513+
:return: the tree in depth-first order
514+
515+
Adapted from acyclic_depth_first() above, to
516+
traverse the nodes of a tree in depth-first order,
480517
discarding eventual cycles within the same branch,
481518
but keep duplicate paths in different branches.
482519
Add cut_mark (when defined) if cycles were truncated.
@@ -548,15 +585,22 @@ def acyclic_branches_depth_first(
548585

549586

550587
def acyclic_dic2tree(node, dic):
551-
"""Convert acyclic dictionary 'dic', where the keys are nodes, and the
588+
"""
589+
:param node: the root node
590+
:param dic: the dictionary of children
591+
592+
Convert acyclic dictionary 'dic', where the keys are nodes, and the
552593
values are lists of children, to output tree suitable for pprint(),
553594
starting at root 'node', with subtrees as nested lists."""
554595
return [node] + [acyclic_dic2tree(child, dic) for child in dic[node]]
555596

556597

557598
def unweighted_minimum_spanning_dict(tree, children=iter):
558599
"""
559-
Output a dictionary representing a Minimum Spanning Tree (MST)
600+
:param tree: the tree root
601+
:param children: a function taking as argument a tree node
602+
603+
Output a dictionary representing a Minimum Spanning Tree (MST)
560604
of an unweighted graph, by traversing the nodes of a tree in
561605
breadth-first order, discarding eventual cycles.
562606
@@ -598,7 +642,10 @@ def unweighted_minimum_spanning_dict(tree, children=iter):
598642

599643
def unweighted_minimum_spanning_tree(tree, children=iter):
600644
"""
601-
Output a Minimum Spanning Tree (MST) of an unweighted graph,
645+
:param tree: the tree root
646+
:param children: a function taking as argument a tree node
647+
648+
Output a Minimum Spanning Tree (MST) of an unweighted graph,
602649
by traversing the nodes of a tree in breadth-first order,
603650
discarding eventual cycles.
604651

0 commit comments

Comments (0)