-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_gen.py
More file actions
executable file
·73 lines (60 loc) · 2.42 KB
/
parse_gen.py
File metadata and controls
executable file
·73 lines (60 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#! /usr/bin/env python3
# parse_gen.py
import os
import sys
from nltk.parse.generate import generate
from nltk.grammar import Nonterminal
from nltk import PCFG
from random import choice
import argparse
def parse_args():
    """Build the parse_gen.py command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace carrying ``num_sent`` (int, default 10),
        ``max_depth`` (int, default 4), ``grammar_file`` (str),
        ``output_file`` (a file opened for writing, default ``sys.stdout``)
        and ``verbose`` (bool, False unless the flag is given).
    """
    cli = argparse.ArgumentParser(description='parse_gen.py')

    # One (flags, settings) entry per option, registered in this order.
    # NOTE: '--max_depth' is spelled with an underscore, unlike the other
    # long options; it is kept as-is so existing invocations keep working.
    option_table = (
        (('-n', '--num-sent'),
         {'type': int, 'default': 10, 'help': 'number of sentences'}),
        (('-d', '--max_depth'),
         {'type': int, 'default': 4, 'help': 'maximum depth'}),
        (('-g', '--grammar-file'),
         {'type': str, 'default': 'grammars/gram_file_simple.txt',
          'help': 'grammar file'}),
        (('-o', '--output-file'),
         {'type': argparse.FileType('w'), 'default': sys.stdout,
          'help': 'generated file'}),
        (('-v', '--verbose'),
         {'action': "store_true", 'help': "verbose flag"}),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
# Stolen from here:
# https://stackoverflow.com/questions/15009656/how-to-use-nltk-to-generate-sentences-from-an-induced-grammar/15617664
# def generate_sample(grammar, items=["S"]):
# frags = []
# if len(items) == 1:
# if isinstance(items[0], Nonterminal):
# for prod in grammar.productions(lhs=items[0]):
# frags.append(generate_sample(grammar, prod.rhs()))
# else:
# frags.append(items[0])
# else:
# chosen_expansion = choice(items)
# frags.append(generate_sample, chosen_expansion)
# return frags
# Stolen from here:
# https://stackoverflow.com/questions/15009656/how-to-use-nltk-to-generate-sentences-from-an-induced-grammar/15617664
def generate_sample(grammar, prod, frags):
    """Randomly expand the symbol *prod*, appending its terminals to *frags*.

    Args:
        grammar: grammar object queried through ``grammar.productions(lhs=...)``.
            Each returned production must offer ``rhs()``; when every
            candidate also offers ``prob()`` (a probabilistic grammar), the
            expansion is drawn according to those probabilities, otherwise
            uniformly.
        prod: the symbol to expand. A plain string is treated as a terminal;
            anything else is looked up as a left-hand side in *grammar*.
        frags: list that receives the generated terminals, in left-to-right
            order. It is mutated in place.

    Returns:
        None. The result is delivered through *frags*.
    """
    # Imported locally because the module itself only imports ``choice``.
    from random import choices

    if isinstance(prod, str):
        # Terminal symbol. Deciding this from the symbol itself (instead of a
        # lookup in the grammar's index of *first* right-hand-side symbols)
        # keeps terminals that only occur later in a rule, e.g. the 'dog' in
        # ``NP -> Det 'dog'``.
        frags.append(prod)
        return

    candidates = grammar.productions(lhs=prod)
    if not candidates:
        # No rule expands this symbol: emit nothing for it.
        return

    try:
        weights = [candidate.prob() for candidate in candidates]
    except AttributeError:
        # Non-probabilistic productions: fall back to a uniform draw.
        weights = None

    chosen = choices(candidates, weights=weights)[0]
    for symbol in chosen.rhs():
        generate_sample(grammar, symbol, frags)
def generate_sentences(args):
    """Yield randomly generated sentences from the grammar named in *args*.

    Reads ``args.grammar_file``, parses its contents with
    ``PCFG.fromstring`` and, when ``args.verbose`` is set, prints the grammar
    followed by an empty line. It then yields ``args.num_sent`` sentences,
    each built by expanding the grammar's start symbol with
    ``generate_sample`` and joining the resulting fragments with single
    spaces.
    """
    with open(args.grammar_file, "r") as rules_file:
        rules_text = rules_file.read()
    pcfg = PCFG.fromstring(rules_text)

    if args.verbose:
        print(pcfg)
        print()

    for _ in range(args.num_sent):
        # A fresh list per sentence; generate_sample fills it in place.
        words = []
        generate_sample(pcfg, pcfg.start(), words)
        yield ' '.join(words)
if __name__ == "__main__":
    # Script entry point: write one generated sentence per line to the
    # file selected with -o/--output-file (sys.stdout by default).
    args = parse_args()
    try:
        for sentence in generate_sentences(args):
            args.output_file.write(sentence)
            args.output_file.write('\n')
    finally:
        # The output option may have opened a real file; close it explicitly
        # so buffered sentences are flushed even if generation fails midway.
        # The interpreter's own stdout is left open.
        if args.output_file is not sys.stdout:
            args.output_file.close()