#!/usr/bin/python
"""Read clause markup from stdin and print naive "X :: was :: Y" relations.

Input lines are stripped and recognised by prefix:

    <clause>...        starts a new clause; everything after the 8-character
                       marker is stored verbatim as the clause text
    <vp>..TAG): TEXT   verb phrase; the two characters after the marker are
                       skipped (presumably " (" -- confirm against the
                       producer of this format)
    <vplemma>...): L   lemma of the most recent verb phrase
    <phr>..TAG): TEXT  phrase, same layout as <vp>
    <phrlemma>...): L  lemma of the most recent phrase

Lines with any other prefix are ignored.
"""

import re
import sys

_CLAUSE_MARK = '<clause>'
_SEP = '): '
# Offsets of the "TAG): TEXT" payload: marker length plus two skipped chars.
_VP_PAYLOAD_AT = 6
_PHR_PAYLOAD_AT = 7


def _add_entry(clause, kind, payload):
    """Append a {'text', 'tag'} entry parsed from 'TAG): TEXT' to clause[kind].

    Returns False (and changes nothing) when there is no open clause or the
    payload lacks the '): ' separator.
    """
    tag, sep, text = payload.partition(_SEP)
    if clause is None or not sep:
        return False
    clause[kind].append({'text': text, 'tag': tag})
    return True


def _set_lemma(clause, kind, line):
    """Store the text after the first '): ' as lemma of the last clause[kind] entry.

    Returns False (and changes nothing) when there is no open clause, no
    entry to attach the lemma to, or no '): ' separator in the line.
    """
    _, sep, lemma = line.partition(_SEP)
    if clause is None or not sep or not clause[kind]:
        return False
    clause[kind][-1]['lemma'] = lemma
    return True


def parse_clauses(lines):
    """Build the clause list from an iterable of raw input lines.

    Each clause is a dict with keys 'text' (str), 'vp' and 'phr' (lists of
    dicts with 'text', 'tag' and, when a lemma line followed, 'lemma').
    Lines that cannot be attached to anything are skipped with a warning on
    stderr instead of aborting the whole run.
    """
    clauses = []
    current = None
    for raw in lines:
        line = raw.strip()
        attached = True
        if line.startswith(_CLAUSE_MARK):
            if current is not None:
                clauses.append(current)
            current = {'text': line[len(_CLAUSE_MARK):], 'vp': [], 'phr': []}
        elif line.startswith('<vp>'):
            attached = _add_entry(current, 'vp', line[_VP_PAYLOAD_AT:])
        elif line.startswith('<vplemma>'):
            attached = _set_lemma(current, 'vp', line)
        elif line.startswith('<phr>'):
            attached = _add_entry(current, 'phr', line[_PHR_PAYLOAD_AT:])
        elif line.startswith('<phrlemma>'):
            attached = _set_lemma(current, 'phr', line)
        if not attached:
            sys.stderr.write('skipping unparseable line: %r\n' % line)
    # The final clause has no following <clause> line to flush it, so it
    # must be appended here or it is silently lost.
    if current is not None:
        clauses.append(current)
    return clauses


def print_clauses(clauses, out=None):
    """Dump the parsed structure: clause text, its vps, its phrases, blank line.

    A missing lemma is printed as an empty pair of brackets.
    """
    out = sys.stdout if out is None else out
    for clause in clauses:
        out.write('%s\n' % clause['text'])
        for label in ('vp', 'phr'):
            for item in clause[label]:
                out.write('  %s: %s [%s] [%s]\n' % (
                    label, item['text'], item['tag'], item.get('lemma', '')))
        # NOTE(review): the source layout lost its indentation; one blank
        # line per clause is the assumed original intent -- confirm.
        out.write('\n')


def print_relations(clauses, out=None):
    """Print 'FIRST :: was :: OTHER' for the first phrase against each later one.

    Only clauses with at least one verb phrase and at least two phrases
    produce output.
    """
    out = sys.stdout if out is None else out
    for clause in clauses:
        phrases = clause['phr']
        if not clause['vp'] or len(phrases) < 2:
            continue
        subject = phrases[0]
        for other in phrases[1:]:
            out.write('%s :: was :: %s\n' % (subject['text'], other['text']))


def main():
    """Parse stdin, dump the structure, then print the derived relations."""
    clauses = parse_clauses(sys.stdin)
    print_clauses(clauses)
    sys.stdout.write('\n\n')
    print_relations(clauses)
    sys.stdout.write('\n')


if __name__ == '__main__':
    main()