#!/usr/bin/python
# Read a clause / verb-phrase / phrase listing from stdin, build a list of
# clause records, print that structure, then print naive
# "<first phrase> :: was :: <other phrase>" pairs for each qualifying clause.
#
# NOTE(review): this script reached review collapsed onto a single line, so
# the block layout below (in particular where the bare blank-line prints sit)
# is a reconstruction -- confirm against the intended output.

import sys
import re  # not used by the code below; kept because the script imported it

# Line-type markers, tested in this order against each stripped input line.
# NOTE(review): every marker literal is the empty string in the reviewed
# source, which makes the 'clause' test match every line and leaves the other
# branches unreachable.  The real marker text was presumably lost before
# review; restore it here (or pass `markers=` to parse_clauses) before use.
MARKERS = {
    'clause': '',
    'vp': '',
    'vp_lemma': '',
    'phr': '',
    'phr_lemma': '',
}

# Number of leading characters dropped from clause / vp / phr lines before the
# payload is read.  Presumably these equal the marker lengths -- TODO confirm.
CLAUSE_SKIP = 8
VP_SKIP = 6
PHR_SKIP = 7


def parse_clauses(lines, markers=None):
    """Group input lines into clause records.

    Each record is a dict ``{'text': str, 'vp': [...], 'phr': [...]}`` whose
    'vp' and 'phr' entries are dicts with 'text' and 'tag', plus 'lemma' once
    a lemma line for that entry has been seen.

    Args:
        lines: iterable of raw text lines; each is stripped before matching.
        markers: optional dict with the keys of MARKERS giving the prefix
            that identifies each line type; defaults to MARKERS.

    Returns:
        The list of clause records in input order, including the final one.

    Raises:
        KeyError, IndexError, ValueError: on lines that arrive before the
            clause / entry they belong to, or that lack the '): ' separator.
    """
    if markers is None:
        markers = MARKERS
    clauses = []
    clause = {}
    for raw in lines:
        line = raw.strip()
        if line.startswith(markers['clause']):
            # A new clause begins: store the one collected so far.
            if clause:
                clauses.append(clause)
            clause = {'text': line[CLAUSE_SKIP:], 'vp': [], 'phr': []}
        elif line.startswith(markers['vp']):
            tag, text = line[VP_SKIP:].split('): ', 1)
            clause['vp'].append({'text': text, 'tag': tag})
        elif line.startswith(markers['vp_lemma']):
            # Lemma lines annotate the most recently added verb phrase.
            clause['vp'][-1]['lemma'] = line.split('): ', 1)[1]
        elif line.startswith(markers['phr']):
            tag, text = line[PHR_SKIP:].split('): ', 1)
            clause['phr'].append({'text': text, 'tag': tag})
        elif line.startswith(markers['phr_lemma']):
            # Lemma lines annotate the most recently added phrase.
            clause['phr'][-1]['lemma'] = line.split('): ', 1)[1]
    # Fix: the original only stored a clause when the next one started, so the
    # last clause of the input was silently dropped.
    if clause:
        clauses.append(clause)
    return clauses


def dump_lines(clauses):
    """Return the example dump of the data structure as a list of lines.

    Every clause contributes its text, one indented line per verb phrase and
    per phrase, and a trailing empty line.  Raises KeyError if an entry never
    received a lemma.
    """
    out = []
    for clause in clauses:
        out.append(clause['text'])
        for vp in clause['vp']:
            out.append(' '.join(
                [' ', 'vp:', vp['text'],
                 '[%s]' % vp['tag'], '[%s]' % vp['lemma']]))
        for phr in clause['phr']:
            out.append(' '.join(
                [' ', 'phr:', phr['text'],
                 '[%s]' % phr['tag'], '[%s]' % phr['lemma']]))
        out.append('')
    return out


def pair_lines(clauses):
    """Return the "intelligence" output as a list of lines.

    Clauses with no verb phrase or fewer than two phrases are skipped.  For
    the rest, the first phrase is paired with each later phrase, followed by
    an empty line.
    """
    out = []
    for clause in clauses:
        if not clause['vp']:
            continue
        if len(clause['phr']) < 2:
            continue
        first = clause['phr'][0]
        for other in clause['phr'][1:]:
            out.append(' '.join([first['text'], ':: was ::', other['text']]))
        # NOTE(review): assumed to belong to the per-clause loop -- confirm.
        out.append('')
    return out


def main():
    """Parse stdin and write the dump, two blank lines, then the pairs."""
    clauses = parse_clauses(sys.stdin)
    lines = dump_lines(clauses) + ['', ''] + pair_lines(clauses)
    # sys.stdout.write gives the same output on Python 2 and Python 3, unlike
    # the Python 2 print statement.
    sys.stdout.write(''.join(line + '\n' for line in lines))


if __name__ == '__main__':
    main()