#!/usr/bin/python

# WARNING: loads both files entirely into memory

# When True, the comparison loop prints a "missing: ..." line with the
# surrounding gold text for gold commas it reports as absent from the test
# text.
verbose = False
#verbose = True

# Size of the context shown in verbose output: the slice of gold text starts
# this many characters before the comma and ends this many characters after
# the start of the comma.
verbose_window = 10	# characters left+right

import sys

def _read_utf8(path):
    """Return the complete content of the file at *path*, decoded from UTF-8.

    The file is opened in a ``with`` block so the handle is closed even if
    reading or decoding raises.
    """
    with open(path) as handle:
        return handle.read().decode('utf8')


# Both paths are required; fail with a usage message instead of an IndexError.
if len(sys.argv) < 3:
    sys.exit('usage: %s GOLD_FILE TEST_FILE' % sys.argv[0])

# gold: reference text (first argument); test: text under evaluation (second).
gold = _read_utf8(sys.argv[1])
test = _read_utf8(sys.argv[2])

# Counters: commas present in the test text, commas present in the gold text,
# and commas that sit at the same aligned position in both texts.
no_found, no_gold, no_correct = 0, 0, 0
# Cursors into the test text (i) and the gold text (gi).
i, gi = 0, 0

# Walk both texts in lockstep, skipping ignorable characters on either side and
# requiring every remaining character to match case-insensitively.
# NOTE: the walk stops as soon as either text is exhausted, so any commas left
# over in the other text are not counted.
while i < len(test) and gi < len(gold):
    # Snapshot the current characters: the cursors may advance below, but all
    # decisions in this iteration refer to the characters seen on entry.
    test_char, gold_char = test[i], gold[gi]

    if test_char.isspace() or test_char in '":.-?%' or test_char == u'\uFEFF':
        # Test side: whitespace, BOM and all non-comma punctuation are ignored.
        i += 1
    elif gold_char.isspace() or gold_char in '"%' or gold_char == u'\uFEFF':
        # Gold side: whitespace, BOM, quotes and percent signs are ignored.
        gi += 1
    elif test_char == ',' or gold_char in ',:.-?':
        if test_char == gold_char:
            no_correct += 1
        # A test comma facing a gold full stop is not consumed here: only the
        # gold '.' is skipped below, and the comma is then compared against
        # whatever follows the full stop in the gold text.
        if test_char == ',' and gold_char != '.':
            no_found += 1
            i += 1
        if gold_char == ',':
            no_gold += 1
            # Report a miss only if the test text had no comma at this spot.
            # (Uses the snapshot, not test[i], because i may already have
            # moved past the comma and could even be at the end of the text.)
            if verbose and test_char != ',':
                # Clamp the start so a comma near the beginning of the text
                # does not produce a negative (wrap-around) slice index.
                context = gold[max(0, gi - verbose_window):gi + verbose_window]
                print("missing: " + context.encode('utf8'))
            gi += 1
        elif gold_char in ':.-?':
            gi += 1
    else:
        # Ordinary characters must agree, ignoring case.
        if test_char.lower() == gold_char.lower():
            i += 1
            gi += 1
        else:
            # Show the next 100 characters of each text (newlines shown as
            # '#') so the point of divergence can be located, then abort.
            print('')
            print(test[i:i+100].encode('utf8').replace('\n', '#'))
            print('')
            print(gold[gi:gi+100].encode('utf8').replace('\n', '#'))
            print('')
            raise RuntimeError('mismatch')

# Report the raw comma counts, one "label value" line per counter.
for label, count in (
        ('# found commas:', no_found),
        ('# gold commas:', no_gold),
        ('# correctly found commas:', no_correct)):
    print('%s %s' % (label, count))

def percent(n):
    """Format the ratio *n* as a percentage string with one decimal place.

    The value is multiplied by 100 and followed by a space and a percent
    sign, e.g. ``0.5`` becomes ``'50.0 %'``.
    """
    scaled = n * 100
    return '%.1f %%' % scaled

# Report precision, recall and F-score. Both ratios need a non-zero
# denominator, so nothing is printed unless the test text and the gold text
# each contain at least one comma.
if no_found and no_gold:
    prec = float(no_correct) / no_found
    rec = float(no_correct) / no_gold
    # With no correct commas, prec + rec is 0; report F as 0 instead of
    # dividing by zero (and instead of silently omitting all metrics).
    if no_correct:
        f_score = 2 * prec * rec / (prec + rec)
    else:
        f_score = 0.0
    print('')
    for label, value in (('precision:', prec), ('recall:', rec), ('F:', f_score)):
        print('%s %s' % (label, percent(value)))