Commit 9a91582c authored by Dennis Müller's avatar Dennis Müller
Browse files

Added stiv's code

parent 2d112387
# Base name shared by the two scripts: $(G)_client.py and $(G)_worker.py.
G = gearman
# Service class name handed to the worker as its first command-line argument.
# The worker answers every job with an error payload when this is missing.
TASK = SymbolSpotter

# These targets are commands, not files; a stray file called "clean" or "run"
# must not stop them from executing.
.PHONY: init run clean

# Start the Gearman job server as a daemon.
init:
	gearmand -d

# Start a worker for $(TASK) in the background, run the client against it,
# then stop the worker and report the client's exit status.  The worker has to
# be up while the client runs because the client waits for each job's result.
run:
	python $(G)_worker.py $(TASK) & worker=$$!; \
	python $(G)_client.py; status=$$?; \
	kill $$worker; \
	exit $$status

# Remove byte-code and generated output; -f keeps this quiet on a clean tree.
clean:
	rm -f *.pyc
	rm -f output/output.xhtml
	rm -f output/semantics/*.xhtml
\ No newline at end of file
This diff is collapsed.
import glob
import json
import os
import sys
from subprocess import call

import gearman

import helpers
# Set up the client and connect to the Gearman job server.
gm_client = gearman.GearmanClient(['localhost:4730'])


def remove_stale_outputs():
    """Delete the output files this script expects to be regenerated.

    Missing files are not an error.  The RDF pattern is expanded with
    ``glob`` because no shell is involved that could expand the ``*``.
    """
    print("Cleaning output file")
    if os.path.exists("output/output.xhtml"):
        os.remove("output/output.xhtml")
    print("Cleaning RDF files")
    for stale in glob.glob("output/semantics/*.xhtml"):
        os.remove(stale)


def locate(uri_prefix, path_prefix, name):
    """Return the ``(entry URI, file system path)`` pair for document ``name``."""
    return (uri_prefix + name, path_prefix + name)


def annotate(doc_id, entry, fname, testing):
    """Submit one document to the 'annotate' task and return the decoded reply.

    doc_id  -- identifier sent as 'id' in the workload
    entry   -- URI sent as 'entry' in the workload
    fname   -- path of the document whose content is sent as 'document'
    testing -- boolean sent as 'testing' in the workload

    Returns the job result decoded from JSON.
    """
    with open(fname, 'r') as docfile:
        content = docfile.read()
    workload = {
        'entry': entry,
        'document': content,
        'id': doc_id,
        'testing': testing,
    }
    request = gm_client.submit_job('annotate', json.dumps(workload))
    return json.loads(request.result)  # log this


print('Sending job...')  # log this
remove_stale_outputs()

entry_prefix = 'file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/'
fname_prefix = '/home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/'
t_entry_prefix = 'file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Testing/'
t_fname_prefix = '/home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Testing/'

# Training documents: id -> (entry URI, file path).
entries = {
    '0510214': locate(entry_prefix, fname_prefix, 'physics.0510214.xhtml'),
    '0511079': locate(entry_prefix, fname_prefix, 'astro-ph.0511079.xhtml'),
    #'0510207': locate(entry_prefix, fname_prefix, 'physics.0510207.xhtml'),
    '0510152': locate(entry_prefix, fname_prefix, 'physics.0510152.xhtml'),
    '0510158': locate(entry_prefix, fname_prefix, 'physics.0510158.xhtml'),
    '0510130': locate(entry_prefix, fname_prefix, 'physics.0510130.xhtml'),
    '0510100': locate(entry_prefix, fname_prefix, 'physics.0510100.xhtml'),
    '0511100': locate(entry_prefix, fname_prefix, 'astro-ph.0511100.xhtml'),
    # more to be added
}

# Testing documents: id -> (entry URI, file path).
test_entries = {
    '0511102': locate(t_entry_prefix, t_fname_prefix, 'astro-ph.0511102.xhtml'),
    '0510032': locate(t_entry_prefix, t_fname_prefix, 'physics.0510032.xhtml'),
    '0510210': locate(t_entry_prefix, t_fname_prefix, 'physics.0510210.xhtml'),
    '0510221': locate(t_entry_prefix, t_fname_prefix, 'physics.0510221.xhtml'),
    '0510250': locate(t_entry_prefix, t_fname_prefix, 'physics.0510250.xhtml'),
}

# doc id -> (precision, recall) as reported by the worker.
prec_rec = {}

for key, (entry, fname) in entries.items():
    result = annotate(key, entry, fname, False)
    # A reply without 'doc_data' carries no scores to record; surface it
    # (including any 'log' text it holds) instead of failing on a bare KeyError.
    if not isinstance(result, dict) or 'doc_data' not in result:
        log = result.get('log', '') if isinstance(result, dict) else result
        raise RuntimeError(
            "Worker returned no doc_data for document {0}: {1}".format(key, log))
    doc_data = result['doc_data']
    doc_id = result['doc_id']
    prec_rec[doc_id] = (doc_data['precision'], doc_data['recall'])

for key in prec_rec:
    val = prec_rec[key]
    print("Doc id: {0} -> Precision {1:.2%}. Recall {2:.2%}".format(key, val[0], val[1]))

print("--------------------------------------------------")
prec_avg, rec_avg = helpers.calculate_average(prec_rec)
print("Precision average: {0:.2%}. Recall average: {1:.2%}".format(prec_avg, rec_avg))
print("F-measure: {0:.4f}".format(helpers.calculate_f(prec_avg, rec_avg)))
print("--------------------------------------------------")

print("Annotating testing documents")
for key, (entry, fname) in test_entries.items():
    # The reply is decoded but not used for testing documents.
    annotate(key, entry, fname, True)
print("Done")
\ No newline at end of file
import gearman
import sys
import json
from services import *
# Create the worker, pointed at the Gearman job server on localhost:4730.
gm_worker = gearman.GearmanWorker(['localhost:4730'])
# Job callback: dispatches the job to the service named on the command line.
def task_listener_annotate(gearman_worker, gearman_job):
    """Run the service class named by sys.argv[1] on the job's data.

    gearman_worker -- the worker invoking this callback (not used here)
    gearman_job    -- the job; its ``data`` attribute is passed to the service

    Returns the value of ``analyze(gearman_job.data)`` on a fresh instance of
    the service class.  If no service name was given, or the class cannot be
    looked up in the ``services`` module or instantiated, returns a JSON
    string with 'status' -4 and a 'log' describing the problem.
    """
    reply = {'status': -4, 'log': ''}
    if len(sys.argv) >= 2:
        try:
            # The name of the service (i.e: name of the class)
            service_name = sys.argv[1]
            handler = getattr(sys.modules["services"], service_name)()
        except Exception as err:
            reply['log'] += "".join(
                ["Fatal:Python:", type(err).__name__, ":", str(err), "\r\n"])
            return json.dumps(reply)
        #print('Reporting status...') # log this
        return handler.analyze(gearman_job.data)
    reply['log'] += 'Error: Missing Arguments: The name of the service to be run should be specified.\r\n'
    return json.dumps(reply)
# Identify this worker to the job server under the id 'gearman_client'.
gm_worker.set_client_id('gearman_client')
# Route jobs submitted for the 'annotate' task to task_listener_annotate.
gm_worker.register_task('annotate', task_listener_annotate)
# Start processing jobs.
# NOTE(review): presumably blocks until the process is stopped - confirm against the python-gearman docs.
gm_worker.work()
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Testing/physics.0510032.xhtml#S4.F2.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">1.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Testing/physics.0510032.xhtml#S4.p1.m3">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">20.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Testing/physics.0510032.xhtml#S4.p1.m9">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">11.7</cn>
<apply>
<times/>
<unit>m</unit>
<unit>A</unit>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Testing/physics.0510032.xhtml#S4.p3.m1">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">2.5</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Testing/physics.0510032.xhtml#S4.p3.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">1.8</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
</rdf:RDF>
<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#m1">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">250.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">8.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p1.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">250.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p1.m4">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">700.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p1.m6">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">100.0</cn>
<apply>
<times/>
<unit>e</unit>
<apply>
<times/>
<unit>W</unit>
<unit>m</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p1.m8">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">125.0</cn>
<apply>
<times/>
<unit>e</unit>
<apply>
<times/>
<unit>W</unit>
<unit>m</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p1.m10">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">8.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p1.m12">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">400.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p1.m14">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">75.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p2.m7">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">167.0</cn>
<apply>
<divide/>
<unit>l</unit>
<unit>min</unit>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p2.m11">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">1.0</cn>
<unit>Pa</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.p2.m12">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">21.0</cn>
<unit>Pa</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S2.F1.m9">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">167.0</cn>
<apply>
<times/>
<unit>l</unit>
<apply>
<power/>
<cn>-1.0</cn>
<unit>min</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S3.SS1.p1.m1">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">250.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S3.SS1.p1.m3">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">167.0</cn>
<apply>
<divide/>
<unit>l</unit>
<unit>min</unit>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S3.F3.m15">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">40.0</cn>
<apply>
<times/>
<unit>l</unit>
<apply>
<power/>
<cn>-1.0</cn>
<unit>min</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S4.SS1.p1.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">0.2</cn>
<unit>Pa</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S4.F4.m9">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">20.0</cn>
<apply>
<times/>
<unit>l</unit>
<apply>
<power/>
<cn>-1.0</cn>
<unit>min</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S4.F5.m23">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">40.0</cn>
<apply>
<times/>
<unit>l</unit>
<apply>
<power/>
<cn>-1.0</cn>
<unit>min</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S4.F5.m26">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">20.0</cn>
<apply>
<times/>
<unit>l</unit>
<apply>
<power/>
<cn>-1.0</cn>
<unit>min</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S4.SS2.p3.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">2.0</cn>
<unit>C</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S4.SS3.p3.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">260.0</cn>
<apply>
<times/>
<unit>l</unit>
<apply>
<power/>
<cn>-1.0</cn>
<unit>min</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S4.F6.m6">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">16.7</cn>
<apply>
<times/>
<unit>l</unit>
<apply>
<power/>
<cn>-1.0</cn>
<unit>min</unit>
</apply>
</apply>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S4.F6.m8">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">1.0</cn>
<unit>Pa</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S5.p2.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">10.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510100.xhtml#S5.F7.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">8.0</cn>
<unit>m</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
</rdf:RDF>
<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510130.xhtml#I1.ix1.p1.m3">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">2.0</cn>
<unit>s</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510130.xhtml#I2.ix5.p1.m1">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">300.0</cn>
<unit>eV</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510130.xhtml#I2.ix5.p1.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">1.2</cn>
<unit>eV</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510130.xhtml#I4.ix1.p1.m1">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">300.0</cn>
<unit>eV</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510130.xhtml#I4.ix1.p1.m2">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">1.2</cn>
<unit>eV</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
</rdf:RDF>
<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510152.xhtml#m6">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">99.4</cn>
<unit>eV</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510152.xhtml#m7">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">5.0</cn>
<unit>eV</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510152.xhtml#m11">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">453.0</cn>
<unit>eV</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510152.xhtml#m12">
<rdf:XMLLiteral>
<apply>
<times/>
<cn type="real">13.0</cn>
<unit>eV</unit>
</apply>
</rdf:XMLLiteral>
</rdf:Description>
<rdf:Description rdf:about="file:///home/ssherko/Dropbox/Academia/CS6/Thesis/Documents/Training/tex_xml/physics.0510152.xhtml#S1.p1.m2">
<rdf:XMLLiteral>
<apply>