From c6929f560da2f850a4745fdab94c64554899c5ab Mon Sep 17 00:00:00 2001
From: jfschaefer <jfschaefer@outlook.com>
Date: Wed, 28 Mar 2018 20:21:43 +0200
Subject: [PATCH] add resource for quantity expressions

---
 resources/index.md                |  1 +
 resources/quantity-expressions.md | 78 +++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 resources/quantity-expressions.md

diff --git a/resources/index.md b/resources/index.md
index 4e910ba..35d1cc3 100644
--- a/resources/index.md
+++ b/resources/index.md
@@ -5,6 +5,7 @@ title: SIGMathLing - Datasets and Resources
 
  1. [arXMLiv corpus, 08.2017 release](/resources/arxmliv-dataset-082017/)
  2. [arXMLiv word embeddings, 08.2017 release](/resources/arxmliv-embeddings-082017)
+ 3. [quantity expressions](/resources/quantity-expressions)
 
 
 Additional resources are en route, see the [plan](/technical/) for details.
diff --git a/resources/quantity-expressions.md b/resources/quantity-expressions.md
new file mode 100644
index 0000000..ef11ed7
--- /dev/null
+++ b/resources/quantity-expressions.md
@@ -0,0 +1,78 @@
+## Quantity Expressions Dataset
+
+This dataset contains the results of Ulrich Rabenstein's
+[master's thesis](https://gl.kwarc.info/supervision/MSc-archive/blob/master/2017/urabenstein/Rabenstein.pdf),
+in which he developed a framework for the detection of quantity expressions in STEM documents.
+
+
+### Accessibility and License
+
+The content of this dataset is licensed to [SIGMathLing members](/member/) for research
+and tool development purposes.
+
+Access is restricted to [SIGMathLing members](/member/) under the
+[SIGMathLing Non-Disclosure-Agreement](/nda/), because for most [arXiv](http://arxiv.org)
+articles the right of distribution was only given (or assumed) to arXiv itself.
+
+
+### Contents
+
+ * `Annotations.zip`: All quantity expressions detected by the spotter in a format suitable for the [Kwarc Annotation Tool (KAT)](https://github.com/kwarc/kat).
+ * `Documents.zip`: The documents in which quantity expressions were searched. These are modified arXMLiv documents in which each word is wrapped in a `<span>` element, which KAT requires in order to annotate individual words.
+ * `Harvest.zip`: Data for math web search.
+ * `screen-reader-documents.zip`: The documents prepared in a way that enables screen readers to read out units ("two kilometers" instead of "two k m" for "2km").
+
+
+### Remarks on Annotation Format
+
+The annotations are stored as RDF in a way suitable for the [Kwarc Annotation Tool (KAT)](https://github.com/kwarc/kat).
+For more information on KAT and the KAT format, see the [CICM 2014](https://gl.kwarc.info/KAT/papers/blob/master/cicm14/paper.pdf)
+and [CICM 2016](https://gl.kwarc.info/KAT/papers/blob/master/cicm16/paper.pdf) papers.
+In the example annotation below, the URL fragment
+```
+cse(%2F%2F*%5B%40id%3D'S1.p10.1'%5D%2C%2F%2F*%5B%40id%3D'S1.p10.1.w270'%5D%2C%2F%2F*%5B%40id%3D'S1.p10.1.w272'%5D)
+```
+describes the annotated quantity expression.
+URL-decoding the expression in the parentheses yields three comma-separated XPaths
+```
+//*[@id='S1.p10.1'],//*[@id='S1.p10.1.w270'],//*[@id='S1.p10.1.w272']
+```
+where the first path is the common parent, the second path is the start of the annotated range,
+and the third path is the end of the annotated range.
+
+```
+<rdf:Description rdf:nodeID="KAT_5764208381">
+  <kat:run rdf:nodeID="kat_run"/>
+  <kat:kannspec rdf:nodeID="KAT_1_QuantityExpression"/>
+  <kat:concept>QuantityExpression</kat:concept>
+  <kat:type rdf:resource="http://kwarc.info/semanticextraction/KAnnSpec#quantityexpression"/>
+  <kat:annotates rdf:resource="http://localhost/procl.html#cse(%2F%2F*%5B%40id%3D'S1.p10.1'%5D%2C%2F%2F*%5B%40id%3D'S1.p10.1.w270'%5D%2C%2F%2F*%5B%40id%3D'S1.p10.1.w272'%5D)"/>
+  <kat:contentmathml rdf:parseType="Literal" score="1">
+    <apply>
+      <times/>
+      <cn>21</cn>
+      <apply>
+        <times/>
+        <apply>
+          <csymbol cd="Prefix">Prefix</csymbol>
+          <csymbol cd="centi">c</csymbol>
+          <csymbol cd="meter">m</csymbol>
+        </apply>
+      </apply>
+    </apply>
+  </kat:contentmathml>
+</rdf:Description>
+```
+
+### Download
+
+From [this repository](https://gl.kwarc.info/SIGMathLing/quantity-expressions) (only for [SIGMathLing members](/member/)).
+
+### Evaluation
+
+According to the thesis, a manual validation of 50 randomly selected documents containing a total of 646 quantity expressions yielded the following values:
+
+ * Precision: 75%
+ * Recall: 93%
+ * F-Score: 83%
+
-- 
GitLab