From 7212be242a9d7d81a764d918e65fd4b37cc81d9b Mon Sep 17 00:00:00 2001 From: Takuto ASAKURA Date: Wed, 18 Mar 2020 00:45:33 +0900 Subject: [PATCH 1/3] add grounding-dataset-v1 --- resources/grounding-dataset-v1.md | 40 +++++++++++++++++++++++++++++++ resources/index.md | 1 + 2 files changed, 41 insertions(+) create mode 100644 resources/grounding-dataset-v1.md diff --git a/resources/grounding-dataset-v1.md b/resources/grounding-dataset-v1.md new file mode 100644 index 0000000..91abf81 --- /dev/null +++ b/resources/grounding-dataset-v1.md @@ -0,0 +1,40 @@ +--- +layout: page +title: Dataset for Grounding of Formulae, Version 1 +--- + +### Basic Information + +* Author: Takuto Asakura, André Greiner-Petter, Akiko Aizawa, and Yusuke Miyao +* Release date: 2020-03-18 + +### Accessibility and License + +The content of this dataset is licensed to [SIGMathLing members](/member/) for +research and tool development purposes. + +Access is restricted to [SIGMathLing members](/member/) under the [SIGMathLing +Non-Disclosure-Agreement](/nda/), because for most [arXiv](http://arxiv.org) +articles the right of distribution was only given (or assumed) to arXiv +itself. + +### Description + +This is the first public release of the dataset for grounding of formulae. + +As a trial work, this dataset consists of an annotated long paper (20 pages in +PDF): + +* Simeone, O.: A Very Brief Introduction to Machine Learning with Applications +to Communication Systems. IEEE Transactions on Cognitive Communications and +Networking 4(4) (2018) + +The original XHTML file of the paper was taken from the [arXMLiv:08.2018 +dataset](/resources/arxmliv-dataset-082018/), and we manually annotated all +937 identifiers (i.e., `<mi>` tags) in the document to the corresponding +mathematical objects (meanings). 
+ +### Download + +[Download link](https://gl.kwarc.info/SIGMathLing/dataset-grounding-v1) +([SIGMathLing members](/member/) only) diff --git a/resources/index.md b/resources/index.md index 7b6baf9..eac938f 100644 --- a/resources/index.md +++ b/resources/index.md @@ -11,6 +11,7 @@ title: SIGMathLing - Datasets and Resources 1. [quantity expressions](/resources/quantity-expressions) 1. [arXMLiv word embeddings, 08.2017 release](/resources/arxmliv-embeddings-082017) 1. [arXMLiv corpus, 08.2017 release](/resources/arxmliv-dataset-082017/) + 1. [Dataset for Grounding of Formulae, v1](/resources/grounding-dataset-v1) ## Resources hosted externally 1. [ACL-math-annotation](http://www-al.nii.ac.jp/acl-math-annotation/) -- GitLab From f399d2792eb6170410998272e1090a506e861863 Mon Sep 17 00:00:00 2001 From: Takuto ASAKURA Date: Sun, 22 Mar 2020 19:17:23 +0900 Subject: [PATCH 2/3] grounding-dataset-v1: fix the download link --- resources/grounding-dataset-v1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/grounding-dataset-v1.md b/resources/grounding-dataset-v1.md index 91abf81..5b61424 100644 --- a/resources/grounding-dataset-v1.md +++ b/resources/grounding-dataset-v1.md @@ -36,5 +36,5 @@ mathematical objects (meanings). 
### Download -[Download link](https://gl.kwarc.info/SIGMathLing/dataset-grounding-v1) +[Download link](https://gl.kwarc.info/SIGMathLing/grounding-dataset-v1) ([SIGMathLing members](/member/) only) -- GitLab From 49cef0de1b55fdf6c053752e3cbe7d45a35a146f Mon Sep 17 00:00:00 2001 From: Takuto ASAKURA Date: Thu, 26 Mar 2020 13:25:33 +0900 Subject: [PATCH 3/3] move grounding-dataset to a new WIP section --- resources/{grounding-dataset-v1.md => grounding-dataset.md} | 6 +++--- resources/index.md | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) rename resources/{grounding-dataset-v1.md => grounding-dataset.md} (88%) diff --git a/resources/grounding-dataset-v1.md b/resources/grounding-dataset.md similarity index 88% rename from resources/grounding-dataset-v1.md rename to resources/grounding-dataset.md index 5b61424..0aca663 100644 --- a/resources/grounding-dataset-v1.md +++ b/resources/grounding-dataset.md @@ -1,12 +1,12 @@ --- layout: page -title: Dataset for Grounding of Formulae, Version 1 +title: Dataset for Grounding of Formulae --- ### Basic Information * Author: Takuto Asakura, André Greiner-Petter, Akiko Aizawa, and Yusuke Miyao -* Release date: 2020-03-18 +* Updated: 2020-03-26 ### Accessibility and License @@ -20,7 +20,7 @@ itself. ### Description -This is the first public release of the dataset for grounding of formulae. +This is a project to create a dataset for grounding of formulae. As a trial work, this dataset consists of an annotated long paper (20 pages in PDF): diff --git a/resources/index.md b/resources/index.md index eac938f..98b1527 100644 --- a/resources/index.md +++ b/resources/index.md @@ -11,7 +11,9 @@ title: SIGMathLing - Datasets and Resources 1. [quantity expressions](/resources/quantity-expressions) 1. [arXMLiv word embeddings, 08.2017 release](/resources/arxmliv-embeddings-082017) 1. [arXMLiv corpus, 08.2017 release](/resources/arxmliv-dataset-082017/) - 1. 
[Dataset for Grounding of Formulae, v1](/resources/grounding-dataset-v1) + +## Work-In-Progress Resources hosted on the SIGMathLing Repository + 1. [Dataset for Grounding of Formulae](/resources/grounding-dataset) ## Resources hosted externally 1. [ACL-math-annotation](http://www-al.nii.ac.jp/acl-math-annotation/) -- GitLab