From bae00ca5df188af2d7532b961ebb1a3071b46e54 Mon Sep 17 00:00:00 2001
From: Deyan Ginev <d.ginev@jacobs-university.de>
Date: Mon, 22 Jul 2019 14:55:11 -0400
Subject: [PATCH] add frequency breakdown of contents

---
 resources/arxmliv-statements-082018.md | 56 ++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/resources/arxmliv-statements-082018.md b/resources/arxmliv-statements-082018.md
index 07ace63..cc56301 100644
--- a/resources/arxmliv-statements-082018.md
+++ b/resources/arxmliv-statements-082018.md
@@ -24,6 +24,7 @@ articles, the right of distribution was only given (or assumed) to arXiv itself.
   - each filename is a SHA-256 hash of its contents, as a guarantee for uniqueness and random order
   - two separate tar bundles over the same data, one with and one without lexemes for mathematical expressions
   - data is extracted from the separately distributed [arXMLiv 08.2018](https://sigmathling.kwarc.info/resources/arxmliv-dataset-082018/) dataset.
+  - see the bottom of this page for a full statement frequency breakdown
 
   | file name                                         | MD5                                |  size | size unpacked |
   | :------------------------------------------------ | :--------------------------------- | ----: | ------------: |
@@ -104,3 +105,58 @@ nomath source: `definition/35b170bae4259a5c430846116142d4e4a45097e52daf818b78ea3
 
 ### Generated via
   - [llamapun 0.3.2](https://github.com/KWARC/llamapun/releases/tag/0.3.2)
+
+### Contents Breakdown
+
+  | **statement class** | **frequency** | **frequency (nomath)** |
+  | :------------------ | ------------: | ---------------------: |
+  | abstract            |     1,030,774 |              1,030,691 |
+  | acknowledgement     |       162,230 |                162,220 |
+  | affirmation         |            36 |                     22 |
+  | answer              |            40 |                     39 |
+  | assumption          |        29,577 |                 26,890 |
+  | bound               |            47 |                     37 |
+  | case                |         3,256 |                  2,208 |
+  | claim               |        89,737 |                 75,778 |
+  | comment             |           325 |                    322 |
+  | conclusion          |       284,585 |                284,536 |
+  | condition           |         3,950 |                  3,508 |
+  | conjecture          |        44,893 |                 41,780 |
+  | constraint          |           753 |                    731 |
+  | convention          |         2,176 |                  2,160 |
+  | corollary           |       436,768 |                402,728 |
+  | criterion           |           236 |                    219 |
+  | definition          |       686,717 |                667,797 |
+  | demonstration       |        23,043 |                 22,842 |
+  | discussion          |       116,650 |                116,643 |
+  | example             |       295,152 |                289,005 |
+  | exercise            |           404 |                    404 |
+  | expansion           |             5 |                      2 |
+  | expectation         |            13 |                     13 |
+  | experiment          |           154 |                    153 |
+  | explanation         |            16 |                     16 |
+  | fact                |        17,737 |                 16,473 |
+  | hint                |             9 |                      9 |
+  | introduction        |       688,530 |                688,187 |
+  | issue               |            41 |                     28 |
+  | keywords            |         1,565 |                  1,565 |
+  | lemma               |     1,320,646 |              1,162,559 |
+  | method              |        50,968 |                 50,947 |
+  | notation            |        16,611 |                 16,077 |
+  | note                |         4,462 |                  4,415 |
+  | notice              |             4 |                      4 |
+  | observation         |        18,776 |                 18,013 |
+  | overview            |        11,279 |                 11,277 |
+  | principle           |           236 |                    232 |
+  | problem             |        30,369 |                 29,221 |
+  | proof               |     2,125,750 |              2,096,644 |
+  | proposition         |       829,068 |                763,268 |
+  | question            |        27,240 |                 26,673 |
+  | relatedwork         |        26,300 |                 26,299 |
+  | remark              |       639,038 |                635,180 |
+  | result              |       239,905 |                239,639 |
+  | rule                |           775 |                    712 |
+  | solution            |           163 |                    144 |
+  | step                |         6,910 |                  6,536 |
+  | summary             |           117 |                    117 |
+  | theorem             |     1,287,653 |              1,212,044 |
-- 
GitLab