From 4506a587deb80e1f4a24f6646a5f5c0263c6a67c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Sch=C3=A4rtl?= <andreas@schaertl.me>
Date: Wed, 1 Jul 2020 16:48:38 +0200
Subject: [PATCH] used predicates: add seperator script

---
 experimental/ulo/seperate.py | 58 ++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100755 experimental/ulo/seperate.py

diff --git a/experimental/ulo/seperate.py b/experimental/ulo/seperate.py
new file mode 100755
index 0000000..eff5bc7
--- /dev/null
+++ b/experimental/ulo/seperate.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python3
+
+#
+# seperate.py
+#
+# Given CSV data passed on stdin with two columns (number of
+# occurrences, URI), split up the lines into those that have at least
+# one occurrence and those that do not.
+#
+
+
+from typing import Tuple
+import csv
+import sys
+
+
+def shorten_predicate(uri: str) -> str:
+    """Abbreviate the ULO and DC Terms namespaces anywhere in uri."""
+    prefixes = (('https://mathhub.info/ulo#', 'ulo:'),
+                ('http://purl.org/dc/terms/', 'dcterms:'))
+
+    for namespace, short in prefixes:
+        uri = uri.replace(namespace, short)
+    return uri
+
+
+def main():
+    """Split the predicates read from stdin by usage and print a report.
+
+    Every non-empty CSV row is read as (count, URI). URIs whose count is 0
+    are listed as unoccupied, all others as occupied; totals come last.
+    """
+    used, unused = [], []
+
+    for record in csv.reader(sys.stdin):
+        if not record:
+            continue
+        name, count = shorten_predicate(record[1]), int(record[0])
+        (unused if count == 0 else used).append(name)
+
+    # Both groups share the same heading-then-members layout.
+    sections = (('--------------- OCCUPIED ---------------', used),
+                ('\n------------ UNOCCUPIED ------------', unused))
+    for heading, names in sections:
+        print(heading)
+        for name in names:
+            print(name)
+
+    print('\n------------ STATS ------------')
+    print('#occupied %d' % len(used))
+    print('#unoccupied %d' % len(unused))
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except (KeyboardInterrupt, SystemExit, BrokenPipeError):
+        pass  # deliberately swallowed: no traceback for these three
-- 
GitLab