#! /usr/bin/env python3

#
# seperate.py
#
# Extracted from the patch "used predicates: add seperator script"
# (commit 4506a587deb8), which adds this file as
# experimental/ulo/seperate.py.
#
# Given CSV data passed on stdin with two columns (number of
# occurrences, URI), split up the lines into those that have at least
# one occurrence and those that do not.
#
# Rows that cannot be parsed (for example a header row, or a row with
# fewer than two columns) are reported on stderr and skipped.
#


from typing import Iterable, List, Tuple
import csv
import os
import sys


# (namespace URI, short prefix) pairs used to abbreviate predicates.
_NAMESPACE_PREFIXES = (
    ('https://mathhub.info/ulo#', 'ulo:'),
    ('http://purl.org/dc/terms/', 'dcterms:'),
)


def shorten_predicate(uri: str) -> str:
    """Abbreviate a predicate URI using a known namespace prefix.

    If *uri* starts with the ULO or DCTERMS namespace, that leading
    namespace is replaced by ``ulo:`` or ``dcterms:`` respectively.
    Any other URI is returned unchanged.
    """
    for namespace, prefix in _NAMESPACE_PREFIXES:
        # Only a leading namespace is abbreviated; a namespace that
        # merely appears somewhere inside another URI is left alone.
        if uri.startswith(namespace):
            return prefix + uri[len(namespace):]

    return uri


def _partition_rows(rows: Iterable[List[str]]) -> Tuple[List[str], List[str]]:
    """Split (count, URI) rows into occupied and unoccupied predicates.

    Returns a tuple ``(occupied, unoccupied)`` of shortened predicates:
    rows whose count is zero go to ``unoccupied``, all others go to
    ``occupied``. Empty rows are ignored; rows without an integer count
    or without a URI column are reported on stderr and skipped.
    """
    occupied = []  # type: List[str]
    unoccupied = []  # type: List[str]

    for number, row in enumerate(rows, start=1):
        if not row:
            continue

        try:
            count = int(row[0])
            uri = row[1]
        except (ValueError, IndexError):
            print('warning: skipping row %d: %r' % (number, row),
                  file=sys.stderr)
            continue

        target = unoccupied if count == 0 else occupied
        target.append(shorten_predicate(uri))

    return occupied, unoccupied


def main():
    """Read CSV rows from stdin and print the predicates grouped by use.

    Prints the predicates with at least one occurrence, then those with
    none, then the size of each group.
    """
    occupied, unoccupied = _partition_rows(csv.reader(sys.stdin))

    print('--------------- OCCUPIED ---------------')
    for pred in occupied:
        print(pred)

    print('\n------------ UNOCCUPIED ------------')
    for pred in unoccupied:
        print(pred)

    print('\n------------ STATS ------------')
    print('#occupied %d' % len(occupied))
    print('#unoccupied %d' % len(unoccupied))


if __name__ == '__main__':
    # SystemExit is deliberately not caught here so that an exit status
    # set anywhere below is never masked.
    try:
        main()
        # Flush inside the try block so that a closed pipe is detected
        # here rather than at interpreter shutdown.
        sys.stdout.flush()
    except KeyboardInterrupt:
        pass
    except BrokenPipeError:
        # The reader went away (e.g. `seperate.py | head`). Point stdout
        # at devnull so the flush at interpreter shutdown does not raise
        # a second BrokenPipeError.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())