#! /usr/bin/env python3

#
# seperate.py
#
# Given CSV data passed on stdin with two columns (number of
# occurrences, URI), split up the lines into those that have at least
# one occurrence and those that do not.
#


from typing import Iterable, List, Sequence, Tuple
import csv
import os
import sys


# Namespace URIs and the short prefixes they are abbreviated to.
ULO_NAMESPACE = 'https://mathhub.info/ulo#'
DCTERMS_NAMESPACE = 'http://purl.org/dc/terms/'


def shorten_predicate(uri: str) -> str:
    """Return *uri* with known namespace URIs replaced by short prefixes.

    Every occurrence of the ULO namespace becomes ``ulo:`` and every
    occurrence of the Dublin Core terms namespace becomes ``dcterms:``.
    Strings containing neither namespace are returned unchanged.
    """
    uri = uri.replace(ULO_NAMESPACE, 'ulo:')
    uri = uri.replace(DCTERMS_NAMESPACE, 'dcterms:')

    return uri


def split_by_occurrence(
        rows: Iterable[Sequence[str]]) -> Tuple[List[str], List[str]]:
    """Partition CSV rows into occupied and unoccupied predicates.

    Each non-empty row must hold an integer occurrence count in its
    first column and a predicate URI in its second.  Empty rows (blank
    input lines) are skipped.

    Returns a tuple ``(occupied, unoccupied)`` of shortened predicates
    in input order: ``unoccupied`` holds those whose count is exactly
    zero, ``occupied`` all others.

    Raises ``ValueError`` naming the offending row when a row has fewer
    than two columns or its count is not an integer.
    """
    occupied = []
    unoccupied = []

    for number, row in enumerate(rows, start=1):
        if not row:
            continue

        if len(row) < 2:
            raise ValueError(
                'row %d: expected two columns (occurrences, URI), got %r'
                % (number, list(row)))

        try:
            occurrence = int(row[0])
        except ValueError as err:
            raise ValueError(
                'row %d: occurrence count %r is not an integer'
                % (number, row[0])) from err

        predicate = shorten_predicate(row[1])

        if occurrence == 0:
            unoccupied.append(predicate)
        else:
            occupied.append(predicate)

    return occupied, unoccupied


def main():
    """Read CSV from stdin and print occupied/unoccupied predicates."""
    occupied, unoccupied = split_by_occurrence(csv.reader(sys.stdin))

    print('--------------- OCCUPIED ---------------')
    for pred in occupied:
        print(pred)

    print('\n------------ UNOCCUPIED ------------')
    for pred in unoccupied:
        print(pred)

    print('\n------------ STATS ------------')
    print('#occupied %d' % len(occupied))
    print('#unoccupied %d' % len(unoccupied))


if __name__ == '__main__':
    try:
        main()
        # Flush here so that a closed pipe is reported inside this try
        # block instead of during interpreter shutdown.
        sys.stdout.flush()
    except BrokenPipeError:
        # The reader (e.g. `head`) went away.  Point stdout at devnull so
        # the implicit flush at exit does not raise and print a spurious
        # "Exception ignored" message.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
    except (KeyboardInterrupt, SystemExit):
        pass