# seperate.py

#! /usr/bin/env python3

#
# generate-table.py
#
# Given CSV data passed on stdin with two columns (number of
# occurrences, URI), split up the lines into those that have at least
# one occurrence and those that do not.
#


from typing import Tuple
import csv
import sys


def shorten_predicate(uri: str) -> str:
    """Abbreviate a predicate URI with the ``ulo:`` or ``dcterms:`` prefix.

    If *uri* starts with the ULO or DCTERMS namespace, that namespace is
    replaced by its short prefix.  Only a leading namespace is abbreviated;
    the same text appearing later in the URI (for example inside a query
    string) is left untouched.  URIs from any other namespace are returned
    unchanged.
    """
    # (namespace, short prefix) pairs, checked in order.
    prefixes = (
        ('https://mathhub.info/ulo#', 'ulo:'),
        ('http://purl.org/dc/terms/', 'dcterms:'),
    )

    for namespace, prefix in prefixes:
        if uri.startswith(namespace):
            # Slice instead of str.replace so that only the leading
            # occurrence of the namespace is rewritten.
            return prefix + uri[len(namespace):]

    return uri


def main():
    """Partition the predicates read from stdin by whether they occur.

    Reads CSV rows of the form ``count,URI`` from standard input, shortens
    each URI with ``shorten_predicate`` and prints three sections to
    standard output: the predicates with a non-zero count, the predicates
    with a count of zero, and the size of both groups.

    Empty rows are ignored.  Rows with fewer than two columns or with a
    count that is not an integer (for example a header line) are skipped
    with a warning on standard error instead of aborting the whole run.
    """
    occupied = []
    unoccupied = []

    for row in csv.reader(sys.stdin):
        if not row:
            continue

        try:
            occurrences = int(row[0])
            uri = row[1]
        except (ValueError, IndexError):
            # Header lines and truncated rows end up here; report them
            # on stderr so the listing on stdout stays clean.
            print('skipping malformed row: %r' % (row,), file=sys.stderr)
            continue

        predicate = shorten_predicate(uri)
        if occurrences == 0:
            unoccupied.append(predicate)
        else:
            occupied.append(predicate)

    print('--------------- OCCUPIED ---------------')
    for pred in occupied:
        print(pred)

    print('\n------------ UNOCCUPIED ------------')
    for pred in unoccupied:
        print(pred)

    print('\n------------ STATS ------------')
    print('#occupied %d' % len(occupied))
    print('#unoccupied %d' % len(unoccupied))


# Script entry point: run main() and exit quietly on Ctrl-C or when the
# consumer of our output closes the pipe early.
if __name__ == '__main__':
    try:
        main()
        # Flush inside the try block so that a closed pipe is detected
        # here rather than during interpreter shutdown.
        sys.stdout.flush()
    except BrokenPipeError:
        # The reader went away (e.g. `... | head`).  Python flushes stdout
        # once more on exit; point it at devnull so that flush cannot fail
        # again and print an "Exception ignored" message.
        import os
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
    except (KeyboardInterrupt, SystemExit):
        # NOTE: SystemExit is swallowed as well, so an exit status
        # requested through sys.exit() inside main() would be lost.
        pass