Skip to content
Snippets Groups Projects
fix-rdf-file.py 949 B
Newer Older
Andreas Schärtl's avatar
Andreas Schärtl committed
#! /usr/bin/env python3

Andreas Schärtl's avatar
Andreas Schärtl committed
'''
fix-rdf-file.py: Given an RDF file passed on stdin, fix some errors in
IRIs. This is not a complete solution, but a shim to get a working first
version.
'''

Andreas Schärtl's avatar
Andreas Schärtl committed

import re
import sys


def fix_quoted(s: str) -> str:
    '''
    Percent-encode the pipe, backslash, space and caret characters found
    inside a double-quoted string.

    All leading and trailing double quotes are removed from ``s`` first;
    the encoded payload is then wrapped in exactly one pair of double
    quotes again. Every other character is passed through unchanged.

    Returns the re-quoted, encoded string.
    '''
    # Map each offending character to '%' followed by its code point in
    # uppercase hexadecimal, e.g. ' ' -> '%20'.
    table = str.maketrans({
        bad: '%' + '%X' % ord(bad)
        for bad in ('|', '\\', ' ', '^')
    })

    inner = s.strip('"').translate(table)
    return '"' + inner + '"'
Andreas Schärtl's avatar
Andreas Schärtl committed


# Matches one double-quoted string. A backslash together with the character
# that follows it is consumed as a single unit, so an escaped quote (\") does
# not terminate the string. The character class must exclude the backslash:
# otherwise the escape branch is never taken and the two alternatives overlap,
# which both mis-detects string boundaries and allows exponential
# backtracking on unterminated input.
# Pattern from https://stackoverflow.com/questions/249791
_QUOTED_STRING = re.compile(r'"(?:[^"\\]|\\.)*"')


def print_fixed_line(line: str):
    '''
    Print ``line`` with every double-quoted string passed through
    fix_quoted.

    Each quoted string is rewritten exactly once, at the position where it
    was found; text outside of quoted strings is emitted unchanged. No
    newline is appended, so a line that already ends in a newline is
    printed as-is.

    Note that fix_quoted percent-encodes backslashes, so escape sequences
    inside a quoted string are not preserved in the output.
    '''
    fixed_line = _QUOTED_STRING.sub(
        lambda found: fix_quoted(found.group(0)),
        line,
    )

    print(fixed_line, end='')


def main():
    '''Feed every line read from standard input to print_fixed_line.'''
    for raw_line in sys.stdin:
        print_fixed_line(raw_line)


if __name__ == '__main__':
    # Run the filter only when executed as a script. A keyboard interrupt,
    # a SystemExit, or a broken pipe on output ends the run quietly instead
    # of printing a traceback. Note that swallowing SystemExit also discards
    # any exit status raised from within main().
    try:
        main()
    except (KeyboardInterrupt, SystemExit, BrokenPipeError):
        pass