#! /usr/bin/env python3
'''
fix-rdf-file.py: Given an RDF file passed on stdin, fix some errors in IRIs.

This is not a complete solution, but a shim to get a working first version.
'''

import os
import re
import sys

# Characters that are percent-encoded inside double-quoted strings:
# '|' -> '%7C', '\' -> '%5C', ' ' -> '%20', '^' -> '%5E'.
_BAD_CHARS = ('|', '\\', ' ', '^')

# Translation table applied in a single pass.  None of the escape sequences
# contains a bad character, so this gives the same result as replacing the
# characters one after another.
_ESCAPES = str.maketrans({c: '%%%02X' % ord(c) for c in _BAD_CHARS})

# Double-quoted string pattern, taken from
# https://stackoverflow.com/questions/249791
# Compiled once because it is applied to every input line.
_QUOTED = re.compile(r"(\"(?:[^\"]|\\.)*\")")


def fix_quoted(s: str) -> str:
    '''
    Percent-encode the bad characters inside a double-quoted string.

    Leading and trailing double quotes are stripped from ``s``, every
    character listed in ``_BAD_CHARS`` is replaced by its ``%XX`` escape,
    and the result is returned wrapped in one pair of double quotes.
    '''
    payload = s.strip('"')
    return '"%s"' % payload.translate(_ESCAPES)


def _fix_line(line: str) -> str:
    '''Return ``line`` with every double-quoted string passed through fix_quoted.'''
    # Substitute at the matched positions only.  Doing findall() followed by
    # str.replace() would also rewrite any other place where the same text
    # happens to occur, e.g. the '" "' formed by the gap between two adjacent
    # quoted strings.
    return _QUOTED.sub(lambda m: fix_quoted(m.group(0)), line)


def print_fixed_line(line: str):
    '''
    Write ``line`` to stdout with its double-quoted strings fixed.

    No newline is appended; ``line`` is expected to carry its own line
    ending, as lines read from a file object do.
    '''
    print(_fix_line(line), end='')


def main():
    '''Copy stdin to stdout line by line, fixing quoted strings on the way.'''
    for line in sys.stdin:
        print_fixed_line(line)


if __name__ == '__main__':
    try:
        main()
        # Flush here so that a closed pipe is reported inside this try block
        # rather than during interpreter shutdown.
        sys.stdout.flush()
    except BrokenPipeError:
        # Python flushes the standard streams on exit; point stdout at
        # devnull so that flush cannot raise another BrokenPipeError.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
    except (KeyboardInterrupt, SystemExit):
        # Exit quietly when interrupted.
        pass