Skip to content
Snippets Groups Projects
fix-rdf-file.py 949 B
Newer Older
  • Learn to ignore specific revisions
  • Andreas Schärtl's avatar
    Andreas Schärtl committed
    #! /usr/bin/env python3
    
    
    Andreas Schärtl's avatar
    Andreas Schärtl committed
    '''
    fix-rdf-file.py: Given an RDF file passed on stdin, fix some errors in
    IRIs. This is not a complete solution, but a shim to get a working first
    version.
    '''
    
    
    Andreas Schärtl's avatar
    Andreas Schärtl committed
    
    import re
    import sys
    
    
    def fix_quoted(s: str) -> str:
        '''
        Return s wrapped in exactly one pair of double quotes, with each
        '|', backslash, space and '^' inside it replaced by its percent
        escape (%7C, %5C, %20 and %5E).

        All double quotes at either end of s are removed before escaping, so
        the result always starts and ends with a single double quote.
        '''
        forbidden = '|\\ ^'

        # One translation pass is enough: none of the escape sequences
        # contains a character that itself needs escaping.
        escapes = {ord(ch): '%{:X}'.format(ord(ch)) for ch in forbidden}
        inner = s.strip('"').translate(escapes)

        return '"' + inner + '"'
    
    Andreas Schärtl's avatar
    Andreas Schärtl committed
    
    
    def print_fixed_line(line: str):
        '''
        Write line to stdout with every double-quoted span passed through
        fix_quoted. Nothing is appended to the output, so whatever line
        ending the input line carries is kept as is.

        Each span is rewritten exactly where the regex matched it. Replacing
        by value with str.replace would also rewrite identical text that
        merely sits between the closing quote of one span and the opening
        quote of the next, corrupting parts of the line that were never
        matched.
        '''
        # https://stackoverflow.com/questions/249791
        # NOTE: the first alternative also accepts a backslash, so a
        # backslash-escaped quote still ends the span.
        regex = r"(\"(?:[^\"]|\\.)*\")"

        # A callable replacement is inserted verbatim by re.sub, so the
        # output of fix_quoted needs no further escaping.
        fixed_line = re.sub(regex, lambda m: fix_quoted(m.group(0)), line)

        print(fixed_line, end='')
    
    
    def main():
        '''
        Feed stdin through print_fixed_line, one line at a time, until
        end of input.
        '''
        # readline returns '' only at end of input, which ends the loop.
        for raw_line in iter(sys.stdin.readline, ''):
            print_fixed_line(raw_line)
    
    
    if __name__ == '__main__':
        try:
            main()
        except (KeyboardInterrupt, SystemExit):
            # Interrupted or asked to stop: end quietly, without a traceback.
            pass
        except BrokenPipeError:
            # The reader of stdout (e.g. `head`) went away. Python flushes
            # stdout once more while shutting down; point the descriptor at
            # devnull so that flush cannot raise a second BrokenPipeError
            # and print an "Exception ignored" message.
            import os
            devnull = os.open(os.devnull, os.O_WRONLY)
            os.dup2(devnull, sys.stdout.fileno())