Skip to content
Snippets Groups Projects
fix-rdf-file.py 943 B
Newer Older
  • Learn to ignore specific revisions
  • Andreas Schärtl's avatar
    Andreas Schärtl committed
    #! /usr/bin/env python3
    
    
    from urllib import parse
    import re
    import sys
    
    
    def fix_quoted(s: str) -> str:
        """Percent-encode a fixed set of characters inside a quoted string.

        Every leading and trailing double quote is removed from ``s``; each
        pipe, backslash, space and caret in the remainder is replaced by
        ``%`` followed by its two-digit uppercase hex code; the result is
        returned wrapped in exactly one pair of double quotes. All other
        characters (including ``:`` and ``/``) pass through unchanged.
        """
        # Only these four characters are escaped. The codes are spelled out
        # so the mapping can be checked at a glance; none of the replacement
        # strings contains a character that is itself escaped, so a single
        # translate pass is enough.
        escapes = str.maketrans({
            '|': '%7C',
            '\\': '%5C',
            ' ': '%20',
            '^': '%5E',
        })

        inner = s.strip('"').translate(escapes)
        return '"' + inner + '"'
    
    Andreas Schärtl's avatar
    Andreas Schärtl committed
    
    
    def print_fixed_line(line: str) -> None:
        """Print ``line`` with every double-quoted span passed through fix_quoted.

        Each span matched by the quoted-string regex is replaced by the
        result of ``fix_quoted`` on that span; everything outside the matched
        spans is written unchanged. The line is printed without an added
        newline, so a trailing newline already present in ``line`` is kept
        as is.
        """
        # https://stackoverflow.com/questions/249791
        # NOTE(review): the character class also accepts a backslash, so a
        # backslash-escaped quote ends the span early. Left unchanged here;
        # confirm against the expected input before tightening it.
        regex = r"(\"(?:[^\"]|\\.)*\")"

        # Substitute by match position rather than by text. A textual
        # str.replace() would also rewrite any stretch of the line that merely
        # looks like a matched span, e.g. the gap between two adjacent quoted
        # strings when a later span consists of the same characters.
        fixed_line = re.sub(regex, lambda m: fix_quoted(m.group(0)), line)

        print(fixed_line, end='')
    
    
    def main():
        """Read standard input line by line and print each line fixed up."""
        source = sys.stdin
        for raw_line in source:
            print_fixed_line(raw_line)
    
    
    if __name__ == '__main__':
        # Conditions under which the script ends quietly instead of printing
        # a traceback: an interrupt from the keyboard, an explicit exit, or
        # a write to a pipe whose reader has gone away.
        quiet_exits = (BrokenPipeError, KeyboardInterrupt, SystemExit)

        try:
            main()
        except quiet_exits:
            # Deliberately ignored; there is nothing to clean up.
            pass