Skip to content
Snippets Groups Projects
fix-rdf-file.py 770 B
Newer Older
  • Learn to ignore specific revisions
  • Andreas Schärtl's avatar
    Andreas Schärtl committed
    #! /usr/bin/env python3
    
    
    from urllib import parse
    import re
    import sys
    
    
    def fix_quoted(s: str) -> str:
        """Percent-encode the payload of a double-quoted string.

        Args:
            s: The string to fix, normally including its surrounding
                double quotes. A missing delimiter on either side is
                tolerated.

        Returns:
            The payload passed through ``urllib.parse.quote`` (which leaves
            ``/`` untouched by default), with the colon of ``http:`` and
            ``https:`` restored, wrapped in one pair of double quotes.
        """
        # Remove exactly one delimiter per side.  str.strip('"') would also
        # eat quote characters that belong to the payload itself, e.g. the
        # escaped quote at the end of "a\"".
        payload = s
        if payload.startswith('"'):
            payload = payload[1:]
        if payload.endswith('"'):
            payload = payload[:-1]

        fixed = parse.quote(payload)

        # quote() also encodes the colon after the URL scheme; undo that so
        # http/https URLs stay recognisable.
        fixed = fixed.replace('http%3A', 'http:')
        fixed = fixed.replace('https%3A', 'https:')

        return '"%s"' % fixed
    
    
    def print_fixed_line(line: str):
        """Print ``line`` with every double-quoted literal run through fix_quoted.

        Args:
            line: One line of input, including its trailing newline if any.
                It is written to stdout without an extra newline.
        """
        # https://stackoverflow.com/questions/249791
        # The character class excludes the backslash so that an escape such
        # as \" is consumed as a pair and cannot end the literal early.
        regex = r'"(?:[^"\\]|\\.)*"'

        # A single left-to-right substitution pass rewrites each literal
        # exactly once.  Looping over str.replace() would touch every
        # occurrence of the matched text, including output that an earlier
        # iteration already encoded (turning %20 into %2520).
        fixed_line = re.sub(regex, lambda m: fix_quoted(m.group(0)), line)

        print(fixed_line, end='')
    
    
    def main():
        """Feed each line read from standard input to print_fixed_line."""
        for raw_line in sys.stdin:
            print_fixed_line(raw_line)
    
    
    if __name__ == '__main__':
        import os

        try:
            main()
            # Flush here so a closed pipe is reported inside this try block
            # rather than during interpreter shutdown.
            sys.stdout.flush()
        except BrokenPipeError:
            # The reader went away (e.g. `... | head`).  Python flushes the
            # standard streams again on exit; point stdout at devnull so
            # that final flush cannot raise a second BrokenPipeError.
            devnull = os.open(os.devnull, os.O_WRONLY)
            os.dup2(devnull, sys.stdout.fileno())
        except (KeyboardInterrupt, SystemExit):
            # Deliberately quiet exit on Ctrl-C or an explicit exit request.
            pass