#! /usr/bin/env python3
"""Percent-encode the contents of double-quoted strings in an RDF file.

The file is read line by line from stdin and the fixed version is written
to stdout, e.g. ``./fix-rdf-file.py < broken.rdf > fixed.rdf``.

Provenance: this script was introduced as ``ulo/fix-rdf-file.py`` by the
patch "add fix-rdf-file.py" (commit b931680c, andreas.schaertl@fau.de,
2020-04-28) with the note: passed an rdf file w/ bad iris on stdin, it
returns a fixed version.

NOTE(review): the input is not parsed. Every double-quoted span on a line
is treated as an IRI, so quoted text that is not an IRI gets encoded too,
and quoted spans that continue across a line break are not handled.
"""

import os
import re
import sys
from urllib import parse

# Characters fix_quoted() keeps verbatim: the RFC 3986 reserved set (the
# delimiters that give an IRI its structure; '&' and ';' also keep XML
# entities such as "&amp;" intact) plus '%', so that percent-escapes already
# present in the input are not encoded a second time.
_SAFE_CHARS = ":/?#[]@!$&'()*+,;=%"

# A '%' that does not start a two-digit hexadecimal escape.
_STRAY_PERCENT = re.compile(r"%(?![0-9A-Fa-f]{2})")

# A double-quoted span: a quote, any run of non-quote characters, a quote.
# Deliberately no backslash-escape alternative such as (?:[^"]|\\.)* -- since
# [^"] already accepts a backslash, that alternative never changes which span
# is matched; it only makes the pattern backtrack exponentially on a line
# with an unterminated quote followed by many backslashes.
_QUOTED = re.compile(r'"[^"]*"')


def fix_quoted(s: str) -> str:
    """Return *s* as a double-quoted string with its payload percent-encoded.

    Leading and trailing double quotes are stripped from *s*; the remaining
    payload is percent-encoded (UTF-8) except for unreserved characters,
    the characters in ``_SAFE_CHARS`` and already valid ``%XX`` escapes; the
    result is wrapped in exactly one pair of double quotes.

    Applying the function to its own output returns that output unchanged.
    """
    payload = s.strip('"')
    encoded = parse.quote(payload, safe=_SAFE_CHARS)
    # quote() was told to leave every '%' alone. Escapes it produced itself
    # are always followed by two hex digits, so whatever '%' fails that test
    # came from the input as a literal percent sign and must be encoded.
    encoded = _STRAY_PERCENT.sub("%25", encoded)
    return '"' + encoded + '"'


def fix_line(line: str) -> str:
    """Return *line* with every double-quoted span run through fix_quoted().

    The substitution is done in a single pass over the original text, so a
    span is encoded exactly once even when its fixed form is identical to
    another span on the same line.
    """
    return _QUOTED.sub(lambda found: fix_quoted(found.group(0)), line)


def print_fixed_line(line: str):
    """Write the fixed version of *line* to stdout.

    No newline is appended; *line* is expected to carry its own line ending,
    as lines read from a file object do.
    """
    print(fix_line(line), end='')


def main():
    """Copy stdin to stdout, fixing each line on the way."""
    for line in sys.stdin:
        print_fixed_line(line)


if __name__ == '__main__':
    try:
        main()
        # Flush inside the try block so a closed pipe is reported here and
        # not during interpreter shutdown.
        sys.stdout.flush()
    except BrokenPipeError:
        # The reader went away (e.g. `| head`). Python flushes stdout again
        # at exit; point it at devnull so that flush cannot raise a second
        # BrokenPipeError and print a traceback.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
    except (KeyboardInterrupt, SystemExit):
        # Quiet exit on Ctrl-C, as before.
        pass