From b931680c737d166f466165683b6e71161942d617 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Sch=C3=A4rtl?= <andreas.schaertl@fau.de>
Date: Tue, 28 Apr 2020 20:06:21 +0200
Subject: [PATCH] add fix-rdf-file.py

Given an RDF file with bad IRIs on stdin, it prints
a fixed version to stdout.
---
 ulo/fix-rdf-file.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100755 ulo/fix-rdf-file.py

diff --git a/ulo/fix-rdf-file.py b/ulo/fix-rdf-file.py
new file mode 100755
index 0000000..a671246
--- /dev/null
+++ b/ulo/fix-rdf-file.py
@@ -0,0 +1,36 @@
+#! /usr/bin/env python3
+
+
+from urllib import parse
+import re
+import sys
+
+
+def fix_quoted(s: str) -> str:
+    """Return *s* re-quoted with its payload percent-encoded."""
+    encoded = parse.quote(s.strip('"'))
+    return '"' + encoded + '"'
+
+
+def print_fixed_line(line: str):
+    """Print *line* with each double-quoted string percent-encoded once.
+
+    re.sub rewrites every match at its own position; a str.replace loop
+    could re-encode text that an earlier replacement had already fixed.
+    """
+    # https://stackoverflow.com/questions/249791
+    regex = r"(\"(?:[^\"]|\\.)*\")"
+    fixed = re.sub(regex, lambda m: fix_quoted(m.group(0)), line)
+    print(fixed, end='')
+
+
+def main():
+    for line in sys.stdin:  # handle stdin one line at a time
+        print_fixed_line(line)
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except (KeyboardInterrupt, SystemExit, BrokenPipeError):
+        pass  # no traceback on interrupt, exit request, or a closed pipe
-- 
GitLab