Newer
Older
'''
fix-rdf-file.py: Given an RDF file passed on stdin, fix some errors in
IRIs. This is not a complete solution, but a shim to get a working first
version.
'''
import re
import sys
def fix_quoted(s: str) -> str:
    '''
    Percent-encode the disallowed characters inside the quoted string `s`.

    At most one double quote is removed from each end of `s` (the enclosing
    delimiters). Every pipe, backslash, space and caret in the remaining
    payload is replaced by its two-digit uppercase %XX escape, and the
    payload is returned wrapped in a fresh pair of double quotes. An input
    without enclosing quotes is escaped and quoted all the same.
    '''
    payload = s
    # Drop only the delimiting quotes: str.strip('"') would also swallow
    # quote characters that are part of the payload itself.
    if payload.startswith('"'):
        payload = payload[1:]
    if payload.endswith('"'):
        payload = payload[:-1]

    bad_chars = ('|', '\\', ' ', '^')
    # One translation table -> a single pass over the payload. The escapes
    # contain no bad characters, so nothing is ever escaped twice.
    escapes = {ord(c): '%%%02X' % ord(c) for c in bad_chars}
    return '"%s"' % payload.translate(escapes)
# Matches one double-quoted string. Backslash is excluded from the plain
# character class so that an escape such as \" is consumed as a unit by
# the second alternative instead of ending the string early.
# https://stackoverflow.com/questions/249791
_QUOTED_STRING_RE = re.compile(r'"(?:[^"\\]|\\.)*"')


def print_fixed_line(line: str):
    '''
    Print `line` with every double-quoted string run through fix_quoted.

    The line is written to stdout without an extra newline appended; text
    outside of quoted strings is passed through unchanged.
    '''
    # Substitute by position in a single pass. Replacing by text
    # (str.replace per match) could rewrite a span that straddles two
    # neighbouring strings, e.g. the `" "` hidden inside `"" " "`.
    fixed = _QUOTED_STRING_RE.sub(lambda m: fix_quoted(m.group(0)), line)
    print(fixed, end='')
def main():
    '''
    Feed every line read from stdin to print_fixed_line, in input order.
    '''
    stream = sys.stdin
    for record in stream:
        print_fixed_line(record)
if __name__ == '__main__':
    # Script entry point: run the filter and exit quietly when interrupted
    # or when the consumer of our output goes away.
    try:
        main()
        # Flush here so that a closed pipe is reported inside this try
        # block rather than during interpreter shutdown.
        sys.stdout.flush()
    except BrokenPipeError:
        # The reader (e.g. `head`) closed the pipe. Point stdout at devnull
        # so the implicit flush at exit cannot raise a second time and
        # print an "Exception ignored" message.
        import os
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
    except (KeyboardInterrupt, SystemExit):
        pass