// Command fix-rdf-file copies standard input to standard output line by line
// and percent-encodes a fixed set of characters wherever they occur between
// double quotes.
//
// It is a Go port of fix-rdf-file.py and is meant to do the same thing. The
// first version of this port was reported to be slower than the regex-based
// Python script; the fact that both agree was taken as confirmation that the
// regular expression used by the Python script is correct.
package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"strings"
	"unicode/utf8"
)

// maxLineBytes is the longest input line that is accepted. bufio.Scanner
// stops at 64 KiB by default; the scanner buffer only grows on demand, so a
// generous limit costs nothing for ordinary input.
const maxLineBytes = 1 << 30

// BadChars lists the characters that get escaped when they appear inside a
// double-quoted section of a line.
var BadChars = []string{
	"|", "\\", " ", "^", "<", ">",
}

// EscapedChars holds what BadChars are escaped to: EscapedChars[i] is the
// percent-encoded version of BadChars[i] (for example "|" becomes "%7C").
var EscapedChars = escapeAll(BadChars)

// escapeAll returns the percent-encoded form of every entry of chars, in the
// same order.
func escapeAll(chars []string) []string {
	escaped := make([]string, 0, len(chars))
	for _, c := range chars {
		// %X applied to a string prints its bytes as upper-case hex digits.
		escaped = append(escaped, fmt.Sprintf("%%%X", c))
	}
	return escaped
}

// Fix returns the escaped form of r if r is one of BadChars and r itself,
// converted to a string, otherwise.
func Fix(r rune) (fixed string) {
	s := string(r)
	for i, bad := range BadChars {
		if s == bad {
			return EscapedChars[i]
		}
	}
	return s
}

// fixLine returns line with every bad character inside a double-quoted
// section replaced by its escaped form. A double quote toggles the quoted
// state; the state always starts as "not quoted" and is not carried over to
// the next call. Bytes that are not valid UTF-8 are copied through unchanged.
func fixLine(line string) string {
	var out strings.Builder
	out.Grow(len(line))

	insideQuoted := false
	for i := 0; i < len(line); {
		r, size := utf8.DecodeRuneInString(line[i:])
		chunk := line[i : i+size]
		i += size

		if r == '"' {
			insideQuoted = !insideQuoted
		}

		// A width-1 RuneError marks a byte that is not valid UTF-8. Writing
		// the decoded rune would turn it into U+FFFD, so emit the raw byte.
		invalid := r == utf8.RuneError && size == 1
		if insideQuoted && !invalid {
			out.WriteString(Fix(r))
		} else {
			out.WriteString(chunk)
		}
	}

	return out.String()
}

// PrintFixed writes the fixed version of line to standard output. No newline
// is appended.
func PrintFixed(line string) {
	fmt.Print(fixLine(line))
}

// run reads lines from in, fixes each one and writes it to out followed by a
// single "\n". It returns an error if reading fails (including a line longer
// than maxLineBytes) or if writing fails.
func run(in io.Reader, out io.Writer) error {
	scanner := bufio.NewScanner(in)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	// Buffer the output so that each line does not cost its own write call.
	w := bufio.NewWriter(out)

	for scanner.Scan() {
		if _, err := w.WriteString(fixLine(scanner.Text())); err != nil {
			return fmt.Errorf("writing output: %w", err)
		}
		if err := w.WriteByte('\n'); err != nil {
			return fmt.Errorf("writing output: %w", err)
		}
	}

	if err := scanner.Err(); err != nil {
		// Best effort: emit what was processed so far. The read error is the
		// one worth reporting, so a flush failure here is deliberately dropped.
		_ = w.Flush()
		return fmt.Errorf("reading input: %w", err)
	}

	if err := w.Flush(); err != nil {
		return fmt.Errorf("writing output: %w", err)
	}
	return nil
}

func main() {
	if err := run(os.Stdin, os.Stdout); err != nil {
		fmt.Fprintln(os.Stderr, "fix-rdf-file:", err)
		os.Exit(1)
	}
}