ulo/scripts: clean up

38bc05fe · Andreas Schärtl · f595d9ab · 38bc05fe · 38bc05fe · 38bc05fe
Commit 38bc05fe authored 4 years ago by Andreas Schärtl
--- a/ulo/scripts/coq-cut.sh
+++ b/ulo/scripts/coq-cut.sh
 #! /bin/sh
-# cut out some components from the rdf files exported from Coq [1]
+# Cut out some components from the rdf files exported from Coq [1]
-# that GraphDB does not like; right now I don't know if they
+# that GraphDB does not like. Right now I don't know if they
 # are important or if they can be skipped, but I just want a working
 # prototype first of all
 #

--- a/ulo/scripts/extract-iris.sh
+++ b/ulo/scripts/extract-iris.sh
 #! /bin/sh
-# extract iris from rdf files passed on stdin; really it just returns all
+# Extract IRIs from rdf files passed on stdin. Actually just returns
-# quoted strings
+# all quoted strings.
 set -eu

--- a/ulo/scripts/fix-rdf-file.py
+++ b/ulo/scripts/fix-rdf-file.py
 #! /usr/bin/env python3
+'''
+fix-rdf-file.py: Given an RDF file passed on stdin, fix some errors in
+IRIs. This is not a complete solution, but a shim to get a working first
+verison.
+'''
-from urllib import parse
 import re
 import sys
@@ -9,10 +14,6 @@ import sys
 def fix_quoted(s: str) -> str:
    payload = s.strip('"')
-    #fixed = parse.quote(payload)
-    #fixed = fixed.replace('http%3A', 'http:')
-    #fixed = fixed.replace('https%3A', 'https:')
    bad_chars = (
            '|', '\\', ' ', '^'
    )

--- a/ulo/scripts/isabelle-prepare-directory.sh
+++ b/ulo/scripts/isabelle-prepare-directory.sh
+#! /bin/sh
+set -eu
+# Prepare DIRECTORY for import w/ the GraphDB web interface.
+#
+# That is, it searches for *.rdf.xz files in DIRECTORY and passes
+# each file to the isabelle-prepare-file.sh script.
+#
+# If you want to prepare Isabelle exported files for GraphDB, you
+# probably should use this script.
+if [ ! $# -eq 1 ]; then
+    echo "usage: $0 DIRECTORY" 1>&2
+    exit 1
+fi
+directory="$1"
+threads=6
+script_dir=$(dirname "$0")
+cd "$script_dir"
+find "$directory" -name "*.rdf.xz" -print0 -exec xargs -0 -P $threads ./isabelle-prepare-file.sh {} \;
--- a/ulo/scripts/isabelle-prepare-file.sh
+++ b/ulo/scripts/isabelle-prepare-file.sh
+#! /bin/sh
+set -eu
+# Given a single *.rdf.xz FILE, (1) extract that file, (2) apply fixes
+# with fix-rdf-file.py and (3) re-compress with gzip as GraphDB only
+# supports gzip and not xz.
+if [ ! $# -eq 1 ]; then
+    echo "usage: $0 FILE" 1>&2
+    exit 1
+fi
+script_dir=$(dirname "$0")
+cd "$script_dir"
+file="$1"
+echo "$file" 1>&2
+# the filename of the extracted (uncompressed) file
+rdf_file=$(echo "$file" | sed 's/\.xz//')
+work_file=$(mktemp)
+# uncompress, fix iris
+xzcat "$file" | ./fix-rdf-file.py > "$work_file"
+mv "$work_file" "$rdf_file"
+# compress again
+gzip "$rdf_file"
--- a/ulo/scripts/isabelle-prepare.sh
+++ b/ulo/scripts/isabelle-prepare.sh
-#! /bin/sh
-set -eu
-# prepare DIRECTORY for import w/ the graphdb web
-# interface; this is for our prototype, I can't imagine
-# we'll use the web interface in production
-#
-# this script is a fork of xz-to-gz.sh, found in the
-# same directory
-if [ ! $# -eq 1 ]; then
-    echo "usage: $0 DIRECTORY" 1>&2
-    exit 1
-fi
-script_dir=$(dirname "$0")
-cd "$script_dir"
-directory="$1"
-files=$(find "$directory" -name "*.xz")
-for file in $files; do
-    echo "$file" 1>&2
-    # the filename of the extracted (uncompressed) file
-    rdf_file=$(echo "$file" | sed 's/\.xz//')
-    work_file=$(mktemp)
-    # uncompress, fix iris
-    xzcat "$file" | ./fix-rdf-file.py > "$work_file"
-    mv "$work_file" "$rdf_file"
-    # compress again
-    gzip "$rdf_file"
-done
--- a/ulo/scripts/build_virtuoso.sh
+++ b/ulo/scripts/build_virtuoso.sh
--- a/ulo/scripts/import-recursive-xz-rdf-to-virtuoso.sh
+++ b/ulo/scripts/import-recursive-xz-rdf-to-virtuoso.sh
--- a/ulo/scripts/xz-to-gz.sh
+++ b/ulo/scripts/xz-to-gz.sh
-#! /bin/sh
-# given a path, traverse that file system tree and extract all
-# .xz files and re-compress them to .gz; some tooling, in particular
-# graphdb, only supports gz and not xz
-set -eu
-if [ ! $# -eq 1 ]; then
-    echo "usage: $0 DIRECTORY" 1>&2
-    exit 1
-fi
-directory="$1"
-files=$(find "$directory" -name "*.xz")
-for file in $files; do
-    echo "$file" 1>&2
-    # the filename of the extracted (uncompressed) file
-    rdf_file=$(echo "$file" | sed 's/\.xz//')
-    # uncompress and then compress again
-    unxz "$file"
-    gzip "$rdf_file"
-done