diff --git a/ulo/extract-iris.sh b/ulo/extract-iris.sh
new file mode 100755
--- /dev/null
+++ b/ulo/extract-iris.sh
@@ -0,0 +1,12 @@
+#! /bin/sh
+
+# extract iris from rdf data passed on stdin; really it just prints every
+# double-quoted string in the input, one per line, with the quotes removed
+
+set -eu
+
+# match each quoted string separately ('"[^"]*"' instead of a greedy '".*"')
+# so that several quoted strings on one input line are not merged into a
+# single result; this is a basic regular expression, so grep -P (PCRE) is
+# not needed, but -o is still a (common) grep extension
+grep -o '"[^"]*"' | sed 's/"//g'
diff --git a/ulo/isabelle-prepare.sh b/ulo/isabelle-prepare.sh
new file mode 100755
--- /dev/null
+++ b/ulo/isabelle-prepare.sh
@@ -0,0 +1,42 @@
+#! /bin/sh
+
+set -eu
+
+# prepare DIRECTORY for import w/ the graphdb web
+# interface; this is for our prototype, I can't imagine
+# we'll use the web interface in production
+#
+# every *.xz file below DIRECTORY is uncompressed, has each
+# "|" replaced with "%7C", and is compressed again with gzip
+#
+# this script is a fork of xz-to-gz.sh, found in the
+# same directory
+
+if [ $# -ne 1 ]; then
+    echo "usage: $0 DIRECTORY" 1>&2
+    exit 1
+fi
+
+directory="$1"
+
+# hand the file names from find straight to an inner shell
+# instead of word-splitting its output, so that paths with
+# whitespace or glob characters are processed correctly;
+# -type f skips directories that happen to end in .xz
+find "$directory" -type f -name '*.xz' -exec sh -c '
+    set -eu
+
+    for file do
+        printf "%s\n" "$file" 1>&2
+
+        # the filename of the extracted (uncompressed) file;
+        # strip only the trailing .xz, not the first .xz
+        # anywhere in the path
+        rdf_file=${file%.xz}
+
+        # uncompress, fix iris, compress again
+        unxz -- "$file"
+        sed -i "s/|/%7C/g" -- "$rdf_file"
+        gzip -- "$rdf_file"
+    done
+' sh {} +