#! /bin/sh

# abort on the first failing command (-e) and treat the use of an
# unset variable as an error (-u)
set -eu

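# prepare DIRECTORY for import with the GraphDB web interface:
# every *.xz file below DIRECTORY is uncompressed, has the
# characters |, \, space and ^ percent-encoded, and is then
# compressed again as *.gz.
#
# this is for our prototype; I can't imagine we'll use the
# web interface in production
#
# this script is a fork of xz-to-gz.sh, found in the
# same directory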

# exactly one argument, the directory to process, is required;
# anything else prints the usage line to stderr and exits with 1
case $# in
    1)
        ;;
    *)
        echo "usage: $0 DIRECTORY" >&2
        exit 1
        ;;
esac

directory="$1"

# sed program that percent-encodes the characters we fix up in the
# extracted files: | -> %7C, \ -> %5C, space -> %20, ^ -> %5E.
# No replacement text contains a character matched by a later
# command, so running them in one pass gives the same result as
# four separate passes over the file.
# NOTE(review): the space rule rewrites every space in the file, not
# only those inside iris -- confirm that this is intended.
iri_escapes='s/|/%7C/g
s/\\/%5C/g
s/ /%20/g
s/\^/%5E/g'

# prepare_directory DIR
#
# Recompress every regular file named *.xz below DIR as *.gz,
# applying $iri_escapes to the uncompressed contents in between.
# The path of each file is written to stderr before it is processed.
#
# Arguments: $1 - directory to search (recursively)
# Globals:   iri_escapes (read)
# Returns:   non-zero if find, or any command run for a file, fails
prepare_directory() {
    # find passes the paths to the inner shell as separate arguments
    # ({} +), so names containing whitespace or glob characters are
    # never word-split; the sed program travels along as the first
    # argument to avoid nested quoting.
    find "$1" -type f -name '*.xz' -exec sh -c '
        set -eu
        escapes=$1
        shift
        for file do
            printf "%s\n" "$file" 1>&2

            # the filename of the extracted (uncompressed) file;
            # strip only the trailing .xz, never one that happens
            # to appear earlier in the path
            rdf_file=${file%.xz}

            # uncompress, fix iris, compress again
            unxz -- "$file"
            sed -i -e "$escapes" -- "$rdf_file"
            gzip -- "$rdf_file"
        done
    ' sh "$iri_escapes" {} +
}

prepare_directory "$directory"