#! /bin/sh

set -eu

# Prepare DIRECTORY for import with the graphdb web
# interface. This is for our prototype; I can't imagine
# we'll use the web interface in production.
#
# This script is a fork of xz-to-gz.sh, found in the
# same directory.

# Exactly one positional argument (DIRECTORY) is required; anything else
# prints the usage line on stderr and exits with status 1.
[ "$#" -eq 1 ] || {
    echo "usage: $0 DIRECTORY" >&2
    exit 1
}

# Convert every *.xz file found under DIRECTORY: decompress it, pipe the
# data through ./fix-rdf-file.py, store the result under the same name
# without the .xz suffix, and gzip that result. The original .xz files
# are not removed.

# Run from the script's own directory so that ./fix-rdf-file.py resolves.
# NOTE(review): because of this cd, a relative DIRECTORY is interpreted
# relative to the script's directory, not the caller's working
# directory -- confirm that this is intended.
script_dir=$(dirname "$0")
cd "$script_dir"

# Without pipefail the status of "xzcat | fix-rdf-file.py" is only that
# of the last stage, so a failed decompression would go unnoticed and a
# truncated file would be gzipped. pipefail is not available in every
# /bin/sh, so enable it only where the shell accepts it.
if (set -o pipefail) 2>/dev/null; then
    set -o pipefail
fi

# Remove a half-written temporary file on every exit path. work_file is
# reset to "" once the file has been moved into place.
work_file=""
trap 'if [ -n "$work_file" ]; then rm -f -- "$work_file"; fi' EXIT
trap 'exit 1' HUP INT TERM

directory="$1"
# "! -type d" skips directories whose name happens to end in .xz while
# still accepting symlinks to .xz files.
files=$(find "$directory" -name "*.xz" ! -type d)

# Read one path per line instead of word-splitting $files, so paths that
# contain spaces or glob characters survive intact (paths containing
# newlines are still unsupported). The list is fed on fd 3 so that the
# commands in the loop body keep the script's own stdin.
while IFS= read -r file <&3; do
    # an empty find result still yields one empty here-document line
    [ -n "$file" ] || continue

    printf '%s\n' "$file" >&2

    # the filename of the extracted (uncompressed) file: strip only the
    # trailing .xz, not the first ".xz" that appears anywhere in the path
    rdf_file=${file%.xz}
    work_file=$(mktemp)

    # uncompress, fix iris
    xzcat -- "$file" | ./fix-rdf-file.py > "$work_file"
    mv -- "$work_file" "$rdf_file"
    work_file=""

    # compress again
    gzip -- "$rdf_file"
done 3<<EOF
$files
EOF