Skip to content
Snippets Groups Projects
Commit 38bc05fe authored by Andreas Schärtl's avatar Andreas Schärtl
Browse files

ulo/scripts: clean up

parent f595d9ab
No related branches found
No related tags found
No related merge requests found
#! /bin/sh #! /bin/sh
# cut out some components from the rdf files exported from Coq [1] # Cut out some components from the rdf files exported from Coq [1]
# that GraphDB does not like; right now I don't know if they # that GraphDB does not like. Right now I don't know if they
# are important or if they can be skipped, but I just want a working # are important or if they can be skipped, but I just want a working
# prototype first of all # prototype first of all
# #
......
#! /bin/sh #! /bin/sh
# extract iris from rdf files passed on stdin; really it just returns all # Extract IRIs from rdf files passed on stdin. Actually just returns
# quoted strings # all quoted strings.
set -eu set -eu
......
#! /usr/bin/env python3 #! /usr/bin/env python3
'''
fix-rdf-file.py: Given an RDF file passed on stdin, fix some errors in
IRIs. This is not a complete solution, but a shim to get a working first
verison.
'''
from urllib import parse
import re import re
import sys import sys
...@@ -9,10 +14,6 @@ import sys ...@@ -9,10 +14,6 @@ import sys
def fix_quoted(s: str) -> str: def fix_quoted(s: str) -> str:
payload = s.strip('"') payload = s.strip('"')
#fixed = parse.quote(payload)
#fixed = fixed.replace('http%3A', 'http:')
#fixed = fixed.replace('https%3A', 'https:')
bad_chars = ( bad_chars = (
'|', '\\', ' ', '^' '|', '\\', ' ', '^'
) )
......
#! /bin/sh
set -eu
# Prepare DIRECTORY for import w/ the GraphDB web interface.
#
# That is, it searches for *.rdf.xz files in DIRECTORY and passes
# each file to the isabelle-prepare-file.sh script.
#
# If you want to prepare Isabelle exported files for GraphDB, you
# probably should use this script.
if [ ! $# -eq 1 ]; then
echo "usage: $0 DIRECTORY" 1>&2
exit 1
fi
directory="$1"
threads=6
script_dir=$(dirname "$0")
cd "$script_dir"
find "$directory" -name "*.rdf.xz" -print0 -exec xargs -0 -P $threads ./isabelle-prepare-file.sh {} \;
#! /bin/sh
set -eu
# Given a single *.rdf.xz FILE, (1) extract that file, (2) apply fixes
# with fix-rdf-file.py and (3) re-compress with gzip as GraphDB only
# supports gzip and not xz.
if [ ! $# -eq 1 ]; then
echo "usage: $0 FILE" 1>&2
exit 1
fi
script_dir=$(dirname "$0")
cd "$script_dir"
file="$1"
echo "$file" 1>&2
# the filename of the extracted (uncompressed) file
rdf_file=$(echo "$file" | sed 's/\.xz//')
work_file=$(mktemp)
# uncompress, fix iris
xzcat "$file" | ./fix-rdf-file.py > "$work_file"
mv "$work_file" "$rdf_file"
# compress again
gzip "$rdf_file"
#! /bin/sh
set -eu
# prepare DIRECTORY for import w/ the graphdb web
# interface; this is for our prototype, I can't imagine
# we'll use the web interface in production
#
# this script is a fork of xz-to-gz.sh, found in the
# same directory
if [ ! $# -eq 1 ]; then
echo "usage: $0 DIRECTORY" 1>&2
exit 1
fi
script_dir=$(dirname "$0")
cd "$script_dir"
directory="$1"
files=$(find "$directory" -name "*.xz")
for file in $files; do
echo "$file" 1>&2
# the filename of the extracted (uncompressed) file
rdf_file=$(echo "$file" | sed 's/\.xz//')
work_file=$(mktemp)
# uncompress, fix iris
xzcat "$file" | ./fix-rdf-file.py > "$work_file"
mv "$work_file" "$rdf_file"
# compress again
gzip "$rdf_file"
done
#! /bin/sh
# given a path, traverse that file system tree and extract all
# .xz files and re-compress them to .gz; some tooling, in particular
# graphdb, only supports gz and not xz
set -eu
if [ ! $# -eq 1 ]; then
echo "usage: $0 DIRECTORY" 1>&2
exit 1
fi
directory="$1"
files=$(find "$directory" -name "*.xz")
for file in $files; do
echo "$file" 1>&2
# the filename of the extracted (uncompressed) file
rdf_file=$(echo "$file" | sed 's/\.xz//')
# uncompress and then compress again
unxz "$file"
gzip "$rdf_file"
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment