diff --git a/doc/report/appendix.tex b/doc/report/appendix.tex new file mode 100644 index 0000000000000000000000000000000000000000..55926b8cf3b4cbaaca31a345941589dff738f958 --- /dev/null +++ b/doc/report/appendix.tex @@ -0,0 +1,100 @@ +\section{ULO Predicates Used in Coq and Isabelle Exports}\label{sec:used} + +Only a subset of the upper level ontology are actually taken +advantage of in the existing Coq and Isabelle exports. This +section lists which predicates are used and wich are not. + +\subsection*{ULO Predicates used in the Isabelle Exports} + +\begin{flushleft} + \texttt{check-time} \texttt{defines} \texttt{definition} + \texttt{derived} \texttt{experimental} + \texttt{external-size} \texttt{function} + \texttt{important} \texttt{inductive-on} + \texttt{instance-of} \texttt{justifies} \texttt{name} + \texttt{para} \texttt{paratype} \texttt{predicate} + \texttt{primitive} \texttt{section} \texttt{sourceref} + \texttt{specified-in} \texttt{specifies} + \texttt{statement} \texttt{theory} \texttt{type} + \texttt{unimportant} \texttt{universe} \texttt{uses} +\end{flushleft} + +\subsection*{ULO predicates \emph{not} used in the Isabelle exports.} + +\begin{flushleft} + \texttt{action-times} \texttt{aligned-with} + \texttt{alternative-for} \texttt{antonym} + \texttt{automatically-proved} \texttt{axiom} + \texttt{constructs} \texttt{contains} + \texttt{counter-example-for} \texttt{crossrefs} + \texttt{declaration} \texttt{deprecated} \texttt{docref} + \texttt{example} \texttt{example-for} \texttt{file} + \texttt{folder} \texttt{formalizes} \texttt{generated-by} + \texttt{hypernym} \texttt{hyponym} + \texttt{implementation-uses} + \texttt{implementation-uses-implementation-of} + \texttt{implementation-uses-interface-of} \texttt{inspired-by} + \texttt{inter-statement} \texttt{interface-uses} + \texttt{interface-uses-implementation-of} + \texttt{interface-uses-interface-of} \texttt{internal-size} + \texttt{last-checked-at} \texttt{library} + \texttt{library-group} \texttt{logical} + \texttt{mutual-block} \texttt{nyms} + \texttt{organizational} \texttt{phrase} \texttt{physical} + \texttt{proof} \texttt{proposition} \texttt{revision} + \texttt{rule} \texttt{same-as} \texttt{see-also} + \texttt{similar-to} \texttt{size-properties} + \texttt{superseded-by} \texttt{theorem} \texttt{typedec} + \texttt{uses-implementation} \texttt{uses-interface} +\end{flushleft} + +\subsection*{ULO predicates used in the Coq exports.}\label{fig:used} + +\begin{flushleft} + \texttt{axiom} \texttt{definition} + \texttt{derived} \texttt{example} \texttt{file} + \texttt{folder} \texttt{internal-size} + \texttt{library} \texttt{library-group} + \texttt{predicate} \texttt{primitive} + \texttt{proposition} \texttt{revision} + \texttt{statement} \texttt{theory} \texttt{type} + \texttt{uses} +\end{flushleft} + +\subsection*{ULO predicates \emph{not} used in the Coq exports.}\label{fig:used} + +\begin{flushleft} + \texttt{action-times} \texttt{aligned-with} + \texttt{alternative-for} \texttt{antonym} + \texttt{automatically-proved} \texttt{check-time} + \texttt{constructs} \texttt{contains} + \texttt{counter-example-for} \texttt{crossrefs} + \texttt{declaration} \texttt{defines} + \texttt{deprecated} \texttt{docref} + \texttt{example-for} \texttt{experimental} + \texttt{external-size} \texttt{formalizes} + \texttt{function} \texttt{generated-by} + \texttt{hypernym} \texttt{hyponym} + \texttt{implementation-uses} + \texttt{implementation-uses-implementation-of} + \texttt{implementation-uses-interface-of} + \texttt{important} \texttt{inductive-on} + \texttt{inspired-by} \texttt{instance-of} + \texttt{inter-statement} \texttt{interface-uses} + \texttt{interface-uses-implementation-of} + \texttt{interface-uses-interface-of} + \texttt{justifies} \texttt{last-checked-at} + \texttt{logical} \texttt{mutual-block} + \texttt{name} \texttt{nyms} + \texttt{organizational} \texttt{para} + \texttt{paratype} \texttt{phrase} + \texttt{physical} \texttt{proof} \texttt{rule} + \texttt{same-as} \texttt{section} + \texttt{see-also} \texttt{similar-to} + \texttt{size-properties} \texttt{sourceref} + \texttt{specified-in} \texttt{specifies} + \texttt{superseded-by} \texttt{theorem} + \texttt{typedec} \texttt{unimportant} + \texttt{universe} \texttt{uses-implementation} + \texttt{uses-interface} +\end{flushleft} diff --git a/doc/report/applications-preds.tex b/doc/report/applications-preds.tex new file mode 100644 index 0000000000000000000000000000000000000000..43cbcd065e267d27f731d297f1a0328802394e6d --- /dev/null +++ b/doc/report/applications-preds.tex @@ -0,0 +1,47 @@ +\begin{figure} +\end{figure} + +\begin{figure} + \centering + \begin{subfigure}[]{0.9\textwidth} + \begin{lstlisting} + PREFIX ulo: <https://mathhub.info/ulo#> + + SELECT ?predicate (COUNT(?predicate) as ?count) + WHERE { + ?s ?predicate ?o . + } + GROUP BY ?predicate + ORDER BY DESC(?count) + \end{lstlisting} + \caption{SPARQL query that returns a list of all + \texttt{predicate}s used in the backing store. We include + the \texttt{ulo} prefix so the results are printed in a + concise human readable format.}\label{fig:preds-query} + \end{subfigure} + \vspace{0.5cm} + + \begin{subfigure}[]{0.9\textwidth} + \large + \begin{tabular*}{\textwidth}{c @{\extracolsep{\fill}} lll} + \toprule + & \texttt{predicate} & \texttt{count} \\ \midrule + 1 & \texttt{ulo:uses} & 1160140 \\ + 2 & \texttt{ulo:declares} & 88862 \\ + 3 & \texttt{ulo:internal-size} & 82336 \\ + 4 & \texttt{ulo:derived} & 69017 \\ + %5 & \texttt{ulo:statement} & 44638 \\ + %6 & ulo:object & 15179 \\ + %7 & ulo:primitive & 14459 \\ + %8 & ulo:proposition & 10437 \\ + $\vdots$ & $\vdots$ & $\vdots$ \\ \bottomrule + \end{tabular*} + \caption{Result of query from Figure~\ref{fig:preds-query}. The database + returns a list of all involved \texttt{predicate}s ordered by their + \texttt{count}. The particular results here are from the core + Coq export~\cite{ulocoqspec}.}\label{fig:preds-result} + \end{subfigure} + \vspace{0.5cm} + + \caption{Querying for the kinds of predicates used in a data set} +\end{figure} diff --git a/doc/report/applications.tex b/doc/report/applications.tex index eaa51b2139c51a0fb8da8c57e10c121902cf9614..d1596b4874ae74e8171a6c50fb78e062c82b83fb 100644 --- a/doc/report/applications.tex +++ b/doc/report/applications.tex @@ -28,31 +28,20 @@ various queries and applications: \subsection{Exploring Existing Data Sets}\label{sec:expl} -As a very first application, we simply looked at what ULO predicates -are actually used by the respective data sets. With more than -250~million triplets in the store, we hoped that this would give us -some insight into the kind of knowledge we are dealing with. +Four our first application, we looked at what ULO predicates are +actually used by the respective data sets. With more than 250~million +triplets in the store, we hoped that this would give us some insight +into the kind of knowledge we are dealing with. Implementing a query for this job is not very difficult. In SPARQL, -this can be achieved with the \texttt{COUNT} aggregate. -\begin{lstlisting} - PREFIX ulo: <https://mathhub.info/ulo#> - - SELECT ?predicate (COUNT(?predicate) as ?count) - WHERE { - ?s ?predicate ?o . - } - GROUP BY ?predicate - ORDER BY DESC(?count) -\end{lstlisting} -This yields a list of all used predicates with \texttt{?count} being -the number of occurrences. Looking at the results, we find that both -the Isabelle and the Coq data sets only use subsets of the predicates -provided by the ULO ontology. The results are listed in -Figure~\ref{fig:used}. In both cases, the exports use less than a -third of the available predicates. - -\input{applications-ulo-table.tex} +this can be achieved with the \texttt{COUNT} aggregate, the full query +is given in verbatim in Figure~\ref{fig:preds-query}. This yields a +list of all used predicates with \texttt{?count} being the number of +occurrences (Figure~\ref{fig:preds-result}). Looking at the results, +we find that both the Isabelle and the Coq data sets only use subsets +of the predicates provided by the ULO ontology. The full results are +listed in Appendix~\ref{sec:used}. In both cases, what sta ndsndsout is +that either exports use less than a third of the available predicates. We also see that the Isabelle and Coq exports use different predicates. For example, the Isabelle contains organizational meta @@ -75,6 +64,8 @@ database literature include the clever use of default values or inference of missing values~\cite{kdisc, aidisc}, neither of which feels particularly applicable to an ULO data set. +\input{applications-preds.tex} + \subsection{Querying for Tetrapodal Search}\label{sec:tetraq} Various queries for a tetrapodal search system were previously diff --git a/doc/report/references.bib b/doc/report/references.bib index 8f3b031242b34d0e867e854a0d299fe927b742dc..85c6bb404c531adfb8a891e51278f093db3678d6 100644 --- a/doc/report/references.bib +++ b/doc/report/references.bib @@ -37,6 +37,14 @@ url = {https://gl.mathhub.info/Coqxml} } +@online{ulocoqspec, + title = {coq.8.9.0}, + organization = {MathHub}, + date = {2019}, + urldate = {2020-07-30}, + url = {https://gl.mathhub.info/Coqxml/coq.8.9.0} +} + @online{sparqlpagination, title = {Paginating SPARQL results}, author = {Joshua Taylor}, diff --git a/doc/report/report.tex b/doc/report/report.tex index dd5728c8295065f7482741cf35b5bcad4fcb31cd..db70eba12df604091b21f9282cc283898678f9e1 100644 --- a/doc/report/report.tex +++ b/doc/report/report.tex @@ -10,6 +10,7 @@ \usepackage{lipsum} \usepackage{listings} \usepackage{multicol} +\usepackage{placeins} \usepackage{subcaption} \usepackage{subcaption} \usepackage{tabularx} @@ -29,7 +30,7 @@ \setkomafont{disposition}{\normalfont\bfseries} \addtokomafont{descriptionlabel}{\normalfont\bfseries} -\lstset{basicstyle=\footnotesize\ttfamily,breaklines=true} +\lstset{basicstyle=\ttfamily,breaklines=true} \title{ulo-storage} \subtitle{Indexing and Querying Organizational Data in Mathematical Libraries} @@ -59,6 +60,10 @@ \newpage \input{conclusion.tex} +\newpage +\appendix +\input{appendix} + \newpage \printbibliography{}