slides.tex

\documentclass{beamer}

\beamertemplatenavigationsymbolsempty{}

\addtobeamertemplate{navigation symbols}{}{%
    \usebeamerfont{footline}%
    \usebeamercolor[fg]{footline}%
    \hspace{1em}%
    Page~\insertframenumber{}
}

% set up citations

\usepackage[
    backend=biber,
    natbib=true,
    sorting=none
]{biblatex}
\addbibresource{references.bib}
\setbeamertemplate{bibliography item}{\insertbiblabel}

\begin{document}

% [Title]

\title{ulo-storage}
\author{Andreas Schärtl}

\frame{\titlepage}

% [Table of Contents]

\begin{frame}{Table of Contents}
  \tableofcontents{}
\end{frame}

% [Context]

\section{Context}

\begin{frame}{Context}
  \begin{figure}
    \centering
    \includegraphics[width=5.0cm]{figs/math.eps}
    \caption{``Five Aspects of Math Artefacts'' taken from~\cite{tetra}.}
  \end{figure}

  \begin{itemize}
  \item \emph{Tetrapodal Search} wants to be a unified search for
    four distinct areas of mathematical knowledge~\cite{tetra}.
    \begin{itemize}
        \item Symbolic Knowledge: Formulae, syntax trees\ldots
        \item Concrete Knowledge: Examples, listings\ldots
        \item Narrative Knowledge: Names, comments, prose\ldots
        \item Organizational Knowledge: Identifiers, references, relationships\ldots
    \end{itemize}
  \item Each component should be formatted, stored and accessible in a
    format optimized for the given kind of knowledge.
  \end{itemize}
\end{frame}

\subsection{Focus}

\begin{frame}{Context: Organizational Knowledge}
  \begin{itemize}
    \item For this project, \emph{ulo-storage}, the focus was on
      organizational knowledge.
      \begin{itemize}
        \item Identifiers, references and their relationships.
        \item Essentially meta data.
      \end{itemize}
    \item Previous work introduced the \emph{upper level
      ontology}~(ULO) as a way of formulating organizational
      mathematical knowledge~\cite{ulo}.
      \begin{itemize}
        \item Includes predicates such as \texttt{ulo:definition},
          \texttt{ulo:defines}, \texttt{ulo:name}, \texttt{ulo:para}
      \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}{Context: Organizational Knowledge}
  \begin{figure}
    \centering
    \includegraphics[width=0.8\textwidth]{figs/uses.png}
  \end{figure}
\end{frame}

\subsection{Contribution}

\begin{frame}{Context: Contribution}
  The contribution of \emph{ulo-storage} is twofold.

  \vspace{2mm}

  \begin{description}
    \item[Implementation] We provide infrastructure for automated
      imports from existing ULO exports into a queryable database
      repository.
    \item[Exploration] Built on top of above infrastructure we
      implemented various queries and applications.
  \end{description}

  \vspace{2mm}

  While the exploring part is probably the more interesting to talk
  about, the implementation probably has more practical use.
\end{frame}

% [Implementation]

\section{Implementation}

\begin{frame}{Implementation: Starting Point}
  \begin{itemize}
    \item We already have exports from third party libraries to ULO
      RDF~triplets, in particular from collections of
      Coq~\cite{ulocoq} and Isabelle~\cite{uloisabelle} code.
      \begin{itemize}
        \item More than 200 million triplets.
      \end{itemize}
    \item RDF triplets are stored as XML~files in Git~repositories.
      \begin{itemize}
        \item Export itself is not the focus of \emph{ulo-storage}.
      \end{itemize}
  \end{itemize}
\end{frame}


\begin{frame}{Implementation: Starting Point}
  \begin{figure}
    \centering
    \includegraphics[width=1.0\textwidth]{figs/files.png}
  \end{figure}
\end{frame}

\subsection{Components}

\begin{frame}{Implementation: Components}
  \begin{itemize}
    \item Involved in the implementation for \emph{ulo-storage} are the
      following components.
      \begin{itemize}
        \item \emph{Collector:} Fetch XML~files from Git~repositories
          and apply on the fly filters and modifications.
        \item \emph{Importer:} Import XML~stream from Collector into a
          permanent storage.
        \item \emph{Endpoint:} Programming Interface for the permanent
          storage. Essentially the database interface.
      \end{itemize}
    \item Data flows
      \begin{align*}
        \text{Git / File System} \rightarrow \text{Collector}
        \rightarrow \text{Importer} \rightarrow \text{Endpoint}
      \end{align*}
  \end{itemize}
\end{frame}

\subsection{Versioning}

\begin{frame}{Implementation: Versioning}
  \begin{itemize}
    \item One problem in our implementation is \emph{versioning}.
    \item Data flows
        \begin{align*}
            \mathcal{L} \rightarrow \mathcal{E} \rightarrow \mathcal{D}
        \end{align*}
        from \emph{third party library~$\mathcal{L}$} to an \emph{XML
        representation~$\mathcal{E}$} to \emph{database storage~$\mathcal{D}$}.
    \item When $\mathcal{E}$ changes, so does~$\mathcal{E}$ when a new
      export is run. This should result in~$\mathcal{D}$ changing as well.
    \item Difficult! A work around is re-creating databases from
      scratch. Maybe the only realistic way to tackle this
      problem. Maybe splitting up knowledge into many smaller
      repositories is a good idea.
  \end{itemize}
\end{frame}

% [Applications & Questions]

\section{Applications and Questions}

\begin{frame}{Applications and Questions}
  \begin{itemize}
    \item Based on our implementation of Collecter, Importer and
      Endpoint we implemented some applications and queries.
    \item Our endpoint supports both SPARQL~\cite{sparql} and
      RDF4J~\cite{rdf4j}. Both have their advantages.
    \item While the applications themselves are not terribly
      interesting, they lead us to some interesting questions. This
      will be the focus of this discussion.
  \end{itemize}
\end{frame}

\subsection{Used Predicates}

\begin{frame}{Applications and Questions: Used Predicates}
  \begin{itemize}
    \item As a \emph{Hello World} application, we wrote an application
      with RDF4J that looks at what ULO~predicates are used.
    \item Both Coq and Isabelle exports used less than a third of all
      ULO~predicates. Many predicates are used by neither exports.
    \item Queries formulated for a tetrapodal search system have to
      account for these ``holes'' in existing data sets.
  \end{itemize}
\end{frame}

\begin{frame}{Applications and Questions: Used Predicates}

  \begin{figure}
      \begin{scriptsize}
          \begin{flushleft}
              \texttt{check-time} \texttt{defines} \texttt{definition}
              \texttt{derived} \texttt{experimental}
              \texttt{external-size} \texttt{function}
              \texttt{important} \texttt{inductive-on}
              \texttt{instance-of} \texttt{justifies} \texttt{name}
              \texttt{para} \texttt{paratype} \texttt{predicate}
              \texttt{primitive} \texttt{section} \texttt{sourceref}
              \texttt{specified-in} \texttt{specifies}
              \texttt{statement} \texttt{theory} \texttt{type}
              \texttt{unimportant} \texttt{universe} \texttt{uses}
          \end{flushleft}
      \end{scriptsize}
      \caption{Used predicates in an Isabelle export.}
  \end{figure}

  \begin{figure}
      \begin{scriptsize}
          \begin{flushleft}
                \texttt{action-times} \texttt{aligned-with}
                \texttt{alternative-for} \texttt{antonym}
                \texttt{automatically-proved} \texttt{axiom}
                \texttt{constructs} \texttt{contains}
                \texttt{counter-example-for} \texttt{crossrefs}
                \texttt{declaration} \texttt{deprecated} \texttt{docref}
                \texttt{example} \texttt{example-for} \texttt{file}
                \texttt{folder} \texttt{formalizes} \texttt{generated-by}
                \texttt{hypernym} \texttt{hyponym}
                \texttt{implementation-uses}
                \texttt{implementation-uses-implementation-of}
                \texttt{implementation-uses-interface-of} \texttt{inspired-by}
                \texttt{inter-statement} \texttt{interface-uses}
                \texttt{interface-uses-implementation-of}
                \texttt{interface-uses-interface-of} \texttt{internal-size}
                \texttt{last-checked-at} \texttt{library}
                \texttt{library-group} \texttt{logical}
                \texttt{mutual-block} \texttt{nyms}
                \texttt{organizational} \texttt{phrase} \texttt{physical}
                \texttt{proof} \texttt{proposition} \texttt{revision}
                \texttt{rule} \texttt{same-as} \texttt{see-also}
                \texttt{similar-to} \texttt{size-properties}
                \texttt{superseded-by} \texttt{theorem} \texttt{typedec}
                \texttt{uses-implementation} \texttt{uses-interface}
          \end{flushleft}
      \end{scriptsize}
      \caption{\emph{Unused} predicates in an Isabelle export.}
  \end{figure}
\end{frame}

\subsection{Algorithms and Problems}

\begin{frame}{Applications and Questions: Algorithms and Problems}
  \begin{itemize}
    \item One query for a tetrapodal search system is the following:
      ``Find algorithms that solve $NP$-hard graph
      problems''~\cite{tetra}.
      \begin{itemize}
        \item Exploit \texttt{ulo:theorem} and \texttt{ulo:proof}?
          Tempting but potentially very complicated.
        \item Algorithms aren't programs! Programs implement
          algorithms that solve problems.
      \end{itemize}
    \item This illustrates the difficulty in designing an ontology
      (schema) that is both expressive and concise.
  \end{itemize}
\end{frame}

% [Summary]
\section{Summary}

\begin{frame}{Summary}
  \begin{itemize}
    \item Importing existing XML~exports into a queryable database is
      pretty straightforward. It does introduce some interesting
      questions.
    \item Versioning of data sets requires us to re-create
      databases. This does introduce latency but is probably the only
      way out as diffing huge data sets is not feasible.
    \item Existing exports have ``holes'', they only use small-ish
      subsets of~{ULO}. On the other hand, representing algorithms and
      algorithmic problems might require us to extend~{ULO}? Maybe instead
      of tetrapodal search we need $n$-podal search.
    \begin{itemize}
      \item Maybe it would be interesting to collect a database of
        algorithms and proofs (OEIS~\cite{oeis} for algorithms).
    \end{itemize}
  \end{itemize}
\end{frame}

% [References]

\section{References}

\begin{frame}[allowframebreaks]{References}
  \printbibliography{}
\end{frame}

\end{document}