diff --git a/doc/report/components.tex b/doc/report/components.tex
index b069a0a348723e43cf77a3edf84d65b7e8214b2c..b6cb50fa2e853053f561bfccf1322601432ea747 100644
--- a/doc/report/components.tex
+++ b/doc/report/components.tex
@@ -3,14 +3,96 @@
 With various ULO/RDF files in place we have the aim of making the
 underlying data available for use with applications. For this, we
 should first make out the various components that might be involved in
-such a system. As a guide, figure~\ref{fig:components} illustrates
-the various components and their interplay.
+such a system. As a guide, figure~\ref{fig:components} illustrates the
+various components and their interplay. We will now give an overview
+of all involved components. Each component is discussed in more detail
+later; this section only serves to give the reader a general
+understanding of the developed infrastructure and its topology.
+
 \begin{figure}[]\begin{center}
   \includegraphics{figs/components}
   \caption{Components involved in the \emph{ulo-storage} system.}\label{fig:components}
 \end{center}\end{figure}

-We will now give an overview over all involved components. Each
-component will later be discussed in more detail, this section serves
-only for the reader to get a general understanding of the developed
-infrastructure and its topology.
+\begin{itemize}
+\item ULO/RDF data is available in various locations, be it in Git
+  repositories, on web servers reachable via HTTP or on a user's
+  local disk. Regardless of where this ULO/RDF data is stored, a
+  \emph{Collecter} collects these ULO/RDF files. In the easiest case,
+  this involves cloning a Git repository or crawling a file system
+  for matching files.
+
+\item The streams of ULO/RDF files gathered by the Collecter are then
+  passed on to the \emph{Importer}. The Importer imports the triplets
+  from these files into some kind of permanent storage; for this
+  project, the GraphDB~\cite{graphdb} triplet store was a natural fit.
+  In practice, Collecter and Importer end up being a single piece of
+  software, but this does not have to be the case. A short sketch of
+  this pipeline is given at the end of this section.
+
+\item Finally, with all triplets stored in a database, an
+  \emph{Endpoint} is where applications access the underlying
+  knowledge base. This does not necessarily have to be a dedicated
+  piece of software; the programming API of the database itself could
+  be understood as an endpoint of its own. However, some thought
+  should be put into designing an Endpoint that is convenient to use.
+\end{itemize}
+
+Additionally, one could think of a \emph{Harvester} component. So far
+we have assumed that the ULO/RDF triplets are already available as
+such. For this project this is indeed the case, as we worked with
+triplets already exported from the Isabelle and Coq libraries. In
+general, however, this need not be so. It might be desirable to
+automate the export from third-party formats to ULO/RDF, and this is
+exactly what a Harvester would do. A Harvester fetches mathematical
+knowledge from some remote source and then provides a volatile stream
+of ULO/RDF data to the Collecter, which then passes it on to the
+Importer and so on. The big advantage of such an approach is that
+exports from third-party libraries are always up to date and do not
+have to be triggered manually.
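+
+To make the interplay of Collecter and Importer more concrete, the
+following Python sketch outlines what a minimal combined
+implementation could look like. The GraphDB location, the repository
+name \texttt{ulo}, the Git URL and the \texttt{*.rdf} file extension
+are merely assumptions made for this illustration; none of them are
+prescribed by the architecture itself.
+
+\begin{verbatim}
+# Sketch of a combined Collecter/Importer. Assumes a local GraphDB
+# instance at http://localhost:7200 with a repository named "ulo" and
+# ULO/RDF exports stored as RDF/XML (*.rdf) files.
+import glob
+import os
+import subprocess
+
+import requests
+
+GRAPHDB = "http://localhost:7200/repositories/ulo/statements"
+
+def collect(git_url, workdir="workdir"):
+    """Collecter: clone the given Git repository and yield the
+    paths of all ULO/RDF files found inside it."""
+    subprocess.run(["git", "clone", "--depth", "1", git_url, workdir],
+                   check=True)
+    yield from glob.iglob(os.path.join(workdir, "**", "*.rdf"),
+                          recursive=True)
+
+def import_file(path):
+    """Importer: upload one RDF/XML file to the GraphDB triplet
+    store via its RDF4J-compatible REST interface."""
+    with open(path, "rb") as rdf:
+        response = requests.post(
+            GRAPHDB, data=rdf,
+            headers={"Content-Type": "application/rdf+xml"})
+    response.raise_for_status()
+
+if __name__ == "__main__":
+    # Hypothetical export repository; replace with a real location.
+    for path in collect("https://example.org/ulo-export.git"):
+        import_file(path)
+\end{verbatim}
+
+A real implementation would additionally need error handling,
+scheduling of repeated runs and similar conveniences; the sketch only
+illustrates the basic flow of data from Collecter to Importer.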
diff --git a/doc/report/references.bib b/doc/report/references.bib
index 4063e9eb8f11fd6fecaff41469843bbfa75edaf5..68d3389301720b7517a6eb643942cb3f0adad52c 100644
--- a/doc/report/references.bib
+++ b/doc/report/references.bib
@@ -46,6 +46,14 @@
   url = {https://rdf4j.org/},
 }

+@online{graphdb,
+  title = {GraphDB 9.3 documentation},
+  organization = {Ontotext},
+  date = {2020},
+  urldate = {2020-06-16},
+  url = {http://graphdb.ontotext.com/documentation/free/},
+}
+
 @online{graphdbapi,
   title = {Using GraphDB with the RDF4J API},
   organization = {Ontotext},