Skip to content
Snippets Groups Projects
Commit 613c6792 authored by Constantin Jucovschi
Browse files

updated pom and added command line parameters

parent 6dacd340
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,24 @@
<version>0.0.1-SNAPSHOT</version>
<name>defindexer Maven Webapp</name>
<url>http://maven.apache.org</url>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>info.kwarc.lmh.DefIndexer.DefIndexer</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>junit</groupId>
......@@ -19,20 +37,21 @@
<artifactId>solr-solrj</artifactId>
<version>4.6.0</version>
</dependency>
<dependency>
<groupId>args4j</groupId>
<artifactId>args4j</artifactId>
<version>2.0.26</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.3</version>
</dependency>
<dependency>
<groupId>info.kwarc.mmt</groupId>
<artifactId>service-wrapper</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
<finalName>defindexer</finalName>
</build>
</project>
......@@ -2,39 +2,103 @@ package info.kwarc.lmh.DefIndexer;
import info.kwarc.mmt.MMT;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.common.util.ContentStream;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
public class DefIndexer {
/**
 * Base URL of the Solr server that receives the indexed documents.
 * Set from the {@code --solr} command-line option; run() passes it to the Solr client it creates.
 */
@Option(name="--solr",usage="URL of the SOLR server")
String solrUrl = "http://localhost:8983/solr";
/**
 * URL of the MMT server that theories, concepts and presentations are fetched from.
 * Set from the {@code --mmt} command-line option; run() passes it to the MMT client it creates.
 */
@Option(name="--mmt",usage="URL of the MMT server")
String mmtUrl = "http://localhost:8080";
/**
 * URL of a STOMP server, set from the {@code --stomp} command-line option.
 * NOTE(review): nothing in the visible code reads this field yet, and the default uses an
 * {@code http://} scheme on port 61613 -- confirm what the eventual consumer expects.
 */
@Option(name="--stomp", usage="Connect to STOMP server to serve e.g. authoring environments")
String stompUrl = "http://localhost:61613";
/**
 * Creates an indexer with the default option values; args4j overwrites the option fields
 * when run() parses the command line.
 */
public DefIndexer() {
}
/**
 * Shape of the concept URIs that get indexed; concepts that do not match are skipped.
 * Layout: {@code http://mathhub.info/<g1>/<g2>/<g3>?<g4>[?<g6>]}, where group 3 is used as the
 * omdoc path, group 4 as the theory name and the optional group 6 as the symbol name
 * (group 6 is null when the trailing {@code ?symbol} part is absent).
 * NOTE(review): groups 1 and 2 look like repository group / repository name -- confirm against
 * the assignments in run(). The dots in the host name are unescaped, so they match any character.
 */
final static Pattern uri = Pattern.compile("http://mathhub.info/([\\w-]+)/([\\w-]+)/([\\w-\\./]+)\\?([\\w-.]+)(\\?([\\w-.]+))?");
public static void main(String[] args) {
String urlString = "http://mathhub.info:8983/solr";
SolrServer solr = new HttpSolrServer(urlString);
class StringContentStream implements ContentStream {
String content;
public StringContentStream(String content) {
this.content = content;
}
public String getName() {
return "noname";
}
public String getSourceInfo() {
return "mmt";
}
public String getContentType() {
return "application/xml";
}
public Long getSize() {
return new Long(content.length());
}
public InputStream getStream() throws IOException {
return new ByteArrayInputStream(content.getBytes());
}
public Reader getReader() throws IOException {
return new StringReader(content);
}
}
public void run(String[] args) {
CmdLineParser parser = new CmdLineParser(this);
try {
parser.parseArgument(args);
} catch( CmdLineException e ) {
System.err.println(e.getMessage());
System.err.println("java DefIndexer [options...] arguments...");
parser.printUsage(System.err);
System.err.println();
System.err.println(" Example: java DefIndexer --solr http://mathhub.info:8983/solr --mmt http://mathhub.info:8080 ");
return;
}
//SolrServer solr = new HttpSolrServer(solrUrl);
ConcurrentUpdateSolrServer solr = new ConcurrentUpdateSolrServer(solrUrl, 10, 5);
try {
solr.deleteByQuery( "*:*" );
solr.commit();
} catch (SolrServerException e1) {
e1.printStackTrace();
} catch (IOException e1) {
e1.printStackTrace();
}
MMT mmt = new MMT("http://mathhub.info:8080");
MMT mmt = new MMT(mmtUrl);
for (String theoryURI : mmt.getTheories()) {
String content = mmt.getPresentation(theoryURI);
for (String concept : mmt.getTheoryConcepts(theoryURI)) {
SolrInputDocument doc = new SolrInputDocument();
Matcher m = uri.matcher(concept);
if (!m.matches()) {
continue;
......@@ -46,37 +110,34 @@ public class DefIndexer {
String omdoc = m.group(3);
String theory = m.group(4);
String symb = m.group(6);
doc.addField( "id", concept, 1.0f );
doc.addField( "repository", repo, 1.0f );
doc.addField( "repository_name", repo_name, 1.0f );
doc.addField( "repository_group", repo_group, 1.0f );
doc.addField( "omdoc", omdoc, 1.0f );
doc.addField( "theory", theory, 1.0f );
doc.addField( "symbol", symb, 1.0f );
doc.addField( "content", content, 1.0f );
try {
solr.add(doc);
} catch (SolrServerException e) {
ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
up.addContentStream(new StringContentStream(content));
up.setParam( "literal.id", concept);
up.setParam( "literal.repository", repo);
up.setParam( "literal.repository_name", repo_name);
up.setParam( "literal.repository_group", repo_group);
up.setParam( "literal.omdoc", omdoc);
up.setParam( "literal.theory", theory);
up.setParam( "literal.symbol", symb);
up.setParam( "literal.description", content);
//up.setParam( "literal.content", content);
up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
solr.request(up);
} catch (IOException e2) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
e2.printStackTrace();
} catch (SolrServerException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
try {
solr.commit();
} catch (SolrServerException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
 * Command-line entry point: builds an indexer with default settings and hands it the
 * raw arguments, which run() parses and acts on.
 *
 * @param args command-line arguments, forwarded unchanged
 */
public static void main(String[] args) {
    final DefIndexer indexer = new DefIndexer();
    indexer.run(args);
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment