Commit 03e4eecc authored by jfschaefer's avatar jfschaefer
Browse files

added script for combined lexicons and example

parent 67057736
#!/usr/bin/python3
import sys, os, re
class CoLexLineException(Exception):
def __init__(self, lineNumber, message):
Exception.__init__(self, f"Line {lineNumber}: {message}")
class GfPart(object):
def __init__(self, name):
self.name = name
self.linesInAbstract = []
self.linesInConcrete = {}
self.alreadyAddedEntries = []
def pushEntry(self, entry):
if entry.nameComment:
self.linesInAbstract.append(" " * 8 + "-- " + entry.nameComment)
line = " " * 8 + entry.name + " : " + entry.gfType + ";"
if entry.gfTypeComment:
line += " -- " + entry.gfTypeComment
self.linesInAbstract.append(line)
for lang in entry.gfLins.keys():
if lang not in self.linesInConcrete.keys():
self.linesInConcrete[lang] = []
if self.alreadyAddedEntries:
print("Warning: The following entries have no gf linearization for " + lang + ":")
print(", ".join(self.alreadyAddedEntries))
line = " " * 8 + entry.name + " = " + entry.gfLins[lang] + ";"
if entry.gfLinsComments[lang]:
line += " -- " + entry.gfLinsComments[lang]
self.linesInConcrete[lang].append(line)
for lang in self.linesInConcrete.keys():
if lang not in entry.gfLins.keys():
print("Warning: The entry '" + entry.name + "' has no gf linearization for " + lang)
self.alreadyAddedEntries.append(entry.name)
def save(self, directory):
with open(os.path.join(directory, self.name + ".gf"), "w") as fp:
fp.write("\n".join(self.linesInAbstract))
for lang in self.linesInConcrete.keys():
with open(os.path.join(directory, self.name + lang + ".gf"), "w") as fp:
fp.write("\n".join(self.linesInConcrete[lang]))
class MmtPart(object):
def __init__(self, name):
self.name = name
self.lines = []
def pushEntry(self, entry):
jOD = chr(10072)
jDD = chr(10073)
jMD = chr(10074)
if entry.nameComment:
self.lines.append(" " * 4 + "// " + entry.nameComment + jDD)
if (not entry.mmtType) and (not entry.mmtDef):
print("Warning: The entry '" + entry.name + "' has neither an mmt type nor an mmt definition")
line = " " * 4 + entry.name + " "
if entry.mmtType:
line += ": " + entry.mmtType + " "
if entry.mmtTypeComment:
line += jOD + " // " + entry.mmtTypeComment + " "
if entry.mmtDef:
line += jOD + " = " + entry.mmtDef + " "
if entry.mmtDefComment:
line += jOD + " // " + entry.mmtDefComment + " "
if entry.mmtNotation:
line += jOD + " # " + entry.mmtNotation + " "
if entry.mmtNotationComment:
line += jOD + " // " + entry.mmtNotationComment + " "
line += jDD
self.lines.append(line)
def save(self, directory):
with open(os.path.join(directory, self.name + ".mmt"), "w") as fp:
fp.write("\n".join(self.lines))
class Entry(object):
def __init__(self, name):
self.name = name
self.nameComment = None
self.mmtType = None
self.mmtTypeComment = None
self.mmtDef = None
self.mmtDefComment = None
self.mmtNotation = None
self.mmtNotationComment = None
self.gfType = None
self.gfTypeComment = None
self.gfLins = {} # linearizations
self.gfLinsComments = {}
class Converter(object):
commentSuffix = r"\s*(?P<comment>//.*)?$"
emptyLineRegex = re.compile("^" + commentSuffix)
commentRegex = re.compile(".*(?P<comment>//.*)$")
entryNameRegex = re.compile(r"^\s*(?P<name>[a-zA-Z_][a-zA-Z0-9_]*)" + commentSuffix)
mmttypeRegex = re.compile(r"^\s*:\s+(?P<type>.*)$")
mmtdefRegex = re.compile(r"^\s*=\s+(?P<def>.*)$")
mmtnotationRegex = re.compile(r"^\s*\#\s+(?P<notation>.*)$")
gftypeRegex = re.compile(r"^\s*::\s+(?P<type>.*)$")
gflinRegex = re.compile(r"^\s*\#(?P<lang>[a-zA-Z][a-zA-Z0-9_]*)\s+(?P<lin>.*)$")
def __init__(self, gfPart, mmtPart):
self.gf = gfPart
self.mmt = mmtPart
self.currentEntry = None
self.lineCounter = 0
self.entries = set()
def __splitField(self, string):
""" extracts comment, if it exists, and trims whitespace """
match = Converter.commentRegex.match(string)
if match:
return (string[:len(string)-len(match.group("comment"))].strip(), match.group("comment")[2:].strip())
else:
return (string.strip(), None)
def pushLine(self, line):
self.lineCounter += 1
if Converter.emptyLineRegex.match(line):
return
match = Converter.entryNameRegex.match(line)
if match:
if self.currentEntry:
self.mmt.pushEntry(self.currentEntry)
self.gf.pushEntry(self.currentEntry)
self.currentEntry = Entry(match.group("name"))
if self.currentEntry.name in self.entries:
raise CoLexLineException(self.lineCounter, "There is already an entry '" + self.currentEntry.name + "'")
self.entries |= set(self.currentEntry.name)
if match.group("comment"):
self.currentEntry.nameComment = match.group("comment")[2:].strip()
return
entry = self.currentEntry
match = Converter.mmttypeRegex.match(line)
if match:
if entry.mmtType:
raise CoLexLineException(self.lineCounter, "Two mmt types provided for '" + entry.name + "'")
entry.mmtType, entry.mmtTypeComment = self.__splitField(match.group("type"))
return
match = Converter.mmtdefRegex.match(line)
if match:
if entry.mmtDef:
raise CoLexLineException(self.lineCounter, "Two mmt definitions provided for '" + entry.name + "'")
entry.mmtDef, entry.mmtDefComment = self.__splitField(match.group("def"))
return
match = Converter.mmtnotationRegex.match(line)
if match:
if entry.mmtNotation:
raise CoLexLineException(self.lineCounter, "Two mmt notations provided for '" + entry.name + "'")
entry.mmtNotation, entry.mmtNotationComment = self.__splitField(match.group("notation"))
return
match = Converter.gftypeRegex.match(line)
if match:
if entry.gfType:
raise CoLexLineException(self.lineCounter, "Two gf types provided for '" + entry.name + "'")
entry.gfType, entry.gfTypeComment = self.__splitField(match.group("type"))
return
match = Converter.gflinRegex.match(line)
if match:
lang = match.group("lang")
if lang in entry.gfLins:
raise CoLexLineException(self.lineCounter, f"Two gf linearizations for language '{lang}' provided for '{entry.name}'")
entry.gfLins[lang], entry.gfLinsComments[lang] = self.__splitField(match.group("lin"))
return
raise CoLexLineException(self.lineCounter, "Cannot parse line: '" + line + "'")
def finish(self):
if self.currentEntry:
self.mmt.pushEntry(self.currentEntry)
self.gf.pushEntry(self.currentEntry)
def colexUnzip(source_directory, name, target_directory):
converter = Converter(GfPart(name), MmtPart(name))
with open(os.path.join(source_directory, name + ".colex"), "r") as lexicon:
for line in lexicon:
converter.pushLine(line[:-1])
converter.finish()
converter.mmt.save(target_directory)
converter.gf.save(target_directory)
if len(sys.argv) != 4:
print("Usage: colexUnzipper {SOURCE_DIR} {NAME} {TARGET_DIR}")
print("Example: colexUnzipper . exampleLexicon /tmp")
else:
colexUnzip(sys.argv[1], sys.argv[2], sys.argv[3])
cyclic // property of a group
: group → prop
:: A
#en mkA("cyclic")
#de mkA("zyklisch")
setOfNaturalNumbers
: type
:: Noun // TODO: add linearizations
# ℕ
zero // the natural number zero
: ℕ
:: N
# 0
#en mkN("zero")
absoluteValue
: ℝ → ℝ^+
:: N2 // What is the actual type?
= [q] case q < 0 ⟹ -q . q // Definition commented out in arithmetics.mmt. Is it wrong?
# | 1 | prec 8
#en mkN2(mkA("absolute"),mkN("value")) // how should we do this?
#de mkN2(mkA("absolut"),mkN("Wert")) // how should we do this?
bijective
: {A, B} (A → B) → prop
:: A
= [x] injective(x) ∧ surjective(x)
#en mkA("bijective")
#de mkA("bijektiv")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment