Commit ce50d618 authored by Michael Kohlhase's avatar Michael Kohlhase

merge

parents e0776204 a4d3bdc5
This diff is collapsed.
-- Abstract syntax of the mathematical lexicon. Extends Cat with categories
-- for math noun phrases, expressions, operators and relations, and declares
-- the lexical constants and combinators that a concrete syntax must linearize.
-- NOTE(review): several judgments occur twice in this listing (e.g. `cat MathNP`,
-- positive_A, prime_A, integer_N, three_PN, iff_Subj, and the *_MathBinRel /
-- *_MathBinOp / *_MathExpr constants) — presumably the old and new sides of a
-- merge diff shown together; confirm against the checked-out file.
abstract MathLex = Cat ** {
cat MathNP;
cat
MathNP; -- do we really need this?
MathExpr;
MathBinRel; -- binary relation
MathBinOp; -- binary operator
MathUnOp; -- unary operator
MathPrefixedAdj; -- like "_-dimensional"
MathPrefixedNoun; -- like "_-tuple"
fun
positive_A : A ;
prime_A : A ;
integer_N : N ;
call_V2 : V2;
iff_Subj : Subj;
three_PN : PN;
-- for the Math Grammar
appo : N -> MathNP -> N; -- noun followed by a math noun phrase
n_MathNP : MathNP;
-- adjectives
cartesian_A : A ;
positive_A : A ;
ordered_A : A ;
finite_A : A ;
total_A : A ;
prime_A : A ;
even_A : A ;
-- nouns
permutation_N : N ;
character_N : N ;
alphabet_N : N ;
element_N : N ;
integer_N : N ;
product_N : N ;
string_N : N ;
space_N : N ;
word_N : N ;
sign_N : N ;
set_N : N ;
dimensional_MPA : MathPrefixedAdj;
tuple_MPN : MathPrefixedNoun;
three_PN : PN;
-- math relations, operators and atomic expressions
divides_MathBinRel : MathBinRel;
power_MathBinOp : MathBinOp;
xtimes_MathBinOp : MathBinOp;
in_MathBinOp : MathBinOp;
neg_MathUnOp : MathUnOp;
lessThan_MathBinRel : MathBinRel;
equals_MathBinRel : MathBinRel;
i_MathExpr : MathExpr;
n_MathExpr : MathExpr;
m_MathExpr : MathExpr;
s_MathExpr : MathExpr;
a_MathExpr : MathExpr;
sigma_MathExpr : MathExpr;
ldots_MathExpr : MathExpr;
bigA_MathExpr : MathExpr;
one_MathExpr : MathExpr;
-- function words and verbs
each_Det : Det;
call_V2A : V2A; -- we call it prime
call_V3 : V3; -- we call it an integer
iff_Subj : Subj;
else_Adv : Adv;
otherwise_Adv : Adv;
-- appo : N -> MathNP -> N;
AdvSInEnd : Adv -> S -> S; -- sentence with the adverb placed after it
-- for the Math Grammar
mathPrefixNoun : MathNP -> MathPrefixedNoun -> N; -- math NP, "-", then the noun
mathPrefixAdj : MathNP -> MathPrefixedAdj -> A; -- math NP, "-", then the adjective
applyRel : MathBinRel -> MathExpr -> MathExpr -> MathExpr; -- relation between two expressions
applyBinOp : MathBinOp -> MathExpr -> MathExpr -> MathExpr; -- operator between two expressions
applyUnOp : MathUnOp -> MathExpr -> MathExpr; -- operator in front of one expression
mathIndex : MathExpr -> MathExpr -> MathExpr; -- first expression, "_", second expression
exprToMathNP : MathExpr -> MathNP; -- an integer $i$
exprToCl : MathExpr -> Cl; -- since $m = n$
exprToN : MathExpr -> N; -- for all $n \in A$
divides_MathBinRel : MathBinRel;
power_MathBinOp : MathBinOp;
xtimes_MathBinOp : MathBinOp;
neg_MathUnOp : MathUnOp;
lessThan_MathBinRel : MathBinRel;
equals_MathBinRel : MathBinRel;
i_MathExpr : MathExpr;
n_MathExpr : MathExpr;
m_MathExpr : MathExpr;
sigma_MathExpr : MathExpr;
ldots_MathExpr : MathExpr;
bigA_MathExpr : MathExpr;
one_MathExpr : MathExpr;
}
--# -path=.:prelude
-- English concrete syntax for MathLex, built on CatEng.
-- MathNP, MathExpr and the operator/relation categories linearize to plain
-- strings; MathPrefixedAdj and MathPrefixedNoun reuse the A and N categories.
-- NOTE(review): two `open ... in {` lines, two `lincat MathNP` judgments and
-- two `iff_Subj` rules appear below — presumably old and new sides of a merge
-- diff shown together; confirm against the checked-out file.
concrete MathLexEng of MathLex = CatEng **
open ParadigmsEng, IrregEng, Prelude in {
open ParadigmsEng, ResEng, IrregEng, Prelude in {
flags
optimize=values ;
lincat MathNP = Str;
lincat
MathNP = Str;
MathExpr = Str;
MathBinRel = Str;
MathBinOp = Str;
MathUnOp = Str;
MathPrefixedAdj = A;
MathPrefixedNoun = N;
lin
-- adjectives and nouns built with the regular paradigms
cartesian_A = regA "cartesian" ;
positive_A = regA "positive" ;
ordered_A = regA "ordered";
finite_A = regA "finite" ;
total_A = regA "total" ;
prime_A = regA "prime" ;
even_A = regA "even" ;
permutation_N = regN "permutation" ;
character_N = regN "character" ;
alphabet_N = regN "alphabet" ;
integer_N = regN "integer" ;
call_V2 = dirV2 (regV "call") ;
-- lin call_V2 = mkV2 (mkV "call" "calls" "called" "called" "calling");
iff_Subj = mkSubj "iff";
element_N = regN "element" ;
product_N = regN "product" ;
string_N = regN "string" ;
space_N = regN "space" ;
word_N = regN "word" ;
sign_N = regN "sign" ;
set_N = regN "set" ;
dimensional_MPA = regA "dimensional" ;
tuple_MPN = regN "tuple";
three_PN = mkPN (mkN nonhuman (mkN "three")) ;
-- math relations, operators and atomic expressions as literal token strings
divides_MathBinRel = "\\divides";
lessThan_MathBinRel = "<";
equals_MathBinRel = "=";
power_MathBinOp = "^";
in_MathBinOp = "\\in";
xtimes_MathBinOp = "\\times";
neg_MathUnOp = "-";
i_MathExpr = "i";
n_MathExpr = "n";
m_MathExpr = "m";
s_MathExpr = "s";
a_MathExpr = "a";
sigma_MathExpr = "\\sigma";
ldots_MathExpr = "\\ldots";
bigA_MathExpr = "A";
one_MathExpr = "1";
-- hand-written determiner record: "each's" for NCase Gen, "each" otherwise
each_Det = lin Det { s = "each" ; sp = table { NCase Gen => "each's"; _ => "each" }; n = Sg; hasNum = False }; -- how correct is this?
call_V2A = mkV2A (mkV "call" "calls" "called" "called" "calling") noPrep ;
call_V3 = mkV3 "call" ;
iff_Subj = mkSubj "iff";
else_Adv = mkAdv "else";
otherwise_Adv = mkAdv "otherwise";
-- appo n a = lin N {s = \\x,y => n.s ! x ! y ++ a; g=n.g};
-- sentence string followed by the adverb string
AdvSInEnd a s = lin S { s = s.s ++ a.s } ;
-- for the Math Grammar
-- every form of the noun gets the math NP appended; gender is the noun's
appo n a = lin N {s = \\x,y => n.s ! x ! y ++ a; g=n.g};
n_MathNP = "$n$";
-- math NP, a "-" token, then the inflected noun / adjective form
mathPrefixNoun mathnp noun = lin N { s = \\number,case_ => mathnp ++ "-" ++ noun.s ! number ! case_; g = noun.g };
mathPrefixAdj mathnp adj = lin A { s = \\x => mathnp ++ "-" ++ adj.s ! x };
applyRel rel a b = a ++ rel ++ b; -- e.g. "m < n"
applyBinOp op a b = a ++ op ++ b; -- e.g. "m + n"
applyUnOp op a = op ++ a; -- e.g. "- n"
-- expression placed between two "$" tokens
exprToMathNP expr = "$" ++ expr ++ "$";
-- index notation: a, a "_" token, then b
mathIndex a b = a ++ "_" ++ b;
-- Only the Pres / Simul / CPos branch yields the "$"-delimited expression;
-- every other branch yields the placeholder "???".
exprToCl expr = { s = table {
Pres => table {
Simul => table { CPos => table { _ => "$" ++ expr ++ "$" }; _ => table { _ => "???" } };
_ => table { _ => table { _ => "???" } } };
_ => table { _ => table { _ => table { _ => "???" } } } } };
-- same "$"-delimited string for every form of the noun; gender fixed to Neutr
exprToN expr = lin N { s = table { _ => table { _ => "$" ++ expr ++ "$" } } ; g = Neutr };
}
-- GF shell script: imports MathEng.gf and parses sample sentences.
-- Tokens inside the quoted sentences are separated by spaces, including
-- the "$" delimiters and punctuation.
import MathEng.gf
parse "we call a positive integer $ n $ prime , iff there is no integer $ 1 < m < n $ such that $ m \\divides n $"
parse "the empty set is the set without elements"
parse "we call a set empty , iff it is the empty set"
parse "an alphabet $ A $ is a finite set"
-- Warning: the following sentences have very many parse trees!
-- parse "we call each element $ a \\in A $ a character , and an $ n $ - tuple of $ s \\in A ^ n $ a word of $ A $"
-- parse "we call an $ n $ - dimensional cartesian product $ A _ 1 \\times \\ldots \\times A _ n $ an $ n $ - dimensional cartesian space , iff $ A _ i = A $ for some set $ A $ for all $ i $"
-- parse "the sign of a permutation $ \\sigma $ of a finite totally ordered set is $ 1 $ if $ \\sigma $ is even and $ 1 $ else"
......@@ -233,6 +233,19 @@ as MathML expressions, Listing \ref{sentence.html5.cleaned} shows the essence;
Appendix~\ref{sec:sentence.html5} has the full listing.
\lstinputlisting[language=HTML,caption=HTML5 Representation of (*),label=sentence.html5.cleaned]{sentence.html5.cleaned}
\section{Other open questions}
\begin{enumerate}
\item Scalability: We easily get tens of thousands of parse trees for all sentences except the shortest ones.
How can we parse longer sentences in a reasonable amount of time?
We can fine-tune the grammar to reduce the number of possible trees, but we can't really get around the exponential inflation.
\item Lexer: We would like to parse things like XML tags. This does not work nicely if the \texttt{++} operator always
inserts a space. Similarly, whitespace is required before periods, commas, etc.
How should we deal with this?
\item Lexer: How can we deal with things like IDs in XML?
\end{enumerate}
\emph{Remark}: It seems that both lexer issues could be solved by writing a custom lexer.
\section{Conclusion}\label{sec:concl}
\ednote{tbw.}
......
-- GF shell script: imports MathEng.gf and parses one input in category MathNP.
import MathEng.gf
parse -cat=MathNP "$ n $"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment