Below is a list of my academic publications in reverse chronological order.
- Fritz Henglein, Ulrik Terp Rasmussen. "PEG Parsing in Less Space Using Progressive Tabling and Dynamic Analysis." Proceedings 2017 ACM SIGPLAN Workshop on Partial Evaluation and Program Manipulation, January 2017.
@inproceedings{henglein2017,
  author    = {Henglein, Fritz and Rasmussen, Ulrik Terp},
  title     = {{PEG} Parsing in Less Space Using Progressive Tabling and Dynamic Analysis},
  booktitle = {Proceedings 2017 {ACM} {SIGPLAN} Workshop on Partial Evaluation and Program Manipulation},
  year      = {2017},
  month     = jan,
  doi       = {10.1145/3018882.3018889},
  abstract  = {Tabular top-down parsing and its lazy variant, Packrat, are
               linear-time execution models for the TDPL family of recursive
               descent parsers with limited backtracking. Exponential work
               due to backtracking is avoided by tabulating the result of
               each (nonterminal, offset)-pair at the expense of always using
               space proportional to the product of the input length and
               grammar size. Current methods for limiting the space usage
               rely either on manual annotations or on static analyses that
               are sensitive to the syntactic structure of the grammar. We
               present progressive tabular parsing (PTP), a new execution
               model which progressively computes parse tables for longer
               prefixes of the input and simultaneously generates a leftmost
               expansion of the parts of the parse tree that can be
               resolved. Table columns can be discarded on-the-fly as the
               expansion progresses through the input string, providing
               best-case constant and worst-case linear memory
               use. Furthermore, semantic actions are scheduled before the
               parser has seen the end of the input. The scheduling is
               conservative in the sense that no action has to be "undone" in
               the case of backtracking. The time complexity is O(dmn) where
               m is the size of the parser specification, n is the size of
               the input string, and d is either a configured constant or the
               maximum parser stack depth. For common data exchange formats
               such as JSON, we demonstrate practically constant space
               usage.},
}
- Ulrik Terp Rasmussen. "Stream Processing using Grammars and Regular Expressions." PhD thesis, University of Copenhagen, January 2017.
@phdthesis{rasmussen2017,
  author   = {Rasmussen, Ulrik Terp},
  title    = {Stream Processing using Grammars and Regular Expressions},
  school   = {University of Copenhagen},
  year     = {2017},
  month    = jan,
  note     = {PhD Thesis},
  abstract = {In this dissertation we study regular expression based parsing and the use of grammatical specifications for the synthesis of fast, streaming string-processing programs.
              In the first part we develop two linear-time algorithms for regular expression based parsing with Perl-style greedy disambiguation. The first algorithm operates in two passes in a semi-streaming fashion, using a constant amount of working memory and an auxiliary tape storage which is written in the first pass and consumed by the second. The second algorithm is a single-pass and optimally streaming algorithm which outputs as much of the parse tree as is semantically possible based on the input prefix read so far, and resorts to buffering as many symbols as is required to resolve the next choice. Optimality is obtained by performing a PSPACE-complete pre-analysis on the regular expression.
              In the second part we present Kleenex, a language for expressing high-performance streaming string processing programs as regular grammars with embedded semantic actions, and its compilation to streaming string transducers with worst-case linear-time performance. Its underlying theory is based on transducer decomposition into oracle and action machines, and a finite-state specialization of the streaming parsing algorithm presented in the first part. In the second part we also develop a new linear-time streaming parsing algorithm for parsing expression grammars (PEG) which generalizes the regular grammars of Kleenex. The algorithm is based on a bottom-up tabulation algorithm reformulated using least fixed points and evaluated using an instance of the chaotic iteration scheme by Cousot and Cousot.},
}
- Bjørn Bugge Grathwohl, Fritz Henglein, Ulrik Terp Rasmussen, Kristoffer Aalund Søholm, Sebastian Paaske Tørholm. "Kleenex: Compiling Nondeterministic Transducers to Deterministic Streaming Transducers." Proceedings 43rd ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages (POPL'2016), January 2016.
@inproceedings{grathwohl2016,
  author    = {Grathwohl, Bj{\o}rn Bugge and Henglein, Fritz and Rasmussen, Ulrik Terp
               and S{\o}holm, Kristoffer Aalund and T{\o}rholm, Sebastian Paaske},
  title     = {{Kleenex}: Compiling Nondeterministic Transducers to Deterministic
               Streaming Transducers},
  booktitle = {Proceedings 43rd {ACM} {SIGPLAN}-{SIGACT} Symposium on Principles of
               Programming Languages ({POPL}'2016)},
  year      = {2016},
  month     = jan,
  location  = {St. Petersburg, Florida, USA},
  doi       = {10.1145/2837614.2837647},
  abstract  = {We present and illustrate Kleenex, a language for expressing
               general nondeterministic finite transducers, and its novel
               compilation to streaming string transducers with worst-case
               linear-time performance and sustained high throughput. Its
               underlying theory is based on transducer decomposition into
               oracle and action machines: the oracle machine performs
               streaming greedy disambiguation of the input; the action
               machine performs the output actions. In use cases Kleenex
               achieves consistently high throughput rates around the 1 Gbps
               range on stock hardware. It performs well, especially in
               complex use cases, in comparison to both specialized and
               related tools such as awk, sed, RE2, Ragel and
               regular-expression libraries.},
}
- Niels Bjørn Bugge Grathwohl, Fritz Henglein, Ulrik Terp Rasmussen. "Optimally Streaming Greedy Regular Expression Parsing." Theoretical Aspects of Computing - ICTAC 2014, pp. 224–240, September 2014.
@inproceedings{grathwohl2014,
  author    = {Grathwohl, Niels Bj{\o}rn Bugge and Henglein, Fritz and Rasmussen, Ulrik Terp},
  title     = {Optimally Streaming Greedy Regular Expression Parsing},
  booktitle = {Theoretical Aspects of Computing - {ICTAC} 2014},
  editor    = {Ciobanu, Gabriel and M{\'e}ry, Dominique},
  publisher = {Springer},
  year      = {2014},
  month     = sep,
  pages     = {224--240},
  doi       = {10.1007/978-3-319-10882-7_14},
  abstract  = {We study the problem of streaming regular expression parsing: Given
               a regular expression and an input stream of symbols, how to
               output a serialized syntax tree representation as an output
               stream during input stream processing. We show that optimally
               streaming regular expression parsing, outputting bits of the
               output as early as is semantically possible for any regular
               expression of size m and any input string of length n, can be
               performed in time $O(2^{m \log m} + m n)$ on a unit-cost
               random-access machine. This is for the wide-spread greedy
               disambiguation strategy for choosing parse trees of
               grammatically ambiguous regular expressions. In particular,
               for a fixed regular expression, the algorithm's run-time
               scales linearly with the input string length. The exponential
               is due to the need for preprocessing the regular expression to
               analyze state coverage of its associated NFA, a PSPACE-hard
               problem, and tabulating all reachable ordered sets of
               NFA-states. Previous regular expression parsing algorithms
               operate in multiple phases, always requiring processing or
               storing the whole input string before outputting the first bit
               of output, not only for those regular expressions and input
               prefixes where reading to the end of the input is strictly
               necessary.},
}
- Niels Bjørn Bugge Grathwohl, Fritz Henglein, Lasse Nielsen, Ulrik Terp Rasmussen. "Two-Pass Greedy Regular Expression Parsing." Proceedings 18th International Conference on Implementation and Application of Automata (CIAA), pp. 60–71, July 2013.
@inproceedings{grathwohl2013,
  author    = {Grathwohl, Niels Bj{\o}rn Bugge and Henglein, Fritz and Nielsen, Lasse
               and Rasmussen, Ulrik Terp},
  title     = {Two-Pass Greedy Regular Expression Parsing},
  booktitle = {Proceedings 18th International Conference on Implementation and
               Application of Automata ({CIAA})},
  editor    = {Konstantinidis, Stavros},
  series    = {Lecture Notes in Computer Science},
  volume    = {7982},
  publisher = {Springer},
  year      = {2013},
  month     = jul,
  pages     = {60--71},
  doi       = {10.1007/978-3-642-39274-0_7},
  abstract  = {We present new algorithms for producing greedy parses for regular
               expressions (REs) in a semi-streaming fashion. Our lean-log
               algorithm executes in time O(mn) for REs of size m and input
               strings of size n and outputs a compact bit-coded parse tree
               representation. It improves on previous algorithms by:
               operating in only 2 passes; using only O(m) words of
               random-access memory (independent of n); requiring only kn
               bits of sequentially written and read log storage, where k <
               1/3 m is the number of alternatives and Kleene stars in the
               RE; processing the input string as a symbol stream and not
               requiring it to be stored at all. Previous RE parsing
               algorithms do not scale linearly with input size, or require
               substantially more log storage and employ 3 passes where the
               first consists of reversing the input, or do not or are not
               known to produce a greedy parse. The performance of our
               unoptimized C-based prototype indicates that the superior
               performance of our lean-log algorithm can also be observed in
               practice; it is also surprisingly competitive with RE tools
               not performing full parsing, such as Grep.},
}
- Ulrik Rasmussen. "Formalization of Proofs by Logical Relations in a Logical Framework." Master's thesis, University of Copenhagen, September 2013.
@mastersthesis{rasmussen2013,
  author   = {Rasmussen, Ulrik},
  title    = {Formalization of Proofs by Logical Relations in a Logical Framework},
  school   = {University of Copenhagen},
  year     = {2013},
  month    = sep,
  note     = {Master's Thesis},
  abstract = {Logical relations are an important proof technique frequently
              employed in the study of programming languages based on typed
              lambda calculi, where they have been used to prove a broad
              range of foundational properties. We present applications and
              extensions of the method of structural logical relations by
              Sch{\"u}rmann and Sarnat [In Proc. of 23rd Symp. on Logic in
              Computer Science, 2008], which enables syntactic and
              verifiable representations of proofs by logical relations in
              the Twelf proof assistant by reducing problems to a
              consistency proof of an auxiliary logic. We apply the method
              in several case studies where we formalize logical relations
              for prototypical programming languages. We identify
              shortcomings of the existing method, which we address by
              extending the auxiliary logic with several new reasoning
              principles.},
}
- Ulrik Rasmussen, Andrzej Filinski. "Structural Logical Relations with Case Analysis and Equality Reasoning." Proceedings International Workshop on Logical Frameworks and Meta-Languages: Theory and Practice (LFMTP), September 2013.
@inproceedings{rasmussen2013a,
  author    = {Rasmussen, Ulrik and Filinski, Andrzej},
  title     = {Structural Logical Relations with Case Analysis and Equality Reasoning},
  booktitle = {Proceedings International Workshop on Logical Frameworks and
               Meta-Languages: Theory and Practice ({LFMTP})},
  year      = {2013},
  month     = sep,
  doi       = {10.1145/2503887.2503891},
  abstract  = {Formalizing proofs by logical relations in the Twelf proof
               assistant is known to be notoriously difficult. However, as
               demonstrated by Sch{\"u}rmann and Sarnat [In Proc. of 23rd
               Symp. on Logic in Computer Science, 2008] such proofs can be
               represented and verified in Twelf if done so using a
               Gentzen-style auxiliary assertion logic which is subsequently
               proved consistent via cut elimination. We demonstrate in this
               paper an application of the above methodology to proofs of
               observational equivalence between expressions in a simply
               typed lambda calculus with a call-by-name operational
               semantics. Our use case requires the assertion logic to be
               extended with reasoning principles not present in the original
               presentation of the formalization method. We address this by
               generalizing the assertion logic to include dependent sorts,
               and demonstrate that the original cut elimination proof
               continues to apply without modification.},
}