% This file was created with JabRef 2.6.
% Encoding: UTF8
@TECHREPORT{BGHNP00,
author = {Gregor Baudis and Clemens Gr{\"o}pl and Stefan Hougardy and Till
Nierhoff and Hans J{\"u}rgen Pr{\"o}mel},
title = {Approximating Minimum Spanning Sets in Hypergraphs and Polymatroids},
institution = {Humboldt-University Berlin},
year = {2000},
note = {This paper was already accepted for ICALP 2000 but we did not present
it since later we were informed that the main result had already
been proven in a different way.},
abstract = {We present a new analysis of the greedy algorithm for the problem
of finding a minimum spanning subset in k-polymatroids. This algorithm
has a performance ratio of approximately ln k, which is best possible
for large k. A consequence of this algorithm is a polynomial time
approximation algorithm with approximation ratio ln k for finding
minimum weight spanning subhypergraphs in (k+1)-restricted hypergraphs.
This generalization of the well-known set cover problem naturally
arises when computing Steiner minimum trees. Other applications of
the algorithm include the rigidity problem in statics.},
file = {BGHNP00.pdf:BGHNP00.pdf:PDF;BGHNP00.ps:BGHNP00.ps:PostScript},
keywords = {Hypergraphs, set systems, and designs, Steiner trees, Approximation
algorithms, Colouring, packing and covering, Combinatorial optimization,
Graph algorithms}
}
@INCOLLECTION{BGRK11,
author = {Bertsch, Andreas and Gr{\"o}pl, Clemens and Reinert, Knut and Kohlbacher,
Oliver},
title = {{OpenMS} and {TOPP}: Open Source Software for {LC-MS} Data Analysis},
booktitle = {Data Mining in Proteomics},
publisher = {Humana Press},
year = {2011},
editor = {Hamacher, Michael and Eisenacher, Martin and Stephan, Christian},
volume = {696},
series = {Methods in Molecular Biology},
pages = {353-367},
abstract = {Proteomics experiments based on state-of-the-art mass spectrometry
produce vast amounts of data, which cannot be analyzed manually.
Hence, software is needed which is able to analyze the data in an
automated fashion. The need for robust and reusable software tools
triggered the development of libraries implementing different algorithms
for the various analysis steps. OpenMS is such a software library
and provides a wealth of data structures and algorithms for the analysis
of mass spectrometric data. For users unfamiliar with programming,
TOPP (“The OpenMS Proteomics Pipeline”) offers a wide range of already
implemented tools sharing the same interface and designed for a specific
analysis task each. TOPP thus makes the sophisticated algorithms
of OpenMS accessible to nonprogrammers. The individual TOPP tools
can be strung together into pipelines for analyzing mass spectrometry-based
experiments starting from the raw output of the mass spectrometer.
These analysis pipelines can be constructed using a graphical editor.
Even complex analytical workflows can thus be analyzed with ease.},
affiliation = {Division for Simulation of Biological Systems, WSI/ZBIT, Eberhard-Karls-Universität
Tübingen, Tübingen, Germany},
isbn = {978-1-60761-987-1},
keyword = {Biomedical and Life Sciences},
owner = {groepl},
timestamp = {2012.01.18},
url = {http://dx.doi.org/10.1007/978-1-60761-987-1_23}
}
@INCOLLECTION{BGKR11,
author = {Bielow, Chris and Gr{\"o}pl, Clemens and Kohlbacher, Oliver and Reinert,
Knut},
title = {Bioinformatics for Qualitative and Quantitative Proteomics},
booktitle = {Bioinformatics for Omics Data},
publisher = {Humana Press},
year = {2011},
editor = {Mayer, Bernd},
volume = {719},
series = {Methods in Molecular Biology},
pages = {331-349},
abstract = {Mass spectrometry is today a key analytical technique to elucidate
the amount and content of proteins expressed in a certain cellular
context. The degree of automation in proteomics has yet to reach
that of genomic techniques, but even current technologies make a
manual inspection of the data infeasible. This article addresses
the key algorithmic problems bioinformaticians face when handling
modern proteomic samples and shows common solutions to them. We provide
examples on how algorithms can be combined to build relatively complex
analysis pipelines, point out certain pitfalls and aspects worth
considering and give a list of current state-of-the-art tools.},
affiliation = {AG Algorithmische Bioinformatik, Institut für Informatik, Freie Universität
Berlin, Berlin, Germany},
isbn = {978-1-61779-027-0},
keyword = {Biomedical and Life Sciences},
owner = {groepl},
timestamp = {2012.01.18},
url = {http://dx.doi.org/10.1007/978-1-61779-027-0_15}
}
@TECHREPORT{BGPPS97,
author = {Mathias Block and Clemens Gr{\"o}pl and Harry Preuss and Hans J{\"u}rgen
Pr{\"o}mel and Anand Srivastav},
title = {Efficient ordering of state variables and transition relation partitions
in symbolic model checking},
institution = {Humboldt-Universit{\"a}t zu Berlin},
year = {1997},
abstract = {Among the main algorithmic problems in the verification of sequential
circuits are the computation of good orders of state variables and
transition relation partitions. Existing model checking packages
like SMV from CMU, VIS from Berkeley or Rulebase from IBM Haifa provide
variants of Rudell's sifting algorithm for the variable ordering
problem and greedy-type algorithms for the partition ordering problem.
For both problems, we give new simulated annealing based algorithms
and we provide a model checking program called VERIFY which is based
on SMV. The impact of our approach is demonstrated on industrial
arbiter circuits and ISCAS '89 benchmarks. In particular on large
industrial circuits our algorithms show a better space/time performance
than previously given heuristics.},
file = {BGPPS97.pdf:BGPPS97.pdf:PDF},
keywords = {Randomized algorithms and probabilistic analysis, VLSI-Design and
layout, Binary Decision Diagrams, Hardware verification, Local search
and metaheuristics},
local_author = {block groepl proemel}
}
@ARTICLE{BGJK07,
author = {Manuel Bodirsky and Clemens Gr{\"o}pl and Daniel Johannsen and Mihyun
Kang},
title = {A direct decomposition of 3-connected planar graphs},
journal = {S{\'e}minaire Lotharingien de Combinatoire},
year = {2007},
volume = {B54Ak},
pages = {15 pages},
abstract = {Abstract. We present a decomposition strategy for c-nets, i.e., rooted
3-connected planar maps. The decomposition yields an algebraic equation
for the number of c-nets with a given number of vertices and a given
size of the outer face. The decomposition also leads to a deterministic
and polynomial time algorithm to sample c-nets uniformly at random.
Using rejection sampling, we can also sample isomorphism types of
convex polyhedra, i.e., 3-connected planar graphs, uniformly at random.
Résumé. Nous proposons une stratégie de décomposition pour les cartes
pointées 3-connexes (c-réseaux). Cette décomposition permet d'obtenir
une équation algébrique pour le nombre de c-réseaux suivant le nombre
de sommets et la taille de la face extèrieure. On en déduit un algorithme
de complexité en temps polynomiale pour le tirage aléatoire uniforme
des c-réseaux. En utilisant une méthode à rejet, nous obtenons aussi
un algorithme de tirage aléatoire uniforme pour les graphes planaires
3-connexes.},
address = {Taormina},
booktitle = {Proceedings of the 17th Annual International Conference on Formal
Power Series and Algebraic Combinatorics (FPSAC05)},
file = {BGJK07.pdf:BGJK07.pdf:PDF},
keywords = {Three-Connected Planar Graph, Cnet, Planar Graph, Enumeration, Random
graphs, Random Generation, Dynamic Programming, Graph Theory}
}
@INPROCEEDINGS{BGJK04a,
author = {Manuel Bodirsky and Clemens Gr{\"o}pl and Daniel Johannsen and Mihyun
Kang},
title = {A direct decomposition of 3-connected planar graphs},
booktitle = {Proceedings of the 17th Annual International Conference on Formal
Power Series and Algebraic Combinatorics (FPSAC05)},
year = {2005},
address = {Taormina},
abstract = {ABSTRACT. We present a decomposition strategy for c-nets, i. e., rooted
3-connected planar maps. The decomposition yields an algebraic equation
for the number of c-nets with a given number of vertices and a given
size of the outer face. The decomposition also leads to a deterministic
and polynomial time algorithm to sample c-nets uniformly at random.
Using rejection sampling, we can also sample isomorphism types of
convex polyhedra, i.e., 3-connected planar graphs, uniformly at random.
R´E SUM´E. Nous proposons une strat´egie de d´ecomposition pour les
cartes point´ees 3-connexes (c-r´eseaux). Cette d´ecomposition permet
d’obtenir une ´equation alg´ebrique pour le nombre de c-r´eseaux
suivant le nombre de sommets et la taille de la face ext`erieure.
On en d´eduit un algorithme de complexit´e en temps polynomiale pour
le tirage al´eatoire uniforme des c-r´eseaux. En utilisant une m´ethode
`a rejet, nous obtenons aussi un algorithme de tirage al´eatoire
uniforme pour les graphes planaires 3-connexes.},
file = {BGJK04a.pdf:BGJK04a.pdf:PDF},
keywords = {Three-Connected Planar Graph, Cnet, Planar Graph, Enumeration, Random
graphs, Random Generation, Dynamic Programming, Graph Theory}
}
@ARTICLE{BGK07b,
author = {Manuel Bodirsky and Clemens Gr{\"o}pl and Mihyun Kang},
title = {Generating unlabeled connected cubic planar graphs uniformly at random},
journal = {Random Structures and Algorithms},
year = {2008},
volume = {32},
pages = {157-180},
number = {2},
abstract = {We present an expected polynomial time algorithm to generate an unlabeled
connected cubic planar graph uniformly at random. We first consider
rooted connected cubic planar graphs, i.e., we count connected cubic
planar graphs up to isomorphisms that fix a certain directed edge.
Based on decompositions along the connectivity structure, we derive
recurrence formulas for the exact number of rooted cubic planar graphs.
This leads to rooted 3-connected cubic planar graphs, which have
a unique embedding on the sphere. Special care has to be taken for
rooted graphs that have a sense-reversing automorphism. Therefore
we introduce the concept of colored networks, which stand in bijective
correspondence to rooted 3-connected cubic planar graphs with given
symmetries. Colored networks can again be decomposed along the connectivity
structure. For rooted 3-connected cubic planar graphs embedded in
the plane, we switch to the dual and count rooted triangulations.
Since all these numbers can be evaluated in polynomial time using
dynamic programming, rooted connected cubic planar graphs can be
generated uniformly at random in polynomial time by inverting the
decomposition along the connectivity structure. To generate connected
cubic planar graphs without a root uniformly at random, we apply
rejection sampling and obtain an expected polynomial time algorithm},
doi = {10.1002/rsa.20206},
file = {BGK07b.pdf:BGK07b.pdf:PDF}
}
@ARTICLE{BGK07,
author = {Manuel Bodirsky and Clemens Gr{\"o}pl and Mihyun Kang},
title = {Generating labeled planar graphs uniformly at random},
journal = {Theoretical Computer Science},
year = {2007},
volume = {379},
pages = {377-386},
number = {3},
abstract = {We present a deterministic polynomial time algorithm to sample a labeled
planar graph uniformly at random. Our approach uses recursive formulae
for the exact number of labeled planar graphs with n vertices and
m edges, based on a decomposition into 1-, 2-, and 3-connected components.
We can then use known sampling algorithms and counting formulae for
3-connected planar graphs.},
file = {BGK07.pdf:BGK07.pdf:PDF},
keywords = {Labeled planar graph, Enumeration, Decomposition, Sampling algorithm,
Dynamic programming, Graph theory},
owner = {groeplhl},
url = {http://www.sciencedirect.com/science/article/B6V1G-4N4PY45-5/2 /4c06fbabe635c3a288cbf0427e0b395d}
}
@INPROCEEDINGS{BGK05a,
author = {Manuel Bodirsky and Clemens Gr{\"o}pl and Mihyun Kang},
title = {Sampling Unlabeled Biconnected Planar Graphs},
booktitle = {Proceedings of the 16th Annual International Symposium on Algorithms
and Computation (ISAAC05)},
year = {2005},
file = {BGK05a.pdf:BGK05a.pdf:PDF},
keywords = {Planar Graph, Enumeration, Random graphs, Random Generation, Graph
Theory}
}
@INPROCEEDINGS{BGK03b,
author = {Manuel Bodirsky and Clemens Gr{\"o}pl and Mihyun Kang},
title = {Generating labeled planar graphs uniformly at random},
booktitle = {Proceedings of ICALP 2003},
year = {2003},
number = {2719},
series = {Lecture Notes in Computer Science},
pages = {1095--1107},
publisher = {Springer Verlag},
note = {Appeared 2008 in Theoretical Computer Science. \{ The download is
the journal submission
From time to time we receive requests
for source code, so here it is: See the files BGK03b.README and GBK03b.tar.bz2
\}},
abstract = {We present an expected polynomial time algorithm to generate a labeled
planar graph uniformly at random. To generate the planar graphs,
we derive recurrence formulas that count all such graphs with n vertices
and m edges, based on a decomposition into 1-, 2-, and 3-connected
components. For 3-connected graphs we apply a recent random generation
algorithm by Schaeffer and a counting formula by Mullin and Schellenberg.},
file = {BGK03b.pdf:BGK03b.pdf:PDF},
keywords = {Planar Graph, Enumeration, Random graphs, Random Generation, Dynamic
Programming, Graph Theory}
}
@INPROCEEDINGS{BGK03c,
author = {Manuel Bodirsky and Clemens Gr{\"o}pl and Mihyun Kang},
title = {Decomposing, Counting, and Generating Unlabeled Cubic Planar Graphs},
booktitle = {European Conference on Combinatorics, Graph Theory, and Applications
EUROCOMB'03 Prague},
year = {2003},
abstract = {We present an expected polynomial time algorithm to generate an unlabeled
connected cubic planar graph uniformly at random. We first consider
rooted cubic planar graphs, i.e., we count connected cubic planar
graphs up to isomorphisms that fix a certain directed edge. Based
on decompositions along the connectivity structure, we derive recurrence
formulas for counting the exact number of rooted cubic planar graphs.
This leads to 3-connected planar graphs, which have a unique embedding
on the sphere; but special care has to be taken for rooted graphs
that have a sense-reversing automorphism. Therefore we introduce
the concept of colored networks, which stand in bijective correspondence
to rooted graphs with given symmetries. Colored networks can again
be decomposed along their connectivity structure. For rooted 3-connected
cubic planar graphs embedded in the plane, we switch to the dual
and count rooted triangulations. All these numbers can be evaluated
in polynomial time by dynamic programming. We can use them to generate
rooted cubic planar graphs uniformly at random. To generate connected
cubic planar graphs without a root uniformly at random, we apply
rejection sampling and obtain an expected polynomial time algorithm.},
file = {BGK03c.pdf:BGK03c.pdf:PDF},
keywords = {Cubic Planar Graph, Planar Graph, Cubic Graph, Enumeration, Random
graphs, Random Generation, Dynamic Programming, Graph Theory}
}
@ARTICLE{FGGGHNPT03,
author = {Fr{\"o}mmel, Cornelius and Gille, Christoph and Goede, Andrean and
Gr{\"o}pl, Clemens and Hougardy, Stefan and Nierhoff, Till and Preissner,
Robert and Thimm, Martin},
title = {Accelerating screening of {3D} protein data with a graph theoretical
approach},
journal = {Bioinformatics},
year = {2003},
volume = {19},
pages = {2442--2447},
number = {18},
month = {December},
abstract = {Motivation: The Dictionary of Interfaces in Proteins (DIP) is a database
collecting the 3D structure of interacting parts of proteins that
are called patches. It serves as a repository, in which patches similar
to given query patches can be found. The computation of the similarity
of two patches is time consuming and traversing the entire DIP requires
some hours. In this work we address the question of how the patches
similar to a given query can be identified by scanning only a small
part of DIP. The answer to this question requires the investigation
of the distribution of the similarity of patches. Results: The score
values describing the similarity of two patches can roughly be divided
into three ranges that correspond to different levels of spatial
similarity. Interestingly, the two iso-score lines separating the
three classes can be determined by two different approaches. Applying
a concept of the theory of random graphs reveals significant structural
properties of the data in DIP. These can be used to accelerate scanning
the DIP for patches similar to a given query. Searches for very similar
patches could be accelerated by a factor of more than 25. Patches
with a medium similarity could be found 10 times faster than by brute-force
search. 10.1093/bioinformatics/btg343},
citeulike-article-id = {2946188},
file = {FGGGHNPT03.pdf:FGGGHNPT03.pdf:PDF;FGGGHNPT03.ps:FGGGHNPT03.ps:PostScript},
keywords = {algorithm, proteomics, statistics},
priority = {5},
url = {http://dx.doi.org/10.1093/bioinformatics/btg343}
}
@ARTICLE{FGGGHNPT03_prelim,
author = {C. Fr{\"o}mmel and C. Gille and A. Goede and C. Gr{\"o}pl and S.
Hougardy and T. Nierhoff and R. Prei{\ss}ner and M. Thimm},
title = {Accelerating screening of {3D} protein data with a graph theoretical
approach},
journal = {Bioinformatics},
year = {2003},
volume = {19},
pages = {2442-2447},
note = {\{Please note that this downloadable version is a preliminary version
that differs from the final published version\}},
abstract = {Motivation: The Dictionary of Interfaces in Proteins (DIP)
is a database collecting the 3D structure of interacting parts of
proteins that are called patches. It serves as a repository, in which
patches similar to given query patches can be found. The computation
of the similarity of two patches is time consuming and traversing
the entire DIP requires some hours. In this work we address the question
of how the patches similar to a given query can be identified by
scanning only a small part of DIP. The answer to this question requires
the investigation of the distribution of the similarity of patches.
Results: The score values describing the similarity of two patches can roughly be divided into three ranges that correspond to different levels of spatial similarity. Interestingly, the two iso-score lines separating the three classes can be determined by two different approaches. Applying a concept of the theory of random graphs reveals significant structural properties of the data in DIP. These can be used to accelerate scanning the DIP for patches similar to a given query. Searches for very similar patches could be accelerated by a factor of more than 25. Patches with a medium similarity could be found 10 times faster than by brute-force search.}, file = {FGGGHNPT03.pdf:FGGGHNPT03.pdf:PDF}, url = {http://bioinformatics.oupjournals.org} } @MISC{FGGGHNPT03b, author = {C. Fr{\"o}mmel and C. Gille and A. Goede and C. Gr{\"o}pl and S. Hougardy and T. Nierhoff and R. Prei{\ss}ner and M. Thimm}, title = {Accelerating Screening of {3D} Protein Data with a Graph Theoretical Approach}, year = {2003}, note = {Poster presented at RECOMB 2003}, owner = {groeplhl} } @MISC{Habilitation-Groepl, author = {Clemens Gr{\"o}pl}, title = {{U}ntersuchungen zum {S}teinerbaumproblem in {G}raphen, zur {E}numeration zuf{\"a}lliger planarer {G}raphen, und zu {A}lgorithmen in der massenspektrometrie-basierten {P}roteomik ({Habilitationsschrift})}, year = {2008}, abstract = {Dokumente zum (kumulativen) Habilitationsverfahren von Dr. Clemens Gr{\"o}pl an der Freien Universit{\"a}t Berlin. Das Gesamtthema der publizierten Forschungsergebnisse gem{\"a}{\ss} §2.I.(c) der Habilitationsordnung des Fachbereiches Mathematik und Informatik der Freien Universit{\"a}t Berlin (ver{\"o}ffentlicht im Amtsblatt der Freien Universit{\"a}t Berlin, 4.10.1999. Ausgabe Nr. 2311999) lautet: "Untersuchungen zum {S}teinerbaumproblem in {G}raphen, zur {E}numeration zuf{\"a}lliger planarer {G}raphen, und zu {A}lgorithmen in der massenspektrometrie-basierten {P}roteomik." - Antrag - Erkl{\"a}rung - Summary of Publications for Habilitation - Sechzehn Originalarbeiten (bitte Copyright beachten!) Das Habilitationsverfahren wurde am 09.07.2009 er{\"o}ffnet und am 10.06.2009 erfolgreich abgeschlossen.}, file = {Habilitation-Groepl.pdf:Habilitation-Groepl.pdf:PDF}, owner = {groepl}, school = {Free University Berlin}, timestamp = {2009.04.29} } @INPROCEEDINGS{GroeplFeaFiDagstuhl05, author = {Clemens Gr{\"o}pl}, title = {An Algorithm for Feature Finding in {LC/MS} Raw Data}, booktitle = {Computational Proteomics}, year = {2005}, organization = {IBFI}, publisher = {Dagstuhl Online Publication Server (DROPS)}, note = {Extended abstract for talk given at Dagstuhl Seminar 05471 on Computational Proteomics, 20.-25. November 2005}, file = {GroeplFeaFiDagstuhl05.pdf:GroeplFeaFiDagstuhl05.pdf:PDF}, url = {http://www.dagstuhl.de/05471/} } @MISC{GroeplAlgBio2002, author = {Clemens Gr{\"o}pl}, title = {{A}lgorithmen in der {B}ioinformatik}, year = {2003}, note = {Skript zu meiner Vorlesung im Wintersemester 2002/03 am Institut f{\"u}r Informatik der Humboldt-Universit{\"a}t zu Berlin}, abstract = {Skript zu meiner Vorlesung {\"u}ber "Algorithmen in der Bioinformatik" im Wintersemester 2002/03 am Institut f{\"u}r Informatik der Humboldt-Universit{\"a}t zu Berlin}, file = {GroeplAlgBio2002.pdf:GroeplAlgBio2002.pdf:PDF} } @PHDTHESIS{Groepl99, author = {Clemens Gr{\"o}pl}, title = {Binary Decision Diagrams for Random Boolean Functions}, school = {Humboldt-Universit{\"a}t zu Berlin}, year = {1999}, abstract = {(deutsche Zusammenfassung siehe unten)
Binary Decision Diagrams (BDDs) are a data structure for Boolean functions which are also known as branching programs. In ordered binary decision diagrams (OBDDs), the tests have to obey a fixed variable ordering. In free binary decision diagrams (FBDDs), each variable can be tested at most once. The efficiency of new variants of the BDD concept is usually demonstrated with spectacular (worst-case) examples. We pursue another approach and compare the representation sizes of almost all Boolean functions. Whereas I. Wegener proved that for `most' values of n the expected OBDD size of a random Boolean function of n variables is equal to the worst-case size up to terms of lower order, we show that this is not the case for n within intervals of constant length around the values n = 2h + h. Furthermore, ranges of n exist for which minimal FBDDs are almost always at least a constant factor smaller than minimal OBDDs. Our main theorems have doubly exponentially small probability bounds (in n). We also investigate the evolution of random OBDDs and their worst-case size, revealing an oscillating behaviour that explains why certain results cannot be improved in general.
Zusammenfassung:
Binary Decision Diagrams (BDDs) sind eine Datenstruktur f{\"u}r Boolesche Funktionen, die auch unter dem Namen branching program bekannt ist. In ordered binary decision diagrams (OBDDs) m{\"u}ssen die Tests einer festen Variablenordnung gen{\"u}gen. In free binary decision diagrams (FBDDs) darf jede Variable h{\"o}chstens einmal getestet werden. Die Effizienz neuer Varianten des BDD-Konzepts wird gew{\"o}hnlich anhand spektakul{\"a}rer (worst-case) Beispiele aufgezeigt. Wir verfolgen einen anderen Ansatz und vergleichen die Darstellungsgr{\"o}{\ss}en f{\"u}r fast alle Booleschen Funktionen. W{\"a}hrend I. Wegener bewiesen hat, da{\ss} f{\"u}r die `meisten' n die erwartete OBDD-Gr{\"o}{\ss}e einer zuf{\"a}lligen Booleschen Funktion von n Variablen gleich der worst-case Gr{\"o}{\ss}e bis auf Terme kleinerer Ordnung ist, zeigen wir da{\ss} dies nicht der Fall ist f{\"u}r n innerhalb von Intervallen konstanter L{\"a}nge um die Werte n = 2h + h. Ferner gibt es Bereiche von n, in denen minimale FBDDs fast immer um mindestens einen konstanten Faktor kleiner sind als minimale OBDDs. Unsere Haupts{\"a}tze ha ben doppelt exponentielle Wahrschein- lichkeitsschranken (in n). Au{\ss}erdem untersuchen wir die Entwicklung zuf{\"a}lliger OBDDs und ihrer worst-case Gr{\"o}{\ss}e und decken dabei ein oszillierendes Verhalten auf, das erkl{\"a}rt, warum gewisse Aussagen im allgemeinen nicht verst{\"a}rkt werden k{\"o}nnen.
Schlagw{\"o}rter:
Bin{\"a}res Entscheidungsdiagramm, Boolesche Funktion,probabilistische Analyse, Shannon Effekt.}, file = {Groepl99.pdf:Groepl99.pdf:PDF}, keywords = {Binary decision diagram, Boolean function, probabilistic analysis, Shannon effect}, timestamp = {09.02.2001 00:00:00}, url = {http://dochost.rz.hu-berlin.de/dissertationen/informatik/groepl-clemens} } @MASTERSTHESIS{Groepl96, author = {Clemens Gr{\"o}pl}, title = {{\"U}ber {A}pproximationsalgorithmen zur {F}{\"a}rbung $k$-f{\"a}rbbarer {G}raphen, die vektorchromatische {Z}ahl und andere {V}arianten der ${\vartheta}$-{F}unktion}, school = {Rheinische Friedrich-Wilhelms-Universit{\"a}t Bonn}, year = {1996}, type = {Diplomarbeit}, address = {Forschungsinstitut f{\"ur} Diskrete Mathematik}, month = {January}, file = {Groepl96.pdf:Groepl96.pdf:PDF}, htmltitle = {\"Uber Approximationsalgorithmen zur F{\"a}rbung k-f{\"a}rbbarer Graphen, die vektorchromatische Zahl und andere Varianten der θ-Funktion}, keywords = {Approximation algorithms, Colouring, packing and covering}, timestamp = {03.04.2000 00:00:00} } @MISC{OpenMSPosterMBI05, author = {Clemens Gr{\"o}pl and Andreas Hildebrandt and Oliver Kohlbacher and Eva Lange and Sandra L{\"o}venich and Marc Sturm}, title = {{OpenMS} - {S}oftware for {M}ass {S}pectrometry}, howpublished = {Poster presented at the MBI Workshop on Computational Proteomics and Mass Spectrometry, January 11-14, Mathematical Biosciences Institute, Ohio State University, Columbus, Ohio}, year = {2005}, note = {http://mbi.osu.edu/2004/workshops2004.html}, file = {OpenMSPosterMBI05.pdf:OpenMSPosterMBI05.pdf:PDF}, keywords = {mass spectrometry, proteomics, open source software} } @MISC{OpenMSPosterHUPOMuenchen05, author = {Clemens Gr{\"o}pl and Andreas Hildebrandt and Oliver Kohlbacher and Eva Lange and Marc Sturm}, title = {{OpenMS} - a generic open source framework for {HPLC/MS}-based proteomics}, howpublished = {Poster presented at the Fourth Annual World Congress of the Human Proteome Organization, Munich}, year = {2005}, note = {http://www.hupo2005.com/}, file = {OpenMSPosterHUPOMuenchen05.pdf:OpenMSPosterHUPOMuenchen05.pdf:PDF}, keywords = {mass spectrometry, proteomics, open source software} } @INCOLLECTION{GHNP01a, author = {Clemens Gr{\"o}pl and Stefan Hougardy and Till Nierhoff and Hans J{\"u}rgen Pr{\"o}mel}, title = {Approximation Algorithms for the {S}teiner Tree Problem in Graphs}, booktitle = {Steiner Trees in Industry}, publisher = {Kluwer Academic Publishers}, year = {2001}, editor = {Xiuzhen Cheng and Ding-Zhu Du}, pages = {235-279}, note = {Survey article with new proofs}, file = {GHNP01a.pdf:GHNP01a.pdf:PDF}, keywords = {Approximation algorithms, Combinatorial optimization, Graph algorithms, Steiner trees} } @ARTICLE{GHNP01d, author = {Clemens Gr{\"o}pl and Stefan Hougardy and Till Nierhoff and Hans J{\"u}rgen Pr{\"o}mel}, title = {Steiner trees in uniformly quasi-bipartite graphs}, journal = {Information Processing Letters}, year = {2002}, volume = {83}, pages = {195-200}, abstract = {The area of approximation algorithms for the Steiner tree problem in graphs has seen continuous progress over the last years. Currently the best approximation algorithm has a performance ratio of 1.550. This is still far away from 1.0074, the largest known lower bound on the achievable performance ratio. As all instances resulting from known lower bound reductions are uniformly quasi-bipartite, it is interesting whether this special case can be approximated better than the general case. We present an approximation algorithm with performance ratio 73/60 < 1.217 for the uniformly quasi-bipartite case. This improves on the previously known ratio of 1.279 of Robins and Zelikovsky. We use a new method of analysis that combines ideas from the greedy algorithm for set cover with a matroid-style exchange argument to model the connectivity constraint. As a consequence, we are able to provide a tight instance.}, file = {GHNP01d.pdf:GHNP01d.pdf:PDF}, keywords = {Steiner trees, Graph algorithms, Approximation algorithms} } @INPROCEEDINGS{GHNP01b, author = {Clemens Gr{\"o}pl and Stefan Hougardy and Till Nierhoff and Hans J{\"u}rgen Pr{\"o}mel}, title = {Lower bounds for approximation algorithms for the {S}teiner tree problem}, booktitle = {Proceedings of the 27th International Workshop on Graph-Theoretic Concepts in Computer Science (2001)}, year = {2001}, series = {LNCS}, publisher = {Springer Verlag}, abstract = {The Steiner tree problem asks for a shortest subgraph connecting a given set of terminals in a graph. It is known to be APX-complete, which means that no polynomial time approximation scheme can exist for this problem, unless P=NP. Currently, the best approximation algorithm for the Steiner tree problem has a performance ratio of $1.55$, whereas the corresponding lower bound is smaller than $1.01$. In this paper, we provide for several Steiner tree approximation algorithms lower bounds on their performance ratio that are much larger. For two algorithms that solve the Steiner tree problem on quasi-bipartite instances, we even prove lower bounds that match the upper bounds. Quasi-bipartite instances are of special interest, as currently all known lower bound reductions for the Steiner tree problem in graphs produce such instances.}, file = {GHNP01b.pdf:GHNP01b.pdf:PDF}, keywords = {Steiner trees, Approximation algorithms, Combinatorial optimization, Graph algorithms, Hypergraphs, set systems, and designs} } @MISC{Steinerbaum-Poster, author = {Clemens Gr{\"o}pl and Stefan Hougardy and Till Nierhoff and Hans J{\"u}rgen Pr{\"o}mel and Martin Thimm}, title = {{A}pproximationsalgorithmen f{\"u}r das {S}teinerbaumproblem in {G}raphen}, howpublished = {Poster}, year = {2002}, file = {Steinerbaum-Poster.pdf:Steinerbaum-Poster.pdf:PDF}, owner = {groeplhl} } @MISC{AbsQuantMyoPosterHUPOMuenchen05, author = {Clemens Gr{\"o}pl and Eva Lange and Knut Reinert and Oliver Kohlbacher and Marc Sturm and Christian G. Huber and Bettina M. Mayr and Christoph L. Klein}, title = {Absolute quantification of the myoglobin content in blood serum using {HPLC/MS} through automatic bioinformatics analysis}, howpublished = {Poster presented at the Fourth Annual World Congress of the Human Proteome Organization (HUPO05), Munich }, year = {2005}, note = {http://www.hupo2005.com/}, file = {AbsQuantMyoPosterHUPOMuenchen05.pdf:AbsQuantMyoPosterHUPOMuenchen05.pdf:PDF}, keywords = {mass spectrometry, proteomics, open source software} } @INPROCEEDINGS{GLRKSHMK05, author = {Clemens Gr{\"o}pl and Eva Lange and Knut Reinert and Oliver Kohlbacher and Marc Sturm and Christian G. Huber and Bettina M. Mayr and Christoph L. Klein}, title = {Algorithms for the automated absolute quantification of diagnostic markers in complex proteomics samples}, booktitle = {Proceedings of the 1st International Symposium on Computational Life Science (CompLife05)}, year = {2005}, pages = {151-163}, abstract = {HPLC-ESI-MS is rapidly becoming an established standard method for shotgun proteomics. Currently, its major drawbacks are twofold: quantification is mostly limited to relative quantification and the large amount of data produced by every individual experiment can make manual analysis quite difficult. Here we present a new, combined experimental and algorithmic approach to absolutely quantify proteins from samples with unprecedented precision. We apply the method to the analysis of myoglobin in human blood serum, which is an important diagnostic marker for myocardial infarction. Our approach was able to determine the absolute amount of myoglobin in a serum sample through a series of standard addition experiments with a relative error of 2.5 percent. Compared to a manual analysis of the same dataset we could improve the precision and conduct it in a fraction of the time needed for the manual analysis. We anticipate that our automatic quantitation method will facilitate further absolute or relative quantitation of even more complex peptide samples. The algorithm was developed using our publically available software framework OpenMS (www.openms.de).}, file = {GLRKSHMK05.pdf:GLRKSHMK05.pdf:PDF} } @INPROCEEDINGS{GPS98, author = {Clemens Gr{\"o}pl and Hans J{\"u}rgen Pr{\"o}mel and Anand Srivastav}, title = {Size and Structure of Random Ordered Binary Decision Diagrams (Extended Abstract)}, booktitle = {STACS 98}, year = {1998}, editor = {Daniel Korb and Christoph Meinel and Michel Morvan}, number = {1373}, series = {Lecture Notes in Computer Science}, pages = {238--248}, address = {Berlin, Heidelberg, New York}, publisher = {Springer Verlag}, abstract = {We investigate the size and structure of ordered binary decision diagrams (OBDDs) for random Boolean functions.Wegener [Weg94] proved that for “most” values of n, the expected OBDD-size of a random Boolean function with n variables equals the worst-case size up to terms of lower order. Our main result is that this phenomenon, also known as strong Shannon effect, shows a threshold behaviour: The strong Shannon effect does not hold within intervals of constant width around the values n=2^h+2, but it does hold outside these intervals. Also, the oscillation of the expected and the worst-case size is described. Methodical innovations of our approach are a functional equation to locate “critical levels” in OBDDs and the use of Azuma’s martingale inequality and Chv´atal’s large deviation inequality for the hypergeometric distribution. This leads to significant improvements over Wegener’s probability bounds.}, file = {GPS98.pdf:GPS98.pdf:PDF}, keywords = {VLSI-Design and layout, Hardware verification, Random graphs} } @ARTICLE{GPS04, author = {Clemens Gr{\"o}pl and Hans J{\"u}rgen Pr{\"o}mel and Anand Srivastav}, title = {Ordered binary decision diagrams and the {S}hannon effect}, journal = {Discrete Applied Mathematics}, year = {2004}, volume = {142}, pages = {67-85}, abstract = {We investigate the size and structure of ordered binary decision diagrams (OBDDs) for random Boolean functions. It was known that for most values of n, the expected OBDD size of a random Boolean function with n variables is equal to the worst-case size up to terms of lower order. Such a phenomenon is generally called strong Shannon effect. Here we show that the strong Shannon effect is not valid for all n. Instead it undergoes a certain periodic `phase transition': If n lies within intervals of constant width around the values n = 2h + h, then the strong Shannon effect does not hold, whereas it does hold outside these intervals. Our analysis provides doubly exponential probability bounds and generalises to ordered Kronecker functional decision diagrams (OKFDDs). }, file = {GPS04.pdf:GPS04.pdf:PDF}, keywords = {VLSI-Design and layout, Binary Decision Diagrams, Graph theory (other), Hardware verification, Probability theory, Random graphs, Randomized algorithms and probabilistic analysis, Theoretical computer science (other)} } @ARTICLE{GPS01, author = {Clemens Gr{\"o}pl and Hans J{\"u}rgen Pr{\"o}mel and Anand Srivastav}, title = {On the Evolution of the Worst-Case {OBDD} Size}, journal = {Information Processing Letters}, year = {2001}, volume = {77}, pages = {1-7}, abstract = {We prove matching lower and upper bounds on the worst-case OBDD size of a Boolean function, revealing an interesting ocillating behaviour.}, file = {GPS01.pdf:GPS01.pdf:PDF}, keywords = {Binary Decision Diagrams} } @MISC{GroRei-MNU06, author = {Clemens Gr{\"o}pl and Knut Reinert}, title = {Bioinformatik - Ein ideales {F}ach, um mathematisch-naturwissenschaftliche {F}{\"a}cher ({B}iologie, {C}hemie, {M}athematik) mit der {I}nformatik zu verbinden}, howpublished = {Talk given at 5. Berliner MNU-Kongress, TU Berlin, http://www.mnu-berlin.de/}, year = {2006}, file = {GroRei-MNU06.pdf:GroRei-MNU06.pdf:PDF;GroRei-MNU06.ppt:GroRei-MNU06.ppt:PowerPoint}, url = {http://www.mnu-berlin.de/} } @INCOLLECTION{GS98, author = {Clemens Gr{\"o}pl and Martin Skutella}, title = {Parallel Repetition of {MIP(2,1)} Systems}, booktitle = {Lectures on Proof Verification and Approximation Algorithms}, publisher = {Springer}, year = {1998}, editor = {Ernst W. Mayr and Hans J{\"u}rgen Pr{\"o}mel and Angelika Steger}, volume = {1367}, series = {Lecture Notes in Computer Science}, chapter = {6}, pages = {161-177}, note = {The book grow out of a Dagstuhl Seminar, April 21-25, 1997}, file = {GS98.pdf:GS98.pdf:PDF}, keywords = {Theoretical computer science (other), Approximation algorithms, PCP and non-approximability} } @INPROCEEDINGS{KRGLPTS06, author = {Oliver Kohlbacher and Knut Reinert and Clemens Gr{\"o}pl and Eva Lange and Nico Pfeiffer and Ole Schulz-Trieglaff and Marc Sturm}, title = {{TOPP} - {T}he {OpenMS} {P}roteomics {P}ipeline}, booktitle = {Proceedings of the 5th European Conference on Computational Biology (ECCB 2006)}, year = {2006}, abstract = {Motivation: Experimental techniques in proteomics have seen rapid development over the last few years. Volume and complexity of the data have both been growing at a similar rate. Accordingly, data management and analysis are one of the major challenges in proteomics. Flexible algorithms are required to handle changing experimental setups and to assist in developing and validating new methods. In order to facilitate these studies, it would be desirable to have a flexible `toolbox' of versatile and user-friendly applications allowing for rapid construction of computational workflows in proteomics. Results: We describe a set of tools for proteomics data analysis -- TOPP, The OpenMS Proteomics Pipeline. TOPP provides a set of computational tools which can be easily combined into analysis pipelines even by non-experts and can be used in proteomics workflows. These applications range from useful utilities (file format conversion, peak picking) over wrapper applications for known applications (e.g. Mascot) to completely new algorithmic techniques for data reduction and data analysis. We anticipate that TOPP will greatly facilitate rapid prototyping of proteomics data evaluation pipelines. As such, we describe the basic concepts and the current abilities of TOPP and illustrate these concepts in the context of two example applications: the identification of peptides from a raw data set through database search and the complex analysis of a standard addition experiment for the absolute quantitation of biomarkers. The latter example demonstrates TOPP's ability to construct flexible analysis pipelines in support of complex experimental setups. Availability: The TOPP components are available as open-source software under the lesser GNU public license (LGPL). Source code is available from the project web site at www.OpenMS.de}, file = {KRGLPTS06.pdf:KRGLPTS06.pdf:PDF} } @INPROCEEDINGS{LanGroRei06, author = {Eva Lange and Clemens Gr{\"o}pl and Knut Reinert and Oliver Kohlbacher and Andreas Hildebrandt}, title = {High Accuracy Peak-Picking of Proteomics Data using Wavelet Techniques}, booktitle = {Proceedings of the 11th Pacific Symposium on Biocomputing (PSB06)}, year = {2006}, pages = {243-254}, abstract = {A new peak picking algorithm for the analysis of mass spectrometric (MS) data is presented. It is independent of the underlying machine or ionization method, and is able to resolve highly convoluted and asymmetric signals. The method uses the multiscale nature of spectrometric data by first detecting the mass peaks in the wavelet-transformed signal before a given asymmetric peak function is fitted to the raw data. In an optional third stage, the resulting fit can be further improved using techniques from nonlinear optimization. In contrast to currently established techniques (e.g. SNAP, Apex) our algorithm is able to separate overlapping peaks of multiply charged peptides in ESI-MS data of low resolution. Its improved accuracy with respect to peak positions makes it a valuable preprocessing method for MS-based identification and quantification experiments. The method has been validated on a number of different annotated test cases, where it compares favorably in both runtime and accuracy with currently established techniques. An implementation of the algorithm is freely available in our open source framework OpenMS.}, file = {LanGroRei06.pdf:LanGroRei06.pdf:PDF} } @INPROCEEDINGS{LangePePiDagstuhl05, author = {Eva Lange and Clemens Gr{\"o}pl and Knut Reinert and Oliver Kohlbacher and Andreas Hildebrandt}, title = {High-accuracy peak picking of proteomics data}, booktitle = {Computational Proteomics}, year = {2005}, organization = {IBFI}, publisher = {Dagstuhl Online Publication Server (DROPS)}, note = {Extended abstract for talk given at Dagstuhl Seminar 05471 on Computational Proteomics, 20.-25. November 2005}, file = {LangePePiDagstuhl05.pdf:LangePePiDagstuhl05.pdf:PDF} } @INPROCEEDINGS{Lange_MapAlignment07, author = {Eva Lange and Clemens Gr{\"o}pl and Ole Schulz-Trieglaff and Andreas Leinenbach and Christian Huber and Knut Reinert}, title = {A Geometric Approach for the Alignment of Liquid Chromatography-Mass Spectrometry Data}, booktitle = {Proceedings of the 15th Annual International Conference on Intelligent Systems for Molecular Biology (ISMB) \& 6th European Conference on Computational Biology (ECCB)}, year = {2007}, abstract = {Motivation: Liquid chromatography coupled to mass spectrometry (LC-MS) and combined with tandem mass spectrometry (LC-MS/MS) have become a prominent tool for the analysis of complex proteomic samples. An important step in a typical workflow is the combination of results from multiple LC-MS experiments to improve confidence in the obtained measurements or to compare results from different samples. To do so, a suitable mapping or alignment between the data sets needs to be estimated. The alignment has to correct for variations in mass and elution time which are present in all mass spectrometry experiments. Results: We propose a novel algorithm to align LC-MS samples and to match corresponding ion species across samples. Our algorithm matches landmark signals between two data sets using a geometric technique based on pose clustering. Variations in mass and retention time are corrected by an affine dewarping function estimated from matched landmarks. We use the pairwise dewarping in an algorithm for aligning multiple samples. We show that our pose clustering approach is fast and reliable as compared to previous approaches. It is robust in the presence of noise and able to accurately align samples with only few common ion species. In addition, we can easily handle different kinds of LC-MS data and adopt our algorithm to new mass spectrometry technologies. Availability: This algorithm is implemented as part of the OpenMS software library for shotgun proteomics and available under the Lesser GNU Public License (LGPL) at www.openms.de}, file = {Lange_MapAlignment07.pdf:Lange_MapAlignment07.pdf:PDF}, owner = {groeplhl} } @ARTICLE{LTNG08, author = {Eva Lange and Ralf Tautenhahn and Steffen Neumann and Clemens Gr{\"o}pl}, title = {Critical assessment of alignment procedures for {LC-MS} proteomics and metabolomic measurements}, journal = {BMC Bioinformatics}, year = {2008}, volume = {9}, number = {375}, abstract = {Background Liquid chromatography coupled to mass spectrometry (LC-MS) has become a prominent tool for the analysis of complex proteomics and metabolomics samples. In many applications multiple LC-MS measurements need to be compared, e. g. to improve reliability or to combine results from different samples in a statistical comparative analysis. As in all physical experiments, LC-MS data are affected by uncertainties, and variability of retention time is encountered in all data sets. It is therefore necessary to estimate and correct the underlying distortions of the retention time axis to search for corresponding compounds in different samples. To this end, a variety of so-called LC-MS map alignment algorithms have been developed during the last four years. Most of these approaches are well documented, but they are usually evaluated on very specific samples only. So far, no publication has been assessing different alignment algorithms using a standard LC-MS sample along with commonly used quality criteria. Results We propose two LC-MS proteomics as well as two LC-MS metabolomics data sets that represent typical alignment scenarios. Furthermore, we introduce a new quality measure for the evaluation of LC-MS alignment algorithms. Using the four data sets to compare six freely available alignment algorithms proposed for the alignment of metabolomics and proteomics LC-MS measurements, we found significant differences with respect to alignment quality, running time, and usability in general. Conclusion The multitude of available alignment methods necessitates the generation of standard data sets and quality measures that allow users as well as developers to benchmark and compare their map alignment tools on a fair basis. Our study represents a first step in this direction. Currently, the installation and evaluation of the "correct" parameter settings can be quite a time-consuming task, and the success of a particular method is still highly dependent on the experience of the user. Therefore, we propose to continue and extend this type of study to a community-wide competition. All data as well as our evaluation scripts are available at http://msbi.ipb-halle.de/msbi/caap webcite.}, doi = {10.1186/1471-2105-9-375}, url = {http://www.biomedcentral.com/1471-2105/9/375} } @ARTICLE{MayKohRei06, author = {Bettina Mayr and Oliver Kohlbacher and Knut Reinert and Marc Sturm and Clemens Gr{\"o}pl and Eva Lange and Christoph Klein and Christian Huber}, title = {Absolute Myoglobin Quantitation in Serum by Combining Two-Dimensional Liquid Chromatography-Electrospray Ionization Mass Spectrometry and Novel Data Analysis Algorithms}, journal = {Journal of Proteome Research}, year = {2006}, volume = {5}, pages = {414-421}, abstract = {To measure myoglobin, a marker for myocardial infarction, directly in human serum, two-dimensional liquid chromatography in combination with electrospray ionization mass spectrometry was applied as an analytical method. High-abundant serum proteins were depleted by strong anion-exchange chromatography. The myoglobin fraction was digested and injected onto a 60 mm x 0.2 mm i.d. monolithic capillary column for quantitation of selected peptides upon mass spectrometric detection. The addition of known amounts of myoglobin to the serum sample was utilized for calibration, and horse myoglobin was added as an internal standard to improve reproducibility. Calibration graphs were linear and facilitated the reproducible and accurate determination of the myoglobin amount present in serum. Manual data evaluation using integrated peak areas and an automated multistage algorithm fitting two-dimensional models of peptide elution profiles and isotope patterns to the mass spectrometric raw data were compared. When the automated method was applied, a myoglobin concentration of 460 pg/íL serum was determined with a maximum relative deviation from the theoretical value of 10.1% and a maximum relative standard deviation of 13.4%.}, file = {MayKohRei06.pdf:MayKohRei06.pdf:PDF} } @MISC{VDINachrichten07, author = {Christian Meier}, title = {{B}ioinformatik: {A}ktuelle {P}roteinausstattung erlaubt {A}ussagen {\"u}ber den {G}esundheitszustand. {S}oftware hilft {\"A}rzten k{\"u}nftig bei der {D}iagnose.}, howpublished = {VDI Nachrichten}, month = {22. Juni}, year = {2007}, note = {Der {A}rtikel eines freien {M}edienjournalisten beschreibt den {E}insatz von {OpenMS} in der {P}roteomik}, abstract = {Der Artikel eines freien Medienjournalisten beschreibt den Einsatz von OpenMS in der Proteomik.}, file = {VDINachrichten07.pdf:VDINachrichten07.pdf:PDF}, owner = {groeplhl} } @INPROCEEDINGS{ReinertOpenMSDagstuhl05, author = {Knut Reinert and Oliver Kohlbacher and Clemens Gr{\"o}pl and Eva Lange and Ole {Schulz-Trieglaff} and Marc Sturm and Nico Pfeifer}, title = {OpenMS - A Framework for Quantitative HPLC/MS-Based Proteomics}, booktitle = {Computational Proteomics}, year = {2005}, organization = {IBFI}, publisher = {Dagstuhl Online Publication Server (DROPS)}, note = {Extended abstract for talk given at Dagstuhl Seminar 05471 on Computational Proteomics, 20.-25. November 2005}, file = {ReinertOpenMSDagstuhl05.pdf:reinert/ReinertOpenMSDagstuhl05.pdf:PDF} } @MISC{BiomarkerECCB2006, author = {Ole Schulz-Trieglaff and Clemens Gr{\"o}pl and Joachim Thiemann and Knut Reinert and Hartmut Schl{\"u}ter}, title = {Peptide Biomarker Discovery for the Diagnosis of Renal Allograft Rejection}, howpublished = {Poster presented at the European Conference on Computational Biology (ECCB 2006), Eilat, Israel, http://www.eccb06.org/}, year = {2006} } @MISC{PepQuant_GCB06, author = {Ole Schulz-Trieglaff and Clemens Gr{\"o}pl and Joachim Thiemann and Knut Reinert and Hartmut Schl{\"u}ter}, title = {Peptide Quantification for the Early Diagnosis of Renal Allograft Rejection}, howpublished = {Poster presented at the German Conference on Bioinformatics (GCB 2006), T{\"u}bingen, Germany, http://www.gcb2006.de/}, year = {2006} } @INPROCEEDINGS{SchulzTrieglaff_PepQuant06, author = {Ole Schulz-Trieglaff and Rene Hussong and Clemens Gr{\"o}pl and Andreas Hildebrandt and Knut Reinert}, title = {A Fast and Accurate Algorithm for the Quantification of Peptides from Mass Spectrometry data}, booktitle = {Proceedings of the Eleventh Annual International Conference on Research in Computational Molecular Biology (RECOMB 2007)}, year = {2007}, pages = {473-487}, abstract = {Liquid chromatography combined with mass spectrometry (LC-MS) has become the prevalent technology in high-throughput proteomics research. One of the aims of this discipline is to obtain accurate quantitative information about all proteins and peptides in a biological sample. Due to size and complexity of the data generated in these experiments, this problem remains a challenging task requiring sophisticated and efficient computational tools. We propose an algorithm that can quantify even low abundance peptides from LC-MS data. Our approach is flexible and can be applied to preprocessed and raw instrument data. It is based on a combination of the sweep line paradigm with a novel wavelet function tailored to detect isotopic patterns. We evaluate our technique on several data sets of varying complexity and show that we are able to rapidly quantify peptides with high accuracy in a sound algorithmic framework.}, file = {SchulzTrieglaff_PepQuant06.pdf:SchulzTrieglaff_PepQuant06.pdf:PDF}, owner = {groeplhl} } @ARTICLE{SHGLHHR07, author = {Ole Schulz-Trieglaff and Rene Hussong and Clemens Gr{\"o}pl and Andreas Leinenbach and Andreas Hildebrandt and Christian Huber and Knut Reinert}, title = {Computational Quantification of Peptides from {LC-MS} data}, journal = {Journal of Computational Biology}, year = {2008}, volume = {15}, pages = {685-704}, number = {7}, abstract = {Liquid chromatography coupled to mass spectrometry (LC-MS) has become a major tool for the study of biological processes. High-throughput LC-MS experimentsare frequently conducted in modern laboratories, generating an enormous amountof data per day. A manual inspection is therefore no longer a feasible task. Consequently, there is a need for computational tools that can rapidly provide informationabout mass, elution time, and abundance of the compounds in a LC-MS sample. Wepresent an algorithm for the detection and quantification of peptides in LC-MS data. Our approach is flexible and independent of the MS technology in use. It is basedon a combination of the sweep line paradigm with a novel wavelet function tailoredto detect isotopic patterns of peptides. We propose a simple voting schema to usethe redundant information in consecutive scans for an accurate determination ofmonoisotopic masses and charge states. By explicitly modeling the instrument inaccuracy, we are also able to cope with data sets of different quality and resolution.We evaluate our technique on data from different instruments and show that we canrapidly estimate mass, centroid of retention time and abundance of peptides in a sound algorithmic framework. Finally, we compare the performance of our method to several other techniques on three data sets of varying complexity.}, doi = {10.1089/cmb.2007.0117}, file = {SHGLHHR07.pdf:SHGLHHR07.pdf:PDF}, keywords = {computational mass spectrometry, liquid chromatography - massspectrometry, quantification, wavelets}, url = {http://dx.doi.org/10.1089/cmb.2007.0117} } @ARTICLE{TPGKR08, author = {Ole Schulz-Trieglaff and Nico Pfeifer and Clemens Gr{\"o}pl and Oliver Kohlbacher and Knut Reinert }, title = {{LC-MSsim} - a simulation software for liquid chromatography mass spectrometry data}, journal = {BMC Bioinformatics}, year = {2008}, volume = {9}, number = {423}, abstract = {BACKGROUND: Mass Spectrometry coupled to Liquid Chromatography (LC-MS) is commonly used to analyze the protein content of biological samples in large scale studies. The data resulting from an LC-MS experiment is huge, highly complex and noisy. Accordingly, it has sparked new developments in Bioinformatics, especially in the fields of algorithm development, statistics and software engineering. In a quantitative label-free mass spectrometry experiment, crucial steps are the detection of peptide features in the mass spectra and the alignment of samples by correcting for shifts in retention time. At the moment, it is difficult to compare the plethora of algorithms for these tasks. So far, curated benchmark data exists only for peptide identification algorithms but no data that represents a ground truth for the evaluation of feature detection, alignment and filtering algorithms. RESULTS: We present LC-MSsim, a simulation software for LC-ESI-MS experiments. It simulates ESI spectra on the MS level. It reads a list of proteins from a FASTA file and digests the protein mixture using a user-defined enzyme. The software creates an LC-MS data set using a predictor for the retention time of the peptides and a model for peak shapes and elution profiles of the mass spectral peaks. Our software also offers the possibility to add contaminants, to change the background noise level and includes a model for the detectability of peptides in mass spectra. After the simulation, LC-MSsim writes the simulated data to public XML formats (mzXML or mzData). The software also stores the positions (monoisotopic m/z and retention time) and ion counts of the simulated ions in separate files. CONCLUSIONS: LC-MSsim generates simulated LC-MS data sets and incorporates models for peak shapes and contaminations. Algorithm developers can match the results of feature detection and alignment algorithms against the simulated ion lists and meaningful error rates can be computed. We anticipate that LC-MSsim will be useful to the wider community to perform benchmark studies and comparisons between computational tools.}, doi = {10.1186/1471-2105-9-423}, keywords = {algorithm, benchmark, lc-ms-ms, massspec, metabolomics, proteomics}, url = {http://dx.doi.org/10.1186/1471-2105-9-423} } @ARTICLE{SBGHHLPTZRK08, author = {Marc Sturm and Andreas Bertsch and Clemens Gr{\"o}pl and Andreas Hildebrandt and Rene Hussong and Eva Lange and Nico Pfeifer and Ole Schulz-Trieglaff and Alexandra Zerck and Knut Reinert and Oliver Kohlbacher}, title = {{OpenMS} - An open-source software framework for mass spectrometry}, journal = {BMC Bioinformatics}, year = {2008}, volume = {9}, number = {163}, abstract = {BACKGROUND: Mass spectrometry is an essential analytical technique for high-throughput analysis in proteomics and metabolomics. The development of new separation techniques, precise mass analyzers and experimental protocols is a very active field of research. This leads to more complex experimental setups yielding ever increasing amounts of data. Consequently, analysis of the data is currently often the bottleneck for experimental studies. Although software tools for many data analysis tasks are available today, they are often hard to combine with each other or not flexible enough to allow for rapid prototyping of a new analysis workflow. RESULTS: We present OpenMS, a software framework for rapid application development in mass spectrometry. OpenMS has been designed to be portable, easy-to-use and robust while offering a rich functionality ranging from basic data structures to sophisticated algorithms for data analysis. This has already been demonstrated in several studies. CONCLUSIONS: OpenMS is available under the Lesser GNU Public License (LGPL) from the project website at http://www.openms.de.}, doi = {10.1186/1471-2105-9-163}, keywords = {lc-ms-ms, massspec, proteomics}, url = {http://dx.doi.org/10.1186/1471-2105-9-163} }