@comment{De / En: language-switcher artifact left over from the web page this file was scraped from.}
@article{2_116509,
  author        = {Triet Ho Anh Doan and Zeki Mustafa Doğan and Jörg-Holger Panzer and Kristine Schima-Voigt and Philipp Wieder},
  title         = {{OLA-HD} – Ein {OCR-D}-Langzeitarchiv für historische Drucke},
  doi           = {10.18452/21548},
  month         = jan,
  year          = {2020},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/116509},
  internal-note = {NOTE(review): missing required journal field for @article -- TODO confirm venue. DOI 10.18452/21548 also appears on entry 2_121682 with a different title -- verify which work it resolves to.},
}
Removed: exact duplicate of entry 2_116509 above (repeated citation keys are an error for BibTeX/Biber; this inter-entry text is ignored by the parser).
@misc{2_121682,
  author        = {Konstantin Baierer and Matthias Boenig and Elisabeth Engl and Clemens Neudecker and Reinhard Altenhöner and Alexander Geyken and Johannes Mangei and Rainer Stotzka and Andreas Dengel and Martin Jenckel and Alexander Gehrke and Frank Puppe and Stefan Weil and Robert Sachunsky and Lena K. Schiffer and Maciej Janicki and Gerhard Heyer and Florian Fink and Klaus U. Schulz and Nikolaus Weichselbaumer and Saskia Limbach and Mathias Seuret and Rui Dong and Manuel Burghardt and Vincent Christlein and Triet Ho Anh Doan and Zeki Mustafa Dogan and Jörg-Holger Panzer and Kristine Schima-Voigt and Philipp Wieder},
  title         = {{OCR-D} kompakt: Ergebnisse und Stand der Forschung in der Förderinitiative},
  abstract      = {"Bereits seit einigen Jahren werden große Anstrengungen unternommen, um die im deutschen Sprachraum erschienenen Drucke des 16.-18. Jahrhunderts zu erfassen und zu digitalisieren. Deren Volltexttransformation konzeptionell und technisch vorzubereiten, ist das übergeordnete Ziel des DFG-Projekts OCR-D, das sich mit der Weiterentwicklung von Verfahren der Optical Character Recognition befasst. Der Beitrag beschreibt den aktuellen Entwicklungsstand der OCR-D-Software und analysiert deren erste Teststellung in ausgewählten Bibliotheken."},
  doi           = {10.18452/21548},
  url           = {https://publications.goettingen-research-online.de/handle/2/116509},
  month         = jan,
  year          = {2020},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121682},
  internal-note = {NOTE(review): DOI is shared with entry 2_116509 and the url points at record 2/116509, not 2/121682 -- verify identifiers. Author "Zeki Mustafa Dogan" is spelled "Doğan" in entry 2_116509 -- confirm canonical spelling.},
}
Removed: exact duplicate of entry 2_121682 above (repeated citation keys are an error for BibTeX/Biber; this inter-entry text is ignored by the parser).
@misc{2_63412,
  author   = {Markus Suhr and Christoph Lehmann and Christian Robert Bauer and Theresa Bender and Cornelius Knopp and Luca Freckmann and Björn Öst Hansen and Christian Henke and Georg Aschenbrandt and Lea Katharina Kühlborn and Sophia Rheinländer and Linus Weber and Bartlomiej Marzec and Marcel Hellkamp and Philipp Wieder and Harald Kusch and Ulrich Sax and Sara Yasemin Nussbeck},
  title    = {menoci: Lightweight Extensible Web Portal enabling {FAIR} Data Management for Biomedical Research Projects},
  abstract = {"Background: Biomedical research projects deal with data management requirements from multiple sources like funding agencies' guidelines, publisher policies, discipline best practices, and their own users' needs. We describe functional and quality requirements based on many years of experience implementing data management for the CRC 1002 and CRC 1190. A fully equipped data management software should improve documentation of experiments and materials, enable data storage and sharing according to the FAIR Guiding Principles while maximizing usability, information security, as well as software sustainability and reusability. Results: We introduce the modular web portal software menoci for data collection, experiment documentation, data publication, sharing, and preservation in biomedical research projects. Menoci modules are based on the Drupal content management system which enables lightweight deployment and setup, and creates the possibility to combine research data management with a customisable project home page or collaboration platform. Conclusions: Management of research data and digital research artefacts is transforming from individual researcher or groups best practices towards project- or organisation-wide service infrastructures. To enable and support this structural transformation process, a vital ecosystem of open source software tools is needed. 
Menoci is a contribution to this ecosystem of research data management tools that is specifically designed to support biomedical research projects."},
  url      = {https://sfb1190.med.uni-goettingen.de/production/literature/publications/106},
  month    = jan,
  year     = {2020},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/63412},
}
Removed: exact duplicate of entry 2_63412 above (repeated citation keys are an error for BibTeX/Biber; this inter-entry text is ignored by the parser).
@article{2_129372,
  author        = {Hendrik Nolte and Philipp Wieder},
  title         = {Toward data lakes as central building blocks for data management and analysis},
  abstract      = {"Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. 
With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management."},
  doi           = {10.3389/fdata.2022.945720},
  url           = {https://publications.goettingen-research-online.de/handle/2/114449},
  month         = jan,
  year          = {2022},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129372},
  internal-note = {NOTE(review): missing required journal field for @article -- TODO confirm venue. Same DOI and title as entry 2_114449 -- likely duplicate records; consider merging.},
}
Removed: exact duplicate of entry 2_129372 above (repeated citation keys are an error for BibTeX/Biber; this inter-entry text is ignored by the parser).
@article{2_114449,
  author        = {Philipp Wieder and Hendrik Nolte},
  title         = {Toward data lakes as central building blocks for data management and analysis},
  abstract      = {"Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management."},
  doi           = {10.3389/fdata.2022.945720},
  month         = jan,
  year          = {2022},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/114449},
  internal-note = {NOTE(review): missing required journal field for @article -- TODO confirm venue. Same DOI and title as entry 2_129372 (author order differs) -- likely duplicate records; consider merging.},
}
Removed: exact duplicate of entry 2_114449 above (repeated citation keys are an error for BibTeX/Biber; this inter-entry text is ignored by the parser).
@article{2_129373,
  author        = {Hendrik Nolte and Philipp Wieder},
  title         = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  doi           = {10.1162/dint_a_00141},
  url           = {https://publications.goettingen-research-online.de/handle/2/121151},
  month         = jan,
  year          = {2022},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129373},
  internal-note = {NOTE(review): missing required journal field for @article -- TODO confirm venue. Same DOI and title as entry 2_121151 -- likely duplicate records; consider merging.},
}
Removed: exact duplicate of entry 2_129373 above (repeated citation keys are an error for BibTeX/Biber; this inter-entry text is ignored by the parser).
@article{2_121151,
  author        = {Hendrik Nolte and Philipp Wieder},
  title         = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  doi           = {10.1162/dint_a_00141},
  month         = jan,
  year          = {2022},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121151},
  internal-note = {NOTE(review): missing required journal field for @article -- TODO confirm venue. Same DOI and title as entry 2_129373 -- likely duplicate records; consider merging.},
}
Removed: exact duplicate of entry 2_121151 above (repeated citation keys are an error for BibTeX/Biber; this inter-entry text is ignored by the parser).
Thema | Professor*in | Typ |
---|---|---|
Entwicklung einer Provenance aware ad-hoc Schnittstelle für einen Data Lake | Prof. Julian Kunkel | BSc, MSc |
Semantische Klassifizierung von Metadatenattributen in einem Data Lake durch maschinelles Lernen | Prof. Julian Kunkel | BSc, MSc |
Governance für einen Data Lake | Prof. Julian Kunkel | BSc, MSc |
@inproceedings{2_57543,
  author        = {Jens Dierkes and Timo Gnadt and Fabian Cremer and Péter Király and Christopher Menke and Oliver Wannenwetsch and Lena Steilen and Ulrike Wuttke and Wolfram Horstmann and Ramin Yahyapour},
  title         = {Enhanced Research for the {Göttingen} Campus},
  month         = jan,
  year          = {2015},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/57543},
  internal-note = {NOTE(review): missing required booktitle for @inproceedings -- TODO add the proceedings title from the grolink record.},
}
Removed: exact duplicate of entry 2_57543 above (repeated citation keys are an error for BibTeX/Biber; this inter-entry text is ignored by the parser).
Thema | Professor*in | Typ |
---|---|---|
Knowledge Graphs and NLP techniques | Prof. Ramin Yahyapour | BSc, MSc |