Data Depositing Services und der Text+ Datenraum
(Andreas Witt, Andreas Henrich, Jonathan Blumtritt, Christoph Draxler, Axel Herold, Marius Hug, Christoph Kudella, Peter Leinen, Philipp Wieder),
2022-01-01
DOI
Data Depositing Services und der Text+ Datenraum
(Andreas Witt, Andreas Henrich, Jonathan Blumtritt, Christoph Draxler, Axel Herold, Marius Hug, Christoph Kudella, Peter Leinen, Philipp Wieder),
2022-01-01
DOI
Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
(Hendrik Nolte, Philipp Wieder),
2022-01-01
DOI
Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
(Hendrik Nolte, Philipp Wieder),
2022-01-01
DOI
Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
(Hendrik Nolte, Philipp Wieder),
2022-01-01
URL | DOI
Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
(Hendrik Nolte, Philipp Wieder),
2022-01-01
URL | DOI
Toward data lakes as central building blocks for data management and analysis
(Philipp Wieder, Hendrik Nolte),
2022-01-01
DOI
Toward data lakes as central building blocks for data management and analysis
(Philipp Wieder, Hendrik Nolte),
2022-01-01
DOI
Toward data lakes as central building blocks for data management and analysis
(Hendrik Nolte, Philipp Wieder),
2022-01-01
URL | DOI
Toward data lakes as central building blocks for data management and analysis
(Hendrik Nolte, Philipp Wieder),
2022-01-01
URL | DOI
BibTeX: Toward data lakes as central building blocks for data management and analysis
@article{2_129372,
  author   = {Nolte, Hendrik and Wieder, Philipp},
  title    = {Toward data lakes as central building blocks for data management and analysis},
  journal  = {Frontiers in Big Data},
  year     = {2022},
  month    = jan,
  doi      = {10.3389/fdata.2022.945720},
  url      = {https://publications.goettingen-research-online.de/handle/2/114449},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129372},
  type     = {article},
  abstract = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
}
BibTeX: Toward data lakes as central building blocks for data management and analysis
@article{2_129372,
  author   = {Nolte, Hendrik and Wieder, Philipp},
  title    = {Toward data lakes as central building blocks for data management and analysis},
  journal  = {Frontiers in Big Data},
  year     = {2022},
  month    = jan,
  doi      = {10.3389/fdata.2022.945720},
  url      = {https://publications.goettingen-research-online.de/handle/2/114449},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129372},
  type     = {article},
  abstract = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
}
BibTeX: Toward data lakes as central building blocks for data management and analysis
@article{2_114449,
  author   = {Wieder, Philipp and Nolte, Hendrik},
  title    = {Toward data lakes as central building blocks for data management and analysis},
  journal  = {Frontiers in Big Data},
  year     = {2022},
  month    = jan,
  doi      = {10.3389/fdata.2022.945720},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/114449},
  type     = {article},
  abstract = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
}
BibTeX: Toward data lakes as central building blocks for data management and analysis
@article{2_114449,
  author   = {Wieder, Philipp and Nolte, Hendrik},
  title    = {Toward data lakes as central building blocks for data management and analysis},
  journal  = {Frontiers in Big Data},
  year     = {2022},
  month    = jan,
  doi      = {10.3389/fdata.2022.945720},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/114449},
  type     = {article},
  abstract = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
}
BibTeX: Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
@article{2_129373,
  author  = {Nolte, Hendrik and Wieder, Philipp},
  title   = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  journal = {Data Intelligence},
  year    = {2022},
  month   = jan,
  doi     = {10.1162/dint_a_00141},
  url     = {https://publications.goettingen-research-online.de/handle/2/121151},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129373},
  type    = {article},
}
BibTeX: Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
@article{2_129373,
  author  = {Nolte, Hendrik and Wieder, Philipp},
  title   = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  journal = {Data Intelligence},
  year    = {2022},
  month   = jan,
  doi     = {10.1162/dint_a_00141},
  url     = {https://publications.goettingen-research-online.de/handle/2/121151},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129373},
  type    = {article},
}
BibTeX: Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
@article{2_121151,
  author  = {Nolte, Hendrik and Wieder, Philipp},
  title   = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  journal = {Data Intelligence},
  year    = {2022},
  month   = jan,
  doi     = {10.1162/dint_a_00141},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121151},
  type    = {article},
}
BibTeX: Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
@article{2_121151,
  author  = {Nolte, Hendrik and Wieder, Philipp},
  title   = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  journal = {Data Intelligence},
  year    = {2022},
  month   = jan,
  doi     = {10.1162/dint_a_00141},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121151},
  type    = {article},
}
BibTeX: Data Depositing Services und der Text+ Datenraum
@misc{2_127235,
  author  = {Witt, Andreas and Henrich, Andreas and Blumtritt, Jonathan and Draxler, Christoph and Herold, Axel and Hug, Marius and Kudella, Christoph and Leinen, Peter and Wieder, Philipp},
  title   = {{Data Depositing Services} und der {Text+} {Datenraum}},
  year    = {2022},
  month   = jan,
  doi     = {10.5281/zenodo.7333737},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/127235},
}
BibTeX: Data Depositing Services und der Text+ Datenraum
@misc{2_127235,
  author  = {Witt, Andreas and Henrich, Andreas and Blumtritt, Jonathan and Draxler, Christoph and Herold, Axel and Hug, Marius and Kudella, Christoph and Leinen, Peter and Wieder, Philipp},
  title   = {{Data Depositing Services} und der {Text+} {Datenraum}},
  year    = {2022},
  month   = jan,
  doi     = {10.5281/zenodo.7333737},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/127235},
}