@inproceedings{acher-combemale-etal-2024-embracing-deep-variability,
title = {Embracing {{Deep Variability For Reproducibility}} and {{Replicability}}},
booktitle = {Proceedings of the 2nd {{ACM Conference}} on {{Reproducibility}} and {{Replicability}}},
author = {Acher, Mathieu and Combemale, Benoit and Randrianaina, Georges Aaron and Jezequel, Jean-Marc},
year = {2024},
month = jul,
series = {{{ACM REP}} '24},
pages = {30--35},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
doi = {10.1145/3641525.3663621},
urldate = {2024-09-30},
abstract = {Reproducibility (a.k.a., determinism in some cases) constitutes a fundamental aspect in various fields of computer science, such as floating-point computations in numerical analysis and simulation, concurrency models in parallelism, reproducible builds for third parties integration and packaging, and containerization for execution environments. These concepts, while pervasive across diverse concerns, often exhibit intricate inter-dependencies, making it challenging to achieve a comprehensive understanding. In this short and vision paper we delve into the application of software engineering techniques, specifically variability management, to systematically identify and explicit points of variability that may give rise to reproducibility issues (e.g., language, libraries, compiler, virtual machine, OS, environment variables,\ etc.). The primary objectives are: i) gaining insights into the variability layers and their possible interactions, ii) capturing and documenting configurations for the sake of reproducibility, and iii) exploring diverse configurations to replicate, and hence validate and ensure the robustness of results. By adopting these methodologies, we aim to address the complexities associated with reproducibility and replicability in modern software systems and environments, facilitating a more comprehensive and nuanced perspective on these critical aspects.},
isbn = {9798400705304},
file = {/Users/david/Zotero/storage/UUL584M3/Acher et al_2024_Embracing Deep Variability For Reproducibility and Replicability.pdf}
}
@article{ball-medeiros-etal-2022-invitation-teaching-reproducible,
title = {An {{Invitation}} to {{Teaching Reproducible Research}}: {{Lessons}} from a {{Symposium}}},
shorttitle = {An {{Invitation}} to {{Teaching Reproducible Research}}},
author = {Ball, Richard and Medeiros, Norm and Bussberg, Nicholas W. and Piekut, Aneta},
year = {2022},
month = nov,
journal = {Journal of Statistics and Data Science Education},
publisher = {Taylor \& Francis},
issn = {2693-9169},
urldate = {2024-10-01},
abstract = {This article synthesizes ideas that emerged over the course of a 10-week symposium titled ``Teaching Reproducible Research: Educational Outcomes'' https://www.projecttier.org/fellowships-and-workshop...},
copyright = {{\copyright} 2022 The Author(s). Published with license by Taylor and Francis Group, LLC.},
langid = {english},
keywords = {No DOI found},
file = {/Users/david/Zotero/storage/7T5FY7EG/26939169.2022.html}
}
@article{bauer-breznau-etal-2023-teaching-constructive-replications,
title = {Teaching {{Constructive Replications}} in the {{Behavioral}} and {{Social Sciences Using Quantitative Data}}},
author = {Bauer, Gerrit and Breznau, Nate and Gereke, Johanna and H{\"o}ffler, Jan H. and Janz, Nicole and Rahal, Rima-Maria and Rennstich, Joachim K. and Soin{\'e}, Hannah},
year = {2023},
month = dec,
journal = {Teaching of Psychology},
volume = {Online first},
publisher = {SAGE Publications Inc},
issn = {0098-6283},
doi = {10.1177/00986283231219503},
urldate = {2024-04-15},
abstract = {Introduction The replication crisis in the behavioral and social sciences spawned a credibility revolution, calling for new open science research practices that ensure greater transparency, including preregistrations, open data and code, and open access. Statement of the Problem Replications of published research are an important element in this revolution as part of the self-correcting process of scientific knowledge production; however, the teaching value of replications is still underutilized thus far. Literature Review Pedagogical knowledge points to the value of replication as critical to the scientific method of test and retest. Psychology has already begun mass efforts to reproduce previous experiments. Yet, we have very few examples of how analytical and reanalysis replications, after the data come in, contribute to the reproducibility crisis and can be integrated into undergraduate and graduate courses. Teaching Implications Replications with quantitative data can be a pedagogical tool for improving student research method skills and introducing them to best research practices via learning-by-doing. Conclusion This article aims to start filling this gap by offering guidance to instructors in designing and teaching replications for students at various levels and disciplines in the social and behavioral sciences, including a supplementary teaching companion.},
langid = {english}
}
@article{boettiger-2015-introduction-docker-reproducible,
title = {An Introduction to {{Docker}} for Reproducible Research, with Examples from the {{R}} Environment},
author = {Boettiger, Carl},
year = {2015},
month = jan,
journal = {ACM SIGOPS Operating Systems Review},
volume = {49},
number = {1},
eprint = {1410.0846},
primaryclass = {cs},
pages = {71--79},
issn = {0163-5980},
doi = {10.1145/2723872.2723882},
urldate = {2024-10-02},
abstract = {As computational work becomes more and more integral to many aspects of scientific research, computational reproducibility has become an issue of increasing importance to computer systems researchers and domain scientists alike. Though computational reproducibility seems more straight forward than replicating physical experiments, the complex and rapidly changing nature of computer environments makes being able to reproduce and extend such work a serious challenge. In this paper, I explore common reasons that code developed for one research project cannot be successfully executed or extended by subsequent researchers. I review current approaches to these issues, including virtual machines and workflow systems, and their limitations. I then examine how the popular emerging technology Docker combines several areas from systems research - such as operating system virtualization, cross-platform portability, modular re-usable elements, versioning, and a `DevOps' philosophy, to address these challenges. I illustrate this with several examples of Docker use with a focus on the R statistical environment.},
archiveprefix = {arXiv},
keywords = {Computer Science - Software Engineering},
file = {/Users/david/Zotero/storage/QCBXP9XT/boettiger-2015-introduction-docker-reproducible.pdf;/Users/david/Zotero/storage/SX9AXUI3/1410.html}
}
@article{breznau-rinke-etal-2022-observing-many-researchers,
title = {Observing Many Researchers Using the Same Data and Hypothesis Reveals a Hidden Universe of Uncertainty},
author = {Breznau, Nate and Rinke, Eike Mark and Wuttke, Alexander and Nguyen, Hung H. V. and Adem, Muna and Adriaans, Jule and {Alvarez-Benjumea}, Amalia and Andersen, Henrik K. and Auer, Daniel and Azevedo, Flavio and Bahnsen, Oke and Balzer, Dave and Bauer, Gerrit and Bauer, Paul C. and Baumann, Markus and Baute, Sharon and Benoit, Verena and Bernauer, Julian and Berning, Carl and Berthold, Anna and Bethke, Felix S. and Biegert, Thomas and Blinzler, Katharina and Blumenberg, Johannes N. and Bobzien, Licia and Bohman, Andrea and Bol, Thijs and Bostic, Amie and Brzozowska, Zuzanna and Burgdorf, Katharina and Burger, Kaspar and Busch, Kathrin B. and {Carlos-Castillo}, Juan and Chan, Nathan and Christmann, Pablo and Connelly, Roxanne and Czymara, Christian S. and Damian, Elena and Ecker, Alejandro and Edelmann, Achim and Eger, Maureen A. and Ellerbrock, Simon and Forke, Anna and Forster, Andrea and Gaasendam, Chris and Gavras, Konstantin and Gayle, Vernon and Gessler, Theresa and Gnambs, Timo and Godefroidt, Am{\'e}lie and Gr{\"o}mping, Max and Gro{\ss}, Martin and Gruber, Stefan and Gummer, Tobias and Hadjar, Andreas and Heisig, Jan Paul and Hellmeier, Sebastian and Heyne, Stefanie and Hirsch, Magdalena and Hjerm, Mikael and Hochman, Oshrat and H{\"o}vermann, Andreas and Hunger, Sophia and Hunkler, Christian and Huth, Nora and Ign{\'a}cz, Zs{\'o}fia S. and Jacobs, Laura and Jacobsen, Jannes and Jaeger, Bastian and Jungkunz, Sebastian and Jungmann, Nils and Kauff, Mathias and Kleinert, Manuel and Klinger, Julia and Kolb, Jan-Philipp and Ko{\l}czy{\'n}ska, Marta and Kuk, John and Kuni{\ss}en, Katharina and Kurti Sinatra, Dafina and Langenkamp, Alexander and Lersch, Philipp M. and L{\"o}bel, Lea-Maria and Lutscher, Philipp and Mader, Matthias and Madia, Joan E. and Malancu, Natalia and Maldonado, Luis and Marahrens, Helge and Martin, Nicole and Martinez, Paul and Mayerl, Jochen and Mayorga, Oscar J. and McManus, Patricia and McWagner, Kyle and Meeusen, Cecil and Meierrieks, Daniel and Mellon, Jonathan and Merhout, Friedolin and Merk, Samuel and Meyer, Daniel and Micheli, Leticia and Mijs, Jonathan and Moya, Crist{\'o}bal and Neunhoeffer, Marcel and N{\"u}st, Daniel and Nyg{\aa}rd, Olav and Ochsenfeld, Fabian and Otte, Gunnar and Pechenkina, Anna O. and Prosser, Christopher and Raes, Louis and Ralston, Kevin and Ramos, Miguel R. and Roets, Arne and Rogers, Jonathan and Ropers, Guido and Samuel, Robin and Sand, Gregor and Schachter, Ariela and Schaeffer, Merlin and Schieferdecker, David and Schlueter, Elmar and Schmidt, Regine and Schmidt, Katja M. and {Schmidt-Catran}, Alexander and Schmiedeberg, Claudia and Schneider, J{\"u}rgen and Schoonvelde, Martijn and {Schulte-Cloos}, Julia and Schumann, Sandy and Schunck, Reinhard and Schupp, J{\"u}rgen and Seuring, Julian and Silber, Henning and Sleegers, Willem and Sonntag, Nico and Staudt, Alexander and Steiber, Nadia and Steiner, Nils and Sternberg, Sebastian and Stiers, Dieter and Stojmenovska, Dragana and Storz, Nora and Striessnig, Erich and Stroppe, Anne-Kathrin and Teltemann, Janna and Tibajev, Andrey and Tung, Brian and Vagni, Giacomo and Van Assche, Jasper and {van der Linden}, Meta and {van der Noll}, Jolanda and Van Hootegem, Arno and Vogtenhuber, Stefan and Voicu, Bogdan and Wagemans, Fieke and Wehl, Nadja and Werner, Hannah and Wiernik, Brenton M. and Winter, Fabian and Wolf, Christof and Yamada, Yuki and Zhang, Nan and Ziller, Conrad and Zins, Stefan and {\.Z}{\'o}{\l}tak, Tomasz},
year = {2022},
month = nov,
journal = {Proceedings of the National Academy of Sciences},
volume = {119},
number = {44},
pages = {e2203150119},
publisher = {Proceedings of the National Academy of Sciences},
doi = {10.1073/pnas.2203150119},
urldate = {2023-07-28},
abstract = {This study explores how researchers' analytical choices affect the reliability of scientific findings. Most discussions of reliability problems in science focus on systematic biases. We broaden the lens to emphasize the idiosyncrasy of conscious and unconscious decisions that researchers make during data analysis. We coordinated 161 researchers in 73 research teams and observed their research decisions as they used the same data to independently test the same prominent social science hypothesis: that greater immigration reduces support for social policies among the public. In this typical case of social science research, research teams reported both widely diverging numerical findings and substantive conclusions despite identical start conditions. Researchers' expertise, prior beliefs, and expectations barely predict the wide variation in research outcomes. More than 95\% of the total variance in numerical results remains unexplained even after qualitative coding of all identifiable decisions in each team's workflow. This reveals a universe of uncertainty that remains hidden when considering a single study in isolation. The idiosyncratic nature of how researchers' results and conclusions varied is a previously underappreciated explanation for why many scientific hypotheses remain contested. These results call for greater epistemic humility and clarity in reporting scientific findings.},
file = {/Users/david/Zotero/storage/5U2UN2EN/Breznau et al_2022_Observing many researchers using the same data and hypothesis reveals a hidden.pdf}
}
@inproceedings{bzeznik-henriot-etal-2017-nix-hpc-package,
title = {Nix as {{HPC}} Package Management System},
booktitle = {Proceedings of the {{Fourth International Workshop}} on {{HPC User Support Tools}}},
author = {Bzeznik, Bruno and Henriot, Oliver and Reis, Valentin and Richard, Olivier and Tavard, Laure},
year = {2017},
month = nov,
series = {{{HUST}}'17},
pages = {1--6},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
doi = {10.1145/3152493.3152556},
urldate = {2024-10-01},
abstract = {Modern High Performance Computing systems are becoming larger and more heterogeneous. The proper management of software for the users of such systems poses a significant challenge. These users run very diverse applications that may be compiled with proprietary tools for specialized hardware. Moreover, the application life-cycle of these software may exceed the lifetime of the HPC systems themselves. These difficulties motivate the use of specialized package management systems. In this paper, we outline an approach to HPC package development, deployment, management, sharing, and reuse based on the Nix functional package manager. We report our experience with this approach inside the GRICAD HPC center[GRICAD 2017a] in Grenoble over a 12 month period and compare it to other existing approaches.},
isbn = {978-1-4503-5130-0},
file = {/Users/david/Zotero/storage/C29UWG7Z/bzeznik-henriot-etal-2017-nix-hpc-package.pdf}
}
@incollection{cioffi-revilla-2017-computation-social-science,
title = {Computation and {{Social Science}}},
booktitle = {Introduction to {{Computational Social Science}}: {{Principles}} and {{Applications}}},
author = {{Cioffi-Revilla}, Claudio},
editor = {{Cioffi-Revilla}, Claudio},
year = {2017},
pages = {35--102},
publisher = {Springer International Publishing},
address = {Cham},
doi = {10.1007/978-3-319-50131-4_2},
urldate = {2024-10-07},
abstract = {Social scientists have used computation since the days of the earliest digital computers. What is the role of computation in contemporary Computational Social Science (CSS) theory and research? How does computation provide a deeper understanding of social complexity? This chapter is not an introduction to computing for social scientists. Rather, it is an examination of computation from a CSS perspective; similar to how a computational astronomer or a computational biologist would discuss the function of computation in their respective disciplines. After examining key similarities and differences between computers and social systems, from an information-processing perspective, the chapter takes a closer look at programming languages and aspects of implementation. Classes, objects, and dynamics are examined from the perspective of social theories and the role computational entities play in the conduct of CSS research. The Unified Modeling Language (UML) is used as a systematic graphic notation for representing social entities, relations, and interactions, based on a variety of examples drawn from across social science domains. Data structures and algorithms, which are foundational to computation, are examined in the context of CSS research.},
isbn = {978-3-319-50131-4},
langid = {english}
}
@article{clyburne-sherin-fei-etal-2019-computational-reproducibility-containers,
title = {Computational {{Reproducibility}} via {{Containers}} in {{Psychology}}},
author = {{Clyburne-Sherin}, April and Fei, Xu and Green, Seth Ariel},
year = {2019},
month = nov,
journal = {Meta-Psychology},
volume = {3},
issn = {2003-2714},
doi = {10.15626/MP.2018.892},
urldate = {2024-10-07},
abstract = {Scientific progress relies on the replication and reuse of research. Recent studies suggest, however, that sharing code and data does not suffice for computational reproducibility ---defined as the ability of researchers to reproduce ``particular analysis outcomes from the same data set using the same code and software'' (Fidler and Wilcox, 2018). To date, creating long-term computationally reproducible code has been technically challenging and time-consuming. This tutorial introduces Code Ocean, a cloud-based computational reproducibility platform that attempts to solve these problems. It does this by adapting software engineering tools, such as Docker, for easier use by scientists and scientific audiences. In this article, we first outline arguments for the importance of computational reproducibility, as well as some reasons why this is a nontrivial problem for researchers. We then provide a step-by-step guide to getting started with containers in research using Code Ocean. (Disclaimer: the authors all worked for Code Ocean at the time of this article's writing.)},
copyright = {Copyright (c) 2019 April Clyburne-Sherin, Xu Fei, Seth Ariel Green},
langid = {english},
keywords = {code-ocean,computational-reproducibility,containers,methods,reproducibility},
file = {/Users/david/Zotero/storage/QQDUDCNS/Clyburne-Sherin et al_2019_Computational Reproducibility via Containers in Psychology.pdf}
}
@inproceedings{courtès-wurmus-2015-reproducible-usercontrolled-software,
title = {Reproducible and {{User-Controlled Software Environments}} in {{HPC}} with {{Guix}}},
booktitle = {Euro-{{Par}} 2015: {{Parallel Processing Workshops}}},
author = {Court{\`e}s, Ludovic and Wurmus, Ricardo},
editor = {Hunold, Sascha and Costan, Alexandru and Gim{\'e}nez, Domingo and Iosup, Alexandru and Ricci, Laura and G{\'o}mez Requena, Mar{\'i}a Engracia and Scarano, Vittorio and Varbanescu, Ana Lucia and Scott, Stephen L. and Lankes, Stefan and Weidendorfer, Josef and Alexander, Michael},
year = {2015},
pages = {579--591},
publisher = {Springer International Publishing},
address = {Cham},
doi = {10.1007/978-3-319-27308-2_47},
abstract = {Support teams of high-performance computing (HPC) systems often find themselves between a rock and a hard place: on one hand, they understandably administrate these large systems in a conservative way, but on the other hand, they try to satisfy their users by deploying up-to-date tool chains as well as libraries and scientific software. HPC system users often have no guarantee that they will be able to reproduce results at a later point in time, even on the same system---software may have been upgraded, removed, or recompiled under their feet, and they have little hope of being able to reproduce the same software environment elsewhere. We present GNU~Guix and the functional package management paradigm and show how it can improve reproducibility and sharing among researchers with representative use cases.},
isbn = {978-3-319-27308-2},
langid = {english},
keywords = {Directed Acyclic Graph,Environment Module,Software Environment,Support Team,Tool Chain},
file = {/Users/david/Zotero/storage/76DNQYP3/courtès-wurmus-2015-reproducible-usercontrolled-software.pdf}
}
@article{freese-2007-replication-standards-quantitative,
title = {Replication {{Standards}} for {{Quantitative Social Science}}: {{Why Not Sociology}}?},
shorttitle = {Replication {{Standards}} for {{Quantitative Social Science}}},
author = {Freese, Jeremy},
year = {2007},
month = nov,
journal = {Sociological Methods \& Research},
volume = {36},
number = {2},
pages = {153--172},
publisher = {SAGE Publications Inc},
issn = {0049-1241},
doi = {10.1177/0049124107306659},
urldate = {2022-06-14},
abstract = {The credibility of quantitative social science benefits from policies that increase confidence that results reported by one researcher can be verified by others. Concerns about replicability have increased as the scale and sophistication of analyses increase the possible dependence of results on subtle analytic decisions and decrease the extent to which published articles contain full descriptions of methods. The author argues that sociology should adopt standards regarding replication that minimize its conceptualization as an ethical and individualistic matter and advocates for a policy in which authors use independent online archives to deposit the maximum possible information for replicating published results at the time of publication and are explicit about the conditions of availability for any necessary materials that are not provided. The author responds to several objections that might be raised to increasing the transparency of quantitative sociology in this way and offers a candidate replication policy for sociology.},
langid = {english},
keywords = {data archiving,data sharing,replication,transparency},
file = {/Users/david/Zotero/storage/8UIWATWF/Freese_2007_Replication Standards for Quantitative Social Science.pdf}
}
@article{freese-peterson-2017-replication-social-science,
title = {Replication in {{Social Science}}},
author = {Freese, Jeremy and Peterson, David},
year = {2017},
journal = {Annual Review of Sociology},
volume = {43},
pages = {147--165},
doi = {10.1146/annurev-soc-060116-053450},
abstract = {Across the medical and social sciences, new discussions about replication have led to transformations in research practice. Sociologists, however, have been largely absent from these discussions. The goals of this review are to introduce sociologists to these developments, synthesize insights from science studies about replication in general, and detail the specific issues regarding replication that occur in sociology. The first half of the article argues that a sociologically sophisticated understanding of replication must address both the ways that replication rules and conventions evolved within an epistemic culture and how those cultures are shaped by specific research challenges. The second half outlines the four main dimensions of replicability in quantitative sociology---verifiability, robustness, repeatability, and generalizability---and discusses the specific ambiguities of interpretation that can arise in each. We conclude by advocating some commonsense changes to promote replication while acknowledging the epistemic diversity of our field.},
langid = {english},
file = {/Users/david/Zotero/storage/HGRUY9PU/Freese_Peterson_2017_Replication in Social Science.pdf}
}
@article{freese-rauf-etal-2022-advances-transparency-reproducibility,
title = {Advances in Transparency and Reproducibility in the Social Sciences},
author = {Freese, Jeremy and Rauf, Tamkinat and Voelkel, Jan Gerrit},
year = {2022},
month = sep,
journal = {Social Science Research},
volume = {107},
pages = {102770},
issn = {0049-089X},
doi = {10.1016/j.ssresearch.2022.102770},
urldate = {2022-11-01},
abstract = {Worries about a ``credibility crisis'' besieging science have ignited interest in research transparency and reproducibility as ways of restoring trust in published research. For quantitative social science, advances in transparency and reproducibility can be seen as a set of developments whose trajectory predates the recent alarm. We discuss several of these developments, including preregistration, data-sharing, formal infrastructure in the form of resources and policies, open access to research, and specificity regarding research contributions. We also discuss the spillovers of this predominantly quantitative effort towards transparency for qualitative research. We conclude by emphasizing the importance of mutual accountability for effective science, the essential role of openness for this accountability, and the importance of scholarly inclusiveness in figuring out the best ways for openness to be accomplished in practice.},
langid = {english},
keywords = {Open science,Reproducibility,Transparency},
file = {/Users/david/Zotero/storage/HDLMFZVB/S0049089X2200076X.html}
}
@inproceedings{gao-zhang-etal-2019-learning-networking-reproducing,
title = {Learning {{Networking}} by {{Reproducing Research Results}} in an {{NS-3 Simulation Networking Laboratory Course}}},
booktitle = {2019 {{IEEE International Conference}} on {{Engineering}}, {{Technology}} and {{Education}} ({{TALE}})},
author = {Gao, Yayu and Zhang, Chengwei and Hei, Xiaojun and Zhong, Guohui},
year = {2019},
month = dec,
pages = {1--6},
issn = {2470-6698},
doi = {10.1109/TALE48000.2019.9225977},
urldate = {2024-10-01},
abstract = {In the past two years, the undergraduate networking laboratory course at our university has been redesigned in a learning-by-doing approach. We have carefully selected eight papers covering different networking topics and designed corresponding experiments for over 50 students to reproduce the research results in the NS-3 simulation platform. In this paper, we present the design of an NS-3 simulation based networking laboratory course by reproducing research results. The course curriculum and experiment design are introduced, the topics of which include wireless networking, software defined networking, and artificial intelligence algorithms in NS-3 platform. The proposed course has been implemented in our university with an online NS-3 lab environment, and fruitful results and positive feedback have been received from both students and teachers. This course aims to teach both engineering rigor and critical thinking for undergraduate students, moreover, help the students to understand key concepts of advanced topics in networking and obtain hand-on experiences in conducting network simulations, which are crucial for their future career or research.},
keywords = {Analytical models,Artificial intelligence,component,Computational modeling,formatting,Performance evaluation,Software defined networking,style,styling,Wireless communication,Wireless LAN}
}
@article{gayle-connelly-2022-stark-realities-reproducible,
title = {The {{Stark}} Realities of Reproducible Statistically Orientated Sociological Research: {{Some}} Newer Rules of the Sociological Method},
shorttitle = {The {{Stark}} Realities of Reproducible Statistically Orientated Sociological Research},
author = {Gayle, Vernon and Connelly, Roxanne},
year = {2022},
month = nov,
journal = {Methodological Innovations},
volume = {15},
number = {3},
pages = {207--221},
publisher = {SAGE Publications Ltd},
issn = {2059-7991},
doi = {10.1177/20597991221111681},
urldate = {2024-09-30},
abstract = {There is increasing concern that research is not transparent and that empirical results are often impossible to reproduce. Guidelines for undertaking reproducible research have been proposed in a number of academic areas (e.g. computational economics, psychology and medical research), however currently there are no guidelines for sociological research. This methodological paper provides guidance for undertaking reproducible statistically orientated sociological research. We provide an extended demonstration of the issues associated with reproducing results and undertaking transparent analyses. We draw on suitable concepts and techniques from open research, e-research and computing. We propose a set of Newer Rules of the Sociological Method, for undertaking transparent statistically orientated sociological research that supports reproducibility.},
langid = {english},
file = {/Users/david/Zotero/storage/6WDWPC2G/Gayle_Connelly_2022_The Stark realities of reproducible statistically orientated sociological.pdf}
}
@article{gibney-2022-could-machine-learning,
title = {Could Machine Learning Fuel a Reproducibility Crisis in Science?},
author = {Gibney, Elizabeth},
year = {2022},
month = jul,
journal = {Nature},
volume = {608},
number = {7922},
pages = {250--251},
publisher = {Nature Publishing Group},
doi = {10.1038/d41586-022-02035-w},
urldate = {2024-10-01},
abstract = {`Data leakage' threatens the reliability of machine-learning use across disciplines, researchers warn.},
copyright = {2022 Springer Nature Limited},
langid = {english},
keywords = {Machine learning,Mathematics and computing,Publishing},
annotation = {Bandiera\_abtest: a\\
Cg\_type: News\\
Subject\_term: Machine learning, Publishing, Mathematics and computing}
}
@inproceedings{goswami-s.-etal-2022-reproducible-high-performance,
title = {Reproducible {{High Performance Computing}} without {{Redundancy}} with {{Nix}}},
booktitle = {2022 {{Seventh International Conference}} on {{Parallel}}, {{Distributed}} and {{Grid Computing}} ({{PDGC}})},
author = {Goswami, Rohit and S., Ruhila and Goswami, Amrita and Goswami, Sonaly and Goswami, Debabrata},
year = {2022},
month = nov,
pages = {238--242},
issn = {2573-3079},
doi = {10.1109/PDGC56933.2022.10053342},
urldate = {2024-10-02},
abstract = {High performance computing (HPC) clusters are typically managed in a restrictive manner; the large user base makes cluster administrators unwilling to allow privilege escalation. Here we discuss existing methods of package management, including those which have been developed with scalability in mind, and enumerate the drawbacks and advantages of each management methodology. We contrast the paradigms of containerization via docker, virtualization via KVM, pod-infrastructures via Kubernetes, and specialized HPC packaging systems via Spack and identify key areas of neglect. We demonstrate how functional programming due to reliance on immutable states has been leveraged for deterministic package management via the nix-language expressions. We show its associated ecosystem is a prime candidate for HPC package management. We further develop guidelines and identify bottlenecks in the existing structure and present the methodology by which the nix ecosystem should be developed further as an optimal tool for HPC package management. We assert that the caveats of the nix ecosystem can easily mitigated by considerations relevant only to HPC systems, without compromising on functional methodology and features of the nix-language. We show that benefits of adoption in terms of generating reproducible derivations in a secure manner allow for workflows to be scaled across heterogeneous clusters. In particular, from the implementation hurdles faced during the compilation and running of the d-SEAMS scientific software engine, distributed as a nix-derivation on an HPC cluster, we identify communication protocols for working with SLURM and TORQUE user resource allocation queues. These protocols are heuristically defined and described in terms of the reference implementation required for queue-efficient nix builds.},
keywords = {Ecosystems,functional-derivations,functional-package-management,High performance computing,high-performance-computing,HPC,Nix,nix-lang,Packaging,Protocols,reproducible-research,Resource management,Software,Torque}
}
@article{guest-martin-2021-how-computational-modeling,
title = {How {{Computational Modeling Can Force Theory Building}} in {{Psychological Science}}},
author = {Guest, Olivia and Martin, Andrea E.},
year = {2021},
month = jul,
journal = {Perspectives on Psychological Science},
volume = {16},
number = {4},
pages = {789--802},
publisher = {SAGE Publications Inc},
issn = {1745-6916},
doi = {10.1177/1745691620970585},
urldate = {2023-07-10},
abstract = {Psychology endeavors to develop theories of human capacities and behaviors on the basis of a variety of methodologies and dependent measures. We argue that one of the most divisive factors in psychological science is whether researchers choose to use computational modeling of theories (over and above data) during the scientific-inference process. Modeling is undervalued yet holds promise for advancing psychological science. The inherent demands of computational modeling guide us toward better science by forcing us to conceptually analyze, specify, and formalize intuitions that otherwise remain unexamined---what we dub open theory. Constraining our inference process through modeling enables us to build explanatory and predictive theories. Here, we present scientific inference in psychology as a path function in which each step shapes the next. Computational modeling can constrain these steps, thus advancing scientific inference over and above the stewardship of experimental practice (e.g., preregistration). If psychology continues to eschew computational modeling, we predict more replicability crises and persistent failure at coherent theory building. This is because without formal modeling we lack open and transparent theorizing. We also explain how to formalize, specify, and implement a computational model, emphasizing that the advantages of modeling can be achieved by anyone with benefit to all.},
langid = {english},
file = {/Users/david/Zotero/storage/46H4QX7W/Guest_Martin_2021_How Computational Modeling Can Force Theory Building in Psychological Science.pdf}
}
@article{hardwicke-wallach-etal-2020-empirical-assessment-transparency,
title = {An Empirical Assessment of Transparency and Reproducibility-Related Research Practices in the Social Sciences (2014--2017)},
author = {Hardwicke, Tom E. and Wallach, Joshua D. and Kidwell, Mallory C. and Bendixen, Theiss and Cr{\"u}well, Sophia and Ioannidis, John P. A.},
year = {2020},
journal = {Royal Society Open Science},
volume = {7},
number = {2},
pages = {190806},
publisher = {Royal Society},
doi = {10.1098/rsos.190806},
urldate = {2022-11-01},
abstract = {Serious concerns about research quality have catalysed a number of reform initiatives intended to improve transparency and reproducibility and thus facilitate self-correction, increase efficiency and enhance research credibility. Meta-research has evaluated the merits of some individual initiatives; however, this may not capture broader trends reflecting the cumulative contribution of these efforts. In this study, we manually examined a random sample of 250 articles in order to estimate the prevalence of a range of transparency and reproducibility-related indicators in the social sciences literature published between 2014 and 2017. Few articles indicated availability of materials (16/151, 11\% [95\% confidence interval, 7\% to 16\%]), protocols (0/156, 0\% [0\% to 1\%]), raw data (11/156, 7\% [2\% to 13\%]) or analysis scripts (2/156, 1\% [0\% to 3\%]), and no studies were pre-registered (0/156, 0\% [0\% to 1\%]). Some articles explicitly disclosed funding sources (or lack of; 74/236, 31\% [25\% to 37\%]) and some declared no conflicts of interest (36/236, 15\% [11\% to 20\%]). Replication studies were rare (2/156, 1\% [0\% to 3\%]). Few studies were included in evidence synthesis via systematic review (17/151, 11\% [7\% to 16\%]) or meta-analysis (2/151, 1\% [0\% to 3\%]). Less than half the articles were publicly available (101/250, 40\% [34\% to 47\%]). Minimal adoption of transparency and reproducibility-related research practices could be undermining the credibility and efficiency of social science research. The present study establishes a baseline that can be revisited in the future to assess progress.},
keywords = {meta-research,open science,reproducibility,social sciences,transparency},
file = {/Users/david/Zotero/storage/4GDZMVCC/Hardwicke et al_2020_An empirical assessment of transparency and reproducibility-related research.pdf}
}
@article{herndon-ash-etal-2014-does-high-public,
title = {Does High Public Debt Consistently Stifle Economic Growth? {{A}} Critique of {{Reinhart}} and {{Rogoff}}},
shorttitle = {Does High Public Debt Consistently Stifle Economic Growth?},
author = {Herndon, T. and Ash, M. and Pollin, R.},
year = {2014},
month = mar,
journal = {Cambridge Journal of Economics},
volume = {38},
number = {2},
pages = {257--279},
issn = {0309-166X, 1464-3545},
doi = {10.1093/cje/bet075},
urldate = {2022-10-20},
abstract = {We replicate Reinhart and Rogoff (2010a and 2010b) and find that coding errors, selective exclusion of available data, and unconventional weighting of summary statistics lead to serious errors that inaccurately represent the relationship between public debt and GDP growth among 20 advanced economies in the post-war period. Our finding is that when properly calculated, the average real GDP growth rate for countries carrying a public-debt-to-GDP ratio of over 90 percent is actually 2.2 percent, not -0.1 percent as published in Reinhart and Rogoff. That is, contrary to RR, average GDP growth at public debt/GDP ratios over 90 percent is not dramatically different than when debt/GDP ratios are lower.},
langid = {english},
file = {/Users/david/Zotero/storage/8USJS97P/Herndon et al_2014_Does high public debt consistently stifle economic growth.pdf}
}
@book{hörmann-leydold-etal-2004-automatic-nonuniform-random,
title = {Automatic {{Nonuniform Random Variate Generation}}},
author = {H{\"o}rmann, Wolfgang and Leydold, Josef and Derflinger, Gerhard},
editor = {Chambers, J. and Eddy, W. and H{\"a}rdle, W. and Sheather, S. and Tierney, L.},
year = {2004},
series = {Statistics and {{Computing}}},
publisher = {Springer},
address = {Berlin, Heidelberg},
doi = {10.1007/978-3-662-05946-3},
urldate = {2024-10-01},
copyright = {http://www.springer.com/tdm},
isbn = {978-3-642-07372-4 978-3-662-05946-3},
keywords = {algorithms,bayesian statistics,Markov Chain,Monte Carlo Methods,Non-Uniform Random Variate,Option Pricing,Random Variate Generation,Time series,UNU.RAN library,VaR},
file = {/Users/david/Zotero/storage/P2XSDFLY/Hörmann et al_2004_Automatic Nonuniform Random Variate Generation.pdf}
}
@article{hox-2017-computational-social-science,
title = {Computational {{Social Science Methodology}}, {{Anyone}}?},
author = {Hox, Joop J.},
year = {2017},
month = jun,
journal = {Methodology},
volume = {13},
number = {Supplement 1},
pages = {3--12},
publisher = {Hogrefe Publishing},
issn = {1614-1881},
doi = {10.1027/1614-2241/a000127},
urldate = {2020-04-03},
abstract = {This article reviews computational social science methods and their relation to conventional methodology and statistics. Computational social science has three important features. Firstly, it often involves big data; data sets so large that conventional database and analysis techniques cannot handle them with ease. Secondly, dealing with these big data sets has given rise to analysis techniques that are specially developed for big data. Given the size of the data, resampling and cross-validation approaches become feasible that allow both data-driven exploration and checks on overfitting the data. A third important feature is simulation, especially agent-based simulation. Here size also matters. Agent-based simulation is well known in social science, but modern computer equipment and software allows simulations of unprecedented scale. Many of these techniques, especially the resampling and cross-validation approaches, are potentially very useful for social scientists. Given the relatively small size of social science ``big data'' is useful to explore how well these techniques perform with smaller data sets. Social science methodology can contribute to this field by exploring if well-known methodological distinctions between external validity, internal validity, and construct validity can help clear up discussions on data quality (veracity) in computational social science.},
file = {/Users/david/Zotero/storage/H9BVLIMT/Hox_2017_Computational Social Science Methodology, Anyone.pdf;/Users/david/Zotero/storage/LBZVDJ4Z/a000127.html}
}
@article{karathanasis-hwang-etal-2022-reproducibility-efforts-teaching,
title = {Reproducibility Efforts as a Teaching Tool: {{A}} Pilot Study},
shorttitle = {Reproducibility Efforts as a Teaching Tool},
author = {Karathanasis, Nestoras and Hwang, Daniel and Heng, Vibol and Abhimannyu, Rimal and {Slogoff-Sevilla}, Phillip and Buchel, Gina and Frisbie, Victoria and Li, Peiyao and Kryoneriti, Dafni and Rigoutsos, Isidore},
year = {2022},
month = nov,
journal = {PLOS Computational Biology},
volume = {18},
number = {11},
pages = {e1010615},
publisher = {Public Library of Science},
issn = {1553-7358},
doi = {10.1371/journal.pcbi.1010615},
urldate = {2024-10-01},
abstract = {The ``replication crisis'' is a methodological problem in which many scientific research findings have been difficult or impossible to replicate. Because the reproducibility of empirical results is an essential aspect of the scientific method, such failures endanger the credibility of theories based on them and possibly significant portions of scientific knowledge. An instance of the replication crisis, analytic replication, pertains to reproducing published results through computational reanalysis of the authors' original data. However, direct replications are costly, time-consuming, and unrewarded in today's publishing standards. We propose that bioinformatics and computational biology students replicate recent discoveries as part of their curriculum. Considering the above, we performed a pilot study in one of the graduate-level courses we developed and taught at our University. The course is entitled Intro to R Programming and is meant for students in our Master's and PhD programs who have little to no programming skills. As the course emphasized real-world data analysis, we thought it would be an appropriate setting to carry out this study. The primary objective was to expose the students to real biological data analysis problems. These include locating and downloading the needed datasets, understanding any underlying conventions and annotations, understanding the analytical methods, and regenerating multiple graphs from their assigned article. The secondary goal was to determine whether the assigned articles contained sufficient information for a graduate-level student to replicate its figures. Overall, the students successfully reproduced 39\% of the figures. The main obstacles were the need for more advanced programming skills and the incomplete documentation of the applied methods. Students were engaged, enthusiastic, and focused throughout the semester. We believe that this teaching approach will allow students to make fundamental scientific contributions under appropriate supervision. It will teach them about the scientific process, the importance of reporting standards, and the importance of openness.},
langid = {english},
keywords = {Biochemistry,Bioinformatics,Computer software,Galaxies,Pilot studies,Programming languages,Reproducibility,Software tools},
file = {/Users/david/Zotero/storage/3E3IUXJ8/Karathanasis et al_2022_Reproducibility efforts as a teaching tool.pdf}
}
@article{liu-salganik-2019-successes-struggles-computational,
title = {Successes and {{Struggles}} with {{Computational Reproducibility}}: {{Lessons}} from the {{Fragile Families Challenge}}},
shorttitle = {Successes and {{Struggles}} with {{Computational Reproducibility}}},
author = {Liu, David M. and Salganik, Matthew J.},
year = {2019},
month = jan,
journal = {Socius},
volume = {5},
pages = {2378023119849803},
publisher = {SAGE Publications},
issn = {2378-0231},
doi = {10.1177/2378023119849803},
urldate = {2024-10-02},
abstract = {Reproducibility is fundamental to science, and an important component of reproducibility is computational reproducibility: the ability of a researcher to recreate the results of a published study using the original author's raw data and code. Although most people agree that computational reproducibility is important, it is still difficult to achieve in practice. In this article, the authors describe their approach to enabling computational reproducibility for the 12 articles in this special issue of Socius about the Fragile Families Challenge. The approach draws on two tools commonly used by professional software engineers but not widely used by academic researchers: software containers (e.g., Docker) and cloud computing (e.g., Amazon Web Services). These tools made it possible to standardize the computing environment around each submission, which will ease computational reproducibility both today and in the future. Drawing on their successes and struggles, the authors conclude with recommendations to researchers and journals.},
langid = {english},
file = {/Users/david/Zotero/storage/H93U2C2G/liu-salganik-2019-successes-struggles-computational.pdf}
}
@article{massonnet-ménégoz-etal-2020-replicability-ecearth3-earth,
title = {Replicability of the {{EC-Earth3 Earth}} System Model under a Change in Computing Environment},
author = {Massonnet, Fran{\c c}ois and M{\'e}n{\'e}goz, Martin and Acosta, Mario and {Yepes-Arb{\'o}s}, Xavier and Exarchou, Eleftheria and {Doblas-Reyes}, Francisco J.},
year = {2020},
month = mar,
journal = {Geoscientific Model Development},
volume = {13},
number = {3},
pages = {1165--1178},
publisher = {Copernicus GmbH},
issn = {1991-959X},
doi = {10.5194/gmd-13-1165-2020},
urldate = {2024-10-01},
abstract = {Most Earth system models (ESMs) are running under different high-performance computing (HPC) environments. This has several advantages, from allowing different groups to work with the same tool in parallel to leveraging the burden of ensemble climate simulations, but it also offers alternative solutions in the case of shutdown (expected or not) of any of the environments. However, for obvious scientific reasons, it is critical to ensure that ESMs provide identical results under changes in computing environment. While strict bit-for-bit reproducibility is not always guaranteed with ESMs, it is desirable that results obtained under one computing environment are at least statistically indistinguishable from those obtained under another environment, which we term a ``replicability'' condition following the metrology nomenclature. Here, we develop a protocol to assess the replicability of the EC-Earth ESM. Using two versions of EC-Earth, we present one case of non-replicability and one case of replicability. The non-replicable case occurs with the older version of the model and likely finds its origin in the treatment of river runoff along Antarctic coasts. By contrast, the more recent version of the model provides replicable results. The methodology presented here has been adopted as a standard test by the EC-Earth consortium (27 institutions in Europe) to evaluate the replicability of any new model version across platforms, including for CMIP6 experiments. To a larger extent, it can be used to assess whether other ESMs can safely be ported from one HPC environment to another for studying climate-related questions. Our results and experience with this work suggest that the default assumption should be that ESMs are not replicable under changes in the HPC environment, until proven otherwise.},
langid = {english},
file = {/Users/david/Zotero/storage/5CJR37TC/Massonnet et al_2020_Replicability of the EC-Earth3 Earth system model under a change in computing.pdf}
}
@article{moody-keister-etal-2022-reproducibility-social-sciences,
title = {Reproducibility in the {{Social Sciences}}},
author = {Moody, James W and Keister, Lisa A and Ramos, Maria C},
year = {2022},
journal = {Annual Review of Sociology},
volume = {48},
pages = {65--85},
doi = {10.1146/annurev-soc-090221-035954},
abstract = {Concern over social scientists' inability to reproduce empirical research has spawned a vast and rapidly growing literature. The size and growth of this literature make it difficult for newly interested academics to come up to speed. Here, we provide a formal text modeling approach to characterize the entirety of the field, which allows us to summarize the breadth of this literature and identify core themes. We construct and analyze text networks built from 1,947 articles to reveal differences across social science disciplines within the body of reproducibility publications and to discuss the diversity of subtopics addressed in the literature. This field-wide view suggests that reproducibility is a heterogeneous problem with multiple sources for errors and strategies for solutions, a finding that is somewhat at odds with calls for largely passive remedies reliant on open science. We propose an alternative rigor and reproducibility model that takes an active approach to rigor prior to publication, which may overcome some of the shortfalls of the postpublication model.},
langid = {english},
file = {/Users/david/Zotero/storage/W38DD7Z2/Moody et al_2022_Reproducibility in the Social Sciences.pdf}
}
@article{morris-white-etal-2019-using-simulation-studies,
title = {Using Simulation Studies to Evaluate Statistical Methods},
author = {Morris, Tim P. and White, Ian R. and Crowther, Michael J.},
year = {2019},
journal = {Statistics in Medicine},
volume = {38},
number = {11},
pages = {2074--2102},
issn = {1097-0258},
doi = {10.1002/sim.8086},
urldate = {2021-04-04},
abstract = {Simulation studies are computer experiments that involve creating data by pseudo-random sampling. A key strength of simulation studies is the ability to understand the behavior of statistical methods because some ``truth'' (usually some parameter/s of interest) is known from the process of generating the data. This allows us to consider properties of methods, such as bias. While widely used, simulation studies are often poorly designed, analyzed, and reported. This tutorial outlines the rationale for using simulation studies and offers guidance for design, execution, analysis, reporting, and presentation. In particular, this tutorial provides a structured approach for planning and reporting simulation studies, which involves defining aims, data-generating mechanisms, estimands, methods, and performance measures (``ADEMP''); coherent terminology for simulation studies; guidance on coding simulation studies; a critical discussion of key performance measures and their estimation; guidance on structuring tabular and graphical presentation of results; and new graphical presentations. With a view to describing recent practice, we review 100 articles taken from Volume 34 of Statistics in Medicine, which included at least one simulation study and identify areas for improvement.},
copyright = {{\copyright} 2019 The Authors. Statistics~in~Medicine Published by John Wiley \& Sons Ltd.},
langid = {english},
keywords = {graphics for simulation,Monte Carlo,simulation design,simulation reporting,simulation studies},
file = {/Users/david/Zotero/storage/JXJ4345T/Morris et al_2019_Using simulation studies to evaluate statistical methods.pdf;/Users/david/Zotero/storage/KNLSQJIJ/sim.html}
}
@article{oberauer-lewandowsky-2019-addressing-theory-crisis,
title = {Addressing the Theory Crisis in Psychology},
author = {Oberauer, Klaus and Lewandowsky, Stephan},
year = {2019},
month = oct,
journal = {Psychonomic Bulletin \& Review},
volume = {26},
number = {5},
pages = {1596--1618},
issn = {1069-9384, 1531-5320},
doi = {10.3758/s13423-019-01645-2},
urldate = {2023-07-10},
abstract = {A worrying number of psychological findings are not replicable. Diagnoses of the causes of this ``replication crisis,'' and recommendations to address it, have nearly exclusively focused on methods of data collection, analysis, and reporting. We argue that a further cause of poor replicability is the often weak logical link between theories and their empirical tests. We propose a distinction between discovery-oriented and theory-testing research. In discovery-oriented research, theories do not strongly imply hypotheses by which they can be tested, but rather define a search space for the discovery of effects that would support them. Failures to find these effects do not question the theory. This endeavor necessarily engenders a high risk of Type I errors---that is, publication of findings that will not replicate. Theory-testing research, by contrast, relies on theories that strongly imply hypotheses, such that disconfirmation of the hypothesis provides evidence against the theory. Theory-testing research engenders a smaller risk of Type I errors. A strong link between theories and hypotheses is best achieved by formalizing theories as computational models. We critically revisit recommendations for addressing the ``replication crisis,'' including the proposal to distinguish exploratory from confirmatory research, and the preregistration of hypotheses and analysis plans.},
langid = {english},
file = {/Users/david/Zotero/storage/E4KLUPFX/Oberauer_Lewandowsky_2019_Addressing the theory crisis in psychology.pdf}
}
@article{opensciencecollaboration-2015-estimating-reproducibility-psychological,
title = {Estimating the Reproducibility of Psychological Science},
author = {{Open Science Collaboration}},
year = {2015},
month = aug,
journal = {Science},
volume = {349},
number = {6251},
pages = {aac4716},
publisher = {American Association for the Advancement of Science},
doi = {10.1126/science.aac4716},
urldate = {2022-06-30},
file = {/Users/david/Zotero/storage/78IDJXE4/Open Science Collaboration_2015_Estimating the reproducibility of psychological science.pdf}
}
@article{rahmandad-sterman-2012-reporting-guidelines-simulationbased,
title = {Reporting Guidelines for Simulation-Based Research in Social Sciences},
author = {Rahmandad, Hazhir and Sterman, John D.},
year = {2012},
month = oct,
journal = {System Dynamics Review},
publisher = {Wiley-Blackwell Publishers},
issn = {0883-7066},
urldate = {2024-10-01},
abstract = {Reproducibility of research is critical for the healthy growth and accumulation of reliable knowledge, and simulation-based research is no exception. However, studies show many simulation-based studies in the social sciences are not reproducible. Better standards for documenting simulation models and reporting results are needed to enhance the reproducibility of simulation-based research in the social sciences. We provide an initial set of Reporting Guidelines for Simulation-based Research (RGSR) in the social sciences, with a focus on common scenarios in system dynamics research. We discuss these guidelines separately for reporting models, reporting simulation experiments, and reporting optimization results. The guidelines are further divided into minimum and preferred requirements, distinguishing between factors that are indispensable for reproduction of research and those that enhance transparency. We also provide a few guidelines for improved visualization of research to reduce the costs of reproduction. Suggestions for enhancing the adoption of these guidelines are discussed at the end.},
copyright = {Creative Commons Attribution-Noncommercial-Share Alike 3.0},
langid = {american},
keywords = {No DOI found},
annotation = {Accepted: 2013-01-18T15:36:40Z},
file = {/Users/david/Zotero/storage/NJLSA4JJ/Rahmandad_Sterman_2012_Reporting guidelines for simulation-based research in social sciences.pdf}
}
@article{reinhart-rogoff-2010-growth-time-debt,
title = {Growth in a {{Time}} of {{Debt}}},
author = {Reinhart, Carmen M and Rogoff, Kenneth S},
year = {2010},
month = may,
journal = {American Economic Review},
volume = {100},
number = {2},
pages = {573--578},
issn = {0002-8282},
doi = {10.1257/aer.100.2.573},
urldate = {2022-10-20},
langid = {english},
file = {/Users/david/Zotero/storage/HENPRFAL/Reinhart_Rogoff_2010_Growth in a Time of Debt.pdf}
}
@article{salganik-maffeo-etal-2020-prediction-machine-learning,
title = {Prediction, {{Machine Learning}}, and {{Individual Lives}}: An {{Interview}} with {{Matthew Salganik}}},
shorttitle = {Prediction, {{Machine Learning}}, and {{Individual Lives}}},
author = {Salganik, Matthew and Maffeo, Lauren and Rudin, Cynthia},
year = {2020},
month = jul,
journal = {Harvard Data Science Review},
doi = {10.1162/99608f92.eecdfa4e},
urldate = {2024-10-07},
abstract = {Machine learning techniques are increasingly used throughout society to predict individual's life outcomes. However, research published in the Proceedings of the National Academy of Sciences raises questions about the accuracy of these predictions. Led by researchers at Princeton University, this mass collaboration involved 160 teams of data and social scientists building statistical and machine learning models to predict six life outcomes for children, parents, and families. They found that none of the teams could make very accurate predictions, despite using advanced techniques and having access to a rich dataset. This interview of Matthew Salganik, the study's lead author and a professor of Sociology at Princeton University, was conducted by Lauren Maffeo, Associate Principal Analyst at Gartner, and Cynthia Rudin, a professor of Computer Science, Electrical and Computer Engineering, and Statistical Science at Duke University. It provides an overview of the study's goals, research methods, and results. The interview also includes key takeaways for policy leaders who wish to use machine learning to predict and improve life outcomes for people.},
langid = {english},
file = {/Users/david/Zotero/storage/CMSTSRAF/Salganik et al. - 2020 - Prediction, Machine Learning, and Individual Lives.pdf}
}
@article{stodden-mcnutt-etal-2016-enhancing-reproducibility-computational,
title = {Enhancing Reproducibility for Computational Methods},
author = {Stodden, Victoria and McNutt, Marcia and Bailey, David H. and Deelman, Ewa and Gil, Yolanda and Hanson, Brooks and Heroux, Michael A. and Ioannidis, John P. A. and Taufer, Michela},
year = {2016},
month = dec,
journal = {Science},
volume = {354},
number = {6317},
pages = {1240--1241},
publisher = {American Association for the Advancement of Science},
doi = {10.1126/science.aah6168},
urldate = {2024-09-30},
file = {/Users/david/Zotero/storage/QAXBAKWR/Stodden et al_2016_Enhancing reproducibility for computational methods.pdf}
}
@article{vallet-michonneau-etal-2022-practical-transparent-verifiable,
title = {Toward Practical Transparent Verifiable and Long-Term Reproducible Research Using {{Guix}}},
author = {Vallet, Nicolas and Michonneau, David and Tournier, Simon},
year = {2022},
month = oct,
journal = {Scientific Data},
volume = {9},
number = {1},
pages = {597},
publisher = {Nature Publishing Group},
issn = {2052-4463},
doi = {10.1038/s41597-022-01720-9},
urldate = {2024-10-09},
abstract = {Reproducibility crisis urge scientists to promote transparency which allows peers to draw same conclusions after performing identical steps from hypothesis to results. Growing resources are developed to open the access to methods, data and source codes. Still, the computational environment, an interface between data and source code running analyses, is not addressed. Environments are usually described with software and library names associated with version labels or provided as an opaque container image. This is not enough to describe the complexity of the dependencies on which they rely to operate on. We describe this issue and illustrate how open tools like Guix can be used by any scientist to share their environment and allow peers to reproduce it. Some steps of research might not be fully reproducible, but at least, transparency for computation is technically addressable. These tools should be considered by scientists willing to promote transparency and open science.},
copyright = {2022 The Author(s)},
langid = {english},
keywords = {Computational platforms and environments,Research management,Software},
file = {/Users/david/Zotero/storage/4ET75VC4/vallet-michonneau-etal-2022-practical-transparent-verifiable.pdf}
}