
De-weaponizing reproducibility

Jeff Leek
2015-03-13

A couple of weeks ago Roger and I went to a conference on statistical reproducibility held at the National Academy of Sciences. The discussion was pretty wide-ranging, and I love that thinking about reproducibility is coming back to statistics. There was pretty widespread support for the idea that prevention is the right way to approach reproducibility.

It turns out I was the last speaker of the whole conference. This is an unenviable position to be in with so many bright folks speaking first, since they covered a huge amount of what I wanted to say. My talk focused on three key points:

  1. The tools for reproducibility already exist; the barrier isn't tools
  2. We need to de-weaponize reproducibility
  3. Prevention is the right approach to reproducibility


In terms of the first point, tools like IPython, knitr, and Galaxy can be used to make all but the absolutely largest analyses reproducible right now. Our group does this all the time with our papers and so do many others. The problem isn't a lack of tools.
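To make that point concrete, here is a minimal sketch (not from the original post) of what the reproducibility baseline looks like in plain Python: fix the seed, record the environment, and write the results alongside the code so anyone can rerun it and get the same numbers. The toy analysis and the output file name are illustrative assumptions, not anything from our papers.

```python
# Minimal reproducibility sketch (illustrative only; the toy analysis and
# output file name are assumptions, not taken from any real paper).
import json
import platform
import random
import statistics
import sys

SEED = 20150313
random.seed(SEED)  # fix the seed so the "analysis" reruns identically

# A stand-in for a real analysis: simulate data and summarize it.
data = [random.gauss(0, 1) for _ in range(1000)]
results = {
    "n": len(data),
    "mean": statistics.mean(data),
    "sd": statistics.stdev(data),
}

# Record the environment next to the results so a reader can rerun the script
# and check that they reproduce the same output.
provenance = {
    "python": sys.version,
    "platform": platform.platform(),
    "seed": SEED,
}

with open("results.json", "w") as f:
    json.dump({"results": results, "provenance": provenance}, f, indent=2)

print(results)
```

Nothing about this is fancy; the same pattern is what knitr or an IPython notebook gives you for free, which is exactly why the barrier isn't tools.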

Speaking to point two, I think many people would agree that part of the issue is culture change. One issue that is increasingly concerning to me is the "weaponization" of reproducibility. I have been noticing that some of us (like me, my students, other folks at JHU, and lots of particularly junior computational people elsewhere) are trying really hard to be reproducible. Most of the time this results in really positive reactions from the community. But when a co-author of mine and I wrote that paper about the science-wise false discovery rate, one of the discussants used our code (great), improved on it (great), identified a bug (great), and then did his level best to humiliate us both in front of the editor and the general public because of that bug (not so great).

I have seen this happen several times. Most of the time, if a paper is reproducible the authors get a pat on the back and their code is either ignored or used in a positive way. But for high-profile and important problems, people largely use reproducibility to:

  1. Impose regulatory hurdles in the short term while people transition to reproducibility. One clear example of this is the Secret Science Reform Act, a bill that imposes strict reproducibility conditions on all science before it can be used as evidence for regulation.
  2. Humiliate people who aren't good coders or who make mistakes in their code. This is what happened with my paper when I produced reproducible code for my analysis, but it has also happened to other people.
  3. Take advantage of people's code to plagiarize or straight-up steal work. I have stories about this I'd rather not put on the internet.


Of the three, I feel like (1) and (2) are the most common. Plagiarism and scooping by theft are, I think, actually relatively rare based on my own anecdotal experience. But I think that the "weaponization" of reproducibility to block regulation or to humiliate folks who are new to computational sciences is more common than I'd like it to be. Until reproducibility is the standard for everyone - which I think is possible now and will happen as the culture changes - the early adopters are at risk of being bludgeoned with their own reproducibility. As a community, if we want widespread adoption of reproducibility, we have to be ferocious about not allowing this to happen.