4 Generating the R package

4.1 Sending code chunks to the package

We start by defining a chunk hook, which is a function that runs both before and after each code chunk is run in the knitting process. In this case, the function (called send_to_package) is responsible for determining whether the code chunk looks like something that should be exported to the R package. We don’t want all code sent off to our R package. For example, sometimes we’ll want to demonstrate in the Rmd file how a certain function we’ve just created is used by running it on an example or making a plot. That bit of example code would not be included in the package.

We start by making sure that code is only sent to the R package once (arbitrarily we have code outputted to the package before and not after the chunk is run).

The function then checks if this code chunk is code that should be put into the package. There are four specific cases it considers:

  1. If the special option send_to is used in this code chunk, then things are very simple in that the user has explicitly told us where this code should be added. For example, if send_to="R/file.R", then the code in this chunk will be appended to R/file.R in the R package (and if that file doesn’t yet exist, it will be created).

  2. Is it a piece of code to be sent to the R/ directory? In particular, it checks to see if the code chunk begins with the characteristic roxygen2 characters #'. If it does, then the name of the object being documented is identified (could be a function, a dataset, an S4 object, etc.) and then we write the code chunk to the file R/<objectname>.R.

  3. If the code chunk does not start with #', then we check if it has any line starting with test_that( or testthat::test_that(. If so, then this whole code chunk is appended to tests/testthat/tests.R (and this file is created the first time a test chunk is sent to the package).

  4. Next, it checks if the language engine is Rcpp. This occurs when the code chunk starts with {Rcpp, rather than the usual {r (or alternatively when the engine="Rcpp" option is used). We then set things up appropriately for the use of Rcpp within the package (by adapting some code from within the usethis::use_rcpp() function). Finally, we write the code chunk to src/code.cpp. There is a common header used,

#include <Rcpp.h>
using namespace Rcpp;

and we only want this to appear once in code.cpp, so we do a bit of work to remove that if it appears in the code chunk.

#' A knitr chunk hook for writing R code and tests
#' 
#' This chunk hook detects whether a chunk is defining a function or dataset
#' to be included in the R package (looks for the `roxygen2` comment format `#' `).
#' If so, then it is written to the `R/` directory.  It also looks for chunks 
#' that have one or more lines that start with `test_that(` or 
#' `testthat::test_that(` (potentially with some leading whitespace).  These 
#' chunks are then written to the `tests` directory of the R package.
#' 
#' When the `send_to` option is used, this chunk hook instead simply writes the
#' code chunk to the file specified.
#' 
#' @param before Indicates whether this is being called before or after the 
#' chunk code is executed
#' @param options Has information from the chunk
#' @param envir Environment
#' @keywords internal
send_to_package <- function(before, options, envir) {
  msg <- do_not_edit_message(knitr::current_input(), type = "R")
  if (before == FALSE) {
    # Don't do anything after the code chunk has been executed.
    return()
  }
  package_dir <- knitr::opts_knit$get("root.dir")
  package_name <- fs::path_file(package_dir)
  if (!is.null(options$send_to)) {
    # the user has defined an option that indicates where in the package this
    # code should be written
    file <- file.path(package_dir, options$send_to)
    add_text_to_file(options$code, file, pad = TRUE, msg = msg)
    return()
  }
  if (stringr::str_detect(options$code[1], "^#' ")) {
    # starts with roxygen2, so let's assume this chunk is defining an R function
    # or dataset that belongs in the package
    non_comment <- stringr::str_subset(options$code, "^#", negate = TRUE)
    if (length(non_comment) > 0) {
      if (stringr::str_detect(non_comment[1], "<-")) {
        # a function is being defined
        objname <- stringr::str_match(non_comment[1], "^(.*)\\s*<-\\s*function")[, 2]
        objname <- stringr::str_trim(objname)
      } else if (stringr::str_detect(non_comment[1], '^".+"$')) {
        # a dataset is being documented
        objname <- stringr::str_sub(non_comment[1], start = 2, end = -2)
      } else {
        # Roxygen2 comment wasn't followed by anything recognized, so do not 
        # send this to package
        return()
      }
      file <- file.path(package_dir, "R", stringr::str_glue("{objname}.R"))
      cat(paste(c(msg, "", options$code, ""), collapse = "\n"), file = file)
    }
  }
  else if (any(stringr::str_detect(options$code,
                                   "^\\s*(testthat::)?test_that\\("))) {
    # This chunk is inferred to be a test
    test_dir <- file.path(package_dir, "tests", "testthat")
    test_file <- file.path(test_dir, "tests.R")
    if (!file.exists(test_file)) {
      # It's the first chunk with tests
      if (!dir.exists(test_dir)) usethis::use_testthat()
      cat(c(msg, ""), collapse = "\n", file = test_file)
    }
    cat(
      paste(c(options$code, "", ""), collapse = "\n"),
      file = test_file,
      append = TRUE
    )
  } else if (options$engine == "Rcpp") {
    # To add Rcpp code, we need the package documentation file to exist 
    if (!file.exists(file.path(
      package_dir,
      "R",
      paste0(package_name, "-package.R"))
      )) {
      usethis::use_package_doc(open = FALSE)
    }
    cpp_file <- file.path(package_dir, "src", "code.cpp")
    if (!file.exists(cpp_file)) {
      # set up package for Rcpp
      # these next few lines are taken from usethis::use_rcpp()
      # it approximates a call to usethis::use_rcpp(name = "code")
      usethis:::use_dependency("Rcpp", "LinkingTo")
      usethis:::use_dependency("Rcpp", "Imports")
      usethis:::roxygen_ns_append("@importFrom Rcpp sourceCpp")
      usethis:::use_src()
      usethis::use_template("code.cpp", save_as = "src/code.cpp")

      msg <- do_not_edit_message(knitr::current_input(), type = "c")
      cat(msg, file = cpp_file, append = TRUE)
    }
    # append code to code.cpp, but remove lines that are `#include <Rcpp.h>`
    # or `using namespace Rcpp;` since this already appears at top of file
    cat(paste(c(
      "",
      stringr::str_subset(
        options$code,
        r"(^#include <Rcpp.h>$|^using namespace Rcpp;$)",
        negate = TRUE),
      ""), collapse = "\n"), 
        file = cpp_file,
        append = TRUE)
  }
  return()
}

The above code makes use of a number of functions from the stringr and usethis packages, so we’ll need to add those packages to the Imports section of the DESCRIPTION file:

usethis::use_package("stringr")
usethis::use_package("usethis")
## ✔ Adding 'stringr' to Imports field in DESCRIPTION
## • Refer to functions with `stringr::fun()`
## ✔ Adding 'usethis' to Imports field in DESCRIPTION
## • Refer to functions with `usethis::fun()`

The code also calls the function do_not_edit_message(), which adds a line at the top of the files sent to the R package reminding the user that these are not source files to be edited but rather output of the generating .Rmd file. There are two variations on this message.

#' Generate do-not-edit message to put at top of file
#' 
#' @param rmd_file Name of the Rmd file to mention
#' @param type Whether this is a R/ file, man/ file, or a c file
#' @keywords internal
do_not_edit_message <- function(rmd_file, type = c("R", "man", "c")) {
  if (type[1] == "R")
    return(stringr::str_glue("# Generated from {rmd_file}: do not edit by hand"))
  else if (type[1] == "man")
    return(stringr::str_glue("% Please edit documentation in {rmd_file}."))
  else if (type[1] == "c")
    return(stringr::str_glue("// Generated from {rmd_file}: do not edit by hand"))
  else
    stop("type must be either 'R', 'man', or 'c'.")
}

This function will also be used with type = "man" by litr::document().

The above also makes use of a simple helper function that inserts text into a specified location of a file (or creates that file if it doesn’t exist). Actually currently it doesn’t, but we can replace cat() in the above with calls to add_text_to_file().

#' Add Some Text to a File
#' 
#' The text will be added to the file at a particular line specified by
#' `location`.  The first line of `txt` will be on line `location` of the
#' modified file.  If `location` is NULL, then text is added to end of file.
#' If file does not exist, it is created and `location` is ignored (unless 
#' `req_exist` is `TRUE`, in which case an error is thrown).
#' 
#' @param txt Character vector to add to file
#' @param filename Name of file
#' @param location Specifies where text should be added. See description for more.
#' @param req_exist If TRUE, then throws an error if file doesn't exist
#' @param pad If TRUE, then when text is being added to a preexisting file, it adds a newline
#' @param msg An optional message to put at top of file if this is a new file
#' @keywords internal
add_text_to_file <- function(txt, filename, location = NULL, req_exist = FALSE,
                             pad = FALSE, msg = NULL) {
  if (!file.exists(filename)) {
    if (req_exist) stop(stringr::str_glue("Cannot find file {filename}."))
    if (!is.null(msg)) txt <- c(msg, "", txt)
    writeLines(txt, con = filename)
    return()
  }
  if (pad) txt <- c("", txt)
  filetxt <- readLines(filename)
  if (is.null(location) || location == length(filetxt) + 1) {
    filetxt <- c(filetxt, txt)
  }
  else if (location > length(filetxt) + 1 | location < 1) 
    stop("Invalid location")
  else if (location == 1) {
    filetxt <- c(txt, filetxt)
  } else {
    # location is somewhere in middle
    filetxt <- c(filetxt[1:(location - 1)],
                 txt,
                 filetxt[location:length(filetxt)])
  }
  writeLines(filetxt, con = filename)
}
testthat::test_that("add_text_to_file() works", {
  dir <- tempfile()
  if (fs::file_exists(dir)) fs::file_delete(dir)
  fs::dir_create(dir)
  
  # should throw error when file does not exist and req_exist is TRUE:
  myfile <- file.path(dir, "file.txt")
  sometxt <- c("hello", "there")
  testthat::expect_error(add_text_to_file(sometxt, myfile, req_exist = TRUE))

  # should create a new file where one does not exist:
  myfile <- file.path(dir, "file.txt")
  sometxt <- c("hello", "there")
  add_text_to_file(sometxt, myfile)
  testthat::expect_true(fs::file_exists(myfile))
  testthat::expect_equal(sometxt, readLines(myfile))
  
  # should append to end of file by default
  moretxt <- "world"
  add_text_to_file(moretxt, myfile)
  testthat::expect_equal(c(sometxt, moretxt), readLines(myfile))
   
  # should throw error for invalid locations:
  testthat::expect_error(add_text_to_file(sometxt, myfile, 0))
  testthat::expect_error(add_text_to_file(sometxt, myfile, -1))
  testthat::expect_error(add_text_to_file(sometxt, myfile, 5))

  # should add to specified line:
  moretxt2 <- "hi"
  add_text_to_file(moretxt2, myfile, 1)
  testthat::expect_equal(c(moretxt2, sometxt, moretxt), readLines(myfile))

  # should add to specified line:
  moretxt3 <- "hi2"
  add_text_to_file(moretxt3, myfile, 2)
  testthat::expect_equal(c(moretxt2, moretxt3, sometxt, moretxt),
                         readLines(myfile))

  # should add to specified line:
  moretxt4 <- "hi3"
  add_text_to_file(moretxt4, myfile, 6)
  testthat::expect_equal(c(moretxt2, moretxt3, sometxt, moretxt, moretxt4),
                         readLines(myfile))
  fs::dir_delete(dir)
})
## Test passed

4.2 Setting up the R package creation

When the user calls litr::render() (either in the console or by pressing “Knit” in RStudio), one of the first things that function does is to call the function litr::setup(), which does several things:

  1. Creates a new empty directory at the specified location while first making sure that it won’t overwrite something it shouldn’t. In particular, we guard against the case that the package was generated by litr::render() but then someone went in manually and made some changes. Even though users should never manually edit the package that was generated by litr::render(), we don’t want to have them inadvertently lose their work by doing so. Thus, we only overwrite an R package if we can tell that it is the unedited output of a call to litr::render(). The function check_unedited() is responsible for checking this, and is a pretty interesting function which we will describe in the next section. This part of the code also makes use of a function litr::make_noticeable(), which is simply a way of making error messages produced by litr more easy to see amid a lot of knitr output.

  2. Adjusts the root directory from the generating .Rmd file’s location to the R package’s location. Note: This behavior might not actually be desirable now that additional files will be loaded in. It might be awkward for a user writing the generating .Rmd file to have to make everything relative to the package. It might be convenient to provide a litr::add_file(from, to) function, where from is the path relative to the .Rmd file and to is the path relative to the package’s location.

  3. Makes it so that the send_to_package() chunk hook is active for each code chunk. This involves registering a new chunk hook using the function knitr::knit_hooks$set() and then setting an option with the same name to TRUE within each chunk.

  4. Deactivates an internal function of the usethis package, usethis:::challenge_nested_project(). This was actually a difficult issue to address that involves the intersection of usethis, here, and our particular use case. The problem is that usethis was not designed for our setting in which an R package is being created programmatically. When using litr, the project directory will have the generating .Rmd file and when this is knit it will create an R package within this project. However, this leads usethis to prompt the user with a message of the form

“New project ‘[…]’ is nested inside an existing project ‘[…]’. This is rarely a good idea. Do you wish to create anyway?”

But since this is encountered through knitting rather than interactively, this results in an error. This usethis issue describes this exact problem. The solution suggested there by jennybc involving testthat::with_mock() is along the lines of what we want; however, that would lead to some ugly looking code in the generating .Rmd file. The best solution I could find was to use utils::assignInNamespace() as described here. This function allows us to change the internal function usethis:::challenge_nested_project() so that it no longer prompts the user with concerns about nested projects.

  1. Changes how chunk references are handled. In particular, consider the following code chunk:
a <- 2
<<my-chunk>>
a

The way knitr handles this, the code chunk would no longer look like this but it would rather have replaced the <<my-chunk>> line by the code that appears in the code chunk labeled “my-chunk”. We instead would like the above code chunk to appear as written and then for the code chunk labeled “my-chunk” to have its label visible to the reader of the .html file. This gives the coder more control over when the reader learns about different parts of the code. It also more closely resembles Donald Knuth’s form of literate programming. For convenience, we’d like <<my-chunk>> to be a link that navigates to the code chunk labeled “my-chunk”. To accomplish this, we modify the document output hook in setup() (and then we also add a function called add_chunk_label_hyperlinks() within render()).

  1. Define a package_doc engine which allows users to define package-level documentation.
#' Code for setup chunk
#' 
#' * Creates directory where package will be. (Deletes what is currently there as 
#' long as it appears to have been created by litr and does not have any 
#' subsequent manual edits.)
#' * Sets the root directory to this directory
#' * Sets up the main chunk hook `litr::send_to_package()` that sends code to the 
#' R package directory.
#' * In the case that `minimal_eval=TRUE`, sets up an options hook for `eval` so
#'   chunks are only evaluated if there is a `usethis` or `litr::document()`
#'   command
#' * Deactivates an internal function of the `usethis` package
#' * Redefines the document output hook to handle chunk references differently  
#' * Sets up a [custom language engine](https://bookdown.org/yihui/rmarkdown-cookbook/custom-engine.html) called
#' `package_doc` that creates a package documentation file and then inserts
#' whatever the user puts in the chunk.
#' 
#' Returns the original state of the knitr objects that have been modified in 
#' setup.  This allows us to return things to the previous state after we are
#' finished.  This is relevant in the case where litr-knitting occurs in the 
#' current session and we don't want to leave things in a permanently modified
#' state.
#' 
#' @param package_dir Directory where R package will be created
<<param-minimal_eval>>
#' @keywords internal
setup <- function(package_dir, minimal_eval) {
  if (file.exists(package_dir)) {
    unedited <- tryCatch(check_unedited(package_dir),
                         error = function(e) {
                           # contents of package_dir does not resemble
                           # a litr package
                           return(FALSE)
                         })
    if (!unedited) {
      stop(make_noticeable(paste(
        stringr::str_glue("The directory {normalizePath(package_dir)}"),
        "already exists and either was not created by litr or may have manual",
        "edits. In either case, please rename that directory (or delete it)", 
        "and then try again.", 
        sep = "\n")))
    }
    unlink(package_dir, recursive = TRUE)
  }
  fs::dir_create(package_dir)
  usethis:::proj_set_(usethis:::proj_path_prep(package_dir))

  # let's keep a version of the knitr objects before modifying them:
  original_knitr <- list(opts_knit = knitr::opts_knit$get(),
                         knit_hooks = knitr::knit_hooks$get(),
                         opts_chunk = knitr::opts_chunk$get(),
                         opts_hooks = knitr::opts_hooks$get(),
                         knit_engines = knitr::knit_engines$get()
                         )
  
  knitr::opts_knit$set(root.dir = package_dir) # sets wd of future chunks
  knitr::knit_hooks$set(send_to_package = send_to_package)
  knitr::opts_chunk$set(send_to_package = TRUE)
  if (minimal_eval) {
    # only evaluate chunks that appear to include usethis commands or 
    # a call to litr::document() but if someone has specifically set eval=FALSE
    # in a particular chunk, do honor that
    usethis_exports <- getNamespaceExports("usethis")
    patterns <- paste(c("usethis::", usethis_exports, "litr::document\\("), collapse = "|")
    knitr::opts_hooks$set(eval = function(options) {
      if (options$eval)
        options$eval <- any(stringr::str_detect(options$code, patterns))
      return(options)
    })
  }
  
  
  # change usethis:::challenge_nested_project so that it will not complain
  # about creating a nested project (e.g. if this is called within a git 
  # subdirectory)
  utils::assignInNamespace("challenge_nested_project", function(...) NULL, ns = "usethis")

  # define document hook to handle chunk references:
  knitr::knit_hooks$set(document = function(x) {
    # get the indices of x corresponding to code chunks
    chunk_start <- "^(\n```+[a-zA-Z0-9_]+\n)"
    idx_block <- stringr::str_which(x, chunk_start)
    original_code <- knitr::knit_code$get()
    # We first get indices of skipped chunks in original_code list
    skipped_chunks <- which(sapply(original_code, function(x){
      return(isFALSE(attr(x, "chunk_opts")$echo) || isFALSE(attr(x, "chunk_opts")$include))
    }))

    # Next we remove the indices of skipped chunks
    original_code_idx_fixed <- setdiff(seq(length(original_code)), skipped_chunks)
    
    labels <- names(original_code)
    # replace each x[i] that has code in it with the original code
    for (i in seq_along(idx_block)) {
      # break code into multiple lines:
      chunk <- strsplit(x[idx_block[i]], "\n")[[1]]
      # get the fence used (in case it's more than three ticks):
      i_start <- stringr::str_which(chunk, "^```+[a-zA-Z0-9_]+")
      fence <- stringr::str_replace(chunk[i_start[1]],
                                    "^(```+)[a-zA-Z0-9_]+", "\\1")
      i_fences <- stringr::str_which(chunk, paste0("^", fence))
      # there can be multiple code and output chunks strung together 
      # within a single x[i] if results are not held to end
      i_all_code <- c()
      for (j in seq_along(i_start)) {
        # get the elements corresponding the j-th code chunk within chunk
        i_code_end <- i_fences[which(i_fences == i_start[j]) + 1]
        i_all_code <- c(i_all_code, i_start[j]:i_code_end)
      }
      i_all_code <- setdiff(i_all_code, i_start[1])
      chunk_no_code <- chunk[-i_all_code]
      chunk <- c(chunk_no_code[1:i_start[1]],
                 original_code[original_code_idx_fixed[i]][[1]],
                 # insert the original version, accounting for skipped chunks
                 fence)
      if (i_start[1] < length(chunk_no_code))
        chunk <- c(chunk, chunk_no_code[(i_start[1] + 1):length(chunk_no_code)])
        x[idx_block[i]] <- paste(chunk, collapse = "\n")
    }
    
    # replace code chunks with the original code
    # (so we'll still have <<label>> chunk references)
    refs <- c() # labels that get referred to
    for (label in labels) {
      refs <- c(refs, find_labels(original_code[[label]])$chunk_ids)
    }
    refs <- unique(refs)
    adj_labels <- labels[!labels %in% names(skipped_chunks)]
    ref_id <- match(refs, adj_labels)
    if (any(is.na(ref_id))) {
      stop(make_noticeable(paste(
        stringr::str_glue("The chunk reference <<{refs[is.na(ref_id)][1]}>> ",
        "is used, but there is no chunk with that label.", 
        sep = "\n"))))
      }
    to_insert <- paste0('###"', adj_labels[ref_id], '"###\n')
    x[idx_block[ref_id]] <- stringr::str_replace(x[idx_block[ref_id]],
                                                 chunk_start,
                                                 paste0("\\1", to_insert))
    x
  })
  
  # setup package_doc engine
  knitr::knit_engines$set(package_doc = function(options) {
    # create package_doc
    usethis::use_package_doc(open = FALSE)
    
    # insert the contents of the code chunk into the package_doc
    pkgdoc <- file.path("R", paste0(fs::path_file(package_dir), "-package.R"))
    add_text_to_file(options$code, filename = pkgdoc, location = 1)
    
    # now treat this as if it were standard R code with eval=FALSE
    r_engine <- knitr::knit_engines$get("R")
    options[["eval"]] <- FALSE
    return(r_engine(options))
  })
  return(original_knitr)
}

In our new document output hook defined above, we call a function find_labels(). It takes a block of code and returns both a logical vector of which lines contained chunk labels and another vector containing the labels of those referenced chunks. We define it here:

#' Find a .Rmd chunk label in a code chunk
#' 
#' @param chunk_code Character vector of code from a .Rmd code chunk. Each element is a line of the code chunk.
#' @return List where chunk_idx is a logical vector for each line of the chunk corresponding to whether a chunk label of the form `<<label>>` was found and chunk_ids is a character vector of chunk label was found in that chunk.
#' @keywords internal
find_labels <- function(chunk_code) {
  rc <- knitr::all_patterns$md$ref.chunk
  chunk_idx <- any(idx = grepl(rc, chunk_code))
  chunk_ids <- stringr::str_trim(sub(rc, "\\1", chunk_code[grepl(rc, chunk_code)]))
  return(list(chunk_idx = chunk_idx, chunk_ids = chunk_ids))
}

The setup() function also uses a small function, make_noticeable(), which we define here:

#' Make error messages noticeable
#' 
#' Since litr error messages are amid a lot of output from knitting, we'd like 
#' the litr ones to be eye-catching.
#' 
#' @param msg Error message
#' @keywords internal
make_noticeable <- function(msg) {
  paste("",
        "======",
        "Please read your friendly litr error message here:",
        paste("> ", msg),
        "======",
        sep = "\n")
}

The code in this section used the fs and knitr packages, so we import those:

usethis::use_package("fs")
usethis::use_package("knitr")
## ✔ Adding 'fs' to Imports field in DESCRIPTION
## • Refer to functions with `fs::fun()`
## ✔ Adding 'knitr' to Imports field in DESCRIPTION
## • Refer to functions with `knitr::fun()`