#' Plot the Probability Weighting Function
#' 
#' Plots the Probability Weighting Function created by \code{\link{nullp}} by
#' binning together genes.
#' 
#' This function is almost always called using the output from the
#' \code{\link{nullp}} function.  However, it can be used to visualize the
#' length (or any other type of quantifiable) bias in ability to detect DE in a
#' data set.  The \code{pwf} argument needs to be a data frame with 3 columns
#' each containing numeric entries (although NAs are permitted in the bias.data
#' and pwf columns), which must be named "DEgenes", "bias.data" and "pwf",
#' although they can appear in any order.  The row names are taken to be the
#' gene names. The DEgenes column should be 0s or 1s where 1 represents a DE
#' gene, 0 a gene which is not DE.  The bias.data column is a quantification of
#' the quantity for which there is a bias in detecting DE for the associated
#' gene; this is usually gene length or the number of counts associated with a
#' gene.  Finally, the pwf column gives the probability weighting to be applied
#' for a given gene.
#' 
#' @param pwf A data frame with 3 columns named DEgenes, bias.data & pwf and
#' row names giving the gene names.  Usually generated by \code{\link{nullp}}.
#' @param binsize Calculate and plot the fraction of genes that are DE in bins
#' of this size.  If set to \code{"auto"}, the function attempts to choose a
#' binsize suitable for visualization automatically.
#' @param pwf_col The colour of the probability weighting function
#' @param pwf_lwd The width of the probability weighting function
#' @param xlab The x-axis label. \code{<binsize>} is replaced by the binsize
#' used.
#' @param ylab The y-axis label.
#' @param ...  Extra arguments that are passed to plot.
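#' @return No meaningful value is returned; the function is called for its
#' side effect of drawing a plot on the current graphics device.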
#' 
#' @author Matthew D. Young \email{myoung@@wehi.edu.au}
#' 
#' @seealso \code{\link{nullp}}
#' 
#' @export
#' 
#' @references Young, M. D., Wakefield, M. J., Smyth, G. K., Oshlack, A. (2010)
#' \emph{Gene ontology analysis for RNA-seq: accounting for selection bias}
#' Genome Biology Date: Feb 2010 Vol: 11 Issue: 2 Pages: R14
#' 
#' @examples
#' data(genes)
#' pwf <- nullp(genes, 'hg19', 'ensGene',plot.fit=FALSE)
#' plotPWF(pwf,binsize=200)
#' 
plotPWF <- function(pwf, binsize = "auto", pwf_col = 3, pwf_lwd = 2,
                    xlab = "Biased Data in <binsize> gene bins.",
                    ylab = "Proportion DE", ...) {
  # Genes without bias data cannot be placed on the x-axis, so drop them and
  # order the remaining genes by increasing bias.
  keep <- !is.na(pwf$bias.data)
  ord <- order(pwf$bias.data[keep])
  de_sorted <- pwf$DEgenes[keep][ord]
  bias_sorted <- pwf$bias.data[keep][ord]
  pwf_sorted <- pwf$pwf[keep][ord]
  n_genes <- sum(keep)
  if (n_genes == 0) {
    stop("'pwf' has no rows with non-missing 'bias.data' to plot",
      call. = FALSE
    )
  }

  # Group consecutive (bias-ordered) genes into bins of `size` genes and
  # return the fraction DE (`de`) and the mean bias (`binlen`) of every bin.
  bin_genes <- function(size) {
    bin_id <- ceiling(seq_along(de_sorted) / size)
    list(
      de = vapply(split(de_sorted, bin_id), mean, numeric(1)),
      binlen = vapply(
        split(as.numeric(bias_sorted), bin_id), mean, numeric(1)
      )
    )
  }

  # Total range of the fitted PWF; used to scale the residuals below.
  rang <- max(pwf$pwf, na.rm = TRUE) - min(pwf$pwf, na.rm = TRUE)
  auto <- isTRUE(binsize == "auto")
  if (auto && !is.finite(rang)) {
    stop("cannot choose 'binsize' automatically: 'pwf$pwf' has no usable ",
      "(finite, non-missing) values",
      call. = FALSE
    )
  }
  # A perfectly flat PWF gives nothing to compare the scatter against, so
  # fall back to a fixed bin size instead of searching.
  if (auto && rang == 0) {
    binsize <- 1000
    auto <- FALSE
  }

  binned <- NULL
  if (auto) {
    # A low number of starting genes to bin, usually 100.  The bin size is
    # incremented before bins are computed, so the first candidate actually
    # evaluated is this value plus 100.
    binsize <- max(1, min(100, floor(n_genes * .08)))
    max_binsize <- floor(n_genes * .1)
    resid <- rang
    # Keep increasing the number of genes per bin until the scatter of the
    # binned data around the PWF drops below the cutoff, or until the bin
    # size exceeds a tenth of the genes.  Warnings (e.g. from approx() on
    # tied bias values) are silenced for the search only; no global option
    # is modified, so nothing is left altered if an error occurs.
    suppressWarnings(
      while (binsize <= max_binsize && resid / rang > .001) {
        binsize <- binsize + 100
        binned <- bin_genes(binsize)
        # Mean squared deviation of the binned data from the PWF.
        fitted <- approx(bias_sorted, pwf_sorted, binned$binlen)$y
        resid <- sum((binned$de - fitted)^2) / length(binned$binlen)
      }
    )
  }
  # Explicit bin size, the flat-PWF fallback, or a data set too small for the
  # search loop to run even once (fewer than 10 usable genes).
  if (is.null(binned)) {
    binned <- bin_genes(binsize)
  }

  # Now we've settled on a binsize, plot it.  `xlab` and `ylab` are formal
  # arguments, so they can never also arrive through `...`; passing them
  # explicitly is always safe.
  xlab <- gsub("<binsize>", as.character(binsize), xlab)
  binlen <- binned$binlen
  de <- binned$de
  plot(binlen, de, xlab = xlab, ylab = ylab, ...)
  # Add the PWF
  lines(bias_sorted, pwf_sorted, col = pwf_col, lwd = pwf_lwd)
  invisible(NULL)
}
