% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/by_summaryBy.R
\name{by-summary}
\alias{by-summary}
\alias{summary_by}
\alias{summaryBy}
\title{Groupwise summary statistics}
\usage{
summary_by(
  data,
  formula,
  id = NULL,
  FUN = mean,
  keep.names = FALSE,
  p2d = FALSE,
  order = TRUE,
  full.dimension = FALSE,
  var.names = NULL,
  fun.names = NULL,
  ...
)

summaryBy(
  formula,
  data = parent.frame(),
  id = NULL,
  FUN = mean,
  keep.names = FALSE,
  p2d = FALSE,
  order = TRUE,
  full.dimension = FALSE,
  var.names = NULL,
  fun.names = NULL,
  ...
)
}
\arguments{
\item{data}{A data frame.}

\item{formula}{A formula specifying response and grouping variables.}

\item{id}{A formula specifying variables that the data are not grouped by but that should be retained in the output.}

\item{FUN}{A function or list of functions to apply to the response variables.}

\item{keep.names}{Logical; keep original variable names if only one function is applied.}

\item{p2d}{Logical; should parentheses in output names be replaced with dots?}

\item{order}{Logical; should result be ordered by grouping variables?}

\item{full.dimension}{Logical; if TRUE, repeat rows so output matches input size.}

\item{var.names}{Optional custom names for response variables.}

\item{fun.names}{Optional custom names for functions applied.}

\item{...}{Additional arguments passed to functions in \code{FUN}.}
}
\value{
A data frame of grouped summary statistics.
}
\description{
Computes summary statistics by groups, similar to the \code{summary} procedure in SAS.
A more flexible alternative to base R's \code{\link[stats]{aggregate}}.
}
\details{
Extra arguments in \code{...} are passed to every function in \code{FUN}, so each of these functions must accept them. If a function does not accept such an argument (e.g., \code{na.rm = TRUE}), wrap it in a function that does.
}
\examples{
# Load the CO2 data set used in the examples below
data(CO2)

# Groupwise mean of one response (uptake) by Type and Treatment
summaryBy(uptake ~ Type + Treatment, data = CO2, FUN = mean)
# Groupwise mean of several responses at once, combined with cbind()
summaryBy(cbind(uptake, conc) ~ Type + Treatment, data = CO2, FUN = mean)

# Compare with the corresponding aggregate() call
aggregate(cbind(uptake, conc) ~ Type + Treatment, data = CO2, FUN = mean)

# Using '.' on the right hand side of a formula means to stratify by
# all variables not used elsewhere:
summaryBy(uptake ~ ., data = CO2, FUN = mean)

# Several summary statistics at once using a custom summary function
# Custom summary function returning a named vector with the mean (m),
# the variance (v) and the number of values (n) of x.
# NAs are dropped when computing m and v but are still counted in n;
# extra arguments in ... are accepted and ignored.
myfun <- function(x, ...) {
  avg <- mean(x, na.rm = TRUE)
  spread <- var(x, na.rm = TRUE)
  c(m = avg, v = spread, n = length(x))
}
summaryBy(uptake ~ Type + Treatment, data = CO2, FUN = myfun)

# Summary on transformed variables
# works (the transformed term is given a name, lu):
summaryBy(cbind(lu=log(uptake), conc) ~ Type, data = CO2, FUN = mean)
# fails (the transformed term is left unnamed):
#summaryBy(cbind(log(uptake), conc) ~ Type, data = CO2, FUN = mean)
}
\seealso{
\code{\link[stats]{aggregate}}, \code{\link{orderBy}}, \code{\link{transformBy}}, \code{\link{splitBy}}
}
\author{
Søren Højsgaard, \email{sorenh@math.aau.dk}
}
\concept{byfunction}
\keyword{grouping}
\keyword{summary}
