# Download the Ames housing data into the working directory and read it back
# from that same location, so the script does not depend on a path that only
# exists on one machine.
ames_path <- "ames.csv"
download.file(
  "http://www.openintro.org/stat/data/ames.csv",
  destfile = ames_path
)
data <- read.csv(ames_path)
# This calculation requires the 'plyr' package; install it only when it is
# missing. requireNamespace() checks that the package can actually be loaded,
# without building the full table that installed.packages() returns.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)
# show the class of every column in the data frame, labelled by column name
sapply(X = data, FUN = function(column) class(column))
## Order PID MS.SubClass MS.Zoning
## "integer" "integer" "integer" "factor"
## Lot.Frontage Lot.Area Street Alley
## "integer" "integer" "factor" "factor"
## Lot.Shape Land.Contour Utilities Lot.Config
## "factor" "factor" "factor" "factor"
## Land.Slope Neighborhood Condition.1 Condition.2
## "factor" "factor" "factor" "factor"
## Bldg.Type House.Style Overall.Qual Overall.Cond
## "factor" "factor" "integer" "integer"
## Year.Built Year.Remod.Add Roof.Style Roof.Matl
## "integer" "integer" "factor" "factor"
## Exterior.1st Exterior.2nd Mas.Vnr.Type Mas.Vnr.Area
## "factor" "factor" "factor" "integer"
## Exter.Qual Exter.Cond Foundation Bsmt.Qual
## "factor" "factor" "factor" "factor"
## Bsmt.Cond Bsmt.Exposure BsmtFin.Type.1 BsmtFin.SF.1
## "factor" "factor" "factor" "integer"
## BsmtFin.Type.2 BsmtFin.SF.2 Bsmt.Unf.SF Total.Bsmt.SF
## "factor" "integer" "integer" "integer"
## Heating Heating.QC Central.Air Electrical
## "factor" "factor" "factor" "factor"
## X1st.Flr.SF X2nd.Flr.SF Low.Qual.Fin.SF Gr.Liv.Area
## "integer" "integer" "integer" "integer"
## Bsmt.Full.Bath Bsmt.Half.Bath Full.Bath Half.Bath
## "integer" "integer" "integer" "integer"
## Bedroom.AbvGr Kitchen.AbvGr Kitchen.Qual TotRms.AbvGrd
## "integer" "integer" "factor" "integer"
## Functional Fireplaces Fireplace.Qu Garage.Type
## "factor" "integer" "factor" "factor"
## Garage.Yr.Blt Garage.Finish Garage.Cars Garage.Area
## "integer" "factor" "integer" "integer"
## Garage.Qual Garage.Cond Paved.Drive Wood.Deck.SF
## "factor" "factor" "factor" "integer"
## Open.Porch.SF Enclosed.Porch X3Ssn.Porch Screen.Porch
## "integer" "integer" "integer" "integer"
## Pool.Area Pool.QC Fence Misc.Feature
## "integer" "factor" "factor" "factor"
## Misc.Val Mo.Sold Yr.Sold Sale.Type
## "integer" "integer" "integer" "factor"
## Sale.Condition SalePrice
## "factor" "integer"
# keep only the numeric columns; is.numeric() is TRUE for both integer and
# double columns, so a separate is.integer() test is not needed
is_numeric_col <- vapply(data, is.numeric, logical(1))
# drop = FALSE keeps a data frame even if only one column is selected
data_continuous <- data[, is_numeric_col, drop = FALSE]
# calculate the mean of every numeric column for each year of sale;
# na.rm = TRUE is forwarded to mean() so missing values are ignored
means <- ddply(data_continuous, .(Yr.Sold), numcolwise(mean), na.rm = TRUE)
# This demo requires the 'ggplot2' package; install it only when it is
# missing. requireNamespace() checks that the package can actually be loaded,
# without building the full table that installed.packages() returns.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
suppressPackageStartupMessages(library(ggplot2))
# Draw one line graph per variable in a data frame and print each plot.
#
# Arguments:
#   x      data frame holding the time variable and the variables to plot
#   time   name of the column of 'x' to put on the x-axis, given as a string
#          (e.g. "Yr.Sold"); the older "df$column" spelling is still accepted
#          and reduced to the bare column name
#   na.rm  accepted for backward compatibility; currently unused
#   ...    accepted for backward compatibility; currently unused
#
# Returns NULL invisibly; the function is called for its plotting side effect.
line <- function(x, time, na.rm = TRUE, ...) {
  stopifnot(is.data.frame(x), is.character(time), length(time) == 1L)
  nm <- names(x)

  # Resolve 'time' to a column of 'x'. A legacy value such as "means$Yr.Sold"
  # is reduced to "Yr.Sold" so the plot always reads from 'x' itself instead
  # of from whatever object the string happens to name.
  time_col <- if (time %in% nm) time else sub("^.*\\$", "", time)
  if (!time_col %in% nm) {
    stop("'", time_col, "' is not a column of 'x'", call. = FALSE)
  }

  # Plotting the time variable against itself carries no information.
  for (var in nm[nm != time_col]) {
    print(
      # .data[[...]] looks columns up by name, replacing the deprecated
      # aes_string(); labels are set explicitly so the axes show column names.
      ggplot(x, aes(x = .data[[time_col]], y = .data[[var]])) +
        # 'linewidth' replaces 'size' for lines as of ggplot2 3.4.0
        geom_line(linewidth = 2, color = "darkblue") +
        labs(x = time_col, y = var) +
        theme_classic()
    )
  }
  invisible(NULL)
}
line(x = means, time = "Yr.Sold")