#### Create Missing Values in mtcars data

# For this example we will pretend we are missing mpg data for Merc 280, Dodge Challenger and Ferrari Dino in mtcars

# Overwrite mpg with the placeholder code -99 for the three cars we pretend
# have no recorded mpg; every other row keeps its value.
mtcars$mpg[
  rownames(mtcars) %in% c("Merc 280", "Dodge Challenger", "Ferrari Dino")
] <- -99

#### Change Missing Value Code to NA

# Any cell in the data frame that holds the placeholder -99 becomes NA.
mtcars[mtcars == -99] <- NA

#### Identify Missing Values in Data Frame

# list total number of missing values by variable
colSums(is.na(mtcars))
##  mpg  cyl disp   hp drat   wt qsec   vs   am gear carb
##    3    0    0    0    0    0    0    0    0    0    0

# list names of cars with missing mpg
rownames(mtcars)[is.na(mtcars$mpg)]
## [1] "Merc 280"         "Dodge Challenger" "Ferrari Dino"

#### Calculate Mean MPG

mean(mtcars$mpg) # missing values mess up even simple calculations
## [1] NA
mean(mtcars$mpg, na.rm = TRUE) # we can get around this by telling R to ignore missing values
## [1] 20.29

#### Mean Imputation

# Impute on a copy so that mtcars itself is left untouched.
mtcars.imputed <- mtcars
# Replace each missing mpg with the mean of the observed mpg values.
mtcars.imputed$mpg <- ifelse(
  is.na(mtcars$mpg),
  mean(mtcars$mpg, na.rm = TRUE),
  mtcars$mpg
)
colSums(is.na(mtcars.imputed)) # no more missing data
##  mpg  cyl disp   hp drat   wt qsec   vs   am gear carb
##    0    0    0    0    0    0    0    0    0    0    0
summary(mtcars.imputed$mpg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##    10.4    15.7    20.3    20.3    22.8    33.9

#### Mean Imputation over every Column

# Create mean imputation function
#
# Replaces every NA in each numeric column with the mean of that column's
# non-missing values.
#
# Arguments:
#   df   A data frame (a matrix is also accepted and handled column-wise).
#   ...  Accepted for compatibility with existing calls; not used.
#
# Returns:
#   For a data frame: a data frame with the same rows, columns and column
#   types, where NAs in numeric columns have been replaced. Non-numeric
#   columns are returned unchanged, because a mean is not defined for them.
#   For any other input: the result of applying the same rule over columns
#   with apply().
mean.imputation <- function(df, ...) {
  impute_column <- function(x) {
    if (is.numeric(x)) {
      x[is.na(x)] <- mean(x, na.rm = TRUE)
    }
    x
  }

  if (!is.data.frame(df)) {
    return(apply(df, 2, impute_column))
  }

  # Work column by column instead of calling apply() on the data frame:
  # apply() first converts the data frame to a matrix, which discards the
  # data frame structure and turns every column into character as soon as
  # one column is non-numeric.
  if (length(df) > 0L) {
    df[] <- lapply(df, impute_column)
  }
  df
}

# Run the column-wise mean imputation over the whole data set.
mtcars.imputed <- mean.imputation(df = mtcars)

# Per-column count of remaining NAs: no more missing data
colSums(x = is.na(mtcars.imputed))
##  mpg  cyl disp   hp drat   wt qsec   vs   am gear carb
##    0    0    0    0    0    0    0    0    0    0    0

#### Percentile Imputation

# Percentile Imputation
# Impute on a copy so that mtcars itself is left untouched.
mtcars.imputed <- mtcars
# impute missing with 95th percentile of the observed mpg values
mtcars.imputed$mpg[is.na(mtcars.imputed$mpg)] <-
  quantile(mtcars.imputed$mpg, .95, na.rm = TRUE)
colSums(is.na(mtcars.imputed)) # no more missing data
##  mpg  cyl disp   hp drat   wt qsec   vs   am gear carb
##    0    0    0    0    0    0    0    0    0    0    0
summary(mtcars.imputed$mpg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##    10.4    15.7    21.0    21.4    26.3    33.9