# Para replicar las secciones de esta clase, primero debe descargar el
# siguiente proyecto de R y abrir el archivo clase-06.Rproj.
## Install/load the libraries used in this class
# library() stops with an error if pacman is missing, instead of returning
# FALSE and failing later at p_load().
library(pacman)
p_load(tidyverse, rio, skimr, viridis)

# Fix the RNG seed so the train/test split below is reproducible.
set.seed(101010)

# Import the data and drop every row that contains a missing value.
house <- import("input/house_prices.rds") %>% na.omit()

# Draw row indices for the test set: 20% of the rows of `house`.
# sample() with a single number n as `x` samples from 1:n.
sample_test <- sample(x = nrow(house), size = nrow(house) * 0.2)
length(sample_test)
## [1] 3133

# Test set: the sampled rows.
test <- house[sample_test, ]
nrow(test)
## [1] 3133

# Training set: every row that was not sampled.
train <- house[-sample_test, ]
nrow(train)
## [1] 12532
## Model 1: intercept-only regression of price, fitted on the training set
specification1 <- lm(formula = price ~ 1, data = train)

## Model summary
summary(specification1)
##
## Call:
## lm(formula = price ~ 1, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.455e+08 -3.955e+08 -2.455e+08 8.445e+07 2.535e+10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 645548810 7028684 91.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 786800000 on 12531 degrees of freedom

## Estimated coefficient
coef(specification1)
## (Intercept)
## 645548810

## Mean training price; it matches the intercept printed above
mean(train$price)
## [1] 645548810

## Predictions on the test set, stored as a new column of `test`
test$specification1 <- predict(specification1, newdata = test)

## Test MSE: mean squared gap between observed and predicted price.
# `test$specification1` is the prediction column, not the fitted lm object.
mse1 <- mean((test$price - test$specification1)^2)
mse1
## [1] 4.915094e+17
## Store the model results
# One row per model (five models are compared in this script). Typed NAs give
# the columns their final types (<chr>, <dbl>) from the start, instead of
# relying on the logical NA columns being coerced on first assignment.
models <- tibble(
  modelo = rep(NA_character_, 5),
  MSE = rep(NA_real_, 5)
)
models$modelo[1] <- "Modelo 1"
models$MSE[1] <- mse1
models
## # A tibble: 5 × 2
## modelo MSE
## <chr> <dbl>
## 1 Modelo 1 4.92e17
## 2 <NA> NA
## 3 <NA> NA
## 4 <NA> NA
## 5 <NA> NA
## Model 2: price regressed on the `l3` column treated as a factor
specification2 <- lm(price ~ as.factor(l3), data = train)

# Predictions on the test set, stored as a new column of `test`.
test$specification2 <- predict(specification2, newdata = test)

# Test MSE. The columns are referenced explicitly so that `specification2`
# clearly means the prediction column and not the fitted lm object.
mse2 <- mean((test$price - test$specification2)^2)
mse2
## [1] 4.574893e+17

# Record the result in row 2 of the comparison table.
models$modelo[2] <- "Modelo 2"
models$MSE[2] <- mse2

# Plot the MSE of the models stored so far; rows that are still NA are
# dropped by ggplot2 with the warnings shown below.
ggplot(data = models, aes(x = modelo, y = MSE, group = 1)) +
  geom_point() +
  geom_line(colour = "red") +
  theme_test()
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 row(s) containing missing values (geom_path).
## Model 3: adds rooms, bedrooms, bathrooms and surface_total to model 2
specification3 <- lm(
  price ~ as.factor(l3) + rooms + bedrooms + bathrooms + surface_total,
  data = train
)

# Predictions on the test set, stored as a new column of `test`.
test$specification3 <- predict(specification3, newdata = test)

# Test MSE, ignoring missing squared errors. `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned. The columns are
# referenced explicitly so `specification3` is clearly the prediction column.
mse3 <- mean((test$price - test$specification3)^2, na.rm = TRUE)
mse3
## [1] 3.024646e+17

# Record the result in row 3 of the comparison table.
models$modelo[3] <- "Modelo 3"
models$MSE[3] <- mse3

# Plot the MSE of the models stored so far; rows that are still NA are
# dropped by ggplot2 with the warnings shown below.
ggplot(data = models, aes(x = modelo, y = MSE, group = 1)) +
  geom_point() +
  geom_line(colour = "red") +
  theme_test()
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Model 4: adds the lon and lat columns to model 3
specification4 <- lm(
  price ~ as.factor(l3) + rooms + bedrooms + bathrooms + surface_total +
    lon + lat,
  data = train
)

# Predictions on the test set, stored as a new column of `test`.
test$specification4 <- predict(specification4, newdata = test)

# Test MSE, ignoring missing squared errors. `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned. The columns are
# referenced explicitly so `specification4` is clearly the prediction column.
mse4 <- mean((test$price - test$specification4)^2, na.rm = TRUE)
mse4
## [1] 2.968763e+17

# Record the result in row 4 of the comparison table.
models$modelo[4] <- "Modelo 4"
models$MSE[4] <- mse4

# Plot the MSE of the models stored so far; the row that is still NA is
# dropped by ggplot2 with the warnings shown below.
ggplot(data = models, aes(x = modelo, y = MSE, group = 1)) +
  geom_point() +
  geom_line(colour = "red") +
  theme_test()
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Model 5: replaces the linear bedrooms/bathrooms and lon/lat terms with
## interactions of polynomial bases (degrees 2 x 3 and 8 x 8)
# `a:b` adds only the interaction columns, without the main effects.
specification5 <- lm(
  price ~ as.factor(l3) + rooms +
    poly(bedrooms, 2):poly(bathrooms, 3) +
    surface_total +
    poly(lon, 8):poly(lat, 8),
  data = train
)

# Predictions on the test set, stored as a new column of `test`.
test$specification5 <- predict(specification5, newdata = test)

# Test MSE, ignoring missing squared errors. `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned. The columns are
# referenced explicitly so `specification5` is clearly the prediction column.
# NOTE(review): the printed value is higher than model 4's test MSE, which
# looks like the overfitting this comparison is meant to show; confirm.
mse5 <- mean((test$price - test$specification5)^2, na.rm = TRUE)
mse5
## [1] 3.912147e+17

# Record the result in row 5 of the comparison table.
models$modelo[5] <- "Modelo 5"
models$MSE[5] <- mse5

# Plot the test MSE of all five models.
ggplot(data = models, aes(x = modelo, y = MSE, group = 1)) +
  geom_point() +
  geom_line(colour = "red") +
  theme_test()