Para replicar las secciones de esta clase, primero debe descargar el
siguiente proyecto de R y abrir el archivo clase-06.Rproj.
## Install/load the packages used in this class
# library() fails loudly if pacman is missing (require() would only return
# FALSE); p_load() then loads the listed packages.
library(pacman)
p_load(tidyverse, rio, skimr, viridis)

# Fix the RNG seed so the train/test split below is reproducible.
set.seed(101010)

# Read the housing data and drop every row that contains a missing value.
house <- import("input/house_prices.rds") %>% na.omit()

# Draw 20% of the row indices for the test set; floor() makes the requested
# sample size an explicit whole number.
sample_test <- sample(x = nrow(house), size = floor(nrow(house) * 0.2))
length(sample_test)
## [1] 3133

# Test set: the sampled rows.
test <- house[sample_test, ]
nrow(test)
## [1] 3133

# Training set: every row that was not sampled.
train <- house[-sample_test, ]
nrow(train)
## [1] 12532
## Estimation
# Model 1: intercept-only regression of price, fitted on the training set.
specification1 <- lm(price ~ 1, data = train)

## Summary
summary(specification1)
##
## Call:
## lm(formula = price ~ 1, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.455e+08 -3.955e+08 -2.455e+08 8.445e+07 2.535e+10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 645548810 7028684 91.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 786800000 on 12531 degrees of freedom

## Coefficient
coef(specification1)
## (Intercept)
## 645548810

## Mean price
# With no predictors, the fitted intercept matches the training-sample mean
# of price (compare the two printed values).
mean(train$price)
## [1] 645548810
## Predictions
# Store Model 1's out-of-sample predictions as a new column of the test set.
test$specification1 <- predict(specification1, newdata = test)

## MSE
# Inside with(test, ...), `specification1` resolves to the prediction column
# just added to `test`, not to the fitted lm object of the same name.
mse1 <- with(test, mean((price - specification1)^2))
mse1
## [1] 4.915094e+17

## Store the model result
# Preallocate one row per model (five in total). Typed NAs give the columns
# their final types (character / double) from the start instead of relying
# on coercion from logical NA at the first assignment.
models <- tibble(modelo = rep(NA_character_, 5), MSE = rep(NA_real_, 5))
models$modelo[1] <- "Modelo 1"
models$MSE[1] <- mse1
models
## # A tibble: 5 × 2
## modelo MSE
## <chr> <dbl>
## 1 Modelo 1 4.92e17
## 2 <NA> NA
## 3 <NA> NA
## 4 <NA> NA
## 5 <NA> NA
## Model 2
# Regress price on `l3` treated as a categorical variable (as.factor).
# NOTE(review): `l3` looks like a location/administrative-level code; confirm
# against the data dictionary.
specification2 <- lm(price ~ as.factor(l3), data = train)

# Out-of-sample predictions, stored as a column of the test set.
test$specification2 <- predict(specification2, newdata = test)

# Test-set mean squared error; `specification2` here is the prediction column.
mse2 <- with(test, mean((price - specification2)^2))
mse2
## [1] 4.574893e+17

# Record the result in the second row of the comparison table.
models$modelo[2] <- "Modelo 2"
models$MSE[2] <- mse2

# Plot test MSE by model; rows not yet filled in are NA and get dropped by
# ggplot, which is what the warnings below report.
ggplot(data = models, aes(x = modelo, y = MSE, group = 1)) +
  geom_point() +
  geom_line(col = "red") +
  theme_test()
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 row(s) containing missing values (geom_path).
## Model 3
# Adds rooms, bedrooms, bathrooms and total surface to the `l3` factor.
specification3 <- lm(
  price ~ as.factor(l3) + rooms + bedrooms + bathrooms + surface_total,
  data = train
)

# Out-of-sample predictions, stored as a column of the test set.
test$specification3 <- predict(specification3, newdata = test)

# Test-set mean squared error; na.rm = TRUE skips any NA squared errors.
mse3 <- with(test, mean((price - specification3)^2, na.rm = TRUE))
mse3
## [1] 3.024646e+17

# Record the result in the third row of the comparison table.
models$modelo[3] <- "Modelo 3"
models$MSE[3] <- mse3

# Plot test MSE by model; rows not yet filled in are NA and get dropped by
# ggplot, which is what the warnings below report.
ggplot(data = models, aes(x = modelo, y = MSE, group = 1)) +
  geom_point() +
  geom_line(col = "red") +
  theme_test()
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Model 4
# Same covariates as Model 3 plus the coordinates `lon` and `lat`.
specification4 <- lm(
  price ~ as.factor(l3) + rooms + bedrooms + bathrooms + surface_total +
    lon + lat,
  data = train
)

# Out-of-sample predictions, stored as a column of the test set.
test$specification4 <- predict(specification4, newdata = test)

# Test-set mean squared error; na.rm = TRUE skips any NA squared errors.
mse4 <- with(test, mean((price - specification4)^2, na.rm = TRUE))
mse4
## [1] 2.968763e+17

# Record the result in the fourth row of the comparison table.
models$modelo[4] <- "Modelo 4"
models$MSE[4] <- mse4

# Plot test MSE by model; the remaining unfilled row is NA and gets dropped
# by ggplot, which is what the warnings below report.
ggplot(data = models, aes(x = modelo, y = MSE, group = 1)) +
  geom_point() +
  geom_line(col = "red") +
  theme_test()
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Model 5
# A much more flexible specification: interactions of polynomial terms in
# bedrooms/bathrooms (degrees 2 and 3) and in lon/lat (degree 8 each).
specification5 <- lm(
  price ~ as.factor(l3) + rooms + poly(bedrooms, 2):poly(bathrooms, 3) +
    surface_total + poly(lon, 8):poly(lat, 8),
  data = train
)

# Out-of-sample predictions, stored as a column of the test set.
test$specification5 <- predict(specification5, newdata = test)

# Test-set mean squared error; na.rm = TRUE skips any NA squared errors.
# The reported value is higher than Model 4's (2.97e17): the extra
# flexibility does not improve out-of-sample fit.
mse5 <- with(test, mean((price - specification5)^2, na.rm = TRUE))
mse5
## [1] 3.912147e+17

# Record the result in the fifth (last) row of the comparison table.
models$modelo[5] <- "Modelo 5"
models$MSE[5] <- mse5

# Plot test MSE across all five models.
ggplot(data = models, aes(x = modelo, y = MSE, group = 1)) +
  geom_point() +
  geom_line(col = "red") +
  theme_test()