library(gganimate)
## Loading required package: ggplot2
library(readxl)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(transformr)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(gifski)
# Load the target table (carID, price) and the feature table (carID plus
# car attributes). Paths are relative to the current working directory.
import1 <- read_csv("Desktop/y_train.csv")
## Rows: 4960 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): carID, price
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
import2 <- read_csv("Desktop/x_train.csv")
## Rows: 4960 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): brand, model, transmission, fuelType
## dbl (6): carID, year, mileage, tax, mpg, engineSize
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Pair each price with its features through the shared carID key instead of
# relying on both files listing cars in the same row order: a positional
# cbind() silently misaligns price and features if either file is reordered
# or filtered. `relationship = "one-to-one"` makes duplicated carIDs an error
# rather than a silent row fan-out.
# relocate() keeps `price` as the first column and as.data.frame() keeps the
# plain data.frame class that cbind() used to return.
dataset <- inner_join(
  import1,
  import2,
  by = "carID",
  relationship = "one-to-one"
) %>%
  relocate(price) %>%
  as.data.frame()

# Leave import1 as a price-only table, exactly as the script did before.
import1$carID <- NULL

# Print large numbers in fixed notation rather than scientific notation.
options(scipen = 999)
# Scatter plot of resale price against model year, overlaid with a fitted
# linear trend line (green).
yearprice <- ggplot(data = dataset, mapping = aes(x = year, y = price)) +
  geom_point(size = 0.25) +
  geom_smooth(method = lm, color = "green") +
  labs(
    title = "Used Car Price Considering Model Year",
    x = "Year of Car Model",
    y = "Resale Price in $"
  )
yearprice
## `geom_smooth()` using formula = 'y ~ x'
# Scatter plot of resale price against the square root of mileage, overlaid
# with a fitted linear trend line (red).
mileprice <- ggplot(
  data = dataset,
  mapping = aes(x = sqrt(mileage), y = price)
) +
  geom_point(size = 0.25) +
  geom_smooth(method = lm, color = "red") +
  labs(
    title = "Used Car Price Considering Mileage",
    x = "SQRT Mileage",
    y = "Resale Price in $"
  )
mileprice
## `geom_smooth()` using formula = 'y ~ x'
# Price-vs-mileage scatter that is handed to plotly for interactive display.
# `frame = year` and `text = ...` are extra aesthetics picked up by ggplotly():
# `frame` splits the points by model year and `text` supplies the per-point
# tooltip label.
milepriceyear <- ggplot(
  dataset,
  aes(
    mileage,
    price,
    frame = year,
    # paste0() avoids the stray spaces that paste()'s default sep = " "
    # inserted around the brand and model values in the tooltip text.
    text = paste0("Brand: ", brand, "<br>Model: ", model)
  )
) +
  geom_point(size = 1, alpha = 0.5) +
  labs(title = "Car Price By Mileage Considering Year")

# Convert to an interactive plotly widget and request hover-based highlighting.
ggplotly(milepriceyear) %>%
  highlight("plotly_hover")
# Simple linear regression of resale price on model year.
yearpricelm <- lm(formula = price ~ year, data = dataset)
summary(yearpricelm)
##
## Call:
## lm(formula = price ~ year, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20921 -9570 -3818 5515 112926
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5727812.13 140984.81 -40.63 <0.0000000000000002 ***
## year 2851.77 69.91 40.79 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14200 on 4958 degrees of freedom
## Multiple R-squared: 0.2513, Adjusted R-squared: 0.2511
## F-statistic: 1664 on 1 and 4958 DF, p-value: < 0.00000000000000022
# Simple linear regression of resale price on (untransformed) mileage.
mileagepricelm <- lm(formula = price ~ mileage, data = dataset)
summary(mileagepricelm)
##
## Call:
## lm(formula = price ~ mileage, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24571 -9749 -3045 6205 114154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31488.731916 292.348708 107.71 <0.0000000000000002 ***
## mileage -0.321313 0.008369 -38.39 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14410 on 4958 degrees of freedom
## Multiple R-squared: 0.2292, Adjusted R-squared: 0.229
## F-statistic: 1474 on 1 and 4958 DF, p-value: < 0.00000000000000022
# Multiple linear regression of resale price on both mileage and model year.
mileageyearpricelm <- lm(formula = price ~ mileage + year, data = dataset)
summary(mileageyearpricelm)
##
## Call:
## lm(formula = price ~ mileage + year, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21570 -9441 -3485 5809 112359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3711018.2696 217253.7952 -17.08 <0.0000000000000002 ***
## mileage -0.1533 0.0127 -12.08 <0.0000000000000002 ***
## year 1853.6444 107.6046 17.23 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13990 on 4957 degrees of freedom
## Multiple R-squared: 0.2727, Adjusted R-squared: 0.2724
## F-statistic: 929.3 on 2 and 4957 DF, p-value: < 0.00000000000000022