Showing posts with label Simple Linear Regression. Show all posts
Showing posts with label Simple Linear Regression. Show all posts

## Thursday, August 29, 2019

### Simple Linear Regression

1: In library(package, lib.loc = lib.loc, character.only = TRUE, logical.return = TRUE,  :
there is no package called ‘fOptions’
2: In library(package, lib.loc = lib.loc, character.only = TRUE, logical.return = TRUE,  :
there is no package called ‘fOptions’
> a = 3
> b = 4
> sqrt(a ^ 2 + b ^ 2)
[1] 5
>
> plot(sqrt(a ^ 2 + b ^ 2))
> library("ggplot2", lib.loc="~/R/win-library/3.6")
# A tibble: 10 x 11
manufacturer model displ  year   cyl trans
<chr>        <chr> <dbl> <int> <int> <chr>
1 audi         a4      1.8  1999     4 auto~
2 audi         a4      1.8  1999     4 manu~
3 audi         a4      2    2008     4 manu~
4 audi         a4      2    2008     4 auto~
5 audi         a4      2.8  1999     6 auto~
6 audi         a4      2.8  1999     6 manu~
7 audi         a4      3.1  2008     6 auto~
8 audi         a4 q~   1.8  1999     4 manu~
9 audi         a4 q~   1.8  1999     4 auto~
10 audi         a4 q~   2    2008     4 manu~
# ... with 5 more variables: drv <chr>,
#   cty <int>, hwy <int>, fl <chr>,
#   class <chr>
> The function str() will display the “structure” of the data frame
Error: unexpected 'function' in "The function"
> str(mpg)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 234 obs. of  11 variables:
$ manufacturer: chr  "audi" "audi" "audi" "audi" ...
$ model       : chr  "a4" "a4" "a4" "a4" ...
$ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
$ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
$ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
$ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
$ drv         : chr  "f" "f" "f" "f" ...
$ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
$ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
$ fl          : chr  "p" "p" "p" "p" ...
$ class       : chr  "compact" "compact" "compact" "compact" ...
>
> Central Tendency
Error: unexpected symbol in "Central Tendency"
> mean(mpg$cty)
[1] 16.85897
> median(mpg$cty)
[1] 17
> var(mpg$cty)
[1] 18.11307
> sd(mpg$cty)
[1] 4.255946
> IQR(mpg$cty)
[1] 5
> min(mpg$cty)
[1] 9
> max(mpg$cty)
[1] 35
> range(mpg$cty)
[1]  9 35
> table(mpg$drv)

4   f   r
103 106  25
> table(mpg$drv) / nrow(mpg)

4         f         r
0.4401709 0.4529915 0.1068376
>
> To visualize the data
> Histograms
> • Barplots
> • Boxplots
> • Scatterplots

### > Simple Linear Regression

Error: unexpected symbol in "Simple Linear"
> View(cars)
> plot(dist ~ speed, data = cars,
+      xlab = "Speed (in Miles Per Hour)",
+      ylab = "Stopping Distance (in Feet)",
+      main = "Stopping Distance vs Speed",
+      pch = 20,
+      cex = 2,
+      col = "grey")
>
> In the cars example, we are interested in using the predictor variable speed to predict and explain the
Error: unexpected symbol in "In the"
> response variable dist.
> Y = f(X) + ϵ.
Error: unexpected input in "Y = f(X) + \"
> • Response = Prediction + Error
> • Response = Signal + Noise
> • Response = Model + Unexplained
> • Response = Deterministic + Random
> • Response = Explainable + Unexplainable
> we will store the response variable as y and the
Error: unexpected symbol in "we will"
> predictor variable as x
> x = cars$speed
> y = cars$dist
> We then calculate the three sums of squares
> Sxy = sum((x - mean(x)) * (y - mean(y)))
> Sxx = sum((x - mean(x)) ^ 2)
> Syy = sum((y - mean(y)) ^ 2)
> c(Sxy, Sxx, Syy)
[1]  5387.40  1370.00 32538.98
> Then finally calculate $\hat{\beta}_0$ and $\hat{\beta}_1$.
> beta_1_hat = Sxy / Sxx
> beta_0_hat = mean(y) - beta_1_hat * mean(x)
> c(beta_0_hat, beta_1_hat)
[1] -17.579095   3.932409
> $\hat{y} = \hat{\beta}_0 + \hat{\beta}_1 x$.
> y_hat = beta_0_hat + beta_1_hat * x
> unique(cars$speed)
[1]  4  7  8  9 10 11 12 13 14 15 16 17 18 19
[15] 20 22 23 24 25
>
> SST = sum((y - mean(y)) ^ 2)
> SSReg = sum((y_hat - mean(y)) ^ 2)
> SSE = sum((y - y_hat) ^ 2)
> c(SST = SST, SSReg = SSReg, SSE = SSE)
> plot(dist ~ speed, data = cars,
+      xlab = "Speed (in Miles Per Hour)",
+      ylab = "Stopping Distance (in Feet)",
+      main = "Stopping Distance vs Speed",
+      pch = 20,
+      cex = 2,
+      col = "grey")
> abline(stop_dist_model, lwd = 3, col = "darkorange")

Error in abline(stop_dist_model, lwd = 3, col = "darkorange") :
  object 'stop_dist_model' not found
> abline(stop_dist_model, lwd = 3, col = "darkorange")
Error in abline(stop_dist_model, lwd = 3, col = "darkorange") :
  object 'stop_dist_model' not found
> stop_dist_model = lm(dist ~ speed, data = cars)
> stop_dist_model = lm(dist ~ speed, data = cars)
> abline(stop_dist_model, lwd = 3, col = "darkorange")
> c(beta_0_hat, beta_1_hat)
[1] -17.579095   3.932409
>
> abline(stop_dist_model, lwd = 3, col = "darkorange")
> names(stop_dist_model)
[1] "coefficients"  "residuals"
[3] "effects"       "rank"
[5] "fitted.values" "assign"
[7] "qr"            "df.residual"
[9] "xlevels"       "call"
[11] "terms"         "model"
>
> stop_dist_model$residuals
1          2          3          4
3.849460  11.849460  -5.947766  12.052234
5          6          7          8
2.119825  -7.812584  -3.744993   4.255007
9         10         11         12
12.255007  -8.677401   2.322599 -15.609810
13         14         15         16
-9.609810  -5.609810  -1.609810  -7.542219
17         18         19         20
0.457781   0.457781  12.457781 -11.474628
21         22         23         24
-1.474628  22.525372  42.525372 -21.407036
25         26         27         28
-15.407036  12.592964 -13.339445  -5.339445
29         30         31         32
-17.271854  -9.271854   0.728146 -11.204263
33         34         35         36
2.795737  22.795737  30.795737 -21.136672
37         38         39         40
-11.136672  10.863328 -29.069080 -13.069080
41         42         43         44
-9.069080  -5.069080   2.930920  -2.933898
45         46         47         48
-18.866307  -6.798715  15.201285  16.201285
49         50
43.201285   4.268876
> coef(stop_dist_model)
(Intercept)       speed
-17.579095    3.932409
>
> resid(stop_dist_model)
1          2          3          4
3.849460  11.849460  -5.947766  12.052234
5          6          7          8
2.119825  -7.812584  -3.744993   4.255007
9         10         11         12
12.255007  -8.677401   2.322599 -15.609810
13         14         15         16
-9.609810  -5.609810  -1.609810  -7.542219
17         18         19         20
0.457781   0.457781  12.457781 -11.474628
21         22         23         24
-1.474628  22.525372  42.525372 -21.407036
25         26         27         28
-15.407036  12.592964 -13.339445  -5.339445
29         30         31         32
-17.271854  -9.271854   0.728146 -11.204263
33         34         35         36
2.795737  22.795737  30.795737 -21.136672
37         38         39         40
-11.136672  10.863328 -29.069080 -13.069080
41         42         43         44
-9.069080  -5.069080   2.930920  -2.933898
45         46         47         48
-18.866307  -6.798715  15.201285  16.201285
49         50
43.201285   4.268876
> fitted(stop_dist_model)
1         2         3         4
-1.849460 -1.849460  9.947766  9.947766
5         6         7         8
13.880175 17.812584 21.744993 21.744993
9        10        11        12
21.744993 25.677401 25.677401 29.609810
13        14        15        16
29.609810 29.609810 29.609810 33.542219
17        18        19        20
33.542219 33.542219 33.542219 37.474628
21        22        23        24
37.474628 37.474628 37.474628 41.407036
25        26        27        28
41.407036 41.407036 45.339445 45.339445
29        30        31        32
49.271854 49.271854 49.271854 53.204263
33        34        35        36
53.204263 53.204263 53.204263 57.136672
37        38        39        40
57.136672 57.136672 61.069080 61.069080
41        42        43        44
61.069080 61.069080 61.069080 68.933898
45        46        47        48
72.866307 76.798715 76.798715 76.798715
49        50
76.798715 80.731124
>
> summary(stop_dist_model)

Call:
lm(formula = dist ~ speed, data = cars)

Residuals:
Min      1Q  Median      3Q     Max
-29.069  -9.525  -2.272   9.215  43.201

Coefficients:
Estimate Std. Error t value
(Intercept) -17.5791     6.7584  -2.601
speed         3.9324     0.4155   9.464
Pr(>|t|)
(Intercept)   0.0123 *
speed       1.49e-12 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 15.38 on 48 degrees of freedom
Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438
F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12

>
> names(summary(stop_dist_model))
[1] "call"          "terms"
[3] "residuals"     "coefficients"
[5] "aliased"       "sigma"
[7] "df"            "r.squared"
[9] "adj.r.squared" "fstatistic"
[11] "cov.unscaled"
>
> summary(stop_dist_model)$r.squared
[1] 0.6510794
> summary(stop_dist_model)$sigma
[1] 15.37959
>
> predict(stop_dist_model, newdata = data.frame(speed = 8))
1
13.88018
> predict(stop_dist_model, newdata = data.frame(speed = c(8, 21, 50)))
1         2         3
13.88018  65.00149 179.04134
> predict(stop_dist_model, newdata = cars)
1         2         3         4
-1.849460 -1.849460  9.947766  9.947766
5         6         7         8
13.880175 17.812584 21.744993 21.744993
9        10        11        12
21.744993 25.677401 25.677401 29.609810
13        14        15        16
29.609810 29.609810 29.609810 33.542219
17        18        19        20
33.542219 33.542219 33.542219 37.474628
21        22        23        24
37.474628 37.474628 37.474628 41.407036
25        26        27        28
41.407036 41.407036 45.339445 45.339445
29        30        31        32
49.271854 49.271854 49.271854 53.204263
33        34        35        36
53.204263 53.204263 53.204263 57.136672
37        38        39        40
57.136672 57.136672 61.069080 61.069080
41        42        43        44
61.069080 61.069080 61.069080 68.933898
45        46        47        48
72.866307 76.798715 76.798715 76.798715
49        50
76.798715 80.731124
>
> predict(stop_dist_model)
1         2         3         4
-1.849460 -1.849460  9.947766  9.947766
5         6         7         8
13.880175 17.812584 21.744993 21.744993
9        10        11        12
21.744993 25.677401 25.677401 29.609810
13        14        15        16
29.609810 29.609810 29.609810 33.542219
17        18        19        20
33.542219 33.542219 33.542219 37.474628
21        22        23        24
37.474628 37.474628 37.474628 41.407036
25        26        27        28
41.407036 41.407036 45.339445 45.339445
29        30        31        32
49.271854 49.271854 49.271854 53.204263
33        34        35        36
53.204263 53.204263 53.204263 57.136672
37        38        39        40
57.136672 57.136672 61.069080 61.069080
41        42        43        44
61.069080 61.069080 61.069080 68.933898
45        46        47        48
72.866307 76.798715 76.798715 76.798715
49        50
76.798715 80.731124
>
> fitted(stop_dist_model)
1         2         3         4
-1.849460 -1.849460  9.947766  9.947766
5         6         7         8
13.880175 17.812584 21.744993 21.744993
9        10        11        12
21.744993 25.677401 25.677401 29.609810
13        14        15        16
29.609810 29.609810 29.609810 33.542219
17        18        19        20
33.542219 33.542219 33.542219 37.474628
21        22        23        24
37.474628 37.474628 37.474628 41.407036
25        26        27        28
41.407036 41.407036 45.339445 45.339445
29        30        31        32
49.271854 49.271854 49.271854 53.204263
33        34        35        36
53.204263 53.204263 53.204263 57.136672
37        38        39        40
57.136672 57.136672 61.069080 61.069080
41        42        43        44
61.069080 61.069080 61.069080 68.933898
45        46        47        48
72.866307 76.798715 76.798715 76.798715
49        50
76.798715 80.731124

### Black-Scholes formula-R

Black-Scholes formula-R > BlackScholes <- function(TypeFlag = c("c", "p"), S, X, Time, r, b, sigma) { TypeFla...