DATA SCIENCE FINANCIAL CODE

 DATA SCIENCE  FINANCIAL CODE
Add code

> set.seed(4711)
> Index <- 1:260
> Returns <- rnorm(260)
> plot(x = Index, y = Returns)
> plot(x = Index, y = Returns, main = "Artificial Returns")
> par(mfrow = c(2, 1))
> Price = cumsum(Returns)
> plot(Index, Price, type = "l", main = "Line Plot")
> plot(Index, Returns, type = "h", main = "Histogram-like Vertical Lines")
> hist(Returns, probability = TRUE)
> curve(dnorm(x), min(Returns), max(Returns), add = TRUE, lwd = 2)
> plot(xf)
Error in plot(xf) : object 'xf' not found
> plot(x.df)
Error in plot(x.df) : object 'x.df' not found
> factorData <- factor(sample(c(rep("AMEX", times = 40), rep("NASDAQ",
+ times = 180), rep("NYSE", times = 90))))
> tsData = ts(matrix(rnorm(64), 64, 1), start = c(2001, 1), frequency = 12)
> plot(factorData, col = "steelblue")
> > plot(factorData, rnorm(length(factorData)), col = "orange")
Error: unexpected '>' in ">"
>  plot(factorData, rnorm(length(factorData)), col = "orange")
> plot(dnorm, min(tsData[, 1]), max(tsData[, 1]), xlab = "Returns",
+      yla = "Density", main = "Density")
> grid()
> plot(tsData, xlab = "Index", ylab = "Returns", main = "Return Series")
> abline(h = 0)
> abline(h = 2)
> abline(h=-2)

How to predict coronavirus data Bayes and Joint Probability Distributions

 There are predicting coronavirus data Bayes and Joint Probability Distributions.

we have 300,000 people in the population, to scale down the numbers from the millions for convenience.

These 1,500 have CORONAVIRUS(covs). So let’s create the population and then sample
from it.
> people = seq ( 1,300000 )
> people_covs = sample ( people,1500 )
> people_nocovs = setdiff(people, people_covs)
#how we also used the setdiff function to get the complement
set of the people who do not have coronavirus(covs). Now, of the people who have
coronavirus, we know that 99% of them test positive so let’s subset that list,
and also take its complement. These are joint events, and their numbers
proscribe the joint distribution.
> people_covs_pos =sample(people_covs, 1500*0.99 )
> people_covs_neg = setdiff(people_covs,people_covs_pos)
> length(people_covs_pos)
[1] 1485
> length(people_covs_neg)
[1] 15
#We can also subset the group that does not have CORONAVIRUS(covs), as we know that
the test is negative for them 95% of the time.
> people_nocovs_neg = sample(people_nocovs ,298500*0.95)
> people_nocovs_pos =setdiff(people_nocovs,people_nocovs_neg)
> length (people_nocovs_neg)
[1] 283575
> length(people_nocovs_pos)
[1] 14925
#We can now compute the probability that someone actually has coronavirus(covs)
when the test comes out positive.
> pr_covs_given_pos = (length(people_covs_pos))/(length(people_covs_pos)+length( people_nocovs_pos))
> pr_covs_given_pos
[1] 0.0904936
#we had examined earlier, what’s the chance
that you have coronavirus(covs)when the test is negative, i.e., a false negative?
> pr_covs_given_neg = (length(people_covs_neg))/(length(people_covs_neg)+length(people_nocovs_neg))
> pr_covs_given_neg
[1] 5.289326e-05

How to learn probabilistic linear models to predict a qualitative

#we looked at using probabilistic linear models to predict a qualitative
> response with the two most common methods: logistic regression and discriminant
package ‘leaps’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
C:\Users\ADMIN\AppData\Local\Temp\RtmpUDOrMk\downloaded_packages
> library("leaps", lib.loc="~/R/win-library/3.6")
Warning message:
package ‘leaps’ was built under R version 3.6.3 
> x<-matrix(rnorm(100),ncol=4)
> y<-rnorm(25)
> leaps(x,y
+ )
$which
      1     2     3     4
1 FALSE FALSE FALSE  TRUE
1  TRUE FALSE FALSE FALSE
1 FALSE FALSE  TRUE FALSE
1 FALSE  TRUE FALSE FALSE
2  TRUE FALSE FALSE  TRUE
2 FALSE FALSE  TRUE  TRUE
2 FALSE  TRUE FALSE  TRUE
2  TRUE FALSE  TRUE FALSE
2  TRUE  TRUE FALSE FALSE
2 FALSE  TRUE  TRUE FALSE
3  TRUE FALSE  TRUE  TRUE
3  TRUE  TRUE FALSE  TRUE
3 FALSE  TRUE  TRUE  TRUE
3  TRUE  TRUE  TRUE FALSE
4  TRUE  TRUE  TRUE  TRUE

$label
[1] "(Intercept)" "1"          
[3] "2"           "3"          
[5] "4"          

$size
 [1] 2 2 2 2 3 3 3 3 3 3 4 4 4 4 5

$Cp
 [1] 0.291457 1.330360 2.597818 2.800123
 [5] 1.479960 2.123237 2.214481 2.669777
 [9] 3.315351 4.290161 3.099933 3.465247
[13] 3.937318 4.529656 5.000000

> data(swiss)
> a<-regsubsets(as.matrix(swiss[,-1]),swiss[,1])
> summary(a)
Subset selection object
5 Variables  (and intercept)
                 Forced in Forced out
Agriculture          FALSE      FALSE
Examination          FALSE      FALSE
Education            FALSE      FALSE
Catholic             FALSE      FALSE
Infant.Mortality     FALSE      FALSE
1 subsets of each size up to 5
Selection Algorithm: exhaustive
         Agriculture Examination Education
1  ( 1 ) " "         " "         "*"      
2  ( 1 ) " "         " "         "*"      
3  ( 1 ) " "         " "         "*"      
4  ( 1 ) "*"         " "         "*"      
5  ( 1 ) "*"         "*"         "*"      
         Catholic Infant.Mortality
1  ( 1 ) " "      " "             
2  ( 1 ) "*"      " "             
3  ( 1 ) "*"      "*"             
4  ( 1 ) "*"      "*"             
5  ( 1 ) "*"      "*"             
> coef(a, 1:3)
[[1]]
(Intercept)   Education 
 79.6100585  -0.8623503 

[[2]]
(Intercept)   Education    Catholic 
 74.2336892  -0.7883293   0.1109210 

[[3]]
     (Intercept)        Education 
     48.67707330      -0.75924577 
        Catholic Infant.Mortality 
      0.09606607       1.29614813 

> vcov(a, 3)
                  (Intercept)
(Intercept)      62.711883147
Education        -0.234998201
Catholic         -0.001112006
Infant.Mortality -2.952862263
                     Education
(Intercept)      -0.2349982009
Education         0.0136416868
Catholic          0.0004427309
Infant.Mortality  0.0033603646
                      Catholic
(Intercept)      -0.0011120059
Education         0.0004427309
Catholic          0.0007408169
Infant.Mortality -0.0017163629
                 Infant.Mortality
(Intercept)          -2.952862263
Education             0.003360365
Catholic             -0.001716363
Infant.Mortality      0.149759535
> > library(MASS)
Error: unexpected '>' in ">"
>  library(MASS)
> data(biopsy)
> str(biopsy)
'data.frame': 699 obs. of  11 variables:
 $ ID   : chr  "1000025" "1002945" "1015425" "1016277" ...
 $ V1   : int  5 5 3 6 4 8 1 2 2 4 ...
 $ V2   : int  1 4 1 8 1 10 1 1 1 2 ...
 $ V3   : int  1 4 1 8 1 10 1 2 1 1 ...
 $ V4   : int  1 5 1 1 3 8 1 1 1 1 ...
 $ V5   : int  2 7 2 3 2 7 2 2 2 2 ...
 $ V6   : int  1 10 2 4 1 10 10 1 1 1 ...
 $ V7   : int  3 3 3 3 3 9 3 3 1 2 ...
 $ V8   : int  1 2 1 7 1 7 1 1 1 1 ...
 $ V9   : int  1 1 1 1 1 1 1 1 5 1 ...
 $ class: Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
> biopsy$ID = NULL
> names(biopsy) = c("thick", "u.size", "u.shape", "adhsn", "s.size",
+                   "nucl", "chrom", "n.nuc", "mit", "class")
> names(biopsy)
 [1] "thick"   "u.size"  "u.shape"
 [4] "adhsn"   "s.size"  "nucl"   
 [7] "chrom"   "n.nuc"   "mit"    
[10] "class"  
> biopsy.v2 = na.omit(biopsy)
> library(reshape2)
> library(ggplot2)
> biop.m = melt(biopsy.v2, id.var="class")
> ggplot(data=biop.m, aes(x=class, y=value)) + geom_boxplot()
> +facet_wrap(~variable,ncol = 3)
Error: Cannot use `+.gg()` with a single argument. Did you accidentally put + on a new line?
> library(corrplot)
corrplot 0.84 loaded
Warning message:
package ‘corrplot’ was built under R version 3.6.3 
> bc = cor(biopsy.v2[ ,1:9])
> corrplot.mixed(bc)

> set.seed(123)
> ind = sample(2, nrow(biopsy.v2), replace=TRUE, prob=c(0.7, 0.3))
> train = biopsy.v2[ind==1,] 
> test = biopsy.v2[ind==2,]
> str(test) 
'data.frame': 209 obs. of  10 variables:
 $ thick  : int  5 6 4 2 1 7 6 7 1 3 ...
 $ u.size : int  4 8 1 1 1 4 1 3 1 2 ...
 $ u.shape: int  4 8 1 2 1 6 1 2 1 1 ...
 $ adhsn  : int  5 1 3 1 1 4 1 10 1 1 ...
 $ s.size : int  7 3 2 2 1 6 2 5 2 1 ...
 $ nucl   : int  10 4 1 1 1 1 1 10 1 1 ...
 $ chrom  : int  3 3 3 3 3 4 3 5 3 2 ...
 $ n.nuc  : int  2 7 1 1 1 3 1 4 1 1 ...
 $ mit    : int  1 1 1 1 1 1 1 4 1 1 ...
 $ class  : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 2 1 1 ...
 - attr(*, "na.action")= 'omit' Named int  24 41 140 146 159 165 236 250 276 293 ...
  ..- attr(*, "names")= chr  "24" "41" "140" "146" ...
> table(train$class)

   benign malignant 
      302       172 
> table(test$class)

   benign malignant 
      142        67 
> full.fit = glm(class~., family=binomial, data=train)
> summary(full.fit)

Call:
glm(formula = class ~ ., family = binomial, data = train)

Deviance Residuals: 
    Min       1Q   Median       3Q  
-3.3397  -0.1387  -0.0716   0.0321  
    Max  
 2.3559  

Coefficients:
            Estimate Std. Error z value
(Intercept)  -9.4293     1.2273  -7.683
thick         0.5252     0.1601   3.280
u.size       -0.1045     0.2446  -0.427
u.shape       0.2798     0.2526   1.108
adhsn         0.3086     0.1738   1.776
s.size        0.2866     0.2074   1.382
nucl          0.4057     0.1213   3.344
chrom         0.2737     0.2174   1.259
n.nuc         0.2244     0.1373   1.635
mit           0.4296     0.3393   1.266
            Pr(>|z|)    
(Intercept) 1.55e-14 ***
thick       0.001039 ** 
u.size      0.669165    
u.shape     0.268044    
adhsn       0.075722 .  
s.size      0.167021    
nucl        0.000826 ***
chrom       0.208006    
n.nuc       0.102126    
mit         0.205402    
---
Signif. codes:  
  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’
  0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 620.989  on 473  degrees of freedom
Residual deviance:  78.373  on 464  degrees of freedom
AIC: 98.373

Number of Fisher Scoring iterations: 8

> confint(full.fit)
Waiting for profiling to be done...
                   2.5 %     97.5 %
(Intercept) -12.23786660 -7.3421509
thick         0.23250518  0.8712407
u.size       -0.56108960  0.4212527
u.shape      -0.24551513  0.7725505
adhsn        -0.02257952  0.6760586
s.size       -0.11769714  0.7024139
nucl          0.17687420  0.6582354
chrom        -0.13992177  0.7232904
n.nuc        -0.03813490  0.5110293
mit          -0.14099177  1.0142786
> exp(coef(full.fit))
 (Intercept)        thick       u.size 
8.033466e-05 1.690879e+00 9.007478e-01 
     u.shape        adhsn       s.size 
1.322844e+00 1.361533e+00 1.331940e+00 
        nucl        chrom        n.nuc 
1.500309e+00 1.314783e+00 1.251551e+00 
         mit 
1.536709e+00 

USE KURTOSIS AND SKEWNESS

HOW TO USE KURTOSIS AND SKEWNESS

 

KURTOSIS AND SKEWNESS 



package ‘e1071’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
C:\Users\ADMIN\AppData\Local\Temp\Rtmp48CkwN\downloaded_packages
> library("e1071", lib.loc="~/R/win-library/3.6")

Attaching package: ‘e1071’

    kurtosis, skewness

Warning message:
package ‘e1071’ was built under R version 3.6.3 
> x <- rnorm(100)
> kurtosis(x)
[1] -0.09225218
> x <- rnorm(100)
> skewness(x)
[1] 0.2068412
> sigmoid(x)
  [1] 0.7924609 0.6012173 0.4015700
  [4] 0.1168985 0.5446880 0.4903706
  [7] 0.7195269 0.7096943 0.6016002
 [10] 0.2019729 0.3525087 0.5408626
 [13] 0.4601341 0.2470393 0.3445243
 [16] 0.6738519 0.6390173 0.5017720
 [19] 0.3256660 0.3072329 0.5314812
 [22] 0.6289924 0.8502339 0.4765683
 [25] 0.5006393 0.3940206 0.6075869
 [28] 0.4465308 0.8705377 0.6635064
 [31] 0.7139986 0.7578363 0.5363243
 [34] 0.3642131 0.6399949 0.2983353
 [37] 0.2082072 0.6549224 0.4033734
 [40] 0.4480780 0.2010318 0.5400087
 [43] 0.7412638 0.4817815 0.3450085
 [46] 0.7708486 0.6844388 0.2889012
 [49] 0.6963281 0.5850107 0.7936972
 [52] 0.6423643 0.6100179 0.9271042
 [55] 0.9262851 0.3325886 0.3185873
 [58] 0.5040252 0.3851227 0.6991765
 [61] 0.7593792 0.7302417 0.6990238
 [64] 0.8273355 0.1847269 0.5340112
 [67] 0.2264907 0.3353517 0.9484346
 [70] 0.8229770 0.4842939 0.4178209
 [73] 0.2967966 0.5143271 0.3565440
 [76] 0.3225993 0.5873630 0.2234188
 [79] 0.8293041 0.3681796 0.8147751
 [82] 0.2231620 0.7632167 0.4017692
 [85] 0.4799441 0.6182596 0.5266224
 [88] 0.5912255 0.8460056 0.1184548
 [91] 0.6034997 0.6074844 0.6780144
 [94] 0.4481176 0.7328068 0.6624322
 [97] 0.6486271 0.3152354 0.4292004
[100] 0.8286585
> dsigmoid(x)
  [1] 0.16446660 0.23975505 0.24031154
  [4] 0.10323324 0.24800298 0.24990727
  [7] 0.20180793 0.20602831 0.23967741
 [10] 0.16117983 0.22824631 0.24833025
 [13] 0.24841071 0.18601089 0.22582730
 [16] 0.21977550 0.23067419 0.24999686
 [19] 0.21960765 0.21284085 0.24900893
 [22] 0.23336097 0.12733620 0.24945095
 [25] 0.24999959 0.23876837 0.23842506
 [28] 0.24714104 0.11270181 0.22326567
 [31] 0.20420462 0.18352043 0.24868054
 [34] 0.23156192 0.23040143 0.20933134
 [37] 0.16485699 0.22599906 0.24066330
 [40] 0.24730411 0.16061803 0.24839930
 [43] 0.19179176 0.24966809 0.22597763
 [46] 0.17664105 0.21598233 0.20543730
 [49] 0.21145526 0.24277318 0.16374195
 [52] 0.22973240 0.23789606 0.06758202
 [55] 0.06828103 0.22197343 0.21708943
 [58] 0.24998380 0.23680320 0.21032871
 [61] 0.18272245 0.19698874 0.21038951
 [64] 0.14285149 0.15060289 0.24884324
 [67] 0.17519267 0.22289095 0.04890642
 [70] 0.14568584 0.24975332 0.24324659
 [73] 0.20870840 0.24979474 0.22942037
 [76] 0.21852898 0.24236770 0.17350285
 [79] 0.14155880 0.23262337 0.15091666
 [82] 0.17336070 0.18071697 0.24035071
 [85] 0.24959776 0.23601467 0.24929125
 [88] 0.24167791 0.13028010 0.10442327
 [91] 0.23928782 0.23844711 0.21831089
 [94] 0.24730821 0.19580098 0.22361579
 [97] 0.22790998 0.21586205 0.24498742
[100] 0.14198362
> d2sigmoid(x)
  [1] -0.0962001121 -0.0485347410
  [3]  0.0473077111  0.0790976167
  [5] -0.0221655221  0.0048129378
  [7] -0.0886045466 -0.0864059134
  [9] -0.0487025268  0.0960719243
 [11]  0.0673287026 -0.0202948456
 [13]  0.0198062388  0.0941068862
 [15]  0.0702213253 -0.0764167954
 [17] -0.0641354039 -0.0008859772
 [19]  0.0765701691  0.0820574196
 [21] -0.0156782067 -0.0602035621
 [23] -0.0891949150  0.0116901260
 [25] -0.0003196607  0.0506090505
 [27] -0.0513028320  0.0264288900
 [29] -0.0835205388 -0.0730107132
 [31] -0.0873989881 -0.0946364661
 [33] -0.0180663040  0.0628861539
 [35] -0.0645100404  0.0844294905
 [37]  0.0962081495 -0.0700246146
 [39]  0.0465089613  0.0256810513
 [41]  0.0960393588 -0.0198762770
 [43] -0.0925448333  0.0090971429
 [45]  0.0700492322 -0.0956859528
 [47] -0.0796710462  0.0867351325
 [49] -0.0830292377 -0.0412766260
 [51] -0.0961811072 -0.0654113974
 [53] -0.0523456462 -0.0577291270
 [55] -0.0582143683  0.0743217566
 [57]  0.0787655589 -0.0020124786
 [59]  0.0544066307 -0.0837850845
 [61] -0.0947887922 -0.0907100610
 [63] -0.0837450567 -0.0935207186
 [65]  0.0949620716 -0.0169268988
 [67]  0.0958336440  0.0733972126
 [69] -0.0438626631 -0.0941063585
 [71]  0.0078452920  0.0399795769
 [73]  0.0848204933 -0.0071576502
 [75]  0.0658234717  0.0775344030
 [77] -0.0423479494  0.0959752468
 [79] -0.0932317896  0.0613290307
 [81] -0.0950096028  0.0959856723
 [83] -0.0951354489  0.0472196933
 [85]  0.0100117943 -0.0558219940
 [87] -0.0132734716 -0.0440943718
 [89] -0.0901552964  0.0796843926
 [91] -0.0495324249 -0.0512586801
 [93] -0.0777249452  0.0256619068
 [95] -0.0911676106 -0.0726447915
 [97] -0.0677472046  0.0797673218
 [99]  0.0346900170 -0.0933282339
> plot(sigmoid, -5, 5, ylim = c(-.2, 1))
> plot(dsigmoid, -5, 5, add = TRUE, col = 2)
> plot(d2sigmoid, -5, 5, add = TRUE, col = 3)
> tckr = c('SPY')
> end<- format(Sys.Date(),"%Y-%m-%d")
> start<-format(Sys.Date() - 365*20,"%Y-%m-%d")
> dat1 = (getSymbols(tckr[1], src="yahoo", from=start, to=end, auto.assign = FALSE))
‘getSymbols’ currently uses auto.assign=TRUE by default, but will
use auto.assign=FALSE in 0.5-0. You will still be able to use
‘loadSymbols’ to automatically load data. getOption("getSymbols.env")
and getOption("getSymbols.auto.assign") will still be checked for
alternate defaults.

This message is shown once per session and may be disabled by setting 
options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.

> ret1 = as.numeric((dat1[,4] - dat1[,1])/dat1[,1] )
> n = length(as.numeric(ret1))
> time0 = index(dat1)
> head(time0) ; tail(time0)
[1] "2000-04-18" "2000-04-19" "2000-04-20"
[4] "2000-04-24" "2000-04-25" "2000-04-26"
[1] "2020-04-02" "2020-04-03" "2020-04-06"
[4] "2020-04-07" "2020-04-08" "2020-04-09"
> s = seq(1,n,22)
> k = NULL
> for (i in 1:(length(s)-1)){
+     k[i] = 3+kurtosis(ret1[s[i]:s[(i+1)]])
+ }
> time0[s[which(k>quantile(k,.95))]]
 [1] "2005-12-27" "2007-02-16" "2009-10-30"
 [4] "2011-01-21" "2012-06-14" "2012-12-24"
 [7] "2013-01-28" "2017-05-09" "2017-07-12"
[10] "2019-03-12" "2019-08-16" "2020-01-24"
> plot(k~time0[s[1:(length(s)-1)] ], ty = "b", xlab = "Time", ylab = "Monthly Kurtosis", 
+      main = "Monthly Kurtosis over Time")
> abline(a = quantile(k,.90), b = 0, lwd = 2, col = 2)
> abline(a = quantile(k,.95), b = 0, lwd = 2, col = 3)
> abline(a = quantile(k,.99), b = 0, lwd = 2, col = 4)

Black-Scholes formula-R

 Black-Scholes formula-R

> BlackScholes <- function(TypeFlag = c("c", "p"), S, X, Time, r, b, sigma) { TypeFlag = TypeFlag[1] d1 = (log(S/X) + (b + sigma * sigma/2) * Time)/(sigma * sqrt(Time)) d2 = d1 - sigma * sqrt(Time) if (TypeFlag == "c") price = S * exp((b - r) * Time) * pnorm(d1) - X * exp(-r * Time) * pnorm(d2) else if (TypeFlag == "p") price = X * exp(-r * Time) * pnorm(-d2) - S * exp((b - r) * Time) * pnorm(-d1) param <- list(TypeFlag = TypeFlag, S = S, X = X, Time = Time, r = r, b = b, sigma = sigma) ans <- list(parameters = param, price = price, option = "Black Scholes") class(ans) <- c("option", "list") ans }


> library("ggplot2", lib.loc="~/R/win-library/3.6")
> x <- seq(-4, 4, l = 50)
> y <- x
> myf <- function(x, y) {
+     sin(x) + cos(y)
+ }
> z <- outer(x, y, FUN = myf)
> persp(x, y, z, theta = 45, phi = 45, shade = 0.45)
> Cap = floor(Capitalization/1000)
> Cap
                2003  2004 2005  2006  2007 2008
Euronext US     1328 12707 3632 15421 15650 9208
TSX Group        888  1177 1482  1700  2186 1033
Australian SE    585   776  804  1095  1298  683
Bombay SE        278   386  553   818  1819  647
Hong Kong SE     714   861 1054  1714  2654 1328
NSE India        252   363  515   774  1660  600
Shanghai SE      360   314  286   917  3694 1425
Tokyo SE        2953  3557 4572  4614  4330 3115
BME Spanish SE   726   940  959  1322  1781  948
Deutsche Boerse 1079  1194 1221  1637  2105 1110
London SE       2460  2865 3058  3794  3851 1868
Euronext EU     2076  2441 2706  3712  4222 2101
SIX SE           727   826  935  1212  1271  857
> barplot(t(Cap)/1e+06, beside = TRUE, las = 2, ylab = "Capitalization [Mio USD]")
> title(main = "Major Stock Markets")
>
 function


> mtext(side = 3, "2003 - 2008")
> barplot(Cap/1e+06, beside = TRUE, ylab = "Capitalization [Mio USD]")
> palette(rainbow(13, s = 0.6, v = 0.75))
> stars(t(log(Cap)), draw.segments = TRUE, ncol = 3, nrow = 2,
+       key.loc = c(4.6, -0.5), mar = c(15, 0, 0, 0))
> mtext(side = 3, line = 2.2, text = "Growth and Decline of Major Stock Markets",
+       cex = 1.5, font = 2)
> abline(h = 0.9)
calldelta(100, 100, 0.30, (15/365), 0.02, 0.02)
[1] 0.5117085
> callgreek("delta", 100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 0.5127391
> callgreek("delta", 100, 100, 0.20, (15/365), 0.02, 0.02)
[1] 0.5076694
> callgreek("gamma", 100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 0.05663458
> callgreek("gamma", 100, 100, 0.20, (15/365), 0.02, 0.02)
[1] 0.09829574
> callpremium(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 2.794086
> callrho(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 0.05976964
> calltheta(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] -0.02979083
> dv(s = 100, x1 = 90, x2 = 95, x3 = 105, x4 = 110, t = 0.08, r = 0.02, sigma = 0.2, vol = 0.3)
                    V1
Spot            100.00
Long.Put         90.00
Short.Put        95.00
Short.Call      105.00
Long.Call       110.00
Lower.BE         94.01
Higher.BE       105.99
Prob.BE           0.45
Prob.Max.Profit   0.38
Prob.Max.Loss     0.32
> iv.calc(type = "call", price = 42.93, s = 100, x = 100, t = (15/365), r = 0.02, d = 0)
[1] 5
> lambda(type = "put", s = 100, x = 100, sigma = 0.15, t = 45/365, r = 0.02)
[1] -23.80998
> opteval(100, 100, 0.20, (45/365), 0.02, 0.02)
  Option  Premium      Delta      Gamma
1   Call 2.794086  0.5127391 0.05663458
2    Put 2.794086 -0.4847982 0.05663458
       Vega       Theta         Rho
1 0.1396469 -0.02979083  0.05976964
2 0.1396469 -0.02979083 -0.06321441
> opteval(100, 9200, 0.20, (45/365), 0.02, 0.02)
  Option  Premium      Delta Gamma Vega
1   Call    0.000  0.0000000     0    0
2    Put 9077.589 -0.9975373     0    0
     Theta       Rho
1 0.000000   0.00000
2 4.034484 -11.31453
> opteval(100, 100, 0.20, (45/365), 0.02, 0.02)
  Option  Premium      Delta      Gamma
1   Call 2.794086  0.5127391 0.05663458
2    Put 2.794086 -0.4847982 0.05663458
       Vega       Theta         Rho
1 0.1396469 -0.02979083  0.05976964
2 0.1396469 -0.02979083 -0.06321441
> plotbearcall(s= 100, x1 = 95, x2 = 105, t = (45/365), r = 0.02,sigma = 0.20, sigma2 = 0.20, d = 0, ll = 0.75, ul = 1.25)
> plotbearput(s= 100, x1 = 95, x2 = 105, t = (45/365), r = 0.02,
+             sigma = 0.20, sigma2 = 0.20, d = 0, ll = 0.75, ul = 1.25)
> plotbullcall(s= 100, x1 = 95, x2 = 105, t = (45/365), r = 0.02,
+              sigma = 0.20, sigma2 = 0.20, d = 0, ll = 0.75, ul = 1.25)
> plotbullput(s= 100, x1 = 95, x2 = 105, t = (45/365), r = 0.02,
+             sigma = 0.20, sigma2 = 0.20, d = 0, ll = 0.75, ul = 1.25)
> plotdv(s= 100, x1 = 90, x2 = 95, x3 = 105, x4 = 110, t = (45/365), r = 0.02, sigma = 0.20)
> plotvertical("call", 100, 90, 110, (45/365), 0.02, 0.20)
> prob.above(spot = 100, lower = 110, mean = 0, dsd = 0.01, dte = 45)
[1] 0.06801856
> prob.above(spot = 100, mean = 0, dsd = 0.01, dte = 45, p = 0.75, quantile = TRUE)
  probability percent.change    price
1        0.75    -0.04524615 95.47539
> prob.below(spot = 100, upper = 110, mean = 0, dsd = 0.01, dte = 45)
[1] 0.9319814
> prob.below(spot = 100, mean = 0, dsd = 0.01, dte = 45, p = 0.75, quantile = TRUE)
  probability percent.change    price
1        0.75     0.04524615 104.5246
> prob.btwn(spot = 100, mean = 0, dsd = 0.01, dte = 45, p = 0.75, quantile = TRUE)
  probability percent.change     price
1        0.75    -0.07716778  92.28322
2        0.75     0.07716778 107.71678
> putdelta(100, 0.20, (45/365), 0.02, 0.02)
[1] 0
> puteval(100, 100, 0.20, (45/365), 0.02, 0.02)
   Premium      Delta      Gamma      Vega
1 2.794086 -0.4847982 0.05663458 0.1396469
        Theta         Rho
1 -0.02979083 -0.06321441
> putgreek("vega", 100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 0.1396469
> putpremium(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 2.794086
> putrho(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] -0.06321441
> puttheta(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] -0.02979083
> r.cont(0.12, 2)
[1] 0.1165378
> vertical("call", s = 100, x1 = 90, x2 = 110, t = (45/365), r =  0.025, sigma = 0.20, vol = 0.25)
                    V1
Spot            100.00
Short.Strike     90.00
Long.Strike     110.00
Max.Profit       10.13
Max.Loss         -9.87
Breakeven       100.13
Prob.BE           0.50
Prob.Max.Profit   0.17
Prob.Max.Loss     0.17
Initial.DC       10.13
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method=c("best"),
+           sampling=c("QMC"),metpar=list(maxiter=100,tol=1.e-14,cvmethod="splitting"),
+           sampar=list(nout=50,n=2039,a=1487,baker=TRUE,genmethod="pca"))
          result error estimate
price 6.15604075   3.336613e-08
delta 0.59385393   2.731083e-09
gamma 0.03057798   6.326157e-10
> AsianCall_AppLord(T = 1, d = 12, K = 100, r = 0.05, sigma = 0.2, S0 = 100)
[1] 6.15604
> AsianCall(T=1,d=12,K=170,r=0.05,sigma=0.2,S0=100,method="best",
+           sampling="MC",metpar=list(maxiter=100,tol=1.e-14,np=1000),
+           sampar=list(n=10^5))
            result error estimate
price 1.522912e-04   3.763444e-09
delta 5.091253e-05   8.851702e-10
gamma 1.578444e-05   1.759080e-10
> AsianCall_AppLord(T = 1, d = 12, K = 170, r = 0.05, sigma = 0.2, S0 = 100)
[1] 0.0001524371
> AsianCall_AppLord(T = 1, d = 12, K = 100, r = 0.05, sigma = 0.2, S0 = 100)
[1] 6.15604
> BS_EC(K=100, r = 0.05, sigma = 0.2, T = 0.25, S0 = 100)
     price      delta      gamma 
4.61499713 0.56946018 0.05694602 
> BS_EP(K=100, r = 0.05, sigma = 0.2, T = 0.25, S0 = 100)
      price       delta       gamma 
 3.37277718 -0.43053982  0.05694602 
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method=c("best","naive"),
+           sampling=c("QMC","MC"),
+           metpar=list(maxiter=100,tol=1.e-14,cvmethod="splitting"),
+           sampar=list(nout=50,seq.type="korobov",n=2039,a=1487,
+                       baker=TRUE,genmethod="pca"))
          result error estimate
price 6.15604079   2.957191e-08
delta 0.59385393   2.440579e-09
gamma 0.03057798   5.350724e-10
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method="best",
+           sampling="MC",metpar=list(maxiter=100,tol=1.e-14,np=1000),
+           sampar=list(n=10^5))
          result error estimate
price 6.15604075   1.945657e-07
delta 0.59385394   1.369256e-08
gamma 0.03057798   3.526989e-09
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method="naive",
+           sampling="QMC",
+           sampar=list(nout=50,n=2039,a=1487,baker=TRUE,genmethod="pca"))
          result error estimate
price 6.15618443   0.0007920166
delta 0.59392376   0.0004117103
gamma 0.03059009   0.0001390430
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method="naive",
+           sampling="MC",sampar=list(n=10^5))
          result error estimate
price 6.16783497   0.0527644567
delta 0.59392926   0.0032910230
gamma 0.03071819   0.0007318875
> AsianCall_AppLord(T = 1, d = 12, K = 100, r = 0.05, sigma = 0.25, S0 = 100, all = TRUE)
[1] 7.312461
> BS_EC(K=100, r = 0.05, sigma = 0.2, T = 0.25, S0 = 100)
     price      delta      gamma 
4.61499713 0.56946018 0.05694602 
> BS_EP(K=100, r = 0.05, sigma = 0.2, T = 0.25, S0 = 100)
      price       delta       gamma 
 3.37277718 -0.43053982  0.05694602 
> price = 1200 # current price of the bond
> C = 40 # coupon payment
> T= 30 # time to maturity
> par = 1000 # par value of the bond
> r = seq(0.02, 0.05, length = 300)

Logistic regression

we will focus on the logistic function. 

The logistic function used in logistic regression
 data(Carseats)
> str(Carseats)
'data.frame': 400 obs. of  11 variables:
 $ Sales      : num  9.5 11.22 10.06 7.4 4.15 ...
 $ CompPrice  : num  138 111 113 117 141 124 115 136 132 132 ...
 $ Income     : num  73 48 35 100 64 113 105 81 110 113 ...
 $ Advertising: num  11 16 10 4 3 13 0 15 0 0 ...
 $ Population : num  276 260 269 466 340 501 45 425 108 131 ...
 $ Price      : num  120 83 80 97 128 72 108 120 124 124 ...
 $ ShelveLoc  : Factor w/ 3 levels "Bad","Good","Medium": 1 2 3 3 1 1 3 2 3 3 ...
 $ Age        : num  42 65 59 55 38 78 71 67 76 76 ...
 $ Education  : num  17 10 12 14 13 16 15 10 10 17 ...
 $ Urban      : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
 $ US         : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
>
> data(Smarke)
Warning message:
In data(Smarke) : data set ‘Smarke’ not found
> data(Smarket)
> str(Smarket)
'data.frame': 1250 obs. of  9 variables:
 $ Year     : num  2001 2001 2001 2001 2001 ...
 $ Lag1     : num  0.381 0.959 1.032 -0.623 0.614 ...
 $ Lag2     : num  -0.192 0.381 0.959 1.032 -0.623 ...
 $ Lag3     : num  -2.624 -0.192 0.381 0.959 1.032 ...
 $ Lag4     : num  -1.055 -2.624 -0.192 0.381 0.959 ...
 $ Lag5     : num  5.01 -1.055 -2.624 -0.192 0.381 ...
 $ Volume   : num  1.19 1.3 1.41 1.28 1.21 ...
 $ Today    : num  0.959 1.032 -0.623 0.614 0.213 ...
 $ Direction: Factor w/ 2 levels "Down","Up": 2 2 1 2 2 2 1 2 2 2 ...
> sales.fit = lm(Sales~Advertising+ShelveLoc, data=Carseats)
>
> summary(sales.fit)

Call:
lm(formula = Sales ~ Advertising + ShelveLoc, data = Carseats)

Residuals:
    Min      1Q  Median      3Q     Max
-6.6480 -1.6198 -0.0476  1.5308  6.4098

Coefficients:
                Estimate Std. Error t value Pr(>|t|)   
(Intercept)      4.89662    0.25207  19.426  < 2e-16 ***
Advertising      0.10071    0.01692   5.951 5.88e-09 ***
ShelveLocGood    4.57686    0.33479  13.671  < 2e-16 ***
ShelveLocMedium  1.75142    0.27475   6.375 5.11e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.244 on 396 degrees of freedom
Multiple R-squared:  0.3733, Adjusted R-squared:  0.3685
F-statistic: 78.62 on 3 and 396 DF,  p-value: < 2.2e-16

> contrasts(Carseats$ShelveLoc)
       Good Medium
Bad       0      0
Good      1      0
Medium    0      1
> contrasts(Carseats$Urban)
    Yes
No    0
Yes   1
> contrasts(Carseats$Us)
Error in contrasts(Carseats$Us) : contrasts apply only to factors
> contrasts(Carseats$us)
Error in contrasts(Carseats$us) : contrasts apply only to factors
> contrasts(Carseats$US)
    Yes
No    0
Yes   1
> contrasts(Carseats$Price)
Error in contrasts(Carseats$Price) : contrasts apply only to factors
> lm(Today~Lag1+Lag2,data=Smarket)

Call:
lm(formula = Today ~ Lag1 + Lag2, data = Smarket)

Coefficients:
(Intercept)         Lag1         Lag2 
   0.003283    -0.026444    -0.010946 

> library("MASS", lib.loc="C:/Program Files/R/R-3.6.1/library")
> data(biopsy)
>
> str(biopsy)
'data.frame': 699 obs. of  11 variables:
 $ ID   : chr  "1000025" "1002945" "1015425" "1016277" ...
 $ V1   : int  5 5 3 6 4 8 1 2 2 4 ...
 $ V2   : int  1 4 1 8 1 10 1 1 1 2 ...
 $ V3   : int  1 4 1 8 1 10 1 2 1 1 ...
 $ V4   : int  1 5 1 1 3 8 1 1 1 1 ...
 $ V5   : int  2 7 2 3 2 7 2 2 2 2 ...
 $ V6   : int  1 10 2 4 1 10 10 1 1 1 ...
 $ V7   : int  3 3 3 3 3 9 3 3 1 2 ...
 $ V8   : int  1 2 1 7 1 7 1 1 1 1 ...
 $ V9   : int  1 1 1 1 1 1 1 1 5 1 ...
 $ class: Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
> biopsy$ID = NULL
>
> names(biopsy) = c("thick", "u.size", "u.shape", "adhsn", "s.size",
+                   "nucl", "chrom", "n.nuc", "mit", "class")
>
> names(biopsy)
 [1] "thick"   "u.size"  "u.shape" "adhsn"   "s.size"  "nucl" 
 [7] "chrom"   "n.nuc"   "mit"     "class" 
>
> biopsy.v2 = na.omit(biopsy)
>
> library("reshape2", lib.loc="~/R/win-library/3.6")
> library("ggplot2", lib.loc="~/R/win-library/3.6")
> biop.m = melt(biopsy.v2, id.var="class")
>
> ggplot(data=biop.m, aes(x=class, y=value)) + geom_boxplot()
> +facet_wrap(~variable,ncol = 3)
Error: Cannot use `+.gg()` with a single argument. Did you accidentally put + on a new line?
>
>
> library("corrplot", lib.loc="~/R/win-library/3.6")
corrplot 0.84 loaded
Warning message:
package ‘corrplot’ was built under R version 3.6.3
> bc = cor(biopsy.v2[ ,1:9])
> corrplot.mixed(bc)
>
rcorelation
> set.seed(123)
> ind = sample(2, nrow(biopsy.v2), replace=TRUE, prob=c(0.7, 0.3))
>
> train = biopsy.v2[ind==1,]
> test = biopsy.v2[ind==2,]
> str(test)
'data.frame': 209 obs. of  10 variables:
 $ thick  : int  5 6 4 2 1 7 6 7 1 3 ...
 $ u.size : int  4 8 1 1 1 4 1 3 1 2 ...
 $ u.shape: int  4 8 1 2 1 6 1 2 1 1 ...
 $ adhsn  : int  5 1 3 1 1 4 1 10 1 1 ...
 $ s.size : int  7 3 2 2 1 6 2 5 2 1 ...
 $ nucl   : int  10 4 1 1 1 1 1 10 1 1 ...
 $ chrom  : int  3 3 3 3 3 4 3 5 3 2 ...
 $ n.nuc  : int  2 7 1 1 1 3 1 4 1 1 ...
 $ mit    : int  1 1 1 1 1 1 1 4 1 1 ...
 $ class  : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 2 1 1 ...
 - attr(*, "na.action")= 'omit' Named int  24 41 140 146 159 165 236 250 276 293 ...
  ..- attr(*, "names")= chr  "24" "41" "140" "146" ...
> table(train$class)

   benign malignant
      302       172
>
> table(test$class)

   benign malignant
      142        67
>
> full.fit = glm(class~., family=binomial, data=train)
> summary(full.fit)

Call:
glm(formula = class ~ ., family = binomial, data = train)

Deviance Residuals:
    Min       1Q   Median       3Q      Max 
-3.3397  -0.1387  -0.0716   0.0321   2.3559 

Coefficients:
            Estimate Std. Error z value Pr(>|z|)
(Intercept)  -9.4293     1.2273  -7.683 1.55e-14
thick         0.5252     0.1601   3.280 0.001039
u.size       -0.1045     0.2446  -0.427 0.669165
u.shape       0.2798     0.2526   1.108 0.268044
adhsn         0.3086     0.1738   1.776 0.075722
s.size        0.2866     0.2074   1.382 0.167021
nucl          0.4057     0.1213   3.344 0.000826
chrom         0.2737     0.2174   1.259 0.208006
n.nuc         0.2244     0.1373   1.635 0.102126
mit           0.4296     0.3393   1.266 0.205402
             
(Intercept) ***
thick       **
u.size       
u.shape       
adhsn       . 
s.size       
nucl        ***
chrom         
n.nuc         
mit           
---
Signif. codes: 
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 620.989  on 473  degrees of freedom
Residual deviance:  78.373  on 464  degrees of freedom
AIC: 98.373

Number of Fisher Scoring iterations: 8

>
> confint(full.fit)
Waiting for profiling to be done...
                   2.5 %     97.5 %
(Intercept) -12.23786660 -7.3421509
thick         0.23250518  0.8712407
u.size       -0.56108960  0.4212527
u.shape      -0.24551513  0.7725505
adhsn        -0.02257952  0.6760586
s.size       -0.11769714  0.7024139
nucl          0.17687420  0.6582354
chrom        -0.13992177  0.7232904
n.nuc        -0.03813490  0.5110293
mit          -0.14099177  1.0142786
> exp(coef(full.fit))
 (Intercept)        thick       u.size
8.033466e-05 1.690879e+00 9.007478e-01
     u.shape        adhsn       s.size
1.322844e+00 1.361533e+00 1.331940e+00
        nucl        chrom        n.nuc
1.500309e+00 1.314783e+00 1.251551e+00
         mit
1.536709e+00

Decision-making.

We can make powerful and insightful predictions to support

> decision-making.
 head(leaps)
                                                                                         
1 function (x, y, wt = rep(1, NROW(x)), int = TRUE, method = c("Cp",                     
2     "adjr2", "r2"), nbest = 10, names = NULL, df = NROW(x), strictly.compatible = TRUE)
3 {                                                                                     
4     if (!is.logical(int))                                                             
5         stop("int should be TRUE or FALSE")                                           
6     if (!is.null(names))                                                               
> fit=lm(BSAAM~., data=socal.water)
Error in is.data.frame(data) : object 'socal.water' not found
>
> fit=lm(BSAAM~.,data=socal.water)
Error in is.data.frame(data) : object 'socal.water' not found
> x<-matrix(rnorm(100),ncol=4)
> y<-rnorm(25)
> leaps(x,y)
$which
      1     2     3     4
1 FALSE FALSE FALSE  TRUE
1  TRUE FALSE FALSE FALSE
1 FALSE FALSE  TRUE FALSE
1 FALSE  TRUE FALSE FALSE
2  TRUE FALSE FALSE  TRUE
2 FALSE FALSE  TRUE  TRUE
2 FALSE  TRUE FALSE  TRUE
2  TRUE FALSE  TRUE FALSE
2  TRUE  TRUE FALSE FALSE
2 FALSE  TRUE  TRUE FALSE
3  TRUE FALSE  TRUE  TRUE
3  TRUE  TRUE FALSE  TRUE
3 FALSE  TRUE  TRUE  TRUE
3  TRUE  TRUE  TRUE FALSE
4  TRUE  TRUE  TRUE  TRUE

$label
[1] "(Intercept)" "1"           "2"           "3"         
[5] "4"         

$size
 [1] 2 2 2 2 3 3 3 3 3 3 4 4 4 4 5

$Cp
 [1] 0.291457 1.330360 2.597818 2.800123 1.479960 2.123237 2.214481
 [8] 2.669777 3.315351 4.290161 3.099933 3.465247 3.937318 4.529656
[15] 5.000000

> data(swiss)
> a<-regsubsets(as.matrix(swiss[,-1]),swiss[,1])
> summary(a)
Subset selection object
5 Variables  (and intercept)
                 Forced in Forced out
Agriculture          FALSE      FALSE
Examination          FALSE      FALSE
Education            FALSE      FALSE
Catholic             FALSE      FALSE
Infant.Mortality     FALSE      FALSE
1 subsets of each size up to 5
Selection Algorithm: exhaustive
         Agriculture Examination Education Catholic Infant.Mortality
1  ( 1 ) " "         " "         "*"       " "      " "           
2  ( 1 ) " "         " "         "*"       "*"      " "           
3  ( 1 ) " "         " "         "*"       "*"      "*"           
4  ( 1 ) "*"         " "         "*"       "*"      "*"           
5  ( 1 ) "*"         "*"         "*"       "*"      "*"           
> b<-regsubsets(Fertility~.,data=swiss,nbest=2)
> summary(b)
Subset selection object
Call: regsubsets.formula(Fertility ~ ., data = swiss, nbest = 2)
5 Variables  (and intercept)
                 Forced in Forced out
Agriculture          FALSE      FALSE
Examination          FALSE      FALSE
Education            FALSE      FALSE
Catholic             FALSE      FALSE
Infant.Mortality     FALSE      FALSE
2 subsets of each size up to 5
Selection Algorithm: exhaustive
         Agriculture Examination Education Catholic Infant.Mortality
1  ( 1 ) " "         " "         "*"       " "      " "           
1  ( 2 ) " "         "*"         " "       " "      " "           
2  ( 1 ) " "         " "         "*"       "*"      " "           
2  ( 2 ) " "         " "         "*"       " "      "*"           
3  ( 1 ) " "         " "         "*"       "*"      "*"           
3  ( 2 ) "*"         " "         "*"       "*"      " "           
4  ( 1 ) "*"         " "         "*"       "*"      "*"           
4  ( 2 ) " "         "*"         "*"       "*"      "*"           
5  ( 1 ) "*"         "*"         "*"       "*"      "*"           
> coef(a, 1:3)
[[1]]
(Intercept)   Education
 79.6100585  -0.8623503

[[2]]
(Intercept)   Education    Catholic
 74.2336892  -0.7883293   0.1109210

[[3]]
     (Intercept)        Education         Catholic Infant.Mortality
     48.67707330      -0.75924577       0.09606607       1.29614813

> vcov(a, 3)
                  (Intercept)     Education      Catholic
(Intercept)      62.711883147 -0.2349982009 -0.0011120059
Education        -0.234998201  0.0136416868  0.0004427309
Catholic         -0.001112006  0.0004427309  0.0007408169
Infant.Mortality -2.952862263  0.0033603646 -0.0017163629
                 Infant.Mortality
(Intercept)          -2.952862263
Education             0.003360365
Catholic             -0.001716363
Infant.Mortality      0.149759535
> plot(a)
> plot(a,scale="r2")
PAIRS

PLOTS

linear regression in R, one uses the lm() function to create a model i

 Linear regression in R, one uses the lm() function to create a model in the

> standard form of fit = lm(Y~X)
R is a collaborative project with many contributors.
 > data(snake)
>
> attach(snake)
> dim(snake)
[1] 17  2
>
> head(snake)
     X    Y
1 23.1 10.5
2 32.8 16.7
3 31.8 18.2
4 32.0 17.0
5 30.4 16.3
6 24.0 10.5
> names(snake) = c("Independent", "dependent")
> attach(snake)
> head(snake)
  Independent dependent
1        23.1      10.5
2        32.8      16.7
3        31.8      18.2
4        32.0      17.0
5        30.4      16.3
6        24.0      10.5
>
> plot(Independent, dependent, xlab="water content of snow", ylab="water yield")
>
> # linear regression in R, one uses the lm() function to create a model in the
> standard form of fit = lm(Y~X).
Error: unexpected symbol in "standard form"
> yield.fit = lm(dependent~Independent)
>
> summary(yield.fit)

Call:
lm(formula = dependent ~ Independent)

Residuals:
    Min      1Q  Median      3Q     Max
-2.1793 -1.5149 -0.3624  1.6276  3.1973

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  0.72538    1.54882   0.468    0.646   
Independent  0.49808    0.04952  10.058 4.63e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.743 on 15 degrees of freedom
Multiple R-squared:  0.8709, Adjusted R-squared:  0.8623
F-statistic: 101.2 on 1 and 15 DF,  p-value: 4.632e-08

>
> plot(Independent,dependent)
> abline(yield.fit, lwd=3, col="red")
>
linear model

> par(mfrow=c(2,2))
>
> plot(yield.fit)
>
residual
> qqPlot(yield.fit)
[1]  7 10
>
> data(water)
>
> str(water)
'data.frame': 43 obs. of  8 variables:
 $ Year   : int  1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 ...
 $ APMAM  : num  9.13 5.28 4.2 4.6 7.15 9.7 5.02 6.7 10.5 9.1 ...
 $ APSAB  : num  3.58 4.82 3.77 4.46 4.99 5.65 1.45 7.44 5.85 6.13 ...
 $ APSLAKE: num  3.91 5.2 3.67 3.93 4.88 4.91 1.77 6.51 3.38 4.08 ...
 $ OPBPC  : num  4.1 7.55 9.52 11.14 16.34 ...
 $ OPRC   : num  7.43 11.11 12.2 15.15 20.05 ...
 $ OPSLAKE: num  6.47 10.26 11.35 11.13 22.81 ...
 $ BSAAM  : int  54235 67567 66161 68094 107080 67594 65356 67909 92715 70024 ...
>
> socal.water = water[ ,-1] #new dataframe with the deletion of column 1
>
> head(socal.water)
  APMAM APSAB APSLAKE OPBPC  OPRC OPSLAKE  BSAAM
1  9.13  3.58    3.91  4.10  7.43    6.47  54235
2  5.28  4.82    5.20  7.55 11.11   10.26  67567
3  4.20  3.77    3.67  9.52 12.20   11.35  66161
4  4.60  4.46    3.93 11.14 15.15   11.13  68094
5  7.15  4.99    4.88 16.34 20.05   22.81 107080
6  9.70  5.65    4.91  8.88  8.15    7.41  67594
> water.cor = cor(socal.water)
> water.cor
            APMAM      APSAB    APSLAKE      OPBPC      OPRC
APMAM   1.0000000 0.82768637 0.81607595 0.12238567 0.1544155
APSAB   0.8276864 1.00000000 0.90030474 0.03954211 0.1056396
APSLAKE 0.8160760 0.90030474 1.00000000 0.09344773 0.1063836
OPBPC   0.1223857 0.03954211 0.09344773 1.00000000 0.8647073
OPRC    0.1544155 0.10563959 0.10638359 0.86470733 1.0000000
OPSLAKE 0.1075421 0.02961175 0.10058669 0.94334741 0.9191447
BSAAM   0.2385695 0.18329499 0.24934094 0.88574778 0.9196270
           OPSLAKE     BSAAM
APMAM   0.10754212 0.2385695
APSAB   0.02961175 0.1832950
APSLAKE 0.10058669 0.2493409
OPBPC   0.94334741 0.8857478
OPRC    0.91914467 0.9196270
OPSLAKE 1.00000000 0.9384360
BSAAM   0.93843604 1.0000000
>library("corrplot", lib.loc="~/R/win-library/3.6")
> corrplot(water.cor, method="ellipse")