### DATA SCIENCE FINANCIAL CODE

DATA SCIENCE  FINANCIAL CODE

> set.seed(4711)
> Index <- 1:260
> Returns <- rnorm(260)
> plot(x = Index, y = Returns)
> plot(x = Index, y = Returns, main = "Artificial Returns")
> par(mfrow = c(2, 1))
> Price = cumsum(Returns)
> plot(Index, Price, type = "l", main = "Line Plot")
> plot(Index, Returns, type = "h", main = "Histogram-like Vertical Lines")
> hist(Returns, probability = TRUE)
> curve(dnorm(x), min(Returns), max(Returns), add = TRUE, lwd = 2)
> plot(xf)
> plot(x.df)
> factorData <- factor(sample(c(rep("AMEX", times = 40), rep("NASDAQ",
+ times = 180), rep("NYSE", times = 90))))
> tsData = ts(matrix(rnorm(64), 64, 1), start = c(2001, 1), frequency = 12)
> plot(factorData, col = "steelblue")
> > plot(factorData, rnorm(length(factorData)), col = "orange")
Error: unexpected '>' in ">"
>  plot(factorData, rnorm(length(factorData)), col = "orange")
> plot(dnorm, min(tsData[, 1]), max(tsData[, 1]), xlab = "Returns",
+      yla = "Density", main = "Density")
> grid()
> plot(tsData, xlab = "Index", ylab = "Returns", main = "Return Series")
> abline(h = 0)
> abline(h = 2)
> abline(h=-2)

# There are predicting coronavirus data Bayes and Joint Probability Distributions.

## we have 300,000 people in the population, to scale down the numbers from the millions for convenience.

These 1,500 have CORONAVIRUS(covs). So let’s create the population and then sample
from it.
> people = seq ( 1,300000 )
> people_covs = sample ( people,1500 )
> people_nocovs = setdiff(people, people_covs)
#how we also used the setdiff function to get the complement
set of the people who do not have coronavirus(covs). Now, of the people who have
coronavirus, we know that 99% of them test positive so let’s subset that list,
and also take its complement. These are joint events, and their numbers
proscribe the joint distribution.
> people_covs_pos =sample(people_covs, 1500*0.99 )
> people_covs_neg = setdiff(people_covs,people_covs_pos)
> length(people_covs_pos)
[1] 1485
> length(people_covs_neg)
[1] 15
#We can also subset the group that does not have CORONAVIRUS(covs), as we know that
the test is negative for them 95% of the time.
> people_nocovs_neg = sample(people_nocovs ,298500*0.95)
> people_nocovs_pos =setdiff(people_nocovs,people_nocovs_neg)
> length (people_nocovs_neg)
[1] 283575
> length(people_nocovs_pos)
[1] 14925
#We can now compute the probability that someone actually has coronavirus(covs)
when the test comes out positive.
> pr_covs_given_pos = (length(people_covs_pos))/(length(people_covs_pos)+length( people_nocovs_pos))
> pr_covs_given_pos
[1] 0.0904936
#we had examined earlier, what’s the chance
that you have coronavirus(covs)when the test is negative, i.e., a false negative?
> pr_covs_given_neg = (length(people_covs_neg))/(length(people_covs_neg)+length(people_nocovs_neg))
> pr_covs_given_neg
[1] 5.289326e-05

### How to learn probabilistic linear models to predict a qualitative

#we looked at using probabilistic linear models to predict a qualitative
> response with the two most common methods: logistic regression and discriminant
package ‘leaps’ successfully unpacked and MD5 sums checked

> library("leaps", lib.loc="~/R/win-library/3.6")
Warning message:
package ‘leaps’ was built under R version 3.6.3
> x<-matrix(rnorm(100),ncol=4)
> y<-rnorm(25)
> leaps(x,y
+ )
\$which
1     2     3     4
1 FALSE FALSE FALSE  TRUE
1  TRUE FALSE FALSE FALSE
1 FALSE FALSE  TRUE FALSE
1 FALSE  TRUE FALSE FALSE
2  TRUE FALSE FALSE  TRUE
2 FALSE FALSE  TRUE  TRUE
2 FALSE  TRUE FALSE  TRUE
2  TRUE FALSE  TRUE FALSE
2  TRUE  TRUE FALSE FALSE
2 FALSE  TRUE  TRUE FALSE
3  TRUE FALSE  TRUE  TRUE
3  TRUE  TRUE FALSE  TRUE
3 FALSE  TRUE  TRUE  TRUE
3  TRUE  TRUE  TRUE FALSE
4  TRUE  TRUE  TRUE  TRUE

\$label
[1] "(Intercept)" "1"
[3] "2"           "3"
[5] "4"

\$size
[1] 2 2 2 2 3 3 3 3 3 3 4 4 4 4 5

\$Cp
[1] 0.291457 1.330360 2.597818 2.800123
[5] 1.479960 2.123237 2.214481 2.669777
[9] 3.315351 4.290161 3.099933 3.465247
[13] 3.937318 4.529656 5.000000

> data(swiss)
> a<-regsubsets(as.matrix(swiss[,-1]),swiss[,1])
> summary(a)
Subset selection object
5 Variables  (and intercept)
Forced in Forced out
Agriculture          FALSE      FALSE
Examination          FALSE      FALSE
Education            FALSE      FALSE
Catholic             FALSE      FALSE
Infant.Mortality     FALSE      FALSE
1 subsets of each size up to 5
Selection Algorithm: exhaustive
Agriculture Examination Education
1  ( 1 ) " "         " "         "*"
2  ( 1 ) " "         " "         "*"
3  ( 1 ) " "         " "         "*"
4  ( 1 ) "*"         " "         "*"
5  ( 1 ) "*"         "*"         "*"
Catholic Infant.Mortality
1  ( 1 ) " "      " "
2  ( 1 ) "*"      " "
3  ( 1 ) "*"      "*"
4  ( 1 ) "*"      "*"
5  ( 1 ) "*"      "*"
> coef(a, 1:3)
[[1]]
(Intercept)   Education
79.6100585  -0.8623503

[[2]]
(Intercept)   Education    Catholic
74.2336892  -0.7883293   0.1109210

[[3]]
(Intercept)        Education
48.67707330      -0.75924577
Catholic Infant.Mortality
0.09606607       1.29614813

> vcov(a, 3)
(Intercept)
(Intercept)      62.711883147
Education        -0.234998201
Catholic         -0.001112006
Infant.Mortality -2.952862263
Education
(Intercept)      -0.2349982009
Education         0.0136416868
Catholic          0.0004427309
Infant.Mortality  0.0033603646
Catholic
(Intercept)      -0.0011120059
Education         0.0004427309
Catholic          0.0007408169
Infant.Mortality -0.0017163629
Infant.Mortality
(Intercept)          -2.952862263
Education             0.003360365
Catholic             -0.001716363
Infant.Mortality      0.149759535
> > library(MASS)
Error: unexpected '>' in ">"
>  library(MASS)
> data(biopsy)
> str(biopsy)
'data.frame': 699 obs. of  11 variables:
\$ ID   : chr  "1000025" "1002945" "1015425" "1016277" ...
\$ V1   : int  5 5 3 6 4 8 1 2 2 4 ...
\$ V2   : int  1 4 1 8 1 10 1 1 1 2 ...
\$ V3   : int  1 4 1 8 1 10 1 2 1 1 ...
\$ V4   : int  1 5 1 1 3 8 1 1 1 1 ...
\$ V5   : int  2 7 2 3 2 7 2 2 2 2 ...
\$ V6   : int  1 10 2 4 1 10 10 1 1 1 ...
\$ V7   : int  3 3 3 3 3 9 3 3 1 2 ...
\$ V8   : int  1 2 1 7 1 7 1 1 1 1 ...
\$ V9   : int  1 1 1 1 1 1 1 1 5 1 ...
\$ class: Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
> biopsy\$ID = NULL
> names(biopsy) = c("thick", "u.size", "u.shape", "adhsn", "s.size",
+                   "nucl", "chrom", "n.nuc", "mit", "class")
> names(biopsy)
[1] "thick"   "u.size"  "u.shape"
[7] "chrom"   "n.nuc"   "mit"
[10] "class"
> biopsy.v2 = na.omit(biopsy)
> library(reshape2)
> library(ggplot2)
> biop.m = melt(biopsy.v2, id.var="class")
> ggplot(data=biop.m, aes(x=class, y=value)) + geom_boxplot()
> +facet_wrap(~variable,ncol = 3)
Error: Cannot use `+.gg()` with a single argument. Did you accidentally put + on a new line?
> library(corrplot)
Warning message:
package ‘corrplot’ was built under R version 3.6.3
> bc = cor(biopsy.v2[ ,1:9])
> corrplot.mixed(bc)

> set.seed(123)
> ind = sample(2, nrow(biopsy.v2), replace=TRUE, prob=c(0.7, 0.3))
> train = biopsy.v2[ind==1,]
> test = biopsy.v2[ind==2,]
> str(test)
'data.frame': 209 obs. of  10 variables:
\$ thick  : int  5 6 4 2 1 7 6 7 1 3 ...
\$ u.size : int  4 8 1 1 1 4 1 3 1 2 ...
\$ u.shape: int  4 8 1 2 1 6 1 2 1 1 ...
\$ adhsn  : int  5 1 3 1 1 4 1 10 1 1 ...
\$ s.size : int  7 3 2 2 1 6 2 5 2 1 ...
\$ nucl   : int  10 4 1 1 1 1 1 10 1 1 ...
\$ chrom  : int  3 3 3 3 3 4 3 5 3 2 ...
\$ n.nuc  : int  2 7 1 1 1 3 1 4 1 1 ...
\$ mit    : int  1 1 1 1 1 1 1 4 1 1 ...
\$ class  : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 2 1 1 ...
- attr(*, "na.action")= 'omit' Named int  24 41 140 146 159 165 236 250 276 293 ...
..- attr(*, "names")= chr  "24" "41" "140" "146" ...
> table(train\$class)

benign malignant
302       172
> table(test\$class)

benign malignant
142        67
> full.fit = glm(class~., family=binomial, data=train)
> summary(full.fit)

Call:
glm(formula = class ~ ., family = binomial, data = train)

Deviance Residuals:
Min       1Q   Median       3Q
-3.3397  -0.1387  -0.0716   0.0321
Max
2.3559

Coefficients:
Estimate Std. Error z value
(Intercept)  -9.4293     1.2273  -7.683
thick         0.5252     0.1601   3.280
u.size       -0.1045     0.2446  -0.427
u.shape       0.2798     0.2526   1.108
s.size        0.2866     0.2074   1.382
nucl          0.4057     0.1213   3.344
chrom         0.2737     0.2174   1.259
n.nuc         0.2244     0.1373   1.635
mit           0.4296     0.3393   1.266
Pr(>|z|)
(Intercept) 1.55e-14 ***
thick       0.001039 **
u.size      0.669165
u.shape     0.268044
s.size      0.167021
nucl        0.000826 ***
chrom       0.208006
n.nuc       0.102126
mit         0.205402
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’
0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 620.989  on 473  degrees of freedom
Residual deviance:  78.373  on 464  degrees of freedom
AIC: 98.373

Number of Fisher Scoring iterations: 8

> confint(full.fit)
Waiting for profiling to be done...
2.5 %     97.5 %
(Intercept) -12.23786660 -7.3421509
thick         0.23250518  0.8712407
u.size       -0.56108960  0.4212527
u.shape      -0.24551513  0.7725505
s.size       -0.11769714  0.7024139
nucl          0.17687420  0.6582354
chrom        -0.13992177  0.7232904
n.nuc        -0.03813490  0.5110293
mit          -0.14099177  1.0142786
> exp(coef(full.fit))
(Intercept)        thick       u.size
8.033466e-05 1.690879e+00 9.007478e-01
1.322844e+00 1.361533e+00 1.331940e+00
nucl        chrom        n.nuc
1.500309e+00 1.314783e+00 1.251551e+00
mit
1.536709e+00

# KURTOSIS AND SKEWNESS

package ‘e1071’ successfully unpacked and MD5 sums checked

> library("e1071", lib.loc="~/R/win-library/3.6")

Attaching package: ‘e1071’

kurtosis, skewness

Warning message:
package ‘e1071’ was built under R version 3.6.3
> x <- rnorm(100)
> kurtosis(x)
[1] -0.09225218
> x <- rnorm(100)
> skewness(x)
[1] 0.2068412
> sigmoid(x)
[1] 0.7924609 0.6012173 0.4015700
[4] 0.1168985 0.5446880 0.4903706
[7] 0.7195269 0.7096943 0.6016002
[10] 0.2019729 0.3525087 0.5408626
[13] 0.4601341 0.2470393 0.3445243
[16] 0.6738519 0.6390173 0.5017720
[19] 0.3256660 0.3072329 0.5314812
[22] 0.6289924 0.8502339 0.4765683
[25] 0.5006393 0.3940206 0.6075869
[28] 0.4465308 0.8705377 0.6635064
[31] 0.7139986 0.7578363 0.5363243
[34] 0.3642131 0.6399949 0.2983353
[37] 0.2082072 0.6549224 0.4033734
[40] 0.4480780 0.2010318 0.5400087
[43] 0.7412638 0.4817815 0.3450085
[46] 0.7708486 0.6844388 0.2889012
[49] 0.6963281 0.5850107 0.7936972
[52] 0.6423643 0.6100179 0.9271042
[55] 0.9262851 0.3325886 0.3185873
[58] 0.5040252 0.3851227 0.6991765
[61] 0.7593792 0.7302417 0.6990238
[64] 0.8273355 0.1847269 0.5340112
[67] 0.2264907 0.3353517 0.9484346
[70] 0.8229770 0.4842939 0.4178209
[73] 0.2967966 0.5143271 0.3565440
[76] 0.3225993 0.5873630 0.2234188
[79] 0.8293041 0.3681796 0.8147751
[82] 0.2231620 0.7632167 0.4017692
[85] 0.4799441 0.6182596 0.5266224
[88] 0.5912255 0.8460056 0.1184548
[91] 0.6034997 0.6074844 0.6780144
[94] 0.4481176 0.7328068 0.6624322
[97] 0.6486271 0.3152354 0.4292004
[100] 0.8286585
> dsigmoid(x)
[1] 0.16446660 0.23975505 0.24031154
[4] 0.10323324 0.24800298 0.24990727
[7] 0.20180793 0.20602831 0.23967741
[10] 0.16117983 0.22824631 0.24833025
[13] 0.24841071 0.18601089 0.22582730
[16] 0.21977550 0.23067419 0.24999686
[19] 0.21960765 0.21284085 0.24900893
[22] 0.23336097 0.12733620 0.24945095
[25] 0.24999959 0.23876837 0.23842506
[28] 0.24714104 0.11270181 0.22326567
[31] 0.20420462 0.18352043 0.24868054
[34] 0.23156192 0.23040143 0.20933134
[37] 0.16485699 0.22599906 0.24066330
[40] 0.24730411 0.16061803 0.24839930
[43] 0.19179176 0.24966809 0.22597763
[46] 0.17664105 0.21598233 0.20543730
[49] 0.21145526 0.24277318 0.16374195
[52] 0.22973240 0.23789606 0.06758202
[55] 0.06828103 0.22197343 0.21708943
[58] 0.24998380 0.23680320 0.21032871
[61] 0.18272245 0.19698874 0.21038951
[64] 0.14285149 0.15060289 0.24884324
[67] 0.17519267 0.22289095 0.04890642
[70] 0.14568584 0.24975332 0.24324659
[73] 0.20870840 0.24979474 0.22942037
[76] 0.21852898 0.24236770 0.17350285
[79] 0.14155880 0.23262337 0.15091666
[82] 0.17336070 0.18071697 0.24035071
[85] 0.24959776 0.23601467 0.24929125
[88] 0.24167791 0.13028010 0.10442327
[91] 0.23928782 0.23844711 0.21831089
[94] 0.24730821 0.19580098 0.22361579
[97] 0.22790998 0.21586205 0.24498742
[100] 0.14198362
> d2sigmoid(x)
[1] -0.0962001121 -0.0485347410
[3]  0.0473077111  0.0790976167
[5] -0.0221655221  0.0048129378
[7] -0.0886045466 -0.0864059134
[9] -0.0487025268  0.0960719243
[11]  0.0673287026 -0.0202948456
[13]  0.0198062388  0.0941068862
[15]  0.0702213253 -0.0764167954
[17] -0.0641354039 -0.0008859772
[19]  0.0765701691  0.0820574196
[21] -0.0156782067 -0.0602035621
[23] -0.0891949150  0.0116901260
[25] -0.0003196607  0.0506090505
[27] -0.0513028320  0.0264288900
[29] -0.0835205388 -0.0730107132
[31] -0.0873989881 -0.0946364661
[33] -0.0180663040  0.0628861539
[35] -0.0645100404  0.0844294905
[37]  0.0962081495 -0.0700246146
[39]  0.0465089613  0.0256810513
[41]  0.0960393588 -0.0198762770
[43] -0.0925448333  0.0090971429
[45]  0.0700492322 -0.0956859528
[47] -0.0796710462  0.0867351325
[49] -0.0830292377 -0.0412766260
[51] -0.0961811072 -0.0654113974
[53] -0.0523456462 -0.0577291270
[55] -0.0582143683  0.0743217566
[57]  0.0787655589 -0.0020124786
[59]  0.0544066307 -0.0837850845
[61] -0.0947887922 -0.0907100610
[63] -0.0837450567 -0.0935207186
[65]  0.0949620716 -0.0169268988
[67]  0.0958336440  0.0733972126
[69] -0.0438626631 -0.0941063585
[71]  0.0078452920  0.0399795769
[73]  0.0848204933 -0.0071576502
[75]  0.0658234717  0.0775344030
[77] -0.0423479494  0.0959752468
[79] -0.0932317896  0.0613290307
[81] -0.0950096028  0.0959856723
[83] -0.0951354489  0.0472196933
[85]  0.0100117943 -0.0558219940
[87] -0.0132734716 -0.0440943718
[89] -0.0901552964  0.0796843926
[91] -0.0495324249 -0.0512586801
[93] -0.0777249452  0.0256619068
[95] -0.0911676106 -0.0726447915
[97] -0.0677472046  0.0797673218
[99]  0.0346900170 -0.0933282339
> plot(sigmoid, -5, 5, ylim = c(-.2, 1))
> plot(dsigmoid, -5, 5, add = TRUE, col = 2)
> plot(d2sigmoid, -5, 5, add = TRUE, col = 3)
> tckr = c('SPY')
> end<- format(Sys.Date(),"%Y-%m-%d")
> start<-format(Sys.Date() - 365*20,"%Y-%m-%d")
> dat1 = (getSymbols(tckr[1], src="yahoo", from=start, to=end, auto.assign = FALSE))
‘getSymbols’ currently uses auto.assign=TRUE by default, but will
use auto.assign=FALSE in 0.5-0. You will still be able to use
and getOption("getSymbols.auto.assign") will still be checked for
alternate defaults.

This message is shown once per session and may be disabled by setting
options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.

> ret1 = as.numeric((dat1[,4] - dat1[,1])/dat1[,1] )
> n = length(as.numeric(ret1))
> time0 = index(dat1)
[1] "2000-04-18" "2000-04-19" "2000-04-20"
[4] "2000-04-24" "2000-04-25" "2000-04-26"
[1] "2020-04-02" "2020-04-03" "2020-04-06"
[4] "2020-04-07" "2020-04-08" "2020-04-09"
> s = seq(1,n,22)
> k = NULL
> for (i in 1:(length(s)-1)){
+     k[i] = 3+kurtosis(ret1[s[i]:s[(i+1)]])
+ }
> time0[s[which(k>quantile(k,.95))]]
[1] "2005-12-27" "2007-02-16" "2009-10-30"
[4] "2011-01-21" "2012-06-14" "2012-12-24"
[7] "2013-01-28" "2017-05-09" "2017-07-12"
[10] "2019-03-12" "2019-08-16" "2020-01-24"
> plot(k~time0[s[1:(length(s)-1)] ], ty = "b", xlab = "Time", ylab = "Monthly Kurtosis",
+      main = "Monthly Kurtosis over Time")
> abline(a = quantile(k,.90), b = 0, lwd = 2, col = 2)
> abline(a = quantile(k,.95), b = 0, lwd = 2, col = 3)
> abline(a = quantile(k,.99), b = 0, lwd = 2, col = 4)

## Black-Scholes formula-R

> BlackScholes <- function(TypeFlag = c("c", "p"), S, X, Time, r, b, sigma) { TypeFlag = TypeFlag[1] d1 = (log(S/X) + (b + sigma * sigma/2) * Time)/(sigma * sqrt(Time)) d2 = d1 - sigma * sqrt(Time) if (TypeFlag == "c") price = S * exp((b - r) * Time) * pnorm(d1) - X * exp(-r * Time) * pnorm(d2) else if (TypeFlag == "p") price = X * exp(-r * Time) * pnorm(-d2) - S * exp((b - r) * Time) * pnorm(-d1) param <- list(TypeFlag = TypeFlag, S = S, X = X, Time = Time, r = r, b = b, sigma = sigma) ans <- list(parameters = param, price = price, option = "Black Scholes") class(ans) <- c("option", "list") ans }

> library("ggplot2", lib.loc="~/R/win-library/3.6")
> x <- seq(-4, 4, l = 50)
> y <- x
> myf <- function(x, y) {
+     sin(x) + cos(y)
+ }
> z <- outer(x, y, FUN = myf)
> persp(x, y, z, theta = 45, phi = 45, shade = 0.45)
> Cap = floor(Capitalization/1000)
> Cap
2003  2004 2005  2006  2007 2008
Euronext US     1328 12707 3632 15421 15650 9208
TSX Group        888  1177 1482  1700  2186 1033
Australian SE    585   776  804  1095  1298  683
Bombay SE        278   386  553   818  1819  647
Hong Kong SE     714   861 1054  1714  2654 1328
NSE India        252   363  515   774  1660  600
Shanghai SE      360   314  286   917  3694 1425
Tokyo SE        2953  3557 4572  4614  4330 3115
BME Spanish SE   726   940  959  1322  1781  948
Deutsche Boerse 1079  1194 1221  1637  2105 1110
London SE       2460  2865 3058  3794  3851 1868
Euronext EU     2076  2441 2706  3712  4222 2101
SIX SE           727   826  935  1212  1271  857
> barplot(t(Cap)/1e+06, beside = TRUE, las = 2, ylab = "Capitalization [Mio USD]")
> title(main = "Major Stock Markets")
>
 function

> mtext(side = 3, "2003 - 2008")
> barplot(Cap/1e+06, beside = TRUE, ylab = "Capitalization [Mio USD]")
> palette(rainbow(13, s = 0.6, v = 0.75))
> stars(t(log(Cap)), draw.segments = TRUE, ncol = 3, nrow = 2,
+       key.loc = c(4.6, -0.5), mar = c(15, 0, 0, 0))
> mtext(side = 3, line = 2.2, text = "Growth and Decline of Major Stock Markets",
+       cex = 1.5, font = 2)
> abline(h = 0.9)
calldelta(100, 100, 0.30, (15/365), 0.02, 0.02)
[1] 0.5117085
> callgreek("delta", 100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 0.5127391
> callgreek("delta", 100, 100, 0.20, (15/365), 0.02, 0.02)
[1] 0.5076694
> callgreek("gamma", 100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 0.05663458
> callgreek("gamma", 100, 100, 0.20, (15/365), 0.02, 0.02)
[1] 0.09829574
> callpremium(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 2.794086
> callrho(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 0.05976964
> calltheta(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] -0.02979083
> dv(s = 100, x1 = 90, x2 = 95, x3 = 105, x4 = 110, t = 0.08, r = 0.02, sigma = 0.2, vol = 0.3)
V1
Spot            100.00
Long.Put         90.00
Short.Put        95.00
Short.Call      105.00
Long.Call       110.00
Lower.BE         94.01
Higher.BE       105.99
Prob.BE           0.45
Prob.Max.Profit   0.38
Prob.Max.Loss     0.32
> iv.calc(type = "call", price = 42.93, s = 100, x = 100, t = (15/365), r = 0.02, d = 0)
[1] 5
> lambda(type = "put", s = 100, x = 100, sigma = 0.15, t = 45/365, r = 0.02)
[1] -23.80998
> opteval(100, 100, 0.20, (45/365), 0.02, 0.02)
1   Call 2.794086  0.5127391 0.05663458
2    Put 2.794086 -0.4847982 0.05663458
Vega       Theta         Rho
1 0.1396469 -0.02979083  0.05976964
2 0.1396469 -0.02979083 -0.06321441
> opteval(100, 9200, 0.20, (45/365), 0.02, 0.02)
1   Call    0.000  0.0000000     0    0
2    Put 9077.589 -0.9975373     0    0
Theta       Rho
1 0.000000   0.00000
2 4.034484 -11.31453
> opteval(100, 100, 0.20, (45/365), 0.02, 0.02)
1   Call 2.794086  0.5127391 0.05663458
2    Put 2.794086 -0.4847982 0.05663458
Vega       Theta         Rho
1 0.1396469 -0.02979083  0.05976964
2 0.1396469 -0.02979083 -0.06321441
> plotbearcall(s= 100, x1 = 95, x2 = 105, t = (45/365), r = 0.02,sigma = 0.20, sigma2 = 0.20, d = 0, ll = 0.75, ul = 1.25)
> plotbearput(s= 100, x1 = 95, x2 = 105, t = (45/365), r = 0.02,
+             sigma = 0.20, sigma2 = 0.20, d = 0, ll = 0.75, ul = 1.25)
> plotbullcall(s= 100, x1 = 95, x2 = 105, t = (45/365), r = 0.02,
+              sigma = 0.20, sigma2 = 0.20, d = 0, ll = 0.75, ul = 1.25)
> plotbullput(s= 100, x1 = 95, x2 = 105, t = (45/365), r = 0.02,
+             sigma = 0.20, sigma2 = 0.20, d = 0, ll = 0.75, ul = 1.25)
> plotdv(s= 100, x1 = 90, x2 = 95, x3 = 105, x4 = 110, t = (45/365), r = 0.02, sigma = 0.20)
> plotvertical("call", 100, 90, 110, (45/365), 0.02, 0.20)
> prob.above(spot = 100, lower = 110, mean = 0, dsd = 0.01, dte = 45)
[1] 0.06801856
> prob.above(spot = 100, mean = 0, dsd = 0.01, dte = 45, p = 0.75, quantile = TRUE)
probability percent.change    price
1        0.75    -0.04524615 95.47539
> prob.below(spot = 100, upper = 110, mean = 0, dsd = 0.01, dte = 45)
[1] 0.9319814
> prob.below(spot = 100, mean = 0, dsd = 0.01, dte = 45, p = 0.75, quantile = TRUE)
probability percent.change    price
1        0.75     0.04524615 104.5246
> prob.btwn(spot = 100, mean = 0, dsd = 0.01, dte = 45, p = 0.75, quantile = TRUE)
probability percent.change     price
1        0.75    -0.07716778  92.28322
2        0.75     0.07716778 107.71678
> putdelta(100, 0.20, (45/365), 0.02, 0.02)
[1] 0
> puteval(100, 100, 0.20, (45/365), 0.02, 0.02)
1 2.794086 -0.4847982 0.05663458 0.1396469
Theta         Rho
1 -0.02979083 -0.06321441
> putgreek("vega", 100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 0.1396469
> putpremium(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] 2.794086
> putrho(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] -0.06321441
> puttheta(100, 100, 0.20, (45/365), 0.02, 0.02)
[1] -0.02979083
> r.cont(0.12, 2)
[1] 0.1165378
> vertical("call", s = 100, x1 = 90, x2 = 110, t = (45/365), r =  0.025, sigma = 0.20, vol = 0.25)
V1
Spot            100.00
Short.Strike     90.00
Long.Strike     110.00
Max.Profit       10.13
Max.Loss         -9.87
Breakeven       100.13
Prob.BE           0.50
Prob.Max.Profit   0.17
Prob.Max.Loss     0.17
Initial.DC       10.13
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method=c("best"),
+           sampling=c("QMC"),metpar=list(maxiter=100,tol=1.e-14,cvmethod="splitting"),
+           sampar=list(nout=50,n=2039,a=1487,baker=TRUE,genmethod="pca"))
result error estimate
price 6.15604075   3.336613e-08
delta 0.59385393   2.731083e-09
gamma 0.03057798   6.326157e-10
> AsianCall_AppLord(T = 1, d = 12, K = 100, r = 0.05, sigma = 0.2, S0 = 100)
[1] 6.15604
> AsianCall(T=1,d=12,K=170,r=0.05,sigma=0.2,S0=100,method="best",
+           sampling="MC",metpar=list(maxiter=100,tol=1.e-14,np=1000),
+           sampar=list(n=10^5))
result error estimate
price 1.522912e-04   3.763444e-09
delta 5.091253e-05   8.851702e-10
gamma 1.578444e-05   1.759080e-10
> AsianCall_AppLord(T = 1, d = 12, K = 170, r = 0.05, sigma = 0.2, S0 = 100)
[1] 0.0001524371
> AsianCall_AppLord(T = 1, d = 12, K = 100, r = 0.05, sigma = 0.2, S0 = 100)
[1] 6.15604
> BS_EC(K=100, r = 0.05, sigma = 0.2, T = 0.25, S0 = 100)
price      delta      gamma
4.61499713 0.56946018 0.05694602
> BS_EP(K=100, r = 0.05, sigma = 0.2, T = 0.25, S0 = 100)
price       delta       gamma
3.37277718 -0.43053982  0.05694602
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method=c("best","naive"),
+           sampling=c("QMC","MC"),
+           metpar=list(maxiter=100,tol=1.e-14,cvmethod="splitting"),
+           sampar=list(nout=50,seq.type="korobov",n=2039,a=1487,
+                       baker=TRUE,genmethod="pca"))
result error estimate
price 6.15604079   2.957191e-08
delta 0.59385393   2.440579e-09
gamma 0.03057798   5.350724e-10
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method="best",
+           sampling="MC",metpar=list(maxiter=100,tol=1.e-14,np=1000),
+           sampar=list(n=10^5))
result error estimate
price 6.15604075   1.945657e-07
delta 0.59385394   1.369256e-08
gamma 0.03057798   3.526989e-09
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method="naive",
+           sampling="QMC",
+           sampar=list(nout=50,n=2039,a=1487,baker=TRUE,genmethod="pca"))
result error estimate
price 6.15618443   0.0007920166
delta 0.59392376   0.0004117103
gamma 0.03059009   0.0001390430
> AsianCall(T=1,d=12,K=100,r=0.05,sigma=0.2,S0=100,method="naive",
+           sampling="MC",sampar=list(n=10^5))
result error estimate
price 6.16783497   0.0527644567
delta 0.59392926   0.0032910230
gamma 0.03071819   0.0007318875
> AsianCall_AppLord(T = 1, d = 12, K = 100, r = 0.05, sigma = 0.25, S0 = 100, all = TRUE)
[1] 7.312461
> BS_EC(K=100, r = 0.05, sigma = 0.2, T = 0.25, S0 = 100)
price      delta      gamma
4.61499713 0.56946018 0.05694602
> BS_EP(K=100, r = 0.05, sigma = 0.2, T = 0.25, S0 = 100)
price       delta       gamma
3.37277718 -0.43053982  0.05694602
> price = 1200 # current price of the bond
> C = 40 # coupon payment
> T= 30 # time to maturity
> par = 1000 # par value of the bond
> r = seq(0.02, 0.05, length = 300)

## we will focus on the logistic function.

The logistic function used in logistic regression
data(Carseats)
> str(Carseats)
'data.frame': 400 obs. of  11 variables:
\$ Sales      : num  9.5 11.22 10.06 7.4 4.15 ...
\$ CompPrice  : num  138 111 113 117 141 124 115 136 132 132 ...
\$ Income     : num  73 48 35 100 64 113 105 81 110 113 ...
\$ Advertising: num  11 16 10 4 3 13 0 15 0 0 ...
\$ Population : num  276 260 269 466 340 501 45 425 108 131 ...
\$ Price      : num  120 83 80 97 128 72 108 120 124 124 ...
\$ ShelveLoc  : Factor w/ 3 levels "Bad","Good","Medium": 1 2 3 3 1 1 3 2 3 3 ...
\$ Age        : num  42 65 59 55 38 78 71 67 76 76 ...
\$ Education  : num  17 10 12 14 13 16 15 10 10 17 ...
\$ Urban      : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
\$ US         : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
>
> data(Smarke)
Warning message:
> data(Smarket)
> str(Smarket)
'data.frame': 1250 obs. of  9 variables:
\$ Year     : num  2001 2001 2001 2001 2001 ...
\$ Lag1     : num  0.381 0.959 1.032 -0.623 0.614 ...
\$ Lag2     : num  -0.192 0.381 0.959 1.032 -0.623 ...
\$ Lag3     : num  -2.624 -0.192 0.381 0.959 1.032 ...
\$ Lag4     : num  -1.055 -2.624 -0.192 0.381 0.959 ...
\$ Lag5     : num  5.01 -1.055 -2.624 -0.192 0.381 ...
\$ Volume   : num  1.19 1.3 1.41 1.28 1.21 ...
\$ Today    : num  0.959 1.032 -0.623 0.614 0.213 ...
\$ Direction: Factor w/ 2 levels "Down","Up": 2 2 1 2 2 2 1 2 2 2 ...
>
> summary(sales.fit)

Call:
lm(formula = Sales ~ Advertising + ShelveLoc, data = Carseats)

Residuals:
Min      1Q  Median      3Q     Max
-6.6480 -1.6198 -0.0476  1.5308  6.4098

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept)      4.89662    0.25207  19.426  < 2e-16 ***
Advertising      0.10071    0.01692   5.951 5.88e-09 ***
ShelveLocGood    4.57686    0.33479  13.671  < 2e-16 ***
ShelveLocMedium  1.75142    0.27475   6.375 5.11e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.244 on 396 degrees of freedom
Multiple R-squared:  0.3733, Adjusted R-squared:  0.3685
F-statistic: 78.62 on 3 and 396 DF,  p-value: < 2.2e-16

> contrasts(Carseats\$ShelveLoc)
Good Medium
Good      1      0
Medium    0      1
> contrasts(Carseats\$Urban)
Yes
No    0
Yes   1
> contrasts(Carseats\$Us)
Error in contrasts(Carseats\$Us) : contrasts apply only to factors
> contrasts(Carseats\$us)
Error in contrasts(Carseats\$us) : contrasts apply only to factors
> contrasts(Carseats\$US)
Yes
No    0
Yes   1
> contrasts(Carseats\$Price)
Error in contrasts(Carseats\$Price) : contrasts apply only to factors
> lm(Today~Lag1+Lag2,data=Smarket)

Call:
lm(formula = Today ~ Lag1 + Lag2, data = Smarket)

Coefficients:
(Intercept)         Lag1         Lag2
0.003283    -0.026444    -0.010946

> library("MASS", lib.loc="C:/Program Files/R/R-3.6.1/library")
> data(biopsy)
>
> str(biopsy)
'data.frame': 699 obs. of  11 variables:
\$ ID   : chr  "1000025" "1002945" "1015425" "1016277" ...
\$ V1   : int  5 5 3 6 4 8 1 2 2 4 ...
\$ V2   : int  1 4 1 8 1 10 1 1 1 2 ...
\$ V3   : int  1 4 1 8 1 10 1 2 1 1 ...
\$ V4   : int  1 5 1 1 3 8 1 1 1 1 ...
\$ V5   : int  2 7 2 3 2 7 2 2 2 2 ...
\$ V6   : int  1 10 2 4 1 10 10 1 1 1 ...
\$ V7   : int  3 3 3 3 3 9 3 3 1 2 ...
\$ V8   : int  1 2 1 7 1 7 1 1 1 1 ...
\$ V9   : int  1 1 1 1 1 1 1 1 5 1 ...
\$ class: Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
> biopsy\$ID = NULL
>
> names(biopsy) = c("thick", "u.size", "u.shape", "adhsn", "s.size",
+                   "nucl", "chrom", "n.nuc", "mit", "class")
>
> names(biopsy)
[1] "thick"   "u.size"  "u.shape" "adhsn"   "s.size"  "nucl"
[7] "chrom"   "n.nuc"   "mit"     "class"
>
> biopsy.v2 = na.omit(biopsy)
>
> library("reshape2", lib.loc="~/R/win-library/3.6")
> library("ggplot2", lib.loc="~/R/win-library/3.6")
> biop.m = melt(biopsy.v2, id.var="class")
>
> ggplot(data=biop.m, aes(x=class, y=value)) + geom_boxplot()
> +facet_wrap(~variable,ncol = 3)
Error: Cannot use `+.gg()` with a single argument. Did you accidentally put + on a new line?
>
>
> library("corrplot", lib.loc="~/R/win-library/3.6")
Warning message:
package ‘corrplot’ was built under R version 3.6.3
> bc = cor(biopsy.v2[ ,1:9])
> corrplot.mixed(bc)
>
> set.seed(123)
> ind = sample(2, nrow(biopsy.v2), replace=TRUE, prob=c(0.7, 0.3))
>
> train = biopsy.v2[ind==1,]
> test = biopsy.v2[ind==2,]
> str(test)
'data.frame': 209 obs. of  10 variables:
\$ thick  : int  5 6 4 2 1 7 6 7 1 3 ...
\$ u.size : int  4 8 1 1 1 4 1 3 1 2 ...
\$ u.shape: int  4 8 1 2 1 6 1 2 1 1 ...
\$ adhsn  : int  5 1 3 1 1 4 1 10 1 1 ...
\$ s.size : int  7 3 2 2 1 6 2 5 2 1 ...
\$ nucl   : int  10 4 1 1 1 1 1 10 1 1 ...
\$ chrom  : int  3 3 3 3 3 4 3 5 3 2 ...
\$ n.nuc  : int  2 7 1 1 1 3 1 4 1 1 ...
\$ mit    : int  1 1 1 1 1 1 1 4 1 1 ...
\$ class  : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 2 1 1 ...
- attr(*, "na.action")= 'omit' Named int  24 41 140 146 159 165 236 250 276 293 ...
..- attr(*, "names")= chr  "24" "41" "140" "146" ...
> table(train\$class)

benign malignant
302       172
>
> table(test\$class)

benign malignant
142        67
>
> full.fit = glm(class~., family=binomial, data=train)
> summary(full.fit)

Call:
glm(formula = class ~ ., family = binomial, data = train)

Deviance Residuals:
Min       1Q   Median       3Q      Max
-3.3397  -0.1387  -0.0716   0.0321   2.3559

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept)  -9.4293     1.2273  -7.683 1.55e-14
thick         0.5252     0.1601   3.280 0.001039
u.size       -0.1045     0.2446  -0.427 0.669165
u.shape       0.2798     0.2526   1.108 0.268044
s.size        0.2866     0.2074   1.382 0.167021
nucl          0.4057     0.1213   3.344 0.000826
chrom         0.2737     0.2174   1.259 0.208006
n.nuc         0.2244     0.1373   1.635 0.102126
mit           0.4296     0.3393   1.266 0.205402

(Intercept) ***
thick       **
u.size
u.shape
s.size
nucl        ***
chrom
n.nuc
mit
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 620.989  on 473  degrees of freedom
Residual deviance:  78.373  on 464  degrees of freedom
AIC: 98.373

Number of Fisher Scoring iterations: 8

>
> confint(full.fit)
Waiting for profiling to be done...
2.5 %     97.5 %
(Intercept) -12.23786660 -7.3421509
thick         0.23250518  0.8712407
u.size       -0.56108960  0.4212527
u.shape      -0.24551513  0.7725505
s.size       -0.11769714  0.7024139
nucl          0.17687420  0.6582354
chrom        -0.13992177  0.7232904
n.nuc        -0.03813490  0.5110293
mit          -0.14099177  1.0142786
> exp(coef(full.fit))
(Intercept)        thick       u.size
8.033466e-05 1.690879e+00 9.007478e-01
1.322844e+00 1.361533e+00 1.331940e+00
nucl        chrom        n.nuc
1.500309e+00 1.314783e+00 1.251551e+00
mit
1.536709e+00

## We can make powerful and insightful predictions to support

> decision-making.

1 function (x, y, wt = rep(1, NROW(x)), int = TRUE, method = c("Cp",
2     "adjr2", "r2"), nbest = 10, names = NULL, df = NROW(x), strictly.compatible = TRUE)
3 {
4     if (!is.logical(int))
5         stop("int should be TRUE or FALSE")
6     if (!is.null(names))
> fit=lm(BSAAM~., data=socal.water)
>
> fit=lm(BSAAM~.,data=socal.water)
> x<-matrix(rnorm(100),ncol=4)
> y<-rnorm(25)
> leaps(x,y)
\$which
1     2     3     4
1 FALSE FALSE FALSE  TRUE
1  TRUE FALSE FALSE FALSE
1 FALSE FALSE  TRUE FALSE
1 FALSE  TRUE FALSE FALSE
2  TRUE FALSE FALSE  TRUE
2 FALSE FALSE  TRUE  TRUE
2 FALSE  TRUE FALSE  TRUE
2  TRUE FALSE  TRUE FALSE
2  TRUE  TRUE FALSE FALSE
2 FALSE  TRUE  TRUE FALSE
3  TRUE FALSE  TRUE  TRUE
3  TRUE  TRUE FALSE  TRUE
3 FALSE  TRUE  TRUE  TRUE
3  TRUE  TRUE  TRUE FALSE
4  TRUE  TRUE  TRUE  TRUE

\$label
[1] "(Intercept)" "1"           "2"           "3"
[5] "4"

\$size
[1] 2 2 2 2 3 3 3 3 3 3 4 4 4 4 5

\$Cp
[1] 0.291457 1.330360 2.597818 2.800123 1.479960 2.123237 2.214481
[8] 2.669777 3.315351 4.290161 3.099933 3.465247 3.937318 4.529656
[15] 5.000000

> data(swiss)
> a<-regsubsets(as.matrix(swiss[,-1]),swiss[,1])
> summary(a)
Subset selection object
5 Variables  (and intercept)
Forced in Forced out
Agriculture          FALSE      FALSE
Examination          FALSE      FALSE
Education            FALSE      FALSE
Catholic             FALSE      FALSE
Infant.Mortality     FALSE      FALSE
1 subsets of each size up to 5
Selection Algorithm: exhaustive
Agriculture Examination Education Catholic Infant.Mortality
1  ( 1 ) " "         " "         "*"       " "      " "
2  ( 1 ) " "         " "         "*"       "*"      " "
3  ( 1 ) " "         " "         "*"       "*"      "*"
4  ( 1 ) "*"         " "         "*"       "*"      "*"
5  ( 1 ) "*"         "*"         "*"       "*"      "*"
> b<-regsubsets(Fertility~.,data=swiss,nbest=2)
> summary(b)
Subset selection object
Call: regsubsets.formula(Fertility ~ ., data = swiss, nbest = 2)
5 Variables  (and intercept)
Forced in Forced out
Agriculture          FALSE      FALSE
Examination          FALSE      FALSE
Education            FALSE      FALSE
Catholic             FALSE      FALSE
Infant.Mortality     FALSE      FALSE
2 subsets of each size up to 5
Selection Algorithm: exhaustive
Agriculture Examination Education Catholic Infant.Mortality
1  ( 1 ) " "         " "         "*"       " "      " "
1  ( 2 ) " "         "*"         " "       " "      " "
2  ( 1 ) " "         " "         "*"       "*"      " "
2  ( 2 ) " "         " "         "*"       " "      "*"
3  ( 1 ) " "         " "         "*"       "*"      "*"
3  ( 2 ) "*"         " "         "*"       "*"      " "
4  ( 1 ) "*"         " "         "*"       "*"      "*"
4  ( 2 ) " "         "*"         "*"       "*"      "*"
5  ( 1 ) "*"         "*"         "*"       "*"      "*"
> coef(a, 1:3)
[[1]]
(Intercept)   Education
79.6100585  -0.8623503

[[2]]
(Intercept)   Education    Catholic
74.2336892  -0.7883293   0.1109210

[[3]]
(Intercept)        Education         Catholic Infant.Mortality
48.67707330      -0.75924577       0.09606607       1.29614813

> vcov(a, 3)
(Intercept)     Education      Catholic
(Intercept)      62.711883147 -0.2349982009 -0.0011120059
Education        -0.234998201  0.0136416868  0.0004427309
Catholic         -0.001112006  0.0004427309  0.0007408169
Infant.Mortality -2.952862263  0.0033603646 -0.0017163629
Infant.Mortality
(Intercept)          -2.952862263
Education             0.003360365
Catholic             -0.001716363
Infant.Mortality      0.149759535
> plot(a)
> plot(a,scale="r2")

## Linear regression in R, one uses the lm() function to create a model in the

> standard form of fit = lm(Y~X)
R is a collaborative project with many contributors.
> data(snake)
>
> attach(snake)
> dim(snake)
[1] 17  2
>
X    Y
1 23.1 10.5
2 32.8 16.7
3 31.8 18.2
4 32.0 17.0
5 30.4 16.3
6 24.0 10.5
> names(snake) = c("Independent", "dependent")
> attach(snake)
Independent dependent
1        23.1      10.5
2        32.8      16.7
3        31.8      18.2
4        32.0      17.0
5        30.4      16.3
6        24.0      10.5
>
> plot(Independent, dependent, xlab="water content of snow", ylab="water yield")
>
> # linear regression in R, one uses the lm() function to create a model in the
> standard form of fit = lm(Y~X).
Error: unexpected symbol in "standard form"
> yield.fit = lm(dependent~Independent)
>
> summary(yield.fit)

Call:
lm(formula = dependent ~ Independent)

Residuals:
Min      1Q  Median      3Q     Max
-2.1793 -1.5149 -0.3624  1.6276  3.1973

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept)  0.72538    1.54882   0.468    0.646
Independent  0.49808    0.04952  10.058 4.63e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.743 on 15 degrees of freedom
Multiple R-squared:  0.8709, Adjusted R-squared:  0.8623
F-statistic: 101.2 on 1 and 15 DF,  p-value: 4.632e-08

>
> plot(Independent,dependent)
> abline(yield.fit, lwd=3, col="red")
>

> par(mfrow=c(2,2))
>
> plot(yield.fit)
>
> qqPlot(yield.fit)
[1]  7 10
>
> data(water)
>
> str(water)
'data.frame': 43 obs. of  8 variables:
\$ Year   : int  1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 ...
\$ APMAM  : num  9.13 5.28 4.2 4.6 7.15 9.7 5.02 6.7 10.5 9.1 ...
\$ APSAB  : num  3.58 4.82 3.77 4.46 4.99 5.65 1.45 7.44 5.85 6.13 ...
\$ APSLAKE: num  3.91 5.2 3.67 3.93 4.88 4.91 1.77 6.51 3.38 4.08 ...
\$ OPBPC  : num  4.1 7.55 9.52 11.14 16.34 ...
\$ OPRC   : num  7.43 11.11 12.2 15.15 20.05 ...
\$ OPSLAKE: num  6.47 10.26 11.35 11.13 22.81 ...
\$ BSAAM  : int  54235 67567 66161 68094 107080 67594 65356 67909 92715 70024 ...
>
> socal.water = water[ ,-1] #new dataframe with the deletion of column 1
>
APMAM APSAB APSLAKE OPBPC  OPRC OPSLAKE  BSAAM
1  9.13  3.58    3.91  4.10  7.43    6.47  54235
2  5.28  4.82    5.20  7.55 11.11   10.26  67567
3  4.20  3.77    3.67  9.52 12.20   11.35  66161
4  4.60  4.46    3.93 11.14 15.15   11.13  68094
5  7.15  4.99    4.88 16.34 20.05   22.81 107080
6  9.70  5.65    4.91  8.88  8.15    7.41  67594
> water.cor = cor(socal.water)
> water.cor
APMAM      APSAB    APSLAKE      OPBPC      OPRC
APMAM   1.0000000 0.82768637 0.81607595 0.12238567 0.1544155
APSAB   0.8276864 1.00000000 0.90030474 0.03954211 0.1056396
APSLAKE 0.8160760 0.90030474 1.00000000 0.09344773 0.1063836
OPBPC   0.1223857 0.03954211 0.09344773 1.00000000 0.8647073
OPRC    0.1544155 0.10563959 0.10638359 0.86470733 1.0000000
OPSLAKE 0.1075421 0.02961175 0.10058669 0.94334741 0.9191447
BSAAM   0.2385695 0.18329499 0.24934094 0.88574778 0.9196270
OPSLAKE     BSAAM
APMAM   0.10754212 0.2385695
APSAB   0.02961175 0.1832950
APSLAKE 0.10058669 0.2493409
OPBPC   0.94334741 0.8857478
OPRC    0.91914467 0.9196270
OPSLAKE 1.00000000 0.9384360
BSAAM   0.93843604 1.0000000
>library("corrplot", lib.loc="~/R/win-library/3.6")
> corrplot(water.cor, method="ellipse")