Coverage for src/scrilla/analysis/models/geometric/statistics.py: 50% (501 statements).
Report generated by coverage.py v6.4.2 at 2022-07-18 18:14 +0000.
# This file is part of scrilla: https://github.com/chinchalinchin/scrilla.
#
# scrilla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3
# as published by the Free Software Foundation.
#
# scrilla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with scrilla. If not, see <https://www.gnu.org/licenses/>
# or <https://github.com/chinchalinchin/scrilla/blob/develop/main/LICENSE>.
16from scrilla.util import errors, outputter, helper, dater
17from scrilla.analysis import estimators
18from scrilla.static import keys, functions, constants
19from scrilla import services, files, settings, cache
20from numpy import inf
21from datetime import date
22from itertools import groupby
23import datetime
24import itertools
25from typing import Dict, List, Union
26from math import log, sqrt
27from scipy.stats import norm, multivariate_normal
28from scipy.optimize import fsolve, least_squares
# Module-level logger scoped to this module's dotted path; verbosity is
# controlled by the application-wide LOG_LEVEL setting.
logger = outputter.Logger(
    'scrilla.analysis.models.geometric.statistics', settings.LOG_LEVEL)
# Caches for previously computed risk-return profiles and pairwise
# correlations, so repeated calls can skip recalculation.
profile_cache = cache.ProfileCache()
correlation_cache = cache.CorrelationCache()
def get_sample_of_returns(ticker: str, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, start_date: Union[date, None] = None, end_date: Union[date, None] = None, asset_type: Union[str, None] = None, daily: bool = False) -> List[float]:
    """
    Generates a list of logarithmic returns on the sample `prices`. Sample return is annualized unless `daily=True`.

    Parameters
    ----------
    1. **ticker**: ``str``
        Ticker symbol whose sample of returns is to be generated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Start date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    3. **end_date** : ``Union[date, None]``
        *Optional*. End date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which the calculation proceeds as if `end_date` were set to today. If the `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']` this means today regardless. If `get_asset_type(ticker)=keys.keys['ASSETS']['EQUITY']` this excludes holidays and weekends and sets the end date to the last valid trading date.
    4. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. A list of the asset prices for which the risk profile will be calculated. Overrides calls to service and forces calculation of risk price for sample of prices supplied. Function will disregard `start_date` and `end_date` and use the first and last key as the latest and earliest date, respectively. In other words, the `sample_prices` dictionary must be ordered from latest to earliest. Format: `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent overusing redundant calculations. Allowable values: `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **daily**: ``bool``
        *Optional*. Defaults to `False`, in which case each return is scaled by the asset's trading period, i.e. annualized. If `True`, the raw per-day returns are returned instead.

    Raises
    ------
    1. **scrilla.errors.SampleSizeError**
        If the date range passed in does not have enough dates to compute the logarithmic sample (n>1), then this error is thrown. If `sample_prices` was passed in to override the `start_date` and `end_date` arguments, this error will be thrown if the `len(sample_prices)<1`.

    Returns
    ------
    ``List[float]``
        A list of logarithmic returns, ordered from latest to earliest date.

    .. notes::
        * the `trading_period` for a single asset can be determined from its `asset_type`...should i use a conditional and fork constants.constants['ONE_TRADING_DAY'] instead of passing it in?
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if sample_prices is None:
        # Guard: the date range must contain at least two prices so at least
        # one pairwise return can be computed.
        if (asset_type == keys.keys['ASSETS']['CRYPTO'] and dater.days_between(start_date, end_date) == 1) \
                or (asset_type == keys.keys['ASSETS']['EQUITY'] and dater.business_days_between(start_date, end_date) == 1):
            raise errors.SampleSizeError(
                'Not enough price data to compute returns')
        logger.debug('No sample prices provided, calling service.',
                     'get_sample_of_returns')
        start_date, end_date = errors.validate_dates(
            start_date, end_date, asset_type)
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)
    else:
        if len(sample_prices) < 1:
            raise errors.SampleSizeError(
                'Not enough price data to compute returns')
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', 'get_sample_of_returns')
        prices = sample_prices

    today = False
    sample_of_returns = []

    # Prices are ordered latest to earliest, so on each pass `tomorrows_*`
    # refers to the later of the two dates being compared.
    for this_date in prices:
        todays_price = prices[this_date][keys.keys['PRICES']['CLOSE']]

        if today:
            logger.verbose(
                f'{this_date}: (todays_price, tomorrows_price) = ({todays_price}, {tomorrows_price})', 'get_sample_of_returns')
            # NOTE: crypto prices may have weekends and holidays removed during correlation algorithm
            # so samples can be compared to equities, need to account for these dates by increasing
            # the time_delta by the number of missed days.
            if asset_type == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1

            todays_return = log(float(tomorrows_price) /
                                float(todays_price))/(time_delta)

            if not daily:
                todays_return = todays_return/trading_period

            sample_of_returns.append(todays_return)
        else:
            today = True

        tomorrows_price = prices[this_date][keys.keys['PRICES']['CLOSE']]
        tomorrows_date = this_date

    return sample_of_returns
def calculate_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for *ticker* from *start_date* to *end_date*, delegating the calculation to the estimation method *method*.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose moving averages are to be calculated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date** : ``Union[date, None]``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.
    4. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`. Determines the estimation method used during the calculation. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.

    Raises
    ------
    1. **scrilla.errors.ConfigurationError**
        If the inputted `method` does not map to one of the allowable values in the `scrilla.static.keys.keys` dictionary.

    Returns
    ------
    ``Dict[str, Dict[str, float]]``
        Dictionary with dates as keys and nested dictionaries containing the moving averages as values.
    """
    if method == keys.keys['ESTIMATION']['MOMENT']:
        return _calculate_moment_moving_averages(ticker=ticker,
                                                 start_date=start_date,
                                                 end_date=end_date)
    if method == keys.keys['ESTIMATION']['PERCENT']:
        return _calculate_percentile_moving_averages(ticker=ticker,
                                                     start_date=start_date,
                                                     end_date=end_date)
    if method == keys.keys['ESTIMATION']['LIKE']:
        return _calculate_likelihood_moving_averages(ticker=ticker,
                                                     start_date=start_date,
                                                     end_date=end_date)
    raise errors.ConfigurationError('Statistical estimation method not found')
def calculate_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, float]:
    """
    Returns the correlation between *ticker_1* and *ticker_2* from *start_date* to *end_date* using the estimation method *method*.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset
    3. **asset_type_1** : ``str``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    4. **asset_type_2** : ``str``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    5. *start_date* : ``datetime.date``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    7. **sample_prices** : ``dict``
        *Optional*. A list of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1:, ... } }` and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int, None]``
        *Optional*. Defaults to `None`. Flag passed down to the underlying estimation function; see `_calculate_moment_correlation` and siblings for its interpretation. TODO(review): document the allowable values once confirmed.
    9. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.

    Raises
    ------
    1. **KeyError**
        If the `method` passed in doesn't map to one of the allowable estimation method values, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`: Formatted as: `{ 'correlation' : float }`.
    """
    if method == keys.keys['ESTIMATION']['MOMENT']:
        return _calculate_moment_correlation(ticker_1, ticker_2, asset_type_1, asset_type_2, start_date, end_date, sample_prices, weekends)
    if method == keys.keys['ESTIMATION']['LIKE']:
        return _calculate_likelihood_correlation(ticker_1, ticker_2, asset_type_1, asset_type_2, start_date, end_date, sample_prices, weekends)
    if method == keys.keys['ESTIMATION']['PERCENT']:
        return _calculate_percentile_correlation(ticker_1, ticker_2, asset_type_1, asset_type_2, start_date, end_date, sample_prices, weekends)
    # NOTE: kept as KeyError (not ConfigurationError) for backward
    # compatibility with callers that catch it, per the documented contract.
    raise KeyError('Estimation method not found')
def calculate_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, float]:
    """
    Estimates the mean rate of return and volatility for a sample of asset prices as if the asset price followed a Geometric Brownian Motion process, i.e. the mean rate of return and volatility are constant and not functions of time or the asset price. Uses the method passed in through `method` to estimate the model parameters.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``datetime.date``
        Optional. Start date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    3. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which the calculation proceeds as if `end_date` were set to today. If the `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']` this means today regardless. If `get_asset_type(ticker)=keys.keys['ASSETS']['EQUITY']` this excludes holidays and weekends and sets the end date to the last valid trading date.
    4. **sample_prices** : ``list``
        *Optional*. A list of the asset prices for which the risk profile will be calculated. Overrides calls to service and forces calculation of risk price for sample of prices supplied. Function will disregard `start_date` and `end_date` and use the first and last key as the latest and earliest date, respectively. In other words, the `sample_prices` dictionary must be ordered from latest to earliest. Format: `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``str``
        *Optional*. Specify asset type to prevent overusing redundant calculations. Allowable values: `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. Defaults to `None`. Flag passed down to the underlying estimation function; see `_calculate_moment_risk_return` and siblings for its interpretation. TODO(review): document the allowable values once confirmed.
    7. **method**: ``str``
        *Optional*. The calculation method to be used in estimating model parameters, i.e. the mean and volatility. Allowable values are accessible through `scrilla.static.keys.keys['ESTIMATION']`. Defaults to the method set in `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the environment variable, **DEFAULT_ESTIMATION_METHOD**. If this variable is not found, the value will default to `scrilla.static.keys.keys['ESTIMATION']['MOMENT']`.

    Raises
    ------
    1. **scrilla.errors.ConfigurationError**
        If the inputted `method` does not map to one of the allowable values in the `scrilla.static.keys.keys` dictionary, then this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as follows,

        ```
        {
            'annual_return': value,
            'annual_volatility': value
        }
        ```

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and the service call altogether. The function will assume `sample_prices` is the source of the truth.
    """
    if method == keys.keys['ESTIMATION']['MOMENT']:
        return _calculate_moment_risk_return(ticker=ticker,
                                             start_date=start_date,
                                             end_date=end_date,
                                             sample_prices=sample_prices,
                                             asset_type=asset_type,
                                             weekends=weekends)
    if method == keys.keys['ESTIMATION']['PERCENT']:
        return _calculate_percentile_risk_return(ticker=ticker,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 sample_prices=sample_prices,
                                                 asset_type=asset_type,
                                                 weekends=weekends)
    if method == keys.keys['ESTIMATION']['LIKE']:
        return _calculate_likelihood_risk_return(ticker=ticker,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 sample_prices=sample_prices,
                                                 asset_type=asset_type,
                                                 weekends=weekends)
    raise errors.ConfigurationError('Statistical estimation method not found')
def _calculate_moment_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, computed directly from the log of the price ratio over each period. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``datetime.date``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``datetime.date``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * price samples are ordered from latest to earliest date.
        * If no start_date and end_date passed in, static snapshot of moving averages, i.e. the moving averages as of today (or last close), are calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, sample_end)` refer to the endpoints of the sample necessary to calculate the previously define calculation. Note, `sample_end == end_date`, but `sample_start == start_date - max(MA_1_PERIOD, MA_2_PERIOD, MA_3_PERIOD)`, in order for the sample to contain enough data points to estimate the moving average.
    """
    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # Default to a single-day snapshot ending today (or the last trading
    # date for equities).
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # Extend the sample start back by the longest MA period so every date in
    # the range has enough history to compute all three averages.
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_moment_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # Slice out the `ma_period` prices preceding `this_date`
            # (prices are ordered latest to earliest).
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            last_date, first_date = list(ma_range)[0], list(ma_range)[-1]
            last_price = ma_range[last_date][keys.keys['PRICES']['CLOSE']]
            first_price = ma_range[first_date][keys.keys['PRICES']['CLOSE']]
            # Mean log return over the period, scaled by the trading period.
            mas.append(log(float(last_price)/float(first_price)) /
                       (trading_period*ma_period))

        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages
def _calculate_percentile_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, estimated by fitting a normal distribution to the first and third quartiles of the sample of returns over each period. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``datetime.date``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``datetime.date``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * price samples are ordered from latest to earliest date.
        * If no start_date and end_date passed in, static snapshot of moving averages, i.e. the moving averages as of today (or last close), are calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, sample_end)` refer to the endpoints of the sample necessary to calculate the previously define calculation. Note, `sample_end == end_date`, but `sample_start == start_date - max(MA_1_PERIOD, MA_2_PERIOD, MA_3_PERIOD)`, in order for the sample to contain enough data points to estimate the moving average.
    """
    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # Default to a single-day snapshot ending today (or the last trading
    # date for equities).
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # Extend the sample start back by the longest MA period so every date in
    # the range has enough history to compute all three averages.
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_percentile_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # Slice out the `ma_period` prices preceding `this_date`
            # (prices are ordered latest to earliest).
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            sample_of_returns = get_sample_of_returns(
                ticker=ticker, sample_prices=ma_range)

            first_quartile = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.25)
            median = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.50)
            third_quartile = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.75)
            guess = (median, (third_quartile-first_quartile)/2)

            # Solve for the normal distribution whose CDF passes through the
            # sample's first and third quartiles.
            mean, vol = fsolve(lambda params, first=first_quartile, third=third_quartile:
                               [norm.cdf(x=first, loc=params[0], scale=params[1]) - 0.25,
                                norm.cdf(x=third, loc=params[0], scale=params[1]) - 0.75],
                               guess)

            # NOTE: Var(dln(S)/delta_t) = (1/delta_t^2)*Var(dlnS) = sigma^2*delta_t / delta_t^2 = sigma^2 / delta_t
            # so need to multiply volatiliy by sqrt(delta_t) to get correct scale.
            vol = vol * sqrt(trading_period)
            # ito's lemma
            mean = mean + 0.5 * (vol ** 2)
            mas.append(mean)
        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages
def _calculate_likelihood_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, estimated by maximum likelihood over the sample of returns in each period. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``datetime.date``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``datetime.date``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * price samples are ordered from latest to earliest date.
        * If no start_date and end_date passed in, static snapshot of moving averages, i.e. the moving averages as of today (or last close), are calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, sample_end)` refer to the endpoints of the sample necessary to calculate the previously define calculation. Note, `sample_end == end_date`, but `sample_start == start_date - max(MA_1_PERIOD, MA_2_PERIOD, MA_3_PERIOD)`, in order for the sample to contain enough data points to estimate the moving average.
    """
    # Local import to avoid a circular dependency between this module and
    # the optimizer module.
    from scrilla.analysis.optimizer import maximize_univariate_normal_likelihood

    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # Default to a single-day snapshot ending today (or the last trading
    # date for equities).
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # Extend the sample start back by the longest MA period so every date in
    # the range has enough history to compute all three averages.
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_likelihood_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # Slice out the `ma_period` prices preceding `this_date`
            # (prices are ordered latest to earliest).
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            sample_of_returns = get_sample_of_returns(
                ticker=ticker, sample_prices=ma_range)

            likelihood_estimates = maximize_univariate_normal_likelihood(
                data=sample_of_returns)
            # See NOTE in docstring
            # NOTE: E(dln(S)/delta_t) = (mu - 0.5 * sigma ** 2) * delta_t / delta_t = mu - 0.5 * sigma ** 2
            # TODO: add :math to docstring with this
            # NOTE: Var(dln(S)/delta_t) = (1/delta_t**2)*Var(dlnS) = sigma**2*delta_t / delta_t**2 = sigma**2 / delta_t
            # so need to multiply volatiliy by sqrt(delta_t) to get correct scale.
            vol = likelihood_estimates[1]*sqrt(trading_period)
            # ito's lemma
            mean = likelihood_estimates[0] + 0.5 * (vol ** 2)
            mas.append(mean)
        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages
def _calculate_likelihood_risk_return(ticker, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the annualized mean rate of return and volatility for a sample of asset prices
    as if the asset price followed a Geometric Brownian Motion process, i.e. the mean rate of
    return and volatility are constant and not functions of time or the asset price. The
    parameters are estimated by maximum likelihood: the product of the probabilities of each
    observed return under a normal distribution is maximized with respect to the mean and
    the volatility.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `start_date` were 100 trading days prior to `end_date`
        (100 calendar days for crypto assets).
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `end_date` were today (the last valid trading date for equities).
    4. **sample_prices** : ``Union[dict, None]``
        *Optional*. Prices for which the risk profile will be calculated. Overrides service calls
        and bypasses the cache entirely. `start_date` and `end_date` are disregarded; the first and
        last keys are used as the latest and earliest dates, i.e. the dictionary must be ordered
        from latest to earliest. Format:
        `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations. Allowable values:
        `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. `1` to include weekend prices in the sample, `0` to exclude them. Defaults to
        `1` for crypto assets and `0` otherwise.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price
        data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as
        `{ 'annual_return' : float, 'annual_volatility': float }`

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and
          the service call altogether. The function will assume `sample_prices` is the source of
          the truth.
    """
    # local import to avoid a circular dependency between this module and the optimizer
    from scrilla.analysis.optimizer import maximize_univariate_normal_likelihood

    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        if asset_type == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if sample_prices is None:
        start_date, end_date = errors.validate_dates(
            start_date, end_date, asset_type)
        # BUG FIX: `weekends` is now passed to the cache filter. Profiles are saved with
        # `weekends` (see save_or_update_row below) and the percentile/moment estimators
        # filter on it; omitting it here could return a cached profile computed under a
        # different weekend setting.
        results = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                       method=keys.keys['ESTIMATION']['LIKE'],
                                       weekends=weekends)

        if results is not None \
                and results[keys.keys['STATISTICS']['RETURN']] is not None \
                and results[keys.keys['STATISTICS']['VOLATILITY']] is not None:
            return results

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_likelihood_risk_return')
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_likelihood_risk_return')
            prices = dater.intersect_with_trading_dates(prices)

    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_likelihood_risk_return')
        prices = sample_prices

    if not prices:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    sample_of_returns = get_sample_of_returns(
        ticker=ticker, sample_prices=prices, asset_type=asset_type)

    likelihood_estimates = maximize_univariate_normal_likelihood(
        data=sample_of_returns)
    # NOTE: E(dln(S)/delta_t) = (mu - 0.5 * sigma ** 2) * delta_t / delta_t = mu - 0.5 * sigma ** 2
    # NOTE: Var(dln(S)/delta_t) = (1/delta_t**2)*Var(dlnS) = sigma**2*delta_t / delta_t**2 = sigma**2 / delta_t
    #       so need to multiply volatility by sqrt(delta_t) to get correct scale.
    vol = likelihood_estimates[1]*sqrt(trading_period)
    # ito's lemma
    mean = likelihood_estimates[0] + 0.5 * (vol ** 2)
    results = {
        keys.keys['STATISTICS']['RETURN']: mean,
        keys.keys['STATISTICS']['VOLATILITY']: vol
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['LIKE'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])

    return results
def _calculate_percentile_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the annualized mean rate of return and volatility for a sample of asset prices
    under a Geometric Brownian Motion model via percentile matching: the 25th and 75th
    percentiles of the assumed (normal) return distribution are equated to the corresponding
    sample percentiles and the resulting system is solved for the distribution's mean and
    standard deviation.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `start_date` were 100 trading days prior to `end_date`
        (100 calendar days for crypto assets).
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `end_date` were today (the last valid trading date for equities).
    4. **sample_prices** : ``Union[dict, None]``
        *Optional*. Prices for which the risk profile will be calculated. Overrides service calls
        and bypasses the cache. `start_date` and `end_date` are disregarded; the dictionary must be
        ordered from latest to earliest date. Format:
        `{ 'date_1' : { 'open' : float, 'close' : float}, 'date_2': { 'open': float, 'close': float } ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations. Allowable values:
        `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. `1` to include weekend prices in the sample, `0` to exclude them. Defaults to
        `1` for crypto assets and `0` otherwise.

    Returns
    -------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as
        `{ 'annual_return' : float, 'annual_volatility': float }`

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price
        data from the cache or external services, this error will be thrown.

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and
          the service call altogether and assume `sample_prices` is the source of the truth.
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        weekends = 1 if asset_type == keys.keys['ASSETS']['CRYPTO'] else 0

    if sample_prices is None:
        # validate over calendar days when weekends are included, otherwise over
        # trading days, so samples of mixed asset types line up correctly.
        date_basis = keys.keys['ASSETS']['CRYPTO'] if weekends == 1 \
            else keys.keys['ASSETS']['EQUITY']
        start_date, end_date = errors.validate_dates(
            start_date, end_date, date_basis)

        cached_profile = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                              method=keys.keys['ESTIMATION']['PERCENT'],
                                              weekends=weekends)

        if cached_profile is not None \
                and cached_profile[keys.keys['STATISTICS']['RETURN']] is not None \
                and cached_profile[keys.keys['STATISTICS']['VOLATILITY']] is not None:
            return cached_profile

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_percentile_risk_return')
        price_history = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_percentile_risk_return')
            price_history = dater.intersect_with_trading_dates(price_history)
    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_percentile_risk_return')
        price_history = sample_prices

    if not price_history:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    returns = get_sample_of_returns(
        ticker=ticker, sample_prices=price_history, asset_type=asset_type)

    first_quartile = estimators.sample_percentile(data=returns, percentile=0.25)
    median = estimators.sample_percentile(data=returns, percentile=0.50)
    third_quartile = estimators.sample_percentile(data=returns, percentile=0.75)

    def quartile_conditions(params):
        # residuals of the normal CDF evaluated at the sample quartiles
        return [norm.cdf(x=first_quartile, loc=params[0], scale=params[1]) - 0.25,
                norm.cdf(x=third_quartile, loc=params[0], scale=params[1]) - 0.75]

    # seed the solver with the sample median and half the interquartile range
    initial_guess = (median, (third_quartile - first_quartile) / 2)
    mean, vol = fsolve(quartile_conditions, initial_guess)

    # NOTE: Var(dln(S)/delta_t) = (1/delta_t^2)*Var(dlnS) = sigma^2*delta_t / delta_t^2 = sigma^2 / delta_t
    #       so need to multiply volatility by sqrt(delta_t) to get correct scale.
    vol = vol * sqrt(trading_period)
    # ito's lemma
    mean = mean + 0.5 * (vol ** 2)

    results = {
        keys.keys['STATISTICS']['RETURN']: mean,
        keys.keys['STATISTICS']['VOLATILITY']: vol
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['PERCENT'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])
    return results
def _calculate_moment_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the annualized mean rate of return and volatility for a sample of asset prices
    under a Geometric Brownian Motion model via moment matching: the return is equated to the
    first moment of the sample and the volatility to the square root of its second (central)
    moment.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `start_date` were 100 trading days prior to `end_date`
        (100 calendar days for crypto assets).
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `end_date` were today (the last valid trading date for equities).
    4. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. Prices for which the risk profile will be calculated. Overrides service calls
        and bypasses the cache. `start_date` and `end_date` are disregarded; the dictionary must be
        ordered from latest to earliest date. Format:
        `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations. Allowable values can be
        found in `scrilla.keys.keys['ASSETS']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. `1` to include weekend prices in the sample, `0` to exclude them. Defaults to
        `1` for crypto assets and `0` otherwise.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price
        data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as
        `{ 'annual_return' : float, 'annual_volatility': float }`

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * function will bypass the cache if `sample_prices` is provided. In other words, the
          calculation can be forced by specifying `sample_prices`.
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        weekends = 1 if asset_type == keys.keys['ASSETS']['CRYPTO'] else 0

    if sample_prices is None:
        # NOTE: Cache is bypassed when sample_prices are not null.
        # validate over calendar days when weekends are included, otherwise over
        # trading days, so samples of mixed asset types line up correctly.
        date_basis = keys.keys['ASSETS']['CRYPTO'] if weekends == 1 \
            else keys.keys['ASSETS']['EQUITY']
        start_date, end_date = errors.validate_dates(
            start_date, end_date, date_basis)

        cached_profile = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                              method=keys.keys['ESTIMATION']['MOMENT'], weekends=weekends)

        if cached_profile is not None \
                and cached_profile.get(keys.keys['STATISTICS']['RETURN']) is not None \
                and cached_profile.get(keys.keys['STATISTICS']['VOLATILITY']) is not None:
            return cached_profile

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_moment_risk_return')
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_moment_risk_return')
            prices = dater.intersect_with_trading_dates(prices)
    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_moment_risk_return')
        prices = sample_prices

    if not prices:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    # taking the log of a difference loses one observation
    sample = len(prices) - 1
    logger.debug(
        f'Calculating mean annual return over last {sample} days for {ticker}', '_calculate_moment_risk_return')

    # MEAN CALCULATION
    # NOTE: mean return is a telescoping series, i.e. sum of log(x1/x0) only depends on the first and
    # last terms' contributions (because log(x1/x0) + log(x2/x1)= log(x2) - log(x1) + log(x1) - log(x0)) = log(x2/x0))
    # which raises the question how accurate a measure the sample mean return is of the population mean return.
    ordered_dates = list(prices)
    latest_close = prices[ordered_dates[0]][keys.keys['PRICES']['CLOSE']]
    earliest_close = prices[ordered_dates[-1]][keys.keys['PRICES']['CLOSE']]
    mean_return = log(float(latest_close)/float(earliest_close)) / \
        (trading_period*sample)

    # VOLATILITY CALCULATION
    # NOTE / TODO : this is a 'naive' variance algorithm: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    # although technically, this is only one pass, since the mean telescopes and doesn't require a full traversal of the
    # the sample. I should see how this implementation compares to a Young and Cramer Updating algorithm implementation.
    variance = 0
    # adjust the random variable being measured so expectation is easier to calculate.
    mean_mod_return = mean_return*sqrt(trading_period)
    logger.debug(
        f'Calculating mean annual volatility over last {sample} days for {ticker}', '_calculate_moment_risk_return')

    # walk consecutive (later, earlier) date pairs; prices are ordered latest first
    for tomorrows_date, this_date in zip(ordered_dates, ordered_dates[1:]):
        todays_price = prices[this_date][keys.keys['PRICES']['CLOSE']]
        tomorrows_price = prices[tomorrows_date][keys.keys['PRICES']['CLOSE']]

        logger.verbose(
            f'{this_date}: (todays_price, tomorrows_price) = ({todays_price}, {tomorrows_price})', '_calculate_moment_risk_return')

        # crypto prices may have weekends and holidays removed during correlation algorithm
        # so samples can be compared to equities, need to account for these dates by increasing
        # the time_delta by the number of missed days.
        if asset_type == keys.keys['ASSETS']['CRYPTO'] or \
                (asset_type == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
            time_delta = (dater.parse(
                tomorrows_date) - dater.parse(this_date)).days
        else:
            time_delta = 1

        current_mod_return = log(
            float(tomorrows_price)/float(todays_price))/sqrt(time_delta*trading_period)
        daily = (current_mod_return - mean_mod_return)**2/(sample - 1)
        variance = variance + daily

        logger.verbose(
            f'{this_date}: (daily_variance, sample_variance) = ({round(daily, 4)}, {round(variance, 4)})', '_calculate_moment_risk_return')

    # adjust for output
    volatility = sqrt(variance)
    # ito's lemma
    mean_return = mean_return + 0.5*(volatility**2)
    logger.debug(
        f'(mean_return, sample_volatility) = ({round(mean_return, 2)}, {round(volatility, 2)})', '_calculate_moment_risk_return')

    results = {
        keys.keys['STATISTICS']['RETURN']: mean_return,
        keys.keys['STATISTICS']['VOLATILITY']: volatility
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['MOMENT'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])
    return results
def _calculate_percentile_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Returns the sample correlation calculated using the method of Percentile Matching, assuming
    the underlying price process follows Geometric Brownian Motion, i.e. the price distribution
    is lognormal.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **asset_type_1** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable
        values can be found in the `scrilla.keys.keys['ASSETS']` dictionary.
    4. **asset_type_2** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable
        values can be found in the `scrilla.keys.keys['ASSETS']` dictionary.
    5. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the time period over which correlation will be calculated.
        If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which correlation will be calculated.
        If `None`, defaults to the last trading day.
    7. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. Prices for which correlation will be calculated. Overrides calls to service
        and disregards `start_date`/`end_date`. Must be formatted:
        `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1', ... } }`
        and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int, None]``
        *Optional*. `1` to include weekend prices in the sample, `0` to exclude them. Defaults to
        `1` when both assets are crypto and `0` otherwise.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price
        data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`, formatted as
        `{ 'correlation' : float }`.

    .. notes ::
        * Method uses the theory of copulas for multivariate distributions to break the joint
          distribution into component distributions in order to find the cumulative probability
          of the individual distribution's order statistics. See *references* for more information.

    .. references::
        - [How To Determine Quantile Isolines Of A Multivariate Normal Distribution](https://stats.stackexchange.com/questions/64680/how-to-determine-quantiles-isolines-of-a-multivariate-normal-distribution)
        - [Copula (Probability Theory)](https://en.wikipedia.org/wiki/Copula_(probability_theory))
        - [An Introduction To Copulas](http://www.columbia.edu/~mh2078/QRM/Copulas.pdf)
    """
    ### START ARGUMENT PARSING ###
    asset_type_1 = errors.validate_asset_type(
        ticker=ticker_1, asset_type=asset_type_1)
    asset_type_2 = errors.validate_asset_type(
        ticker=ticker_2, asset_type=asset_type_2)

    # cache flag to signal if calculation includes weekends or not,
    # only perform check if not passed in as argument
    if weekends is None:
        if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1:
        # validate over total days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        # validate over trading days. since sample(date - 100 days) > (date - 100 trading days), always
        # take the largest sample so intersect_dict_keys will return a sample of the correct size
        # for mixed asset types.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['EQUITY'])

    if sample_prices is None:
        cached_correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
                                                      start_date=start_date, end_date=end_date,
                                                      weekends=weekends,
                                                      method=keys.keys['ESTIMATION']['PERCENT'])
        if cached_correlation is not None:
            return cached_correlation

        sample_prices = {}
        logger.debug(
            f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_percentile_correlation')
        # BUG FIX: log tag previously read '_calculate_percentile_calculation'
        logger.debug('Retrieving price histories for calculation.',
                     '_calculate_percentile_correlation')
        sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_1)
        sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_2)

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
        sample_prices[ticker_1] = dater.intersect_with_trading_dates(
            sample_prices[ticker_1])
        sample_prices[ticker_2] = dater.intersect_with_trading_dates(
            sample_prices[ticker_2])
    else:
        # intersect with equity keys to get trading days
        sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
            sample_prices[ticker_1], sample_prices[ticker_2])

    if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]:
        raise errors.PriceError(
            "Prices cannot be retrieved for correlation calculation", '_calculate_percentile_correlation')

    sample_of_returns_1 = estimators.standardize(get_sample_of_returns(
        ticker=ticker_1, sample_prices=sample_prices[ticker_1], asset_type=asset_type_1))
    sample_of_returns_2 = estimators.standardize(get_sample_of_returns(
        ticker=ticker_2, sample_prices=sample_prices[ticker_2], asset_type=asset_type_2))

    combined_sample = [[el, sample_of_returns_2[i]]
                       for i, el in enumerate(sample_of_returns_1)]

    percentiles = [0.1, 0.16, 0.5, 0.84, 0.9]
    sample_percentiles_1, sample_percentiles_2 = [], []

    for percentile in percentiles:
        sample_percentiles_1.append(estimators.sample_percentile(
            data=sample_of_returns_1, percentile=percentile))
        sample_percentiles_2.append(estimators.sample_percentile(
            data=sample_of_returns_2, percentile=percentile))

    logger.debug(
        f'Standardized sample percentiles for {ticker_1}: \n{sample_percentiles_1}', '_calculate_percentile_correlation')
    logger.debug(
        f'Standardized sample percentiles for {ticker_2}: \n{sample_percentiles_2}', '_calculate_percentile_correlation')

    def copula_matrix(params):
        # reject parameters producing a non-positive-definite covariance matrix;
        # NOTE(review): returning scalar `inf` rather than a matrix appears to be a
        # deliberate penalty for the solver — confirm multivariate_normal.cdf's
        # behavior with this input before changing.
        determinant = 1 - params[0]**2
        if determinant == 0 or determinant < 0 or determinant < (10**(-constants.constants['ACCURACY'])):
            logger.verbose('Solution is non-positive semi-definite',
                           '_calculate_percentile_correlation')
            return inf
        logger.verbose(
            f'Instantiating Copula Matrix: \n{[[1, params[0]], [params[0], 1]]}', '_calculate_percentile_correlation')
        return [[1, params[0]], [params[0], 1]]

    # Calculate copula distribution of order statistics and constrain it against the empirical estimate
    def residuals(params):
        # BUG FIX: the loop variable previously iterated `enumerate(percentiles)`,
        # which yields (index, value) tuples and raised a TypeError when used as a
        # list index. Iterate the indices directly instead.
        res = [
            (multivariate_normal.cdf(x=[sample_percentiles_1[i], sample_percentiles_2[i]],
                                     mean=[0, 0], cov=copula_matrix(params))
             - estimators.empirical_copula(sample=combined_sample, x_order=sample_percentiles_1[i],
                                           y_order=sample_percentiles_2[i]))
            for i in range(len(percentiles))
        ]
        logger.verbose(f'Residuals for {params}: \n{res}',
                       '_calculate_percentile_correlation.residuals')
        return res

    parameters = least_squares(residuals, (0), bounds=((-0.99999), (0.99999)))

    correl = parameters.x[0]
    result = {keys.keys['STATISTICS']['CORRELATION']: correl}

    # BUG FIX: previously saved `correlation=correlation`, which was either undefined
    # (NameError when `sample_prices` was supplied) or None (cache miss), so the
    # computed value was never persisted. Save the solved value `correl` instead.
    correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
                               start_date=start_date, end_date=end_date,
                               correlation=correl, method=keys.keys['ESTIMATION']['PERCENT'],
                               weekends=weekends)
    return result
1038def _calculate_likelihood_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
1039 """
1040 Calculates the sample correlation using the maximum likelihood estimators, assuming underlying price process follows Geometric Brownian Motion, i.e. the price distribution is lognormal.
1042 Parameters
1043 ----------
1044 1. **ticker_1** : ``str``
1045 Ticker symbol for first asset.
1046 2. **ticker_2** : ``str``
1047 Ticker symbol for second asset
1048 3. **asset_type_1** : ``str``
1049 *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
1050 4. **asset_type_2** : ``str``
1051 *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
1052 5. *start_date* : ``datetime.date``
1053 *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
1054 6. **end_date** : ``datetime.date``
1055 *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
1056 7. **sample_prices** : ``dict``
1057 *Optional*. A list of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1:, ... } }` and ordered from latest date to earliest date.
1059 Raises
1060 ------
1061 1. **scrilla.errors.PriceError**
1062 If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.
1064 Returns
1065 ------
1066 ``Dict[str, float]``
1067 Dictionary containing the correlation of `ticker_1` and `ticker_2`: Formatted as: `{ 'correlation' : float }`.
1069 .. notes::
1070 *
1072 $$ $$
1073 """
1074 from scrilla.analysis import optimizer
1075 ### START ARGUMENT PARSING ###
1076 asset_type_1 = errors.validate_asset_type(
1077 ticker=ticker_1, asset_type=asset_type_1)
1078 asset_type_2 = errors.validate_asset_type(
1079 ticker=ticker_2, asset_type=asset_type_2)
1081 # cache flag to signal if calculation includes weekends or not,
1082 # only perform check if not passed in as argument
1083 if weekends is None: 1083 ↛ 1084line 1083 didn't jump to line 1084, because the condition on line 1083 was never true
1084 if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
1085 weekends = 1
1086 else:
1087 weekends = 0
1089 if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1: 1089 ↛ 1091line 1089 didn't jump to line 1091, because the condition on line 1089 was never true
1090 # validate over total days.
1091 start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
1092 asset_type=keys.keys['ASSETS']['CRYPTO'])
1093 else:
1094 # validate over trading days. since sample(date - 100 days) > (date - 100 trading days), always
1095 # take the largest sample so intersect_dict_keys will return a sample of the correct size
1096 # for mixed asset types.
1097 start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
1098 asset_type=keys.keys['ASSETS']['EQUITY'])
1100 if sample_prices is None: 1100 ↛ 1122line 1100 didn't jump to line 1122, because the condition on line 1100 was never false
1101 correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
1102 start_date=start_date, end_date=end_date,
1103 weekends=weekends,
1104 method=keys.keys['ESTIMATION']['LIKE'])
1105 if correlation is not None:
1106 return correlation
1108 sample_prices = {}
1109 logger.debug(
1110 f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_likelihood_correlation')
1111 logger.debug('Retrieving price histories for calculation.',
1112 '_calculate_likelihood_correlation')
1113 sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1,
1114 start_date=start_date,
1115 end_date=end_date,
1116 asset_type=asset_type_1)
1117 sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2,
1118 start_date=start_date,
1119 end_date=end_date,
1120 asset_type=asset_type_2)
1122 if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0: 1122 ↛ 1123line 1122 didn't jump to line 1123, because the condition on line 1122 was never true
1123 sample_prices[ticker_1] = dater.intersect_with_trading_dates(
1124 sample_prices[ticker_1])
1125 sample_prices[ticker_2] = dater.intersect_with_trading_dates(
1126 sample_prices[ticker_2])
1127 else:
1128 # intersect with equity keys to get trading days
1129 sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
1130 sample_prices[ticker_1], sample_prices[ticker_2])
1132 if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]: 1132 ↛ 1133line 1132 didn't jump to line 1133, because the condition on line 1132 was never true
1133 raise errors.PriceError(
1134 "Prices cannot be retrieved for correlation calculation")
1136 sample_of_returns_1 = get_sample_of_returns(ticker=ticker_1,
1137 sample_prices=sample_prices[ticker_1],
1138 asset_type=asset_type_1)
1139 sample_of_returns_2 = get_sample_of_returns(ticker=ticker_2,
1140 sample_prices=sample_prices[ticker_2],
1141 asset_type=asset_type_2)
1143 combined_sample = [[el, sample_of_returns_2[i]]
1144 for i, el in enumerate(sample_of_returns_1)]
1146 likelihood_estimates = optimizer.maximize_bivariate_normal_likelihood(
1147 data=combined_sample)
1149 # Var(d lnS / delta_t ) = Var(d lnS )/delta_t**2 = sigma**2 * delta_t / delta_t**2
1150 # = sigma**2/delta_t
1151 # Cov(d lnS/delta_t, d lnQ/delta_t) = Cov(d lnS, dlnQ)/delta_t**2
1152 # = rho * sigma_s * sigma_q / delta_t**2
1153 vol_1 = sqrt(likelihood_estimates[2])
1154 vol_2 = sqrt(likelihood_estimates[3])
1156 correlation = likelihood_estimates[4] / (vol_1*vol_2)
1158 result = {'correlation': correlation}
1160 correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
1161 start_date=start_date, end_date=end_date,
1162 correlation=correlation, weekends=weekends,
1163 method=keys.keys['ESTIMATION']['LIKE'])
1164 return result
def _calculate_moment_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Returns the sample correlation using the method of Moment Matching, assuming underlying price process follows Geometric Brownian Motion, i.e. the price distribution is lognormal.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset
    3. **asset_type_1** : ``Union[str,None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    4. **asset_type_2** : ``Union[str,None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    5. **start_date** : ``Union[datetime.date,None]``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    7. **sample_prices** : ``Union[dict,None]``
        *Optional*. A list of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1:, ... } }` and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int,None]``
        *Optional*. A flag to signal that calculations should include/exclude weekend dates. See *notes* for more information. Defaults to `None` and is implicitly determined by the asset types passed in.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`: Formatted as: `{ 'correlation' : float }`

    .. notes::
        * when the asset types are mixed, i.e. `asset_type_1` == 'equity' and `asset_type_2`== 'crypto', the sample prices will contain different information, since crypto trades on weekends and holidays. The solution is to throw away the weekend and holiday prices for crypto. This presents another problem, since the risk profile for a crypto-currency that is cached in the local fileystem will be calculated over the entire sample including the missing data, whereas the risk profile required by the correlation needs to be calculated over the censored sample (i.e. the one with weekends and holidays removed) so that the means of the mixed asset types are scaled to the same time delta. In this case, the correlation algorithm needs to be able to override calls to the cache and force the risk profile algorithms to calculate based on the sample. Note: this issue only applies to correlation calculations using the method of moment matching, since the other methods determine the value of the correlation by solving constrained systems of equations instead of deriving it analytically with a formula.
        * The `weekends` flag is only relevant for assets of type `scrilla.static.keys.keys['ASSETS']['CRYPTO']`, i.e. cryptocurrency. It is passed in when the correlation calculation is part of a larger correlation matrix calculation, so that entries in the matrix have equivalent time frames. E.g., if the `scrilla.analysis.models.geometric.statistics.correlation_matrix` is calculating a matrix for a collection of mixed asset types, say, `["BTC", "ETH", "ALLY", "SPY"]`, the correlations between (crypto, equity) and (equity, equity) will only include weekdays, where as the (crypto,crypto) pairing will include weekends and thus result in an inaccurate matrix. To resolve this problem, the `weekends` flag can be passed into this calculation to prevent (crypto,crypto) pairings from including weekends.
    """
    ### START ARGUMENT PARSING ###
    asset_type_1 = errors.validate_asset_type(
        ticker=ticker_1, asset_type=asset_type_1)
    asset_type_2 = errors.validate_asset_type(
        ticker=ticker_2, asset_type=asset_type_2)

    # cache flag to signal if calculation includes weekends or not,
    # only perform check if not passed in as argument so that agent
    # calling can override default weekend behavior, i.e. make
    # crypto pairing forcibly exclude weekends from their calculation.
    if weekends is None:
        if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1:
        # validate over total days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        # validate over trading days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['EQUITY'])

    if sample_prices is None:
        # check the cache before hitting external price services
        correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
                                               start_date=start_date, end_date=end_date,
                                               weekends=weekends,
                                               method=keys.keys['ESTIMATION']['MOMENT'])
        if correlation is not None:
            return correlation

        sample_prices = {}
        logger.debug(
            f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_moment_correlation')
        logger.debug('Retrieving price histories for calculation.',
                     '_calculate_moment_correlation')
        sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_1)
        sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_2)

    # TODO: pretty sure something about this is causing the issue.
    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
        # remove weekends and holidays from sample
        logger.debug('Removing weekends from crypto sample',
                     '_calculate_moment_correlation')
        sample_prices[ticker_1] = dater.intersect_with_trading_dates(
            sample_prices[ticker_1])
        sample_prices[ticker_2] = dater.intersect_with_trading_dates(
            sample_prices[ticker_2])
    else:
        # intersect with equity keys to get trading days; both samples must share
        # exactly the same date keys for the pairwise loop below.
        sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
            sample_prices[ticker_1], sample_prices[ticker_2])

    if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]:
        raise errors.PriceError(
            "Prices cannot be retrieved for correlation calculation")

    # trading period, i.e. the fraction of a year represented by one sample
    # observation, differs between crypto (365 days) and equity (trading days)
    if asset_type_1 == keys.keys['ASSETS']['CRYPTO']:
        trading_period_1 = constants.constants['ONE_TRADING_DAY']['CRYPTO']
    else:
        trading_period_1 = constants.constants['ONE_TRADING_DAY']['EQUITY']
    if asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
        trading_period_2 = constants.constants['ONE_TRADING_DAY']['CRYPTO']
    else:
        trading_period_2 = constants.constants['ONE_TRADING_DAY']['EQUITY']
    ### END ARGUMENT PARSING ###

    ### START SAMPLE STATISTICS CALCULATION DEPENDENCIES ###
    # i.e. statistics that need to be calculated before correlation can be calculated
    logger.debug(
        f'Preparing calculation dependencies for ({ticker_1},{ticker_2}) correlation', '_calculate_moment_correlation')

    stats_1 = _calculate_moment_risk_return(ticker=ticker_1,
                                            start_date=start_date,
                                            end_date=end_date,
                                            asset_type=asset_type_1,
                                            weekends=weekends)

    stats_2 = _calculate_moment_risk_return(ticker=ticker_2,
                                            start_date=start_date,
                                            end_date=end_date,
                                            asset_type=asset_type_2,
                                            weekends=weekends)

    # ito's lemma: drift of log-returns is (mu - sigma^2/2), scaled to one
    # observation period via sqrt(trading_period)
    mod_mean_1 = (stats_1['annual_return'] - 0.5*(stats_1['annual_volatility'])
                  ** 2)*sqrt(trading_period_1)

    mod_mean_2 = (stats_2['annual_return'] - 0.5*(stats_2['annual_volatility'])
                  ** 2)*sqrt(trading_period_2)

    logger.debug(
        f'Calculating ({ticker_1}, {ticker_2}) correlation.', '_calculate_moment_correlation')
    # END SAMPLE STATISTICS CALCULATION DEPENDENCIES

    # Initialize loop variables.
    # `today` is False only on the first iteration; since sample_prices is ordered
    # latest-to-earliest (see docstring), `tomorrows_*` always hold the more recent
    # observation from the previous iteration.
    covariance, time_delta = 0, 1
    today, tomorrows_date = False, None
    sample = len(sample_prices[ticker_1])

    #### START CORRELATION LOOP ####
    for this_date in sample_prices[ticker_1]:
        todays_price_1 = sample_prices[ticker_1][this_date][keys.keys['PRICES']['CLOSE']]
        todays_price_2 = sample_prices[ticker_2][this_date][keys.keys['PRICES']['CLOSE']]

        if today:
            logger.verbose(f'today = {this_date}',
                           '_calculate_moment_correlation')
            logger.verbose(
                f'(todays_price, tomorrows_price)_{ticker_1} = ({todays_price_1}, {tomorrows_price_1})', '_calculate_moment_correlation')
            logger.verbose(
                f'(todays_price, tomorrows_price)_{ticker_2} = ({todays_price_2}, {tomorrows_price_2})', '_calculate_moment_correlation')

            # NOTE: crypto prices may have weekends and holidays removed during correlation algorithm
            # so samples can be compared to equities, need to account for these dates by increasing
            # the time_delta by the number of missed days, to offset the weekend and holiday return.
            if asset_type_1 == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type_1 == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1
            # log-return scaled to a per-period deviation
            current_mod_return_1 = log(
                float(tomorrows_price_1)/float(todays_price_1))/sqrt(time_delta*trading_period_1)

            # see above note
            if asset_type_2 == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type_2 == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1

            current_mod_return_2 = log(
                float(tomorrows_price_2)/float(todays_price_2))/sqrt(time_delta*trading_period_2)

            # running sum of the unbiased sample covariance; each of the
            # (sample - 1) return pairs contributes its deviation product
            current_sample_covariance = (
                current_mod_return_1 - mod_mean_1)*(current_mod_return_2 - mod_mean_2)/(sample - 1)
            covariance = covariance + current_sample_covariance

            logger.verbose(
                f'(return_{ticker_1}, return_{ticker_2}) = ({round(current_mod_return_1, 2)}, {round(current_mod_return_2, 2)})', '_calculate_moment_correlation')
            logger.verbose(
                f'(current_sample_covariance, covariance) = ({round(current_sample_covariance, 2)}, {round(covariance, 2)})', '_calculate_moment_correlation')

        else:
            today = True

        tomorrows_price_1, tomorrows_price_2, tomorrows_date = todays_price_1, todays_price_2, this_date
    #### END CORRELATION LOOP ####

    # Scale covariance into correlation: rho = cov / (sigma_1 * sigma_2)
    correlation = covariance / \
        (stats_1['annual_volatility']*stats_2['annual_volatility'])

    logger.debug(
        f'correlation = ({round(correlation, 2)})', '_calculate_moment_correlation')

    result = {'correlation': correlation}

    # persist so subsequent calls over the same window hit the cache branch above
    correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
                               start_date=start_date, end_date=end_date,
                               correlation=correlation, weekends=weekends,
                               method=keys.keys['ESTIMATION']['MOMENT'])
    return result
def correlation_matrix(tickers, asset_types=None, start_date=None, end_date=None, sample_prices=None, method=settings.ESTIMATION_METHOD, weekends: Union[int, None] = None) -> List[List[float]]:
    """
    Returns the correlation matrix for *tickers* from *start_date* to *end_date* using the estimation method *method*.

    Parameters
    ----------
    1. **tickers** : ``list``
        List of ticker symbols whose correlation matrix is to be calculated. Format: `['ticker_1', 'ticker_2', ...]`
    2. **asset_types** : ``list``
        *Optional*. List of asset types that map to the `tickers` list. Specify **asset_types** to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    3. **start_date** : ``datetime.date``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    4. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    5. **sample_prices** : ``dict``
        *Optional*. A list of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1:, ... } }` and ordered from latest date to earliest date.
    6. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.
    7. **weekends** : ``Union[int, None]``
        *Optional*. A flag to signal that calculations should include (1) or exclude (0) weekend dates. If `None`, defaults to 0; it is then set to 1 whenever every asset in `tickers` is cryptocurrency, so that all entries of the matrix are computed over equivalent time frames.

    Raises
    ------
    1. **scrilla.errors.SampleSizeError**
        If list of tickers is not large enough to calculate a correlation matrix, this error will be thrown.

    Returns
    ------
    ``List[List[float]]``
        correlation matrix of `tickers`. indices correspond to the Cartesian product of `tickers` x `tickers`.
    """
    correl_matrix = [
        [0 for _ in tickers] for _ in tickers
    ]

    # let correlation function handle argument parsing
    if asset_types is None:
        asset_types = [errors.validate_asset_type(
            ticker) for ticker in tickers]

    # NOTE: since crypto trades on weekends and equities do not, the function
    #       must determine if the inputted assets are of mixed type. If any
    #       single asset is of a different type, weekends must be truncated
    #       from sample to ensure correlation is calculated over the samples
    #       of like size.

    # By default, exclude weekends.
    if weekends is None:
        weekends = 0

    # number of distinct asset types; len(set(...)) replaces the previous
    # sorted-then-groupby counting loop with a direct O(n) computation
    asset_groups = len(set(asset_types))

    # if all assets of the same type, include weekends only if asset type is crypto
    if asset_groups == 1 and asset_types[0] == keys.keys['ASSETS']['CRYPTO']:
        logger.debug(
            'Assets of same type, which is crypto, keeping weekends', 'correlation_matrix')
        weekends = 1
    elif asset_groups > 1:
        logger.debug(
            'Assets of different type, removing weekends', 'correlation_matrix')
    else:
        logger.debug(
            'Assets of same type, which is equity, excluding weekends', 'correlation_matrix')

    if len(tickers) > 1:
        for i, item in enumerate(tickers):
            # an asset is perfectly correlated with itself
            correl_matrix[i][i] = 1
            # correlation is symmetric, so only compute the upper triangle
            # and mirror it into the lower triangle
            for j in range(i+1, len(tickers)):
                cor = calculate_correlation(ticker_1=item,
                                            ticker_2=tickers[j],
                                            asset_type_1=asset_types[i],
                                            asset_type_2=asset_types[j],
                                            start_date=start_date,
                                            end_date=end_date,
                                            sample_prices=sample_prices,
                                            weekends=weekends,
                                            method=method)
                correl_matrix[i][j] = cor['correlation']
                correl_matrix[j][i] = correl_matrix[i][j]
        # NOTE: the loop above already set every diagonal entry, including the
        # last one, so no trailing assignment is needed.
        return correl_matrix

    if len(tickers) == 1:
        correl_matrix[0][0] = 1
        return correl_matrix

    raise errors.SampleSizeError(
        'Cannot calculate correlation matrix for portfolio size <= 1.')
def calculate_moment_correlation_series(ticker_1: str, ticker_2: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, float]:
    """
    Computes a time series of moment-matching correlations between `ticker_1` and `ticker_2`.

    For each date in the range ending on `end_date`, the correlation is calculated over the
    sample terminating on that date and stored under the date's string representation.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for the first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for the second asset.
    3. **start_date** : ``Union[date, None]``
        *Optional*. Start of the series. Passed through `errors.validate_dates` when `None`.
    4. **end_date** : ``Union[date, None]``
        *Optional*. End of the series. Passed through `errors.validate_dates` when `None`.

    Returns
    ------
    ``Dict[str, float]``
        Mapping of date strings to the correlation calculated through that date.
    """
    type_1 = errors.validate_asset_type(ticker=ticker_1)
    type_2 = errors.validate_asset_type(ticker=ticker_2)
    both_crypto = (type_1 == keys.keys['ASSETS']['CRYPTO']
                   and type_2 == keys.keys['ASSETS']['CRYPTO'])

    # Crypto pairs are validated over all calendar days; every other pairing is
    # validated over trading days. Since (date - 100 days) > (date - 100 trading
    # days), the larger sample is always taken so intersect_dict_keys will return
    # a sample of the correct size for mixed asset types.
    if both_crypto:
        start_date, end_date = errors.validate_dates(
            start_date=start_date, end_date=end_date,
            asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        start_date, end_date = errors.validate_dates(
            start_date=start_date, end_date=end_date,
            asset_type=keys.keys['ASSETS']['EQUITY'])

    # TODO: what if start_date or end_date is None?
    if both_crypto:
        series_dates = [start_date] + dater.dates_between(start_date, end_date)
    else:
        # default to business days
        series_dates = [dater.get_previous_business_date(start_date)] + \
            dater.business_dates_between(start_date, end_date)

    correlation_series = {}
    for current_date in series_dates:
        point = _calculate_moment_correlation(ticker_1=ticker_1,
                                              ticker_2=ticker_2,
                                              end_date=current_date)
        correlation_series[dater.to_string(current_date)] = point['correlation']

    return correlation_series
def calculate_return_covariance(ticker_1: str, ticker_2: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, correlation: Union[dict, None] = None, profile_1: Union[dict, None] = None, profile_2: Union[dict, None] = None, method=settings.ESTIMATION_METHOD) -> float:
    """
    Returns the return covariance between *ticker_1* and *ticker_2* from *start_date* to *end_date* using the estimation method *method*.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **start_date** : ``datetime.date``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    4. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    5. **sample_prices** : ``dict``
        *Optional*. A dictionary containing the asset prices. Must be formatted as: `{ 'ticker_1': { 'date_1': value, ...}, 'ticker_2': { 'date_2' : value, ...}}`.
    6. **correlation** : ``dict``
        *Optional*. Overrides the correlation calculation. A dictionary containing the correlation that should be used in lieu of estimating it from historical data. Formatted as: `{ 'correlation': value }`.
    7. **profile_1** : ``dict``
        *Optional*. Overrides asset 1's risk profile calculation. A dictionary containing the risk profile of the first asset that should be used in lieu of estimating it from historical data.
    8. **profile_2** : ``dict``
        *Optional*. Overrides asset 2's risk profile calculation. A dictionary containing the risk profile of the second asset that should be used in lieu of estimating it from historical data.
    9. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.

    Returns
    ------
    ``float`` : return covariance
    """
    # resolve the correlation unless the caller supplied one; sample_prices,
    # when provided, overrides service calls down the stack
    if correlation is None:
        if sample_prices is None:
            correlation = calculate_correlation(ticker_1=ticker_1, ticker_2=ticker_2,
                                                start_date=start_date,
                                                end_date=end_date, method=method)
        else:
            correlation = calculate_correlation(ticker_1=ticker_1, ticker_2=ticker_2,
                                                sample_prices=sample_prices,
                                                method=method)

    # resolve both risk profiles with the same branching logic, filling in only
    # the ones the caller did not provide
    resolved_profiles = [profile_1, profile_2]
    for position, current_ticker in enumerate((ticker_1, ticker_2)):
        if resolved_profiles[position] is not None:
            continue
        if sample_prices is None:
            resolved_profiles[position] = calculate_risk_return(ticker=current_ticker,
                                                                start_date=start_date,
                                                                end_date=end_date,
                                                                method=method)
        else:
            resolved_profiles[position] = calculate_risk_return(ticker=current_ticker,
                                                                sample_prices=sample_prices[current_ticker],
                                                                method=method)
    profile_1, profile_2 = resolved_profiles

    # cov = sigma_1 * sigma_2 * rho
    return (profile_1['annual_volatility']
            * profile_2['annual_volatility']
            * correlation['correlation'])