Coverage for src/scrilla/analysis/models/geometric/statistics.py: 50%

501 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-18 18:14 +0000

1# This file is part of scrilla: https://github.com/chinchalinchin/scrilla. 

2 

3# scrilla is free software: you can redistribute it and/or modify 

4# it under the terms of the GNU General Public License version 3 

5# as published by the Free Software Foundation. 

6 

7# scrilla is distributed in the hope that it will be useful, 

8# but WITHOUT ANY WARRANTY; without even the implied warranty of 

9# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

10# GNU General Public License for more details. 

11 

12# You should have received a copy of the GNU General Public License 

13# along with scrilla. If not, see <https://www.gnu.org/licenses/> 

14# or <https://github.com/chinchalinchin/scrilla/blob/develop/main/LICENSE>. 

15 

16from scrilla.util import errors, outputter, helper, dater 

17from scrilla.analysis import estimators 

18from scrilla.static import keys, functions, constants 

19from scrilla import services, files, settings, cache 

20from numpy import inf 

21from datetime import date 

22from itertools import groupby 

23import datetime 

24import itertools 

25from typing import Dict, List, Union 

26from math import log, sqrt 

27from scipy.stats import norm, multivariate_normal 

28from scipy.optimize import fsolve, least_squares 

29 

30 

# Module-scoped logger; tagged with the module path so log output can be traced
# back here. Log level is driven by the LOG_LEVEL environment setting.
logger = outputter.Logger(
    'scrilla.analysis.models.geometric.statistics', settings.LOG_LEVEL)
# Shared cache instances for computed risk profiles and pairwise correlations.
# Module-level so every function in this module reuses the same caches across calls.
profile_cache = cache.ProfileCache()
correlation_cache = cache.CorrelationCache()

35 

36 

def get_sample_of_returns(ticker: str, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, start_date: Union[date, None] = None, end_date: Union[date, None] = None, asset_type: Union[str, None] = None, daily: bool = False) -> List[float]:
    """
    Generates a list of logarithmic returns on the sample of prices. The sample return is annualized unless `daily=True`.

    Parameters
    ----------
    1. **ticker**: ``str``
        Ticker symbol whose return sample is to be calculated.
    2. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. A dictionary of the asset prices for which the returns will be calculated. Overrides calls to service and forces calculation on the sample of prices supplied. The function will disregard `start_date` and `end_date` and use the first and last key as the latest and earliest date, respectively. In other words, the `sample_prices` dictionary must be ordered from latest to earliest. Format: `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    3. **start_date** : ``Union[date, None]``
        *Optional*. Start date of the sample period. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)==keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    4. **end_date** : ``Union[date, None]``
        *Optional*. End date of the sample period. Defaults to `None`, in which case the calculation proceeds as if `end_date` were set to today. If `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']`, this means today regardless. If `get_asset_type(ticker)==keys.keys['ASSETS']['EQUITY']`, this excludes holidays and weekends and sets the end date to the last valid trading date.
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations. Allowable values: `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **daily**: ``bool``
        *Optional*. If `True`, the returns are left at daily frequency instead of being annualized by the asset's trading period.

    Raises
    ------
    1. **scrilla.errors.SampleSizeError**
        If the date range passed in does not have enough dates to compute the logarithmic sample (n>1). If `sample_prices` was passed in to override the `start_date` and `end_date` arguments, this error will be thrown if `len(sample_prices)<1`.

    Returns
    -------
    ``List[float]``
        A list of logarithmic returns, ordered from latest to earliest observation date.

    .. notes::
        * the `trading_period` for a single asset can be determined from its `asset_type`...should i use a conditional and fork constants.constants['ONE_TRADING_DAY'] instead of passing it in?
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if sample_prices is None:
        # date range too small to yield at least one return observation
        if (asset_type == keys.keys['ASSETS']['CRYPTO'] and dater.days_between(start_date, end_date) == 1) \
                or (asset_type == keys.keys['ASSETS']['EQUITY'] and dater.business_days_between(start_date, end_date) == 1):
            raise errors.SampleSizeError(
                'Not enough price data to compute returns')
        logger.debug('No sample prices provided, calling service.',
                     'get_sample_of_returns')
        start_date, end_date = errors.validate_dates(
            start_date, end_date, asset_type)
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)
    else:
        if len(sample_prices) < 1:
            raise errors.SampleSizeError(
                'Not enough price data to compute returns')
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', 'get_sample_of_returns')
        prices = sample_prices

    today = False
    sample_of_returns = []

    # `prices` is ordered latest-to-earliest, so `tomorrows_*` holds the
    # observation from the previous (i.e. later-dated) loop iteration.
    for this_date in prices:
        todays_price = prices[this_date][keys.keys['PRICES']['CLOSE']]

        if today:
            logger.verbose(
                f'{this_date}: (todays_price, tomorrows_price) = ({todays_price}, {tomorrows_price})', 'get_sample_of_returns')
            # NOTE: crypto prices may have weekends and holidays removed during correlation algorithm
            # so samples can be compared to equities, need to account for these dates by increasing
            # the time_delta by the number of missed days.
            if asset_type == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1

            todays_return = log(float(tomorrows_price) /
                                float(todays_price))/(time_delta)

            if not daily:
                # annualize the daily return by the asset's trading period
                todays_return = todays_return/trading_period

            sample_of_returns.append(todays_return)
        else:
            # skip the first (latest) observation; it only seeds `tomorrows_*`
            today = True

        tomorrows_price = todays_price
        tomorrows_date = this_date

    return sample_of_returns

121 

122 

def calculate_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, Dict[str, float]]:
    """
    Delegates the moving-average calculation for *ticker* to the implementation matching the estimation *method*.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose moving averages are to be calculated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Start date of the time period over which the moving averages will be calculated.
    3. **end_date** : ``Union[date, None]``
        *Optional*. End date of the time period over which the moving averages will be calculated.
    4. **method** : ``str``
        *Optional*. Determines the estimation method used during the calculation. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`. Defaults to `scrilla.settings.ESTIMATION_METHOD`.

    Raises
    ------
    1. **scrilla.errors.ConfigurationError**
        If the `method` passed in doesn't map to one of the allowable estimation method values.

    Returns
    -------
    ``Dict[str, Dict[str, float]]``
        Dictionary keyed by date whose values are nested dictionaries of the moving averages on that date.
    """
    handlers = {
        keys.keys['ESTIMATION']['MOMENT']: _calculate_moment_moving_averages,
        keys.keys['ESTIMATION']['PERCENT']: _calculate_percentile_moving_averages,
        keys.keys['ESTIMATION']['LIKE']: _calculate_likelihood_moving_averages
    }
    handler = handlers.get(method)
    if handler is None:
        raise errors.ConfigurationError(
            'Statistical estimation method not found')
    return handler(ticker=ticker, start_date=start_date, end_date=end_date)

137 

138 

def calculate_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, float]:
    """
    Returns the correlation between *ticker_1* and *ticker_2* from *start_date* to *end_date* using the estimation method *method*.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **asset_type_1** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in the `scrilla.keys.keys['ASSETS']` dictionary.
    4. **asset_type_2** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in the `scrilla.keys.keys['ASSETS']` dictionary.
    5. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    7. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. A dictionary of asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for the sample of prices supplied. Will disregard `start_date` and `end_date`. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1', ... } }` and ordered from latest date to earliest date.
    8. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.

    Raises
    ------
    1. **KeyError**
        If the `method` passed in doesn't map to one of the allowable estimation method values, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`. Formatted as: `{ 'correlation' : float }`.
    """
    handlers = {
        keys.keys['ESTIMATION']['MOMENT']: _calculate_moment_correlation,
        keys.keys['ESTIMATION']['LIKE']: _calculate_likelihood_correlation,
        keys.keys['ESTIMATION']['PERCENT']: _calculate_percentile_correlation
    }
    if method not in handlers:
        # documented contract: unknown methods raise KeyError
        raise KeyError('Estimation method not found')
    return handlers[method](ticker_1, ticker_2, asset_type_1, asset_type_2, start_date, end_date, sample_prices, weekends)

179 

180 

def calculate_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, float]:
    """
    Estimates the mean rate of return and volatility for a sample of asset prices as if the asset price followed a Geometric Brownian Motion process, i.e. the mean rate of return and volatility are constant and not functions of time or the asset price. Uses the method passed in through `method` to estimate the model parameters.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Start date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)==keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    3. **end_date** : ``Union[date, None]``
        *Optional*. End date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `end_date` were set to today. If `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']` this means today regardless. If `get_asset_type(ticker)==keys.keys['ASSETS']['EQUITY']` this excludes holidays and weekends and sets the end date to the last valid trading date.
    4. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. A dictionary of the asset prices for which the risk profile will be calculated. Overrides calls to service and forces calculation of the risk profile for the sample of prices supplied. The function will disregard `start_date` and `end_date` and use the first and last key as the latest and earliest date, respectively. In other words, the `sample_prices` dictionary must be ordered from latest to earliest. Format: `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations. Allowable values: `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **method**: ``str``
        *Optional*. The calculation method to be used in estimating model parameters, i.e. the mean and volatility. Allowable values are accessible through `scrilla.static.keys.keys['ESTIMATION']`. Defaults to the method set in `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the environment variable **DEFAULT_ESTIMATION_METHOD**. If this variable is not found, the value will default to `scrilla.static.keys.keys['ESTIMATION']['MOMENT']`.

    Raises
    ------
    1. **scrilla.errors.ConfigurationError**
        If the inputted `method` does not map to one of the allowable values in the `scrilla.static.keys.keys` dictionary, then this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as follows,

        ```
        {
            'annual_return': value,
            'annual_volatility': value
        }
        ```

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and the service call altogether. The function will assume `sample_prices` is the source of the truth.
    """
    handlers = {
        keys.keys['ESTIMATION']['MOMENT']: _calculate_moment_risk_return,
        keys.keys['ESTIMATION']['PERCENT']: _calculate_percentile_risk_return,
        keys.keys['ESTIMATION']['LIKE']: _calculate_likelihood_risk_return
    }
    handler = handlers.get(method)
    if handler is None:
        raise errors.ConfigurationError(
            'Statistical estimation method not found')
    return handler(ticker=ticker,
                   start_date=start_date,
                   end_date=end_date,
                   sample_prices=sample_prices,
                   asset_type=asset_type,
                   weekends=weekends)

244 

245 

def _calculate_moment_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, estimated via the method of moment matching. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``Union[date, None]``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * If no `start_date` and `end_date` are passed in, a static snapshot of the moving averages, i.e. the moving averages as of today (or last close), is calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, end_date)` refer to the endpoints of the sample necessary for that calculation: `sample_start == start_date - MA_3_PERIOD`, so the sample contains enough data points to estimate the longest moving average on `start_date`.
    """
    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # default to a one-day snapshot ending on the last valid date for the asset type
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # extend the sample far enough back to seed the longest moving average
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_moment_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # prices are ordered latest-to-earliest, so slicing forward moves back in time
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            last_date, first_date = list(ma_range)[0], list(ma_range)[-1]
            last_price = ma_range[last_date][keys.keys['PRICES']['CLOSE']]
            first_price = ma_range[first_date][keys.keys['PRICES']['CLOSE']]
            # annualized mean logarithmic return over the moving-average window
            mas.append(log(float(last_price)/float(first_price)) /
                       (trading_period*ma_period))

        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages

327 

328 

def _calculate_percentile_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, estimated via percentile matching. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``Union[date, None]``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * If no `start_date` and `end_date` are passed in, a static snapshot of the moving averages, i.e. the moving averages as of today (or last close), is calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, end_date)` refer to the endpoints of the sample necessary for that calculation: `sample_start == start_date - MA_3_PERIOD`, so the sample contains enough data points to estimate the longest moving average on `start_date`.
    """
    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # default to a one-day snapshot ending on the last valid date for the asset type
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # extend the sample far enough back to seed the longest moving average
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_percentile_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # prices are ordered latest-to-earliest, so slicing forward moves back in time
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            sample_of_returns = get_sample_of_returns(
                ticker=ticker, sample_prices=ma_range)

            # sample quartiles used to pin down the fitted normal distribution
            first_quartile = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.25)
            median = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.50)
            third_quartile = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.75)
            # initial guess: mean ~ median, scale ~ half the interquartile range
            guess = (median, (third_quartile-first_quartile)/2)

            # solve for the (mean, volatility) of the normal distribution whose
            # theoretical 25th/75th percentiles match the sample quartiles
            mean, vol = fsolve(lambda params, first=first_quartile, third=third_quartile:
                               [norm.cdf(x=first, loc=params[0], scale=params[1]) - 0.25,
                                norm.cdf(x=third, loc=params[0], scale=params[1]) - 0.75],
                               guess)

            # NOTE: Var(dln(S)/delta_t) = (1/delta_t^2)*Var(dlnS) = sigma^2*delta_t / delta_t^2 = sigma^2 / delta_t
            # so need to multiply volatility by sqrt(delta_t) to get correct scale.
            vol = vol * sqrt(trading_period)
            # ito's lemma: recover the drift from the mean logarithmic return
            mean = mean + 0.5 * (vol ** 2)
            mas.append(mean)
        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages

426 

427 

def _calculate_likelihood_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, estimated via maximum likelihood. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``Union[date, None]``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * If no `start_date` and `end_date` are passed in, a static snapshot of the moving averages, i.e. the moving averages as of today (or last close), is calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, end_date)` refer to the endpoints of the sample necessary for that calculation: `sample_start == start_date - MA_3_PERIOD`, so the sample contains enough data points to estimate the longest moving average on `start_date`.
    """
    # deferred import — presumably to avoid a circular import with the optimizer module; TODO confirm
    from scrilla.analysis.optimizer import maximize_univariate_normal_likelihood

    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # default to a one-day snapshot ending on the last valid date for the asset type
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # extend the sample far enough back to seed the longest moving average
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_likelihood_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # prices are ordered latest-to-earliest, so slicing forward moves back in time
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            sample_of_returns = get_sample_of_returns(
                ticker=ticker, sample_prices=ma_range)

            # estimates[0] is the fitted mean, estimates[1] the fitted standard deviation
            likelihood_estimates = maximize_univariate_normal_likelihood(
                data=sample_of_returns)
            # See NOTE in docstring
            # NOTE: E(dln(S)/delta_t) = (mu - 0.5 * sigma ** 2) * delta_t / delta_t = mu - 0.5 * sigma ** 2
            # TODO: add :math to docstring with this
            # NOTE: Var(dln(S)/delta_t) = (1/delta_t**2)*Var(dlnS) = sigma**2*delta_t / delta_t**2 = sigma**2 / delta_t
            # so need to multiply volatility by sqrt(delta_t) to get correct scale.
            vol = likelihood_estimates[1]*sqrt(trading_period)
            # ito's lemma: recover the drift from the mean logarithmic return
            mean = likelihood_estimates[0] + 0.5 * (vol ** 2)
            mas.append(mean)
        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages

519 

520 

def _calculate_likelihood_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the mean rate of return and volatility for a sample of asset prices as if the asset price followed a Geometric Brownian Motion process, i.e. the mean rate of return and volatility are constant and not functions of time or the asset price. The return and volatility are estimated using the method of maximum likelihood estimation: the likelihood of the sample of log returns is maximized with respect to the mean and volatility of the assumed normal distribution.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `end_date` were set to today. If the `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']` this means today regardless. If `get_asset_type(ticker)=keys.keys['ASSETS']['EQUITY']` this excludes holidays and weekends and sets the end date to the last valid trading date.
    4. **sample_prices** : ``Union[dict, None]``
        *Optional*. A dictionary of the asset prices for which the risk profile will be calculated. Overrides calls to service and forces calculation of risk profile for the sample of prices supplied. Function will disregard `start_date` and `end_date` and use the first and last key as the latest and earliest date, respectively. In other words, the `sample_prices` dictionary must be ordered from latest to earliest. Format: `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent overusing redundant calculations. Allowable values: `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. 1 if the sample should include weekend prices (crypto convention), 0 otherwise. Defaults to `None`, in which case it is inferred from `asset_type`.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.

    Returns
    -------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as `{ 'annual_return' : float, 'annual_volatility': float }`

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and the service call altogether. The function will assume `sample_prices` is the source of the truth.
    """
    # local import avoids a circular dependency between this module and the optimizer
    from scrilla.analysis.optimizer import maximize_univariate_normal_likelihood

    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        # crypto markets trade on weekends; equities do not
        if asset_type == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if sample_prices is None:
        # NOTE(consistency fix): validate dates against the calendar implied by
        # `weekends`, matching the moment and percentile estimators, so cache
        # keys agree across estimation methods for crypto with weekends == 0.
        if weekends == 1:
            start_date, end_date = errors.validate_dates(
                start_date, end_date, keys.keys['ASSETS']['CRYPTO'])
        else:
            start_date, end_date = errors.validate_dates(
                start_date, end_date, keys.keys['ASSETS']['EQUITY'])

        # BUGFIX: `weekends` is part of the cache identity; filtering without it
        # could return a profile computed under the opposite weekend convention.
        results = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                       method=keys.keys['ESTIMATION']['LIKE'], weekends=weekends)

        # `.get` (rather than direct indexing) tolerates partially-populated cache rows
        if results is not None \
                and results.get(keys.keys['STATISTICS']['RETURN']) is not None \
                and results.get(keys.keys['STATISTICS']['VOLATILITY']) is not None:
            return results

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_likelihood_risk_return')
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            # drop weekend/holiday observations so the sample aligns with equity trading dates
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_likelihood_risk_return')
            prices = dater.intersect_with_trading_dates(prices)

    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_likelihood_risk_return')
        prices = sample_prices

    if not prices:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    sample_of_returns = get_sample_of_returns(
        ticker=ticker, sample_prices=prices, asset_type=asset_type)

    likelihood_estimates = maximize_univariate_normal_likelihood(
        data=sample_of_returns)
    # NOTE: E(dln(S)/delta_t) = (mu - 0.5 * sigma ** 2) * delta_t / delta_t = mu - 0.5 * sigma ** 2
    # NOTE: Var(dln(S)/delta_t) = (1/delta_t**2)*Var(dlnS) = sigma**2*delta_t / delta_t**2 = sigma**2 / delta_t
    # so need to multiply volatility by sqrt(delta_t) to get correct scale.
    vol = likelihood_estimates[1]*sqrt(trading_period)
    # ito's lemma: drift of S recovered from drift of ln(S)
    mean = likelihood_estimates[0] + 0.5 * (vol ** 2)
    results = {
        keys.keys['STATISTICS']['RETURN']: mean,
        keys.keys['STATISTICS']['VOLATILITY']: vol
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['LIKE'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])

    return results

617 

618 

def _calculate_percentile_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the mean rate of return and volatility for a sample of asset prices as if the asset price followed a Geometric Brownian Motion process, i.e. the mean rate of return and volatility are constant and not functions of time or the asset price. The return and volatility are estimated using the method of percentile matching, where the parameters are found by matching the 25th and 75th percentiles of the assumed normal return distribution to the corresponding sample percentiles.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `end_date` were set to today. If the `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']` this means today regardless. If `get_asset_type(ticker)=keys.keys['ASSETS']['EQUITY']` this excludes holidays and weekends and sets the end date to the last valid trading date.
    4. **sample_prices** : ``Union[dict, None]``
        *Optional*. A dictionary of the asset prices for which the profile will be calculated. Overrides calls to service and forces calculation for the sample of prices supplied. Function will disregard `start_date` and `end_date` and use the first and last key of the dictionary as the latest and earliest date, respectively. In other words, `sample_prices` must be ordered from latest to earliest. Must be formatted: `{ 'date_1' : { 'open' : float, 'close' : float}, 'date_2': { 'open': float, 'close': float } ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent overusing redundant calculations. Allowable values: `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. 1 if the sample should include weekend prices (crypto convention), 0 otherwise. Defaults to `None`, in which case it is inferred from `asset_type`.

    Returns
    -------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as `{ 'annual_return' : float, 'annual_volatility': float }`

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and the service call altogether. The function will assume `sample_prices` is the source of the truth.
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        # crypto markets trade on weekends; equities do not
        if asset_type == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if sample_prices is None:
        # validate dates against the calendar implied by `weekends` so cache keys
        # are comparable across estimation methods and mixed asset types
        if weekends == 1:
            start_date, end_date = errors.validate_dates(
                start_date, end_date, keys.keys['ASSETS']['CRYPTO'])
        else:
            start_date, end_date = errors.validate_dates(
                start_date, end_date, keys.keys['ASSETS']['EQUITY'])

        results = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                       method=keys.keys['ESTIMATION']['PERCENT'],
                                       weekends=weekends)

        # CONSISTENCY FIX: use `.get` like the moment estimator so a partially
        # populated cache row cannot raise a KeyError.
        if results is not None \
                and results.get(keys.keys['STATISTICS']['RETURN']) is not None \
                and results.get(keys.keys['STATISTICS']['VOLATILITY']) is not None:
            return results

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_percentile_risk_return')
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            # drop weekend/holiday observations so the sample aligns with equity trading dates
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_percentile_risk_return')
            prices = dater.intersect_with_trading_dates(prices)
    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_percentile_risk_return')
        prices = sample_prices

    if not prices:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    sample_of_returns = get_sample_of_returns(
        ticker=ticker, sample_prices=prices, asset_type=asset_type)

    first_quartile = estimators.sample_percentile(
        data=sample_of_returns, percentile=0.25)
    median = estimators.sample_percentile(
        data=sample_of_returns, percentile=0.50)
    third_quartile = estimators.sample_percentile(
        data=sample_of_returns, percentile=0.75)
    # initial guess: median for the mean, half the interquartile range for the deviation
    guess = (median, (third_quartile-first_quartile)/2)

    # constrain the assumed normal CDF to match the sample's 25th/75th percentiles
    def objective(params):
        return [norm.cdf(x=first_quartile, loc=params[0], scale=params[1]) - 0.25,
                norm.cdf(x=third_quartile, loc=params[0], scale=params[1]) - 0.75]

    mean, vol = fsolve(objective, guess)

    # NOTE: Var(dln(S)/delta_t) = (1/delta_t^2)*Var(dlnS) = sigma^2*delta_t / delta_t^2 = sigma^2 / delta_t
    # so need to multiply volatility by sqrt(delta_t) to get correct scale.
    vol = vol * sqrt(trading_period)
    # ito's lemma: drift of S recovered from drift of ln(S)
    mean = mean + 0.5 * (vol ** 2)
    results = {
        keys.keys['STATISTICS']['RETURN']: mean,
        keys.keys['STATISTICS']['VOLATILITY']: vol
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['PERCENT'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])
    return results

725 

726 

def _calculate_moment_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the mean rate of return and volatility for a sample of asset prices as if the asset price followed a Geometric Brownian Motion process, i.e. the mean rate of return and volatility are constant and not functions of time or the asset price. Moreover, the return and volatility are estimated using the method of moment matching, where the return is estimated by equating it to the first moment of the sample and the volatility is estimated by equating it to the square root of the second moment of the sample.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which the calculation proceeds as if `end_date` were set to today. If the `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']` this means today regardless. If `get_asset_type(ticker)=keys.keys['ASSETS']['EQUITY']` this excludes holidays and weekends and sets the end date to the last valid trading date.
    4. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. A dictionary of the asset prices for which the profile will be calculated. Overrides calls to service and forces calculation for the sample of prices supplied. Function will disregard `start_date` and `end_date` and use the first and last key as the latest and earliest date, respectively. In other words, the `sample_prices` dictionary must be ordered from latest to earliest. If this argument is supplied, the function will bypass calls to the cache for stored calculations. Format: `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent overusing redundant calculations. Allowable values can be found in `scrilla.keys.keys['ASSETS']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. 1 if the sample should include weekend prices (crypto convention), 0 otherwise. Defaults to `None`, in which case it is inferred from `asset_type`.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.

    Returns
    -------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as `{ 'annual_return' : float, 'annual_volatility': float }`

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * function will bypass the cache if `sample_prices` is provided. In other words, the calculation can be forced by specifying `sample_prices`.
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        # crypto markets trade on weekends; equities do not
        if asset_type == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if sample_prices is None:
        # NOTE: Cache is bypassed when sample_prices are not null.
        if weekends == 1:
            start_date, end_date = errors.validate_dates(
                start_date, end_date, keys.keys['ASSETS']['CRYPTO'])
        else:
            start_date, end_date = errors.validate_dates(
                start_date, end_date, keys.keys['ASSETS']['EQUITY'])

        results = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                       method=keys.keys['ESTIMATION']['MOMENT'], weekends=weekends)

        # return cached profile only when both statistics are present
        if results is not None \
                and results.get(keys.keys['STATISTICS']['RETURN']) is not None \
                and results.get(keys.keys['STATISTICS']['VOLATILITY']) is not None:
            return results

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_moment_risk_return')
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            # drop weekend/holiday observations so the sample aligns with equity trading dates
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_moment_risk_return')
            prices = dater.intersect_with_trading_dates(prices)

    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_moment_risk_return')
        prices = sample_prices

    if not prices:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    # Log of difference loses a sample
    sample = len(prices) - 1
    logger.debug(
        f'Calculating mean annual return over last {sample} days for {ticker}', '_calculate_moment_risk_return')

    # MEAN CALCULATION
    # NOTE: mean return is a telescoping series, i.e. sum of log(x1/x0) only depends on the first and
    # last terms' contributions (because log(x1/x0) + log(x2/x1)= log(x2) - log(x1) + log(x1) - log(x0)) = log(x2/x0))
    # which raises the question how accurate a measure the sample mean return is of the population mean return.
    last_date, first_date = list(prices)[0], list(prices)[-1]
    last_price = prices[last_date][keys.keys['PRICES']['CLOSE']]
    first_price = prices[first_date][keys.keys['PRICES']['CLOSE']]
    mean_return = log(float(last_price)/float(first_price)) / \
        (trading_period*sample)

    # VOLATILITY CALCULATION
    # NOTE / TODO : this is a 'naive' variance algorithm: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    # although technically, this is only one pass, since the mean telescopes and doesn't require a full traversal of the
    # the sample. I should see how this implementation compares to a Young and Cramer Updating algorithm implementation.
    today, variance, tomorrows_price, tomorrows_date = False, 0, 0, None
    # adjust the random variable being measured so expectation is easier to calculate.
    mean_mod_return = mean_return*sqrt(trading_period)
    logger.debug(
        f'Calculating mean annual volatility over last {sample} days for {ticker}', '_calculate_moment_risk_return')

    # prices iterate latest -> earliest, so each pass compares `this_date` with the
    # previously seen (later) date held in `tomorrows_date`/`tomorrows_price`;
    # the first iteration only seeds that state (guarded by the `today` flag).
    for this_date in prices:
        todays_price = prices[this_date][keys.keys['PRICES']['CLOSE']]

        if today:
            logger.verbose(
                f'{this_date}: (todays_price, tomorrows_price) = ({todays_price}, {tomorrows_price})', '_calculate_moment_risk_return')

            # crypto prices may have weekends and holidays removed during correlation algorithm
            # so samples can be compared to equities, need to account for these dates by increasing
            # the time_delta by the number of missed days.
            if asset_type == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1

            # scale each log return by the actual elapsed time between observations
            current_mod_return = log(
                float(tomorrows_price)/float(todays_price))/sqrt(time_delta*trading_period)
            # accumulate the (n-1)-normalized squared deviation for the sample variance
            daily = (current_mod_return - mean_mod_return)**2/(sample - 1)
            variance = variance + daily

            logger.verbose(
                f'{this_date}: (daily_variance, sample_variance) = ({round(daily, 4)}, {round(variance, 4)})', '_calculate_moment_risk_return')

        else:
            today = True

        tomorrows_price = prices[this_date][keys.keys['PRICES']['CLOSE']]
        tomorrows_date = this_date

    # adjust for output
    volatility = sqrt(variance)
    # ito's lemma
    mean_return = mean_return + 0.5*(volatility**2)
    logger.debug(
        f'(mean_return, sample_volatility) = ({round(mean_return, 2)}, {round(volatility, 2)})', '_calculate_moment_risk_return')

    results = {
        keys.keys['STATISTICS']['RETURN']: mean_return,
        keys.keys['STATISTICS']['VOLATILITY']: volatility
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['MOMENT'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])
    return results

876 

877 

def _calculate_percentile_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Returns the sample correlation calculated using the method of Percentile Matching, assuming the underlying price process follows Geometric Brownian Motion, i.e. the price distribution is lognormal.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **asset_type_1** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in the `scrilla.keys.keys['ASSETS']` dictionary.
    4. **asset_type_2** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in the `scrilla.keys.keys['ASSETS']` dictionary.
    5. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    7. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. A dictionary of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for the sample of prices supplied. Will disregard `start_date` and `end_date`. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1', ... } }` and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int, None]``
        *Optional*. 1 if the sample should include weekend prices (both assets crypto), 0 otherwise. Defaults to `None`, in which case it is inferred from the asset types.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.

    Returns
    -------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`. Formatted as: `{ 'correlation' : float }`.

    .. notes ::
        * Method uses the theory of copulas for multivariate distributions to break the joint distribution into component distributions in order to find the cumulative probability of the individual distribution's order statistics. See *references* for more information.

    .. references::
        - [How To Determine Quantile Isolines Of A Multivariate Normal Distribution](https://stats.stackexchange.com/questions/64680/how-to-determine-quantiles-isolines-of-a-multivariate-normal-distribution)
        - [Copula (Probability Theory)](https://en.wikipedia.org/wiki/Copula_(probability_theory))
        - [An Introduction To Copulas](http://www.columbia.edu/~mh2078/QRM/Copulas.pdf)
    """
    ### START ARGUMENT PARSING ###
    asset_type_1 = errors.validate_asset_type(
        ticker=ticker_1, asset_type=asset_type_1)
    asset_type_2 = errors.validate_asset_type(
        ticker=ticker_2, asset_type=asset_type_2)

    # cache flag to signal if calculation includes weekends or not,
    # only perform check if not passed in as argument
    if weekends is None:
        if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1:
        # validate over total days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        # validate over trading days. since sample(date - 100 days) > (date - 100 trading days), always
        # take the largest sample so intersect_dict_keys will return a sample of the correct size
        # for mixed asset types.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['EQUITY'])

    if sample_prices is None:
        correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
                                               start_date=start_date, end_date=end_date,
                                               weekends=weekends,
                                               method=keys.keys['ESTIMATION']['PERCENT'])
        if correlation is not None:
            return correlation

        sample_prices = {}
        logger.debug(
            f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_percentile_correlation')
        # BUGFIX: log tag previously read '_calculate_percentile_calculation'
        logger.debug('Retrieving price histories for calculation.',
                     '_calculate_percentile_correlation')
        sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_1)
        sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_2)

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
        # both crypto, but weekends excluded: restrict each sample to trading dates
        sample_prices[ticker_1] = dater.intersect_with_trading_dates(
            sample_prices[ticker_1])
        sample_prices[ticker_2] = dater.intersect_with_trading_dates(
            sample_prices[ticker_2])
    else:
        # intersect with equity keys to get trading days
        sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
            sample_prices[ticker_1], sample_prices[ticker_2])

    if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]:
        raise errors.PriceError(
            "Prices cannot be retrieved for correlation calculation", '_calculate_percentile_correlation')

    # standardize the return samples so percentiles are comparable across assets
    sample_of_returns_1 = estimators.standardize(get_sample_of_returns(
        ticker=ticker_1, sample_prices=sample_prices[ticker_1], asset_type=asset_type_1))
    sample_of_returns_2 = estimators.standardize(get_sample_of_returns(
        ticker=ticker_2, sample_prices=sample_prices[ticker_2], asset_type=asset_type_2))

    combined_sample = [[el, sample_of_returns_2[i]]
                       for i, el in enumerate(sample_of_returns_1)]

    percentiles = [0.1, 0.16, 0.5, 0.84, 0.9]
    sample_percentiles_1, sample_percentiles_2 = [], []

    for percentile in percentiles:
        sample_percentiles_1.append(estimators.sample_percentile(
            data=sample_of_returns_1, percentile=percentile))
        sample_percentiles_2.append(estimators.sample_percentile(
            data=sample_of_returns_2, percentile=percentile))

    logger.debug(
        f'Standardized sample percentiles for {ticker_1}: \n{sample_percentiles_1}', '_calculate_percentile_correlation')
    logger.debug(
        f'Standardized sample percentiles for {ticker_2}: \n{sample_percentiles_2}', '_calculate_percentile_correlation')

    def copula_matrix(params):
        # reject parameter values whose covariance matrix is not positive definite
        determinant = 1 - params[0]**2
        if determinant == 0 or determinant < 0 or determinant < (10**(-constants.constants['ACCURACY'])):
            logger.verbose('Solution is non-positive semi-definite',
                           '_calculate_percentile_correlation')
            return inf
        logger.verbose(
            f'Instantiating Copula Matrix: \n{[[1, params[0]], [params[0], 1]]}', '_calculate_percentile_correlation')
        return [[1, params[0]], [params[0], 1]]

    # Calculate copula distribution of order statistics and constrain it against the empirical estimate
    def residuals(params):
        # BUGFIX: iterate over indices; the original `for i in enumerate(percentiles)`
        # bound `i` to (index, value) tuples, raising TypeError on list indexing.
        res = [
            (multivariate_normal.cdf(x=[sample_percentiles_1[i], sample_percentiles_2[i]],
                                     mean=[0, 0], cov=copula_matrix(params))
             - estimators.empirical_copula(sample=combined_sample, x_order=sample_percentiles_1[i],
                                           y_order=sample_percentiles_2[i]))
            for i in range(len(percentiles))
        ]
        logger.verbose(f'Residuals for {params}: \n{res}',
                       '_calculate_percentile_correlation.residuals')
        return res

    parameters = least_squares(residuals, (0), bounds=((-0.99999), (0.99999)))

    correl = parameters.x[0]
    result = {keys.keys['STATISTICS']['CORRELATION']: correl}

    # BUGFIX: persist the computed value `correl`; the original saved the cache-lookup
    # variable `correlation`, which is undefined when `sample_prices` is supplied
    # (NameError) and None on a cache miss.
    correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
                               start_date=start_date, end_date=end_date,
                               correlation=correl, method=keys.keys['ESTIMATION']['PERCENT'],
                               weekends=weekends)
    return result

1036 

1037 

def _calculate_likelihood_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Calculates the sample correlation using the maximum likelihood estimators, assuming the underlying price process follows Geometric Brownian Motion, i.e. the price distribution is lognormal.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **asset_type_1** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS']` dictionary.
    4. **asset_type_2** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS']` dictionary.
    5. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    7. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. A dictionary of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for the sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1', ... } }` and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int, None]``
        *Optional*. A flag (0 or 1) signalling whether the calculation should include weekend dates. Defaults to `None`, in which case it is inferred from the asset types: 1 when both assets are crypto, 0 otherwise.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`. Formatted as: `{ 'correlation' : float }`.
    """
    # NOTE: deferred import, presumably to avoid a circular dependency between
    # this module and the optimizer — confirm before hoisting to module level.
    from scrilla.analysis import optimizer
    ### START ARGUMENT PARSING ###
    asset_type_1 = errors.validate_asset_type(
        ticker=ticker_1, asset_type=asset_type_1)
    asset_type_2 = errors.validate_asset_type(
        ticker=ticker_2, asset_type=asset_type_2)

    # cache flag to signal if calculation includes weekends or not,
    # only perform check if not passed in as argument
    if weekends is None:
        if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1:
        # validate over total days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        # validate over trading days. since sample(date - 100 days) > (date - 100 trading days), always
        # take the largest sample so intersect_dict_keys will return a sample of the correct size
        # for mixed asset types.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['EQUITY'])

    if sample_prices is None:
        # consult the correlation cache before hitting external price services
        correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
                                               start_date=start_date, end_date=end_date,
                                               weekends=weekends,
                                               method=keys.keys['ESTIMATION']['LIKE'])
        if correlation is not None:
            return correlation

        sample_prices = {}
        logger.debug(
            f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_likelihood_correlation')
        logger.debug('Retrieving price histories for calculation.',
                     '_calculate_likelihood_correlation')
        sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1,
                                                                   start_date=start_date,
                                                                   end_date=end_date,
                                                                   asset_type=asset_type_1)
        sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2,
                                                                   start_date=start_date,
                                                                   end_date=end_date,
                                                                   asset_type=asset_type_2)

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
        # crypto trades on weekends/holidays; censor those observations so both
        # samples align with the trading calendar
        sample_prices[ticker_1] = dater.intersect_with_trading_dates(
            sample_prices[ticker_1])
        sample_prices[ticker_2] = dater.intersect_with_trading_dates(
            sample_prices[ticker_2])
    else:
        # intersect with equity keys to get trading days
        sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
            sample_prices[ticker_1], sample_prices[ticker_2])

    if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]:
        raise errors.PriceError(
            "Prices cannot be retrieved for correlation calculation")

    sample_of_returns_1 = get_sample_of_returns(ticker=ticker_1,
                                                sample_prices=sample_prices[ticker_1],
                                                asset_type=asset_type_1)
    sample_of_returns_2 = get_sample_of_returns(ticker=ticker_2,
                                                sample_prices=sample_prices[ticker_2],
                                                asset_type=asset_type_2)

    # pair the two return samples observation-by-observation for the bivariate
    # likelihood; assumes both samples have equal length after the intersection above
    combined_sample = [[el, sample_of_returns_2[i]]
                       for i, el in enumerate(sample_of_returns_1)]

    likelihood_estimates = optimizer.maximize_bivariate_normal_likelihood(
        data=combined_sample)

    # Var(d lnS / delta_t ) = Var(d lnS )/delta_t**2 = sigma**2 * delta_t / delta_t**2
    #   = sigma**2/delta_t
    # Cov(d lnS/delta_t, d lnQ/delta_t) = Cov(d lnS, dlnQ)/delta_t**2
    #   = rho * sigma_s * sigma_q / delta_t**2
    # NOTE(review): estimates appear ordered (mean_1, mean_2, var_1, var_2, cov) — confirm
    # against optimizer.maximize_bivariate_normal_likelihood.
    vol_1 = sqrt(likelihood_estimates[2])
    vol_2 = sqrt(likelihood_estimates[3])

    # correlation = covariance / (sigma_1 * sigma_2)
    correlation = likelihood_estimates[4] / (vol_1*vol_2)

    result = {'correlation': correlation}

    correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
                               start_date=start_date, end_date=end_date,
                               correlation=correlation, weekends=weekends,
                               method=keys.keys['ESTIMATION']['LIKE'])
    return result

1165 

1166 

def _calculate_moment_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Returns the sample correlation using the method of Moment Matching, assuming the underlying price process follows Geometric Brownian Motion, i.e. the price distribution is lognormal.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **asset_type_1** : ``Union[str,None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS']` dictionary.
    4. **asset_type_2** : ``Union[str,None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS']` dictionary.
    5. **start_date** : ``Union[datetime.date,None]``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``Union[datetime.date,None]``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    7. **sample_prices** : ``Union[dict,None]``
        *Optional*. A dictionary of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for the sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1', ... } }` and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int,None]``
        *Optional*. A flag to signal that calculations should include/exclude weekend dates. See *notes* for more information. Defaults to `None` and is implicitly determined by the asset types passed in.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`. Formatted as: `{ 'correlation' : float }`

    .. notes::
        * when the asset types are mixed, i.e. `asset_type_1` == 'equity' and `asset_type_2`== 'crypto', the sample prices will contain different information, since crypto trades on weekends and holidays. The solution is to throw away the weekend and holiday prices for crypto. This presents another problem, since the risk profile for a crypto-currency that is cached in the local filesystem will be calculated over the entire sample including the missing data, whereas the risk profile required by the correlation needs to be calculated over the censored sample (i.e. the one with weekends and holidays removed) so that the means of the mixed asset types are scaled to the same time delta. In this case, the correlation algorithm needs to be able to override calls to the cache and force the risk profile algorithms to calculate based on the sample. Note: this issue only applies to correlation calculations using the method of moment matching, since the other methods determine the value of the correlation by solving constrained systems of equations instead of deriving it analytically with a formula.
        * The `weekends` flag is only relevant for assets of type `scrilla.static.keys.keys['ASSETS']['CRYPTO']`, i.e. cryptocurrency. It is passed in when the correlation calculation is part of a larger correlation matrix calculation, so that entries in the matrix have equivalent time frames. E.g., if the `scrilla.analysis.models.geometric.statistics.correlation_matrix` is calculating a matrix for a collection of mixed asset types, say, `["BTC", "ETH", "ALLY", "SPY"]`, the correlations between (crypto, equity) and (equity, equity) will only include weekdays, where as the (crypto,crypto) pairing will include weekends and thus result in an inaccurate matrix. To resolve this problem, the `weekends` flag can be passed into this calculation to prevent (crypto,crypto) pairings from including weekends.

    """
    ### START ARGUMENT PARSING ###
    asset_type_1 = errors.validate_asset_type(
        ticker=ticker_1, asset_type=asset_type_1)
    asset_type_2 = errors.validate_asset_type(
        ticker=ticker_2, asset_type=asset_type_2)

    # cache flag to signal if calculation includes weekends or not,
    # only perform check if not passed in as argument so that agent
    # calling can override default weekend behavior, i.e. make
    # crypto pairing forcibly exclude weekends from their calculation.
    if weekends is None:
        if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1:
        # validate over total days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        # validate over trading days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['EQUITY'])

    if sample_prices is None:
        # consult the correlation cache before hitting external price services
        correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
                                               start_date=start_date, end_date=end_date,
                                               weekends=weekends,
                                               method=keys.keys['ESTIMATION']['MOMENT'])
        if correlation is not None:
            return correlation

        sample_prices = {}
        logger.debug(
            f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_moment_correlation')
        logger.debug('Retrieving price histories for calculation.',
                     '_calculate_moment_correlation')
        sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_1)
        sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_2)

    # TODO: pretty sure something about this is causing the issue.
    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
        # remove weekends and holidays from sample
        logger.debug('Removing weekends from crypto sample',
                     '_calculate_moment_correlation')
        sample_prices[ticker_1] = dater.intersect_with_trading_dates(
            sample_prices[ticker_1])
        sample_prices[ticker_2] = dater.intersect_with_trading_dates(
            sample_prices[ticker_2])
    else:
        # intersect with equity keys to get trading days
        sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
            sample_prices[ticker_1], sample_prices[ticker_2])

    if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]:
        raise errors.PriceError(
            "Prices cannot be retrieved for correlation calculation")

    # annualization factor differs by asset class (crypto trades every day)
    if asset_type_1 == keys.keys['ASSETS']['CRYPTO']:
        trading_period_1 = constants.constants['ONE_TRADING_DAY']['CRYPTO']
    else:
        trading_period_1 = constants.constants['ONE_TRADING_DAY']['EQUITY']
    if asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
        trading_period_2 = constants.constants['ONE_TRADING_DAY']['CRYPTO']
    else:
        trading_period_2 = constants.constants['ONE_TRADING_DAY']['EQUITY']
    ### END ARGUMENT PARSING ###

    ### START SAMPLE STATISTICS CALCULATION DEPENDENCIES ###
    # i.e. statistics that need to be calculated before correlation can be calculated
    logger.debug(
        f'Preparing calculation dependencies for ({ticker_1},{ticker_2}) correlation', '_calculate_moment_correlation')

    stats_1 = _calculate_moment_risk_return(ticker=ticker_1,
                                            start_date=start_date,
                                            end_date=end_date,
                                            asset_type=asset_type_1,
                                            weekends=weekends)

    stats_2 = _calculate_moment_risk_return(ticker=ticker_2,
                                            start_date=start_date,
                                            end_date=end_date,
                                            asset_type=asset_type_2,
                                            weekends=weekends)

    # ito's lemma: convert the annualized return into the drift of the
    # log-price process over one trading period
    mod_mean_1 = (stats_1['annual_return'] - 0.5*(stats_1['annual_volatility'])
                  ** 2)*sqrt(trading_period_1)

    mod_mean_2 = (stats_2['annual_return'] - 0.5*(stats_2['annual_volatility'])
                  ** 2)*sqrt(trading_period_2)

    logger.debug(
        f'Calculating ({ticker_1}, {ticker_2}) correlation.', '_calculate_moment_correlation')
    # END SAMPLE STATISTICS CALCULATION DEPENDENCIES

    # Initialize loop variables
    covariance, time_delta = 0, 1
    today, tomorrows_date = False, None
    sample = len(sample_prices[ticker_1])

    #### START CORRELATION LOOP ####
    # NOTE: prices are ordered latest-to-earliest, so "tomorrow" refers to the
    # previously visited (more recent) observation and "today" to the current one.
    for this_date in sample_prices[ticker_1]:
        todays_price_1 = sample_prices[ticker_1][this_date][keys.keys['PRICES']['CLOSE']]
        todays_price_2 = sample_prices[ticker_2][this_date][keys.keys['PRICES']['CLOSE']]

        if today:
            logger.verbose(f'today = {this_date}',
                           '_calculate_moment_correlation')
            logger.verbose(
                f'(todays_price, tomorrows_price)_{ticker_1} = ({todays_price_1}, {tomorrows_price_1})', '_calculate_moment_correlation')
            logger.verbose(
                f'(todays_price, tomorrows_price)_{ticker_2} = ({todays_price_2}, {tomorrows_price_2})', '_calculate_moment_correlation')

            # NOTE: crypto prices may have weekends and holidays removed during correlation algorithm
            # so samples can be compared to equities, need to account for these dates by increasing
            # the time_delta by the number of missed days, to offset the weekend and holiday return.
            if asset_type_1 == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type_1 == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1
            current_mod_return_1 = log(
                float(tomorrows_price_1)/float(todays_price_1))/sqrt(time_delta*trading_period_1)

            # see above note
            if asset_type_2 == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type_2 == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1

            current_mod_return_2 = log(
                float(tomorrows_price_2)/float(todays_price_2))/sqrt(time_delta*trading_period_2)

            # accumulate the (n-1)-normalized cross-moment term by term
            current_sample_covariance = (
                current_mod_return_1 - mod_mean_1)*(current_mod_return_2 - mod_mean_2)/(sample - 1)
            covariance = covariance + current_sample_covariance

            logger.verbose(
                f'(return_{ticker_1}, return_{ticker_2}) = ({round(current_mod_return_1, 2)}, {round(current_mod_return_2, 2)})', '_calculate_moment_correlation')
            logger.verbose(
                f'(current_sample_covariance, covariance) = ({round(current_sample_covariance, 2)}, {round(covariance, 2)})', '_calculate_moment_correlation')

        else:
            # first observation: nothing to difference against yet
            today = True

        tomorrows_price_1, tomorrows_price_2, tomorrows_date = todays_price_1, todays_price_2, this_date
    #### END CORRELATION LOOP ####

    # Scale covariance into correlation
    correlation = covariance / \
        (stats_1['annual_volatility']*stats_2['annual_volatility'])

    logger.debug(
        f'correlation = ({round(correlation, 2)})', '_calculate_moment_correlation')

    result = {'correlation': correlation}

    correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
                               start_date=start_date, end_date=end_date,
                               correlation=correlation, weekends=weekends,
                               method=keys.keys['ESTIMATION']['MOMENT'])
    return result

1374 

1375 

def correlation_matrix(tickers, asset_types=None, start_date=None, end_date=None, sample_prices=None, method=settings.ESTIMATION_METHOD, weekends: Union[int, None] = None) -> List[List[float]]:
    """
    Returns the correlation matrix for *tickers* from *start_date* to *end_date* using the estimation method *method*.

    Parameters
    ----------
    1. **tickers** : ``list``
        List of ticker symbols whose correlation matrix is to be calculated. Format: `['ticker_1', 'ticker_2', ...]`
    2. **asset_types** : ``list``
        *Optional*. List of asset types that map to the `tickers` list. Specify **asset_types** to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS']` dictionary.
    3. **start_date** : ``datetime.date``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    4. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    5. **sample_prices** : ``dict``
        *Optional*. A dictionary of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for the sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1', ... } }` and ordered from latest date to earliest date.
    6. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.
    7. **weekends** : ``Union[int, None]``
        *Optional*. A flag (0 or 1) signalling whether weekend dates should be included. If `None`, defaults to 0; it is forced to 1 when every asset in `tickers` is crypto so all matrix entries span equivalent time frames.

    Raises
    ------
    1. **scrilla.errors.SampleSizeError**
        If the list of tickers is empty, so no correlation matrix can be calculated, this error will be thrown.

    Returns
    ------
    ``List[List[float]]``
        correlation matrix of `tickers`. indices correspond to the Cartesian product of `tickers` x `tickers`.
    """
    correl_matrix = [
        [0 for _ in tickers] for _ in tickers
    ]

    # let correlation function handle argument parsing
    if asset_types is None:
        asset_types = [errors.validate_asset_type(
            ticker) for ticker in tickers]

    # NOTE: since crypto trades on weekends and equities do not, the function
    #       must determine if the inputted assets are of mixed type. If any
    #       single asset is of a different type, weekends must be truncated
    #       from sample to ensure correlation is calculated over samples
    #       of like size.

    # By default, exclude weekends.
    if weekends is None:
        weekends = 0

    # count distinct asset types; replaces a manual groupby-counting loop
    asset_groups = len(set(asset_types))

    # if all assets of the same type, include weekends only if asset type is crypto
    if asset_groups == 1 and asset_types[0] == keys.keys['ASSETS']['CRYPTO']:
        logger.debug(
            'Assets of same type, which is crypto, keeping weekends', 'correlation_matrix')
        weekends = 1
    elif asset_groups > 1:
        logger.debug(
            'Assets of different type, removing weekends', 'correlation_matrix')
    else:
        logger.debug(
            'Assets of same type, which is equity, excluding weekends', 'correlation_matrix')

    if len(tickers) > 1:
        # correlation is symmetric, so only the upper triangle is computed and
        # then mirrored into the lower triangle; the diagonal is identically 1.
        for i, item in enumerate(tickers):
            correl_matrix[i][i] = 1
            for j in range(i+1, len(tickers)):
                cor = calculate_correlation(ticker_1=item,
                                            ticker_2=tickers[j],
                                            asset_type_1=asset_types[i],
                                            asset_type_2=asset_types[j],
                                            start_date=start_date,
                                            end_date=end_date,
                                            sample_prices=sample_prices,
                                            weekends=weekends,
                                            method=method)
                correl_matrix[i][j] = cor['correlation']
                correl_matrix[j][i] = correl_matrix[i][j]
        return correl_matrix
    if len(tickers) == 1:
        correl_matrix[0][0] = 1
        return correl_matrix
    raise errors.SampleSizeError(
        'Cannot calculate correlation matrix for portfolio size <= 1.')

1464 

1465 

def calculate_moment_correlation_series(ticker_1: str, ticker_2: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, float]:
    """
    Calculates a time series of moment-matching correlations between *ticker_1* and *ticker_2*, one entry per date from *start_date* to *end_date*.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **start_date** : ``Union[date, None]``
        *Optional*. Start date of the time period over which the correlation series will be calculated. If `None`, defaults determined by `scrilla.errors.validate_dates`.
    4. **end_date** : ``Union[date, None]``
        *Optional*. End date of the time period over which the correlation series will be calculated. If `None`, defaults determined by `scrilla.errors.validate_dates`.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary mapping date strings to the correlation calculated with that date as the sample's end date. Formatted as: `{ 'date_1': correlation_1, 'date_2': correlation_2, ... }`.
    """
    asset_type_1 = errors.validate_asset_type(ticker=ticker_1)
    asset_type_2 = errors.validate_asset_type(ticker=ticker_2)
    if asset_type_1 == keys.keys['ASSETS']['CRYPTO'] and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
        # validate over all days
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        # validate over trading days. since (date - 100 days) > (date - 100 trading days), always
        # take the largest sample so intersect_dict_keys will return a sample of the correct size
        # for mixed asset types.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['EQUITY'])

    # direct boolean assignment replaces flag-then-conditional-set
    same_type = asset_type_1 == asset_type_2
    correlation_series = {}

    # TODO: what if start_date or end_date is None?
    if same_type and asset_type_1 == keys.keys['ASSETS']['CRYPTO']:
        date_range = [start_date] + dater.dates_between(start_date, end_date)
    else:  # default to business days
        date_range = [dater.get_previous_business_date(
            start_date)] + dater.business_dates_between(start_date, end_date)

    for this_date in date_range:
        # correlation sample always terminates at this_date
        todays_cor = _calculate_moment_correlation(ticker_1=ticker_1,
                                                   ticker_2=ticker_2,
                                                   end_date=this_date)
        correlation_series[dater.to_string(
            this_date)] = todays_cor['correlation']

    return correlation_series

1503 

1504 

def calculate_return_covariance(ticker_1: str, ticker_2: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, correlation: Union[dict, None] = None, profile_1: Union[dict, None] = None, profile_2: Union[dict, None] = None, method=settings.ESTIMATION_METHOD) -> float:
    """
    Returns the return covariance between *ticker_1* and *ticker_2* from *start_date* to *end_date* using the estimation method *method*, i.e. `sigma_1 * sigma_2 * rho_12`.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **start_date** : ``datetime.date``
        *Optional*. Start date of the time period over which covariance will be calculated. If `None`, defaults to 100 trading days ago.
    4. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which covariance will be calculated. If `None`, defaults to last trading day.
    5. **sample_prices** : ``dict``
        *Optional*. A dictionary containing the asset prices. Must be formatted as: `{ 'ticker_1': { 'date_1': value, ...}, 'ticker_2': { 'date_2' : value, ...}}`.
    6. **correlation** : ``dict``
        *Optional*. Overrides the correlation calculation. A dictionary containing the correlation that should be used in lieu of estimating it from historical data. Formatted as: `{ 'correlation': value }`.
    7. **profile_1** : ``dict``
        *Optional*. Overrides asset 1's risk profile calculation. A dictionary containing the risk profile of the first asset that should be used in lieu of estimating it from historical data.
    8. **profile_2** : ``dict``
        *Optional*. Overrides asset 2's risk profile calculation. A dictionary containing the risk profile of the second asset that should be used in lieu of estimating it from historical data.
    9. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.

    Returns
    ------
    ``float`` : return covariance
    """
    # estimate the correlation only when the caller did not supply one;
    # supplied sample prices take precedence over date-range service calls
    if correlation is None:
        if sample_prices is None:
            correlation = calculate_correlation(ticker_1=ticker_1, ticker_2=ticker_2,
                                                start_date=start_date, end_date=end_date,
                                                method=method)
        else:
            correlation = calculate_correlation(ticker_1=ticker_1, ticker_2=ticker_2,
                                                sample_prices=sample_prices, method=method)

    def _estimate_profile(this_ticker):
        # risk profile for one asset, from supplied prices when available,
        # otherwise from the service over the given date range
        if sample_prices is None:
            return calculate_risk_return(ticker=this_ticker, start_date=start_date,
                                         end_date=end_date, method=method)
        return calculate_risk_return(ticker=this_ticker, sample_prices=sample_prices[this_ticker],
                                     method=method)

    if profile_1 is None:
        profile_1 = _estimate_profile(ticker_1)
    if profile_2 is None:
        profile_2 = _estimate_profile(ticker_2)

    # Cov(R_1, R_2) = sigma_1 * sigma_2 * rho_12
    return (profile_1['annual_volatility']
            * profile_2['annual_volatility']
            * correlation['correlation'])