Coverage for src/scrilla/analysis/models/geometric/statistics.py: 50% (501 statements).
Report generated by coverage.py v6.4.2 at 2022-07-18 18:14 +0000.
# This file is part of scrilla: https://github.com/chinchalinchin/scrilla.
#
# scrilla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3
# as published by the Free Software Foundation.
#
# scrilla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with scrilla. If not, see <https://www.gnu.org/licenses/>
# or <https://github.com/chinchalinchin/scrilla/blob/develop/main/LICENSE>.
16from scrilla.util import errors, outputter, helper, dater
17from scrilla.analysis import estimators
18from scrilla.static import keys, functions, constants
19from scrilla import services, files, settings, cache
20from numpy import inf
21from datetime import date
22from itertools import groupby
23import datetime
24import itertools
25from typing import Dict, List, Union
26from math import log, sqrt
27from scipy.stats import norm, multivariate_normal
28from scipy.optimize import fsolve, least_squares
# Module-level logger scoped to this module's dotted path; verbosity is
# controlled by the application-wide LOG_LEVEL setting.
logger = outputter.Logger(
    'scrilla.analysis.models.geometric.statistics', settings.LOG_LEVEL)
# Caches for previously computed risk-return profiles and pairwise
# correlations, so repeated calls can skip recalculation.
profile_cache = cache.ProfileCache()
correlation_cache = cache.CorrelationCache()
def get_sample_of_returns(ticker: str, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, start_date: Union[date, None] = None, end_date: Union[date, None] = None, asset_type: Union[str, None] = None, daily: bool = False) -> List[float]:
    """
    Generates a list of logarithmic returns on the sample `prices`. Sample return is annualized unless `daily=True`.

    Parameters
    ----------
    1. **ticker**: ``str``
        Ticker symbol whose sample of returns is to be generated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Start date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    3. **end_date** : ``Union[date, None]``
        *Optional*. End date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which the calculation proceeds as if `end_date` were set to today. If the `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']` this means today regardless. If `get_asset_type(ticker)=keys.keys['ASSETS']['EQUITY']` this excludes holidays and weekends and sets the end date to the last valid trading date.
    4. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. A list of the asset prices for which the risk profile will be calculated. Overrides calls to service and forces calculation of risk price for sample of prices supplied. Function will disregard `start_date` and `end_date` and use the first and last key as the latest and earliest date, respectively. In other words, the `sample_prices` dictionary must be ordered from latest to earliest. Format: `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent overusing redundant calculations. Allowable values: `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **daily**: ``bool``
        *Optional*. Defaults to `False`, in which case each return is scaled by the asset's trading period, i.e. annualized. If `True`, the raw per-day returns are returned instead.

    Raises
    ------
    1. **scrilla.errors.SampleSizeError**
        If the date range passed in does not have enough dates to compute the logarithmic sample (n>1), then this error is thrown. If `sample_prices` was passed in to override the `start_date` and `end_date` arguments, this error will be thrown if the `len(sample_prices)<1`.

    Returns
    ------
    ``List[float]``
        A list of logarithmic returns, ordered from latest to earliest date.

    .. notes::
        * the `trading_period` for a single asset can be determined from its `asset_type`...should i use a conditional and fork constants.constants['ONE_TRADING_DAY'] instead of passing it in?
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if sample_prices is None:
        # Guard: the date range must contain at least two prices so at least
        # one pairwise return can be computed.
        if (asset_type == keys.keys['ASSETS']['CRYPTO'] and dater.days_between(start_date, end_date) == 1) \
                or (asset_type == keys.keys['ASSETS']['EQUITY'] and dater.business_days_between(start_date, end_date) == 1):
            raise errors.SampleSizeError(
                'Not enough price data to compute returns')
        logger.debug('No sample prices provided, calling service.',
                     'get_sample_of_returns')
        start_date, end_date = errors.validate_dates(
            start_date, end_date, asset_type)
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)
    else:
        if len(sample_prices) < 1:
            raise errors.SampleSizeError(
                'Not enough price data to compute returns')
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', 'get_sample_of_returns')
        prices = sample_prices

    today = False
    sample_of_returns = []

    # Prices are ordered latest to earliest, so on each pass `tomorrows_*`
    # refers to the later of the two dates being compared.
    for this_date in prices:
        todays_price = prices[this_date][keys.keys['PRICES']['CLOSE']]

        if today:
            logger.verbose(
                f'{this_date}: (todays_price, tomorrows_price) = ({todays_price}, {tomorrows_price})', 'get_sample_of_returns')
            # NOTE: crypto prices may have weekends and holidays removed during correlation algorithm
            # so samples can be compared to equities, need to account for these dates by increasing
            # the time_delta by the number of missed days.
            if asset_type == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1

            todays_return = log(float(tomorrows_price) /
                                float(todays_price))/(time_delta)

            if not daily:
                todays_return = todays_return/trading_period

            sample_of_returns.append(todays_return)
        else:
            today = True

        tomorrows_price = prices[this_date][keys.keys['PRICES']['CLOSE']]
        tomorrows_date = this_date

    return sample_of_returns
def calculate_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for *ticker* from *start_date* to *end_date*, delegating the calculation to the estimation method *method*.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose moving averages are to be calculated.
    2. **start_date** : ``Union[date, None]``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date** : ``Union[date, None]``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.
    4. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`. Determines the estimation method used during the calculation. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.

    Raises
    ------
    1. **scrilla.errors.ConfigurationError**
        If the inputted `method` does not map to one of the allowable values in the `scrilla.static.keys.keys` dictionary.

    Returns
    ------
    ``Dict[str, Dict[str, float]]``
        Dictionary with dates as keys and nested dictionaries containing the moving averages as values.
    """
    if method == keys.keys['ESTIMATION']['MOMENT']:
        return _calculate_moment_moving_averages(ticker=ticker,
                                                 start_date=start_date,
                                                 end_date=end_date)
    if method == keys.keys['ESTIMATION']['PERCENT']:
        return _calculate_percentile_moving_averages(ticker=ticker,
                                                     start_date=start_date,
                                                     end_date=end_date)
    if method == keys.keys['ESTIMATION']['LIKE']:
        return _calculate_likelihood_moving_averages(ticker=ticker,
                                                     start_date=start_date,
                                                     end_date=end_date)
    raise errors.ConfigurationError('Statistical estimation method not found')
def calculate_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, float]:
    """
    Returns the correlation between *ticker_1* and *ticker_2* from *start_date* to *end_date* using the estimation method *method*.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset
    3. **asset_type_1** : ``str``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    4. **asset_type_2** : ``str``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    5. *start_date* : ``datetime.date``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    7. **sample_prices** : ``dict``
        *Optional*. A list of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1:, ... } }` and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int, None]``
        *Optional*. Defaults to `None`. Flag passed down to the underlying estimation function; see `_calculate_moment_correlation` and siblings for its interpretation. TODO(review): document the allowable values once confirmed.
    9. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.

    Raises
    ------
    1. **KeyError**
        If the `method` passed in doesn't map to one of the allowable estimation method values, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`: Formatted as: `{ 'correlation' : float }`.
    """
    if method == keys.keys['ESTIMATION']['MOMENT']:
        return _calculate_moment_correlation(ticker_1, ticker_2, asset_type_1, asset_type_2, start_date, end_date, sample_prices, weekends)
    if method == keys.keys['ESTIMATION']['LIKE']:
        return _calculate_likelihood_correlation(ticker_1, ticker_2, asset_type_1, asset_type_2, start_date, end_date, sample_prices, weekends)
    if method == keys.keys['ESTIMATION']['PERCENT']:
        return _calculate_percentile_correlation(ticker_1, ticker_2, asset_type_1, asset_type_2, start_date, end_date, sample_prices, weekends)
    # NOTE: kept as KeyError (not ConfigurationError) for backward
    # compatibility with callers that catch it, per the documented contract.
    raise KeyError('Estimation method not found')
def calculate_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None, method: str = settings.ESTIMATION_METHOD) -> Dict[str, float]:
    """
    Estimates the mean rate of return and volatility for a sample of asset prices as if the asset price followed a Geometric Brownian Motion process, i.e. the mean rate of return and volatility are constant and not functions of time or the asset price. Uses the method passed in through `method` to estimate the model parameters.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``datetime.date``
        Optional. Start date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which case the calculation proceeds as if `start_date` were set to 100 trading days prior to `end_date`. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['CRYPTO']`, this means 100 days regardless. If `get_asset_type(ticker)=scrilla.keys.keys['ASSETS']['EQUITY']`, this excludes weekends and holidays and decrements the `end_date` by 100 trading days.
    3. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which the risk-return profile is to be calculated. Defaults to `None`, in which the calculation proceeds as if `end_date` were set to today. If the `get_asset_type(ticker)==keys.keys['ASSETS']['CRYPTO']` this means today regardless. If `get_asset_type(ticker)=keys.keys['ASSETS']['EQUITY']` this excludes holidays and weekends and sets the end date to the last valid trading date.
    4. **sample_prices** : ``list``
        *Optional*. A list of the asset prices for which the risk profile will be calculated. Overrides calls to service and forces calculation of risk price for sample of prices supplied. Function will disregard `start_date` and `end_date` and use the first and last key as the latest and earliest date, respectively. In other words, the `sample_prices` dictionary must be ordered from latest to earliest. Format: `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``str``
        *Optional*. Specify asset type to prevent overusing redundant calculations. Allowable values: `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. Defaults to `None`. Flag passed down to the underlying estimation function; see `_calculate_moment_risk_return` and siblings for its interpretation. TODO(review): document the allowable values once confirmed.
    7. **method**: ``str``
        *Optional*. The calculation method to be used in estimating model parameters, i.e. the mean and volatility. Allowable values are accessible through `scrilla.static.keys.keys['ESTIMATION']`. Defaults to the method set in `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the environment variable, **DEFAULT_ESTIMATION_METHOD**. If this variable is not found, the value will default to `scrilla.static.keys.keys['ESTIMATION']['MOMENT']`.

    Raises
    ------
    1. **scrilla.errors.ConfigurationError**
        If the inputted `method` does not map to one of the allowable values in the `scrilla.static.keys.keys` dictionary, then this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as follows,

        ```
        {
            'annual_return': value,
            'annual_volatility': value
        }
        ```

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and the service call altogether. The function will assume `sample_prices` is the source of the truth.
    """
    if method == keys.keys['ESTIMATION']['MOMENT']:
        return _calculate_moment_risk_return(ticker=ticker,
                                             start_date=start_date,
                                             end_date=end_date,
                                             sample_prices=sample_prices,
                                             asset_type=asset_type,
                                             weekends=weekends)
    if method == keys.keys['ESTIMATION']['PERCENT']:
        return _calculate_percentile_risk_return(ticker=ticker,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 sample_prices=sample_prices,
                                                 asset_type=asset_type,
                                                 weekends=weekends)
    if method == keys.keys['ESTIMATION']['LIKE']:
        return _calculate_likelihood_risk_return(ticker=ticker,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 sample_prices=sample_prices,
                                                 asset_type=asset_type,
                                                 weekends=weekends)
    raise errors.ConfigurationError('Statistical estimation method not found')
def _calculate_moment_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, computed directly from the log of the price ratio over each period. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``datetime.date``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``datetime.date``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * price samples are ordered from latest to earliest date.
        * If no start_date and end_date passed in, static snapshot of moving averages, i.e. the moving averages as of today (or last close), are calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, sample_end)` refer to the endpoints of the sample necessary to calculate the previously define calculation. Note, `sample_end == end_date`, but `sample_start == start_date - max(MA_1_PERIOD, MA_2_PERIOD, MA_3_PERIOD)`, in order for the sample to contain enough data points to estimate the moving average.
    """
    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # Default to a single-day snapshot ending today (or the last trading
    # date for equities).
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # Extend the sample start back by the longest MA period so every date in
    # the range has enough history to compute all three averages.
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_moment_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # Slice out the `ma_period` prices preceding `this_date`
            # (prices are ordered latest to earliest).
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            last_date, first_date = list(ma_range)[0], list(ma_range)[-1]
            last_price = ma_range[last_date][keys.keys['PRICES']['CLOSE']]
            first_price = ma_range[first_date][keys.keys['PRICES']['CLOSE']]
            # Mean log return over the period, scaled by the trading period.
            mas.append(log(float(last_price)/float(first_price)) /
                       (trading_period*ma_period))

        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages
def _calculate_percentile_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, estimated by fitting a normal distribution to the first and third quartiles of the sample of returns over each period. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``datetime.date``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``datetime.date``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * price samples are ordered from latest to earliest date.
        * If no start_date and end_date passed in, static snapshot of moving averages, i.e. the moving averages as of today (or last close), are calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, sample_end)` refer to the endpoints of the sample necessary to calculate the previously define calculation. Note, `sample_end == end_date`, but `sample_start == start_date - max(MA_1_PERIOD, MA_2_PERIOD, MA_3_PERIOD)`, in order for the sample to contain enough data points to estimate the moving average.
    """
    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # Default to a single-day snapshot ending today (or the last trading
    # date for equities).
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # Extend the sample start back by the longest MA period so every date in
    # the range has enough history to compute all three averages.
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_percentile_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # Slice out the `ma_period` prices preceding `this_date`
            # (prices are ordered latest to earliest).
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            sample_of_returns = get_sample_of_returns(
                ticker=ticker, sample_prices=ma_range)

            first_quartile = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.25)
            median = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.50)
            third_quartile = estimators.sample_percentile(
                data=sample_of_returns, percentile=0.75)
            guess = (median, (third_quartile-first_quartile)/2)

            # Solve for the normal distribution whose CDF passes through the
            # sample's first and third quartiles.
            mean, vol = fsolve(lambda params, first=first_quartile, third=third_quartile:
                               [norm.cdf(x=first, loc=params[0], scale=params[1]) - 0.25,
                                norm.cdf(x=third, loc=params[0], scale=params[1]) - 0.75],
                               guess)

            # NOTE: Var(dln(S)/delta_t) = (1/delta_t^2)*Var(dlnS) = sigma^2*delta_t / delta_t^2 = sigma^2 / delta_t
            # so need to multiply volatiliy by sqrt(delta_t) to get correct scale.
            vol = vol * sqrt(trading_period)
            # ito's lemma
            mean = mean + 0.5 * (vol ** 2)
            mas.append(mean)
        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages
def _calculate_likelihood_moving_averages(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, Dict[str, float]]:
    """
    Returns the moving averages for the specified `ticker`, estimated by maximum likelihood over the sample of returns in each period. Each function call returns a group of three moving averages, calculated over different periods. The length of the periods is defined by the variables: `scrilla.settings.MA_1_PERIOD`, `scrilla.settings.MA_2_PERIOD` and `scrilla.settings.MA_3_PERIOD`. These variables are in turn configured by the values of the environment variables *MA_1*, *MA_2* and *MA_3*. If these environment variables are not found, they will default to 20, 60, 100 days, respectively.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol corresponding to the moving averages to be calculated.
    2. **start_date** : ``datetime.date``
        *Optional*. Defaults to `None`. Start date of the time period over which the moving averages will be calculated.
    3. **end_date**: ``datetime.date``
        *Optional*. Defaults to `None`. End date of the time period over which the moving averages will be calculated.

    Output
    ------
    ``Dict[str, Dict[str,float]]``
        Dictionary with the date as the key and a nested dictionary containing the moving averages as the value.

        ```
        {
            'date': {
                'MA_1': value,
                'MA_2': value,
                'MA_3': value
            },
            ...
        }
        ```

    .. notes::
        * price samples are ordered from latest to earliest date.
        * If no start_date and end_date passed in, static snapshot of moving averages, i.e. the moving averages as of today (or last close), are calculated and returned.
        * there are two different sets of dates. `(start_date, end_date)` refer to the endpoints of the date range for which the moving averages will be calculated. `(sample_start, sample_end)` refer to the endpoints of the sample necessary to calculate the previously define calculation. Note, `sample_end == end_date`, but `sample_start == start_date - max(MA_1_PERIOD, MA_2_PERIOD, MA_3_PERIOD)`, in order for the sample to contain enough data points to estimate the moving average.
    """
    # Local import to avoid a circular dependency between this module and
    # the optimizer module.
    from scrilla.analysis.optimizer import maximize_univariate_normal_likelihood

    asset_type = files.get_asset_type(ticker)
    trading_period = functions.get_trading_period(asset_type)

    # Default to a single-day snapshot ending today (or the last trading
    # date for equities).
    if start_date is None:
        if asset_type == keys.keys['ASSETS']['EQUITY']:
            start_date = dater.this_date_or_last_trading_date()
        elif asset_type == keys.keys['ASSETS']['CRYPTO']:
            start_date = dater.today()
    if end_date is None:
        end_date = start_date

    # Extend the sample start back by the longest MA period so every date in
    # the range has enough history to compute all three averages.
    if asset_type == keys.keys['ASSETS']['EQUITY']:
        ma_date_range = dater.business_dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_business_days(
            start_date, settings.MA_3_PERIOD)
    elif asset_type == keys.keys['ASSETS']['CRYPTO']:
        ma_date_range = dater.dates_between(start_date, end_date)
        sample_start = dater.decrement_date_by_days(
            start_date, settings.MA_3_PERIOD)

    sample_prices = services.get_daily_price_history(ticker=ticker, start_date=sample_start,
                                                     end_date=end_date, asset_type=asset_type)

    moving_averages = {}
    for this_date in ma_date_range:
        logger.debug(
            f'Calculating {ticker} moving averages on {dater.to_string(this_date)}', '_calculate_likelihood_moving_averages')
        this_date_index = list(sample_prices).index(dater.to_string(this_date))
        mas = []
        for ma_period in [settings.MA_1_PERIOD, settings.MA_2_PERIOD, settings.MA_3_PERIOD]:
            # Slice out the `ma_period` prices preceding `this_date`
            # (prices are ordered latest to earliest).
            ma_range = dict(itertools.islice(
                sample_prices.items(), this_date_index, this_date_index+ma_period+1))
            sample_of_returns = get_sample_of_returns(
                ticker=ticker, sample_prices=ma_range)

            likelihood_estimates = maximize_univariate_normal_likelihood(
                data=sample_of_returns)
            # See NOTE in docstring
            # NOTE: E(dln(S)/delta_t) = (mu - 0.5 * sigma ** 2) * delta_t / delta_t = mu - 0.5 * sigma ** 2
            # TODO: add :math to docstring with this
            # NOTE: Var(dln(S)/delta_t) = (1/delta_t**2)*Var(dlnS) = sigma**2*delta_t / delta_t**2 = sigma**2 / delta_t
            # so need to multiply volatiliy by sqrt(delta_t) to get correct scale.
            vol = likelihood_estimates[1]*sqrt(trading_period)
            # ito's lemma
            mean = likelihood_estimates[0] + 0.5 * (vol ** 2)
            mas.append(mean)
        moving_averages[dater.to_string(this_date)] = {
            f'MA_{settings.MA_1_PERIOD}': mas[0],
            f'MA_{settings.MA_2_PERIOD}': mas[1],
            f'MA_{settings.MA_3_PERIOD}': mas[2]
        }

    return moving_averages
def _calculate_likelihood_risk_return(ticker, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the annualized mean rate of return and volatility for a sample of asset prices
    as if the asset price followed a Geometric Brownian Motion process, i.e. the mean rate of
    return and volatility are constant and not functions of time or the asset price. The
    parameters are estimated by maximum likelihood: the product of the probabilities of each
    observed return under a normal distribution is maximized with respect to the mean and
    the volatility.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `start_date` were 100 trading days prior to `end_date`
        (100 calendar days for crypto assets).
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `end_date` were today (the last valid trading date for equities).
    4. **sample_prices** : ``Union[dict, None]``
        *Optional*. Prices for which the risk profile will be calculated. Overrides service calls
        and bypasses the cache entirely. `start_date` and `end_date` are disregarded; the first and
        last keys are used as the latest and earliest dates, i.e. the dictionary must be ordered
        from latest to earliest. Format:
        `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations. Allowable values:
        `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. `1` to include weekend prices in the sample, `0` to exclude them. Defaults to
        `1` for crypto assets and `0` otherwise.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price
        data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as
        `{ 'annual_return' : float, 'annual_volatility': float }`

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and
          the service call altogether. The function will assume `sample_prices` is the source of
          the truth.
    """
    # local import to avoid a circular dependency between this module and the optimizer
    from scrilla.analysis.optimizer import maximize_univariate_normal_likelihood

    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        if asset_type == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if sample_prices is None:
        start_date, end_date = errors.validate_dates(
            start_date, end_date, asset_type)
        # BUG FIX: `weekends` is now passed to the cache filter. Profiles are saved with
        # `weekends` (see save_or_update_row below) and the percentile/moment estimators
        # filter on it; omitting it here could return a cached profile computed under a
        # different weekend setting.
        results = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                       method=keys.keys['ESTIMATION']['LIKE'],
                                       weekends=weekends)

        if results is not None \
                and results[keys.keys['STATISTICS']['RETURN']] is not None \
                and results[keys.keys['STATISTICS']['VOLATILITY']] is not None:
            return results

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_likelihood_risk_return')
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_likelihood_risk_return')
            prices = dater.intersect_with_trading_dates(prices)

    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_likelihood_risk_return')
        prices = sample_prices

    if not prices:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    sample_of_returns = get_sample_of_returns(
        ticker=ticker, sample_prices=prices, asset_type=asset_type)

    likelihood_estimates = maximize_univariate_normal_likelihood(
        data=sample_of_returns)
    # NOTE: E(dln(S)/delta_t) = (mu - 0.5 * sigma ** 2) * delta_t / delta_t = mu - 0.5 * sigma ** 2
    # NOTE: Var(dln(S)/delta_t) = (1/delta_t**2)*Var(dlnS) = sigma**2*delta_t / delta_t**2 = sigma**2 / delta_t
    #       so need to multiply volatility by sqrt(delta_t) to get correct scale.
    vol = likelihood_estimates[1]*sqrt(trading_period)
    # ito's lemma
    mean = likelihood_estimates[0] + 0.5 * (vol ** 2)
    results = {
        keys.keys['STATISTICS']['RETURN']: mean,
        keys.keys['STATISTICS']['VOLATILITY']: vol
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['LIKE'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])

    return results
def _calculate_percentile_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the annualized mean rate of return and volatility for a sample of asset prices
    under a Geometric Brownian Motion model via percentile matching: the 25th and 75th
    percentiles of the assumed (normal) return distribution are equated to the corresponding
    sample percentiles and the resulting system is solved for the distribution's mean and
    standard deviation.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `start_date` were 100 trading days prior to `end_date`
        (100 calendar days for crypto assets).
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `end_date` were today (the last valid trading date for equities).
    4. **sample_prices** : ``Union[dict, None]``
        *Optional*. Prices for which the risk profile will be calculated. Overrides service calls
        and bypasses the cache. `start_date` and `end_date` are disregarded; the dictionary must be
        ordered from latest to earliest date. Format:
        `{ 'date_1' : { 'open' : float, 'close' : float}, 'date_2': { 'open': float, 'close': float } ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations. Allowable values:
        `scrilla.keys.keys['ASSETS']['EQUITY']`, `scrilla.keys.keys['ASSETS']['CRYPTO']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. `1` to include weekend prices in the sample, `0` to exclude them. Defaults to
        `1` for crypto assets and `0` otherwise.

    Returns
    -------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as
        `{ 'annual_return' : float, 'annual_volatility': float }`

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price
        data from the cache or external services, this error will be thrown.

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * if the `sample_prices` dictionary is provided, the function will bypass the cache and
          the service call altogether and assume `sample_prices` is the source of the truth.
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        weekends = 1 if asset_type == keys.keys['ASSETS']['CRYPTO'] else 0

    if sample_prices is None:
        # validate over calendar days when weekends are included, otherwise over
        # trading days, so samples of mixed asset types line up correctly.
        date_basis = keys.keys['ASSETS']['CRYPTO'] if weekends == 1 \
            else keys.keys['ASSETS']['EQUITY']
        start_date, end_date = errors.validate_dates(
            start_date, end_date, date_basis)

        cached_profile = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                              method=keys.keys['ESTIMATION']['PERCENT'],
                                              weekends=weekends)

        if cached_profile is not None \
                and cached_profile[keys.keys['STATISTICS']['RETURN']] is not None \
                and cached_profile[keys.keys['STATISTICS']['VOLATILITY']] is not None:
            return cached_profile

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_percentile_risk_return')
        price_history = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_percentile_risk_return')
            price_history = dater.intersect_with_trading_dates(price_history)
    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_percentile_risk_return')
        price_history = sample_prices

    if not price_history:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    returns = get_sample_of_returns(
        ticker=ticker, sample_prices=price_history, asset_type=asset_type)

    first_quartile = estimators.sample_percentile(data=returns, percentile=0.25)
    median = estimators.sample_percentile(data=returns, percentile=0.50)
    third_quartile = estimators.sample_percentile(data=returns, percentile=0.75)

    def quartile_conditions(params):
        # residuals of the normal CDF evaluated at the sample quartiles
        return [norm.cdf(x=first_quartile, loc=params[0], scale=params[1]) - 0.25,
                norm.cdf(x=third_quartile, loc=params[0], scale=params[1]) - 0.75]

    # seed the solver with the sample median and half the interquartile range
    initial_guess = (median, (third_quartile - first_quartile) / 2)
    mean, vol = fsolve(quartile_conditions, initial_guess)

    # NOTE: Var(dln(S)/delta_t) = (1/delta_t^2)*Var(dlnS) = sigma^2*delta_t / delta_t^2 = sigma^2 / delta_t
    #       so need to multiply volatility by sqrt(delta_t) to get correct scale.
    vol = vol * sqrt(trading_period)
    # ito's lemma
    mean = mean + 0.5 * (vol ** 2)

    results = {
        keys.keys['STATISTICS']['RETURN']: mean,
        keys.keys['STATISTICS']['VOLATILITY']: vol
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['PERCENT'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])
    return results
def _calculate_moment_risk_return(ticker: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, asset_type: Union[str, None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Estimates the annualized mean rate of return and volatility for a sample of asset prices
    under a Geometric Brownian Motion model via moment matching: the return is equated to the
    first moment of the sample and the volatility to the square root of its second (central)
    moment.

    Parameters
    ----------
    1. **ticker** : ``str``
        Ticker symbol whose risk-return profile is to be calculated.
    2. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `start_date` were 100 trading days prior to `end_date`
        (100 calendar days for crypto assets).
    3. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the calculation period. Defaults to `None`, in which case the
        calculation proceeds as if `end_date` were today (the last valid trading date for equities).
    4. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. Prices for which the risk profile will be calculated. Overrides service calls
        and bypasses the cache. `start_date` and `end_date` are disregarded; the dictionary must be
        ordered from latest to earliest date. Format:
        `{ 'date_1' : { 'open' : number, 'close' : number}, 'date_2': { 'open': number, 'close': number} ... }`
    5. **asset_type** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations. Allowable values can be
        found in `scrilla.keys.keys['ASSETS']`
    6. **weekends** : ``Union[int, None]``
        *Optional*. `1` to include weekend prices in the sample, `0` to exclude them. Defaults to
        `1` for crypto assets and `0` otherwise.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price
        data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the annualized return and volatility. Formatted as
        `{ 'annual_return' : float, 'annual_volatility': float }`

    .. notes::
        * assumes price history is ordered from latest to earliest date.
        * function will bypass the cache if `sample_prices` is provided. In other words, the
          calculation can be forced by specifying `sample_prices`.
    """
    asset_type = errors.validate_asset_type(ticker, asset_type)
    trading_period = functions.get_trading_period(asset_type)

    if weekends is None:
        weekends = 1 if asset_type == keys.keys['ASSETS']['CRYPTO'] else 0

    if sample_prices is None:
        # NOTE: Cache is bypassed when sample_prices are not null.
        # validate over calendar days when weekends are included, otherwise over
        # trading days, so samples of mixed asset types line up correctly.
        date_basis = keys.keys['ASSETS']['CRYPTO'] if weekends == 1 \
            else keys.keys['ASSETS']['EQUITY']
        start_date, end_date = errors.validate_dates(
            start_date, end_date, date_basis)

        cached_profile = profile_cache.filter(ticker=ticker, start_date=start_date, end_date=end_date,
                                              method=keys.keys['ESTIMATION']['MOMENT'], weekends=weekends)

        if cached_profile is not None \
                and cached_profile.get(keys.keys['STATISTICS']['RETURN']) is not None \
                and cached_profile.get(keys.keys['STATISTICS']['VOLATILITY']) is not None:
            return cached_profile

        logger.debug('No sample prices provided, calling service.',
                     '_calculate_moment_risk_return')
        prices = services.get_daily_price_history(
            ticker=ticker, start_date=start_date, end_date=end_date, asset_type=asset_type)

        if asset_type == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
            logger.debug('Removing weekends from crypto sample',
                         '_calculate_moment_risk_return')
            prices = dater.intersect_with_trading_dates(prices)
    else:
        logger.debug(
            f'{ticker} sample prices provided, skipping service call.', '_calculate_moment_risk_return')
        prices = sample_prices

    if not prices:
        raise errors.PriceError(f'No prices could be retrieved for {ticker}')

    # taking the log of a difference loses one observation
    sample = len(prices) - 1
    logger.debug(
        f'Calculating mean annual return over last {sample} days for {ticker}', '_calculate_moment_risk_return')

    # MEAN CALCULATION
    # NOTE: mean return is a telescoping series, i.e. sum of log(x1/x0) only depends on the first and
    # last terms' contributions (because log(x1/x0) + log(x2/x1)= log(x2) - log(x1) + log(x1) - log(x0)) = log(x2/x0))
    # which raises the question how accurate a measure the sample mean return is of the population mean return.
    ordered_dates = list(prices)
    latest_close = prices[ordered_dates[0]][keys.keys['PRICES']['CLOSE']]
    earliest_close = prices[ordered_dates[-1]][keys.keys['PRICES']['CLOSE']]
    mean_return = log(float(latest_close)/float(earliest_close)) / \
        (trading_period*sample)

    # VOLATILITY CALCULATION
    # NOTE / TODO : this is a 'naive' variance algorithm: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    # although technically, this is only one pass, since the mean telescopes and doesn't require a full traversal of the
    # the sample. I should see how this implementation compares to a Young and Cramer Updating algorithm implementation.
    variance = 0
    # adjust the random variable being measured so expectation is easier to calculate.
    mean_mod_return = mean_return*sqrt(trading_period)
    logger.debug(
        f'Calculating mean annual volatility over last {sample} days for {ticker}', '_calculate_moment_risk_return')

    # walk consecutive (later, earlier) date pairs; prices are ordered latest first
    for tomorrows_date, this_date in zip(ordered_dates, ordered_dates[1:]):
        todays_price = prices[this_date][keys.keys['PRICES']['CLOSE']]
        tomorrows_price = prices[tomorrows_date][keys.keys['PRICES']['CLOSE']]

        logger.verbose(
            f'{this_date}: (todays_price, tomorrows_price) = ({todays_price}, {tomorrows_price})', '_calculate_moment_risk_return')

        # crypto prices may have weekends and holidays removed during correlation algorithm
        # so samples can be compared to equities, need to account for these dates by increasing
        # the time_delta by the number of missed days.
        if asset_type == keys.keys['ASSETS']['CRYPTO'] or \
                (asset_type == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
            time_delta = (dater.parse(
                tomorrows_date) - dater.parse(this_date)).days
        else:
            time_delta = 1

        current_mod_return = log(
            float(tomorrows_price)/float(todays_price))/sqrt(time_delta*trading_period)
        daily = (current_mod_return - mean_mod_return)**2/(sample - 1)
        variance = variance + daily

        logger.verbose(
            f'{this_date}: (daily_variance, sample_variance) = ({round(daily, 4)}, {round(variance, 4)})', '_calculate_moment_risk_return')

    # adjust for output
    volatility = sqrt(variance)
    # ito's lemma
    mean_return = mean_return + 0.5*(volatility**2)
    logger.debug(
        f'(mean_return, sample_volatility) = ({round(mean_return, 2)}, {round(volatility, 2)})', '_calculate_moment_risk_return')

    results = {
        keys.keys['STATISTICS']['RETURN']: mean_return,
        keys.keys['STATISTICS']['VOLATILITY']: volatility
    }

    profile_cache.save_or_update_row(ticker=ticker, start_date=start_date, end_date=end_date,
                                     method=keys.keys['ESTIMATION']['MOMENT'], weekends=weekends,
                                     annual_return=results[keys.keys['STATISTICS']['RETURN']],
                                     annual_volatility=results[keys.keys['STATISTICS']['VOLATILITY']])
    return results
def _calculate_percentile_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Returns the sample correlation calculated using the method of Percentile Matching, assuming
    the underlying price process follows Geometric Brownian Motion, i.e. the price distribution
    is lognormal.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **asset_type_1** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable
        values can be found in the `scrilla.keys.keys['ASSETS']` dictionary.
    4. **asset_type_2** : ``Union[str, None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable
        values can be found in the `scrilla.keys.keys['ASSETS']` dictionary.
    5. **start_date** : ``Union[datetime.date, None]``
        *Optional*. Start date of the time period over which correlation will be calculated.
        If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which correlation will be calculated.
        If `None`, defaults to the last trading day.
    7. **sample_prices** : ``Union[Dict[str, Dict[str, float]], None]``
        *Optional*. Prices for which correlation will be calculated. Overrides calls to service
        and disregards `start_date`/`end_date`. Must be formatted:
        `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1', ... } }`
        and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int, None]``
        *Optional*. `1` to include weekend prices in the sample, `0` to exclude them. Defaults to
        `1` when both assets are crypto and `0` otherwise.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price
        data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`, formatted as
        `{ 'correlation' : float }`.

    .. notes ::
        * Method uses the theory of copulas for multivariate distributions to break the joint
          distribution into component distributions in order to find the cumulative probability
          of the individual distribution's order statistics. See *references* for more information.

    .. references::
        - [How To Determine Quantile Isolines Of A Multivariate Normal Distribution](https://stats.stackexchange.com/questions/64680/how-to-determine-quantiles-isolines-of-a-multivariate-normal-distribution)
        - [Copula (Probability Theory)](https://en.wikipedia.org/wiki/Copula_(probability_theory))
        - [An Introduction To Copulas](http://www.columbia.edu/~mh2078/QRM/Copulas.pdf)
    """
    ### START ARGUMENT PARSING ###
    asset_type_1 = errors.validate_asset_type(
        ticker=ticker_1, asset_type=asset_type_1)
    asset_type_2 = errors.validate_asset_type(
        ticker=ticker_2, asset_type=asset_type_2)

    # cache flag to signal if calculation includes weekends or not,
    # only perform check if not passed in as argument
    if weekends is None:
        if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1:
        # validate over total days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        # validate over trading days. since sample(date - 100 days) > (date - 100 trading days), always
        # take the largest sample so intersect_dict_keys will return a sample of the correct size
        # for mixed asset types.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['EQUITY'])

    if sample_prices is None:
        cached_correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
                                                      start_date=start_date, end_date=end_date,
                                                      weekends=weekends,
                                                      method=keys.keys['ESTIMATION']['PERCENT'])
        if cached_correlation is not None:
            return cached_correlation

        sample_prices = {}
        logger.debug(
            f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_percentile_correlation')
        # BUG FIX: log tag previously read '_calculate_percentile_calculation'
        logger.debug('Retrieving price histories for calculation.',
                     '_calculate_percentile_correlation')
        sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_1)
        sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_2)

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
        sample_prices[ticker_1] = dater.intersect_with_trading_dates(
            sample_prices[ticker_1])
        sample_prices[ticker_2] = dater.intersect_with_trading_dates(
            sample_prices[ticker_2])
    else:
        # intersect with equity keys to get trading days
        sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
            sample_prices[ticker_1], sample_prices[ticker_2])

    if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]:
        raise errors.PriceError(
            "Prices cannot be retrieved for correlation calculation", '_calculate_percentile_correlation')

    sample_of_returns_1 = estimators.standardize(get_sample_of_returns(
        ticker=ticker_1, sample_prices=sample_prices[ticker_1], asset_type=asset_type_1))
    sample_of_returns_2 = estimators.standardize(get_sample_of_returns(
        ticker=ticker_2, sample_prices=sample_prices[ticker_2], asset_type=asset_type_2))

    combined_sample = [[el, sample_of_returns_2[i]]
                       for i, el in enumerate(sample_of_returns_1)]

    percentiles = [0.1, 0.16, 0.5, 0.84, 0.9]
    sample_percentiles_1, sample_percentiles_2 = [], []

    for percentile in percentiles:
        sample_percentiles_1.append(estimators.sample_percentile(
            data=sample_of_returns_1, percentile=percentile))
        sample_percentiles_2.append(estimators.sample_percentile(
            data=sample_of_returns_2, percentile=percentile))

    logger.debug(
        f'Standardized sample percentiles for {ticker_1}: \n{sample_percentiles_1}', '_calculate_percentile_correlation')
    logger.debug(
        f'Standardized sample percentiles for {ticker_2}: \n{sample_percentiles_2}', '_calculate_percentile_correlation')

    def copula_matrix(params):
        # reject parameters producing a non-positive-definite covariance matrix;
        # NOTE(review): returning scalar `inf` rather than a matrix appears to be a
        # deliberate penalty for the solver — confirm multivariate_normal.cdf's
        # behavior with this input before changing.
        determinant = 1 - params[0]**2
        if determinant == 0 or determinant < 0 or determinant < (10**(-constants.constants['ACCURACY'])):
            logger.verbose('Solution is non-positive semi-definite',
                           '_calculate_percentile_correlation')
            return inf
        logger.verbose(
            f'Instantiating Copula Matrix: \n{[[1, params[0]], [params[0], 1]]}', '_calculate_percentile_correlation')
        return [[1, params[0]], [params[0], 1]]

    # Calculate copula distribution of order statistics and constrain it against the empirical estimate
    def residuals(params):
        # BUG FIX: the loop variable previously iterated `enumerate(percentiles)`,
        # which yields (index, value) tuples and raised a TypeError when used as a
        # list index. Iterate the indices directly instead.
        res = [
            (multivariate_normal.cdf(x=[sample_percentiles_1[i], sample_percentiles_2[i]],
                                     mean=[0, 0], cov=copula_matrix(params))
             - estimators.empirical_copula(sample=combined_sample, x_order=sample_percentiles_1[i],
                                           y_order=sample_percentiles_2[i]))
            for i in range(len(percentiles))
        ]
        logger.verbose(f'Residuals for {params}: \n{res}',
                       '_calculate_percentile_correlation.residuals')
        return res

    parameters = least_squares(residuals, (0), bounds=((-0.99999), (0.99999)))

    correl = parameters.x[0]
    result = {keys.keys['STATISTICS']['CORRELATION']: correl}

    # BUG FIX: previously saved `correlation=correlation`, which was either undefined
    # (NameError when `sample_prices` was supplied) or None (cache miss), so the
    # computed value was never persisted. Save the solved value `correl` instead.
    correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
                               start_date=start_date, end_date=end_date,
                               correlation=correl, method=keys.keys['ESTIMATION']['PERCENT'],
                               weekends=weekends)
    return result
1038def _calculate_likelihood_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
1039 """
1040 Calculates the sample correlation using the maximum likelihood estimators, assuming underlying price process follows Geometric Brownian Motion, i.e. the price distribution is lognormal.
1042 Parameters
1043 ----------
1044 1. **ticker_1** : ``str``
1045 Ticker symbol for first asset.
1046 2. **ticker_2** : ``str``
1047 Ticker symbol for second asset
1048 3. **asset_type_1** : ``str``
1049 *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
1050 4. **asset_type_2** : ``str``
1051 *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
1052 5. *start_date* : ``datetime.date``
1053 *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
1054 6. **end_date** : ``datetime.date``
1055 *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
1056 7. **sample_prices** : ``dict``
1057 *Optional*. A list of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1:, ... } }` and ordered from latest date to earliest date.
1059 Raises
1060 ------
1061 1. **scrilla.errors.PriceError**
1062 If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.
1064 Returns
1065 ------
1066 ``Dict[str, float]``
1067 Dictionary containing the correlation of `ticker_1` and `ticker_2`: Formatted as: `{ 'correlation' : float }`.
1069 .. notes::
1070 *
1072 $$ $$
1073 """
1074 from scrilla.analysis import optimizer
1075 ### START ARGUMENT PARSING ###
1076 asset_type_1 = errors.validate_asset_type(
1077 ticker=ticker_1, asset_type=asset_type_1)
1078 asset_type_2 = errors.validate_asset_type(
1079 ticker=ticker_2, asset_type=asset_type_2)
1081 # cache flag to signal if calculation includes weekends or not,
1082 # only perform check if not passed in as argument
1083 if weekends is None: 1083 ↛ 1084line 1083 didn't jump to line 1084, because the condition on line 1083 was never true
1084 if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
1085 weekends = 1
1086 else:
1087 weekends = 0
1089 if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1: 1089 ↛ 1091line 1089 didn't jump to line 1091, because the condition on line 1089 was never true
1090 # validate over total days.
1091 start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
1092 asset_type=keys.keys['ASSETS']['CRYPTO'])
1093 else:
1094 # validate over trading days. since sample(date - 100 days) > (date - 100 trading days), always
1095 # take the largest sample so intersect_dict_keys will return a sample of the correct size
1096 # for mixed asset types.
1097 start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
1098 asset_type=keys.keys['ASSETS']['EQUITY'])
1100 if sample_prices is None: 1100 ↛ 1122line 1100 didn't jump to line 1122, because the condition on line 1100 was never false
1101 correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
1102 start_date=start_date, end_date=end_date,
1103 weekends=weekends,
1104 method=keys.keys['ESTIMATION']['LIKE'])
1105 if correlation is not None:
1106 return correlation
1108 sample_prices = {}
1109 logger.debug(
1110 f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_likelihood_correlation')
1111 logger.debug('Retrieving price histories for calculation.',
1112 '_calculate_likelihood_correlation')
1113 sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1,
1114 start_date=start_date,
1115 end_date=end_date,
1116 asset_type=asset_type_1)
1117 sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2,
1118 start_date=start_date,
1119 end_date=end_date,
1120 asset_type=asset_type_2)
1122 if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0: 1122 ↛ 1123line 1122 didn't jump to line 1123, because the condition on line 1122 was never true
1123 sample_prices[ticker_1] = dater.intersect_with_trading_dates(
1124 sample_prices[ticker_1])
1125 sample_prices[ticker_2] = dater.intersect_with_trading_dates(
1126 sample_prices[ticker_2])
1127 else:
1128 # intersect with equity keys to get trading days
1129 sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
1130 sample_prices[ticker_1], sample_prices[ticker_2])
1132 if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]: 1132 ↛ 1133line 1132 didn't jump to line 1133, because the condition on line 1132 was never true
1133 raise errors.PriceError(
1134 "Prices cannot be retrieved for correlation calculation")
1136 sample_of_returns_1 = get_sample_of_returns(ticker=ticker_1,
1137 sample_prices=sample_prices[ticker_1],
1138 asset_type=asset_type_1)
1139 sample_of_returns_2 = get_sample_of_returns(ticker=ticker_2,
1140 sample_prices=sample_prices[ticker_2],
1141 asset_type=asset_type_2)
1143 combined_sample = [[el, sample_of_returns_2[i]]
1144 for i, el in enumerate(sample_of_returns_1)]
1146 likelihood_estimates = optimizer.maximize_bivariate_normal_likelihood(
1147 data=combined_sample)
1149 # Var(d lnS / delta_t ) = Var(d lnS )/delta_t**2 = sigma**2 * delta_t / delta_t**2
1150 # = sigma**2/delta_t
1151 # Cov(d lnS/delta_t, d lnQ/delta_t) = Cov(d lnS, dlnQ)/delta_t**2
1152 # = rho * sigma_s * sigma_q / delta_t**2
1153 vol_1 = sqrt(likelihood_estimates[2])
1154 vol_2 = sqrt(likelihood_estimates[3])
1156 correlation = likelihood_estimates[4] / (vol_1*vol_2)
1158 result = {'correlation': correlation}
1160 correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
1161 start_date=start_date, end_date=end_date,
1162 correlation=correlation, weekends=weekends,
1163 method=keys.keys['ESTIMATION']['LIKE'])
1164 return result
def _calculate_moment_correlation(ticker_1: str, ticker_2: str, asset_type_1: Union[str, None] = None, asset_type_2: Union[str, None] = None, start_date: Union[datetime.date, None] = None, end_date: Union[datetime.date, None] = None, sample_prices: Union[Dict[str, Dict[str, float]], None] = None, weekends: Union[int, None] = None) -> Dict[str, float]:
    """
    Returns the sample correlation using the method of Moment Matching, assuming underlying price process follows Geometric Brownian Motion, i.e. the price distribution is lognormal.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset
    3. **asset_type_1** : ``Union[str,None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    4. **asset_type_2** : ``Union[str,None]``
        *Optional*. Specify asset type to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    5. **start_date** : ``Union[datetime.date,None]``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    6. **end_date** : ``Union[datetime.date, None]``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    7. **sample_prices** : ``Union[dict,None]``
        *Optional*. A list of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1:, ... } }` and ordered from latest date to earliest date.
    8. **weekends** : ``Union[int,None]``
        *Optional*. A flag to signal that calculations should include/exclude weekend dates. See *notes* for more information. Defaults to `None` and is implicitly determined by the asset types passed in.

    Raises
    ------
    1. **scrilla.errors.PriceError**
        If no price data is inputted into the function and the application cannot retrieve price data from the cache or external services, this error will be thrown.

    Returns
    ------
    ``Dict[str, float]``
        Dictionary containing the correlation of `ticker_1` and `ticker_2`: Formatted as: `{ 'correlation' : float }`

    .. notes::
        * when the asset types are mixed, i.e. `asset_type_1` == 'equity' and `asset_type_2`== 'crypto', the sample prices will contain different information, since crypto trades on weekends and holidays. The solution is to throw away the weekend and holiday prices for crypto. This presents another problem, since the risk profile for a crypto-currency that is cached in the local fileystem will be calculated over the entire sample including the missing data, whereas the risk profile required by the correlation needs to be calculated over the censored sample (i.e. the one with weekends and holidays removed) so that the means of the mixed asset types are scaled to the same time delta. In this case, the correlation algorithm needs to be able to override calls to the cache and force the risk profile algorithms to calculate based on the sample. Note: this issue only applies to correlation calculations using the method of moment matching, since the other methods determine the value of the correlation by solving constrained systems of equations instead of deriving it analytically with a formula.
        * The `weekends` flag is only relevant for assets of type `scrilla.static.keys.keys['ASSETS']['CRYPTO']`, i.e. cryptocurrency. It is passed in when the correlation calculation is part of a larger correlation matrix calculation, so that entries in the matrix have equivalent time frames. E.g., if the `scrilla.analysis.models.geometric.statistics.correlation_matrix` is calculating a matrix for a collection of mixed asset types, say, `["BTC", "ETH", "ALLY", "SPY"]`, the correlations between (crypto, equity) and (equity, equity) will only include weekdays, where as the (crypto,crypto) pairing will include weekends and thus result in an inaccurate matrix. To resolve this problem, the `weekends` flag can be passed into this calculation to prevent (crypto,crypto) pairings from including weekends.
    """
    ### START ARGUMENT PARSING ###
    asset_type_1 = errors.validate_asset_type(
        ticker=ticker_1, asset_type=asset_type_1)
    asset_type_2 = errors.validate_asset_type(
        ticker=ticker_2, asset_type=asset_type_2)

    # cache flag to signal if calculation includes weekends or not,
    # only perform check if not passed in as argument so that agent
    # calling can override default weekend behavior, i.e. make
    # crypto pairing forcibly exclude weekends from their calculation.
    if weekends is None:
        if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
            weekends = 1
        else:
            weekends = 0

    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 1:
        # validate over total days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        # validate over trading days.
        start_date, end_date = errors.validate_dates(start_date=start_date, end_date=end_date,
                                                     asset_type=keys.keys['ASSETS']['EQUITY'])

    if sample_prices is None:
        # check the cache before hitting external price services
        correlation = correlation_cache.filter(ticker_1=ticker_1, ticker_2=ticker_2,
                                               start_date=start_date, end_date=end_date,
                                               weekends=weekends,
                                               method=keys.keys['ESTIMATION']['MOMENT'])
        if correlation is not None:
            return correlation

        sample_prices = {}
        logger.debug(
            f'No sample prices provided or cached ({ticker_1}, {ticker_2}) correlation found.', '_calculate_moment_correlation')
        logger.debug('Retrieving price histories for calculation.',
                     '_calculate_moment_correlation')
        sample_prices[ticker_1] = services.get_daily_price_history(ticker=ticker_1, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_1)
        sample_prices[ticker_2] = services.get_daily_price_history(ticker=ticker_2, start_date=start_date,
                                                                   end_date=end_date, asset_type=asset_type_2)

    # TODO: pretty sure something about this is causing the issue.
    if asset_type_1 == asset_type_2 and asset_type_2 == keys.keys['ASSETS']['CRYPTO'] and weekends == 0:
        # remove weekends and holidays from sample
        logger.debug('Removing weekends from crypto sample',
                     '_calculate_moment_correlation')
        sample_prices[ticker_1] = dater.intersect_with_trading_dates(
            sample_prices[ticker_1])
        sample_prices[ticker_2] = dater.intersect_with_trading_dates(
            sample_prices[ticker_2])
    else:
        # intersect with equity keys to get trading days; both samples must share
        # exactly the same date keys for the pairwise loop below.
        sample_prices[ticker_1], sample_prices[ticker_2] = helper.intersect_dict_keys(
            sample_prices[ticker_1], sample_prices[ticker_2])

    if 0 in [len(sample_prices[ticker_1]), len(sample_prices[ticker_2])]:
        raise errors.PriceError(
            "Prices cannot be retrieved for correlation calculation")

    # trading period, i.e. the fraction of a year represented by one sample
    # observation, differs between crypto (365 days) and equity (trading days)
    if asset_type_1 == keys.keys['ASSETS']['CRYPTO']:
        trading_period_1 = constants.constants['ONE_TRADING_DAY']['CRYPTO']
    else:
        trading_period_1 = constants.constants['ONE_TRADING_DAY']['EQUITY']
    if asset_type_2 == keys.keys['ASSETS']['CRYPTO']:
        trading_period_2 = constants.constants['ONE_TRADING_DAY']['CRYPTO']
    else:
        trading_period_2 = constants.constants['ONE_TRADING_DAY']['EQUITY']
    ### END ARGUMENT PARSING ###

    ### START SAMPLE STATISTICS CALCULATION DEPENDENCIES ###
    # i.e. statistics that need to be calculated before correlation can be calculated
    logger.debug(
        f'Preparing calculation dependencies for ({ticker_1},{ticker_2}) correlation', '_calculate_moment_correlation')

    stats_1 = _calculate_moment_risk_return(ticker=ticker_1,
                                            start_date=start_date,
                                            end_date=end_date,
                                            asset_type=asset_type_1,
                                            weekends=weekends)

    stats_2 = _calculate_moment_risk_return(ticker=ticker_2,
                                            start_date=start_date,
                                            end_date=end_date,
                                            asset_type=asset_type_2,
                                            weekends=weekends)

    # ito's lemma: drift of log-returns is (mu - sigma^2/2), scaled to one
    # observation period via sqrt(trading_period)
    mod_mean_1 = (stats_1['annual_return'] - 0.5*(stats_1['annual_volatility'])
                  ** 2)*sqrt(trading_period_1)

    mod_mean_2 = (stats_2['annual_return'] - 0.5*(stats_2['annual_volatility'])
                  ** 2)*sqrt(trading_period_2)

    logger.debug(
        f'Calculating ({ticker_1}, {ticker_2}) correlation.', '_calculate_moment_correlation')
    # END SAMPLE STATISTICS CALCULATION DEPENDENCIES

    # Initialize loop variables.
    # `today` is False only on the first iteration; since sample_prices is ordered
    # latest-to-earliest (see docstring), `tomorrows_*` always hold the more recent
    # observation from the previous iteration.
    covariance, time_delta = 0, 1
    today, tomorrows_date = False, None
    sample = len(sample_prices[ticker_1])

    #### START CORRELATION LOOP ####
    for this_date in sample_prices[ticker_1]:
        todays_price_1 = sample_prices[ticker_1][this_date][keys.keys['PRICES']['CLOSE']]
        todays_price_2 = sample_prices[ticker_2][this_date][keys.keys['PRICES']['CLOSE']]

        if today:
            logger.verbose(f'today = {this_date}',
                           '_calculate_moment_correlation')
            logger.verbose(
                f'(todays_price, tomorrows_price)_{ticker_1} = ({todays_price_1}, {tomorrows_price_1})', '_calculate_moment_correlation')
            logger.verbose(
                f'(todays_price, tomorrows_price)_{ticker_2} = ({todays_price_2}, {tomorrows_price_2})', '_calculate_moment_correlation')

            # NOTE: crypto prices may have weekends and holidays removed during correlation algorithm
            # so samples can be compared to equities, need to account for these dates by increasing
            # the time_delta by the number of missed days, to offset the weekend and holiday return.
            if asset_type_1 == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type_1 == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1
            # log-return scaled to a per-period deviation
            current_mod_return_1 = log(
                float(tomorrows_price_1)/float(todays_price_1))/sqrt(time_delta*trading_period_1)

            # see above note
            if asset_type_2 == keys.keys['ASSETS']['CRYPTO'] or \
                    (asset_type_2 == keys.keys['ASSETS']['EQUITY'] and not dater.consecutive_trading_days(tomorrows_date, this_date)):
                time_delta = (dater.parse(
                    tomorrows_date) - dater.parse(this_date)).days
            else:
                time_delta = 1

            current_mod_return_2 = log(
                float(tomorrows_price_2)/float(todays_price_2))/sqrt(time_delta*trading_period_2)

            # running sum of the unbiased sample covariance; each of the
            # (sample - 1) return pairs contributes its deviation product
            current_sample_covariance = (
                current_mod_return_1 - mod_mean_1)*(current_mod_return_2 - mod_mean_2)/(sample - 1)
            covariance = covariance + current_sample_covariance

            logger.verbose(
                f'(return_{ticker_1}, return_{ticker_2}) = ({round(current_mod_return_1, 2)}, {round(current_mod_return_2, 2)})', '_calculate_moment_correlation')
            logger.verbose(
                f'(current_sample_covariance, covariance) = ({round(current_sample_covariance, 2)}, {round(covariance, 2)})', '_calculate_moment_correlation')

        else:
            today = True

        tomorrows_price_1, tomorrows_price_2, tomorrows_date = todays_price_1, todays_price_2, this_date
    #### END CORRELATION LOOP ####

    # Scale covariance into correlation: rho = cov / (sigma_1 * sigma_2)
    correlation = covariance / \
        (stats_1['annual_volatility']*stats_2['annual_volatility'])

    logger.debug(
        f'correlation = ({round(correlation, 2)})', '_calculate_moment_correlation')

    result = {'correlation': correlation}

    # persist so subsequent calls over the same window hit the cache branch above
    correlation_cache.save_row(ticker_1=ticker_1, ticker_2=ticker_2,
                               start_date=start_date, end_date=end_date,
                               correlation=correlation, weekends=weekends,
                               method=keys.keys['ESTIMATION']['MOMENT'])
    return result
def correlation_matrix(tickers, asset_types=None, start_date=None, end_date=None, sample_prices=None, method=settings.ESTIMATION_METHOD, weekends: Union[int, None] = None) -> List[List[float]]:
    """
    Returns the correlation matrix for *tickers* from *start_date* to *end_date* using the estimation method *method*.

    Parameters
    ----------
    1. **tickers** : ``list``
        List of ticker symbols whose correlation matrix is to be calculated. Format: `['ticker_1', 'ticker_2', ...]`
    2. **asset_types** : ``list``
        *Optional*. List of asset types that map to the `tickers` list. Specify **asset_types** to prevent redundant calculations down the stack. Allowable values can be found in `scrilla.keys.keys['ASSETS]' dictionary.
    3. **start_date** : ``datetime.date``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    4. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    5. **sample_prices** : ``dict``
        *Optional*. A list of the asset prices for which correlation will be calculated. Overrides calls to service and calculates correlation for sample of prices supplied. Will disregard start_date and end_date. Must be of the format: `{'ticker_1': { 'date_1' : 'price_1', 'date_2': 'price_2' ...}, 'ticker_2': { 'date_1' : 'price_1:, ... } }` and ordered from latest date to earliest date.
    6. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.
    7. **weekends** : ``Union[int, None]``
        *Optional*. A flag to signal that calculations should include (1) or exclude (0) weekend dates. If `None`, defaults to 0; it is then set to 1 whenever every asset in `tickers` is cryptocurrency, so that all entries of the matrix are computed over equivalent time frames.

    Raises
    ------
    1. **scrilla.errors.SampleSizeError**
        If list of tickers is not large enough to calculate a correlation matrix, this error will be thrown.

    Returns
    ------
    ``List[List[float]]``
        correlation matrix of `tickers`. indices correspond to the Cartesian product of `tickers` x `tickers`.
    """
    correl_matrix = [
        [0 for _ in tickers] for _ in tickers
    ]

    # let correlation function handle argument parsing
    if asset_types is None:
        asset_types = [errors.validate_asset_type(
            ticker) for ticker in tickers]

    # NOTE: since crypto trades on weekends and equities do not, the function
    #       must determine if the inputted assets are of mixed type. If any
    #       single asset is of a different type, weekends must be truncated
    #       from sample to ensure correlation is calculated over the samples
    #       of like size.

    # By default, exclude weekends.
    if weekends is None:
        weekends = 0

    # number of distinct asset types; len(set(...)) replaces the previous
    # sorted-then-groupby counting loop with a direct O(n) computation
    asset_groups = len(set(asset_types))

    # if all assets of the same type, include weekends only if asset type is crypto
    if asset_groups == 1 and asset_types[0] == keys.keys['ASSETS']['CRYPTO']:
        logger.debug(
            'Assets of same type, which is crypto, keeping weekends', 'correlation_matrix')
        weekends = 1
    elif asset_groups > 1:
        logger.debug(
            'Assets of different type, removing weekends', 'correlation_matrix')
    else:
        logger.debug(
            'Assets of same type, which is equity, excluding weekends', 'correlation_matrix')

    if len(tickers) > 1:
        for i, item in enumerate(tickers):
            # an asset is perfectly correlated with itself
            correl_matrix[i][i] = 1
            # correlation is symmetric, so only compute the upper triangle
            # and mirror it into the lower triangle
            for j in range(i+1, len(tickers)):
                cor = calculate_correlation(ticker_1=item,
                                            ticker_2=tickers[j],
                                            asset_type_1=asset_types[i],
                                            asset_type_2=asset_types[j],
                                            start_date=start_date,
                                            end_date=end_date,
                                            sample_prices=sample_prices,
                                            weekends=weekends,
                                            method=method)
                correl_matrix[i][j] = cor['correlation']
                correl_matrix[j][i] = correl_matrix[i][j]
        # NOTE: the loop above already set every diagonal entry, including the
        # last one, so no trailing assignment is needed.
        return correl_matrix

    if len(tickers) == 1:
        correl_matrix[0][0] = 1
        return correl_matrix

    raise errors.SampleSizeError(
        'Cannot calculate correlation matrix for portfolio size <= 1.')
def calculate_moment_correlation_series(ticker_1: str, ticker_2: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None) -> Dict[str, float]:
    """
    Computes a time series of moment-matching correlations between `ticker_1` and `ticker_2`.

    For each date in the range ending on `end_date`, the correlation is calculated over the
    sample terminating on that date and stored under the date's string representation.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for the first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for the second asset.
    3. **start_date** : ``Union[date, None]``
        *Optional*. Start of the series. Passed through `errors.validate_dates` when `None`.
    4. **end_date** : ``Union[date, None]``
        *Optional*. End of the series. Passed through `errors.validate_dates` when `None`.

    Returns
    ------
    ``Dict[str, float]``
        Mapping of date strings to the correlation calculated through that date.
    """
    type_1 = errors.validate_asset_type(ticker=ticker_1)
    type_2 = errors.validate_asset_type(ticker=ticker_2)
    both_crypto = (type_1 == keys.keys['ASSETS']['CRYPTO']
                   and type_2 == keys.keys['ASSETS']['CRYPTO'])

    # Crypto pairs are validated over all calendar days; every other pairing is
    # validated over trading days. Since (date - 100 days) > (date - 100 trading
    # days), the larger sample is always taken so intersect_dict_keys will return
    # a sample of the correct size for mixed asset types.
    if both_crypto:
        start_date, end_date = errors.validate_dates(
            start_date=start_date, end_date=end_date,
            asset_type=keys.keys['ASSETS']['CRYPTO'])
    else:
        start_date, end_date = errors.validate_dates(
            start_date=start_date, end_date=end_date,
            asset_type=keys.keys['ASSETS']['EQUITY'])

    # TODO: what if start_date or end_date is None?
    if both_crypto:
        series_dates = [start_date] + dater.dates_between(start_date, end_date)
    else:
        # default to business days
        series_dates = [dater.get_previous_business_date(start_date)] + \
            dater.business_dates_between(start_date, end_date)

    correlation_series = {}
    for current_date in series_dates:
        point = _calculate_moment_correlation(ticker_1=ticker_1,
                                              ticker_2=ticker_2,
                                              end_date=current_date)
        correlation_series[dater.to_string(current_date)] = point['correlation']

    return correlation_series
def calculate_return_covariance(ticker_1: str, ticker_2: str, start_date: Union[date, None] = None, end_date: Union[date, None] = None, sample_prices: Union[dict, None] = None, correlation: Union[dict, None] = None, profile_1: Union[dict, None] = None, profile_2: Union[dict, None] = None, method=settings.ESTIMATION_METHOD) -> float:
    """
    Returns the return covariance between *ticker_1* and *ticker_2* from *start_date* to *end_date* using the estimation method *method*.

    Parameters
    ----------
    1. **ticker_1** : ``str``
        Ticker symbol for first asset.
    2. **ticker_2** : ``str``
        Ticker symbol for second asset.
    3. **start_date** : ``datetime.date``
        *Optional*. Start date of the time period over which correlation will be calculated. If `None`, defaults to 100 trading days ago.
    4. **end_date** : ``datetime.date``
        *Optional*. End date of the time period over which correlation will be calculated. If `None`, defaults to last trading day.
    5. **sample_prices** : ``dict``
        *Optional*. A dictionary containing the asset prices. Must be formatted as: `{ 'ticker_1': { 'date_1': value, ...}, 'ticker_2': { 'date_2' : value, ...}}`.
    6. **correlation** : ``dict``
        *Optional*. Overrides the correlation calculation. A dictionary containing the correlation that should be used in lieu of estimating it from historical data. Formatted as: `{ 'correlation': value }`.
    7. **profile_1** : ``dict``
        *Optional*. Overrides asset 1's risk profile calculation. A dictionary containing the risk profile of the first asset that should be used in lieu of estimating it from historical data.
    8. **profile_2** : ``dict``
        *Optional*. Overrides asset 2's risk profile calculation. A dictionary containing the risk profile of the second asset that should be used in lieu of estimating it from historical data.
    9. **method** : ``str``
        *Optional*. Defaults to the value set by `scrilla.settings.ESTIMATION_METHOD`, which in turn is configured by the **DEFAULT_ESTIMATION_METHOD** environment variable. Determines the estimation method used during the calculation of sample statistics. Allowable values can be accessed through `scrilla.keys.keys['ESTIMATION']`.

    Returns
    ------
    ``float`` : return covariance
    """
    # resolve the correlation unless the caller supplied one; sample_prices,
    # when provided, overrides service calls down the stack
    if correlation is None:
        if sample_prices is None:
            correlation = calculate_correlation(ticker_1=ticker_1, ticker_2=ticker_2,
                                                start_date=start_date,
                                                end_date=end_date, method=method)
        else:
            correlation = calculate_correlation(ticker_1=ticker_1, ticker_2=ticker_2,
                                                sample_prices=sample_prices,
                                                method=method)

    # resolve both risk profiles with the same branching logic, filling in only
    # the ones the caller did not provide
    resolved_profiles = [profile_1, profile_2]
    for position, current_ticker in enumerate((ticker_1, ticker_2)):
        if resolved_profiles[position] is not None:
            continue
        if sample_prices is None:
            resolved_profiles[position] = calculate_risk_return(ticker=current_ticker,
                                                                start_date=start_date,
                                                                end_date=end_date,
                                                                method=method)
        else:
            resolved_profiles[position] = calculate_risk_return(ticker=current_ticker,
                                                                sample_prices=sample_prices[current_ticker],
                                                                method=method)
    profile_1, profile_2 = resolved_profiles

    # cov = sigma_1 * sigma_2 * rho
    return (profile_1['annual_volatility']
            * profile_2['annual_volatility']
            * correlation['correlation'])