In [1]:
%config InLineBackend.figure_format='retina'
In [2]:
import numpy as np
import pandas as pd
import altair as alt
import warnings
# Suppress every warning for the rest of the session so cell outputs stay uncluttered.
warnings.filterwarnings('ignore')
# Seed NumPy's global (legacy) random generator so the simulated streams are reproducible.
np.random.seed(42)
In [3]:
def correlated_streams(n: int, mean: float, risk: float, corr: float,
                       num_samples: int = 10_000) -> np.ndarray:
    """
    Generates n return streams that share the same mean and risk (std)
    and have the same pairwise correlation level corr.

    Parameters
    ----------
    n : int
        Number of return streams to generate (at least 1).
    mean : float
        Expected value of every stream.
    risk : float
        Standard deviation of every stream.
    corr : float
        Correlation between every pair of distinct streams. For n > 1 it must
        lie in [-1/(n-1), 1], otherwise the correlation matrix is not a valid
        (positive semi-definite) one.
    num_samples : int, optional
        Number of observations drawn per stream (default 10_000).

    Returns
    -------
    np.ndarray
        Array of shape (n, num_samples); row i holds the samples of stream i.

    Raises
    ------
    ValueError
        If n or num_samples is smaller than 1, or corr is outside the valid range.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}")
    if num_samples < 1:
        raise ValueError(f"num_samples must be at least 1, got {num_samples}")
    if n > 1:
        # An equicorrelation matrix has eigenvalues 1 + (n-1)*corr and 1 - corr,
        # so it is only a valid correlation matrix inside this interval.
        lowest_corr = -1.0 / (n - 1)
        tolerance = 1e-12
        if corr < lowest_corr - tolerance or corr > 1.0 + tolerance:
            raise ValueError(
                f"corr must be in [{lowest_corr:.4f}, 1] for n={n}, got {corr}"
            )

    means = np.full(n, mean)

    # Constant off-diagonal correlation, ones on the diagonal.
    corr_mat = np.full((n, n), corr, dtype=np.dtype("d"))
    np.fill_diagonal(corr_mat, 1)
    # Every stream has the same std, so the covariance is a plain rescaling.
    cov_mat = corr_mat * risk**2

    # Uses NumPy's global generator on purpose, so np.random.seed() controls the draw.
    streams = np.random.multivariate_normal(means, cov_mat, size=num_samples)

    # multivariate_normal returns (num_samples, n); transpose to one row per stream.
    return streams.T
        
In [4]:
# Sanity check of the generator: five streams with mean 10, risk 15
# and a pairwise correlation of 0.6.
n = 5
mean = 10
std = 15
corr = 0.6
streams = correlated_streams(n, mean, std, corr)
In [5]:
streams.mean(axis=1)
Out[5]:
array([10.12229747,  9.92797016,  9.98877207, 10.05103342,  9.90978558])
In [6]:
streams.std(axis=1)
Out[6]:
array([15.07254044, 15.05168254, 15.17926238, 15.2192544 , 15.14908131])
In [7]:
streams
Out[7]:
array([[  2.68277374,  26.2961793 ,  11.78031229, ...,  11.55402765,
          1.2952331 ,   9.08595787],
       [ 15.30537123,   7.39542686,  -2.82396479, ...,  24.86409114,
         14.01618077,  15.55587422],
       [ -5.2118669 ,  15.43746428,  15.57786758, ...,  -0.3456664 ,
        -14.55323808,  17.66691734],
       [ -1.09300273,   2.52764164,  25.44016093, ...,   1.64963583,
         20.81314878,  -1.26545688],
       [  7.59665087,  12.82385898,  28.68642545, ...,   1.39094052,
         16.2208108 ,   5.443885  ]])
In [8]:
np.corrcoef(streams)
Out[8]:
array([[1.        , 0.60676484, 0.61222918, 0.61179636, 0.60301561],
       [0.60676484, 1.        , 0.61036834, 0.61049393, 0.61073826],
       [0.61222918, 0.61036834, 1.        , 0.61526424, 0.61265281],
       [0.61179636, 0.61049393, 0.61526424, 1.        , 0.605607  ],
       [0.60301561, 0.61073826, 0.61265281, 0.605607  , 1.        ]])
In [9]:
def aggregate_risk(returns_streams: np.ndarray, n: int) -> float:
    """
    Returns the pooled risk (std) of the first n streams in returns_streams.

    The first n streams are combined with equal weights (their element-wise
    average) and the standard deviation of that combined stream is returned.

    Parameters
    ----------
    returns_streams : np.ndarray
        Array with one return stream per row.
    n : int
        Number of leading streams to pool (1 <= n <= len(returns_streams)).

    Returns
    -------
    float
        Standard deviation of the equally weighted combination.

    Raises
    ------
    ValueError
        If n is smaller than 1 or larger than the number of available streams.
    """
    if n < 1:
        # n == 0 would divide by zero and a negative n would slice from the end.
        raise ValueError(f"n must be at least 1, got {n}")
    if len(returns_streams) < n:
        raise ValueError(f"len of return_stream less than n: {n}")

    return (np.sum(returns_streams[:n], axis=0) / n).std()
In [10]:
# Simulation grid: for every per-asset risk level and every correlation level,
# measure the pooled risk of portfolios holding 1..max_assets assets.
max_assets = 20
assets = range(1, max_assets + 1)

mean = 10  # average mean return of 10%
risk_levels = range(1, 15)

index = pd.MultiIndex.from_product(
    [risk_levels, assets],
    names=["risk_level", "num_assets"],
)
simulated_data = pd.DataFrame(index=index)

correlations = np.arange(0.0, 0.8, 0.1)
for risk in risk_levels:
    for corr in correlations:
        # One draw of max_assets streams per (risk, corr) pair; the portfolio of
        # size k reuses the first k streams of that same draw.
        return_streams = correlated_streams(max_assets, mean, risk, corr)
        risk_level = np.array(
            [aggregate_risk(return_streams, num_assets) for num_assets in assets]
        )
        # Round the column label so float noise from np.arange does not leak into it.
        column = round(corr, 1)
        simulated_data.loc[(risk, ), column] = risk_level
simulated_data.columns.names = ["correlation"]
In [11]:
simulated_data.query("risk_level==14")
Out[11]:
correlation 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7
risk_level num_assets
14 1 14.139732 14.083466 14.075192 14.211693 14.016018 14.237709 14.047985 14.124923
2 9.971890 10.354355 10.939584 11.367708 11.798633 12.261220 12.441364 13.007990
3 8.109977 8.819959 9.606007 10.213402 10.950501 11.577389 11.903425 12.604708
4 6.986998 7.949406 8.896181 9.628273 10.510822 11.179527 11.647545 12.408229
5 6.264615 7.401336 8.439524 9.254206 10.242789 10.937341 11.493025 12.281325
6 5.716743 6.977136 8.096929 9.013415 10.042193 10.778337 11.398521 12.207259
7 5.295934 6.671270 7.880070 8.834330 9.888923 10.683292 11.307895 12.158006
8 4.947562 6.405502 7.712366 8.731689 9.763589 10.602420 11.246477 12.135535
9 4.661358 6.229094 7.544881 8.634614 9.666221 10.528369 11.204164 12.112906
10 4.442710 6.074949 7.430312 8.528163 9.601574 10.485156 11.166506 12.091789
11 4.239165 5.943563 7.345270 8.478594 9.548874 10.440516 11.138271 12.058338
12 4.067129 5.827279 7.279427 8.414704 9.504433 10.400230 11.108095 12.038645
13 3.899028 5.742483 7.214245 8.362606 9.467853 10.376009 11.072029 12.033051
14 3.748972 5.661275 7.144119 8.309664 9.434444 10.348896 11.043802 12.023408
15 3.621510 5.581000 7.080641 8.272900 9.400416 10.317606 11.041845 12.010188
16 3.504821 5.507856 7.029948 8.238891 9.373601 10.290396 11.019383 12.013128
17 3.388423 5.461192 6.990301 8.201566 9.354307 10.275620 11.009002 12.003413
18 3.286793 5.397432 6.960062 8.170513 9.332449 10.270364 10.995412 12.003475
19 3.202366 5.345436 6.932196 8.144062 9.323543 10.256719 10.982377 11.989463
20 3.127220 5.314700 6.896133 8.123222 9.304375 10.239964 10.973629 11.985602
In [12]:
def plot_risk_level(data: pd.DataFrame, risk_level: int) -> "alt.LayerChart":
    """
    Plots pooled portfolio risk against the number of assets for one risk level,
    with one line per correlation level.

    Parameters
    ----------
    data : pd.DataFrame
        Frame that can be queried on `risk_level` and that, once stacked and
        reset, exposes `num_assets` and `correlation` fields (i.e. an index
        with levels `risk_level` and `num_assets` and a columns axis named
        `correlation`).
    risk_level : int
        The per-asset risk level to select from `data`.

    Returns
    -------
    alt.LayerChart
        Invisible hover points layered with the risk lines; the line of the
        correlation level nearest to the mouse is drawn thicker.
    """
    subset = data.query(f"risk_level=={risk_level}")
    # Long format: one row per (risk_level, num_assets, correlation), value in 'risk'.
    stacked = subset.stack().reset_index(name='risk')

    chart = alt.Chart(data=stacked)

    # NOTE(review): `alt.selection(type='single')` and `add_selection` are the
    # Altair 4 spellings; newer Altair releases deprecate them in favour of
    # `selection_point` / `add_params` -- confirm the pinned version before changing.
    highlight = alt.selection(type='single', on='mouseover',
                              fields=['correlation'], nearest=True)

    base = chart.encode(
        alt.X("num_assets", axis=alt.Axis(title="Number of Assets")),
        alt.Y("risk", axis=alt.Axis(title="Risk[%]")),
        alt.Color("correlation:N", scale=alt.Scale(scheme='set2'))
    )
    # Fully transparent points exist only to carry the hover selection.
    points = base.mark_circle().encode(
        opacity=alt.value(0)
    ).add_selection(
        highlight
    ).properties(
        height=400,
        width=600,
        title="Risk % by number of assets in portfolio"
    )
    # Lines are thin (1) by default and thick (3) for the highlighted correlation.
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3)),
        tooltip=["correlation"]
    )
    return points + lines
In [13]:
plot_risk_level(simulated_data, 10)
Out[13]:

The plot shows how diversification benefits portfolios whose assets each have a risk level of 10 percent. More highly correlated portfolios do not benefit as much from increased diversification: you get only a small reduction in risk by adding highly correlated assets beyond a total of three or four. In contrast, you can halve the risk by adding just six or seven uncorrelated or, more realistically, weakly correlated assets to a portfolio. The benefit of diversification is reduced risk through exposure to different sources of trading revenue.

The insight that Dalio brings is that constructing a diversified portfolio from a combination of uncorrelated return streams significantly reduces your overall risk, which in turn raises your return-to-risk (Sharpe) ratio. By carefully mixing uncorrelated assets, you can capture this true low-risk alpha. This gives you the ability to add leverage and greatly increase your potential returns. This is the strategy that Dalio and Bridgewater have used successfully in their risk parity approach.