In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import scipy.optimize as optimize
import scipy.stats
import seaborn as sns
from scipy.stats import multivariate_normal as mvn
%matplotlib inline

# MAKE PRETTIER
from IPython.display import set_matplotlib_formats

set_matplotlib_formats('pdf', 'png')
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['font.size'] = 14

plt.rcParams['figure.autolayout'] = False
plt.rcParams['figure.figsize'] = 10, 6
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 14
plt.rcParams['ytick.labelsize'] = 14
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['font.size'] = 16
plt.rcParams['lines.linewidth'] = 2.0
plt.rcParams['lines.markersize'] = 8
plt.rcParams['legend.fontsize'] = 12

plt.rcParams['text.usetex'] = True
plt.rcParams['font.family'] = "serif"
#plt.rcParams['font.serif'] = "cm"


# ASSUMPTIONS

CORRELATION_VOLUME_CUSTODY = 0.65
COST_TO_CUSTODY = (1 + .015)**(1. / 365) - 1  # Daily cost to store/insure the user's funds

# Notes on USER INFO
#  - These are all raw percentages indicating how much each user type uses the exchange feature
#  - For example, Institutions do 65% of their trading through Smart Order Router and only 0.5% through mobile
#  - Liq is used to model how often that type of user is force-liquidated (minor impact)
USER_INFO = {
    'hold': {'sor': 0.2, 'liq': 0.005, 'mobile': 0.05},
    'millenial': {'sor': 0.25, 'liq': 0.005, 'mobile': 0.1},
    'europe': {'sor': 0.3, 'liq': 0.005, 'mobile': 0.05},
    'asia': {'sor': 0.2, 'liq': 0.005, 'mobile': 0.1},
    'institution': {'sor': 0.65, 'liq': 0.005, 'mobile': 0.005}
}

FEES = {
    'take': 11 * 1e-4,
    'make': -2 * 1e-4,
    'sor': 12 * 1e-4,
    'mobile': 25 * 1e-4
}

AFFILIATE_BREAK = 0.25  
AFFILIATE_FRAC = 0.15  # From exchange contract
LXDX_FRAC_EXCHANGE = 0.8
LXDX_FRAC_SOR = 0.9  # Users are likely more attuned to minimizing costs
TRADE_THROUGH_FRAC = 0.45
EFFECTIVE_SOR = 0.25
PROOF_OF_RISK = 0.05
EFFECTIVE_TOKEN_YIELD = 0.6
LXDX_PERCENT_TOKENS_IN_TREASURY = 0.25

TWO_YEARS = 365 * 2

user_types = ['hold', 'millenial', 'europe', 'asia', 'institution']

# PLOTTING METHODS

def paths_plot(ax, paths):
    for path in paths:
        ax.plot(path, color='k', alpha=0.1)
    ax.set_title("Simulated Total User Count Paths")
    ax.set_xlabel("Days from Launch")
    ax.set_ylabel("Total Users")

def hist(ax, data, label, **kwargs):
    
    round_floats = kwargs.pop("round_floats", False)
    currency = kwargs.pop("currency", False)
    bins = kwargs.pop("bins", 20)
    x_text_pos = kwargs.pop("xpos", 0.5)
    y_text_pos = kwargs.pop("ypos", 0.75)
    less_ticks = kwargs.pop("less_ticks", True)
    small_fonts = kwargs.pop("small_fonts", False)
    
    currency_sign = '' if not currency else '\$'
    mean = int(np.mean(data)) if round_floats else np.mean(data)
    f = "{:,}" if round_floats else "{:,.2f}"
    
    textstr = '$\mu=' + currency_sign + f.format(mean) +'$'
    formatter = plt.FuncFormatter(lambda x, loc: f.format(int(x) if round_floats else x))
    
    _, bins, _  = ax.hist(data, bins=bins, color='k', histtype='stepfilled')
    ax.ticklabel_format(axis='x', style='sci', scilimits=(-3,5))
    ax.axvline(np.mean(data), color='indianred')
    
    
    ax.set_xlabel(label) if not small_fonts else ax.set_xlabel(label, fontsize=10)
    ax.xaxis.set_major_formatter(formatter)
    if small_fonts:
        ax.text(x_text_pos, y_text_pos, textstr, transform=ax.transAxes, fontsize=10)
        [tick.label.set_fontsize(10) for tick in ax.xaxis.get_major_ticks()]
    else:
        ax.text(x_text_pos, y_text_pos, textstr, transform=ax.transAxes, fontsize=12)
    if less_ticks:
        ax.set_xticks(ax.get_xticks()[::2])

def revenues_plot(revenues, dates):
    fig, axes = plt.subplots(1, 3, figsize=(12,4))
    for i in range(3):
        l = 'Revenue {} days out'.format(dates[i])
        hist(axes[i], revenues[:, dates[i] - 1], l, round_floats=True, currency=True, xpos=.65)

def lxdx_value_plot(values, labels, dates):
    fig, axes = plt.subplots(1, 3, figsize=(12,4))
    for i in range(3):
        l = 'Value of {} {} days out'.format(labels[i], dates[i])
        hist(axes[i], values[:, dates[i] - 1], l, currency=True, xpos=.75)

def plot_demographics(axes, labels, user_composition):
    for i in range(len(axes)):
        axes[i].plot(user_composition[:, i], color='black')
        axes[i].set_ylabel('\% ' + labels[i])
    axes[-1].set_xlabel('Days From Launch') 
        
def plot_users_by_demographic(axes, labels, demo_user_paths):
    num_paths = len(demo_user_paths)
    for i in range(len(axes)):
        [axes[i].plot(demo_user_paths[k][:, i], color='black', alpha=.005) for k in range(num_paths)]
        axes[i].set_ylabel('Users: ' + labels[i])
        axes[i].set_title(labels[i])
        axes[i].grid(True)
    axes[-1].set_xlabel('Days From Launch')
    
def plot_3d_over_time(axes, labels, data):
    for i in range(data.shape[0]):
        for j in range(5):
            axes[j].plot(data[i, :, j], color='black', alpha=.01)
    for i in range(5):
        axes[i].set_title(labels[i])
        axes[i].set_ylabel("USD")
        axes[i].yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
        axes[i].grid(True)  
    axes[-1].set_xlabel("Days From Launch")


def bar_plot(ax, xs, f, xlabel, ylabel, title):
    ax.bar(xs,[f(i) for i in xs], color='black', alpha=0.75)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel) 
    ax.set_title(title)
    
def plot_nice(title, adjust=0.85,fontsize=15):
    plt.suptitle(title, fontsize=fontsize);
    plt.tight_layout()
    plt.subplots_adjust(top=adjust)
        
def single_fig():
    return plt.subplots(1, 1, figsize=(12, 6))
        
# SIM METHODS

def generate_trading_day_multipliers(num_days):
    return np.random.lognormal(0,0.1,size=num_days)
    
def dist_notional_at_t(t, col, futures):
    return np.asarray([future[0][t, col] for future in futures])

def dist_balance_at_t(t, col, futures):
    return np.asarray([future[1][t, col] for future in futures])
    
def generate_new_users(kind, num_users):
    
    cov = np.reshape([1, CORRELATION_VOLUME_CUSTODY, CORRELATION_VOLUME_CUSTODY, 1], (2,2))
    v = np.exp(mvn.rvs(mean = [0,0], cov= cov , size = int(num_users))) / (np.e**.5)
    kinds = {
        'hold': (v) * [50, 2000],
        'millenial': v * [150, 1550],
        'europe': v * [250, 3000],
        'asia': v * [300, 1550],
        'institution': v * [25000, 80000]
    }
    return kinds[kind]

def exchange_fee_revenue(user_type_notionals, sor_ratio, mobile_ratio,
                        include_tokenomics=False, full_output=False,
                         sor_only=False, exch_only=False):
    sor_use = user_type_notionals * sor_ratio
    mobile_use = user_type_notionals * mobile_ratio
    exch_use = user_type_notionals - sor_use - mobile_use  # If it's not mobile or SoR, it's on our platform
    
    if sor_only:
        mobile_use, exch_use = 0, 0
    
    if exch_only:
        mobile_use, sor_use = 0, 0
    
    # Here we're just averaging the make and take fees
    fees_collected =  exch_use * (FEES['take']  + FEES['make']) / 2.0 #  They trade with each other
    fees_collected -= fees_collected * AFFILIATE_BREAK * AFFILIATE_FRAC

    if not include_tokenomics:
        fees_collected += mobile_use * FEES['mobile']
        fees_collected += sor_use * FEES['sor']
        return fees_collected
    
    fees_collected_usd = fees_collected * (1 - LXDX_FRAC_EXCHANGE)
    fees_collected_tokens = fees_collected * LXDX_FRAC_EXCHANGE * (1 - TRADE_THROUGH_FRAC)
    
    
    mobile_fees, sor_fees = mobile_use * FEES['mobile'], sor_use * FEES['sor']
    fees_collected_usd += mobile_fees * (1 - LXDX_FRAC_EXCHANGE)
    fees_collected_tokens += mobile_fees * LXDX_FRAC_EXCHANGE
    
    fees_collected_usd += sor_fees * (1 - LXDX_FRAC_SOR)
    fees_collected_tokens += sor_fees * LXDX_FRAC_SOR * (1 - EFFECTIVE_SOR)
    
    # Proof of Risk
    fees_collected_tokens -= fees_collected_tokens * PROOF_OF_RISK
    
    if full_output:
        return (fees_collected_usd, fees_collected_tokens)
    
    fees_paid_out_to_yield = fees_collected_tokens * (1 - EFFECTIVE_TOKEN_YIELD)
    fees_retained_back_by_treasury = LXDX_PERCENT_TOKENS_IN_TREASURY * fees_paid_out_to_yield
    return fees_collected_usd + fees_collected_tokens - fees_paid_out_to_yield + fees_retained_back_by_treasury

def cost_per_signup(num_users):
    x = min(50., 12 + num_users /1e5)
    return max(4, np.random.normal(x, x))

def growth_model(marketing_spends, **kwargs):
    DAYS_PER_MONTH = 30.
    
    
    total_users = max(10000, np.random.normal(kwargs.pop('initial', 50000), kwargs.pop('initial_std', 25000)))
    
    organic = kwargs.pop('organic', 5000) / DAYS_PER_MONTH
    organic_std = kwargs.pop('organic_std', 3000) / DAYS_PER_MONTH
    organic_signups = np.random.normal(organic, organic_std, len(marketing_spends))
    
    max_organic = kwargs.pop('max_organic', 75000) / DAYS_PER_MONTH
    
    growth_lo = kwargs.pop('growth_lo', 0.01)
    growth_hi = kwargs.pop('growth_hi', 0.10)
    
    jumps = np.random.poisson(1./365, len(marketing_spends))
    
    monthly_growth_rate = np.random.uniform(growth_lo, growth_hi)
    
    monthly_growth = np.random.normal(monthly_growth_rate, monthly_growth_rate, len(marketing_spends)) / DAYS_PER_MONTH
    
    churn = kwargs.pop('churn', 0.02) / DAYS_PER_MONTH
    churn_std = kwargs.pop('churn_std', 0.01) / DAYS_PER_MONTH
    churned = np.maximum(0., np.random.normal(churn, churn_std, len(marketing_spends)))    
    users = np.zeros(len(marketing_spends)) * np.NaN

    for i, spend in enumerate(marketing_spends):
        if jumps[i] > 0:
            for j in range(i, i+30):
                if j >= len(marketing_spends):
                    continue
                monthly_growth[j] = np.random.normal(10 * monthly_growth_rate, monthly_growth_rate)  / DAYS_PER_MONTH
        total_users -= total_users * churn
        total_users += min(max_organic, organic_signups[i] + monthly_growth[i] * total_users)  # Organic
        total_users += spend / cost_per_signup(total_users)  # Paid
        total_users = int(total_users)
        users[i] = total_users
    return users

def simulate_futures(all_users, starting_t=0):
    
    master_users = np.asarray([generate_new_users(user_type, 2e6) for user_type in user_types])
    master_notional = np.cumsum(master_users[:, :, 0].T, axis=0)
    master_balance = np.cumsum(master_users[:, :, 1].T, axis=0)
    
    def inner_lookup(master, value, col):
        return master[int(value), col]
    
    notionals = np.zeros((all_users.shape[0], all_users.shape[1], 5))
    balances = np.zeros((all_users.shape[0], all_users.shape[1], 5))
    
    for i in range(all_users.shape[0]):
        for j in range(all_users.shape[1]):
            notionals[i, j] = np.asarray([inner_lookup(master_notional, all_users[i, j, k], k) for k in range(5)])
            balances[i, j] = np.asarray([inner_lookup(master_balance, all_users[i, j, k], k) for k in range(5)])
    return notionals, balances

def quick_sim(odds, f):
    Ps = np.vstack((odds)).T
    rem = 1 - np.sum(Ps, axis=1)
    P = np.c_[Ps, rem]
    
    res = np.zeros(len(P))
    for i in range(len(P)): 
        year_became_binance = np.random.choice([1, 2, 3, 4, 5, 6], 100, p=P[i,:])
        res[i] = f(year_became_binance)
    return np.mean(res)

Introduction

Valuation of an asset such as our LXDX Token typically follows either a top-down or a bottom-up approach. From the top down, we might compare our token to similar exchange tokens and infer, via relative value, where it should trade. From the bottom up, we might model the cash flows of the exchange as they pertain to the LXDX Token Yield, and the value of an option on those cash flows. (Thinking of each LXDX Token as a call on those flows is generally pretty "close".)

Given that we are pre-launch, there's a good deal of uncertainty. How do we assess our likelihood of drawing users to the platform, of growing that user base, and of estimating how much revenue each user will likely contribute?

Our analysis here takes a hybridized approach. We lead off with a bottom-up approach based on Monte Carlo simulations and then, using relative value, compare these outputs to where the market is valuing other exchange tokens.

We then, as a sanity check, take the bottom-up approach a bit further and decompose the revenue streams into the constituent pieces of the LXDX Token economics. While doing so, we draw parallels to KuCoin's KCS token, which, through its large revenue share, has some similar mechanics.

In engineering, the word "trade" has a very different meaning; it's used to express the "trade-off" of choosing one approach vs another. You may increase the reliability of a system by using a thicker metal, but then it becomes heavier, which may stress another system. Exchange token economics have many trades but the core trade is:

  • How much of the exchange's success is owned by token holders vs how much is owned by the exchange itself?

That question warrants significant and nuanced discussion that we do not provide here.

Assumptions

A lot of simplifying assumptions are made for brevity's sake. We make a best attempt to call out such assumptions, but as with any model, adding complexity often leads to adding obscurity.

Tack

We provide all of the code for the discussion at hand. As we step through the analysis, we make best efforts to provide the math, the code, and the justifications at each step. This is somewhat at odds with brevity, but we believe it will help answer the various modeling questions as they come up.

Thesis

Our core thesis is that our token represents a great value. We intend to demonstrate its strong value across a few varying approaches:

  • Via comparison to similar exchange tokens
  • Via comparison modeling of the token yield component (Comparing to KuCoin's KCS)
  • Via direct modeling of the token yield component
  • By demonstrating that our business model is robust, and that this robustness has magnifying effects on all of the above

Modeling Users

We lead off by modeling our customers. We break our users into five demographics, with varying usage patterns. Getting into the nitty-gritty is out of scope, but generally, we generate correlated lognormal distributions representing their level of activity and the balance that they carry at the exchange. Each demographic has slightly different assumed relative usage of core systems such as mobile or smart routing.
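
As a quick illustration of that correlation structure (a sketch only, reusing the generate_new_users helper and the CORRELATION_VOLUME_CUSTODY constant from the setup cell; the sample size here is arbitrary), the log of a user's daily notional and the log of their carried balance should come out correlated at roughly 0.65:

# Sketch: empirical correlation of log(daily notional) and log(balance) for one
# demographic; both columns are driven by the same bivariate normal draw.
sample = generate_new_users('millenial', 100000)
print(np.corrcoef(np.log(sample[:, 0]), np.log(sample[:, 1]))[0, 1])  # ~0.65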

The following code renders a few histograms reflecting the daily trading volume and daily balance carried per demographic.

In [2]:
fig, axes = plt.subplots(2, 5, figsize=(15,5))
axes = axes.flatten()
sample_user_notionals = [generate_new_users(user_type, 1000) for user_type in user_types]
for i in range(5):
    u = sample_user_notionals[i][:,0]
    hist(axes[i], u, user_types[i]+':'+'USD', round_floats=True, currency=True, xpos=.4)
for i in range(5,10):
    u = sample_user_notionals[i-5][:,1]
    hist(axes[i], u, user_types[i-5]+':'+'USD', round_floats=True, currency=True, xpos=.4)
plot_nice("Volume (USD) Traded by Customer Type (Top) and Notional @ Exchange (Bottom)", 0.9)

Modeling Signups

The other primary input to our bottom-up approach is modeling user growth over time. There are a few major components to this:

  • Modeling overall growth per unit time and per dollar invested in marketing
  • Modeling the random demographic mix of our users as described above

While one could certainly build very complex models here, we keep it simple.

User Acquisition Cost

We model the cost of user acquisition as a function of the number of current users. We sample from a simple gaussian, but the mean and standard deviation of the gaussian increase as the total user base grows. In plain English, marketing spend (while expanding awareness) is most effective early on; its impact diminishes and its effectiveness becomes less certain as the product grows more popular.
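
For a feel of the numbers (illustrative only, reusing the cost_per_signup helper from the setup cell; the user-base sizes and sample count are arbitrary), the sampled cost per acquired user rises as the user base grows:

# Illustrative only: average sampled acquisition cost at a few user-base sizes.
for n in [10000, 250000, 1000000, 4000000]:
    print(n, round(np.mean([cost_per_signup(n) for _ in range(10000)]), 2))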

Organic Growth

We model organic word-of-mouth growth (in monthly units) with both a fixed and a relative percentage growth component (for example: 5,000 users per month plus 5% total growth per month). We cap this organic growth at a monthly rate of 75,000 users; essentially, this is the most "free" users we can get without spending to bring them in the door.

Mathwise, we model the fixed component as a gaussian. Sensible defaults are 5000 $\pm$ 5000 per month.

The driving variable here is the percentage growth, which we model at two levels: we first select a true growth rate from a uniform distribution, reflecting our lack of certainty in our prior on the growth rate -- by default we choose between 1% and 10%. We then sample each step from a gaussian centered at that chosen growth rate, with a standard deviation equal to the chosen mean.

Lastly, we have a small jump component representing either a spike in interest in our exchange or a jump in overall market interest. We model this via a Poisson process with an expected rate of one jump per year; when a jump occurs, growth increases significantly for the next 30 days. (So a 3% monthly rate becomes, for a short window, roughly 30%. This still obeys our 75,000-per-month cap, so a jump maximally adds ~50k users.)
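
As a minimal one-day sketch of the organic component (assuming the 5,000 $\pm$ 5,000 fixed signups, the 1-10% monthly rate, and the 75,000/month cap quoted above; growth_model in the setup cell applies the same logic per day):

# Sketch of one day's organic signups: fixed component plus relative growth,
# capped at 75,000 per month, all expressed in daily units.
DAYS_PER_MONTH = 30.
rate = np.random.uniform(0.01, 0.10)                      # latent monthly growth rate
fixed = np.random.normal(5000, 5000) / DAYS_PER_MONTH     # fixed daily signups
relative = np.random.normal(rate, rate) / DAYS_PER_MONTH  # relative daily growth
total_users = 50000
organic_today = min(75000 / DAYS_PER_MONTH, fixed + relative * total_users)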

Churn

Churn, given that we're on the steep, upward-sloping portion of the user growth curve, is a minor model parameter. We model it as a random percentage loss of users in monthly units of approximately 2%, drawing from a gaussian clipped at zero.
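
The corresponding daily draw is just a gaussian in monthly units, divided by 30 and clipped at zero (a sketch of what growth_model above does each day):

# Daily churn fraction: ~2% +/- 1% per month, converted to daily units and
# clipped at zero so churn can never be negative.
daily_churn = np.maximum(0., np.random.normal(0.02 / 30., 0.01 / 30.))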

Time Period and Initial Conditions

We'll be building out our models to simulate the two years of exchange growth from launch date.

A significant driver in any such model is initial conditions. How many users are signed up at launch? It might be tempting to look at other exchanges' "pre-signups", but this can be misleading given how hard it is to pin down when many of the rival platforms really started. Think of the initial user count less as the number of users literally signed up pre-launch and more as the near-launch user count (which prices in the initial flood of signups over the first few days).

We use the very conservative mean value of 50,000 $\pm$ 25,000 (floored on the low side at 10,000) in our internal models. Obviously, a huge launch where you quickly get to a few hundred thousand users would be great. KuCoin got to a million users in 4 months and 2 million in 7 months, so it does happen (if you believe those numbers, that is), but we prefer to be conservative here.
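
For reference, a quick look at the spread of launch-day user counts this assumption implies (the same draw growth_model performs internally; the sample size here is arbitrary):

# Distribution of the launch-day user count: gaussian 50,000 +/- 25,000,
# floored at 10,000 on the low side.
draws = np.maximum(10000, np.random.normal(50000, 25000, size=100000))
print(np.percentile(draws, [10, 50, 90]).astype(int))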

Before getting into the specifics of the mixed demographic model, let's take a look at a few sample user growth paths.

In [3]:
fig, ax = single_fig()
assumptions = {'organic': 6000, 'organic_std': 6000, 'growth_lo': .02, 'growth_hi': .10}
marketing_spend = np.ones(TWO_YEARS) * 10.  # Spending 10$ a day on marketing! Almost 1 user!

paths = [growth_model(marketing_spend, **assumptions) for i in range(100)]
paths_plot(ax, paths)

Marketing

The assumptions about marketing spend's impact on user growth are consequential; even with the uncertainty we inject, there's still cause to be conservative in any estimates.

Our token sale is a 38M USD raise; approximately 5.5M of that is devoted purely to marketing spend. Given the nature (and simplicity) of our model here, we amortize out that spend over the next two years to avoid unfairly optimistic "frontloaded" spend. Much of this budget is devoted to staffing: content writers, senior and junior marketing hires, two institutional sales veterans, etc.

To be conservative, we don't include any other possible marketing spend, although some would certainly exist. On the modeling side, be aware that the larger the relative spend, the greater the variance reduction in total user counts.

In [4]:
fig, ax = single_fig()

assumptions = {'organic': 5000, 'organic_std': 5000, 'growth_lo': .01, 'growth_hi': .10}
marketing_budget = 5.5e6
daily_spend = 5.5e6 / TWO_YEARS
marketing_spend = np.ones(TWO_YEARS) * daily_spend

paths = [growth_model(marketing_spend, **assumptions) for i in range(100)]
paths_plot(ax, paths)

Demographics

We take a slightly different tack for modeling demographic composition; we begin with the following initial estimates of our user composition:

  • 25%, 30%, 20%, 25%, and 0.1% for our categories of Hold, Millenial, Europe, Asia, and Institution

We then perform random walks, perturbing the composition at each step. (The variance of the step is relative to the initial percentage -- this means that the institutional composition won't change much)

I've left the code in-line to make it clear that we're doing this via a bit of hack-ish GBM.

Modeling the number of large institutional customers by a percentage is a bit fragile; for our more "business" internal models (aka Excel), we typically model these customers separately.

In [5]:
fig, axes = plt.subplots(5, 1, figsize=(12,12), sharex=True)
def demographics_via_gbm(S0, var_denom = 10.):
    # S0 need not sum to 1, it doesn't actually matter; we guard!
    X = np.vstack([np.random.normal(0, S0[i] / var_denom, TWO_YEARS) for i in range(5)]).T
    X = np.cumsum(X, axis=0)
    S = np.maximum(0.0, S0 * np.exp(X))  # This should be done via reflection
    return S / np.sum(S, axis=1)[:, np.newaxis] # Normalize to sum to 1!
    
S0 = [0.25, 0.3, 0.2, 0.25, 0.001]
user_mix = demographics_via_gbm(S0)

plot_demographics(axes, ['Hold', 'Millenial', 'European', 'Asia', 'Institutional'], user_mix)
plot_nice("Relative Shares of Our Customer Base by Demographic", 0.95, fontsize=18)

Putting It Together

We're now able to combine the user growth assumptions with our demographic assumptions to create paths-per-user-type.

Why Do We Care?

Remember that different types of users have vastly different expected values to our revenue stream. If our marketing only succeeds at attracting low-value users to our platform, we can post a high user count without generating much in trading fees.
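
To make that concrete, here is an illustrative (not definitive) back-of-the-envelope estimate of the average daily exchange-fee revenue a single user of each type contributes, reusing USER_INFO, FEES, generate_new_users, and exchange_fee_revenue from the setup cell; the sample size is arbitrary and tokenomics are ignored:

# Rough per-user daily fee revenue by demographic (tokenomics ignored):
# mean simulated notional times each type's exchange/SoR/mobile fee mix.
for user_type in user_types:
    mean_notional = np.mean(generate_new_users(user_type, 10000)[:, 0])
    info = USER_INFO[user_type]
    daily_rev = exchange_fee_revenue(mean_notional, info['sor'], info['mobile'])
    print("{:<12} ${:,.2f}".format(user_type, daily_rev))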

In [6]:
fig, axes = plt.subplots(5, 1, figsize=(12,12), sharex=True)

# It's really important that you are careful with these NumPy broadcasts
user_mixtures = np.asarray([demographics_via_gbm(S0) for i in range(50)])
true_users = np.asarray([np.multiply(user_mixtures, paths[i][:,np.newaxis]) for i in range(len(paths))])
true_users = true_users.reshape(-1, true_users.shape[-2], true_users.shape[-1])

plot_users_by_demographic(axes, ['Hold', 'Millenial', 'European', 'Asia', 'Institutional'], true_users[::10])
plot_nice("Relative Shares of Our Customer Base by Demographic", 0.90, fontsize=18)

Reframing A Bit

This helps, but it's a bit hard to look at. What about a function that lets us look at the expected number of users at a given point in time by demographic?

In [7]:
def hist_users_at_t(ax, t, col, downsample=10):
    data = [true_users[k][t, col] for k in range(len(true_users))]
    _, bins, _  = ax.hist(data, bins=20, color='k', histtype='stepfilled', density=True)
    y = scipy.stats.norm.pdf(bins, np.mean(data), np.std(data))
    ax.plot(bins, y, 'r--', linewidth=2)


fig, axes = plt.subplots(1, 5, figsize=(14,3))
DAYS_FROM_LAUNCH = 180 # Can play with this
for i, ax in enumerate(axes):
    hist_users_at_t(ax, DAYS_FROM_LAUNCH, i)

plot_nice("Histograms of Users By Type At Point T (Days From Launch) in Time")

Notional Traded

With our customer behavior models and our customer growth models in hand, we can now model the amount of trading volume we'll see as we move through the bottom-up analysis.

We'll first sanity-check that volume against other exchanges' volumes, and then move quickly through the valuation of the amount traded.

In [8]:
fig, axes = plt.subplots(5, 1, figsize=(12,12), sharex=True)
notionals, balances = simulate_futures(true_users)

plot_3d_over_time(axes, ['Hold', 'Millenial', 'European', 'Asia', 'Institutional'], notionals[::50])
plot_nice("Notional Traded By Demographic", 0.90, fontsize=20)