Tree shape diversity¶

The plot elements below will be saved to `/analysis/output/NDS-LB/`¶

In [1]:

Copied!





import os
import tempfile
cache_dir = tempfile.mkdtemp()
os.environ['XDG_CACHE_HOME'] = cache_dir
import glob
import pickle
import pandas as pd
import numpy as np
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize, to_hex
from svgutils.compose import *
import altair as alt

import utils.trees as ut
import utils.phenotype_colorscales as pc
import os
import tempfile
cache_dir = tempfile.mkdtemp()
os.environ['XDG_CACHE_HOME'] = cache_dir
import glob
import pickle
import pandas as pd
import numpy as np
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize, to_hex
from svgutils.compose import *
import altair as alt

import utils.trees as ut
import utils.phenotype_colorscales as pc

In [2]:

parameters

Copied!





results = "results"
ranking_subdir = 'naive_reversions_first'
scale = 20 # must be 5 or 20
metadata_csv = "../../gc_metadata.csv"
outbase = "_ignore/NDS-LB/"
workflow_env_exec = True
results = "results"
ranking_subdir = 'naive_reversions_first'
scale = 20 # must be 5 or 20
metadata_csv = "../../gc_metadata.csv"
outbase = "_ignore/NDS-LB/"
workflow_env_exec = True

In [3]:

injected-parameters

Copied!





# Parameters
results = "."
ranking_subdir = "default"
metadata_csv = "gc_metadata.csv"
outbase = "."
workflow_env_exec = True
# Parameters
results = "."
ranking_subdir = "default"
metadata_csv = "gc_metadata.csv"
outbase = "."
workflow_env_exec = True

In [4]:

Copied!





if workflow_env_exec:
    os.environ['PYTHONHASHSEED'] = '0'
    os.environ['MPLBACKEND'] = 'Agg'
    os.environ['MPLCONFIGDIR'] = '/tmp/matplotlib'
    os.environ["QT_QPA_PLATFORM"] = "offscreen"
    os.environ["XDG_RUNTIME_DIR"] = "/tmp/xdg"
if workflow_env_exec:
    os.environ['PYTHONHASHSEED'] = '0'
    os.environ['MPLBACKEND'] = 'Agg'
    os.environ['MPLCONFIGDIR'] = '/tmp/matplotlib'
    os.environ["QT_QPA_PLATFORM"] = "offscreen"
    os.environ["XDG_RUNTIME_DIR"] = "/tmp/xdg"

In [5]:

Copied!

output_dir = f"{outbase}/{ranking_subdir}"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
output_dir = f"{outbase}/{ranking_subdir}"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

Load GC trees and constuct a data frame of statistics¶

In [6]:

Copied!





# results = "../nextflow/results"
metadata = pd.read_csv(metadata_csv, index_col=0)
metadata.query("(strain == 'wt') & (cell_type == 'GC') & (imm_duration != 'w10')", inplace=True)
metadata
# results = "../nextflow/results"
metadata = pd.read_csv(metadata_csv, index_col=0)
metadata.query("(strain == 'wt') & (cell_type == 'GC') & (imm_duration != 'w10')", inplace=True)
metadata

Out[6]:

	ngs_id	imm_duration	mouse	gc	strain	node	cell_type	plate	hc_barcode	lc_barcode	row	col
uid
D15_M1_GC1	PR-2-01	d15	1	1	wt	RP	GC	2	9	9	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
D15_M1_GC2	PR-2-01	d15	1	2	wt	RI	GC	3	2	1	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
D15_M1_GC3	PR-2-01	d15	1	3	wt	LI	GC	4	14	2	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
D15_M2_GC4	PR-2-01	d15	2	4	wt	RP	GC	5	10	11	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
D15_M3_GC5	PR-2-01	d15	3	5	wt	RP	GC	6	7	4	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
...	...	...	...	...	...	...	...	...	...	...	...	...
D20_M24_GC115	PR-1-04	d20	24	115	wt	RP	GC	72	16	16	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
D20_M25_GC116	PR-1-02	d20	25	116	wt	RP	GC	65	8	8	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
D20_M25_GC117	PR-1-03	d20	25	117	wt	RP	GC	68	9	9	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
D20_M25_GC118	PR-1-08	d20	25	118	wt	RP	GC	66	15	15	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12
D20_M25_GC119	PR-1-08	d20	25	119	wt	RP	GC	67	16	16	A.B.C.D.E.F.G.H	1.2.3.4.5.6.7.8.9.10.11.12

119 rows × 12 columns

In [7]:

Copied!

len(metadata.index.unique())
len(metadata.index.unique())

Out[7]:

In [8]:

Copied!

metadata.imm_duration.value_counts()
metadata.imm_duration.value_counts()

Out[8]:

imm_duration
d20    67
d15    52
Name: count, dtype: int64

In [9]:

Copied!

trees = ut.load_trees(metadata, results, ranking_subdir)
len(trees)
trees = ut.load_trees(metadata, results, ranking_subdir)
len(trees)

Out[9]:

In [10]:

Copied!





stat = "REI"
# taus = np.logspace(-2, 0, 10)
taus = np.linspace(0.05, 1, 20)
# taus = [0.05, 0.1, 1]

# stat = "LBI"
# taus = np.logspace(-1, 1, 50)
stat = "REI"
# taus = np.logspace(-2, 0, 10)
taus = np.linspace(0.05, 1, 20)
# taus = [0.05, 0.1, 1]

# stat = "LBI"
# taus = np.logspace(-1, 1, 50)

In [11]:

Copied!





df = pd.DataFrame()

row = 0
for tau in taus:
    for key, tree in trees.items():
        pr, mouse, gc = key.split("_")
        mouse = mouse.lstrip("mouse")

        ut.burst_stat(tree, stat, tau)
        
        gc = gc.lstrip("GC")
        df.loc[row, "uid"] = key
        df.loc[row, "τ"] = tau
        df.loc[row, "ngs_id"] = metadata.ngs_id[key]
        df.loc[row, "mouse"] = str(metadata.mouse[key])
        df.loc[row, "GC"] = str(metadata.gc[key])
        df.loc[row, "time"] = metadata.imm_duration[key]
        
        abundances = np.array([node.abundance for node in tree.tree.traverse()])

        if len(tree.tree.children) == 1:
            print(f"NOTE: tree {key} has only one root clade")
        root = tree.tree

        clade_sizes = [sum(node.abundance for node in child.traverse()) for child in root.children]
        df.loc[row, "cells sampled"] = sum(abundances)
        df.loc[row, "normalized dominance score"] = max(clade_sizes) / sum(clade_sizes)
        df.loc[row, f"maximum {stat}"] = np.nanmax([getattr(node, stat) for node in tree.tree.traverse()])    
        
        # note: additive delta_bind for now
        delta_bind_dat = []
        for node in tree.tree.traverse():
            if not np.isnan(node.delta_bind):
                for _ in range(node.abundance):
                    delta_bind_dat.append(node.delta_bind)
        df.loc[row, "max Δaffinity"] = np.nanmax(delta_bind_dat)
        df.loc[row, "95th percentile Δaffinity"] = np.percentile(delta_bind_dat, 95)
        df.loc[row, "mean Δaffinity"] = np.nanmean(delta_bind_dat)
        df.loc[row, "median Δaffinity"] = np.nanmedian(delta_bind_dat)

        max_stat_idx = np.nanargmax([getattr(node, stat) for node in tree.tree.traverse()])
        df.loc[row, f"id of max {stat} node"] = [node.name for node in tree.tree.traverse()][max_stat_idx]
        df.loc[row, f"mutations in max {stat} node"] = [node.aa_substitutions for node in tree.tree.traverse()][max_stat_idx]
        df.loc[row, f"Δaffinity of max {stat} node"] = [node.delta_bind for node in tree.tree.traverse()][max_stat_idx]
        df.loc[row, f"ΔΔaffinity of max {stat} node"] = [node.delta_bind - (node.up.delta_bind if node.up is not None else 0) for node in tree.tree.traverse()][max_stat_idx]
        df.loc[row, f"max {stat} node is root"] = max_stat_idx == 0

        row += 1

df
df = pd.DataFrame()

row = 0
for tau in taus:
    for key, tree in trees.items():
        pr, mouse, gc = key.split("_")
        mouse = mouse.lstrip("mouse")

        ut.burst_stat(tree, stat, tau)
        
        gc = gc.lstrip("GC")
        df.loc[row, "uid"] = key
        df.loc[row, "τ"] = tau
        df.loc[row, "ngs_id"] = metadata.ngs_id[key]
        df.loc[row, "mouse"] = str(metadata.mouse[key])
        df.loc[row, "GC"] = str(metadata.gc[key])
        df.loc[row, "time"] = metadata.imm_duration[key]
        
        abundances = np.array([node.abundance for node in tree.tree.traverse()])

        if len(tree.tree.children) == 1:
            print(f"NOTE: tree {key} has only one root clade")
        root = tree.tree

        clade_sizes = [sum(node.abundance for node in child.traverse()) for child in root.children]
        df.loc[row, "cells sampled"] = sum(abundances)
        df.loc[row, "normalized dominance score"] = max(clade_sizes) / sum(clade_sizes)
        df.loc[row, f"maximum {stat}"] = np.nanmax([getattr(node, stat) for node in tree.tree.traverse()])    
        
        # note: additive delta_bind for now
        delta_bind_dat = []
        for node in tree.tree.traverse():
            if not np.isnan(node.delta_bind):
                for _ in range(node.abundance):
                    delta_bind_dat.append(node.delta_bind)
        df.loc[row, "max Δaffinity"] = np.nanmax(delta_bind_dat)
        df.loc[row, "95th percentile Δaffinity"] = np.percentile(delta_bind_dat, 95)
        df.loc[row, "mean Δaffinity"] = np.nanmean(delta_bind_dat)
        df.loc[row, "median Δaffinity"] = np.nanmedian(delta_bind_dat)

        max_stat_idx = np.nanargmax([getattr(node, stat) for node in tree.tree.traverse()])
        df.loc[row, f"id of max {stat} node"] = [node.name for node in tree.tree.traverse()][max_stat_idx]
        df.loc[row, f"mutations in max {stat} node"] = [node.aa_substitutions for node in tree.tree.traverse()][max_stat_idx]
        df.loc[row, f"Δaffinity of max {stat} node"] = [node.delta_bind for node in tree.tree.traverse()][max_stat_idx]
        df.loc[row, f"ΔΔaffinity of max {stat} node"] = [node.delta_bind - (node.up.delta_bind if node.up is not None else 0) for node in tree.tree.traverse()][max_stat_idx]
        df.loc[row, f"max {stat} node is root"] = max_stat_idx == 0

        row += 1

df

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade

NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade

NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade

NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

NOTE: tree D20_M20_GC61 has only one root clade
NOTE: tree D20_M20_GC62 has only one root clade
NOTE: tree D20_M20_GC64 has only one root clade
NOTE: tree D20_M21_GC71 has only one root clade

NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade

Out[11]:

	uid	τ	ngs_id	mouse	GC	time	cells sampled	normalized dominance score	maximum REI	max Δaffinity	95th percentile Δaffinity	mean Δaffinity	median Δaffinity	id of max REI node	mutations in max REI node	Δaffinity of max REI node	ΔΔaffinity of max REI node	max REI node is root
0	D15_M1_GC1	0.05	PR-2-01	1	1	d15	67.0	0.462687	0.062726	1.57098	1.323038	0.497280	0.773400	seq36	A105(H)G S109(L)R	1.24129	0.79500	False
1	D15_M1_GC2	0.05	PR-2-01	1	2	d15	79.0	0.435897	0.028576	1.44539	1.299505	0.381382	0.397360	seq41	D107(H)E	0.20896	0.20896	False
2	D15_M1_GC3	0.05	PR-2-01	1	3	d15	73.0	0.506849	0.048843	1.96260	1.291822	0.504928	0.535100	seq41	A105(H)G	0.44629	0.44629	False
3	D15_M2_GC4	0.05	PR-2-01	2	4	d15	44.0	0.340909	0.093466	0.88793	0.801909	0.092011	0.287630	seq8	A105(H)G	0.44629	0.44629	False
4	D15_M3_GC5	0.05	PR-2-01	3	5	d15	78.0	0.213333	0.040110	1.59914	1.107394	-0.214632	0.000000	naive		0.00000	0.00000	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2375	D20_M24_GC115	1.00	PR-1-04	24	115	d20	85.0	0.882353	1.000000	1.94591	1.830034	1.094204	1.046480	naive		0.00000	0.00000	True
2376	D20_M25_GC116	1.00	PR-1-02	25	116	d20	78.0	0.884615	1.000000	2.38422	1.655125	0.784909	1.105020	naive		0.00000	0.00000	True
2377	D20_M25_GC117	1.00	PR-1-03	25	117	d20	76.0	0.407895	1.000000	1.78818	1.578093	0.632779	0.608315	naive		0.00000	0.00000	True
2378	D20_M25_GC118	1.00	PR-1-08	25	118	d20	70.0	0.985714	1.000000	1.42049	1.351937	0.288105	0.492355	naive		0.00000	0.00000	True
2379	D20_M25_GC119	1.00	PR-1-08	25	119	d20	81.0	0.580247	1.000000	1.68377	1.527660	0.582019	0.700545	naive		0.00000	0.00000	True

2380 rows × 18 columns

In [12]:

Copied!

df.groupby("τ")[f"max {stat} node is root"].agg(lambda group: group.astype(int).sum() / len(group)).plot(xlabel="τ", ylabel=f"fraction GCs with root as max {stat} node", figsize=(5, 4));
# plt.xscale("log")
df.groupby("τ")[f"max {stat} node is root"].agg(lambda group: group.astype(int).sum() / len(group)).plot(xlabel="τ", ylabel=f"fraction GCs with root as max {stat} node", figsize=(5, 4));
# plt.xscale("log")

No description has been provided for this image

In [13]:

Copied!

taus
taus

Out[13]:

array([0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55,
       0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1.  ])

In [14]:

Copied!





g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"Δaffinity of max {stat} node", y="median Δaffinity", col="τ",
                col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
                facet_kws={"sharex": False, "sharey": True})
for tau, ax in zip(taus, g.axes):
    ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"Δaffinity of max {stat} node", y="median Δaffinity", col="τ",
                col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
                facet_kws={"sharex": False, "sharey": True})
for tau, ax in zip(taus, g.axes):
    ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()

In [15]:

Copied!

sns.scatterplot(data=df.loc[df.τ == taus[0]], x="cells sampled", y=f"median Δaffinity", hue="mouse", style="ngs_id", legend=False)
plt.show()

sns.scatterplot(data=df.loc[df.τ == taus[0]], x="cells sampled", y=f"max Δaffinity", hue="mouse", style="ngs_id", legend=False)
plt.show()
sns.scatterplot(data=df.loc[df.τ == taus[0]], x="cells sampled", y=f"median Δaffinity", hue="mouse", style="ngs_id", legend=False)
plt.show()

sns.scatterplot(data=df.loc[df.τ == taus[0]], x="cells sampled", y=f"max Δaffinity", hue="mouse", style="ngs_id", legend=False)
plt.show()

In [16]:

Copied!





g = sns.relplot(data=df.loc[df.τ.isin(taus)], x="cells sampled", y=f"maximum REI", col="τ",
                col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
                facet_kws={"sharex": True, "sharey": False})
for tau, ax in zip(taus, g.axes):
    ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x="cells sampled", y=f"maximum REI", col="τ",
                col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
                facet_kws={"sharex": True, "sharey": False})
for tau, ax in zip(taus, g.axes):
    ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()

In [17]:

Copied!





g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"maximum REI", y="median Δaffinity", col="τ",
                col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
                facet_kws={"sharex": False, "sharey": True})
for tau, ax in zip(taus, g.axes):
    ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"maximum REI", y="median Δaffinity", col="τ",
                col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
                facet_kws={"sharex": False, "sharey": True})
for tau, ax in zip(taus, g.axes):
    ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()

In [18]:

Copied!





g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"maximum REI", y=f"Δaffinity of max {stat} node", col="τ",
                col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
                facet_kws={"sharex": False, "sharey": False})
for tau, ax in zip(taus, g.axes):
    ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"maximum REI", y=f"Δaffinity of max {stat} node", col="τ",
                col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
                facet_kws={"sharex": False, "sharey": False})
for tau, ax in zip(taus, g.axes):
    ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()

In [19]:

Copied!





cor_df = pd.DataFrame()
for gc, group_df in df.groupby("τ"):
    ρ_node, p_node = sp.stats.spearmanr(group_df[f"Δaffinity of max {stat} node"], group_df[f"maximum {stat}"])
    ρ_gc, p_gc = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"maximum {stat}"])
    ρ_aff, p_aff = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"Δaffinity of max {stat} node"])
    cor_df.loc[gc, "ρ_node"] = ρ_node
    cor_df.loc[gc, "ρ_gc"] = ρ_gc
    cor_df.loc[gc, "p_node"] = p_node
    cor_df.loc[gc, "p_gc"] = p_gc
    cor_df.loc[gc, "ρ_aff"] = ρ_aff
    cor_df.loc[gc, "p_aff"] = p_aff

plt.figure(figsize=(5, 7))
plt.subplot(2, 1, 1)
plt.title(f"correlations with maximum {stat}")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_aff", label=f"median Δaffinity Vs Δaffinity of max {stat} node")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_gc", label=f"median Δaffinity Vs max {stat}")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_node", label=f"Δaffinity of max {stat} node Vs max {stat}")
plt.axhline(0, color="k", ls="--", lw=1)
# plt.xlabel("τ")
plt.ylabel("Spearman ρ")
plt.ylim(-1, 1)
plt.legend()

plt.subplot(2, 1, 2)
sns.lineplot(data=cor_df, x=cor_df.index, y="p_aff")
sns.lineplot(data=cor_df, x=cor_df.index, y="p_gc")
sns.lineplot(data=cor_df, x=cor_df.index, y="p_node")
plt.xlabel("τ")
plt.ylabel("p-value")
plt.yscale("log")
plt.show()
cor_df = pd.DataFrame()
for gc, group_df in df.groupby("τ"):
    ρ_node, p_node = sp.stats.spearmanr(group_df[f"Δaffinity of max {stat} node"], group_df[f"maximum {stat}"])
    ρ_gc, p_gc = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"maximum {stat}"])
    ρ_aff, p_aff = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"Δaffinity of max {stat} node"])
    cor_df.loc[gc, "ρ_node"] = ρ_node
    cor_df.loc[gc, "ρ_gc"] = ρ_gc
    cor_df.loc[gc, "p_node"] = p_node
    cor_df.loc[gc, "p_gc"] = p_gc
    cor_df.loc[gc, "ρ_aff"] = ρ_aff
    cor_df.loc[gc, "p_aff"] = p_aff

plt.figure(figsize=(5, 7))
plt.subplot(2, 1, 1)
plt.title(f"correlations with maximum {stat}")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_aff", label=f"median Δaffinity Vs Δaffinity of max {stat} node")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_gc", label=f"median Δaffinity Vs max {stat}")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_node", label=f"Δaffinity of max {stat} node Vs max {stat}")
plt.axhline(0, color="k", ls="--", lw=1)
# plt.xlabel("τ")
plt.ylabel("Spearman ρ")
plt.ylim(-1, 1)
plt.legend()

plt.subplot(2, 1, 2)
sns.lineplot(data=cor_df, x=cor_df.index, y="p_aff")
sns.lineplot(data=cor_df, x=cor_df.index, y="p_gc")
sns.lineplot(data=cor_df, x=cor_df.index, y="p_node")
plt.xlabel("τ")
plt.ylabel("p-value")
plt.yscale("log")
plt.show()

/tmp/ipykernel_351/1009681117.py:3: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.
  ρ_node, p_node = sp.stats.spearmanr(group_df[f"Δaffinity of max {stat} node"], group_df[f"maximum {stat}"])
/tmp/ipykernel_351/1009681117.py:4: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.
  ρ_gc, p_gc = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"maximum {stat}"])
/tmp/ipykernel_351/1009681117.py:5: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.
  ρ_aff, p_aff = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"Δaffinity of max {stat} node"])

In [20]:

Copied!





# outcome = "mean Δaffinity"
# outcome = "median Δaffinity"
# outcome = "95th percentile Δaffinity"
outcome = f"Δaffinity of max {stat} node"
# outcome = f"ΔΔaffinity of max {stat} node"
# outcome = "mean Δaffinity"
# outcome = "median Δaffinity"
# outcome = "95th percentile Δaffinity"
outcome = f"Δaffinity of max {stat} node"
# outcome = f"ΔΔaffinity of max {stat} node"

In [21]:

Copied!

# tau = taus[np.where(tau_cor_df.ρ > 0, tau_cor_df.p, 1).argmin()]
tau = taus[9]
tau
# tau = taus[np.where(tau_cor_df.ρ > 0, tau_cor_df.p, 1).argmin()]
tau = taus[9]
tau

Out[21]:

np.float64(0.49999999999999994)

Need to recompute tree stats using the chosen $\tau$

In [22]:

Copied!

for tree in trees.values():
    ut.burst_stat(tree, stat, tau)
for tree in trees.values():
    ut.burst_stat(tree, stat, tau)

In [23]:

Copied!

df = df.loc[df["τ"] == tau].sort_values(f"maximum {stat}").reset_index()
df
df = df.loc[df["τ"] == tau].sort_values(f"maximum {stat}").reset_index()
df

Out[23]:

	index	uid	τ	ngs_id	mouse	GC	time	cells sampled	normalized dominance score	maximum REI	max Δaffinity	95th percentile Δaffinity	mean Δaffinity	median Δaffinity	id of max REI node	mutations in max REI node	Δaffinity of max REI node	ΔΔaffinity of max REI node	max REI node is root
0	1166	D20_M22_GC96	0.5	PR-1-07	22	96	d20	39.0	0.435897	0.038462	1.85465	1.700965	1.005091	1.161450	seq4	Y88(H)F A105(H)G N108(L)K	1.35896	0.05302	False
1	1159	D20_M22_GC89	0.5	PR-1-03	22	89	d20	87.0	0.413793	0.047593	1.83375	1.545630	0.782702	0.714810	seq33	Y103(H)F F10(L)L S12(L)T Y42(L)H N108(L)K L116...	1.46106	0.99422	False
2	1158	D20_M22_GC88	0.5	PR-1-03	22	88	d20	82.0	0.390244	0.048780	1.96522	1.552431	0.109996	0.138920	seq15	T96(H)A P72(L)S N93(L)T L99(L)M L116(L)P	0.08799	0.07656	False
3	1156	D20_M22_GC86	0.5	PR-1-02	22	86	d20	76.0	0.684211	0.050164	2.22660	1.989688	0.534087	0.812585	seq29	I76(H)F L99(L)W L116(L)P	-0.02546	0.00067	False
4	1146	D20_M21_GC76	0.5	PR-1-05	21	76	d20	69.0	0.768116	0.050767	2.12465	1.787844	0.497640	1.008670	11	A105(H)G A40(L)G	1.25043	0.00000	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
114	1176	D20_M23_GC106	0.5	PR-1-04	23	106	d20	83.0	1.000000	0.312688	1.41209	1.081766	0.828434	0.957790	seq12	S64(H)T N92(H)K Y42(L)C N108(L)H	0.95779	-0.02745	False
115	1110	D15_M16_GC40	0.5	PR-2-04	16	40	d15	51.0	0.862745	0.331189	1.90813	1.403847	0.866527	1.199390	seq13	A105(H)G Y66(L)F S109(L)R	1.19939	0.75310	False
116	1173	D20_M23_GC103	0.5	PR-1-03	23	103	d20	81.0	0.962963	0.332031	1.92757	1.245264	0.520700	0.411910	seq12	I78(H)L N92(H)T Y42(L)C	0.35030	0.35030	False
117	1105	D15_M14_GC35	0.5	PR-2-03	14	35	d15	57.0	0.947368	0.338584	2.35414	2.071883	1.101941	1.250430	seq7	A105(H)G A40(L)G	1.25043	0.80414	False
118	1101	D15_M13_GC31	0.5	PR-2-03	13	31	d15	72.0	0.845070	0.368842	1.80017	1.484965	0.820345	0.919910	seq1	I78(H)F Y42(L)S N108(L)K	0.91991	0.91991	False

119 rows × 19 columns

In [24]:

Copied!





plt.figure(figsize=(4, 3))
sns.regplot(data=df, x=f"maximum {stat}", y=outcome, scatter_kws=dict(s=10))
plt.title(f"τ = {tau:.2f}")
plt.show()

# plt.figure(figsize=(4, 3))
# sns.regplot(data=df.loc[df[outcome] != 0], x=f"maximum {stat}", y=outcome, scatter_kws=dict(s=10), label="no roots")
# plt.title(f"τ = {tau:.2f}")
# plt.legend()
# plt.show()

plt.figure(figsize=(4, 3))
sns.regplot(data=df, x=f"maximum {stat}", y="median Δaffinity", scatter_kws=dict(s=10))
plt.title(f"τ = {tau:.2f}")
plt.show()
plt.figure(figsize=(4, 3))
sns.regplot(data=df, x=f"maximum {stat}", y=outcome, scatter_kws=dict(s=10))
plt.title(f"τ = {tau:.2f}")
plt.show()

# plt.figure(figsize=(4, 3))
# sns.regplot(data=df.loc[df[outcome] != 0], x=f"maximum {stat}", y=outcome, scatter_kws=dict(s=10), label="no roots")
# plt.title(f"τ = {tau:.2f}")
# plt.legend()
# plt.show()

plt.figure(figsize=(4, 3))
sns.regplot(data=df, x=f"maximum {stat}", y="median Δaffinity", scatter_kws=dict(s=10))
plt.title(f"τ = {tau:.2f}")
plt.show()

In [25]:

Copied!

thresh = 0.25
thresh = 0.25

In [26]:

Copied!

df.to_csv(f"{output_dir}/data.csv")
df.to_csv(f"{output_dir}/data.csv")

Rank plot of total GC cell abundance¶

In [27]:

Copied!





plt.figure(figsize=(15, 3))
ax = sns.barplot(data=df.sort_values(["ngs_id", "mouse", "cells sampled"]), x="GC", y="cells sampled", #hue="mouse",
                #  hue_order=df.mouse.unique(),
                 legend=False,
                 dodge=False)
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig(f"{output_dir}/abundances.svg")
plt.show()
plt.figure(figsize=(15, 3))
ax = sns.barplot(data=df.sort_values(["ngs_id", "mouse", "cells sampled"]), x="GC", y="cells sampled", #hue="mouse",
                #  hue_order=df.mouse.unique(),
                 legend=False,
                 dodge=False)
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig(f"{output_dir}/abundances.svg")
plt.show()

Plot tree shape stats in a scatter plot, and render some example trees¶

For each tree we render it colored by fitness statistic, then by affinity, and by fitness stat

In [28]:

Copied!

df
df

Out[28]:

	index	uid	τ	ngs_id	mouse	GC	time	cells sampled	normalized dominance score	maximum REI	max Δaffinity	95th percentile Δaffinity	mean Δaffinity	median Δaffinity	id of max REI node	mutations in max REI node	Δaffinity of max REI node	ΔΔaffinity of max REI node	max REI node is root
0	1166	D20_M22_GC96	0.5	PR-1-07	22	96	d20	39.0	0.435897	0.038462	1.85465	1.700965	1.005091	1.161450	seq4	Y88(H)F A105(H)G N108(L)K	1.35896	0.05302	False
1	1159	D20_M22_GC89	0.5	PR-1-03	22	89	d20	87.0	0.413793	0.047593	1.83375	1.545630	0.782702	0.714810	seq33	Y103(H)F F10(L)L S12(L)T Y42(L)H N108(L)K L116...	1.46106	0.99422	False
2	1158	D20_M22_GC88	0.5	PR-1-03	22	88	d20	82.0	0.390244	0.048780	1.96522	1.552431	0.109996	0.138920	seq15	T96(H)A P72(L)S N93(L)T L99(L)M L116(L)P	0.08799	0.07656	False
3	1156	D20_M22_GC86	0.5	PR-1-02	22	86	d20	76.0	0.684211	0.050164	2.22660	1.989688	0.534087	0.812585	seq29	I76(H)F L99(L)W L116(L)P	-0.02546	0.00067	False
4	1146	D20_M21_GC76	0.5	PR-1-05	21	76	d20	69.0	0.768116	0.050767	2.12465	1.787844	0.497640	1.008670	11	A105(H)G A40(L)G	1.25043	0.00000	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
114	1176	D20_M23_GC106	0.5	PR-1-04	23	106	d20	83.0	1.000000	0.312688	1.41209	1.081766	0.828434	0.957790	seq12	S64(H)T N92(H)K Y42(L)C N108(L)H	0.95779	-0.02745	False
115	1110	D15_M16_GC40	0.5	PR-2-04	16	40	d15	51.0	0.862745	0.331189	1.90813	1.403847	0.866527	1.199390	seq13	A105(H)G Y66(L)F S109(L)R	1.19939	0.75310	False
116	1173	D20_M23_GC103	0.5	PR-1-03	23	103	d20	81.0	0.962963	0.332031	1.92757	1.245264	0.520700	0.411910	seq12	I78(H)L N92(H)T Y42(L)C	0.35030	0.35030	False
117	1105	D15_M14_GC35	0.5	PR-2-03	14	35	d15	57.0	0.947368	0.338584	2.35414	2.071883	1.101941	1.250430	seq7	A105(H)G A40(L)G	1.25043	0.80414	False
118	1101	D15_M13_GC31	0.5	PR-2-03	13	31	d15	72.0	0.845070	0.368842	1.80017	1.484965	0.820345	0.919910	seq1	I78(H)F Y42(L)S N108(L)K	0.91991	0.91991	False

119 rows × 19 columns

In [29]:

Copied!

pc
pc

Out[29]:

<module 'utils.phenotype_colorscales' from '/fh/fast/matsen_e/shared/replay/pipeline-cache/d3/1ee57a4dd94f4f450fcb7b2312a558/utils/phenotype_colorscales.py'>

In [30]:

Copied!

outcome
outcome

Out[30]:

'Δaffinity of max REI node'

In [31]:

Copied!





# stat cmap
cmap = "viridis"

vmin = min(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
vmax = max(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
xmargin=0.05
ymargin=0.03
xticks = [0.25, 0.5, 0.75, 1.0]
yticks = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35]
min_size = df["cells sampled"].min()
max_size = df["cells sampled"].max()
print(min_size, max_size)
# Set up a common normalization for sizes
size_norm = Normalize(vmin=min_size, vmax=max_size)

size_range = (10, 100)

# legend_sizes = [min_size, (min_size + max_size) / 2, max_size]
# legend_markers = [Line2D([0], [0], marker='o', color='w', label=f'{int(size)} cells',
#                          markersize=(20 + (size - min_size) / (max_size - min_size) * 180), 
#                          markerfacecolor='gray', markeredgecolor='gray') 
#                   for size in legend_sizes]

# size_range = (min_cells_sampled, max_cells_sampled)


for time, time_df in df.groupby("time"):
    print(time, flush=True)

    time_df_mouse_sorted = time_df.copy()
    time_df_mouse_sorted["mouse"] = time_df_mouse_sorted["mouse"].astype(int)
    time_df_mouse_sorted.sort_values("mouse", inplace=True)
    time_df_mouse_sorted["mouse"] = time_df_mouse_sorted["mouse"].astype(str)

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18,3.5), sharey=True, sharex=True)
    # fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, sharex=True)
    sns.scatterplot(data=time_df_mouse_sorted, 
                    x="normalized dominance score", 
                    y=f"maximum {stat}", 
                    hue="mouse", 
                    size="cells sampled", 
                    sizes=size_range,
                    # size_norm=size_norm,
                    # style="time",
                    clip_on=False, 
                    alpha=0.9,
                    legend="brief",
                    size_norm=size_norm,    
                    ax=ax1)

    # ax1.legend(handles=legend_markers, title="mouse", loc='upper right', bbox_to_anchor=(1, 1))
    # # Get the existing hue legend and add it
    # handles, labels = ax1.get_legend_handles_labels()
    # ax1.legend(handles=handles + legend_markers, labels=labels + [f'{int(size)} cells' for size in legend_sizes], 
    #        title="Outcome & Cells Sampled", loc='upper right', bbox_to_anchor=(1, 1))

    ax1.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False, ncol=2)
    # ax1.legend(loc='upper left', frameon=True, ncol=3, fontsize=8)
    # ax1.legend(frameon=False)
    # sns.move_legend(ax1, "upper left", bbox_to_anchor=(1, 1))

    ax2.set_title("median Δaffinity")
    sns.scatterplot(data=time_df_mouse_sorted, 
                    x="normalized dominance score", 
                    y=f"maximum {stat}", 
                    hue="median Δaffinity", 
                    size="cells sampled", 
                    sizes=size_range,
                    # style="time",
                    clip_on=False, 
                    alpha=0.9,
                    palette=pc.affinity_trees.cmap, hue_norm=pc.affinity_trees.norm,
                    legend="brief",
                    size_norm=size_norm,
                    ax=ax2)
    ax2.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)

    ax3.set_title(outcome)
    sns.scatterplot(data=time_df_mouse_sorted, 
                    x="normalized dominance score", 
                    y=f"maximum {stat}", 
                    hue=outcome, 
                    size="cells sampled", 
                    sizes=size_range,
                    # style="time",
                    clip_on=False, 
                    alpha=0.9,
                    palette=pc.affinity_trees.cmap, hue_norm=pc.affinity_trees.norm,
                    legend="brief",
                    size_norm=size_norm,
                    ax=ax3)
    # move legend for ax3 outside of plot to the right and remove the frame
    ax3.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)
    # plt.tight_layout()

    ax1.set_xlim(0.15 - xmargin, 1 + xmargin)
    ax1.set_ylim(vmin + ymargin, vmax + ymargin)
    ax1.set_xticks(xticks)
    ax1.set_yticks(yticks)
    # adjust width between subplots
    plt.subplots_adjust(wspace=1.2)

    plt.savefig(f"{output_dir}/scatter_{time}.svg")
    plt.show()
# stat cmap
cmap = "viridis"

vmin = min(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
vmax = max(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
xmargin=0.05
ymargin=0.03
xticks = [0.25, 0.5, 0.75, 1.0]
yticks = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35]
min_size = df["cells sampled"].min()
max_size = df["cells sampled"].max()
print(min_size, max_size)
# Set up a common normalization for sizes
size_norm = Normalize(vmin=min_size, vmax=max_size)

size_range = (10, 100)

# legend_sizes = [min_size, (min_size + max_size) / 2, max_size]
# legend_markers = [Line2D([0], [0], marker='o', color='w', label=f'{int(size)} cells',
#                          markersize=(20 + (size - min_size) / (max_size - min_size) * 180), 
#                          markerfacecolor='gray', markeredgecolor='gray') 
#                   for size in legend_sizes]

# size_range = (min_cells_sampled, max_cells_sampled)


for time, time_df in df.groupby("time"):
    print(time, flush=True)

    time_df_mouse_sorted = time_df.copy()
    time_df_mouse_sorted["mouse"] = time_df_mouse_sorted["mouse"].astype(int)
    time_df_mouse_sorted.sort_values("mouse", inplace=True)
    time_df_mouse_sorted["mouse"] = time_df_mouse_sorted["mouse"].astype(str)

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18,3.5), sharey=True, sharex=True)
    # fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, sharex=True)
    sns.scatterplot(data=time_df_mouse_sorted, 
                    x="normalized dominance score", 
                    y=f"maximum {stat}", 
                    hue="mouse", 
                    size="cells sampled", 
                    sizes=size_range,
                    # size_norm=size_norm,
                    # style="time",
                    clip_on=False, 
                    alpha=0.9,
                    legend="brief",
                    size_norm=size_norm,    
                    ax=ax1)

    # ax1.legend(handles=legend_markers, title="mouse", loc='upper right', bbox_to_anchor=(1, 1))
    # # Get the existing hue legend and add it
    # handles, labels = ax1.get_legend_handles_labels()
    # ax1.legend(handles=handles + legend_markers, labels=labels + [f'{int(size)} cells' for size in legend_sizes], 
    #        title="Outcome & Cells Sampled", loc='upper right', bbox_to_anchor=(1, 1))

    ax1.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False, ncol=2)
    # ax1.legend(loc='upper left', frameon=True, ncol=3, fontsize=8)
    # ax1.legend(frameon=False)
    # sns.move_legend(ax1, "upper left", bbox_to_anchor=(1, 1))

    ax2.set_title("median Δaffinity")
    sns.scatterplot(data=time_df_mouse_sorted, 
                    x="normalized dominance score", 
                    y=f"maximum {stat}", 
                    hue="median Δaffinity", 
                    size="cells sampled", 
                    sizes=size_range,
                    # style="time",
                    clip_on=False, 
                    alpha=0.9,
                    palette=pc.affinity_trees.cmap, hue_norm=pc.affinity_trees.norm,
                    legend="brief",
                    size_norm=size_norm,
                    ax=ax2)
    ax2.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)

    ax3.set_title(outcome)
    sns.scatterplot(data=time_df_mouse_sorted, 
                    x="normalized dominance score", 
                    y=f"maximum {stat}", 
                    hue=outcome, 
                    size="cells sampled", 
                    sizes=size_range,
                    # style="time",
                    clip_on=False, 
                    alpha=0.9,
                    palette=pc.affinity_trees.cmap, hue_norm=pc.affinity_trees.norm,
                    legend="brief",
                    size_norm=size_norm,
                    ax=ax3)
    # move legend for ax3 outside of plot to the right and remove the frame
    ax3.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)
    # plt.tight_layout()

    ax1.set_xlim(0.15 - xmargin, 1 + xmargin)
    ax1.set_ylim(vmin + ymargin, vmax + ymargin)
    ax1.set_xticks(xticks)
    ax1.set_yticks(yticks)
    # adjust width between subplots
    plt.subplots_adjust(wspace=1.2)

    plt.savefig(f"{output_dir}/scatter_{time}.svg")
    plt.show()

25.0 94.0
d15

d20

In [32]:

Copied!

df
df

Out[32]:

	index	uid	τ	ngs_id	mouse	GC	time	cells sampled	normalized dominance score	maximum REI	max Δaffinity	95th percentile Δaffinity	mean Δaffinity	median Δaffinity	id of max REI node	mutations in max REI node	Δaffinity of max REI node	ΔΔaffinity of max REI node	max REI node is root
0	1166	D20_M22_GC96	0.5	PR-1-07	22	96	d20	39.0	0.435897	0.038462	1.85465	1.700965	1.005091	1.161450	seq4	Y88(H)F A105(H)G N108(L)K	1.35896	0.05302	False
1	1159	D20_M22_GC89	0.5	PR-1-03	22	89	d20	87.0	0.413793	0.047593	1.83375	1.545630	0.782702	0.714810	seq33	Y103(H)F F10(L)L S12(L)T Y42(L)H N108(L)K L116...	1.46106	0.99422	False
2	1158	D20_M22_GC88	0.5	PR-1-03	22	88	d20	82.0	0.390244	0.048780	1.96522	1.552431	0.109996	0.138920	seq15	T96(H)A P72(L)S N93(L)T L99(L)M L116(L)P	0.08799	0.07656	False
3	1156	D20_M22_GC86	0.5	PR-1-02	22	86	d20	76.0	0.684211	0.050164	2.22660	1.989688	0.534087	0.812585	seq29	I76(H)F L99(L)W L116(L)P	-0.02546	0.00067	False
4	1146	D20_M21_GC76	0.5	PR-1-05	21	76	d20	69.0	0.768116	0.050767	2.12465	1.787844	0.497640	1.008670	11	A105(H)G A40(L)G	1.25043	0.00000	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
114	1176	D20_M23_GC106	0.5	PR-1-04	23	106	d20	83.0	1.000000	0.312688	1.41209	1.081766	0.828434	0.957790	seq12	S64(H)T N92(H)K Y42(L)C N108(L)H	0.95779	-0.02745	False
115	1110	D15_M16_GC40	0.5	PR-2-04	16	40	d15	51.0	0.862745	0.331189	1.90813	1.403847	0.866527	1.199390	seq13	A105(H)G Y66(L)F S109(L)R	1.19939	0.75310	False
116	1173	D20_M23_GC103	0.5	PR-1-03	23	103	d20	81.0	0.962963	0.332031	1.92757	1.245264	0.520700	0.411910	seq12	I78(H)L N92(H)T Y42(L)C	0.35030	0.35030	False
117	1105	D15_M14_GC35	0.5	PR-2-03	14	35	d15	57.0	0.947368	0.338584	2.35414	2.071883	1.101941	1.250430	seq7	A105(H)G A40(L)G	1.25043	0.80414	False
118	1101	D15_M13_GC31	0.5	PR-2-03	13	31	d15	72.0	0.845070	0.368842	1.80017	1.484965	0.820345	0.919910	seq1	I78(H)F Y42(L)S N108(L)K	0.91991	0.91991	False

119 rows × 19 columns

In [33]:

Copied!

time_df.round(2)
time_df.round(2)

Out[33]:

	index	uid	τ	ngs_id	mouse	GC	time	cells sampled	normalized dominance score	maximum REI	max Δaffinity	95th percentile Δaffinity	mean Δaffinity	median Δaffinity	id of max REI node	mutations in max REI node	Δaffinity of max REI node	ΔΔaffinity of max REI node	max REI node is root
0	1166	D20_M22_GC96	0.5	PR-1-07	22	96	d20	39.0	0.44	0.04	1.85	1.70	1.01	1.16	seq4	Y88(H)F A105(H)G N108(L)K	1.36	0.05	False
1	1159	D20_M22_GC89	0.5	PR-1-03	22	89	d20	87.0	0.41	0.05	1.83	1.55	0.78	0.71	seq33	Y103(H)F F10(L)L S12(L)T Y42(L)H N108(L)K L116...	1.46	0.99	False
2	1158	D20_M22_GC88	0.5	PR-1-03	22	88	d20	82.0	0.39	0.05	1.97	1.55	0.11	0.14	seq15	T96(H)A P72(L)S N93(L)T L99(L)M L116(L)P	0.09	0.08	False
3	1156	D20_M22_GC86	0.5	PR-1-02	22	86	d20	76.0	0.68	0.05	2.23	1.99	0.53	0.81	seq29	I76(H)F L99(L)W L116(L)P	-0.03	0.00	False
4	1146	D20_M21_GC76	0.5	PR-1-05	21	76	d20	69.0	0.77	0.05	2.12	1.79	0.50	1.01	11	A105(H)G A40(L)G	1.25	0.00	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
109	1141	D20_M21_GC71	0.5	PR-1-02	21	71	d20	88.0	1.00	0.26	1.94	1.44	0.79	0.99	seq11	R67(L)K Q105(L)H L116(L)I	0.99	-0.01	False
111	1161	D20_M22_GC91	0.5	PR-1-06	22	91	d20	77.0	0.73	0.28	2.50	1.77	0.85	1.49	seq17	Y66(L)H Q105(L)H N108(L)K L116(L)I	1.64	0.49	False
113	1139	D20_M20_GC69	0.5	PR-1-08	20	69	d20	54.0	0.80	0.29	2.48	2.27	0.99	1.71	seq2	A105(H)G A40(L)G S109(L)R	2.05	0.80	False
114	1176	D20_M23_GC106	0.5	PR-1-04	23	106	d20	83.0	1.00	0.31	1.41	1.08	0.83	0.96	seq12	S64(H)T N92(H)K Y42(L)C N108(L)H	0.96	-0.03	False
116	1173	D20_M23_GC103	0.5	PR-1-03	23	103	d20	81.0	0.96	0.33	1.93	1.25	0.52	0.41	seq12	I78(H)L N92(H)T Y42(L)C	0.35	0.35	False

67 rows × 19 columns

In [34]:

Copied!





cmap = "viridis"

vmin = min(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
vmax = max(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())

for time, time_df in df.groupby("time"):

    scatter_plot = alt.Chart(time_df.round(3)).mark_circle().encode(
        x=alt.X('normalized dominance score:Q', title='Normalized Dominance Score'),
        y=alt.Y('maximum REI:Q', title='Maximum REI'),
        size=alt.Size('cells sampled:Q', title='Cells Sampled'),
        color=alt.Color('mouse:N', title='Mouse', legend=None),
        tooltip=['uid:N', 'cells sampled:Q', 'median Δaffinity:Q', 'Δaffinity of max REI node:Q']  # Tooltip to show the uid when hovered over
    ).properties(
        width=600,
        height=400,
        title='Interactive plot'
    ).interactive()

    # Display the plot
    scatter_plot.show()
    scatter_plot.save(f"{output_dir}/interactive_REI_Dom_{time}.html")

cmap = "viridis"

vmin = min(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
vmax = max(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())

for time, time_df in df.groupby("time"):

    scatter_plot = alt.Chart(time_df.round(3)).mark_circle().encode(
        x=alt.X('normalized dominance score:Q', title='Normalized Dominance Score'),
        y=alt.Y('maximum REI:Q', title='Maximum REI'),
        size=alt.Size('cells sampled:Q', title='Cells Sampled'),
        color=alt.Color('mouse:N', title='Mouse', legend=None),
        tooltip=['uid:N', 'cells sampled:Q', 'median Δaffinity:Q', 'Δaffinity of max REI node:Q']  # Tooltip to show the uid when hovered over
    ).properties(
        width=600,
        height=400,
        title='Interactive plot'
    ).interactive()

    # Display the plot
    scatter_plot.show()
    scatter_plot.save(f"{output_dir}/interactive_REI_Dom_{time}.html")

In [35]:

Copied!

# !rm {output_dir}/trees/*.svg
# !rm {output_dir}/trees/*.svg

In [36]:

Copied!

output_dir
output_dir

Out[36]:

'./default'

In [37]:

Copied!

!mkdir -p {output_dir}/trees
!mkdir -p {output_dir}/trees

In [38]:

Copied!

%env MPLBACKEND=Agg
# %matplotlib inline
%env MPLBACKEND=Agg
# %matplotlib inline

env: MPLBACKEND=Agg

In [39]:

Copied!





from collections import defaultdict
tree_expr = defaultdict(list)
for i, row in df.iterrows():
    key = row.uid
    for node in trees[key].tree.traverse():
        tree_expr["uid"].append(key)
        tree_expr["node_bind"].append(getattr(node, "delta_bind"))
        tree_expr["node_expr"].append(getattr(node, "delta_expr"))
        # tree_expr["node_bind_norm"].append(pc.affinity_trees_grey_centered.norm(getattr(node, "delta_bind")))
        # tree_expr["node_expr_norm"].append(pc.expression_trees_grey_centered.norm(getattr(node, "delta_expr")))
tree_expr_df = pd.DataFrame(tree_expr)
tree_expr_df.head()
from collections import defaultdict
tree_expr = defaultdict(list)
for i, row in df.iterrows():
    key = row.uid
    for node in trees[key].tree.traverse():
        tree_expr["uid"].append(key)
        tree_expr["node_bind"].append(getattr(node, "delta_bind"))
        tree_expr["node_expr"].append(getattr(node, "delta_expr"))
        # tree_expr["node_bind_norm"].append(pc.affinity_trees_grey_centered.norm(getattr(node, "delta_bind")))
        # tree_expr["node_expr_norm"].append(pc.expression_trees_grey_centered.norm(getattr(node, "delta_expr")))
tree_expr_df = pd.DataFrame(tree_expr)
tree_expr_df.head()

Out[39]:

	uid	node_bind	node_expr
0	D20_M22_GC96	0.00000	0.00000
1	D20_M22_GC96	-0.46711	-0.30968
2	D20_M22_GC96	0.48156	-0.07432
3	D20_M22_GC96	0.15592	-0.54128
4	D20_M22_GC96	0.80414	0.03813

In [40]:

Copied!





fig, ax = plt.subplots(2, 1, figsize = [20,10], sharex=True, sharey=True)
sns.boxplot(tree_expr_df, x="uid", y="node_bind", ax=ax[0])
sns.boxplot(tree_expr_df, x="uid", y="node_expr", ax=ax[1])
ax[1].set_xticks(ax[1].get_xticks(), ax[1].get_xticklabels(), rotation=90, ha='right')
plt.show()
fig, ax = plt.subplots(2, 1, figsize = [20,10], sharex=True, sharey=True)
sns.boxplot(tree_expr_df, x="uid", y="node_bind", ax=ax[0])
sns.boxplot(tree_expr_df, x="uid", y="node_expr", ax=ax[1])
ax[1].set_xticks(ax[1].get_xticks(), ax[1].get_xticklabels(), rotation=90, ha='right')
plt.show()

In [41]:

Copied!





branch_margin = 0.1
# scale = 20
for i, row in df.iterrows():
    x, y = row[["normalized dominance score", f"maximum {stat}"]]
    key = row.uid
    
    colormap1 = trees[key].feature_colormap(
        stat, 
        vmin=vmin, 
        vmax=vmax, 
        # scale="symlog", 
        linthresh=1, 
        cmap=cmap
    )
    trees[key].render(
        f"{output_dir}/trees/{key}.{stat}.scale{scale}.svg", 
        colormap=colormap1, 
        scale=scale, 
        branch_margin=branch_margin
    )
    # print(f"Done: {key}")
    # if row[f"maximum {stat}"] > thresh:
    #     print(row.uid, stat)
    #     if row[f"max {stat} node is root"]:
    #         print(f"NOTE: max {stat} node is root")
    #     display(
    #         trees[key].render(
    #             "%%inline", 
    #             colormap=colormap1, 
    #             scale=scale, 
    #             branch_margin=branch_margin
    #         )
    #     )

    colormap2 = {
        node.name: to_hex(
            pc.affinity_trees_grey_centered.cmap(
                pc.affinity_trees_grey_centered.norm(
                    getattr(node, "delta_bind")
                )
            )
        ) for node in trees[key].tree.traverse()
    }
    trees[key].render(
        f"{output_dir}/trees/{key}.binding.scale{scale}.svg", 
        colormap=colormap2, 
        scale=scale, 
        branch_margin=branch_margin
    )
    # if row[f"maximum {stat}"] > thresh:
    #     print(row.uid, "Δaffinity")
    #     display(
    #         trees[key].render(
    #             "%%inline", 
    #             colormap=colormap2, 
    #             scale=scale, 
    #             branch_margin=branch_margin
    #         )
    #     )

    colormap3 = {
        node.name: to_hex(
            pc.expression_trees_grey_centered.cmap(
                pc.expression_trees_grey_centered.norm(
                    getattr(node, "delta_expr")
                )
            )
        ) for node in trees[key].tree.traverse()
    }
    trees[key].render(
        f"{output_dir}/trees/{key}.expression.scale{scale}.svg", 
        colormap=colormap3, 
        scale=scale, 
        branch_margin=branch_margin
    )

# plt.show()
branch_margin = 0.1
# scale = 20
for i, row in df.iterrows():
    x, y = row[["normalized dominance score", f"maximum {stat}"]]
    key = row.uid
    
    colormap1 = trees[key].feature_colormap(
        stat, 
        vmin=vmin, 
        vmax=vmax, 
        # scale="symlog", 
        linthresh=1, 
        cmap=cmap
    )
    trees[key].render(
        f"{output_dir}/trees/{key}.{stat}.scale{scale}.svg", 
        colormap=colormap1, 
        scale=scale, 
        branch_margin=branch_margin
    )
    # print(f"Done: {key}")
    # if row[f"maximum {stat}"] > thresh:
    #     print(row.uid, stat)
    #     if row[f"max {stat} node is root"]:
    #         print(f"NOTE: max {stat} node is root")
    #     display(
    #         trees[key].render(
    #             "%%inline", 
    #             colormap=colormap1, 
    #             scale=scale, 
    #             branch_margin=branch_margin
    #         )
    #     )

    colormap2 = {
        node.name: to_hex(
            pc.affinity_trees_grey_centered.cmap(
                pc.affinity_trees_grey_centered.norm(
                    getattr(node, "delta_bind")
                )
            )
        ) for node in trees[key].tree.traverse()
    }
    trees[key].render(
        f"{output_dir}/trees/{key}.binding.scale{scale}.svg", 
        colormap=colormap2, 
        scale=scale, 
        branch_margin=branch_margin
    )
    # if row[f"maximum {stat}"] > thresh:
    #     print(row.uid, "Δaffinity")
    #     display(
    #         trees[key].render(
    #             "%%inline", 
    #             colormap=colormap2, 
    #             scale=scale, 
    #             branch_margin=branch_margin
    #         )
    #     )

    colormap3 = {
        node.name: to_hex(
            pc.expression_trees_grey_centered.cmap(
                pc.expression_trees_grey_centered.norm(
                    getattr(node, "delta_expr")
                )
            )
        ) for node in trees[key].tree.traverse()
    }
    trees[key].render(
        f"{output_dir}/trees/{key}.expression.scale{scale}.svg", 
        colormap=colormap3, 
        scale=scale, 
        branch_margin=branch_margin
    )

# plt.show()

Color bars for the tree colormaps¶

In [42]:

Copied!





fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(cm.ScalarMappable(cmap=cmap, norm=Normalize(vmin=vmin, vmax=vmax)),
             orientation='horizontal',
             cax=cax,
             label=f"{stat}")
plt.savefig(f"{output_dir}/cbar_REI.svg", bbox_inches="tight")
plt.show()

fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(cm.ScalarMappable(cmap=pc.affinity_trees_grey_centered.cmap, norm=pc.affinity_trees_grey_centered.norm),
             orientation='horizontal',
             cax=cax,
             label="Δaffinity")
plt.savefig(f"{output_dir}/cbar_binding.svg", bbox_inches="tight")
plt.show()

fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(
    cm.ScalarMappable(
        cmap=pc.expression_trees_grey_centered.cmap, 
        norm=pc.expression_trees_grey_centered.norm
    ),
    orientation='horizontal',
    cax=cax,
    label="Δexpression",
    ticks=[-3, 0, 1],
    
    )
plt.savefig(f"{output_dir}/cbar_expression.svg", bbox_inches="tight")
plt.show()
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(cm.ScalarMappable(cmap=cmap, norm=Normalize(vmin=vmin, vmax=vmax)),
             orientation='horizontal',
             cax=cax,
             label=f"{stat}")
plt.savefig(f"{output_dir}/cbar_REI.svg", bbox_inches="tight")
plt.show()

fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(cm.ScalarMappable(cmap=pc.affinity_trees_grey_centered.cmap, norm=pc.affinity_trees_grey_centered.norm),
             orientation='horizontal',
             cax=cax,
             label="Δaffinity")
plt.savefig(f"{output_dir}/cbar_binding.svg", bbox_inches="tight")
plt.show()

fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(
    cm.ScalarMappable(
        cmap=pc.expression_trees_grey_centered.cmap, 
        norm=pc.expression_trees_grey_centered.norm
    ),
    orientation='horizontal',
    cax=cax,
    label="Δexpression",
    ticks=[-3, 0, 1],
    
    )
plt.savefig(f"{output_dir}/cbar_expression.svg", bbox_inches="tight")
plt.show()

Stack the tree svg's into a single image¶

define output directory and clear out any existing files

In [43]:

Copied!

stacked_dir = f"{output_dir}/stacked_trees"
!mkdir -p {stacked_dir}
!rm {output_dir}/stacked_trees/ngs_id*{scale}*.svg
stacked_dir = f"{output_dir}/stacked_trees"
!mkdir -p {stacked_dir}
!rm {output_dir}/stacked_trees/ngs_id*{scale}*.svg

rm: cannot remove './default/stacked_trees/ngs_id*20*.svg': No such file or directory

In [44]:

Copied!

# Set the maximum width for the display
pd.set_option('display.max_colwidth', None)

# Set the maximum number of columns to display
pd.set_option('display.max_columns', None)
# Set the maximum width for the display
pd.set_option('display.max_colwidth', None)

# Set the maximum number of columns to display
pd.set_option('display.max_columns', None)

Use svg utils to stitch together the trees rendered above.

In [45]:

Copied!





for stat in ["REI", "binding", "expression"]:
    for duration in ["20", "15"]:

        files = sorted(glob.glob(f"{output_dir}/trees/D{duration}*{stat}.scale{scale}.svg"))
        files_df = pd.DataFrame(files, columns=["file"])
        path_start = 0 if not files[0].startswith(".") else 1
        files_df["UID"] = files_df.file.str.split(".").str[path_start].str.split("/").str[-1]
        files_df["mouse"] = files_df.file.str.extract(r"M(\d+)").astype(int)
        files_df["GC"] = files_df.file.str.extract(r"GC(\d+)").astype(int)
        files_df.sort_values(["mouse", "GC"], inplace=True)
        
        # Initial y position for stacking
        y_offset = 220
        all_panels = []
        all_panels.append(
           Text(f"IMM Duration: {duration} days", 200, 35, size=24, weight='bold')
        )

        # add colorbar
        cbar = f"cbar_{stat}"
        colorbar_svg = SVG(f"{output_dir}/{cbar}.svg").scale(2)
        colorbar_svg.move(175, 45)
        all_panels.append(colorbar_svg)

        # iterate through the tree, create a panel for each one.
        for i, (idx, row) in enumerate(files_df.iterrows()):

            position = (10, y_offset)
            annotation = row.UID
            panel = Panel(
                    SVG(row.file).scale(0.5),
                    Text(annotation, 10, -20, size=16, weight='bold')
            ).move(*position)
            all_panels.append(panel)
            if scale == 20:
                y_offset += 350
            else:
                y_offset += 125

        fig = Figure("%dpx"%1050, "%dpx" % y_offset ,*all_panels)
        fig.save(f"{output_dir}/stacked_trees/D{duration}.{stat}.scale{scale}.svg")

for stat in ["REI", "binding", "expression"]:
    for duration in ["20", "15"]:

        files = sorted(glob.glob(f"{output_dir}/trees/D{duration}*{stat}.scale{scale}.svg"))
        files_df = pd.DataFrame(files, columns=["file"])
        path_start = 0 if not files[0].startswith(".") else 1
        files_df["UID"] = files_df.file.str.split(".").str[path_start].str.split("/").str[-1]
        files_df["mouse"] = files_df.file.str.extract(r"M(\d+)").astype(int)
        files_df["GC"] = files_df.file.str.extract(r"GC(\d+)").astype(int)
        files_df.sort_values(["mouse", "GC"], inplace=True)
        
        # Initial y position for stacking
        y_offset = 220
        all_panels = []
        all_panels.append(
           Text(f"IMM Duration: {duration} days", 200, 35, size=24, weight='bold')
        )

        # add colorbar
        cbar = f"cbar_{stat}"
        colorbar_svg = SVG(f"{output_dir}/{cbar}.svg").scale(2)
        colorbar_svg.move(175, 45)
        all_panels.append(colorbar_svg)

        # iterate through the tree, create a panel for each one.
        for i, (idx, row) in enumerate(files_df.iterrows()):

            position = (10, y_offset)
            annotation = row.UID
            panel = Panel(
                    SVG(row.file).scale(0.5),
                    Text(annotation, 10, -20, size=16, weight='bold')
            ).move(*position)
            all_panels.append(panel)
            if scale == 20:
                y_offset += 350
            else:
                y_offset += 125

        fig = Figure("%dpx"%1050, "%dpx" % y_offset ,*all_panels)
        fig.save(f"{output_dir}/stacked_trees/D{duration}.{stat}.scale{scale}.svg")

Affinity vs. REI¶

In [46]:

Copied!





stat = "REI"
dat = []
for gc, tree in trees.items():
    for node in tree.tree.traverse():
        dat.append([gc, getattr(node, stat), node.delta_bind])

df_nodes = pd.DataFrame(dat, columns=["GC", stat, "Δaffinity"])

plt.figure(figsize=(4, 4))
sns.scatterplot(data=df_nodes, x=stat, y="Δaffinity", hue="GC", legend=False, alpha=0.5, size=.1)
plt.axhline(0, ls="--", c="k", lw=1)
plt.axvline(0, ls="--", c="k", lw=1)
plt.xscale("log")
plt.tight_layout()
plt.show()
stat = "REI"
dat = []
for gc, tree in trees.items():
    for node in tree.tree.traverse():
        dat.append([gc, getattr(node, stat), node.delta_bind])

df_nodes = pd.DataFrame(dat, columns=["GC", stat, "Δaffinity"])

plt.figure(figsize=(4, 4))
sns.scatterplot(data=df_nodes, x=stat, y="Δaffinity", hue="GC", legend=False, alpha=0.5, size=.1)
plt.axhline(0, ls="--", c="k", lw=1)
plt.axvline(0, ls="--", c="k", lw=1)
plt.xscale("log")
plt.tight_layout()
plt.show()

Summary¶

Developed REI, because no branch lengths with LB
Doesn't correlate will with GC affinity
Recent motivated by evolving affinity

Tree shape diversity¶

The plot elements below will be saved to /analysis/output/NDS-LB/¶

Load GC trees and constuct a data frame of statistics¶

Rank plot of total GC cell abundance¶

Plot tree shape stats in a scatter plot, and render some example trees¶

Color bars for the tree colormaps¶

Stack the tree svg's into a single image¶

Affinity vs. REI¶

Summary¶

The plot elements below will be saved to `/analysis/output/NDS-LB/`¶