Tree shape diversity¶
The plot elements below will be saved to /analysis/output/NDS-LB/
¶
In [1]:
Copied!
import os
import tempfile
cache_dir = tempfile.mkdtemp()
os.environ['XDG_CACHE_HOME'] = cache_dir
import glob
import pickle
import pandas as pd
import numpy as np
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize, to_hex
from svgutils.compose import *
import altair as alt
import utils.trees as ut
import utils.phenotype_colorscales as pc
import os
import tempfile
cache_dir = tempfile.mkdtemp()
os.environ['XDG_CACHE_HOME'] = cache_dir
import glob
import pickle
import pandas as pd
import numpy as np
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize, to_hex
from svgutils.compose import *
import altair as alt
import utils.trees as ut
import utils.phenotype_colorscales as pc
In [2]:
Copied!
results = "results"
ranking_subdir = 'naive_reversions_first'
scale = 20 # must be 5 or 20
metadata_csv = "../../gc_metadata.csv"
outbase = "_ignore/NDS-LB/"
workflow_env_exec = True
results = "results"
ranking_subdir = 'naive_reversions_first'
scale = 20 # must be 5 or 20
metadata_csv = "../../gc_metadata.csv"
outbase = "_ignore/NDS-LB/"
workflow_env_exec = True
In [3]:
Copied!
# Parameters
results = "."
ranking_subdir = "default"
metadata_csv = "gc_metadata.csv"
outbase = "."
workflow_env_exec = True
# Parameters
results = "."
ranking_subdir = "default"
metadata_csv = "gc_metadata.csv"
outbase = "."
workflow_env_exec = True
In [4]:
Copied!
if workflow_env_exec:
os.environ['PYTHONHASHSEED'] = '0'
os.environ['MPLBACKEND'] = 'Agg'
os.environ['MPLCONFIGDIR'] = '/tmp/matplotlib'
os.environ["QT_QPA_PLATFORM"] = "offscreen"
os.environ["XDG_RUNTIME_DIR"] = "/tmp/xdg"
if workflow_env_exec:
os.environ['PYTHONHASHSEED'] = '0'
os.environ['MPLBACKEND'] = 'Agg'
os.environ['MPLCONFIGDIR'] = '/tmp/matplotlib'
os.environ["QT_QPA_PLATFORM"] = "offscreen"
os.environ["XDG_RUNTIME_DIR"] = "/tmp/xdg"
In [5]:
Copied!
output_dir = f"{outbase}/{ranking_subdir}"
if not os.path.exists(output_dir):
os.makedirs(output_dir)
output_dir = f"{outbase}/{ranking_subdir}"
if not os.path.exists(output_dir):
os.makedirs(output_dir)
Load GC trees and constuct a data frame of statistics¶
In [6]:
Copied!
# results = "../nextflow/results"
metadata = pd.read_csv(metadata_csv, index_col=0)
metadata.query("(strain == 'wt') & (cell_type == 'GC') & (imm_duration != 'w10')", inplace=True)
metadata
# results = "../nextflow/results"
metadata = pd.read_csv(metadata_csv, index_col=0)
metadata.query("(strain == 'wt') & (cell_type == 'GC') & (imm_duration != 'w10')", inplace=True)
metadata
Out[6]:
ngs_id | imm_duration | mouse | gc | strain | node | cell_type | plate | hc_barcode | lc_barcode | row | col | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uid | ||||||||||||
D15_M1_GC1 | PR-2-01 | d15 | 1 | 1 | wt | RP | GC | 2 | 9 | 9 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D15_M1_GC2 | PR-2-01 | d15 | 1 | 2 | wt | RI | GC | 3 | 2 | 1 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D15_M1_GC3 | PR-2-01 | d15 | 1 | 3 | wt | LI | GC | 4 | 14 | 2 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D15_M2_GC4 | PR-2-01 | d15 | 2 | 4 | wt | RP | GC | 5 | 10 | 11 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D15_M3_GC5 | PR-2-01 | d15 | 3 | 5 | wt | RP | GC | 6 | 7 | 4 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
D20_M24_GC115 | PR-1-04 | d20 | 24 | 115 | wt | RP | GC | 72 | 16 | 16 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D20_M25_GC116 | PR-1-02 | d20 | 25 | 116 | wt | RP | GC | 65 | 8 | 8 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D20_M25_GC117 | PR-1-03 | d20 | 25 | 117 | wt | RP | GC | 68 | 9 | 9 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D20_M25_GC118 | PR-1-08 | d20 | 25 | 118 | wt | RP | GC | 66 | 15 | 15 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D20_M25_GC119 | PR-1-08 | d20 | 25 | 119 | wt | RP | GC | 67 | 16 | 16 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
119 rows × 12 columns
In [7]:
Copied!
len(metadata.index.unique())
len(metadata.index.unique())
Out[7]:
119
In [8]:
Copied!
metadata.imm_duration.value_counts()
metadata.imm_duration.value_counts()
Out[8]:
imm_duration d20 67 d15 52 Name: count, dtype: int64
In [9]:
Copied!
trees = ut.load_trees(metadata, results, ranking_subdir)
len(trees)
trees = ut.load_trees(metadata, results, ranking_subdir)
len(trees)
Out[9]:
119
In [10]:
Copied!
stat = "REI"
# taus = np.logspace(-2, 0, 10)
taus = np.linspace(0.05, 1, 20)
# taus = [0.05, 0.1, 1]
# stat = "LBI"
# taus = np.logspace(-1, 1, 50)
stat = "REI"
# taus = np.logspace(-2, 0, 10)
taus = np.linspace(0.05, 1, 20)
# taus = [0.05, 0.1, 1]
# stat = "LBI"
# taus = np.logspace(-1, 1, 50)
In [11]:
Copied!
df = pd.DataFrame()
row = 0
for tau in taus:
for key, tree in trees.items():
pr, mouse, gc = key.split("_")
mouse = mouse.lstrip("mouse")
ut.burst_stat(tree, stat, tau)
gc = gc.lstrip("GC")
df.loc[row, "uid"] = key
df.loc[row, "τ"] = tau
df.loc[row, "ngs_id"] = metadata.ngs_id[key]
df.loc[row, "mouse"] = str(metadata.mouse[key])
df.loc[row, "GC"] = str(metadata.gc[key])
df.loc[row, "time"] = metadata.imm_duration[key]
abundances = np.array([node.abundance for node in tree.tree.traverse()])
if len(tree.tree.children) == 1:
print(f"NOTE: tree {key} has only one root clade")
root = tree.tree
clade_sizes = [sum(node.abundance for node in child.traverse()) for child in root.children]
df.loc[row, "cells sampled"] = sum(abundances)
df.loc[row, "normalized dominance score"] = max(clade_sizes) / sum(clade_sizes)
df.loc[row, f"maximum {stat}"] = np.nanmax([getattr(node, stat) for node in tree.tree.traverse()])
# note: additive delta_bind for now
delta_bind_dat = []
for node in tree.tree.traverse():
if not np.isnan(node.delta_bind):
for _ in range(node.abundance):
delta_bind_dat.append(node.delta_bind)
df.loc[row, "max Δaffinity"] = np.nanmax(delta_bind_dat)
df.loc[row, "95th percentile Δaffinity"] = np.percentile(delta_bind_dat, 95)
df.loc[row, "mean Δaffinity"] = np.nanmean(delta_bind_dat)
df.loc[row, "median Δaffinity"] = np.nanmedian(delta_bind_dat)
max_stat_idx = np.nanargmax([getattr(node, stat) for node in tree.tree.traverse()])
df.loc[row, f"id of max {stat} node"] = [node.name for node in tree.tree.traverse()][max_stat_idx]
df.loc[row, f"mutations in max {stat} node"] = [node.aa_substitutions for node in tree.tree.traverse()][max_stat_idx]
df.loc[row, f"Δaffinity of max {stat} node"] = [node.delta_bind for node in tree.tree.traverse()][max_stat_idx]
df.loc[row, f"ΔΔaffinity of max {stat} node"] = [node.delta_bind - (node.up.delta_bind if node.up is not None else 0) for node in tree.tree.traverse()][max_stat_idx]
df.loc[row, f"max {stat} node is root"] = max_stat_idx == 0
row += 1
df
df = pd.DataFrame()
row = 0
for tau in taus:
for key, tree in trees.items():
pr, mouse, gc = key.split("_")
mouse = mouse.lstrip("mouse")
ut.burst_stat(tree, stat, tau)
gc = gc.lstrip("GC")
df.loc[row, "uid"] = key
df.loc[row, "τ"] = tau
df.loc[row, "ngs_id"] = metadata.ngs_id[key]
df.loc[row, "mouse"] = str(metadata.mouse[key])
df.loc[row, "GC"] = str(metadata.gc[key])
df.loc[row, "time"] = metadata.imm_duration[key]
abundances = np.array([node.abundance for node in tree.tree.traverse()])
if len(tree.tree.children) == 1:
print(f"NOTE: tree {key} has only one root clade")
root = tree.tree
clade_sizes = [sum(node.abundance for node in child.traverse()) for child in root.children]
df.loc[row, "cells sampled"] = sum(abundances)
df.loc[row, "normalized dominance score"] = max(clade_sizes) / sum(clade_sizes)
df.loc[row, f"maximum {stat}"] = np.nanmax([getattr(node, stat) for node in tree.tree.traverse()])
# note: additive delta_bind for now
delta_bind_dat = []
for node in tree.tree.traverse():
if not np.isnan(node.delta_bind):
for _ in range(node.abundance):
delta_bind_dat.append(node.delta_bind)
df.loc[row, "max Δaffinity"] = np.nanmax(delta_bind_dat)
df.loc[row, "95th percentile Δaffinity"] = np.percentile(delta_bind_dat, 95)
df.loc[row, "mean Δaffinity"] = np.nanmean(delta_bind_dat)
df.loc[row, "median Δaffinity"] = np.nanmedian(delta_bind_dat)
max_stat_idx = np.nanargmax([getattr(node, stat) for node in tree.tree.traverse()])
df.loc[row, f"id of max {stat} node"] = [node.name for node in tree.tree.traverse()][max_stat_idx]
df.loc[row, f"mutations in max {stat} node"] = [node.aa_substitutions for node in tree.tree.traverse()][max_stat_idx]
df.loc[row, f"Δaffinity of max {stat} node"] = [node.delta_bind for node in tree.tree.traverse()][max_stat_idx]
df.loc[row, f"ΔΔaffinity of max {stat} node"] = [node.delta_bind - (node.up.delta_bind if node.up is not None else 0) for node in tree.tree.traverse()][max_stat_idx]
df.loc[row, f"max {stat} node is root"] = max_stat_idx == 0
row += 1
df
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade
NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade NOTE: tree D20_M22_GC90 has only one root clade
NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
NOTE: tree D20_M20_GC61 has only one root clade NOTE: tree D20_M20_GC62 has only one root clade NOTE: tree D20_M20_GC64 has only one root clade NOTE: tree D20_M21_GC71 has only one root clade
NOTE: tree D20_M22_GC90 has only one root clade NOTE: tree D20_M22_GC97 has only one root clade NOTE: tree D20_M23_GC106 has only one root clade
Out[11]:
uid | τ | ngs_id | mouse | GC | time | cells sampled | normalized dominance score | maximum REI | max Δaffinity | 95th percentile Δaffinity | mean Δaffinity | median Δaffinity | id of max REI node | mutations in max REI node | Δaffinity of max REI node | ΔΔaffinity of max REI node | max REI node is root | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | D15_M1_GC1 | 0.05 | PR-2-01 | 1 | 1 | d15 | 67.0 | 0.462687 | 0.062726 | 1.57098 | 1.323038 | 0.497280 | 0.773400 | seq36 | A105(H)G S109(L)R | 1.24129 | 0.79500 | False |
1 | D15_M1_GC2 | 0.05 | PR-2-01 | 1 | 2 | d15 | 79.0 | 0.435897 | 0.028576 | 1.44539 | 1.299505 | 0.381382 | 0.397360 | seq41 | D107(H)E | 0.20896 | 0.20896 | False |
2 | D15_M1_GC3 | 0.05 | PR-2-01 | 1 | 3 | d15 | 73.0 | 0.506849 | 0.048843 | 1.96260 | 1.291822 | 0.504928 | 0.535100 | seq41 | A105(H)G | 0.44629 | 0.44629 | False |
3 | D15_M2_GC4 | 0.05 | PR-2-01 | 2 | 4 | d15 | 44.0 | 0.340909 | 0.093466 | 0.88793 | 0.801909 | 0.092011 | 0.287630 | seq8 | A105(H)G | 0.44629 | 0.44629 | False |
4 | D15_M3_GC5 | 0.05 | PR-2-01 | 3 | 5 | d15 | 78.0 | 0.213333 | 0.040110 | 1.59914 | 1.107394 | -0.214632 | 0.000000 | naive | 0.00000 | 0.00000 | True | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2375 | D20_M24_GC115 | 1.00 | PR-1-04 | 24 | 115 | d20 | 85.0 | 0.882353 | 1.000000 | 1.94591 | 1.830034 | 1.094204 | 1.046480 | naive | 0.00000 | 0.00000 | True | |
2376 | D20_M25_GC116 | 1.00 | PR-1-02 | 25 | 116 | d20 | 78.0 | 0.884615 | 1.000000 | 2.38422 | 1.655125 | 0.784909 | 1.105020 | naive | 0.00000 | 0.00000 | True | |
2377 | D20_M25_GC117 | 1.00 | PR-1-03 | 25 | 117 | d20 | 76.0 | 0.407895 | 1.000000 | 1.78818 | 1.578093 | 0.632779 | 0.608315 | naive | 0.00000 | 0.00000 | True | |
2378 | D20_M25_GC118 | 1.00 | PR-1-08 | 25 | 118 | d20 | 70.0 | 0.985714 | 1.000000 | 1.42049 | 1.351937 | 0.288105 | 0.492355 | naive | 0.00000 | 0.00000 | True | |
2379 | D20_M25_GC119 | 1.00 | PR-1-08 | 25 | 119 | d20 | 81.0 | 0.580247 | 1.000000 | 1.68377 | 1.527660 | 0.582019 | 0.700545 | naive | 0.00000 | 0.00000 | True |
2380 rows × 18 columns
In [12]:
Copied!
df.groupby("τ")[f"max {stat} node is root"].agg(lambda group: group.astype(int).sum() / len(group)).plot(xlabel="τ", ylabel=f"fraction GCs with root as max {stat} node", figsize=(5, 4));
# plt.xscale("log")
df.groupby("τ")[f"max {stat} node is root"].agg(lambda group: group.astype(int).sum() / len(group)).plot(xlabel="τ", ylabel=f"fraction GCs with root as max {stat} node", figsize=(5, 4));
# plt.xscale("log")
In [13]:
Copied!
taus
taus
Out[13]:
array([0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1. ])
In [14]:
Copied!
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"Δaffinity of max {stat} node", y="median Δaffinity", col="τ",
col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
facet_kws={"sharex": False, "sharey": True})
for tau, ax in zip(taus, g.axes):
ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"Δaffinity of max {stat} node", y="median Δaffinity", col="τ",
col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
facet_kws={"sharex": False, "sharey": True})
for tau, ax in zip(taus, g.axes):
ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
In [15]:
Copied!
sns.scatterplot(data=df.loc[df.τ == taus[0]], x="cells sampled", y=f"median Δaffinity", hue="mouse", style="ngs_id", legend=False)
plt.show()
sns.scatterplot(data=df.loc[df.τ == taus[0]], x="cells sampled", y=f"max Δaffinity", hue="mouse", style="ngs_id", legend=False)
plt.show()
sns.scatterplot(data=df.loc[df.τ == taus[0]], x="cells sampled", y=f"median Δaffinity", hue="mouse", style="ngs_id", legend=False)
plt.show()
sns.scatterplot(data=df.loc[df.τ == taus[0]], x="cells sampled", y=f"max Δaffinity", hue="mouse", style="ngs_id", legend=False)
plt.show()
In [16]:
Copied!
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x="cells sampled", y=f"maximum REI", col="τ",
col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
facet_kws={"sharex": True, "sharey": False})
for tau, ax in zip(taus, g.axes):
ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x="cells sampled", y=f"maximum REI", col="τ",
col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
facet_kws={"sharex": True, "sharey": False})
for tau, ax in zip(taus, g.axes):
ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
In [17]:
Copied!
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"maximum REI", y="median Δaffinity", col="τ",
col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
facet_kws={"sharex": False, "sharey": True})
for tau, ax in zip(taus, g.axes):
ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"maximum REI", y="median Δaffinity", col="τ",
col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
facet_kws={"sharex": False, "sharey": True})
for tau, ax in zip(taus, g.axes):
ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
In [18]:
Copied!
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"maximum REI", y=f"Δaffinity of max {stat} node", col="τ",
col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
facet_kws={"sharex": False, "sharey": False})
for tau, ax in zip(taus, g.axes):
ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
g = sns.relplot(data=df.loc[df.τ.isin(taus)], x=f"maximum REI", y=f"Δaffinity of max {stat} node", col="τ",
col_wrap=5, hue="max REI node is root", height=2, s=10, legend=False,
facet_kws={"sharex": False, "sharey": False})
for tau, ax in zip(taus, g.axes):
ax.set_title(f"τ = {tau:.2f}")
plt.tight_layout()
plt.show()
In [19]:
Copied!
cor_df = pd.DataFrame()
for gc, group_df in df.groupby("τ"):
ρ_node, p_node = sp.stats.spearmanr(group_df[f"Δaffinity of max {stat} node"], group_df[f"maximum {stat}"])
ρ_gc, p_gc = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"maximum {stat}"])
ρ_aff, p_aff = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"Δaffinity of max {stat} node"])
cor_df.loc[gc, "ρ_node"] = ρ_node
cor_df.loc[gc, "ρ_gc"] = ρ_gc
cor_df.loc[gc, "p_node"] = p_node
cor_df.loc[gc, "p_gc"] = p_gc
cor_df.loc[gc, "ρ_aff"] = ρ_aff
cor_df.loc[gc, "p_aff"] = p_aff
plt.figure(figsize=(5, 7))
plt.subplot(2, 1, 1)
plt.title(f"correlations with maximum {stat}")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_aff", label=f"median Δaffinity Vs Δaffinity of max {stat} node")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_gc", label=f"median Δaffinity Vs max {stat}")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_node", label=f"Δaffinity of max {stat} node Vs max {stat}")
plt.axhline(0, color="k", ls="--", lw=1)
# plt.xlabel("τ")
plt.ylabel("Spearman ρ")
plt.ylim(-1, 1)
plt.legend()
plt.subplot(2, 1, 2)
sns.lineplot(data=cor_df, x=cor_df.index, y="p_aff")
sns.lineplot(data=cor_df, x=cor_df.index, y="p_gc")
sns.lineplot(data=cor_df, x=cor_df.index, y="p_node")
plt.xlabel("τ")
plt.ylabel("p-value")
plt.yscale("log")
plt.show()
cor_df = pd.DataFrame()
for gc, group_df in df.groupby("τ"):
ρ_node, p_node = sp.stats.spearmanr(group_df[f"Δaffinity of max {stat} node"], group_df[f"maximum {stat}"])
ρ_gc, p_gc = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"maximum {stat}"])
ρ_aff, p_aff = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"Δaffinity of max {stat} node"])
cor_df.loc[gc, "ρ_node"] = ρ_node
cor_df.loc[gc, "ρ_gc"] = ρ_gc
cor_df.loc[gc, "p_node"] = p_node
cor_df.loc[gc, "p_gc"] = p_gc
cor_df.loc[gc, "ρ_aff"] = ρ_aff
cor_df.loc[gc, "p_aff"] = p_aff
plt.figure(figsize=(5, 7))
plt.subplot(2, 1, 1)
plt.title(f"correlations with maximum {stat}")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_aff", label=f"median Δaffinity Vs Δaffinity of max {stat} node")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_gc", label=f"median Δaffinity Vs max {stat}")
sns.lineplot(data=cor_df, x=cor_df.index, y="ρ_node", label=f"Δaffinity of max {stat} node Vs max {stat}")
plt.axhline(0, color="k", ls="--", lw=1)
# plt.xlabel("τ")
plt.ylabel("Spearman ρ")
plt.ylim(-1, 1)
plt.legend()
plt.subplot(2, 1, 2)
sns.lineplot(data=cor_df, x=cor_df.index, y="p_aff")
sns.lineplot(data=cor_df, x=cor_df.index, y="p_gc")
sns.lineplot(data=cor_df, x=cor_df.index, y="p_node")
plt.xlabel("τ")
plt.ylabel("p-value")
plt.yscale("log")
plt.show()
/tmp/ipykernel_351/1009681117.py:3: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined. ρ_node, p_node = sp.stats.spearmanr(group_df[f"Δaffinity of max {stat} node"], group_df[f"maximum {stat}"]) /tmp/ipykernel_351/1009681117.py:4: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined. ρ_gc, p_gc = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"maximum {stat}"]) /tmp/ipykernel_351/1009681117.py:5: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined. ρ_aff, p_aff = sp.stats.spearmanr(group_df[f"median Δaffinity"], group_df[f"Δaffinity of max {stat} node"])
In [20]:
Copied!
# outcome = "mean Δaffinity"
# outcome = "median Δaffinity"
# outcome = "95th percentile Δaffinity"
outcome = f"Δaffinity of max {stat} node"
# outcome = f"ΔΔaffinity of max {stat} node"
# outcome = "mean Δaffinity"
# outcome = "median Δaffinity"
# outcome = "95th percentile Δaffinity"
outcome = f"Δaffinity of max {stat} node"
# outcome = f"ΔΔaffinity of max {stat} node"
In [21]:
Copied!
# tau = taus[np.where(tau_cor_df.ρ > 0, tau_cor_df.p, 1).argmin()]
tau = taus[9]
tau
# tau = taus[np.where(tau_cor_df.ρ > 0, tau_cor_df.p, 1).argmin()]
tau = taus[9]
tau
Out[21]:
np.float64(0.49999999999999994)
Need to recompute tree stats using the chosen $\tau$
In [22]:
Copied!
for tree in trees.values():
ut.burst_stat(tree, stat, tau)
for tree in trees.values():
ut.burst_stat(tree, stat, tau)
In [23]:
Copied!
df = df.loc[df["τ"] == tau].sort_values(f"maximum {stat}").reset_index()
df
df = df.loc[df["τ"] == tau].sort_values(f"maximum {stat}").reset_index()
df
Out[23]:
index | uid | τ | ngs_id | mouse | GC | time | cells sampled | normalized dominance score | maximum REI | max Δaffinity | 95th percentile Δaffinity | mean Δaffinity | median Δaffinity | id of max REI node | mutations in max REI node | Δaffinity of max REI node | ΔΔaffinity of max REI node | max REI node is root | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1166 | D20_M22_GC96 | 0.5 | PR-1-07 | 22 | 96 | d20 | 39.0 | 0.435897 | 0.038462 | 1.85465 | 1.700965 | 1.005091 | 1.161450 | seq4 | Y88(H)F A105(H)G N108(L)K | 1.35896 | 0.05302 | False |
1 | 1159 | D20_M22_GC89 | 0.5 | PR-1-03 | 22 | 89 | d20 | 87.0 | 0.413793 | 0.047593 | 1.83375 | 1.545630 | 0.782702 | 0.714810 | seq33 | Y103(H)F F10(L)L S12(L)T Y42(L)H N108(L)K L116... | 1.46106 | 0.99422 | False |
2 | 1158 | D20_M22_GC88 | 0.5 | PR-1-03 | 22 | 88 | d20 | 82.0 | 0.390244 | 0.048780 | 1.96522 | 1.552431 | 0.109996 | 0.138920 | seq15 | T96(H)A P72(L)S N93(L)T L99(L)M L116(L)P | 0.08799 | 0.07656 | False |
3 | 1156 | D20_M22_GC86 | 0.5 | PR-1-02 | 22 | 86 | d20 | 76.0 | 0.684211 | 0.050164 | 2.22660 | 1.989688 | 0.534087 | 0.812585 | seq29 | I76(H)F L99(L)W L116(L)P | -0.02546 | 0.00067 | False |
4 | 1146 | D20_M21_GC76 | 0.5 | PR-1-05 | 21 | 76 | d20 | 69.0 | 0.768116 | 0.050767 | 2.12465 | 1.787844 | 0.497640 | 1.008670 | 11 | A105(H)G A40(L)G | 1.25043 | 0.00000 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
114 | 1176 | D20_M23_GC106 | 0.5 | PR-1-04 | 23 | 106 | d20 | 83.0 | 1.000000 | 0.312688 | 1.41209 | 1.081766 | 0.828434 | 0.957790 | seq12 | S64(H)T N92(H)K Y42(L)C N108(L)H | 0.95779 | -0.02745 | False |
115 | 1110 | D15_M16_GC40 | 0.5 | PR-2-04 | 16 | 40 | d15 | 51.0 | 0.862745 | 0.331189 | 1.90813 | 1.403847 | 0.866527 | 1.199390 | seq13 | A105(H)G Y66(L)F S109(L)R | 1.19939 | 0.75310 | False |
116 | 1173 | D20_M23_GC103 | 0.5 | PR-1-03 | 23 | 103 | d20 | 81.0 | 0.962963 | 0.332031 | 1.92757 | 1.245264 | 0.520700 | 0.411910 | seq12 | I78(H)L N92(H)T Y42(L)C | 0.35030 | 0.35030 | False |
117 | 1105 | D15_M14_GC35 | 0.5 | PR-2-03 | 14 | 35 | d15 | 57.0 | 0.947368 | 0.338584 | 2.35414 | 2.071883 | 1.101941 | 1.250430 | seq7 | A105(H)G A40(L)G | 1.25043 | 0.80414 | False |
118 | 1101 | D15_M13_GC31 | 0.5 | PR-2-03 | 13 | 31 | d15 | 72.0 | 0.845070 | 0.368842 | 1.80017 | 1.484965 | 0.820345 | 0.919910 | seq1 | I78(H)F Y42(L)S N108(L)K | 0.91991 | 0.91991 | False |
119 rows × 19 columns
In [24]:
Copied!
plt.figure(figsize=(4, 3))
sns.regplot(data=df, x=f"maximum {stat}", y=outcome, scatter_kws=dict(s=10))
plt.title(f"τ = {tau:.2f}")
plt.show()
# plt.figure(figsize=(4, 3))
# sns.regplot(data=df.loc[df[outcome] != 0], x=f"maximum {stat}", y=outcome, scatter_kws=dict(s=10), label="no roots")
# plt.title(f"τ = {tau:.2f}")
# plt.legend()
# plt.show()
plt.figure(figsize=(4, 3))
sns.regplot(data=df, x=f"maximum {stat}", y="median Δaffinity", scatter_kws=dict(s=10))
plt.title(f"τ = {tau:.2f}")
plt.show()
plt.figure(figsize=(4, 3))
sns.regplot(data=df, x=f"maximum {stat}", y=outcome, scatter_kws=dict(s=10))
plt.title(f"τ = {tau:.2f}")
plt.show()
# plt.figure(figsize=(4, 3))
# sns.regplot(data=df.loc[df[outcome] != 0], x=f"maximum {stat}", y=outcome, scatter_kws=dict(s=10), label="no roots")
# plt.title(f"τ = {tau:.2f}")
# plt.legend()
# plt.show()
plt.figure(figsize=(4, 3))
sns.regplot(data=df, x=f"maximum {stat}", y="median Δaffinity", scatter_kws=dict(s=10))
plt.title(f"τ = {tau:.2f}")
plt.show()
In [25]:
Copied!
thresh = 0.25
thresh = 0.25
In [26]:
Copied!
df.to_csv(f"{output_dir}/data.csv")
df.to_csv(f"{output_dir}/data.csv")
Rank plot of total GC cell abundance¶
In [27]:
Copied!
plt.figure(figsize=(15, 3))
ax = sns.barplot(data=df.sort_values(["ngs_id", "mouse", "cells sampled"]), x="GC", y="cells sampled", #hue="mouse",
# hue_order=df.mouse.unique(),
legend=False,
dodge=False)
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig(f"{output_dir}/abundances.svg")
plt.show()
plt.figure(figsize=(15, 3))
ax = sns.barplot(data=df.sort_values(["ngs_id", "mouse", "cells sampled"]), x="GC", y="cells sampled", #hue="mouse",
# hue_order=df.mouse.unique(),
legend=False,
dodge=False)
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig(f"{output_dir}/abundances.svg")
plt.show()
Plot tree shape stats in a scatter plot, and render some example trees¶
For each tree we render it colored by fitness statistic, then by affinity, and by fitness stat
In [28]:
Copied!
df
df
Out[28]:
index | uid | τ | ngs_id | mouse | GC | time | cells sampled | normalized dominance score | maximum REI | max Δaffinity | 95th percentile Δaffinity | mean Δaffinity | median Δaffinity | id of max REI node | mutations in max REI node | Δaffinity of max REI node | ΔΔaffinity of max REI node | max REI node is root | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1166 | D20_M22_GC96 | 0.5 | PR-1-07 | 22 | 96 | d20 | 39.0 | 0.435897 | 0.038462 | 1.85465 | 1.700965 | 1.005091 | 1.161450 | seq4 | Y88(H)F A105(H)G N108(L)K | 1.35896 | 0.05302 | False |
1 | 1159 | D20_M22_GC89 | 0.5 | PR-1-03 | 22 | 89 | d20 | 87.0 | 0.413793 | 0.047593 | 1.83375 | 1.545630 | 0.782702 | 0.714810 | seq33 | Y103(H)F F10(L)L S12(L)T Y42(L)H N108(L)K L116... | 1.46106 | 0.99422 | False |
2 | 1158 | D20_M22_GC88 | 0.5 | PR-1-03 | 22 | 88 | d20 | 82.0 | 0.390244 | 0.048780 | 1.96522 | 1.552431 | 0.109996 | 0.138920 | seq15 | T96(H)A P72(L)S N93(L)T L99(L)M L116(L)P | 0.08799 | 0.07656 | False |
3 | 1156 | D20_M22_GC86 | 0.5 | PR-1-02 | 22 | 86 | d20 | 76.0 | 0.684211 | 0.050164 | 2.22660 | 1.989688 | 0.534087 | 0.812585 | seq29 | I76(H)F L99(L)W L116(L)P | -0.02546 | 0.00067 | False |
4 | 1146 | D20_M21_GC76 | 0.5 | PR-1-05 | 21 | 76 | d20 | 69.0 | 0.768116 | 0.050767 | 2.12465 | 1.787844 | 0.497640 | 1.008670 | 11 | A105(H)G A40(L)G | 1.25043 | 0.00000 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
114 | 1176 | D20_M23_GC106 | 0.5 | PR-1-04 | 23 | 106 | d20 | 83.0 | 1.000000 | 0.312688 | 1.41209 | 1.081766 | 0.828434 | 0.957790 | seq12 | S64(H)T N92(H)K Y42(L)C N108(L)H | 0.95779 | -0.02745 | False |
115 | 1110 | D15_M16_GC40 | 0.5 | PR-2-04 | 16 | 40 | d15 | 51.0 | 0.862745 | 0.331189 | 1.90813 | 1.403847 | 0.866527 | 1.199390 | seq13 | A105(H)G Y66(L)F S109(L)R | 1.19939 | 0.75310 | False |
116 | 1173 | D20_M23_GC103 | 0.5 | PR-1-03 | 23 | 103 | d20 | 81.0 | 0.962963 | 0.332031 | 1.92757 | 1.245264 | 0.520700 | 0.411910 | seq12 | I78(H)L N92(H)T Y42(L)C | 0.35030 | 0.35030 | False |
117 | 1105 | D15_M14_GC35 | 0.5 | PR-2-03 | 14 | 35 | d15 | 57.0 | 0.947368 | 0.338584 | 2.35414 | 2.071883 | 1.101941 | 1.250430 | seq7 | A105(H)G A40(L)G | 1.25043 | 0.80414 | False |
118 | 1101 | D15_M13_GC31 | 0.5 | PR-2-03 | 13 | 31 | d15 | 72.0 | 0.845070 | 0.368842 | 1.80017 | 1.484965 | 0.820345 | 0.919910 | seq1 | I78(H)F Y42(L)S N108(L)K | 0.91991 | 0.91991 | False |
119 rows × 19 columns
In [29]:
Copied!
pc
pc
Out[29]:
<module 'utils.phenotype_colorscales' from '/fh/fast/matsen_e/shared/replay/pipeline-cache/d3/1ee57a4dd94f4f450fcb7b2312a558/utils/phenotype_colorscales.py'>
In [30]:
Copied!
outcome
outcome
Out[30]:
'Δaffinity of max REI node'
In [31]:
Copied!
# stat cmap
cmap = "viridis"
vmin = min(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
vmax = max(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
xmargin=0.05
ymargin=0.03
xticks = [0.25, 0.5, 0.75, 1.0]
yticks = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35]
min_size = df["cells sampled"].min()
max_size = df["cells sampled"].max()
print(min_size, max_size)
# Set up a common normalization for sizes
size_norm = Normalize(vmin=min_size, vmax=max_size)
size_range = (10, 100)
# legend_sizes = [min_size, (min_size + max_size) / 2, max_size]
# legend_markers = [Line2D([0], [0], marker='o', color='w', label=f'{int(size)} cells',
# markersize=(20 + (size - min_size) / (max_size - min_size) * 180),
# markerfacecolor='gray', markeredgecolor='gray')
# for size in legend_sizes]
# size_range = (min_cells_sampled, max_cells_sampled)
for time, time_df in df.groupby("time"):
print(time, flush=True)
time_df_mouse_sorted = time_df.copy()
time_df_mouse_sorted["mouse"] = time_df_mouse_sorted["mouse"].astype(int)
time_df_mouse_sorted.sort_values("mouse", inplace=True)
time_df_mouse_sorted["mouse"] = time_df_mouse_sorted["mouse"].astype(str)
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18,3.5), sharey=True, sharex=True)
# fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, sharex=True)
sns.scatterplot(data=time_df_mouse_sorted,
x="normalized dominance score",
y=f"maximum {stat}",
hue="mouse",
size="cells sampled",
sizes=size_range,
# size_norm=size_norm,
# style="time",
clip_on=False,
alpha=0.9,
legend="brief",
size_norm=size_norm,
ax=ax1)
# ax1.legend(handles=legend_markers, title="mouse", loc='upper right', bbox_to_anchor=(1, 1))
# # Get the existing hue legend and add it
# handles, labels = ax1.get_legend_handles_labels()
# ax1.legend(handles=handles + legend_markers, labels=labels + [f'{int(size)} cells' for size in legend_sizes],
# title="Outcome & Cells Sampled", loc='upper right', bbox_to_anchor=(1, 1))
ax1.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False, ncol=2)
# ax1.legend(loc='upper left', frameon=True, ncol=3, fontsize=8)
# ax1.legend(frameon=False)
# sns.move_legend(ax1, "upper left", bbox_to_anchor=(1, 1))
ax2.set_title("median Δaffinity")
sns.scatterplot(data=time_df_mouse_sorted,
x="normalized dominance score",
y=f"maximum {stat}",
hue="median Δaffinity",
size="cells sampled",
sizes=size_range,
# style="time",
clip_on=False,
alpha=0.9,
palette=pc.affinity_trees.cmap, hue_norm=pc.affinity_trees.norm,
legend="brief",
size_norm=size_norm,
ax=ax2)
ax2.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)
ax3.set_title(outcome)
sns.scatterplot(data=time_df_mouse_sorted,
x="normalized dominance score",
y=f"maximum {stat}",
hue=outcome,
size="cells sampled",
sizes=size_range,
# style="time",
clip_on=False,
alpha=0.9,
palette=pc.affinity_trees.cmap, hue_norm=pc.affinity_trees.norm,
legend="brief",
size_norm=size_norm,
ax=ax3)
# move legend for ax3 outside of plot to the right and remove the frame
ax3.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)
# plt.tight_layout()
ax1.set_xlim(0.15 - xmargin, 1 + xmargin)
ax1.set_ylim(vmin + ymargin, vmax + ymargin)
ax1.set_xticks(xticks)
ax1.set_yticks(yticks)
# adjust width between subplots
plt.subplots_adjust(wspace=1.2)
plt.savefig(f"{output_dir}/scatter_{time}.svg")
plt.show()
# stat cmap
cmap = "viridis"
vmin = min(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
vmax = max(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
xmargin=0.05
ymargin=0.03
xticks = [0.25, 0.5, 0.75, 1.0]
yticks = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35]
min_size = df["cells sampled"].min()
max_size = df["cells sampled"].max()
print(min_size, max_size)
# Set up a common normalization for sizes
size_norm = Normalize(vmin=min_size, vmax=max_size)
size_range = (10, 100)
# legend_sizes = [min_size, (min_size + max_size) / 2, max_size]
# legend_markers = [Line2D([0], [0], marker='o', color='w', label=f'{int(size)} cells',
# markersize=(20 + (size - min_size) / (max_size - min_size) * 180),
# markerfacecolor='gray', markeredgecolor='gray')
# for size in legend_sizes]
# size_range = (min_cells_sampled, max_cells_sampled)
for time, time_df in df.groupby("time"):
print(time, flush=True)
time_df_mouse_sorted = time_df.copy()
time_df_mouse_sorted["mouse"] = time_df_mouse_sorted["mouse"].astype(int)
time_df_mouse_sorted.sort_values("mouse", inplace=True)
time_df_mouse_sorted["mouse"] = time_df_mouse_sorted["mouse"].astype(str)
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18,3.5), sharey=True, sharex=True)
# fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, sharex=True)
sns.scatterplot(data=time_df_mouse_sorted,
x="normalized dominance score",
y=f"maximum {stat}",
hue="mouse",
size="cells sampled",
sizes=size_range,
# size_norm=size_norm,
# style="time",
clip_on=False,
alpha=0.9,
legend="brief",
size_norm=size_norm,
ax=ax1)
# ax1.legend(handles=legend_markers, title="mouse", loc='upper right', bbox_to_anchor=(1, 1))
# # Get the existing hue legend and add it
# handles, labels = ax1.get_legend_handles_labels()
# ax1.legend(handles=handles + legend_markers, labels=labels + [f'{int(size)} cells' for size in legend_sizes],
# title="Outcome & Cells Sampled", loc='upper right', bbox_to_anchor=(1, 1))
ax1.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False, ncol=2)
# ax1.legend(loc='upper left', frameon=True, ncol=3, fontsize=8)
# ax1.legend(frameon=False)
# sns.move_legend(ax1, "upper left", bbox_to_anchor=(1, 1))
ax2.set_title("median Δaffinity")
sns.scatterplot(data=time_df_mouse_sorted,
x="normalized dominance score",
y=f"maximum {stat}",
hue="median Δaffinity",
size="cells sampled",
sizes=size_range,
# style="time",
clip_on=False,
alpha=0.9,
palette=pc.affinity_trees.cmap, hue_norm=pc.affinity_trees.norm,
legend="brief",
size_norm=size_norm,
ax=ax2)
ax2.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)
ax3.set_title(outcome)
sns.scatterplot(data=time_df_mouse_sorted,
x="normalized dominance score",
y=f"maximum {stat}",
hue=outcome,
size="cells sampled",
sizes=size_range,
# style="time",
clip_on=False,
alpha=0.9,
palette=pc.affinity_trees.cmap, hue_norm=pc.affinity_trees.norm,
legend="brief",
size_norm=size_norm,
ax=ax3)
# move legend for ax3 outside of plot to the right and remove the frame
ax3.legend(loc='upper left', bbox_to_anchor=(1, 1), frameon=False)
# plt.tight_layout()
ax1.set_xlim(0.15 - xmargin, 1 + xmargin)
ax1.set_ylim(vmin + ymargin, vmax + ymargin)
ax1.set_xticks(xticks)
ax1.set_yticks(yticks)
# adjust width between subplots
plt.subplots_adjust(wspace=1.2)
plt.savefig(f"{output_dir}/scatter_{time}.svg")
plt.show()
25.0 94.0 d15
d20
In [32]:
Copied!
df
df
Out[32]:
index | uid | τ | ngs_id | mouse | GC | time | cells sampled | normalized dominance score | maximum REI | max Δaffinity | 95th percentile Δaffinity | mean Δaffinity | median Δaffinity | id of max REI node | mutations in max REI node | Δaffinity of max REI node | ΔΔaffinity of max REI node | max REI node is root | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1166 | D20_M22_GC96 | 0.5 | PR-1-07 | 22 | 96 | d20 | 39.0 | 0.435897 | 0.038462 | 1.85465 | 1.700965 | 1.005091 | 1.161450 | seq4 | Y88(H)F A105(H)G N108(L)K | 1.35896 | 0.05302 | False |
1 | 1159 | D20_M22_GC89 | 0.5 | PR-1-03 | 22 | 89 | d20 | 87.0 | 0.413793 | 0.047593 | 1.83375 | 1.545630 | 0.782702 | 0.714810 | seq33 | Y103(H)F F10(L)L S12(L)T Y42(L)H N108(L)K L116... | 1.46106 | 0.99422 | False |
2 | 1158 | D20_M22_GC88 | 0.5 | PR-1-03 | 22 | 88 | d20 | 82.0 | 0.390244 | 0.048780 | 1.96522 | 1.552431 | 0.109996 | 0.138920 | seq15 | T96(H)A P72(L)S N93(L)T L99(L)M L116(L)P | 0.08799 | 0.07656 | False |
3 | 1156 | D20_M22_GC86 | 0.5 | PR-1-02 | 22 | 86 | d20 | 76.0 | 0.684211 | 0.050164 | 2.22660 | 1.989688 | 0.534087 | 0.812585 | seq29 | I76(H)F L99(L)W L116(L)P | -0.02546 | 0.00067 | False |
4 | 1146 | D20_M21_GC76 | 0.5 | PR-1-05 | 21 | 76 | d20 | 69.0 | 0.768116 | 0.050767 | 2.12465 | 1.787844 | 0.497640 | 1.008670 | 11 | A105(H)G A40(L)G | 1.25043 | 0.00000 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
114 | 1176 | D20_M23_GC106 | 0.5 | PR-1-04 | 23 | 106 | d20 | 83.0 | 1.000000 | 0.312688 | 1.41209 | 1.081766 | 0.828434 | 0.957790 | seq12 | S64(H)T N92(H)K Y42(L)C N108(L)H | 0.95779 | -0.02745 | False |
115 | 1110 | D15_M16_GC40 | 0.5 | PR-2-04 | 16 | 40 | d15 | 51.0 | 0.862745 | 0.331189 | 1.90813 | 1.403847 | 0.866527 | 1.199390 | seq13 | A105(H)G Y66(L)F S109(L)R | 1.19939 | 0.75310 | False |
116 | 1173 | D20_M23_GC103 | 0.5 | PR-1-03 | 23 | 103 | d20 | 81.0 | 0.962963 | 0.332031 | 1.92757 | 1.245264 | 0.520700 | 0.411910 | seq12 | I78(H)L N92(H)T Y42(L)C | 0.35030 | 0.35030 | False |
117 | 1105 | D15_M14_GC35 | 0.5 | PR-2-03 | 14 | 35 | d15 | 57.0 | 0.947368 | 0.338584 | 2.35414 | 2.071883 | 1.101941 | 1.250430 | seq7 | A105(H)G A40(L)G | 1.25043 | 0.80414 | False |
118 | 1101 | D15_M13_GC31 | 0.5 | PR-2-03 | 13 | 31 | d15 | 72.0 | 0.845070 | 0.368842 | 1.80017 | 1.484965 | 0.820345 | 0.919910 | seq1 | I78(H)F Y42(L)S N108(L)K | 0.91991 | 0.91991 | False |
119 rows × 19 columns
In [33]:
Copied!
time_df.round(2)
time_df.round(2)
Out[33]:
index | uid | τ | ngs_id | mouse | GC | time | cells sampled | normalized dominance score | maximum REI | max Δaffinity | 95th percentile Δaffinity | mean Δaffinity | median Δaffinity | id of max REI node | mutations in max REI node | Δaffinity of max REI node | ΔΔaffinity of max REI node | max REI node is root | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1166 | D20_M22_GC96 | 0.5 | PR-1-07 | 22 | 96 | d20 | 39.0 | 0.44 | 0.04 | 1.85 | 1.70 | 1.01 | 1.16 | seq4 | Y88(H)F A105(H)G N108(L)K | 1.36 | 0.05 | False |
1 | 1159 | D20_M22_GC89 | 0.5 | PR-1-03 | 22 | 89 | d20 | 87.0 | 0.41 | 0.05 | 1.83 | 1.55 | 0.78 | 0.71 | seq33 | Y103(H)F F10(L)L S12(L)T Y42(L)H N108(L)K L116... | 1.46 | 0.99 | False |
2 | 1158 | D20_M22_GC88 | 0.5 | PR-1-03 | 22 | 88 | d20 | 82.0 | 0.39 | 0.05 | 1.97 | 1.55 | 0.11 | 0.14 | seq15 | T96(H)A P72(L)S N93(L)T L99(L)M L116(L)P | 0.09 | 0.08 | False |
3 | 1156 | D20_M22_GC86 | 0.5 | PR-1-02 | 22 | 86 | d20 | 76.0 | 0.68 | 0.05 | 2.23 | 1.99 | 0.53 | 0.81 | seq29 | I76(H)F L99(L)W L116(L)P | -0.03 | 0.00 | False |
4 | 1146 | D20_M21_GC76 | 0.5 | PR-1-05 | 21 | 76 | d20 | 69.0 | 0.77 | 0.05 | 2.12 | 1.79 | 0.50 | 1.01 | 11 | A105(H)G A40(L)G | 1.25 | 0.00 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
109 | 1141 | D20_M21_GC71 | 0.5 | PR-1-02 | 21 | 71 | d20 | 88.0 | 1.00 | 0.26 | 1.94 | 1.44 | 0.79 | 0.99 | seq11 | R67(L)K Q105(L)H L116(L)I | 0.99 | -0.01 | False |
111 | 1161 | D20_M22_GC91 | 0.5 | PR-1-06 | 22 | 91 | d20 | 77.0 | 0.73 | 0.28 | 2.50 | 1.77 | 0.85 | 1.49 | seq17 | Y66(L)H Q105(L)H N108(L)K L116(L)I | 1.64 | 0.49 | False |
113 | 1139 | D20_M20_GC69 | 0.5 | PR-1-08 | 20 | 69 | d20 | 54.0 | 0.80 | 0.29 | 2.48 | 2.27 | 0.99 | 1.71 | seq2 | A105(H)G A40(L)G S109(L)R | 2.05 | 0.80 | False |
114 | 1176 | D20_M23_GC106 | 0.5 | PR-1-04 | 23 | 106 | d20 | 83.0 | 1.00 | 0.31 | 1.41 | 1.08 | 0.83 | 0.96 | seq12 | S64(H)T N92(H)K Y42(L)C N108(L)H | 0.96 | -0.03 | False |
116 | 1173 | D20_M23_GC103 | 0.5 | PR-1-03 | 23 | 103 | d20 | 81.0 | 0.96 | 0.33 | 1.93 | 1.25 | 0.52 | 0.41 | seq12 | I78(H)L N92(H)T Y42(L)C | 0.35 | 0.35 | False |
67 rows × 19 columns
In [34]:
Copied!
cmap = "viridis"
vmin = min(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
vmax = max(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
for time, time_df in df.groupby("time"):
scatter_plot = alt.Chart(time_df.round(3)).mark_circle().encode(
x=alt.X('normalized dominance score:Q', title='Normalized Dominance Score'),
y=alt.Y('maximum REI:Q', title='Maximum REI'),
size=alt.Size('cells sampled:Q', title='Cells Sampled'),
color=alt.Color('mouse:N', title='Mouse', legend=None),
tooltip=['uid:N', 'cells sampled:Q', 'median Δaffinity:Q', 'Δaffinity of max REI node:Q'] # Tooltip to show the uid when hovered over
).properties(
width=600,
height=400,
title='Interactive plot'
).interactive()
# Display the plot
scatter_plot.show()
scatter_plot.save(f"{output_dir}/interactive_REI_Dom_{time}.html")
cmap = "viridis"
vmin = min(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
vmax = max(getattr(node, stat) for tree in trees.values() for node in tree.tree.traverse())
for time, time_df in df.groupby("time"):
scatter_plot = alt.Chart(time_df.round(3)).mark_circle().encode(
x=alt.X('normalized dominance score:Q', title='Normalized Dominance Score'),
y=alt.Y('maximum REI:Q', title='Maximum REI'),
size=alt.Size('cells sampled:Q', title='Cells Sampled'),
color=alt.Color('mouse:N', title='Mouse', legend=None),
tooltip=['uid:N', 'cells sampled:Q', 'median Δaffinity:Q', 'Δaffinity of max REI node:Q'] # Tooltip to show the uid when hovered over
).properties(
width=600,
height=400,
title='Interactive plot'
).interactive()
# Display the plot
scatter_plot.show()
scatter_plot.save(f"{output_dir}/interactive_REI_Dom_{time}.html")
In [35]:
Copied!
# !rm {output_dir}/trees/*.svg
# !rm {output_dir}/trees/*.svg
In [36]:
Copied!
output_dir
output_dir
Out[36]:
'./default'
In [37]:
Copied!
!mkdir -p {output_dir}/trees
!mkdir -p {output_dir}/trees
In [38]:
Copied!
%env MPLBACKEND=Agg
# %matplotlib inline
%env MPLBACKEND=Agg
# %matplotlib inline
env: MPLBACKEND=Agg
In [39]:
Copied!
from collections import defaultdict
tree_expr = defaultdict(list)
for i, row in df.iterrows():
key = row.uid
for node in trees[key].tree.traverse():
tree_expr["uid"].append(key)
tree_expr["node_bind"].append(getattr(node, "delta_bind"))
tree_expr["node_expr"].append(getattr(node, "delta_expr"))
# tree_expr["node_bind_norm"].append(pc.affinity_trees_grey_centered.norm(getattr(node, "delta_bind")))
# tree_expr["node_expr_norm"].append(pc.expression_trees_grey_centered.norm(getattr(node, "delta_expr")))
tree_expr_df = pd.DataFrame(tree_expr)
tree_expr_df.head()
from collections import defaultdict
tree_expr = defaultdict(list)
for i, row in df.iterrows():
key = row.uid
for node in trees[key].tree.traverse():
tree_expr["uid"].append(key)
tree_expr["node_bind"].append(getattr(node, "delta_bind"))
tree_expr["node_expr"].append(getattr(node, "delta_expr"))
# tree_expr["node_bind_norm"].append(pc.affinity_trees_grey_centered.norm(getattr(node, "delta_bind")))
# tree_expr["node_expr_norm"].append(pc.expression_trees_grey_centered.norm(getattr(node, "delta_expr")))
tree_expr_df = pd.DataFrame(tree_expr)
tree_expr_df.head()
Out[39]:
uid | node_bind | node_expr | |
---|---|---|---|
0 | D20_M22_GC96 | 0.00000 | 0.00000 |
1 | D20_M22_GC96 | -0.46711 | -0.30968 |
2 | D20_M22_GC96 | 0.48156 | -0.07432 |
3 | D20_M22_GC96 | 0.15592 | -0.54128 |
4 | D20_M22_GC96 | 0.80414 | 0.03813 |
In [40]:
Copied!
fig, ax = plt.subplots(2, 1, figsize = [20,10], sharex=True, sharey=True)
sns.boxplot(tree_expr_df, x="uid", y="node_bind", ax=ax[0])
sns.boxplot(tree_expr_df, x="uid", y="node_expr", ax=ax[1])
ax[1].set_xticks(ax[1].get_xticks(), ax[1].get_xticklabels(), rotation=90, ha='right')
plt.show()
fig, ax = plt.subplots(2, 1, figsize = [20,10], sharex=True, sharey=True)
sns.boxplot(tree_expr_df, x="uid", y="node_bind", ax=ax[0])
sns.boxplot(tree_expr_df, x="uid", y="node_expr", ax=ax[1])
ax[1].set_xticks(ax[1].get_xticks(), ax[1].get_xticklabels(), rotation=90, ha='right')
plt.show()
In [41]:
Copied!
branch_margin = 0.1
# scale = 20
for i, row in df.iterrows():
x, y = row[["normalized dominance score", f"maximum {stat}"]]
key = row.uid
colormap1 = trees[key].feature_colormap(
stat,
vmin=vmin,
vmax=vmax,
# scale="symlog",
linthresh=1,
cmap=cmap
)
trees[key].render(
f"{output_dir}/trees/{key}.{stat}.scale{scale}.svg",
colormap=colormap1,
scale=scale,
branch_margin=branch_margin
)
# print(f"Done: {key}")
# if row[f"maximum {stat}"] > thresh:
# print(row.uid, stat)
# if row[f"max {stat} node is root"]:
# print(f"NOTE: max {stat} node is root")
# display(
# trees[key].render(
# "%%inline",
# colormap=colormap1,
# scale=scale,
# branch_margin=branch_margin
# )
# )
colormap2 = {
node.name: to_hex(
pc.affinity_trees_grey_centered.cmap(
pc.affinity_trees_grey_centered.norm(
getattr(node, "delta_bind")
)
)
) for node in trees[key].tree.traverse()
}
trees[key].render(
f"{output_dir}/trees/{key}.binding.scale{scale}.svg",
colormap=colormap2,
scale=scale,
branch_margin=branch_margin
)
# if row[f"maximum {stat}"] > thresh:
# print(row.uid, "Δaffinity")
# display(
# trees[key].render(
# "%%inline",
# colormap=colormap2,
# scale=scale,
# branch_margin=branch_margin
# )
# )
colormap3 = {
node.name: to_hex(
pc.expression_trees_grey_centered.cmap(
pc.expression_trees_grey_centered.norm(
getattr(node, "delta_expr")
)
)
) for node in trees[key].tree.traverse()
}
trees[key].render(
f"{output_dir}/trees/{key}.expression.scale{scale}.svg",
colormap=colormap3,
scale=scale,
branch_margin=branch_margin
)
# plt.show()
branch_margin = 0.1
# scale = 20
for i, row in df.iterrows():
x, y = row[["normalized dominance score", f"maximum {stat}"]]
key = row.uid
colormap1 = trees[key].feature_colormap(
stat,
vmin=vmin,
vmax=vmax,
# scale="symlog",
linthresh=1,
cmap=cmap
)
trees[key].render(
f"{output_dir}/trees/{key}.{stat}.scale{scale}.svg",
colormap=colormap1,
scale=scale,
branch_margin=branch_margin
)
# print(f"Done: {key}")
# if row[f"maximum {stat}"] > thresh:
# print(row.uid, stat)
# if row[f"max {stat} node is root"]:
# print(f"NOTE: max {stat} node is root")
# display(
# trees[key].render(
# "%%inline",
# colormap=colormap1,
# scale=scale,
# branch_margin=branch_margin
# )
# )
colormap2 = {
node.name: to_hex(
pc.affinity_trees_grey_centered.cmap(
pc.affinity_trees_grey_centered.norm(
getattr(node, "delta_bind")
)
)
) for node in trees[key].tree.traverse()
}
trees[key].render(
f"{output_dir}/trees/{key}.binding.scale{scale}.svg",
colormap=colormap2,
scale=scale,
branch_margin=branch_margin
)
# if row[f"maximum {stat}"] > thresh:
# print(row.uid, "Δaffinity")
# display(
# trees[key].render(
# "%%inline",
# colormap=colormap2,
# scale=scale,
# branch_margin=branch_margin
# )
# )
colormap3 = {
node.name: to_hex(
pc.expression_trees_grey_centered.cmap(
pc.expression_trees_grey_centered.norm(
getattr(node, "delta_expr")
)
)
) for node in trees[key].tree.traverse()
}
trees[key].render(
f"{output_dir}/trees/{key}.expression.scale{scale}.svg",
colormap=colormap3,
scale=scale,
branch_margin=branch_margin
)
# plt.show()
Color bars for the tree colormaps¶
In [42]:
Copied!
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(cm.ScalarMappable(cmap=cmap, norm=Normalize(vmin=vmin, vmax=vmax)),
orientation='horizontal',
cax=cax,
label=f"{stat}")
plt.savefig(f"{output_dir}/cbar_REI.svg", bbox_inches="tight")
plt.show()
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(cm.ScalarMappable(cmap=pc.affinity_trees_grey_centered.cmap, norm=pc.affinity_trees_grey_centered.norm),
orientation='horizontal',
cax=cax,
label="Δaffinity")
plt.savefig(f"{output_dir}/cbar_binding.svg", bbox_inches="tight")
plt.show()
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(
cm.ScalarMappable(
cmap=pc.expression_trees_grey_centered.cmap,
norm=pc.expression_trees_grey_centered.norm
),
orientation='horizontal',
cax=cax,
label="Δexpression",
ticks=[-3, 0, 1],
)
plt.savefig(f"{output_dir}/cbar_expression.svg", bbox_inches="tight")
plt.show()
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(cm.ScalarMappable(cmap=cmap, norm=Normalize(vmin=vmin, vmax=vmax)),
orientation='horizontal',
cax=cax,
label=f"{stat}")
plt.savefig(f"{output_dir}/cbar_REI.svg", bbox_inches="tight")
plt.show()
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(cm.ScalarMappable(cmap=pc.affinity_trees_grey_centered.cmap, norm=pc.affinity_trees_grey_centered.norm),
orientation='horizontal',
cax=cax,
label="Δaffinity")
plt.savefig(f"{output_dir}/cbar_binding.svg", bbox_inches="tight")
plt.show()
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
plt.colorbar(
cm.ScalarMappable(
cmap=pc.expression_trees_grey_centered.cmap,
norm=pc.expression_trees_grey_centered.norm
),
orientation='horizontal',
cax=cax,
label="Δexpression",
ticks=[-3, 0, 1],
)
plt.savefig(f"{output_dir}/cbar_expression.svg", bbox_inches="tight")
plt.show()
Stack the tree svg's into a single image¶
define output directory and clear out any existing files
In [43]:
Copied!
stacked_dir = f"{output_dir}/stacked_trees"
!mkdir -p {stacked_dir}
!rm {output_dir}/stacked_trees/ngs_id*{scale}*.svg
stacked_dir = f"{output_dir}/stacked_trees"
!mkdir -p {stacked_dir}
!rm {output_dir}/stacked_trees/ngs_id*{scale}*.svg
rm: cannot remove './default/stacked_trees/ngs_id*20*.svg': No such file or directory
In [44]:
Copied!
# Set the maximum width for the display
pd.set_option('display.max_colwidth', None)
# Set the maximum number of columns to display
pd.set_option('display.max_columns', None)
# Set the maximum width for the display
pd.set_option('display.max_colwidth', None)
# Set the maximum number of columns to display
pd.set_option('display.max_columns', None)
Use svg utils to stitch together the trees rendered above.
In [45]:
Copied!
for stat in ["REI", "binding", "expression"]:
for duration in ["20", "15"]:
files = sorted(glob.glob(f"{output_dir}/trees/D{duration}*{stat}.scale{scale}.svg"))
files_df = pd.DataFrame(files, columns=["file"])
path_start = 0 if not files[0].startswith(".") else 1
files_df["UID"] = files_df.file.str.split(".").str[path_start].str.split("/").str[-1]
files_df["mouse"] = files_df.file.str.extract(r"M(\d+)").astype(int)
files_df["GC"] = files_df.file.str.extract(r"GC(\d+)").astype(int)
files_df.sort_values(["mouse", "GC"], inplace=True)
# Initial y position for stacking
y_offset = 220
all_panels = []
all_panels.append(
Text(f"IMM Duration: {duration} days", 200, 35, size=24, weight='bold')
)
# add colorbar
cbar = f"cbar_{stat}"
colorbar_svg = SVG(f"{output_dir}/{cbar}.svg").scale(2)
colorbar_svg.move(175, 45)
all_panels.append(colorbar_svg)
# iterate through the tree, create a panel for each one.
for i, (idx, row) in enumerate(files_df.iterrows()):
position = (10, y_offset)
annotation = row.UID
panel = Panel(
SVG(row.file).scale(0.5),
Text(annotation, 10, -20, size=16, weight='bold')
).move(*position)
all_panels.append(panel)
if scale == 20:
y_offset += 350
else:
y_offset += 125
fig = Figure("%dpx"%1050, "%dpx" % y_offset ,*all_panels)
fig.save(f"{output_dir}/stacked_trees/D{duration}.{stat}.scale{scale}.svg")
for stat in ["REI", "binding", "expression"]:
for duration in ["20", "15"]:
files = sorted(glob.glob(f"{output_dir}/trees/D{duration}*{stat}.scale{scale}.svg"))
files_df = pd.DataFrame(files, columns=["file"])
path_start = 0 if not files[0].startswith(".") else 1
files_df["UID"] = files_df.file.str.split(".").str[path_start].str.split("/").str[-1]
files_df["mouse"] = files_df.file.str.extract(r"M(\d+)").astype(int)
files_df["GC"] = files_df.file.str.extract(r"GC(\d+)").astype(int)
files_df.sort_values(["mouse", "GC"], inplace=True)
# Initial y position for stacking
y_offset = 220
all_panels = []
all_panels.append(
Text(f"IMM Duration: {duration} days", 200, 35, size=24, weight='bold')
)
# add colorbar
cbar = f"cbar_{stat}"
colorbar_svg = SVG(f"{output_dir}/{cbar}.svg").scale(2)
colorbar_svg.move(175, 45)
all_panels.append(colorbar_svg)
# iterate through the tree, create a panel for each one.
for i, (idx, row) in enumerate(files_df.iterrows()):
position = (10, y_offset)
annotation = row.UID
panel = Panel(
SVG(row.file).scale(0.5),
Text(annotation, 10, -20, size=16, weight='bold')
).move(*position)
all_panels.append(panel)
if scale == 20:
y_offset += 350
else:
y_offset += 125
fig = Figure("%dpx"%1050, "%dpx" % y_offset ,*all_panels)
fig.save(f"{output_dir}/stacked_trees/D{duration}.{stat}.scale{scale}.svg")
Affinity vs. REI¶
In [46]:
Copied!
stat = "REI"
dat = []
for gc, tree in trees.items():
for node in tree.tree.traverse():
dat.append([gc, getattr(node, stat), node.delta_bind])
df_nodes = pd.DataFrame(dat, columns=["GC", stat, "Δaffinity"])
plt.figure(figsize=(4, 4))
sns.scatterplot(data=df_nodes, x=stat, y="Δaffinity", hue="GC", legend=False, alpha=0.5, size=.1)
plt.axhline(0, ls="--", c="k", lw=1)
plt.axvline(0, ls="--", c="k", lw=1)
plt.xscale("log")
plt.tight_layout()
plt.show()
stat = "REI"
dat = []
for gc, tree in trees.items():
for node in tree.tree.traverse():
dat.append([gc, getattr(node, stat), node.delta_bind])
df_nodes = pd.DataFrame(dat, columns=["GC", stat, "Δaffinity"])
plt.figure(figsize=(4, 4))
sns.scatterplot(data=df_nodes, x=stat, y="Δaffinity", hue="GC", legend=False, alpha=0.5, size=.1)
plt.axhline(0, ls="--", c="k", lw=1)
plt.axvline(0, ls="--", c="k", lw=1)
plt.xscale("log")
plt.tight_layout()
plt.show()
Summary¶
- Developed REI, because no branch lengths with LB
- Doesn't correlate will with GC affinity
- Recent motivated by evolving affinity