Phenotype Trajectories
In [1]:
Copied!
import os
import tempfile

# Redirect XDG_CACHE_HOME to a fresh scratch directory *before* the plotting
# stack is imported. Created exactly once (the cell previously ran this twice,
# orphaning the first temporary directory).
# NOTE(review): presumably this keeps library caches (e.g. font caches) out of
# a read-only or shared home directory when run in the workflow -- confirm.
cache_dir = tempfile.mkdtemp()
os.environ['XDG_CACHE_HOME'] = cache_dir

import numpy as np
import pandas as pd
from matplotlib.colors import Normalize
import matplotlib.pyplot as plt
from matplotlib import collections as mc
from matplotlib import cm
import seaborn as sns
# import glob
# import pickle
from collections import defaultdict
from Bio.Seq import Seq
from pdf2image import convert_from_path
# import ete3
import utils.trees
Load GC trees¶
In [2]:
Copied!
# Default inputs/outputs for interactive use. When the notebook is executed by
# the Nextflow papermill process, a following "Parameters" cell overrides them.
results = "../nextflow/results/"
ranking_subdir = 'naive_reversions_first'
metadata_csv = "../gc_metadata.csv"
mutability_csv = "../nextflow/data/mutability/MK_RS5NF_mutability.csv"
substitution_csv = "../nextflow/data/mutability/MK_RS5NF_substitution.csv"
# chigy_hc_mut_rates = "../passenger/output/chigy_hc_mutation_rates_nt.csv"
# chigy_lc_mut_rates = "../passenger/output/chigy_lc_mutation_rates_nt.csv"
final_variant_scores = "data/dms/final_variant_scores.csv"
dms_sites = "data/dms/CGGnaive_sites.csv"
outbase = "output/phenotype-trajectories"
workflow_env_exec = False # only set to True within the Nextflow papermill process

# simulation params
# Maximum |simulated - observed| binding difference for a simulated node to be
# accepted; np.inf accepts any try whose difference is not NaN.
affinity_threshold = np.inf
# affinity_threshold = 0.1
n_replicates = 10  # simulated trees generated per GC
num_tries = 1000  # resampling attempts per node before giving up
In [3]:
Copied!
# Parameters
# Values for a workflow run: inputs are looked up in the working directory and
# outputs are written there too. They replace the interactive defaults above.
results = "."
ranking_subdir = "naive_reversions_first"
metadata_csv = "gc_metadata.csv"
final_variant_scores = "final_variant_scores.csv"
dms_sites = "CGGnaive_sites.csv"
mutability_csv = "MK_RS5NF_mutability.csv"
substitution_csv = "MK_RS5NF_substitution.csv"
outbase = "."
workflow_env_exec = True
In [4]:
Copied!
# Directory that receives this notebook's outputs for the chosen ranking.
output_dir = f"{outbase}/{ranking_subdir}"
# exist_ok=True makes the cell idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)
In [5]:
Copied!
# Load the GC metadata once and derive the analysis subset in a single chain
# (no in-place mutation, so re-running the cell always starts from the CSV).
metadata = (
    pd.read_csv(metadata_csv, index_col=0)
    # keep wild-type germinal-center samples, excluding the w10 time point
    .query("(strain == 'wt') & (cell_type == 'GC') & (imm_duration != 'w10')")
    .rename(columns={'imm_duration': 'time'})
)
metadata
Out[5]:
ngs_id | time | mouse | gc | strain | node | cell_type | plate | hc_barcode | lc_barcode | row | col | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uid | ||||||||||||
D15_M1_GC1 | PR-2-01 | d15 | 1 | 1 | wt | RP | GC | 2 | 9 | 9 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D15_M1_GC2 | PR-2-01 | d15 | 1 | 2 | wt | RI | GC | 3 | 2 | 1 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D15_M1_GC3 | PR-2-01 | d15 | 1 | 3 | wt | LI | GC | 4 | 14 | 2 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D15_M2_GC4 | PR-2-01 | d15 | 2 | 4 | wt | RP | GC | 5 | 10 | 11 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D15_M3_GC5 | PR-2-01 | d15 | 3 | 5 | wt | RP | GC | 6 | 7 | 4 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
D20_M24_GC115 | PR-1-04 | d20 | 24 | 115 | wt | RP | GC | 72 | 16 | 16 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D20_M25_GC116 | PR-1-02 | d20 | 25 | 116 | wt | RP | GC | 65 | 8 | 8 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D20_M25_GC117 | PR-1-03 | d20 | 25 | 117 | wt | RP | GC | 68 | 9 | 9 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D20_M25_GC118 | PR-1-08 | d20 | 25 | 118 | wt | RP | GC | 66 | 15 | 15 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
D20_M25_GC119 | PR-1-08 | d20 | 25 | 119 | wt | RP | GC | 67 | 16 | 16 | A.B.C.D.E.F.G.H | 1.2.3.4.5.6.7.8.9.10.11.12 |
119 rows × 12 columns
In [6]:
Copied!
# Load one tree per row of `metadata` (loaded once; the cell previously repeated
# this call). The result is used below as a mapping of GC id -> tree wrapper.
trees = utils.trees.load_trees(metadata, results, ranking_subdir)
len(trees)
Out[6]:
119
Mutability model¶
In [7]:
Copied!
# Layout of the concatenated nucleotide sequence: positions [0, igk_idx) are
# the heavy chain and [igk_idx, end) the light chain; frames are 1-based.
igk_idx = 336
igh_frame = 1
igk_frame = 1

# Space-delimited model tables keyed by their first column (used below as the
# 5-mer context); a single-column mutability table squeezes to a Series.
substitution = pd.read_csv(substitution_csv, sep=" ", index_col=0)
mutability = pd.read_csv(mutability_csv, sep=" ", index_col=0).squeeze("columns")

# Reference sequence taken from the first loaded tree.
# NOTE(review): presumably every GC tree shares this naive root sequence -- confirm.
naive = next(iter(trees.values())).tree.sequence
def mutate_uniform(sequence):
    """Return a copy of `sequence` with one uniformly chosen position substituted.

    The position is drawn uniformly over the sequence, and the replacement is
    drawn uniformly from the bases in "ACGT" other than the current one.
    """
    position = np.random.choice(len(sequence))
    current = sequence[position]
    candidates = [nt for nt in "ACGT" if nt != current]
    replacement = np.random.choice(candidates)
    bases = list(sequence)
    bases[position] = replacement
    return "".join(bases)
def mutate_S5F(sequence):
    """Apply one context-dependent point mutation to a heavy+light sequence.

    The sequence is split at ``igk_idx`` into two chains, and each chain is
    padded with "NN" on both ends so that every nucleotide has a 5-mer context
    that never spans the chain junction. A position is drawn with probability
    proportional to ``mutability[context]``, and the replacement base is drawn
    from that context's row of ``substitution`` (missing entries count as 0).

    Parameters
    ----------
    sequence : str
        Concatenated nucleotide sequence (heavy chain first).

    Returns
    -------
    The sequence with exactly one position replaced.

    Raises
    ------
    KeyError
        If a 5-mer context is absent from the mutability/substitution tables.
    """
    padded_chains = (
        "NN" + sequence[:igk_idx] + "NN",
        "NN" + sequence[igk_idx:] + "NN",
    )
    # contexts[k] is the 5-mer centred on sequence[k]: heavy-chain positions
    # first, then light-chain positions.
    contexts = [
        chain[(center - 2) : (center + 3)]
        for chain in padded_chains
        for center in range(2, len(chain) - 2)
    ]
    # One vectorized label lookup instead of a Python-level lookup per site;
    # this function is the hot path of the simulation.
    mutabilities = mutability.loc[contexts].to_numpy(dtype=float)
    i = np.random.choice(len(mutabilities), p=mutabilities / mutabilities.sum())
    new_base = np.random.choice(
        substitution.columns, p=substitution.loc[contexts[i]].fillna(0)
    )
    return sequence[:i] + new_base + sequence[(i + 1):]
def aa(seq, frame):
    """Translate `seq` from the 1-based reading `frame`.

    Trailing nucleotides that do not complete a codon are dropped before the
    sequence is translated.
    """
    offset = frame - 1
    n_codons = (len(seq) - offset) // 3
    coding = seq[offset : offset + 3 * n_codons]
    return Seq(coding).translate()
def mutations(naive_aa, aa, pos_map, chain_annotation):
    """List amino-acid substitutions of `aa` relative to `naive_aa`.

    Each difference is formatted as
    ``<naive residue><pos_map[index]><chain_annotation><new residue>``, where
    ``index`` is the 0-based position in the paired sequences. The sequences
    are compared pairwise up to the length of the shorter one.
    """
    found = []
    for index, residues in enumerate(zip(naive_aa, aa)):
        reference, observed = residues
        if reference == observed:
            continue
        found.append(f"{reference}{pos_map[index]}{chain_annotation}{observed}")
    return found
# Amino-acid translations of the reference sequence, one per chain; simulated
# sequences are compared against these to call mutations.
naive_igh_aa, naive_igk_aa = (
    aa(naive[:igk_idx], igh_frame),
    aa(naive[igk_idx:], igk_frame),
)
# Layout of the concatenated nucleotide sequence: positions [0, igk_idx) are
# the heavy chain and [igk_idx, end) the light chain; frames are 1-based.
igk_idx = 336
igh_frame = 1
igk_frame = 1

# Space-delimited model tables keyed by their first column (used below as the
# 5-mer context); a single-column mutability table squeezes to a Series.
substitution = pd.read_csv(substitution_csv, sep=" ", index_col=0)
mutability = pd.read_csv(mutability_csv, sep=" ", index_col=0).squeeze("columns")

# Reference sequence taken from the first loaded tree.
# NOTE(review): presumably every GC tree shares this naive root sequence -- confirm.
naive = next(iter(trees.values())).tree.sequence
def mutate_uniform(sequence):
    """Return a copy of `sequence` with one uniformly chosen position substituted.

    The position is drawn uniformly over the sequence, and the replacement is
    drawn uniformly from the bases in "ACGT" other than the current one.
    """
    position = np.random.choice(len(sequence))
    current = sequence[position]
    candidates = [nt for nt in "ACGT" if nt != current]
    replacement = np.random.choice(candidates)
    bases = list(sequence)
    bases[position] = replacement
    return "".join(bases)
def mutate_S5F(sequence):
    """Apply one context-dependent point mutation to a heavy+light sequence.

    The sequence is split at ``igk_idx`` into two chains, and each chain is
    padded with "NN" on both ends so that every nucleotide has a 5-mer context
    that never spans the chain junction. A position is drawn with probability
    proportional to ``mutability[context]``, and the replacement base is drawn
    from that context's row of ``substitution`` (missing entries count as 0).

    Parameters
    ----------
    sequence : str
        Concatenated nucleotide sequence (heavy chain first).

    Returns
    -------
    The sequence with exactly one position replaced.

    Raises
    ------
    KeyError
        If a 5-mer context is absent from the mutability/substitution tables.
    """
    padded_chains = (
        "NN" + sequence[:igk_idx] + "NN",
        "NN" + sequence[igk_idx:] + "NN",
    )
    # contexts[k] is the 5-mer centred on sequence[k]: heavy-chain positions
    # first, then light-chain positions.
    contexts = [
        chain[(center - 2) : (center + 3)]
        for chain in padded_chains
        for center in range(2, len(chain) - 2)
    ]
    # One vectorized label lookup instead of a Python-level lookup per site;
    # this function is the hot path of the simulation.
    mutabilities = mutability.loc[contexts].to_numpy(dtype=float)
    i = np.random.choice(len(mutabilities), p=mutabilities / mutabilities.sum())
    new_base = np.random.choice(
        substitution.columns, p=substitution.loc[contexts[i]].fillna(0)
    )
    return sequence[:i] + new_base + sequence[(i + 1):]
def aa(seq, frame):
    """Translate `seq` from the 1-based reading `frame`.

    Trailing nucleotides that do not complete a codon are dropped before the
    sequence is translated.
    """
    offset = frame - 1
    n_codons = (len(seq) - offset) // 3
    coding = seq[offset : offset + 3 * n_codons]
    return Seq(coding).translate()
def mutations(naive_aa, aa, pos_map, chain_annotation):
    """List amino-acid substitutions of `aa` relative to `naive_aa`.

    Each difference is formatted as
    ``<naive residue><pos_map[index]><chain_annotation><new residue>``, where
    ``index`` is the 0-based position in the paired sequences. The sequences
    are compared pairwise up to the length of the shorter one.
    """
    found = []
    for index, residues in enumerate(zip(naive_aa, aa)):
        reference, observed = residues
        if reference == observed:
            continue
        found.append(f"{reference}{pos_map[index]}{chain_annotation}{observed}")
    return found
# Amino-acid translations of the reference sequence, one per chain; simulated
# sequences are compared against these to call mutations.
naive_igh_aa, naive_igk_aa = (
    aa(naive[:igk_idx], igh_frame),
    aa(naive[igk_idx:], igk_frame),
)
DMS data¶
In [8]:
Copied!
# Per-mutation DMS scores, indexed by mutation label (e.g. "E1(H)A").
dms_df = pd.read_csv(final_variant_scores, index_col="mutation", dtype=dict(position_IMGT=pd.Int16Dtype()))
# remove linker sites; .copy() so the column assignments below act on an
# independent frame rather than on a filtered slice of the raw table
dms_df = dms_df[dms_df.chain != "link"].copy()
# add indicator for wildtype data
dms_df["WT"] = dms_df.wildtype == dms_df.mutant
# the site label below zero-pads positions to three digits
assert dms_df.position_IMGT.max() < 1000
dms_df["site"] = [f"{chain}-{str(pos).zfill(3)}" for chain, pos in zip(dms_df.chain, dms_df.position_IMGT)]
dms_df
Out[8]:
target | wildtype | position | position_IMGT | chain | annotation | mutant | codon | single_nt | bind_CGG | delta_bind_CGG | n_bc_bind_CGG | n_libs_bind_CGG | expr | delta_expr | n_bc_expr | n_libs_expr | WT | site | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
mutation | |||||||||||||||||||
E1(H)A | CGG_naive | E | 1 | 1 | H | FWRH1 | A | GAG | True | 8.74239 | 0.11933 | 20 | 2 | 10.45318 | -0.00246 | 20 | 2 | False | H-001 |
E1(H)C | CGG_naive | E | 1 | 1 | H | FWRH1 | C | GAG | False | 8.60813 | -0.01492 | 22 | 2 | 10.33348 | -0.12216 | 22 | 2 | False | H-001 |
E1(H)D | CGG_naive | E | 1 | 1 | H | FWRH1 | D | GAG | True | 8.63554 | 0.01249 | 18 | 2 | 10.50438 | 0.04874 | 18 | 2 | False | H-001 |
E1(H)E | CGG_naive | E | 1 | 1 | H | FWRH1 | E | GAG | True | 8.62305 | 0.00000 | 23285 | 2 | 10.45565 | 0.00000 | 23285 | 2 | True | H-001 |
E1(H)F | CGG_naive | E | 1 | 1 | H | FWRH1 | F | GAG | False | 8.75738 | 0.13433 | 29 | 2 | 10.34185 | -0.11379 | 29 | 2 | False | H-001 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
R128(L)S | CGG_naive | R | 235 | 128 | L | FWRL4 | S | CGT | True | 8.63400 | 0.01095 | 45 | 2 | 10.52908 | 0.07344 | 45 | 2 | False | L-128 |
R128(L)T | CGG_naive | R | 235 | 128 | L | FWRL4 | T | CGT | False | 8.64386 | 0.02081 | 41 | 2 | 10.54211 | 0.08647 | 41 | 2 | False | L-128 |
R128(L)V | CGG_naive | R | 235 | 128 | L | FWRL4 | V | CGT | False | 8.58887 | -0.03418 | 28 | 2 | 10.49638 | 0.04073 | 28 | 2 | False | L-128 |
R128(L)W | CGG_naive | R | 235 | 128 | L | FWRL4 | W | CGT | False | 8.66728 | 0.04423 | 36 | 2 | 10.36373 | -0.09192 | 36 | 2 | False | L-128 |
R128(L)Y | CGG_naive | R | 235 | 128 | L | FWRL4 | Y | CGT | False | 8.63043 | 0.00737 | 38 | 2 | 10.46643 | 0.01079 | 38 | 2 | False | L-128 |
4400 rows × 19 columns
In [9]:
Copied!
# Site table indexed by scFv position; `site` is read as a nullable Int16.
# Read once (the cell previously repeated the load).
pos_df = pd.read_csv(dms_sites, dtype=dict(site=pd.Int16Dtype()), index_col="site_scFv")
pos_df
Out[9]:
amino_acid | chain | site | KI_codon | annotation | |
---|---|---|---|---|---|
site_scFv | |||||
1 | E | H | 1 | GAG | FWRH1 |
2 | V | H | 2 | GTG | FWRH1 |
3 | Q | H | 3 | CAG | FWRH1 |
4 | L | H | 4 | CTT | FWRH1 |
5 | Q | H | 5 | CAG | FWRH1 |
... | ... | ... | ... | ... | ... |
231 | L | L | 124 | CTA | FWRL4 |
232 | E | L | 125 | GAA | FWRL4 |
233 | I | L | 126 | ATA | FWRL4 |
234 | K | L | 127 | AAA | FWRL4 |
235 | R | L | 128 | CGT | FWRL4 |
235 rows × 5 columns
In [10]:
Copied!
# Per-chain lookup from 0-based residue index (after reset_index) to the `site`
# number used in DMS mutation labels.
igh_pos_map = pos_df.loc[pos_df.chain == "H", "site"].reset_index(drop=True)
igk_pos_map = pos_df.loc[pos_df.chain == "L", "site"].reset_index(drop=True)
Simulate expression-matched trees¶
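This cell takes a while (~1 hour): for every GC it builds `n_replicates` simulated trees, and each non-root node is re-simulated up to `num_tries` times.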
In [11]:
Copied!
def _dms_phenotype(sequence):
    """Return ``(delta_bind, delta_expr)`` for a heavy+light nucleotide sequence.

    Both chains are translated and compared with the naive amino-acid
    sequences, and the DMS effects of the resulting mutations are summed.
    If any mutation introduces a stop codon ("*"), both values are NaN.
    """
    igh_aa = aa(sequence[:igk_idx], igh_frame)
    igk_aa = aa(sequence[igk_idx:], igk_frame)
    all_mutations = (
        mutations(naive_igh_aa, igh_aa, igh_pos_map, "(H)")
        + mutations(naive_igk_aa, igk_aa, igk_pos_map, "(L)")
    )
    if any("*" in x for x in all_mutations):
        return np.nan, np.nan
    return (
        dms_df.delta_bind_CGG[all_mutations].sum(),
        dms_df.delta_expr[all_mutations].sum(),
    )


# For each GC, build `n_replicates` simulated trees that keep the observed
# topology and branch lengths but replace every non-root sequence with one
# obtained by applying int(node.dist) S5F mutations to its simulated parent.
# NOTE(review): no random seed is set, so results differ between runs.
sim_trees = defaultdict(list)
for gc, tree in trees.items():
    for replicate in range(1, n_replicates + 1):
        print(f"GC {gc} replicate {replicate}", end=" \r")
        new_tree = tree.tree.copy()
        # Preorder traversal guarantees a parent is simulated before its children.
        for node, node_new in zip(tree.tree.traverse(strategy="preorder"), new_tree.traverse(strategy="preorder")):
            assert node.name == node_new.name
            if node.is_root():
                continue

            # Loop-invariant: does the observed node lack a phenotype (NaN)?
            observed_is_nan = np.isnan([node.delta_bind, node.delta_expr]).all()
            success = False
            # Best candidate for THIS node only. Reset per node so a failed
            # node can never inherit the previous node's sequence.
            closest = None
            closest_bind = np.inf
            for _try in range(num_tries):
                node_new.sequence = node_new.up.sequence
                for _ in range(int(node.dist)):
                    node_new.sequence = mutate_S5F(node_new.sequence)
                node_new.delta_bind, node_new.delta_expr = _dms_phenotype(node_new.sequence)

                if observed_is_nan:
                    # Observed NaN can only be matched by a simulated NaN.
                    if np.isnan([node_new.delta_bind, node_new.delta_expr]).all():
                        success = True
                        break
                    continue

                # NaN for a simulated stop codon; every comparison below is
                # then False, so that try is neither recorded nor accepted.
                delta_delta_bind = np.abs(node_new.delta_bind - node.delta_bind)
                if delta_delta_bind < closest_bind:
                    closest_bind = delta_delta_bind
                    closest = (node_new.sequence, node_new.delta_bind, node_new.delta_expr)
                if delta_delta_bind < affinity_threshold:
                    success = True
                    break

            if not success:
                if closest is not None:
                    node_new.sequence, node_new.delta_bind, node_new.delta_expr = closest
                    print(f"GC {gc} replicate {replicate} node {node.name} failed to find a viable sequence. Closest: {node_new.delta_bind-node.delta_bind:.2f} {node_new.delta_expr-node.delta_expr:.2f}")
                else:
                    # No try was comparable with the observed node; the node
                    # keeps whatever the last attempt produced.
                    print(f"GC {gc} replicate {replicate} node {node.name} failed to find a comparable sequence in {num_tries} tries; keeping the last simulated attempt")
        sim_trees[gc].append(new_tree)
GC D15_M1_GC1 replicate 1
GC D15_M1_GC1 replicate 2
GC D15_M1_GC1 replicate 3
GC D15_M1_GC1 replicate 4
GC D15_M1_GC1 replicate 5
GC D15_M1_GC1 replicate 6
GC D15_M1_GC1 replicate 7
GC D15_M1_GC1 replicate 8
GC D15_M1_GC1 replicate 9
GC D15_M1_GC1 replicate 10
GC D15_M1_GC2 replicate 1
GC D15_M1_GC2 replicate 2
GC D15_M1_GC2 replicate 3
GC D15_M1_GC2 replicate 4
GC D15_M1_GC2 replicate 5
GC D15_M1_GC2 replicate 6
GC D15_M1_GC2 replicate 7
GC D15_M1_GC2 replicate 8
GC D15_M1_GC2 replicate 9
GC D15_M1_GC2 replicate 10
GC D15_M1_GC3 replicate 1
GC D15_M1_GC3 replicate 2
GC D15_M1_GC3 replicate 3
GC D15_M1_GC3 replicate 4
GC D15_M1_GC3 replicate 5
GC D15_M1_GC3 replicate 6
GC D15_M1_GC3 replicate 7
GC D15_M1_GC3 replicate 8
GC D15_M1_GC3 replicate 9
GC D15_M1_GC3 replicate 10
GC D15_M2_GC4 replicate 1
GC D15_M2_GC4 replicate 2
GC D15_M2_GC4 replicate 3
GC D15_M2_GC4 replicate 4
GC D15_M2_GC4 replicate 5
GC D15_M2_GC4 replicate 6
GC D15_M2_GC4 replicate 7
GC D15_M2_GC4 replicate 8
GC D15_M2_GC4 replicate 9
GC D15_M2_GC4 replicate 10
GC D15_M3_GC5 replicate 1
GC D15_M3_GC5 replicate 2
GC D15_M3_GC5 replicate 3
GC D15_M3_GC5 replicate 4
GC D15_M3_GC5 replicate 5
GC D15_M3_GC5 replicate 6
GC D15_M3_GC5 replicate 7
GC D15_M3_GC5 replicate 8
GC D15_M3_GC5 replicate 9
GC D15_M3_GC5 replicate 10
GC D15_M3_GC6 replicate 1
GC D15_M3_GC6 replicate 2
GC D15_M3_GC6 replicate 3 GC D15_M3_GC6 replicate 4
GC D15_M3_GC6 replicate 5
GC D15_M3_GC6 replicate 6
GC D15_M3_GC6 replicate 7
GC D15_M3_GC6 replicate 8
GC D15_M3_GC6 replicate 9
GC D15_M3_GC6 replicate 10
GC D15_M4_GC7 replicate 1
GC D15_M4_GC7 replicate 2
GC D15_M4_GC7 replicate 3
GC D15_M4_GC7 replicate 4
GC D15_M4_GC7 replicate 5
GC D15_M4_GC7 replicate 6
GC D15_M4_GC7 replicate 7
GC D15_M4_GC7 replicate 8
GC D15_M4_GC7 replicate 9
GC D15_M4_GC7 replicate 10
GC D15_M4_GC8 replicate 1
GC D15_M4_GC8 replicate 2
GC D15_M4_GC8 replicate 3
GC D15_M4_GC8 replicate 4
GC D15_M4_GC8 replicate 5
GC D15_M4_GC8 replicate 6
GC D15_M4_GC8 replicate 7
GC D15_M4_GC8 replicate 8
GC D15_M4_GC8 replicate 9
GC D15_M4_GC8 replicate 10
GC D15_M5_GC9 replicate 1
GC D15_M5_GC9 replicate 2
GC D15_M5_GC9 replicate 3
GC D15_M5_GC9 replicate 4
GC D15_M5_GC9 replicate 5
GC D15_M5_GC9 replicate 6
GC D15_M5_GC9 replicate 7
GC D15_M5_GC9 replicate 8
GC D15_M5_GC9 replicate 9
GC D15_M5_GC9 replicate 10
GC D15_M5_GC10 replicate 1
GC D15_M5_GC10 replicate 2
GC D15_M5_GC10 replicate 3
GC D15_M5_GC10 replicate 4
GC D15_M5_GC10 replicate 5
GC D15_M5_GC10 replicate 6
GC D15_M5_GC10 replicate 7
GC D15_M5_GC10 replicate 8
GC D15_M5_GC10 replicate 9
GC D15_M5_GC10 replicate 10
GC D15_M6_GC11 replicate 1
GC D15_M6_GC11 replicate 2
GC D15_M6_GC11 replicate 3
GC D15_M6_GC11 replicate 4
GC D15_M6_GC11 replicate 5
GC D15_M6_GC11 replicate 6
GC D15_M6_GC11 replicate 7
GC D15_M6_GC11 replicate 8
GC D15_M6_GC11 replicate 9
GC D15_M6_GC11 replicate 10
GC D15_M6_GC12 replicate 1
GC D15_M6_GC12 replicate 2
GC D15_M6_GC12 replicate 3
GC D15_M6_GC12 replicate 4
GC D15_M6_GC12 replicate 5
GC D15_M6_GC12 replicate 6
GC D15_M6_GC12 replicate 7
GC D15_M6_GC12 replicate 8
GC D15_M6_GC12 replicate 9
GC D15_M6_GC12 replicate 10
GC D15_M6_GC13 replicate 1
GC D15_M6_GC13 replicate 2
GC D15_M6_GC13 replicate 3
GC D15_M6_GC13 replicate 4
GC D15_M6_GC13 replicate 5
GC D15_M6_GC13 replicate 6
GC D15_M6_GC13 replicate 7
GC D15_M6_GC13 replicate 8
GC D15_M6_GC13 replicate 9
GC D15_M6_GC13 replicate 10
GC D15_M6_GC14 replicate 1
GC D15_M6_GC14 replicate 2
GC D15_M6_GC14 replicate 3
GC D15_M6_GC14 replicate 4
GC D15_M6_GC14 replicate 5
GC D15_M6_GC14 replicate 6
GC D15_M6_GC14 replicate 7
GC D15_M6_GC14 replicate 8
GC D15_M6_GC14 replicate 9
GC D15_M6_GC14 replicate 10
GC D15_M7_GC15 replicate 1
GC D15_M7_GC15 replicate 2
GC D15_M7_GC15 replicate 3
GC D15_M7_GC15 replicate 4
GC D15_M7_GC15 replicate 5
GC D15_M7_GC15 replicate 6
GC D15_M7_GC15 replicate 7
GC D15_M7_GC15 replicate 8
GC D15_M7_GC15 replicate 9
GC D15_M7_GC15 replicate 10
GC D15_M7_GC16 replicate 1
GC D15_M7_GC16 replicate 2
GC D15_M7_GC16 replicate 3
GC D15_M7_GC16 replicate 4
GC D15_M7_GC16 replicate 5
GC D15_M7_GC16 replicate 6
GC D15_M7_GC16 replicate 7
GC D15_M7_GC16 replicate 8
GC D15_M7_GC16 replicate 9
GC D15_M7_GC16 replicate 10
GC D15_M8_GC17 replicate 1
GC D15_M8_GC17 replicate 2
GC D15_M8_GC17 replicate 3
GC D15_M8_GC17 replicate 4
GC D15_M8_GC17 replicate 5
GC D15_M8_GC17 replicate 6
GC D15_M8_GC17 replicate 7
GC D15_M8_GC17 replicate 8
GC D15_M8_GC17 replicate 9
GC D15_M8_GC17 replicate 10
GC D15_M9_GC18 replicate 1
GC D15_M9_GC18 replicate 2
GC D15_M9_GC18 replicate 3
GC D15_M9_GC18 replicate 4
GC D15_M9_GC18 replicate 5
GC D15_M9_GC18 replicate 6
GC D15_M9_GC18 replicate 7
GC D15_M9_GC18 replicate 8
GC D15_M9_GC18 replicate 9
GC D15_M9_GC18 replicate 10
GC D15_M9_GC19 replicate 1
GC D15_M9_GC19 replicate 2
GC D15_M9_GC19 replicate 3
GC D15_M9_GC19 replicate 4
GC D15_M9_GC19 replicate 5
GC D15_M9_GC19 replicate 6
GC D15_M9_GC19 replicate 7
GC D15_M9_GC19 replicate 8
GC D15_M9_GC19 replicate 9
GC D15_M9_GC19 replicate 10
GC D15_M10_GC20 replicate 1
GC D15_M10_GC20 replicate 2
GC D15_M10_GC20 replicate 3
GC D15_M10_GC20 replicate 4
GC D15_M10_GC20 replicate 5
GC D15_M10_GC20 replicate 6
GC D15_M10_GC20 replicate 7
GC D15_M10_GC20 replicate 8
GC D15_M10_GC20 replicate 9
GC D15_M10_GC20 replicate 10
GC D15_M10_GC21 replicate 1
GC D15_M10_GC21 replicate 2
GC D15_M10_GC21 replicate 3
GC D15_M10_GC21 replicate 4
GC D15_M10_GC21 replicate 5
GC D15_M10_GC21 replicate 6
GC D15_M10_GC21 replicate 7
GC D15_M10_GC21 replicate 8
GC D15_M10_GC21 replicate 9
GC D15_M10_GC21 replicate 10
GC D15_M10_GC22 replicate 1
GC D15_M10_GC22 replicate 2
GC D15_M10_GC22 replicate 3
GC D15_M10_GC22 replicate 4
GC D15_M10_GC22 replicate 5
GC D15_M10_GC22 replicate 6
GC D15_M10_GC22 replicate 7
GC D15_M10_GC22 replicate 8
GC D15_M10_GC22 replicate 9
GC D15_M10_GC22 replicate 10
GC D15_M10_GC23 replicate 1
GC D15_M10_GC23 replicate 2
GC D15_M10_GC23 replicate 3
GC D15_M10_GC23 replicate 4
GC D15_M10_GC23 replicate 5
GC D15_M10_GC23 replicate 6
GC D15_M10_GC23 replicate 7
GC D15_M10_GC23 replicate 8
GC D15_M10_GC23 replicate 9
GC D15_M10_GC23 replicate 10
GC D15_M11_GC24 replicate 1
GC D15_M11_GC24 replicate 2
GC D15_M11_GC24 replicate 3
GC D15_M11_GC24 replicate 4
GC D15_M11_GC24 replicate 5
GC D15_M11_GC24 replicate 6
GC D15_M11_GC24 replicate 7
GC D15_M11_GC24 replicate 8
GC D15_M11_GC24 replicate 9
GC D15_M11_GC24 replicate 10
GC D15_M11_GC25 replicate 1
GC D15_M11_GC25 replicate 2
GC D15_M11_GC25 replicate 3
GC D15_M11_GC25 replicate 4
GC D15_M11_GC25 replicate 5
GC D15_M11_GC25 replicate 6
GC D15_M11_GC25 replicate 7
GC D15_M11_GC25 replicate 8
GC D15_M11_GC25 replicate 9
GC D15_M11_GC25 replicate 10
GC D15_M11_GC26 replicate 1
GC D15_M11_GC26 replicate 2
GC D15_M11_GC26 replicate 3
GC D15_M11_GC26 replicate 4
GC D15_M11_GC26 replicate 5
GC D15_M11_GC26 replicate 6
GC D15_M11_GC26 replicate 7
GC D15_M11_GC26 replicate 8
GC D15_M11_GC26 replicate 9
GC D15_M11_GC26 replicate 10
GC D15_M12_GC27 replicate 1
GC D15_M12_GC27 replicate 2
GC D15_M12_GC27 replicate 3
GC D15_M12_GC27 replicate 4
GC D15_M12_GC27 replicate 5
GC D15_M12_GC27 replicate 6
GC D15_M12_GC27 replicate 7
GC D15_M12_GC27 replicate 8
GC D15_M12_GC27 replicate 9
GC D15_M12_GC27 replicate 10
GC D15_M12_GC28 replicate 1
GC D15_M12_GC28 replicate 2
GC D15_M12_GC28 replicate 3
GC D15_M12_GC28 replicate 4
GC D15_M12_GC28 replicate 5
GC D15_M12_GC28 replicate 6
GC D15_M12_GC28 replicate 7
GC D15_M12_GC28 replicate 8
GC D15_M12_GC28 replicate 9
GC D15_M12_GC28 replicate 10
GC D15_M12_GC29 replicate 1
GC D15_M12_GC29 replicate 2
GC D15_M12_GC29 replicate 3
GC D15_M12_GC29 replicate 4
GC D15_M12_GC29 replicate 5
GC D15_M12_GC29 replicate 6
GC D15_M12_GC29 replicate 7
GC D15_M12_GC29 replicate 8
GC D15_M12_GC29 replicate 9
GC D15_M12_GC29 replicate 10
GC D15_M13_GC30 replicate 1
GC D15_M13_GC30 replicate 2
GC D15_M13_GC30 replicate 3
GC D15_M13_GC30 replicate 4
GC D15_M13_GC30 replicate 5
GC D15_M13_GC30 replicate 6
GC D15_M13_GC30 replicate 7
GC D15_M13_GC30 replicate 8
GC D15_M13_GC30 replicate 9
GC D15_M13_GC30 replicate 10
GC D15_M13_GC31 replicate 1
GC D15_M13_GC31 replicate 2
GC D15_M13_GC31 replicate 3
GC D15_M13_GC31 replicate 4
GC D15_M13_GC31 replicate 5
GC D15_M13_GC31 replicate 6
GC D15_M13_GC31 replicate 7
GC D15_M13_GC31 replicate 8
GC D15_M13_GC31 replicate 9
GC D15_M13_GC31 replicate 10
GC D15_M13_GC32 replicate 1
GC D15_M13_GC32 replicate 2
GC D15_M13_GC32 replicate 3
GC D15_M13_GC32 replicate 4
GC D15_M13_GC32 replicate 5
GC D15_M13_GC32 replicate 6
GC D15_M13_GC32 replicate 7
GC D15_M13_GC32 replicate 8
GC D15_M13_GC32 replicate 9
GC D15_M13_GC32 replicate 10
GC D15_M13_GC33 replicate 1
GC D15_M13_GC33 replicate 2
GC D15_M13_GC33 replicate 3
GC D15_M13_GC33 replicate 4
GC D15_M13_GC33 replicate 5
GC D15_M13_GC33 replicate 6
GC D15_M13_GC33 replicate 7
GC D15_M13_GC33 replicate 8
GC D15_M13_GC33 replicate 9
GC D15_M13_GC33 replicate 10
GC D15_M14_GC34 replicate 1
GC D15_M14_GC34 replicate 2
GC D15_M14_GC34 replicate 3
GC D15_M14_GC34 replicate 4
GC D15_M14_GC34 replicate 5
GC D15_M14_GC34 replicate 6
GC D15_M14_GC34 replicate 7
GC D15_M14_GC34 replicate 8
GC D15_M14_GC34 replicate 9
GC D15_M14_GC34 replicate 10
GC D15_M14_GC35 replicate 1
GC D15_M14_GC35 replicate 2
GC D15_M14_GC35 replicate 3
GC D15_M14_GC35 replicate 4
GC D15_M14_GC35 replicate 5
GC D15_M14_GC35 replicate 6
GC D15_M14_GC35 replicate 7
GC D15_M14_GC35 replicate 8
GC D15_M14_GC35 replicate 9
GC D15_M14_GC35 replicate 10
GC D15_M14_GC36 replicate 1
GC D15_M14_GC36 replicate 2
GC D15_M14_GC36 replicate 3
GC D15_M14_GC36 replicate 4
GC D15_M14_GC36 replicate 5
GC D15_M14_GC36 replicate 6
GC D15_M14_GC36 replicate 7
GC D15_M14_GC36 replicate 8
GC D15_M14_GC36 replicate 9
GC D15_M14_GC36 replicate 10
GC D15_M14_GC37 replicate 1
GC D15_M14_GC37 replicate 2
GC D15_M14_GC37 replicate 3
GC D15_M14_GC37 replicate 4
GC D15_M14_GC37 replicate 5
GC D15_M14_GC37 replicate 6
GC D15_M14_GC37 replicate 7
GC D15_M14_GC37 replicate 8
GC D15_M14_GC37 replicate 9
GC D15_M14_GC37 replicate 10
GC D15_M15_GC38 replicate 1
GC D15_M15_GC38 replicate 2
GC D15_M15_GC38 replicate 3
GC D15_M15_GC38 replicate 4
GC D15_M15_GC38 replicate 5
GC D15_M15_GC38 replicate 6
GC D15_M15_GC38 replicate 7
GC D15_M15_GC38 replicate 8
GC D15_M15_GC38 replicate 9
GC D15_M15_GC38 replicate 10
GC D15_M16_GC39 replicate 1
GC D15_M16_GC39 replicate 2
GC D15_M16_GC39 replicate 3
GC D15_M16_GC39 replicate 4
GC D15_M16_GC39 replicate 5
GC D15_M16_GC39 replicate 6
GC D15_M16_GC39 replicate 7
GC D15_M16_GC39 replicate 8
GC D15_M16_GC39 replicate 9
GC D15_M16_GC39 replicate 10
GC D15_M16_GC40 replicate 1
GC D15_M16_GC40 replicate 2
GC D15_M16_GC40 replicate 3
GC D15_M16_GC40 replicate 4
GC D15_M16_GC40 replicate 5
GC D15_M16_GC40 replicate 6
GC D15_M16_GC40 replicate 7
GC D15_M16_GC40 replicate 8
GC D15_M16_GC40 replicate 9
GC D15_M16_GC40 replicate 10
GC D15_M17_GC41 replicate 1
GC D15_M17_GC41 replicate 2
GC D15_M17_GC41 replicate 3
GC D15_M17_GC41 replicate 4
GC D15_M17_GC41 replicate 5
GC D15_M17_GC41 replicate 6
GC D15_M17_GC41 replicate 7
GC D15_M17_GC41 replicate 8
GC D15_M17_GC41 replicate 9
GC D15_M17_GC41 replicate 10
GC D15_M17_GC42 replicate 1
GC D15_M17_GC42 replicate 2
GC D15_M17_GC42 replicate 3
GC D15_M17_GC42 replicate 4
GC D15_M17_GC42 replicate 5
GC D15_M17_GC42 replicate 6
GC D15_M17_GC42 replicate 7
GC D15_M17_GC42 replicate 8
GC D15_M17_GC42 replicate 9
GC D15_M17_GC42 replicate 10
GC D15_M17_GC43 replicate 1
GC D15_M17_GC43 replicate 2
GC D15_M17_GC43 replicate 3
GC D15_M17_GC43 replicate 4
GC D15_M17_GC43 replicate 5
GC D15_M17_GC43 replicate 6
GC D15_M17_GC43 replicate 7
GC D15_M17_GC43 replicate 8
GC D15_M17_GC43 replicate 9
GC D15_M17_GC43 replicate 10
GC D15_M17_GC44 replicate 1
GC D15_M17_GC44 replicate 2
GC D15_M17_GC44 replicate 3
GC D15_M17_GC44 replicate 4
GC D15_M17_GC44 replicate 5
GC D15_M17_GC44 replicate 6
GC D15_M17_GC44 replicate 7
GC D15_M17_GC44 replicate 8
GC D15_M17_GC44 replicate 9
GC D15_M17_GC44 replicate 10
GC D15_M17_GC45 replicate 1
GC D15_M17_GC45 replicate 2
GC D15_M17_GC45 replicate 3
GC D15_M17_GC45 replicate 4
GC D15_M17_GC45 replicate 5
GC D15_M17_GC45 replicate 6
GC D15_M17_GC45 replicate 7
GC D15_M17_GC45 replicate 8
GC D15_M17_GC45 replicate 9
GC D15_M17_GC45 replicate 10
GC D15_M18_GC46 replicate 1
GC D15_M18_GC46 replicate 2
GC D15_M18_GC46 replicate 3
GC D15_M18_GC46 replicate 4
GC D15_M18_GC46 replicate 5
GC D15_M18_GC46 replicate 6
GC D15_M18_GC46 replicate 7
GC D15_M18_GC46 replicate 8
GC D15_M18_GC46 replicate 9
GC D15_M18_GC46 replicate 10
GC D15_M18_GC47 replicate 1
GC D15_M18_GC47 replicate 2
GC D15_M18_GC47 replicate 3
GC D15_M18_GC47 replicate 4
GC D15_M18_GC47 replicate 5
GC D15_M18_GC47 replicate 6
GC D15_M18_GC47 replicate 7
GC D15_M18_GC47 replicate 8
GC D15_M18_GC47 replicate 9
GC D15_M18_GC47 replicate 10
GC D15_M18_GC48 replicate 1
GC D15_M18_GC48 replicate 2
GC D15_M18_GC48 replicate 3
GC D15_M18_GC48 replicate 4
GC D15_M18_GC48 replicate 5
GC D15_M18_GC48 replicate 6
GC D15_M18_GC48 replicate 7
GC D15_M18_GC48 replicate 8
GC D15_M18_GC48 replicate 9
GC D15_M18_GC48 replicate 10
GC D15_M19_GC49 replicate 1
GC D15_M19_GC49 replicate 2
GC D15_M19_GC49 replicate 3
GC D15_M19_GC49 replicate 4
GC D15_M19_GC49 replicate 5
GC D15_M19_GC49 replicate 6
GC D15_M19_GC49 replicate 7
GC D15_M19_GC49 replicate 8
GC D15_M19_GC49 replicate 9
GC D15_M19_GC49 replicate 10
GC D15_M19_GC50 replicate 1
GC D15_M19_GC50 replicate 2
GC D15_M19_GC50 replicate 3
GC D15_M19_GC50 replicate 4
GC D15_M19_GC50 replicate 5
GC D15_M19_GC50 replicate 6
GC D15_M19_GC50 replicate 7
GC D15_M19_GC50 replicate 8
GC D15_M19_GC50 replicate 9
GC D15_M19_GC50 replicate 10
GC D15_M19_GC51 replicate 1
GC D15_M19_GC51 replicate 2
GC D15_M19_GC51 replicate 3
GC D15_M19_GC51 replicate 4
GC D15_M19_GC51 replicate 5
GC D15_M19_GC51 replicate 6
GC D15_M19_GC51 replicate 7
GC D15_M19_GC51 replicate 8
GC D15_M19_GC51 replicate 9
GC D15_M19_GC51 replicate 10
GC D15_M19_GC52 replicate 1
GC D15_M19_GC52 replicate 2
GC D15_M19_GC52 replicate 3
GC D15_M19_GC52 replicate 4
GC D15_M19_GC52 replicate 5
GC D15_M19_GC52 replicate 6
GC D15_M19_GC52 replicate 7
GC D15_M19_GC52 replicate 8
GC D15_M19_GC52 replicate 9
GC D15_M19_GC52 replicate 10
GC D20_M20_GC53 replicate 1
GC D20_M20_GC53 replicate 2
GC D20_M20_GC53 replicate 3
GC D20_M20_GC53 replicate 4
GC D20_M20_GC53 replicate 5
GC D20_M20_GC53 replicate 6
GC D20_M20_GC53 replicate 7
GC D20_M20_GC53 replicate 8
GC D20_M20_GC53 replicate 9
GC D20_M20_GC53 replicate 10
GC D20_M20_GC54 replicate 1
GC D20_M20_GC54 replicate 2
GC D20_M20_GC54 replicate 3
GC D20_M20_GC54 replicate 4
GC D20_M20_GC54 replicate 5
GC D20_M20_GC54 replicate 6
GC D20_M20_GC54 replicate 7
GC D20_M20_GC54 replicate 8
GC D20_M20_GC54 replicate 9
GC D20_M20_GC54 replicate 10
GC D20_M20_GC55 replicate 1
GC D20_M20_GC55 replicate 2
GC D20_M20_GC55 replicate 3
GC D20_M20_GC55 replicate 4
GC D20_M20_GC55 replicate 5
GC D20_M20_GC55 replicate 6
GC D20_M20_GC55 replicate 7
GC D20_M20_GC55 replicate 8
GC D20_M20_GC55 replicate 9
GC D20_M20_GC55 replicate 10
GC D20_M20_GC56 replicate 1
GC D20_M20_GC56 replicate 2
GC D20_M20_GC56 replicate 3
GC D20_M20_GC56 replicate 4
GC D20_M20_GC56 replicate 5
GC D20_M20_GC56 replicate 6
GC D20_M20_GC56 replicate 7
GC D20_M20_GC56 replicate 8
GC D20_M20_GC56 replicate 9
GC D20_M20_GC56 replicate 10
GC D20_M20_GC57 replicate 1
GC D20_M20_GC57 replicate 2
GC D20_M20_GC57 replicate 3
GC D20_M20_GC57 replicate 4
GC D20_M20_GC57 replicate 5
GC D20_M20_GC57 replicate 6
GC D20_M20_GC57 replicate 7
GC D20_M20_GC57 replicate 8
GC D20_M20_GC57 replicate 9
GC D20_M20_GC57 replicate 10
GC D20_M20_GC58 replicate 1
GC D20_M20_GC58 replicate 2
GC D20_M20_GC58 replicate 3
GC D20_M20_GC58 replicate 4
GC D20_M20_GC58 replicate 5
GC D20_M20_GC58 replicate 6
GC D20_M20_GC58 replicate 7
GC D20_M20_GC58 replicate 8
GC D20_M20_GC58 replicate 9
GC D20_M20_GC58 replicate 10
GC D20_M20_GC59 replicate 1
GC D20_M20_GC59 replicate 2
GC D20_M20_GC59 replicate 3
GC D20_M20_GC59 replicate 4
GC D20_M20_GC59 replicate 5
GC D20_M20_GC59 replicate 6
GC D20_M20_GC59 replicate 7
GC D20_M20_GC59 replicate 8
GC D20_M20_GC59 replicate 9
GC D20_M20_GC59 replicate 10
GC D20_M20_GC60 replicate 1
GC D20_M20_GC60 replicate 2
GC D20_M20_GC60 replicate 3
GC D20_M20_GC60 replicate 4
GC D20_M20_GC60 replicate 5
GC D20_M20_GC60 replicate 6
GC D20_M20_GC60 replicate 7
GC D20_M20_GC60 replicate 8
GC D20_M20_GC60 replicate 9
GC D20_M20_GC60 replicate 10
GC D20_M20_GC61 replicate 1
GC D20_M20_GC61 replicate 2
GC D20_M20_GC61 replicate 3
GC D20_M20_GC61 replicate 4
GC D20_M20_GC61 replicate 5
GC D20_M20_GC61 replicate 6
GC D20_M20_GC61 replicate 7
GC D20_M20_GC61 replicate 8
GC D20_M20_GC61 replicate 9
GC D20_M20_GC61 replicate 10
GC D20_M20_GC62 replicate 1
GC D20_M20_GC62 replicate 2
GC D20_M20_GC62 replicate 3
GC D20_M20_GC62 replicate 4
GC D20_M20_GC62 replicate 5
GC D20_M20_GC62 replicate 6
GC D20_M20_GC62 replicate 7
GC D20_M20_GC62 replicate 8
GC D20_M20_GC62 replicate 9
GC D20_M20_GC62 replicate 10
GC D20_M20_GC63 replicate 1
GC D20_M20_GC63 replicate 2
GC D20_M20_GC63 replicate 3
GC D20_M20_GC63 replicate 4
GC D20_M20_GC63 replicate 5
GC D20_M20_GC63 replicate 6
GC D20_M20_GC63 replicate 7
GC D20_M20_GC63 replicate 8
GC D20_M20_GC63 replicate 9
GC D20_M20_GC63 replicate 10
GC D20_M20_GC64 replicate 1
GC D20_M20_GC64 replicate 2
GC D20_M20_GC64 replicate 3
GC D20_M20_GC64 replicate 4
GC D20_M20_GC64 replicate 5
GC D20_M20_GC64 replicate 6
GC D20_M20_GC64 replicate 7
GC D20_M20_GC64 replicate 8
GC D20_M20_GC64 replicate 9
GC D20_M20_GC64 replicate 10
GC D20_M20_GC65 replicate 1
GC D20_M20_GC65 replicate 2
GC D20_M20_GC65 replicate 3
GC D20_M20_GC65 replicate 4
GC D20_M20_GC65 replicate 5
GC D20_M20_GC65 replicate 6
GC D20_M20_GC65 replicate 7
GC D20_M20_GC65 replicate 8
GC D20_M20_GC65 replicate 9
GC D20_M20_GC65 replicate 10
GC D20_M20_GC66 replicate 1
GC D20_M20_GC66 replicate 2
GC D20_M20_GC66 replicate 3
GC D20_M20_GC66 replicate 4
GC D20_M20_GC66 replicate 5
GC D20_M20_GC66 replicate 6
GC D20_M20_GC66 replicate 7
GC D20_M20_GC66 replicate 8
GC D20_M20_GC66 replicate 9
GC D20_M20_GC66 replicate 10
GC D20_M20_GC67 replicate 1
GC D20_M20_GC67 replicate 2
GC D20_M20_GC67 replicate 3
GC D20_M20_GC67 replicate 4
GC D20_M20_GC67 replicate 5
GC D20_M20_GC67 replicate 6
GC D20_M20_GC67 replicate 7
GC D20_M20_GC67 replicate 8
GC D20_M20_GC67 replicate 9
GC D20_M20_GC67 replicate 10
GC D20_M20_GC68 replicate 1
GC D20_M20_GC68 replicate 2
GC D20_M20_GC68 replicate 3
GC D20_M20_GC68 replicate 4
GC D20_M20_GC68 replicate 5
GC D20_M20_GC68 replicate 6
GC D20_M20_GC68 replicate 7
GC D20_M20_GC68 replicate 8
GC D20_M20_GC68 replicate 9
GC D20_M20_GC68 replicate 10
GC D20_M20_GC69 replicate 1
GC D20_M20_GC69 replicate 2
GC D20_M20_GC69 replicate 3
GC D20_M20_GC69 replicate 4
GC D20_M20_GC69 replicate 5
GC D20_M20_GC69 replicate 6
GC D20_M20_GC69 replicate 7
GC D20_M20_GC69 replicate 8
GC D20_M20_GC69 replicate 9
GC D20_M20_GC69 replicate 10
GC D20_M21_GC70 replicate 1
GC D20_M21_GC70 replicate 2
GC D20_M21_GC70 replicate 3
GC D20_M21_GC70 replicate 4
GC D20_M21_GC70 replicate 5
GC D20_M21_GC70 replicate 6
GC D20_M21_GC70 replicate 7
GC D20_M21_GC70 replicate 8
GC D20_M21_GC70 replicate 9
GC D20_M21_GC70 replicate 10
GC D20_M21_GC71 replicate 1
GC D20_M21_GC71 replicate 2
GC D20_M21_GC71 replicate 3
GC D20_M21_GC71 replicate 4
GC D20_M21_GC71 replicate 5
GC D20_M21_GC71 replicate 6
GC D20_M21_GC71 replicate 7
GC D20_M21_GC71 replicate 8
GC D20_M21_GC71 replicate 9
GC D20_M21_GC71 replicate 10
GC D20_M21_GC72 replicate 1
GC D20_M21_GC72 replicate 2
GC D20_M21_GC72 replicate 3
GC D20_M21_GC72 replicate 4
GC D20_M21_GC72 replicate 5
GC D20_M21_GC72 replicate 6
GC D20_M21_GC72 replicate 7
GC D20_M21_GC72 replicate 8
GC D20_M21_GC72 replicate 9
GC D20_M21_GC72 replicate 10
GC D20_M21_GC73 replicate 1
GC D20_M21_GC73 replicate 2
GC D20_M21_GC73 replicate 3
GC D20_M21_GC73 replicate 4
GC D20_M21_GC73 replicate 5
GC D20_M21_GC73 replicate 6
GC D20_M21_GC73 replicate 7
GC D20_M21_GC73 replicate 8
GC D20_M21_GC73 replicate 9
GC D20_M21_GC73 replicate 10
GC D20_M21_GC74 replicate 1 GC D20_M21_GC74 replicate 2
GC D20_M21_GC74 replicate 3
GC D20_M21_GC74 replicate 4 GC D20_M21_GC74 replicate 5
GC D20_M21_GC74 replicate 6
GC D20_M21_GC74 replicate 7
GC D20_M21_GC74 replicate 8
GC D20_M21_GC74 replicate 9 GC D20_M21_GC74 replicate 10
GC D20_M21_GC75 replicate 1
GC D20_M21_GC75 replicate 2
GC D20_M21_GC75 replicate 3
GC D20_M21_GC75 replicate 4
GC D20_M21_GC75 replicate 5
GC D20_M21_GC75 replicate 6
GC D20_M21_GC75 replicate 7
GC D20_M21_GC75 replicate 8
GC D20_M21_GC75 replicate 9
GC D20_M21_GC75 replicate 10
GC D20_M21_GC76 replicate 1
GC D20_M21_GC76 replicate 2
GC D20_M21_GC76 replicate 3
GC D20_M21_GC76 replicate 4
GC D20_M21_GC76 replicate 5
GC D20_M21_GC76 replicate 6
GC D20_M21_GC76 replicate 7
GC D20_M21_GC76 replicate 8
GC D20_M21_GC76 replicate 9
GC D20_M21_GC76 replicate 10
GC D20_M21_GC77 replicate 1
GC D20_M21_GC77 replicate 2
GC D20_M21_GC77 replicate 3
GC D20_M21_GC77 replicate 4
GC D20_M21_GC77 replicate 5
GC D20_M21_GC77 replicate 6
GC D20_M21_GC77 replicate 7
GC D20_M21_GC77 replicate 8
GC D20_M21_GC77 replicate 9
GC D20_M21_GC77 replicate 10
GC D20_M21_GC78 replicate 1
GC D20_M21_GC78 replicate 2
GC D20_M21_GC78 replicate 3
GC D20_M21_GC78 replicate 4
GC D20_M21_GC78 replicate 5
GC D20_M21_GC78 replicate 6
GC D20_M21_GC78 replicate 7
GC D20_M21_GC78 replicate 8
GC D20_M21_GC78 replicate 9
GC D20_M21_GC78 replicate 10
GC D20_M21_GC79 replicate 1
GC D20_M21_GC79 replicate 2
GC D20_M21_GC79 replicate 3
GC D20_M21_GC79 replicate 4
GC D20_M21_GC79 replicate 5
GC D20_M21_GC79 replicate 6
GC D20_M21_GC79 replicate 7
GC D20_M21_GC79 replicate 8
GC D20_M21_GC79 replicate 9
GC D20_M21_GC79 replicate 10
GC D20_M21_GC80 replicate 1
GC D20_M21_GC80 replicate 2
GC D20_M21_GC80 replicate 3
GC D20_M21_GC80 replicate 4
GC D20_M21_GC80 replicate 5
GC D20_M21_GC80 replicate 6
GC D20_M21_GC80 replicate 7
GC D20_M21_GC80 replicate 8
GC D20_M21_GC80 replicate 9
GC D20_M21_GC80 replicate 10
GC D20_M21_GC81 replicate 1
GC D20_M21_GC81 replicate 2
GC D20_M21_GC81 replicate 3
GC D20_M21_GC81 replicate 4
GC D20_M21_GC81 replicate 5
GC D20_M21_GC81 replicate 6
GC D20_M21_GC81 replicate 7
GC D20_M21_GC81 replicate 8
GC D20_M21_GC81 replicate 9
GC D20_M21_GC81 replicate 10
GC D20_M21_GC82 replicate 1
GC D20_M21_GC82 replicate 2
GC D20_M21_GC82 replicate 3
GC D20_M21_GC82 replicate 4
GC D20_M21_GC82 replicate 5
GC D20_M21_GC82 replicate 6
GC D20_M21_GC82 replicate 7
GC D20_M21_GC82 replicate 8
GC D20_M21_GC82 replicate 9
GC D20_M21_GC82 replicate 10
GC D20_M21_GC83 replicate 1
GC D20_M21_GC83 replicate 2
GC D20_M21_GC83 replicate 3
GC D20_M21_GC83 replicate 4
GC D20_M21_GC83 replicate 5
GC D20_M21_GC83 replicate 6
GC D20_M21_GC83 replicate 7
GC D20_M21_GC83 replicate 8
GC D20_M21_GC83 replicate 9
GC D20_M21_GC83 replicate 10
GC D20_M21_GC84 replicate 1
GC D20_M21_GC84 replicate 2
GC D20_M21_GC84 replicate 3
GC D20_M21_GC84 replicate 4
GC D20_M21_GC84 replicate 5
GC D20_M21_GC84 replicate 6
GC D20_M21_GC84 replicate 7
GC D20_M21_GC84 replicate 8
GC D20_M21_GC84 replicate 9
GC D20_M21_GC84 replicate 10
GC D20_M22_GC85 replicate 1
GC D20_M22_GC85 replicate 2
GC D20_M22_GC85 replicate 3
GC D20_M22_GC85 replicate 4
GC D20_M22_GC85 replicate 5
GC D20_M22_GC85 replicate 6
GC D20_M22_GC85 replicate 7
GC D20_M22_GC85 replicate 8
GC D20_M22_GC85 replicate 9
GC D20_M22_GC85 replicate 10
GC D20_M22_GC86 replicate 1
GC D20_M22_GC86 replicate 2
GC D20_M22_GC86 replicate 3
GC D20_M22_GC86 replicate 4
GC D20_M22_GC86 replicate 5
GC D20_M22_GC86 replicate 6
GC D20_M22_GC86 replicate 7
GC D20_M22_GC86 replicate 8
GC D20_M22_GC86 replicate 9
GC D20_M22_GC86 replicate 10
GC D20_M22_GC87 replicate 1
GC D20_M22_GC87 replicate 2
GC D20_M22_GC87 replicate 3
GC D20_M22_GC87 replicate 4
GC D20_M22_GC87 replicate 5
GC D20_M22_GC87 replicate 6
GC D20_M22_GC87 replicate 7
GC D20_M22_GC87 replicate 8
GC D20_M22_GC87 replicate 9
GC D20_M22_GC87 replicate 10
GC D20_M22_GC88 replicate 1
GC D20_M22_GC88 replicate 2
GC D20_M22_GC88 replicate 3
GC D20_M22_GC88 replicate 4
GC D20_M22_GC88 replicate 5
GC D20_M22_GC88 replicate 6
GC D20_M22_GC88 replicate 7
GC D20_M22_GC88 replicate 8
GC D20_M22_GC88 replicate 9
GC D20_M22_GC88 replicate 10
GC D20_M22_GC89 replicate 1
GC D20_M22_GC89 replicate 2
GC D20_M22_GC89 replicate 3
GC D20_M22_GC89 replicate 4
GC D20_M22_GC89 replicate 5
GC D20_M22_GC89 replicate 6
GC D20_M22_GC89 replicate 7
GC D20_M22_GC89 replicate 8
GC D20_M22_GC89 replicate 9
GC D20_M22_GC89 replicate 10
GC D20_M22_GC90 replicate 1
GC D20_M22_GC90 replicate 2
GC D20_M22_GC90 replicate 3
GC D20_M22_GC90 replicate 4
GC D20_M22_GC90 replicate 5
GC D20_M22_GC90 replicate 6
GC D20_M22_GC90 replicate 7
GC D20_M22_GC90 replicate 8
GC D20_M22_GC90 replicate 9
GC D20_M22_GC90 replicate 10
GC D20_M22_GC91 replicate 1
GC D20_M22_GC91 replicate 2
GC D20_M22_GC91 replicate 3
GC D20_M22_GC91 replicate 4
GC D20_M22_GC91 replicate 5
GC D20_M22_GC91 replicate 6
GC D20_M22_GC91 replicate 7
GC D20_M22_GC91 replicate 8
GC D20_M22_GC91 replicate 9
GC D20_M22_GC91 replicate 10
GC D20_M22_GC92 replicate 1
GC D20_M22_GC92 replicate 2
GC D20_M22_GC92 replicate 3
GC D20_M22_GC92 replicate 4
GC D20_M22_GC92 replicate 5
GC D20_M22_GC92 replicate 6
GC D20_M22_GC92 replicate 7
GC D20_M22_GC92 replicate 8
GC D20_M22_GC92 replicate 9
GC D20_M22_GC92 replicate 10
GC D20_M22_GC93 replicate 1
GC D20_M22_GC93 replicate 2
GC D20_M22_GC93 replicate 3
GC D20_M22_GC93 replicate 4
GC D20_M22_GC93 replicate 5
GC D20_M22_GC93 replicate 6
GC D20_M22_GC93 replicate 7
GC D20_M22_GC93 replicate 8
GC D20_M22_GC93 replicate 9
GC D20_M22_GC93 replicate 10
GC D20_M22_GC94 replicate 1
GC D20_M22_GC94 replicate 2
GC D20_M22_GC94 replicate 3
GC D20_M22_GC94 replicate 4
GC D20_M22_GC94 replicate 5
GC D20_M22_GC94 replicate 6
GC D20_M22_GC94 replicate 7
GC D20_M22_GC94 replicate 8
GC D20_M22_GC94 replicate 9
GC D20_M22_GC94 replicate 10
GC D20_M22_GC95 replicate 1
GC D20_M22_GC95 replicate 2
GC D20_M22_GC95 replicate 3
GC D20_M22_GC95 replicate 4
GC D20_M22_GC95 replicate 5
GC D20_M22_GC95 replicate 6
GC D20_M22_GC95 replicate 7
GC D20_M22_GC95 replicate 8
GC D20_M22_GC95 replicate 9
GC D20_M22_GC95 replicate 10
GC D20_M22_GC96 replicate 1
GC D20_M22_GC96 replicate 2
GC D20_M22_GC96 replicate 3
GC D20_M22_GC96 replicate 4
GC D20_M22_GC96 replicate 5
GC D20_M22_GC96 replicate 6
GC D20_M22_GC96 replicate 7
GC D20_M22_GC96 replicate 8
GC D20_M22_GC96 replicate 9
GC D20_M22_GC96 replicate 10
GC D20_M22_GC97 replicate 1
GC D20_M22_GC97 replicate 2
GC D20_M22_GC97 replicate 3 GC D20_M22_GC97 replicate 4
GC D20_M22_GC97 replicate 5
GC D20_M22_GC97 replicate 6
GC D20_M22_GC97 replicate 7
GC D20_M22_GC97 replicate 8 GC D20_M22_GC97 replicate 9
GC D20_M22_GC97 replicate 10
GC D20_M22_GC98 replicate 1
GC D20_M22_GC98 replicate 2
GC D20_M22_GC98 replicate 3
GC D20_M22_GC98 replicate 4
GC D20_M22_GC98 replicate 5
GC D20_M22_GC98 replicate 6
GC D20_M22_GC98 replicate 7
GC D20_M22_GC98 replicate 8
GC D20_M22_GC98 replicate 9
GC D20_M22_GC98 replicate 10
GC D20_M22_GC99 replicate 1
GC D20_M22_GC99 replicate 2
GC D20_M22_GC99 replicate 3
GC D20_M22_GC99 replicate 4
GC D20_M22_GC99 replicate 5
GC D20_M22_GC99 replicate 6
GC D20_M22_GC99 replicate 7
GC D20_M22_GC99 replicate 8
GC D20_M22_GC99 replicate 9
GC D20_M22_GC99 replicate 10
GC D20_M23_GC100 replicate 1
GC D20_M23_GC100 replicate 2
GC D20_M23_GC100 replicate 3
GC D20_M23_GC100 replicate 4
GC D20_M23_GC100 replicate 5
GC D20_M23_GC100 replicate 6
GC D20_M23_GC100 replicate 7
GC D20_M23_GC100 replicate 8
GC D20_M23_GC100 replicate 9
GC D20_M23_GC100 replicate 10
GC D20_M23_GC101 replicate 1
GC D20_M23_GC101 replicate 2
GC D20_M23_GC101 replicate 3
GC D20_M23_GC101 replicate 4
GC D20_M23_GC101 replicate 5
GC D20_M23_GC101 replicate 6
GC D20_M23_GC101 replicate 7
GC D20_M23_GC101 replicate 8
GC D20_M23_GC101 replicate 9
GC D20_M23_GC101 replicate 10
GC D20_M23_GC102 replicate 1
GC D20_M23_GC102 replicate 2
GC D20_M23_GC102 replicate 3
GC D20_M23_GC102 replicate 4
GC D20_M23_GC102 replicate 5
GC D20_M23_GC102 replicate 6
GC D20_M23_GC102 replicate 7
GC D20_M23_GC102 replicate 8
GC D20_M23_GC102 replicate 9
GC D20_M23_GC102 replicate 10
GC D20_M23_GC103 replicate 1
GC D20_M23_GC103 replicate 2
GC D20_M23_GC103 replicate 3
GC D20_M23_GC103 replicate 4
GC D20_M23_GC103 replicate 5
GC D20_M23_GC103 replicate 6
GC D20_M23_GC103 replicate 7
GC D20_M23_GC103 replicate 8
GC D20_M23_GC103 replicate 9
GC D20_M23_GC103 replicate 10
GC D20_M23_GC104 replicate 1
GC D20_M23_GC104 replicate 2
GC D20_M23_GC104 replicate 3
GC D20_M23_GC104 replicate 4
GC D20_M23_GC104 replicate 5
GC D20_M23_GC104 replicate 6
GC D20_M23_GC104 replicate 7
GC D20_M23_GC104 replicate 8
GC D20_M23_GC104 replicate 9
GC D20_M23_GC104 replicate 10
GC D20_M23_GC105 replicate 1
GC D20_M23_GC105 replicate 2
GC D20_M23_GC105 replicate 3
GC D20_M23_GC105 replicate 4
GC D20_M23_GC105 replicate 5
GC D20_M23_GC105 replicate 6
GC D20_M23_GC105 replicate 7
GC D20_M23_GC105 replicate 8
GC D20_M23_GC105 replicate 9
GC D20_M23_GC105 replicate 10
GC D20_M23_GC106 replicate 1
GC D20_M23_GC106 replicate 2
GC D20_M23_GC106 replicate 3
GC D20_M23_GC106 replicate 4
GC D20_M23_GC106 replicate 5
GC D20_M23_GC106 replicate 6
GC D20_M23_GC106 replicate 7
GC D20_M23_GC106 replicate 8
GC D20_M23_GC106 replicate 9
GC D20_M23_GC106 replicate 10
GC D20_M23_GC107 replicate 1
GC D20_M23_GC107 replicate 2
GC D20_M23_GC107 replicate 3
GC D20_M23_GC107 replicate 4
GC D20_M23_GC107 replicate 5
GC D20_M23_GC107 replicate 6
GC D20_M23_GC107 replicate 7
GC D20_M23_GC107 replicate 8
GC D20_M23_GC107 replicate 9
GC D20_M23_GC107 replicate 10
GC D20_M23_GC108 replicate 1
GC D20_M23_GC108 replicate 2
GC D20_M23_GC108 replicate 3
GC D20_M23_GC108 replicate 4
GC D20_M23_GC108 replicate 5
GC D20_M23_GC108 replicate 6
GC D20_M23_GC108 replicate 7
GC D20_M23_GC108 replicate 8
GC D20_M23_GC108 replicate 9
GC D20_M23_GC108 replicate 10
GC D20_M23_GC109 replicate 1
GC D20_M23_GC109 replicate 2
GC D20_M23_GC109 replicate 3
GC D20_M23_GC109 replicate 4
GC D20_M23_GC109 replicate 5
GC D20_M23_GC109 replicate 6
GC D20_M23_GC109 replicate 7
GC D20_M23_GC109 replicate 8
GC D20_M23_GC109 replicate 9
GC D20_M23_GC109 replicate 10
GC D20_M24_GC110 replicate 1
GC D20_M24_GC110 replicate 2
GC D20_M24_GC110 replicate 3
GC D20_M24_GC110 replicate 4
GC D20_M24_GC110 replicate 5
GC D20_M24_GC110 replicate 6
GC D20_M24_GC110 replicate 7
GC D20_M24_GC110 replicate 8
GC D20_M24_GC110 replicate 9
GC D20_M24_GC110 replicate 10
GC D20_M24_GC111 replicate 1
GC D20_M24_GC111 replicate 2
GC D20_M24_GC111 replicate 3
GC D20_M24_GC111 replicate 4
GC D20_M24_GC111 replicate 5
GC D20_M24_GC111 replicate 6
GC D20_M24_GC111 replicate 7
GC D20_M24_GC111 replicate 8
GC D20_M24_GC111 replicate 9
GC D20_M24_GC111 replicate 10
GC D20_M24_GC112 replicate 1
GC D20_M24_GC112 replicate 2
GC D20_M24_GC112 replicate 3
GC D20_M24_GC112 replicate 4
GC D20_M24_GC112 replicate 5
GC D20_M24_GC112 replicate 6
GC D20_M24_GC112 replicate 7
GC D20_M24_GC112 replicate 8
GC D20_M24_GC112 replicate 9
GC D20_M24_GC112 replicate 10
GC D20_M24_GC113 replicate 1
GC D20_M24_GC113 replicate 2
GC D20_M24_GC113 replicate 3
GC D20_M24_GC113 replicate 4
GC D20_M24_GC113 replicate 5
GC D20_M24_GC113 replicate 6
GC D20_M24_GC113 replicate 7
GC D20_M24_GC113 replicate 8
GC D20_M24_GC113 replicate 9
GC D20_M24_GC113 replicate 10
GC D20_M24_GC114 replicate 1
GC D20_M24_GC114 replicate 2
GC D20_M24_GC114 replicate 3
GC D20_M24_GC114 replicate 4
GC D20_M24_GC114 replicate 5
GC D20_M24_GC114 replicate 6
GC D20_M24_GC114 replicate 7
GC D20_M24_GC114 replicate 8
GC D20_M24_GC114 replicate 9
GC D20_M24_GC114 replicate 10
GC D20_M24_GC115 replicate 1
GC D20_M24_GC115 replicate 2
GC D20_M24_GC115 replicate 3
GC D20_M24_GC115 replicate 4
GC D20_M24_GC115 replicate 5
GC D20_M24_GC115 replicate 6
GC D20_M24_GC115 replicate 7
GC D20_M24_GC115 replicate 8
GC D20_M24_GC115 replicate 9
GC D20_M24_GC115 replicate 10
GC D20_M25_GC116 replicate 1
GC D20_M25_GC116 replicate 2
GC D20_M25_GC116 replicate 3
GC D20_M25_GC116 replicate 4
GC D20_M25_GC116 replicate 5
GC D20_M25_GC116 replicate 6
GC D20_M25_GC116 replicate 7
GC D20_M25_GC116 replicate 8
GC D20_M25_GC116 replicate 9
GC D20_M25_GC116 replicate 10
GC D20_M25_GC117 replicate 1
GC D20_M25_GC117 replicate 2
GC D20_M25_GC117 replicate 3
GC D20_M25_GC117 replicate 4
GC D20_M25_GC117 replicate 5
GC D20_M25_GC117 replicate 6
GC D20_M25_GC117 replicate 7
GC D20_M25_GC117 replicate 8
GC D20_M25_GC117 replicate 9
GC D20_M25_GC117 replicate 10
GC D20_M25_GC118 replicate 1
GC D20_M25_GC118 replicate 2
GC D20_M25_GC118 replicate 3
GC D20_M25_GC118 replicate 4
GC D20_M25_GC118 replicate 5
GC D20_M25_GC118 replicate 6
GC D20_M25_GC118 replicate 7
GC D20_M25_GC118 replicate 8
GC D20_M25_GC118 replicate 9
GC D20_M25_GC118 replicate 10
GC D20_M25_GC119 replicate 1
GC D20_M25_GC119 replicate 2
GC D20_M25_GC119 replicate 3
GC D20_M25_GC119 replicate 4
GC D20_M25_GC119 replicate 5
GC D20_M25_GC119 replicate 6
GC D20_M25_GC119 replicate 7
GC D20_M25_GC119 replicate 8
GC D20_M25_GC119 replicate 9
GC D20_M25_GC119 replicate 10
A faster alternative is to simulate simple linear trajectories (no tree)
In [12]:
Copied!
# NOTE: this whole cell is commented out and does nothing when executed.
# It is kept as a reference for the tree-free variant: each replicate starts from
# the naive sequence and accumulates mutations one at a time, recording the summed
# DMS affinity/expression effects after every step.
# sim_df = pd.DataFrame()
# # expression_thresh = -1
# expression_thresh = -np.inf
# replicate = 1
# while replicate < 1000:
#     print(f"replicate {replicate + 1}", end=" \r")
#     sequence = naive
#     n_mutations = 0
#     rows = []
#     while n_mutations <= 20:
#         if n_mutations:
#             new_sequence = mutate_S5F(sequence)
#         else:
#             new_sequence = sequence
#         igh_aa = aa(new_sequence[:igk_idx], igh_frame)
#         igk_aa = aa(new_sequence[igk_idx:], igk_frame)
#         igh_mutations = mutations(naive_igh_aa, igh_aa, igh_pos_map, "(H)")
#         igk_mutations = mutations(naive_igk_aa, igk_aa, igk_pos_map, "(L)")
#         all_mutations = igh_mutations + igk_mutations
#         has_stop = any("*" in mutation for mutation in all_mutations)
#         affinity = np.nan if has_stop else dms_df.delta_bind_CGG[all_mutations].sum()
#         expression = np.nan if has_stop else dms_df.delta_expr[all_mutations].sum()
#         if expression < expression_thresh:
#             continue
#         new_row = [affinity,
#                    expression,
#                    n_mutations,
#                    replicate,
#                    ]
#         rows.append(new_row)
#         sequence = new_sequence
#         n_mutations += 1
#     rep_df = pd.DataFrame(rows, columns=(r"$\Delta$ affinity", r"$\Delta$ expression", "divergence", "replicate"))
#     sim_df = pd.concat((sim_df, rep_df), ignore_index=True, verify_integrity=True)
#     replicate += 1
# sim_df
# NOTE: this whole cell is commented out and does nothing when executed.
# It is kept as a reference for the tree-free variant: each replicate starts from
# the naive sequence and accumulates mutations one at a time, recording the summed
# DMS affinity/expression effects after every step.
# sim_df = pd.DataFrame()
# # expression_thresh = -1
# expression_thresh = -np.inf
# replicate = 1
# while replicate < 1000:
#     print(f"replicate {replicate + 1}", end=" \r")
#     sequence = naive
#     n_mutations = 0
#     rows = []
#     while n_mutations <= 20:
#         if n_mutations:
#             new_sequence = mutate_S5F(sequence)
#         else:
#             new_sequence = sequence
#         igh_aa = aa(new_sequence[:igk_idx], igh_frame)
#         igk_aa = aa(new_sequence[igk_idx:], igk_frame)
#         igh_mutations = mutations(naive_igh_aa, igh_aa, igh_pos_map, "(H)")
#         igk_mutations = mutations(naive_igk_aa, igk_aa, igk_pos_map, "(L)")
#         all_mutations = igh_mutations + igk_mutations
#         has_stop = any("*" in mutation for mutation in all_mutations)
#         affinity = np.nan if has_stop else dms_df.delta_bind_CGG[all_mutations].sum()
#         expression = np.nan if has_stop else dms_df.delta_expr[all_mutations].sum()
#         if expression < expression_thresh:
#             continue
#         new_row = [affinity,
#                    expression,
#                    n_mutations,
#                    replicate,
#                    ]
#         rows.append(new_row)
#         sequence = new_sequence
#         n_mutations += 1
#     rep_df = pd.DataFrame(rows, columns=(r"$\Delta$ affinity", r"$\Delta$ expression", "divergence", "replicate"))
#     sim_df = pd.concat((sim_df, rep_df), ignore_index=True, verify_integrity=True)
#     replicate += 1
# sim_df
In [13]:
Copied!
# Build one REI colour scale spanning every node of every observed tree, then
# render it as a standalone horizontal colourbar and save it next to the figures.
rei_values = [node.REI for tree in trees.values() for node in tree.tree.traverse()]
vmin = min(rei_values)
vmax = max(rei_values)
norm = Normalize(vmin=vmin, vmax=vmax)
cmap = "viridis"

# A thin axes strip across the full figure width holds the colourbar itself.
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
rei_mappable = cm.ScalarMappable(cmap=cmap, norm=norm)
plt.colorbar(rei_mappable, orientation="horizontal", cax=cax, label="REI")
plt.savefig(f"{output_dir}/cbar.pdf", bbox_inches="tight")
plt.show()
# Build one REI colour scale spanning every node of every observed tree, then
# render it as a standalone horizontal colourbar and save it next to the figures.
rei_values = [node.REI for tree in trees.values() for node in tree.tree.traverse()]
vmin = min(rei_values)
vmax = max(rei_values)
norm = Normalize(vmin=vmin, vmax=vmax)
cmap = "viridis"

# A thin axes strip across the full figure width holds the colourbar itself.
fig = plt.figure(figsize=(2, 1))
cax = fig.add_axes([0, 0, 1, 0.1])
rei_mappable = cm.ScalarMappable(cmap=cmap, norm=norm)
plt.colorbar(rei_mappable, orientation="horizontal", cax=cax, label="REI")
plt.savefig(f"{output_dir}/cbar.pdf", bbox_inches="tight")
plt.show()
Note: phenotypes are lower-bounded at the value of the worst single mutant
In [14]:
Copied!
# Directory that receives the per-GC trajectory PDFs.
traj_dir = f"{output_dir}/phenotype-trajectories"
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(traj_dir, exist_ok=True)
# Directory that receives the per-GC trajectory PDFs.
traj_dir = f"{output_dir}/phenotype-trajectories"
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(traj_dir, exist_ok=True)
In [15]:
Copied!
# Largest root-to-node distance over all observed trees.
# The leading 0 reproduces the original starting value; max() keeps the first
# maximal element, so ties resolve exactly as a strict ">" running maximum would.
max_divergence = max(
    [
        0,
        *(
            tree.tree.get_distance(node)
            for tree in trees.values()
            for node in tree.tree.traverse()
        ),
    ]
)
max_divergence
# Largest root-to-node distance over all observed trees.
# The leading 0 reproduces the original starting value; max() keeps the first
# maximal element, so ties resolve exactly as a strict ">" running maximum would.
max_divergence = max(
    [
        0,
        *(
            tree.tree.get_distance(node)
            for tree in trees.values()
            for node in tree.tree.traverse()
        ),
    ]
)
max_divergence
Out[15]:
19.0
In [16]:
Copied!
# Per-GC phenotype trajectory figures.
# For every GC tree, draw two stacked panels (affinity, expression) showing each
# node's phenotype against its distance from the root, the tree branches connecting
# them, and the median trajectory across the simulated replicate trees. Each figure
# is saved as {traj_dir}/{gc}.pdf; only the first five are displayed.

# Lower bounds: worst single-mutant effect in the DMS table.
# Upper bounds: best value found on any node of the observed trees.
worst_bind = dms_df.delta_bind_CGG.min() # np.nanmin([node.delta_bind for tree in trees.values() for node in tree.tree.traverse()]) - .1
best_bind = np.nanmax([node.delta_bind for tree in trees.values() for node in tree.tree.traverse()])
worst_expr = dms_df.delta_expr.min() # np.nanmin([node.delta_expr for tree in trees.values() for node in tree.tree.traverse()]) - .1
best_expr = np.nanmax([node.delta_expr for tree in trees.values() for node in tree.tree.traverse()])
# sim_df_clipped = sim_df.copy(deep=True)
# sim_df_clipped[r"$\Delta$ affinity"] = sim_df_clipped[r"$\Delta$ affinity"].clip(lower=worst_bind)
# sim_df_clipped[r"$\Delta$ expression"] = sim_df_clipped[r"$\Delta$ expression"].clip(lower=worst_expr)
# traj_df = pd.DataFrame()

# Nodes with a NaN phenotype are drawn this far below the lower bound (red line).
NAN_OFFSET = 0.2


def _floor_phenotype(value, floor):
    """Return ``value`` clamped to ``floor`` from below; NaN maps to ``floor - NAN_OFFSET``."""
    if value < floor:
        return floor
    if np.isnan(value):
        return floor - NAN_OFFSET
    return value


for j, (gc, tree) in enumerate(trees.items()):
    fig, axes = plt.subplots(2, 1, figsize=(2.5, 5), sharex=True)
    for i, (phenotype, phenotype_label, worst_phenotype, best_phenotype) in enumerate(
        zip(
            ("delta_bind", "delta_expr"),
            (r"$\Delta$ affinity", r"$\Delta$ expression"),
            (worst_bind, worst_expr),
            (best_bind, best_expr),
        )
    ):
        # Observed tree: one scatter point (x, y, size, colour) per node and one
        # line segment per parent -> child branch.
        points = []
        lines = []
        for node in tree.tree.traverse():
            node_divergence = tree.tree.get_distance(node)
            node_phenotype = _floor_phenotype(getattr(node, phenotype), worst_phenotype)
            points.append((node_divergence,
                           node_phenotype,
                           5 + 10 * np.sqrt(node.abundance),
                           node.REI))
            if node.up is not None:
                parent_phenotype = _floor_phenotype(getattr(node.up, phenotype), worst_phenotype)
                lines.append([(tree.tree.get_distance(node.up), parent_phenotype),
                              (node_divergence, node_phenotype)])
            # row_idx = f"{gc}_{node.name}"
            # traj_df.loc[row_idx, "gc"] = gc
            # traj_df.loc[row_idx, "node"] = node.name
            # traj_df.loc[row_idx, "time"] = metadata.time[gc]
            # traj_df.loc[row_idx, phenotype_label] = node_phenotype
            # traj_df.loc[row_idx, "abundance"] = node.abundance
            # traj_df.loc[row_idx, "REI"] = node.REI

        # Simulated replicates: traverse all replicate trees in lockstep and take the
        # median position of each corresponding branch.
        # NOTE(review): this relies on every replicate yielding nodes in the same
        # order (presumably identical topologies) - confirm against the simulation cell.
        sim_lines = []
        for node_replicates in zip(*[sim_tree.traverse() for sim_tree in sim_trees[gc]]):
            if all(node_replicate.is_root() for node_replicate in node_replicates):
                # the first tuple holds the roots; remember them for distance lookups
                roots = node_replicates
                continue
            assert all(not node_replicate.is_root() for node_replicate in node_replicates)
            parent_divergence = np.median([root.get_distance(node_replicate.up) for root, node_replicate in zip(roots, node_replicates)])
            child_divergence = np.median([root.get_distance(node_replicate) for root, node_replicate in zip(roots, node_replicates)])
            parent_phenotype = np.median([getattr(node_replicate.up, phenotype) for node_replicate in node_replicates])
            child_phenotype = np.median([getattr(node_replicate, phenotype) for node_replicate in node_replicates])
            sim_lines.append([(parent_divergence, parent_phenotype),
                              (child_divergence, child_phenotype)])

        lc = mc.LineCollection(lines, colors="k", linewidths=0.3, alpha=1, zorder=3)
        sim_lc = mc.LineCollection(sim_lines, colors="C6", linewidths=2, alpha=1, zorder=2)

        ax = axes[i]
        # Reference lines: zero effect (black), lower bound (orange), NaN level (red).
        ax.axhline(0, c="k", ls="--", lw=0.5, zorder=1)
        ax.axhline(worst_phenotype, c="orange", ls="--", lw=1, zorder=1)
        ax.axhline(worst_phenotype - NAN_OFFSET, c="r", ls="--", lw=1, zorder=1)
        # if phenotype == "delta_expr":
        #     ax.axhline(delta_expr_threshold, c="r", ls="--", lw=0.25, zorder=1)
        ax.add_collection(lc)
        ax.add_collection(sim_lc)
        # sns.lineplot(data=sim_df, x="divergence", y=phenotype_label, errorbar='sd', ax=ax, legend=False)
        ax.scatter(*zip(*points), cmap=cmap, alpha=1, edgecolors="k", linewidths=0.2, zorder=4,
                   norm=norm)
        if i == 0:
            ax.set_title(gc)
        ax.set_xlabel(None)
        ax.set_xlim(0, max_divergence)
        ax.set_ylabel(phenotype_label)
        ax.set_ylim(worst_phenotype - 0.5, best_phenotype + 0.1)
    axes[-1].set_xlabel("branch length from naive")
    plt.tight_layout()
    plt.savefig(f"{traj_dir}/{gc}.pdf")
    if j < 5:
        # display only the first few figures; every figure is still saved above
        plt.show()
    # close by handle so no figure stays open across iterations
    plt.close(fig)
# Per-GC phenotype trajectory figures.
# For every GC tree, draw two stacked panels (affinity, expression) showing each
# node's phenotype against its distance from the root, the tree branches connecting
# them, and the median trajectory across the simulated replicate trees. Each figure
# is saved as {traj_dir}/{gc}.pdf; only the first five are displayed.

# Lower bounds: worst single-mutant effect in the DMS table.
# Upper bounds: best value found on any node of the observed trees.
worst_bind = dms_df.delta_bind_CGG.min() # np.nanmin([node.delta_bind for tree in trees.values() for node in tree.tree.traverse()]) - .1
best_bind = np.nanmax([node.delta_bind for tree in trees.values() for node in tree.tree.traverse()])
worst_expr = dms_df.delta_expr.min() # np.nanmin([node.delta_expr for tree in trees.values() for node in tree.tree.traverse()]) - .1
best_expr = np.nanmax([node.delta_expr for tree in trees.values() for node in tree.tree.traverse()])
# sim_df_clipped = sim_df.copy(deep=True)
# sim_df_clipped[r"$\Delta$ affinity"] = sim_df_clipped[r"$\Delta$ affinity"].clip(lower=worst_bind)
# sim_df_clipped[r"$\Delta$ expression"] = sim_df_clipped[r"$\Delta$ expression"].clip(lower=worst_expr)
# traj_df = pd.DataFrame()

# Nodes with a NaN phenotype are drawn this far below the lower bound (red line).
NAN_OFFSET = 0.2


def _floor_phenotype(value, floor):
    """Return ``value`` clamped to ``floor`` from below; NaN maps to ``floor - NAN_OFFSET``."""
    if value < floor:
        return floor
    if np.isnan(value):
        return floor - NAN_OFFSET
    return value


for j, (gc, tree) in enumerate(trees.items()):
    fig, axes = plt.subplots(2, 1, figsize=(2.5, 5), sharex=True)
    for i, (phenotype, phenotype_label, worst_phenotype, best_phenotype) in enumerate(
        zip(
            ("delta_bind", "delta_expr"),
            (r"$\Delta$ affinity", r"$\Delta$ expression"),
            (worst_bind, worst_expr),
            (best_bind, best_expr),
        )
    ):
        # Observed tree: one scatter point (x, y, size, colour) per node and one
        # line segment per parent -> child branch.
        points = []
        lines = []
        for node in tree.tree.traverse():
            node_divergence = tree.tree.get_distance(node)
            node_phenotype = _floor_phenotype(getattr(node, phenotype), worst_phenotype)
            points.append((node_divergence,
                           node_phenotype,
                           5 + 10 * np.sqrt(node.abundance),
                           node.REI))
            if node.up is not None:
                parent_phenotype = _floor_phenotype(getattr(node.up, phenotype), worst_phenotype)
                lines.append([(tree.tree.get_distance(node.up), parent_phenotype),
                              (node_divergence, node_phenotype)])
            # row_idx = f"{gc}_{node.name}"
            # traj_df.loc[row_idx, "gc"] = gc
            # traj_df.loc[row_idx, "node"] = node.name
            # traj_df.loc[row_idx, "time"] = metadata.time[gc]
            # traj_df.loc[row_idx, phenotype_label] = node_phenotype
            # traj_df.loc[row_idx, "abundance"] = node.abundance
            # traj_df.loc[row_idx, "REI"] = node.REI

        # Simulated replicates: traverse all replicate trees in lockstep and take the
        # median position of each corresponding branch.
        # NOTE(review): this relies on every replicate yielding nodes in the same
        # order (presumably identical topologies) - confirm against the simulation cell.
        sim_lines = []
        for node_replicates in zip(*[sim_tree.traverse() for sim_tree in sim_trees[gc]]):
            if all(node_replicate.is_root() for node_replicate in node_replicates):
                # the first tuple holds the roots; remember them for distance lookups
                roots = node_replicates
                continue
            assert all(not node_replicate.is_root() for node_replicate in node_replicates)
            parent_divergence = np.median([root.get_distance(node_replicate.up) for root, node_replicate in zip(roots, node_replicates)])
            child_divergence = np.median([root.get_distance(node_replicate) for root, node_replicate in zip(roots, node_replicates)])
            parent_phenotype = np.median([getattr(node_replicate.up, phenotype) for node_replicate in node_replicates])
            child_phenotype = np.median([getattr(node_replicate, phenotype) for node_replicate in node_replicates])
            sim_lines.append([(parent_divergence, parent_phenotype),
                              (child_divergence, child_phenotype)])

        lc = mc.LineCollection(lines, colors="k", linewidths=0.3, alpha=1, zorder=3)
        sim_lc = mc.LineCollection(sim_lines, colors="C6", linewidths=2, alpha=1, zorder=2)

        ax = axes[i]
        # Reference lines: zero effect (black), lower bound (orange), NaN level (red).
        ax.axhline(0, c="k", ls="--", lw=0.5, zorder=1)
        ax.axhline(worst_phenotype, c="orange", ls="--", lw=1, zorder=1)
        ax.axhline(worst_phenotype - NAN_OFFSET, c="r", ls="--", lw=1, zorder=1)
        # if phenotype == "delta_expr":
        #     ax.axhline(delta_expr_threshold, c="r", ls="--", lw=0.25, zorder=1)
        ax.add_collection(lc)
        ax.add_collection(sim_lc)
        # sns.lineplot(data=sim_df, x="divergence", y=phenotype_label, errorbar='sd', ax=ax, legend=False)
        ax.scatter(*zip(*points), cmap=cmap, alpha=1, edgecolors="k", linewidths=0.2, zorder=4,
                   norm=norm)
        if i == 0:
            ax.set_title(gc)
        ax.set_xlabel(None)
        ax.set_xlim(0, max_divergence)
        ax.set_ylabel(phenotype_label)
        ax.set_ylim(worst_phenotype - 0.5, best_phenotype + 0.1)
    axes[-1].set_xlabel("branch length from naive")
    plt.tight_layout()
    plt.savefig(f"{traj_dir}/{gc}.pdf")
    if j < 5:
        # display only the first few figures; every figure is still saved above
        plt.show()
    # close by handle so no figure stays open across iterations
    plt.close(fig)
Tile plot images from each timepoint
In [17]:
Copied!
# For each timepoint, rasterise the trajectory PDFs of its GCs and tile them side
# by side into one PNG written to {output_dir}/phenotype-trajectories_tiled_{t}.png.
timepoints = sorted(metadata["time"].unique())
for t in timepoints:
    subset = metadata[metadata["time"] == t]
    gcs = subset.index.unique()
    # collect PDF files for each GC in this timepoint (GCs without a PDF are skipped)
    in_files = [f"{traj_dir}/{gc}.pdf" for gc in gcs if os.path.exists(f"{traj_dir}/{gc}.pdf")]
    if not in_files:
        continue
    # output filename with timepoint
    out_file = f"{output_dir}/phenotype-trajectories_tiled_{t}.png"
    # Convert each PDF page to an image and collect them
    images = []
    for pdf_file in in_files:
        pages = convert_from_path(pdf_file, 100)  # second argument is the dpi (100)
        images.append(pages[0])  # just the first page
    # One subplot per image; squeeze=False always returns a 2-D axes array, so the
    # single-image case needs no special handling.
    fig, axs = plt.subplots(1, len(images), figsize=(3 * len(images), 6), squeeze=False)
    for ax, img in zip(axs.ravel(), images):
        ax.imshow(img)
        ax.axis("off")
    plt.tight_layout()
    plt.savefig(out_file, dpi=100)
    # Show, then release the figure inside the loop so figures do not accumulate
    # across timepoints.
    plt.show()
    plt.close(fig)
# For each timepoint, rasterise the trajectory PDFs of its GCs and tile them side
# by side into one PNG written to {output_dir}/phenotype-trajectories_tiled_{t}.png.
timepoints = sorted(metadata["time"].unique())
for t in timepoints:
    subset = metadata[metadata["time"] == t]
    gcs = subset.index.unique()
    # collect PDF files for each GC in this timepoint (GCs without a PDF are skipped)
    in_files = [f"{traj_dir}/{gc}.pdf" for gc in gcs if os.path.exists(f"{traj_dir}/{gc}.pdf")]
    if not in_files:
        continue
    # output filename with timepoint
    out_file = f"{output_dir}/phenotype-trajectories_tiled_{t}.png"
    # Convert each PDF page to an image and collect them
    images = []
    for pdf_file in in_files:
        pages = convert_from_path(pdf_file, 100)  # second argument is the dpi (100)
        images.append(pages[0])  # just the first page
    # One subplot per image; squeeze=False always returns a 2-D axes array, so the
    # single-image case needs no special handling.
    fig, axs = plt.subplots(1, len(images), figsize=(3 * len(images), 6), squeeze=False)
    for ax, img in zip(axs.ravel(), images):
        ax.imshow(img)
        ax.axis("off")
    plt.tight_layout()
    plt.savefig(out_file, dpi=100)
    # Show, then release the figure inside the loop so figures do not accumulate
    # across timepoints.
    plt.show()
    plt.close(fig)
Final cell distributions
In [18]:
Copied!
# Expand every tree node into one record per cell (node.abundance records each),
# for the observed trees and for each simulated replicate tree.
cell_columns = ("time", "GC", "seq", "cell", "delta_bind", "delta_expr", "divergence")
simcell_columns = ("time", "GC", "seq", "cell", "replicate", "delta_bind", "delta_expr", "divergence")

cells_dat = []
simcells_dat = []
for gc, tree in trees.items():
    time = metadata.time[gc]

    # observed tree: divergence is measured from the tree root
    for node in tree.tree.traverse():
        divergence = tree.tree.get_distance(node)
        cells_dat.extend(
            [time, gc, node.name, cell_idx, node.delta_bind, node.delta_expr, divergence]
            for cell_idx in range(node.abundance)
        )

    # simulated trees: same record layout plus the replicate index
    for sim_idx, sim_tree in enumerate(sim_trees[gc]):
        for node in sim_tree.traverse():
            divergence = sim_tree.get_distance(node)
            simcells_dat.extend(
                [time, gc, node.name, cell_idx, sim_idx, node.delta_bind, node.delta_expr, divergence]
                for cell_idx in range(node.abundance)
            )

cells_df = pd.DataFrame(cells_dat, columns=cell_columns)
simcells_df = pd.DataFrame(simcells_dat, columns=simcell_columns)
# Expand every tree node into one row per cell (node.abundance rows), for the
# observed trees and for each simulated replicate tree of the same GC.
cells_columns = ("time", "GC", "seq", "cell", "delta_bind", "delta_expr", "divergence")
simcells_columns = ("time", "GC", "seq", "cell", "replicate", "delta_bind", "delta_expr", "divergence")

cells_dat = []
simcells_dat = []
for gc, tree in trees.items():
    time = metadata.time[gc]

    # observed tree
    for node in tree.tree.traverse():
        divergence = tree.tree.get_distance(node)
        cells_dat.extend(
            [time, gc, node.name, i, node.delta_bind, node.delta_expr, divergence]
            for i in range(node.abundance)
        )

    # simulated replicate trees for the same GC
    for sim_idx, sim_tree in enumerate(sim_trees[gc]):
        for node in sim_tree.traverse():
            divergence = sim_tree.get_distance(node)
            simcells_dat.extend(
                [time, gc, node.name, i, sim_idx, node.delta_bind, node.delta_expr, divergence]
                for i in range(node.abundance)
            )

cells_df = pd.DataFrame(cells_dat, columns=cells_columns)
simcells_df = pd.DataFrame(simcells_dat, columns=simcells_columns)
In [19]:
Copied!
# Per-GC median phenotypes of the observed cells.
cells_df_median = (
    cells_df.groupby(["GC"])
    .agg({"delta_bind": "median", "delta_expr": "median", "time": "first"})
    .rename(columns={"delta_bind": "median_delta_bind", "delta_expr": "median_delta_expr"})
)
# GC is the index after the groupby, so the index must be written; with
# index=False the CSV rows could not be attributed to a GC.
cells_df_median.to_csv(f"{output_dir}/data_cells_medians.csv")

# Simulated cells: median within each (GC, replicate), then the mean of those
# medians across replicates for each GC.
simcells_df_median = (
    simcells_df.groupby(["GC", "replicate"])
    .agg({"delta_bind": "median", "delta_expr": "median", "time": "first"})
    .reset_index()
    .groupby("GC")
    .agg({"delta_bind": "mean", "delta_expr": "mean", "time": "first"})
    .rename(columns={"delta_bind": "median_delta_bind", "delta_expr": "median_delta_expr"})
)
simcells_df_median.to_csv(f"{output_dir}/data_simcells_medians.csv")
# Per-GC median phenotypes of the observed cells.
cells_df_median = (
    cells_df.groupby(["GC"])
    .agg({"delta_bind": "median", "delta_expr": "median", "time": "first"})
    .rename(columns={"delta_bind": "median_delta_bind", "delta_expr": "median_delta_expr"})
)
# GC is the index after the groupby, so the index must be written; with
# index=False the CSV rows could not be attributed to a GC.
cells_df_median.to_csv(f"{output_dir}/data_cells_medians.csv")

# Simulated cells: median within each (GC, replicate), then the mean of those
# medians across replicates for each GC.
simcells_df_median = (
    simcells_df.groupby(["GC", "replicate"])
    .agg({"delta_bind": "median", "delta_expr": "median", "time": "first"})
    .reset_index()
    .groupby("GC")
    .agg({"delta_bind": "mean", "delta_expr": "mean", "time": "first"})
    .rename(columns={"delta_bind": "median_delta_bind", "delta_expr": "median_delta_expr"})
)
simcells_df_median.to_csv(f"{output_dir}/data_simcells_medians.csv")
In [20]:
Copied!
# Scatter of per-GC median phenotypes: each GC contributes an observed point
# and a simulated point, joined by a thin line.
fig, ax = plt.subplots(figsize=(4, 4))
ax.axhline(0, c="k", ls="--", lw=1)
ax.axvline(0, c="k", ls="--", lw=1)
# Per timepoint: [color of the observed point, color of the simulated point].
time_color_map = {
    "d15": ["C0", "C1"],
    "d20": ["C2", "C3"],
    # add more if needed
}
for gc in metadata.index:
    # Each pair is ordered (observed, simulated).
    bind_pair = [cells_df_median.loc[gc, "median_delta_bind"], simcells_df_median.loc[gc, "median_delta_bind"]]
    expr_pair = [cells_df_median.loc[gc, "median_delta_expr"], simcells_df_median.loc[gc, "median_delta_expr"]]
    ax.plot(bind_pair, expr_pair, c="k", lw=0.25, zorder=1, alpha=0.5)
    sns.scatterplot(
        x=bind_pair,
        y=expr_pair,
        c=time_color_map[cells_df_median.time[gc]], marker="o", zorder=2, alpha=1.0,
        ax=ax,
    )
ax.set_xlabel(r"$\Delta$ affinity")
ax.set_ylabel(r"$\Delta$ expression")
fig.tight_layout()
# Saved under its own name: a later cell writes its jointplot to
# sim-vs-obs.pdf, which previously overwrote this figure.
fig.savefig(f"{output_dir}/sim-vs-obs_scatter.pdf")
plt.show()
# Scatter of per-GC median phenotypes: each GC contributes an observed point
# and a simulated point, joined by a thin line.
fig, ax = plt.subplots(figsize=(4, 4))
ax.axhline(0, c="k", ls="--", lw=1)
ax.axvline(0, c="k", ls="--", lw=1)
# Per timepoint: [color of the observed point, color of the simulated point].
time_color_map = {
    "d15": ["C0", "C1"],
    "d20": ["C2", "C3"],
    # add more if needed
}
for gc in metadata.index:
    # Each pair is ordered (observed, simulated).
    bind_pair = [cells_df_median.loc[gc, "median_delta_bind"], simcells_df_median.loc[gc, "median_delta_bind"]]
    expr_pair = [cells_df_median.loc[gc, "median_delta_expr"], simcells_df_median.loc[gc, "median_delta_expr"]]
    ax.plot(bind_pair, expr_pair, c="k", lw=0.25, zorder=1, alpha=0.5)
    sns.scatterplot(
        x=bind_pair,
        y=expr_pair,
        c=time_color_map[cells_df_median.time[gc]], marker="o", zorder=2, alpha=1.0,
        ax=ax,
    )
ax.set_xlabel(r"$\Delta$ affinity")
ax.set_ylabel(r"$\Delta$ expression")
fig.tight_layout()
# Saved under its own name: a later cell writes its jointplot to
# sim-vs-obs.pdf, which previously overwrote this figure.
fig.savefig(f"{output_dir}/sim-vs-obs_scatter.pdf")
plt.show()
In [21]:
Copied!
# Label each median table with its origin and stack them into one long table.
cells_df_median["data"] = "observed"
simcells_df_median["data"] = "simulated"
simdat_concat = pd.concat([cells_df_median, simcells_df_median])
# GC is the index of both inputs, so the index is written to keep each row
# attributable to its GC (index=False dropped the GC identifier).
simdat_concat.to_csv(f"{output_dir}/data.csv")
simdat_concat
# Label each median table with its origin and stack them into one long table.
cells_df_median["data"] = "observed"
simcells_df_median["data"] = "simulated"
simdat_concat = pd.concat([cells_df_median, simcells_df_median])
# GC is the index of both inputs, so the index is written to keep each row
# attributable to its GC (index=False dropped the GC identifier).
simdat_concat.to_csv(f"{output_dir}/data.csv")
simdat_concat
Out[21]:
median_delta_bind | median_delta_expr | time | data | |
---|---|---|---|---|
GC | ||||
D15_M10_GC20 | 0.879405 | -0.058795 | d15 | observed |
D15_M10_GC21 | 0.600000 | -0.058280 | d15 | observed |
D15_M10_GC22 | 1.251610 | -0.017620 | d15 | observed |
D15_M10_GC23 | 0.950490 | -0.039470 | d15 | observed |
D15_M11_GC24 | 1.281540 | -0.003350 | d15 | observed |
... | ... | ... | ... | ... |
D20_M24_GC115 | -0.351236 | -0.721199 | d20 | simulated |
D20_M25_GC116 | -1.410899 | -1.262511 | d20 | simulated |
D20_M25_GC117 | -0.485463 | -0.433510 | d20 | simulated |
D20_M25_GC118 | -0.450546 | -0.749682 | d20 | simulated |
D20_M25_GC119 | -0.871590 | -1.121733 | d20 | simulated |
238 rows × 4 columns
In [22]:
Copied!
# Joint distribution of the per-GC medians, grouped by "<time> <data>", with a
# thin connector between each GC's observed and simulated point.
group_labels = simdat_concat.time + " " + simdat_concat.data
g = sns.jointplot(
    data=simdat_concat,
    x="median_delta_bind",
    y="median_delta_expr",
    hue=group_labels,
)
for gc in metadata.index:
    obs_row = cells_df_median.loc[gc]
    sim_row = simcells_df_median.loc[gc]
    g.ax_joint.plot(
        [obs_row["median_delta_bind"], sim_row["median_delta_bind"]],
        [obs_row["median_delta_expr"], sim_row["median_delta_expr"]],
        c="k", lw=0.25, zorder=1, alpha=0.5
    )
# Zero reference lines on the joint axes and on both marginals.
for zero_line in (g.ax_joint.axhline, g.ax_joint.axvline, g.ax_marg_x.axvline, g.ax_marg_y.axhline):
    zero_line(0, c="k", ls="--", lw=1)
g.ax_joint.set_xlabel(r"$\Delta$ affinity")
g.ax_joint.set_ylabel(r"$\Delta$ expression")
plt.tight_layout()
plt.savefig(f"{output_dir}/sim-vs-obs.pdf")
plt.show()
# Joint distribution of the per-GC medians, grouped by "<time> <data>", with a
# thin connector between each GC's observed and simulated point.
group_labels = simdat_concat.time + " " + simdat_concat.data
g = sns.jointplot(
    data=simdat_concat,
    x="median_delta_bind",
    y="median_delta_expr",
    hue=group_labels,
)
for gc in metadata.index:
    obs_row = cells_df_median.loc[gc]
    sim_row = simcells_df_median.loc[gc]
    g.ax_joint.plot(
        [obs_row["median_delta_bind"], sim_row["median_delta_bind"]],
        [obs_row["median_delta_expr"], sim_row["median_delta_expr"]],
        c="k", lw=0.25, zorder=1, alpha=0.5
    )
# Zero reference lines on the joint axes and on both marginals.
for zero_line in (g.ax_joint.axhline, g.ax_joint.axvline, g.ax_marg_x.axvline, g.ax_marg_y.axhline):
    zero_line(0, c="k", ls="--", lw=1)
g.ax_joint.set_xlabel(r"$\Delta$ affinity")
g.ax_joint.set_ylabel(r"$\Delta$ expression")
plt.tight_layout()
plt.savefig(f"{output_dir}/sim-vs-obs.pdf")
plt.show()
In [23]:
Copied!
# Wide table: one row per GC with observed columns unsuffixed and simulated
# columns suffixed "_sim". The left "time" column keeps its name (lsuffix=""),
# so only the duplicated "time_sim" is dropped; the former rename of a
# non-existent "time_real" column was a no-op and has been removed.
simdat_joined = (
    cells_df_median
    .join(simcells_df_median, lsuffix="", rsuffix="_sim")
    .drop(columns="time_sim")
)
simdat_joined
# Wide table: one row per GC with observed columns unsuffixed and simulated
# columns suffixed "_sim". The left "time" column keeps its name (lsuffix=""),
# so only the duplicated "time_sim" is dropped; the former rename of a
# non-existent "time_real" column was a no-op and has been removed.
simdat_joined = (
    cells_df_median
    .join(simcells_df_median, lsuffix="", rsuffix="_sim")
    .drop(columns="time_sim")
)
simdat_joined
Out[23]:
median_delta_bind | median_delta_expr | time | data | median_delta_bind_sim | median_delta_expr_sim | data_sim | |
---|---|---|---|---|---|---|---|
GC | |||||||
D15_M10_GC20 | 0.879405 | -0.058795 | d15 | observed | -0.616669 | -0.649554 | simulated |
D15_M10_GC21 | 0.600000 | -0.058280 | d15 | observed | -0.561991 | -0.631265 | simulated |
D15_M10_GC22 | 1.251610 | -0.017620 | d15 | observed | -0.984445 | -0.680882 | simulated |
D15_M10_GC23 | 0.950490 | -0.039470 | d15 | observed | -0.807401 | -0.756251 | simulated |
D15_M11_GC24 | 1.281540 | -0.003350 | d15 | observed | -0.400168 | -0.514934 | simulated |
... | ... | ... | ... | ... | ... | ... | ... |
D20_M24_GC115 | 1.046480 | -0.172400 | d20 | observed | -0.351236 | -0.721199 | simulated |
D20_M25_GC116 | 1.105020 | -0.031210 | d20 | observed | -1.410899 | -1.262511 | simulated |
D20_M25_GC117 | 0.608315 | -0.513380 | d20 | observed | -0.485463 | -0.433510 | simulated |
D20_M25_GC118 | 0.492355 | -0.021910 | d20 | observed | -0.450546 | -0.749682 | simulated |
D20_M25_GC119 | 0.700545 | -0.260170 | d20 | observed | -0.871590 | -1.121733 | simulated |
119 rows × 7 columns
In [24]:
Copied!
# For each median phenotype draw (1) a simulated-vs-observed scatter with an
# identity line and (2) paired observed/simulated points per timepoint.
for median_phenotype in ("median_delta_bind", "median_delta_expr"):
    sim_col = median_phenotype + "_sim"

    # --- (1) simulated vs. observed scatter ---
    plt.figure(figsize=(4.5, 4.5))
    sns.scatterplot(data=simdat_joined, x=sim_col, y=median_phenotype, hue="time")
    plt.axhline(0, c="k", ls="--", lw=0.5)
    plt.axvline(0, c="k", ls="--", lw=0.5)
    # Span the identity line and both axes over the range of BOTH columns, so
    # no point is clipped when the observed minimum or the simulated maximum
    # is the extreme value.
    lo = min(simdat_joined[sim_col].min(), simdat_joined[median_phenotype].min())
    hi = max(simdat_joined[sim_col].max(), simdat_joined[median_phenotype].max())
    plt.plot([lo, hi], [lo, hi], c="lightgrey", lw=2)
    plt.xlim(lo - 0.1, hi + 0.1)
    plt.ylim(lo - 0.1, hi + 0.1)
    plt.tight_layout()
    plt.savefig(f"{output_dir}/sim-vs-obs_{median_phenotype}.pdf")
    plt.show()

    # --- (2) paired observed/simulated values, one panel per timepoint ---
    times = sorted(simdat_joined.time.unique())
    # At least two columns keeps the original layout for <= 2 timepoints; more
    # timepoints get extra panels instead of an invalid subplot index.
    n_cols = max(2, len(times))
    plt.figure(figsize=(3 * n_cols, 3))
    ax = None
    for time_idx, time in enumerate(times):
        time_df = simdat_joined.query("time == @time")
        paired = time_df[[median_phenotype, sim_col]].values  # columns: observed, simulated
        ax = plt.subplot(1, n_cols, time_idx + 1, sharex=ax, sharey=ax)
        plt.title(time)
        plt.plot(
            [[0, 1]] * len(time_df),
            paired,
            "o", c=f"C{time_idx}", ms=3, alpha=1, mec="k", mew=0.5
        )
        # Horizontal tick marking the median of each column.
        sns.pointplot(data=time_df, x=[0] * len(time_df), y=median_phenotype, color="k", estimator="median", errorbar=None, marker="_", markersize=10, markeredgewidth=2)
        sns.pointplot(data=time_df, x=[1] * len(time_df), y=sim_col, color="k", estimator="median", errorbar=None, marker="_", markersize=10, markeredgewidth=2)
        # Connect each GC's observed value to its simulated value.
        for obs_value, sim_value in paired:
            plt.plot([0, 1], [obs_value, sim_value], c=f"C{time_idx}", lw=1, alpha=1, zorder=1)
        plt.axhline(0, c="k", ls="--", lw=0.5)
        plt.xticks([0, 1], ["observed", "simulated"])
        plt.ylabel(median_phenotype)
    plt.tight_layout()
    plt.savefig(f"{output_dir}/sim-vs-obs_{median_phenotype}_2.pdf")
    plt.show()
# For each median phenotype draw (1) a simulated-vs-observed scatter with an
# identity line and (2) paired observed/simulated points per timepoint.
for median_phenotype in ("median_delta_bind", "median_delta_expr"):
    sim_col = median_phenotype + "_sim"

    # --- (1) simulated vs. observed scatter ---
    plt.figure(figsize=(4.5, 4.5))
    sns.scatterplot(data=simdat_joined, x=sim_col, y=median_phenotype, hue="time")
    plt.axhline(0, c="k", ls="--", lw=0.5)
    plt.axvline(0, c="k", ls="--", lw=0.5)
    # Span the identity line and both axes over the range of BOTH columns, so
    # no point is clipped when the observed minimum or the simulated maximum
    # is the extreme value.
    lo = min(simdat_joined[sim_col].min(), simdat_joined[median_phenotype].min())
    hi = max(simdat_joined[sim_col].max(), simdat_joined[median_phenotype].max())
    plt.plot([lo, hi], [lo, hi], c="lightgrey", lw=2)
    plt.xlim(lo - 0.1, hi + 0.1)
    plt.ylim(lo - 0.1, hi + 0.1)
    plt.tight_layout()
    plt.savefig(f"{output_dir}/sim-vs-obs_{median_phenotype}.pdf")
    plt.show()

    # --- (2) paired observed/simulated values, one panel per timepoint ---
    times = sorted(simdat_joined.time.unique())
    # At least two columns keeps the original layout for <= 2 timepoints; more
    # timepoints get extra panels instead of an invalid subplot index.
    n_cols = max(2, len(times))
    plt.figure(figsize=(3 * n_cols, 3))
    ax = None
    for time_idx, time in enumerate(times):
        time_df = simdat_joined.query("time == @time")
        paired = time_df[[median_phenotype, sim_col]].values  # columns: observed, simulated
        ax = plt.subplot(1, n_cols, time_idx + 1, sharex=ax, sharey=ax)
        plt.title(time)
        plt.plot(
            [[0, 1]] * len(time_df),
            paired,
            "o", c=f"C{time_idx}", ms=3, alpha=1, mec="k", mew=0.5
        )
        # Horizontal tick marking the median of each column.
        sns.pointplot(data=time_df, x=[0] * len(time_df), y=median_phenotype, color="k", estimator="median", errorbar=None, marker="_", markersize=10, markeredgewidth=2)
        sns.pointplot(data=time_df, x=[1] * len(time_df), y=sim_col, color="k", estimator="median", errorbar=None, marker="_", markersize=10, markeredgewidth=2)
        # Connect each GC's observed value to its simulated value.
        for obs_value, sim_value in paired:
            plt.plot([0, 1], [obs_value, sim_value], c=f"C{time_idx}", lw=1, alpha=1, zorder=1)
        plt.axhline(0, c="k", ls="--", lw=0.5)
        plt.xticks([0, 1], ["observed", "simulated"])
        plt.ylabel(median_phenotype)
    plt.tight_layout()
    plt.savefig(f"{output_dir}/sim-vs-obs_{median_phenotype}_2.pdf")
    plt.show()
In [ ]:
Copied!