The Sequence Library¶
In addition to the ...
import numpy as np
import mdtraj as md
import matplotlib.pyplot as plt
import nglview as nv
import copy
import nglview as nv
import random
from Bio.SVDSuperimposer import SVDSuperimposer
import mdna
# Complemetary base pairs
base_pair_map = {'A':'T','T':'A','G':'C','C':'G','U':'A','D':'G','E':'T','L':'M','M':'L','B':'S','S':'B','Z':'P','P':'Z','CM':'G','AH':'T','GM':'C'}
# Load reference bases from the atomic data
reference_bases = {base: md.load_hdf5(mdna.utils.get_data_file_path(f'./atomic/bases/BDNA_{base}.h5')) for base in base_pair_map.keys()}
bases = list(reference_bases.values())
# Define grid to place the bases
order = [['A', 'T', 'G', 'C'],
['AH', 'U', 'GM', 'CM'],
['B', 'S', 'P', 'Z'],
['E', 'D','L', 'M']]
# Initialize the trajectory with the first base
traj = reference_bases[order[0][0]]
# Spacing parameters (in angstroms, adjust as needed)
horizontal_spacing = 1.2 # Spacing between bases within a row
vertical_spacing = 1.5 # Spacing between rows
# Base positions tracking
y_position = 0 # Start at the top-most row and work downwards
for row in order:
x_position = 0 # Reset x position for each new row
for i, base in enumerate(row,1):
if i == 0 and row == order[0]:
# Already initialized with the first base
continue
# Move base in x and y direction
reference_bases[base].xyz[0] = reference_bases[base].xyz[0] + np.array([-x_position, y_position, 0])
# Stack the base to the trajectory
traj = traj.stack(reference_bases[base])
# Increment x position for the next base in the row
x_position += horizontal_spacing
# Decrement y position for the next row to position it below the current one
y_position -= vertical_spacing
Visualize the bases
subtraj = traj.atom_slice(traj.top.select('not element H'))
# subtraj.save_pdb('all_bases.pdb')
# subtraj.save_hdf5('all_bases.h5')
view = nv.show_mdtraj(subtraj)
view.clear()
view.add_representation('licorice', selection='all')
view
How to add a base to the sequence library¶
Here we show how to align a DNA nucleobase to a specific reference frame.
We chose the default reference frame at the origin with the base vectors [1,0,0], [0,1,0], [0,0,1].
This can be useful if you want to add a custom nucleobase to the sequence libary.
For this we need to isolate the base and add it to the sequence library (./atomic/) and add the pdb/h5 to the reference list in geometry.py NUCLEOBASE_DICT
which contains all the atoms that belong to the nucleobas part as well as in the modify. Mutate.mutate base_pair_map
which defines the complementary base partner.
Here we show an example using a methylated base, but you can use any sequence you want. Just select the residue that you want to isolate.
def get_base_vectors(res):
"""Compute base vectors from reference base."""
ref_base = mdna.ReferenceBase(res)
return np.array([ref_base.b_R, ref_base.b_L, ref_base.b_D, ref_base.b_N]).swapaxes(0,1)
def get_rot_mat_trans(x,y):
# load super imposer
sup = SVDSuperimposer()
# Set the coords, y will be rotated and translated on x
sup.set(x, y)
# Do the leastsquared fit
sup.run()
# Get the rms
rms = sup.get_rms()
# Get rotation (right multiplying!) and the translation
rot, tran = sup.get_rotran()
return rot, tran
# Function to calculate positions from origin and vectors
def calculate_positions(triad):
origin = triad[0]
vectors = triad[1:]
# Each row in vectors is added to the origin to get the end position
end_positions = origin + vectors
# Combine the origin with these end positions
positions = np.vstack([origin, end_positions])
return positions
def align_to_ref(traj, ref = np.array([[0,0,0.0],[1,0,0],[0,1,0],[0,0,1]])):
vectors = get_base_vectors(traj)
positions = calculate_positions(vectors[0])
ref_position = calculate_positions(ref)
rot, tran = get_rot_mat_trans(ref_position,positions)
new_xyz = np.dot(traj.xyz[0], rot) + tran
traj.xyz[0] = new_xyz
return traj
# Create a DNA sequence with a methylated base
dna = mdna.make('GCGCG')
dna.methylate(CpG=True)
traj = dna.get_traj()
# Select the methylated base
meth = traj.atom_slice(traj.top.select('resid 1'))
# Align the methylated base to the reference frame
meth = align_to_ref(meth)
# Save the methylated base
meth.save('./pdbs/BDNA_CM.pdb')
meth.save('./pdbs/BDNA_CM.h5')
# Show the methylated base
view = nv.show_mdtraj(meth)
view.clear()
view.add_ball_and_stick()
view
Point mutations¶
def point_mutation(sequence, position=None, new_nucleotide=None):
if position is None:
position = random.randint(0, len(sequence) - 1)
if new_nucleotide is None:
nucleotides = ['A', 'T', 'C', 'G']
new_nucleotide = random.choice([n for n in nucleotides if n != sequence[position]])
mutated_sequence = list(sequence)
mutated_sequence[position] = new_nucleotide
return ''.join(mutated_sequence)
def radiate_system(dna, new_sequence, complementary=True, chainids=[0,1], verbose=False):
pdb = copy.deepcopy(dna.get_traj())
if verbose:
print('--- current stat of the system ---')
s = dna.sequence
if len(s) != len(new_sequence):
raise ValueError('The length of the new sequence does not match the length of the current sequence')
if verbose:
print(len(s),s)
print(''.join(s))
mutations = mdna.get_mutations(s,new_sequence)
if verbose:
print(f'start mutation ---- {mutations} ----')
dna.mutate(mutations,complementary=complementary)
mutant_sequence = dna.sequence
if verbose:
print(mutant_sequence)
print(''.join(mutant_sequence))
new_traj = dna.get_traj()
if verbose:
for c in new_traj.top.chains:
print(c.index, c._residues)
print('--- end radiation ---')
return new_traj
# Example usage
save = False
dna = mdna.make('GCGCG')
point_mutations = np.unique([point_mutation(dna.sequence) for _ in range(100)])
for i, new_sequence in enumerate(point_mutations):
mutant = radiate_system(dna, list(new_sequence))
print(new_sequence,f'saved as point_mutant_{i}.pdb')
if save:
mutant.save(f'./pdbs/point_mutant_{i}.pdb')