Skip to content

Geometry Classes

Classes for computing nucleobase reference frames and rigid base parameters.


Reference Base Class

Fits a reference frame to a single nucleobase using the Tsukuba convention.

mdna.geometry.ReferenceBase

Compute a nucleobase reference frame following the Tsukuba convention.

Given an MDTraj trajectory for a single residue, this class identifies the base type (purine, pyrimidine, or non-canonical analogue), locates the key atoms (C1', N-glycosidic nitrogen, and a ring carbon), and calculates the local reference frame vectors (origin b_R, long axis b_L, short axis b_D, and normal b_N).

Attributes:

Name Type Description
traj Trajectory

Single-residue trajectory.

base_type str

One-letter nucleobase code (e.g. 'A', 'T', 'D').

b_R ndarray

Reference point (origin) of shape (n_frames, 3).

b_L ndarray

Long-axis unit vector of shape (n_frames, 3).

b_D ndarray

Short-axis unit vector of shape (n_frames, 3).

b_N ndarray

Base-normal unit vector of shape (n_frames, 3).

Source code in mdna/geometry.py
class ReferenceBase:
    """Compute a nucleobase reference frame following the Tsukuba convention.

    Given an MDTraj trajectory for a single residue, this class identifies the
    base type (purine, pyrimidine, or non-canonical analogue), locates the key
    atoms (C1', N-glycosidic nitrogen, and a ring carbon), and calculates the
    local reference frame vectors (origin *b_R*, long axis *b_L*, short axis
    *b_D*, and normal *b_N*).

    Attributes:
        traj (md.Trajectory): Single-residue trajectory.
        base_type (str): One-letter nucleobase code (e.g. ``'A'``, ``'T'``, ``'D'``).
        b_R (numpy.ndarray): Reference point (origin) of shape ``(n_frames, 3)``.
        b_L (numpy.ndarray): Long-axis unit vector of shape ``(n_frames, 3)``.
        b_D (numpy.ndarray): Short-axis unit vector of shape ``(n_frames, 3)``.
        b_N (numpy.ndarray): Base-normal unit vector of shape ``(n_frames, 3)``.
    """

    def __init__(self, traj):
        """Initialize the reference base calculation.

        Args:
            traj (md.Trajectory): MDTraj trajectory containing a single nucleotide residue.
        """
        self.traj = traj
        # Determine base type (purine/pyrimidine/other)
        self.base_type = self.get_base_type()
        # Define the Tsukuba convention parameters
        self.tau_1, self.tau_2, self.d = np.radians(141.478), -np.radians(54.418), 0.4702     
        # Get coordinates of key atoms based on base type
        self.C1_coords, self.N_coords, self.C_coords = self.get_coordinates()
        # Calculate base reference point and base vectors
        self.b_R, self.b_L, self.b_D, self.b_N = self.calculate_base_frame()
        # self.basis = np.array([self.b_D.T, self.b_L.T, self.b_N])

    def _select_atom_by_name(self, name: str) -> np.ndarray:
        """Select atom coordinates by atom name.

        Args:
            name (str): Atom selection string (e.g. ``'N9'``, ``'"C1\'"'``).

        Returns:
            numpy.ndarray: Coordinates of shape ``(n_frames, 3)``.
        """
        # Select an atom by name returns shape (n_frames, 1, [x,y,z])
        return np.squeeze(self.traj.xyz[:,[self.traj.topology.select(f'name {name}')[0]],:],axis=1)

    def get_base_type(self) -> str:
        """Determine the nucleobase type from the atoms present in the trajectory.

        Matches the set of non-hydrogen atom names against the known
        nucleobase dictionaries for canonical and non-canonical bases.

        Raises:
            ValueError: If no known base type matches the atom set.

        Returns:
            str: Single-letter nucleobase code (e.g. ``'A'``, ``'G'``, ``'D'``).
        """
        # Extracts all non-hydrogen atoms from the trajectory topology
        atoms = {atom.name for atom in self.traj.topology.atoms if atom.element.symbol != 'H'}

        # Check each base in the dictionary to see if all its atoms are present in the extracted atoms set
        for base, base_atoms in NUCLEOBASE_DICT.items():
            if all(atom in atoms for atom in base_atoms):
                return base
        # If no base matches, raise an error
        raise ValueError("Cannot determine the base type from the PDB file.")

    def get_coordinates(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Retrieve C1', N-glycosidic, and ring-carbon coordinates for the base.

        The specific atoms selected depend on the base type (purine vs
        pyrimidine vs non-canonical analogue).

        Returns:
            tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
                ``(C1_coords, N_coords, C_coords)`` each of shape ``(n_frames, 3)``.
        """
        # Get the coordinates of key atoms based on the base type
        C1_coords = self._select_atom_by_name('"C1\'"')
        if self.base_type in ['C','T','U','D']:# "pyrimidine"
            N_coords = self._select_atom_by_name("N1")
            C_coords = self._select_atom_by_name("C2")
        elif self.base_type in ['A','G','E','B','P']:# "purine":
            N_coords = self._select_atom_by_name("N9")
            C_coords = self._select_atom_by_name("C4") 
        elif self.base_type in ['S','Z']: # Hachi pyrimidine analogues
            N_coords = self._select_atom_by_name("C1")
            C_coords = self._select_atom_by_name("C2")
        elif self.base_type in ['L']: # UBPs hydrophobic
            N_coords = self._select_atom_by_name("N1")
            C_coords = self._select_atom_by_name("C5")
        elif self.base_type in ['M']: # UBPs hydrophilic
            N_coords = self._select_atom_by_name("C1")
            C_coords = self._select_atom_by_name("C6")
        else:
            raise ValueError(f"Unsupported base type: {self.base_type}")
        return C1_coords, N_coords, C_coords


    def calculate_base_frame(self) -> np.ndarray:
        """Calculate the base reference frame from the Tsukuba convention.

        Computes the base-normal (*b_N*) from the cross product of key atom
        vectors, then derives the reference point (*b_R*), long axis (*b_L*),
        and short axis (*b_D*) by rotating the N→C1' vector.

        Returns:
            numpy.ndarray: Array of shape ``(4, n_frames, 3)`` containing
                ``[b_R, b_D, b_L, b_N]``.
        """

        # Calculate normal vector using cross product of vectors formed by key atoms
        #  The coords have the shape (n,1,3)
        b_N = np.cross((self.N_coords - self.C1_coords), (self.N_coords-self.C_coords), axis=1)
        b_N /= np.linalg.norm(b_N, axis=1, keepdims=True)  # Normalize b_N to have unit length

        # Compute displacement vector N-C1' 
        N_C1_vector = self.C1_coords - self.N_coords  # Pointing from N to C1'
        N_C1_vector /= np.linalg.norm(N_C1_vector, axis=1, keepdims=True)

        # Rotate N-C1' vector by angle tau_1 around b_N to get the direction for displacement
        R_b_R = RigidBody.get_rotation_matrix(self.tau_1 * b_N)

        # Displace N along this direction by a distance d to get b_R
        b_R = self.N_coords + np.einsum('ijk,ik->ij', R_b_R, N_C1_vector * self.d)

        # Take a unit vector in the N-C1' direction, rotate it around b_N by angle tau_2 to get b_L
        R_b_L = RigidBody.get_rotation_matrix(self.tau_2 * b_N)
        b_L = np.einsum('ijk,ik->ij', R_b_L, N_C1_vector) 

        # Calculate b_D using cross product of b_L and b_N
        b_D = np.cross(b_L, b_N, axis=1)

        return np.array([b_R, b_D, b_L, b_N])
        #return np.array([b_R, -b_D, -b_L, -b_N])

    def plot_baseframe(self, atoms=True, frame=True, ax=None, length=1):
        """Plot the nucleobase atoms and/or reference frame vectors in 3D.

        Args:
            atoms (bool): If True, scatter-plot the atom positions. Defaults to True.
            frame (bool): If True, draw quiver arrows for the *b_L*, *b_D*, and *b_N* vectors. Defaults to True.
            ax (matplotlib.axes.Axes, optional): Existing 3D axes to draw on. If None, a new figure is created.
            length (int): Length scale for the quiver arrows. Defaults to 1.
        """
        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
        else:
            fig = False

        # Plot the DNA atoms
        if atoms:
            atoms_coords = self.traj.xyz[0]
            ax.scatter(atoms_coords[:,0], atoms_coords[:,1], atoms_coords[:,2], alpha=0.6)

        # Plot the reference frame vectors
        if frame:
            origin = self.b_R[0]
            ax.quiver(origin[0], origin[1], origin[2], 
                    self.b_L[0][0], self.b_L[0][1], self.b_L[0][2], 
                    color='r', length=length, normalize=True)
            ax.quiver(origin[0], origin[1], origin[2], 
                    self.b_D[0][0], self.b_D[0][1], self.b_D[0][2], 
                    color='g', length=length, normalize=True)
            ax.quiver(origin[0], origin[1], origin[2], 
                    self.b_N[0][0], self.b_N[0][1], self.b_N[0][2], 
                    color='b', length=length, normalize=True)

        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        ax.set_zlabel('Z')

        if fig: 
            # Make axes of equal length
            max_range = np.array([
                atoms_coords[:,0].max()-atoms_coords[:,0].min(), 
                atoms_coords[:,1].max()-atoms_coords[:,1].min(), 
                atoms_coords[:,2].max()-atoms_coords[:,2].min()
            ]).max() / 2.0

            mid_x = (atoms_coords[:,0].max()+atoms_coords[:,0].min()) * 0.5
            mid_y = (atoms_coords[:,1].max()+atoms_coords[:,1].min()) * 0.5
            mid_z = (atoms_coords[:,2].max()+atoms_coords[:,2].min()) * 0.5
            ax.set_xlim(mid_x - max_range, mid_x + max_range)
            ax.set_ylim(mid_y - max_range, mid_y + max_range)
            ax.set_zlim(mid_z - max_range, mid_z + max_range)

        ax.axis('equal')

__init__(traj)

Initialize the reference base calculation.

Parameters:

Name Type Description Default
traj Trajectory

MDTraj trajectory containing a single nucleotide residue.

required
Source code in mdna/geometry.py
def __init__(self, traj):
    """Initialize the reference base calculation.

    Args:
        traj (md.Trajectory): MDTraj trajectory containing a single nucleotide residue.
    """
    self.traj = traj
    # Determine base type (purine/pyrimidine/other)
    self.base_type = self.get_base_type()
    # Define the Tsukuba convention parameters
    self.tau_1, self.tau_2, self.d = np.radians(141.478), -np.radians(54.418), 0.4702     
    # Get coordinates of key atoms based on base type
    self.C1_coords, self.N_coords, self.C_coords = self.get_coordinates()
    # Calculate base reference point and base vectors
    self.b_R, self.b_L, self.b_D, self.b_N = self.calculate_base_frame()

calculate_base_frame()

Calculate the base reference frame from the Tsukuba convention.

Computes the base-normal (b_N) from the cross product of key atom vectors, then derives the reference point (b_R), long axis (b_L), and short axis (b_D) by rotating the N→C1' vector.

Returns:

Type Description
ndarray

numpy.ndarray: Array of shape (4, n_frames, 3) containing [b_R, b_D, b_L, b_N].

Source code in mdna/geometry.py
def calculate_base_frame(self) -> np.ndarray:
    """Calculate the base reference frame from the Tsukuba convention.

    Computes the base-normal (*b_N*) from the cross product of key atom
    vectors, then derives the reference point (*b_R*), long axis (*b_L*),
    and short axis (*b_D*) by rotating the N→C1' vector.

    Returns:
        numpy.ndarray: Array of shape ``(4, n_frames, 3)`` containing
            ``[b_R, b_D, b_L, b_N]``.
    """

    # Calculate normal vector using cross product of vectors formed by key atoms
    #  The coords have the shape (n,1,3)
    b_N = np.cross((self.N_coords - self.C1_coords), (self.N_coords-self.C_coords), axis=1)
    b_N /= np.linalg.norm(b_N, axis=1, keepdims=True)  # Normalize b_N to have unit length

    # Compute displacement vector N-C1' 
    N_C1_vector = self.C1_coords - self.N_coords  # Pointing from N to C1'
    N_C1_vector /= np.linalg.norm(N_C1_vector, axis=1, keepdims=True)

    # Rotate N-C1' vector by angle tau_1 around b_N to get the direction for displacement
    R_b_R = RigidBody.get_rotation_matrix(self.tau_1 * b_N)

    # Displace N along this direction by a distance d to get b_R
    b_R = self.N_coords + np.einsum('ijk,ik->ij', R_b_R, N_C1_vector * self.d)

    # Take a unit vector in the N-C1' direction, rotate it around b_N by angle tau_2 to get b_L
    R_b_L = RigidBody.get_rotation_matrix(self.tau_2 * b_N)
    b_L = np.einsum('ijk,ik->ij', R_b_L, N_C1_vector) 

    # Calculate b_D using cross product of b_L and b_N
    b_D = np.cross(b_L, b_N, axis=1)

    return np.array([b_R, b_D, b_L, b_N])

get_base_type()

Determine the nucleobase type from the atoms present in the trajectory.

Matches the set of non-hydrogen atom names against the known nucleobase dictionaries for canonical and non-canonical bases.

Raises:

Type Description
ValueError

If no known base type matches the atom set.

Returns:

Name Type Description
str str

Single-letter nucleobase code (e.g. 'A', 'G', 'D').

Source code in mdna/geometry.py
def get_base_type(self) -> str:
    """Determine the nucleobase type from the atoms present in the trajectory.

    Matches the set of non-hydrogen atom names against the known
    nucleobase dictionaries for canonical and non-canonical bases.

    Raises:
        ValueError: If no known base type matches the atom set.

    Returns:
        str: Single-letter nucleobase code (e.g. ``'A'``, ``'G'``, ``'D'``).
    """
    # Extracts all non-hydrogen atoms from the trajectory topology
    atoms = {atom.name for atom in self.traj.topology.atoms if atom.element.symbol != 'H'}

    # Check each base in the dictionary to see if all its atoms are present in the extracted atoms set
    for base, base_atoms in NUCLEOBASE_DICT.items():
        if all(atom in atoms for atom in base_atoms):
            return base
    # If no base matches, raise an error
    raise ValueError("Cannot determine the base type from the PDB file.")

get_coordinates()

Retrieve C1', N-glycosidic, and ring-carbon coordinates for the base.

The specific atoms selected depend on the base type (purine vs pyrimidine vs non-canonical analogue).

Returns:

Type Description
tuple[ndarray, ndarray, ndarray]

tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]: (C1_coords, N_coords, C_coords) each of shape (n_frames, 3).

Source code in mdna/geometry.py
def get_coordinates(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Retrieve C1', N-glycosidic, and ring-carbon coordinates for the base.

    The specific atoms selected depend on the base type (purine vs
    pyrimidine vs non-canonical analogue).

    Returns:
        tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
            ``(C1_coords, N_coords, C_coords)`` each of shape ``(n_frames, 3)``.
    """
    # Get the coordinates of key atoms based on the base type
    C1_coords = self._select_atom_by_name('"C1\'"')
    if self.base_type in ['C','T','U','D']:# "pyrimidine"
        N_coords = self._select_atom_by_name("N1")
        C_coords = self._select_atom_by_name("C2")
    elif self.base_type in ['A','G','E','B','P']:# "purine":
        N_coords = self._select_atom_by_name("N9")
        C_coords = self._select_atom_by_name("C4") 
    elif self.base_type in ['S','Z']: # Hachi pyrimidine analogues
        N_coords = self._select_atom_by_name("C1")
        C_coords = self._select_atom_by_name("C2")
    elif self.base_type in ['L']: # UBPs hydrophobic
        N_coords = self._select_atom_by_name("N1")
        C_coords = self._select_atom_by_name("C5")
    elif self.base_type in ['M']: # UBPs hydrophilic
        N_coords = self._select_atom_by_name("C1")
        C_coords = self._select_atom_by_name("C6")
    else:
        raise ValueError(f"Unsupported base type: {self.base_type}")
    return C1_coords, N_coords, C_coords

plot_baseframe(atoms=True, frame=True, ax=None, length=1)

Plot the nucleobase atoms and/or reference frame vectors in 3D.

Parameters:

Name Type Description Default
atoms bool

If True, scatter-plot the atom positions. Defaults to True.

True
frame bool

If True, draw quiver arrows for the b_L, b_D, and b_N vectors. Defaults to True.

True
ax Axes

Existing 3D axes to draw on. If None, a new figure is created.

None
length int

Length scale for the quiver arrows. Defaults to 1.

1
Source code in mdna/geometry.py
def plot_baseframe(self, atoms=True, frame=True, ax=None, length=1):
    """Plot the nucleobase atoms and/or reference frame vectors in 3D.

    Args:
        atoms (bool): If True, scatter-plot the atom positions. Defaults to True.
        frame (bool): If True, draw quiver arrows for the *b_L*, *b_D*, and *b_N* vectors. Defaults to True.
        ax (matplotlib.axes.Axes, optional): Existing 3D axes to draw on. If None, a new figure is created.
        length (int): Length scale for the quiver arrows. Defaults to 1.
    """
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
    else:
        fig = False

    # Plot the DNA atoms
    if atoms:
        atoms_coords = self.traj.xyz[0]
        ax.scatter(atoms_coords[:,0], atoms_coords[:,1], atoms_coords[:,2], alpha=0.6)

    # Plot the reference frame vectors
    if frame:
        origin = self.b_R[0]
        ax.quiver(origin[0], origin[1], origin[2], 
                self.b_L[0][0], self.b_L[0][1], self.b_L[0][2], 
                color='r', length=length, normalize=True)
        ax.quiver(origin[0], origin[1], origin[2], 
                self.b_D[0][0], self.b_D[0][1], self.b_D[0][2], 
                color='g', length=length, normalize=True)
        ax.quiver(origin[0], origin[1], origin[2], 
                self.b_N[0][0], self.b_N[0][1], self.b_N[0][2], 
                color='b', length=length, normalize=True)

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')

    if fig: 
        # Make axes of equal length
        max_range = np.array([
            atoms_coords[:,0].max()-atoms_coords[:,0].min(), 
            atoms_coords[:,1].max()-atoms_coords[:,1].min(), 
            atoms_coords[:,2].max()-atoms_coords[:,2].min()
        ]).max() / 2.0

        mid_x = (atoms_coords[:,0].max()+atoms_coords[:,0].min()) * 0.5
        mid_y = (atoms_coords[:,1].max()+atoms_coords[:,1].min()) * 0.5
        mid_z = (atoms_coords[:,2].max()+atoms_coords[:,2].min()) * 0.5
        ax.set_xlim(mid_x - max_range, mid_x + max_range)
        ax.set_ylim(mid_y - max_range, mid_y + max_range)
        ax.set_zlim(mid_z - max_range, mid_z + max_range)

    ax.axis('equal')

Rigid Base Parameter Class

Computes inter- and intra-base pair parameters from an MDTraj trajectory.

mdna.geometry.NucleicFrames

Rigid base-pair parameter computation from DNA trajectories.

Extracts nucleobase reference frames from an MDTraj trajectory using the Tsukuba convention, then computes the six intra-base-pair parameters (shear, stretch, stagger, buckle, propeller, opening) and six inter-base-pair step parameters (shift, slide, rise, tilt, roll, twist) for every frame in the trajectory.

The mean base-pair reference frames are also stored and can be used as input for downstream construction or analysis.

Attributes:

Name Type Description
traj Trajectory

Input trajectory.

frames ndarray

Mean reference frames of shape (n_bp, n_frames, 4, 3).

parameters ndarray

Combined base-pair and step parameters of shape (n_bp, n_frames, 12).

bp_params ndarray

Intra-base-pair parameters of shape (n_bp, n_frames, 6).

step_params ndarray

Inter-base-pair step parameters of shape (n_bp, n_frames, 6).

names list[str]

Parameter names (base + step).

Example
import mdtraj as md
import mdna

traj = md.load('dna_trajectory.xtc', top='dna.pdb')
dna = mdna.NucleicFrames(traj)
params, names = dna.get_parameters()
# params.shape → (n_frames, n_bp, 12)
Source code in mdna/geometry.py
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
class NucleicFrames:
    """Rigid base-pair parameter computation from DNA trajectories.

    Extracts nucleobase reference frames from an MDTraj trajectory using the
    Tsukuba convention, then computes the six intra-base-pair parameters
    (shear, stretch, stagger, buckle, propeller, opening) and six inter-base-pair
    step parameters (shift, slide, rise, tilt, roll, twist) for every frame in
    the trajectory.

    The mean base-pair reference frames are also stored and can be used as input
    for downstream construction or analysis.

    Attributes:
        traj (md.Trajectory): Input trajectory.
        frames (numpy.ndarray): Mean reference frames of shape ``(n_bp, n_frames, 4, 3)``.
        parameters (numpy.ndarray): Combined base-pair and step parameters of shape
            ``(n_bp, n_frames, 12)``.
        bp_params (numpy.ndarray): Intra-base-pair parameters of shape ``(n_bp, n_frames, 6)``.
        step_params (numpy.ndarray): Inter-base-pair step parameters of shape ``(n_bp, n_frames, 6)``.
        names (list[str]): Parameter names (base + step).

    Example:
        ```python
        import mdtraj as md
        import mdna

        traj = md.load('dna_trajectory.xtc', top='dna.pdb')
        dna = mdna.NucleicFrames(traj)
        params, names = dna.get_parameters()
        # params.shape → (n_frames, n_bp, 12)
        ```
    """

    def __init__(self, traj, chainids=[0,1], fit_reference=False):
        """Initialize the NucleicFrames object.

        Args:
            traj (object): MDtraj trajectory object.
            chainids (list, optional): Chainids of sense- and anti-sense strands. Defaults to [0,1].
            fit_reference (bool, optional): Fit each base to canonical reference bases before frame extraction.
                Defaults to False.
        """
        self.traj = traj
        self.top = traj.topology
        self.fit_reference = fit_reference
        self.reference_base_map = {'U': 'T'}
        self.reference_fit_data = self._prepare_reference_fit_data() if self.fit_reference else {}
        self.res_A = self.get_residues(chain_index=chainids[0], reverse=False)
        self.res_B = self.get_residues(chain_index=chainids[1], reverse=True)
        self.mean_reference_frames = np.empty((len(self.res_A), 1, 4, 3))
        self.base_frames = self.get_base_reference_frames()
        self.analyse_frames()

    def get_residues(self, chain_index, reverse=False):
        """Get residues from a specified chain.

        Args:
            chain_index (int): Index of the chain in the topology.
            reverse (bool): If True, return residues in reverse order (used for the anti-sense strand).

        Returns:
            residues (list): MDTraj Residue objects.

        Raises:
            IndexError: If *chain_index* is out of range.
        """
        if chain_index >= len(self.top._chains):
            raise IndexError("Chain index out of range.")
        chain = self.top._chains[chain_index]
        residues = chain._residues
        return list(reversed(residues)) if reverse else residues

    def load_reference_bases(self):
        """Load canonical reference base structures from bundled HDF5 files.

        Returns:
            bases (dict[str, md.Trajectory]): Mapping of base letter (A, C, G, T) to single-frame trajectory.
        """
        bases = ['C', 'G', 'T', 'A']
        return {base: md.load_hdf5(get_data_file_path(f'./atomic/bases/BDNA_{base}.h5')) for base in bases}

    def _prepare_reference_fit_data(self):
        """Prepare canonical base atom coordinates and frames for optional fitting."""
        reference_fit_data = {}
        for base, base_traj in self.load_reference_bases().items():
            ref_base = ReferenceBase(base_traj)
            atom_coords = {
                atom.name: base_traj.xyz[0, atom.index, :]
                for atom in base_traj.topology.atoms
                if atom.element.symbol != 'H'
            }
            reference_fit_data[base] = {
                'atom_coords': atom_coords,
                'frame': np.array([ref_base.b_R[0], ref_base.b_L[0], ref_base.b_D[0], ref_base.b_N[0]])
            }
        return reference_fit_data

    def _get_fitted_base_vectors(self, res, ref_base, default_vectors):
        """Fit residue atoms to canonical reference and transform canonical frame."""
        reference_key = self.reference_base_map.get(ref_base.base_type, ref_base.base_type)
        reference_data = self.reference_fit_data.get(reference_key)
        if reference_data is None:
            return default_vectors

        residue_atom_indices = {
            atom.name: atom.index
            for atom in res.topology.atoms
            if atom.element.symbol != 'H'
        }

        candidate_atoms = NUCLEOBASE_DICT.get(ref_base.base_type, [])
        common_atoms = [
            atom_name for atom_name in candidate_atoms
            if atom_name in residue_atom_indices and atom_name in reference_data['atom_coords']
        ]
        if len(common_atoms) < 3:
            return default_vectors

        reference_coords = np.array([reference_data['atom_coords'][atom_name] for atom_name in common_atoms])
        residue_coords = res.xyz[:, [residue_atom_indices[atom_name] for atom_name in common_atoms], :]

        reference_frame = reference_data['frame']
        reference_center = reference_coords.mean(axis=0)
        reference_centered = reference_coords - reference_center

        fitted_vectors = np.empty_like(default_vectors)
        for frame_index in range(residue_coords.shape[0]):
            frame_coords = residue_coords[frame_index]
            frame_center = frame_coords.mean(axis=0)
            frame_centered = frame_coords - frame_center
            try:
                rotation, _ = R.align_vectors(frame_centered, reference_centered)
            except ValueError:
                return default_vectors

            fitted_vectors[frame_index, 0] = rotation.apply(reference_frame[0] - reference_center) + frame_center
            fitted_vectors[frame_index, 1:] = rotation.apply(reference_frame[1:])

        return fitted_vectors

    def get_base_vectors(self, res):
        """Compute base reference vectors for a single residue.

        Args:
            res (md.Trajectory): Single-residue trajectory slice.

        Returns:
            vectors (numpy.ndarray): Base vectors of shape ``(n_frames, 4, 3)``
                ordered as ``[b_R, b_L, b_D, b_N]``.
        """
        ref_base = ReferenceBase(res)
        base_vectors = np.array([ref_base.b_R, ref_base.b_L, ref_base.b_D, ref_base.b_N]).swapaxes(0,1)
        if not self.fit_reference:
            return base_vectors
        return self._get_fitted_base_vectors(res, ref_base, base_vectors)

    def get_base_reference_frames(self):
        """Compute reference frames for all residues in both strands.

        Returns:
            frames (dict): Mapping of MDTraj Residue to base vectors array of shape ``(n_frames, 4, 3)``.
        """
        reference_frames = {} # Dictionary to store the base vectors for each residue
        for res in self.res_A + self.res_B:
            res_traj = self.traj.atom_slice([at.index for at in res.atoms])
            base_vectors = self.get_base_vectors(res_traj)
            reference_frames[res] = base_vectors # Store the base vectors for the residue index (with shape (4, n_frames, 3))
        return reference_frames

    def reshape_input(self,input_A,input_B,is_step=False):

        """Reshape the input to the correct format for the calculations.

        Args:
        input_A (ndarray): Input array for the first triad.
        input_B (ndarray): Input array for the second triad.
        is_step (bool, optional): Flag indicating if the input is a single step or a trajectory. Defaults to False.

        Returns:
        rotation_A (ndarray): Rotation matrices of shape (n, 3, 3) for the first triad.
        rotation_B (ndarray): Rotation matrices of shape (n, 3, 3) for the second triad.
        origin_A (ndarray): Origins of shape (n, 3) for the first triad.
        origin_B (ndarray): Origins of shape (n, 3) for the second triad.
        original_shape (tuple): The original shape of the input.
        """

        # Store original shape
        original_shape = input_A.shape

        # Flatten frames to compute rotation matrices for each time step simultaneously
        input_A_ = input_A.reshape(-1,original_shape[-2],original_shape[-1])  # shape (n, 4, 3)
        input_B_ = input_B.reshape(-1,original_shape[-2],original_shape[-1])  # shape (n, 4, 3)

        # Extract the triads without origin (rotation matrices)
        rotation_A = input_A_[:,1:]  # shape (n, 3, 3)
        rotation_B = input_B_[:,1:]  # shape (n, 3, 3)

        if not is_step:
            # flip (connecting the backbones) and the (baseplane normals).
            # so the second and third vector b_L, b_N
            rotation_B[:,[1,2]] *= -1

        # Extract origins of triads
        origin_A = input_A_[:,0]  # shape (n, 3)
        origin_B = input_B_[:,0]  # shape (n, 3)

        return rotation_A, rotation_B, origin_A, origin_B, original_shape


    def compute_parameters(self, rotation_A, rotation_B, origin_A, origin_B):
        """Calculate the parameters between each base pair and mean reference frames.

        Args:
            rotation_A (ndarray): Rotation matrices of shape (n, 3, 3) for the first triad.
            rotation_B (ndarray): Rotation matrices of shape (n, 3, 3) for the second triad.
            origin_A (ndarray): Origins of shape (n, 3) for the first triad.
            origin_B (ndarray): Origins of shape (n, 3) for the second triad.

        Returns:
            rigid_parameters (ndarray): The parameters of shape (n, 12) representing the relative translation and rotation between each base pair.
            trans_mid (ndarray): The mean translational vector of shape (n, 3) between the triads.
            rotation_mid (ndarray): The mean rotation matrix of shape (n, 3, 3) between the triads.
        """

        # Linear interpolation of translations
        trans_mid = 0.5 * (origin_A + origin_B)

        # Relative translation
        trans_AB = origin_A - origin_B

        # Get relative rotation matrix of base pair
        rotation_BA = rotation_B.transpose(0,2,1) @ rotation_A  # returns shape (n, 3, 3)

        # Get rotation angles based on  rotation matrices
        rotation_angle_BA = RigidBody.extract_omega_values(rotation_BA)

        # Compute halfway rotation matrix and triad (mid frame)
        rotation_halfway = RigidBody.get_rotation_matrix(rotation_angle_BA * 0.5)

        # Get rotation matrix of base pair (aka mean rotation frame)
        rotation_mid = rotation_B @ rotation_halfway 

        # Get transaltional coordinate vector and convert to angstroms
        translational_parameters = np.einsum('ijk,ik->ij',rotation_mid.transpose(0,2,1), trans_AB) * 10

        # Get rotational parameters and convert to degrees
        rotational_parameters = np.rad2deg(np.einsum('ijk,ik->ij', rotation_BA.transpose(0,2,1), rotation_angle_BA))

        # Merge translational and rotational parameters
        rigid_parameters = np.hstack((translational_parameters, rotational_parameters))

        # Return the parameters and the mean reference frame
        return rigid_parameters, trans_mid, rotation_mid


    def calculate_parameters(self,frames_A, frames_B, is_step=False):
        """Calculate the parameters between each base pair and mean reference frames.

        Assumes frames are of shape (n_frames, n_residues, 4, 3) where the last two dimensions are the base triads.
        The base triads consist of an origin (first index) and three vectors (latter 3 indices) representing the base frame.
        With the order of the vectors being: b_R, b_L, b_D, b_N.

        Args:
            frames_A (ndarray): Frames of shape (n_frames, n_residues, 4, 3) representing the base triads for chain A.
            frames_B (ndarray): Frames of shape (n_frames, n_residues, 4, 3) representing the base triads for chain B.
            is_step (bool, optional): Flag indicating if the input is a single step or a trajectory. Defaults to False.

        Notes:
            Note the vectors are stored rowwise in the base triads, and not the usual column representation of the rotation matrices.

        Returns:
            params (ndarray): The parameters of shape (n_frames, n_residues, 6) representing the relative translation and rotation between each base pair.
            mean_reference_frames (ndarray): The mean reference frames of shape (n_bp, n_frames, 4, 3) representing the mean reference frame of each base pair.
        """

        # Reshape frames
        rotation_A, rotation_B, origin_A, origin_B, original_shape = self.reshape_input(frames_A,frames_B, is_step=is_step)

        # Compute parameters
        if not is_step:
            # Flip from row to column representation of the rotation matrices
            rotation_A = rotation_A.transpose(0,2,1)
            rotation_B = rotation_B.transpose(0,2,1)
            params, mean_origin, mean_rotation = self.compute_parameters(rotation_A, rotation_B, origin_A, origin_B)
        else:
            # Switch the input of the B and A triads to get the correct parameters
            params, mean_origin, mean_rotation = self.compute_parameters(rotation_B, rotation_A, origin_B, origin_A)

        # Reshape the parameters to the original shape
        params = params.reshape(original_shape[0], original_shape[1], 6).swapaxes(0, 1)

        # Collect mean reference frames from mid frames of each base pair
        mean_reference_frames = np.hstack((mean_origin[:, np.newaxis, :],mean_rotation)).reshape(original_shape)

        if is_step:
            # Creating an array of zeros with shape (10000, 1, 6)
            extra_column = np.zeros((params.shape[0], 1, 6))

            # Concatenating the existing array and the extra column along the second axis
            params = np.concatenate((extra_column,params), axis=1)

        # Return the parameters and the mean reference frames
        return  params, mean_reference_frames if not is_step else params


    def analyse_frames(self):
        """Compute base-pair and step parameters from the extracted reference frames.

        Populates ``bp_params``, ``step_params``, ``parameters``, ``frames``,
        and ``names`` attributes.
        """

        # Get base reference frames for each residue
        frames_A = np.array([self.base_frames[res] for res in self.res_A])
        frames_B = np.array([self.base_frames[res] for res in self.res_B])

        # Compute parameters between each base pair and mean reference frames
        self.bp_params, self.mean_reference_frames = self.calculate_parameters(frames_A, frames_B)

        # Extract mean reference frames for each neighboring base pair
        B1_triads = self.mean_reference_frames[:-1] # select all but the last frame
        B2_triads = self.mean_reference_frames[1:] # select all but the first frame

        # Compute parameters between each base pair and mean reference frames
        self.step_params = self.calculate_parameters(B1_triads, B2_triads, is_step=True)[0]

        # Store mean reference frame / aka base pair triads as frames and transpose rotation matrices back to row wise
        self.frames = self.mean_reference_frames
        self.frames[:, :, 1:, :] = np.transpose(self.frames[:, :, 1:, :], axes=(0, 1, 3, 2))
        self._clean_parameters()

    def _clean_parameters(self):
        """Assign parameter names and merge base-pair and step arrays."""
        self.step_parameter_names = ['shift', 'slide', 'rise', 'tilt', 'roll', 'twist']
        self.base_parameter_names = ['shear', 'stretch', 'stagger', 'buckle', 'propeller', 'opening']
        self.names = self.base_parameter_names + self.step_parameter_names
        self.parameters = np.dstack((self.bp_params, self.step_params))

    def get_parameters(self, step=False, base=False):
        """Return computed rigid base parameters.

        Args:
            step (bool): If True, return only step parameters.
            base (bool): If True, return only base-pair parameters.

        Returns:
            result (tuple[numpy.ndarray, list[str]]): Parameters of shape
                ``(n_frames, n_bp, n_params)`` and corresponding names.

        Raises:
            ValueError: If both *step* and *base* are True.
        """
        if step and not base:
            return self.step_params, self.step_parameter_names
        elif base and not step:
            return self.bp_params, self.base_parameter_names
        elif not step and not base:
            return self.parameters, self.names
        raise ValueError("Use only one of step=True or base=True, or neither.")

    def get_parameter(self, name='twist') -> np.ndarray:
        """Get a single named parameter across all frames.

        Args:
            name (str): Parameter name. One of: shear, stretch, stagger, buckle,
                propeller, opening, shift, slide, rise, tilt, roll, twist.

        Returns:
            numpy.ndarray: Parameter values of shape ``(n_frames, n_bp)``.

        Raises:
            ValueError: If the parameter name is not recognised.
        """

        if name not in self.names:
            raise ValueError(f"Parameter {name} not found.")
        return self.parameters[:,:,self.names.index(name)]


    def plot_parameters(self, fig=None, ax=None, mean=True, std=True, figsize=[10, 3.5], save=False, step=True, base=True, base_color='cornflowerblue', step_color='coral'):
        """Plot the rigid base parameters of the DNA structure.

        Args:
            fig (matplotlib.figure.Figure, optional): Existing figure.
            ax (numpy.ndarray, optional): Array of axes to plot on.
            mean (bool): Plot the mean line.
            std (bool): Plot the standard deviation band.
            figsize (list): Figure size ``[width, height]``.
            save (bool): If True, save the figure to ``parameters.png``.
            step (bool): Include step parameters.
            base (bool): Include base-pair parameters.
            base_color (str): Color for base-pair parameter plots.
            step_color (str): Color for step parameter plots.

        Returns:
            result (tuple): ``(fig, ax)`` matplotlib figure and axes array.
        """

        import matplotlib.pyplot as plt

        cols = step + base

        if fig is None and ax is None:
            fig,ax = plt.subplots(cols,6, figsize=[12,2*cols])
            ax = ax.flatten()
        if step and not base:
            names = self.step_parameter_names
        elif base and not step:
            names = self.base_parameter_names
        elif base and step:
            names = self.names

        for _,name in enumerate(names):
            if name in self.step_parameter_names:
                color = step_color
            else:
                color = base_color
            para = self.get_parameter(name)
            mean = np.mean(para, axis=0)
            std = np.std(para, axis=0)
            x = range(len(mean))
            #ax[_].errorbar(x,mean, yerr=std, fmt='-', color=color)
            ax[_].fill_between(x, mean-std, mean+std, color=color, alpha=0.2)
            ax[_].plot(mean, color=color,lw=1)    
            ax[_].scatter(x=x,y=mean,color=color,s=10)
            ax[_].set_title(name)

        fig.tight_layout()
        if save:
            fig.savefig('parameters.png')
        return fig, ax 

__init__(traj, chainids=[0, 1], fit_reference=False)

Initialize the NucleicFrames object.

Parameters:

Name Type Description Default
traj object

MDtraj trajectory object.

required
chainids list

Chainids of sense- and anti-sense strands. Defaults to [0,1].

[0, 1]
fit_reference bool

Fit each base to canonical reference bases before frame extraction. Defaults to False.

False
Source code in mdna/geometry.py
def __init__(self, traj, chainids=[0,1], fit_reference=False):
    """Initialize the NucleicFrames object.

    Args:
        traj (object): MDtraj trajectory object.
        chainids (list, optional): Chainids of sense- and anti-sense strands. Defaults to [0,1].
        fit_reference (bool, optional): Fit each base to canonical reference bases before frame extraction.
            Defaults to False.
    """
    self.traj = traj
    self.top = traj.topology
    self.fit_reference = fit_reference
    self.reference_base_map = {'U': 'T'}
    self.reference_fit_data = self._prepare_reference_fit_data() if self.fit_reference else {}
    self.res_A = self.get_residues(chain_index=chainids[0], reverse=False)
    self.res_B = self.get_residues(chain_index=chainids[1], reverse=True)
    self.mean_reference_frames = np.empty((len(self.res_A), 1, 4, 3))
    self.base_frames = self.get_base_reference_frames()
    self.analyse_frames()

analyse_frames()

Compute base-pair and step parameters from the extracted reference frames.

Populates bp_params, step_params, parameters, frames, and names attributes.

Source code in mdna/geometry.py
def analyse_frames(self):
    """Compute base-pair and step parameters from the extracted reference frames.

    Populates ``bp_params``, ``step_params``, ``parameters``, ``frames``,
    and ``names`` attributes.
    """

    # Get base reference frames for each residue
    frames_A = np.array([self.base_frames[res] for res in self.res_A])
    frames_B = np.array([self.base_frames[res] for res in self.res_B])

    # Compute parameters between each base pair and mean reference frames
    self.bp_params, self.mean_reference_frames = self.calculate_parameters(frames_A, frames_B)

    # Extract mean reference frames for each neighboring base pair
    B1_triads = self.mean_reference_frames[:-1] # select all but the last frame
    B2_triads = self.mean_reference_frames[1:] # select all but the first frame

    # Compute parameters between each base pair and mean reference frames
    self.step_params = self.calculate_parameters(B1_triads, B2_triads, is_step=True)[0]

    # Store mean reference frame / aka base pair triads as frames and transpose rotation matrices back to row wise
    self.frames = self.mean_reference_frames
    self.frames[:, :, 1:, :] = np.transpose(self.frames[:, :, 1:, :], axes=(0, 1, 3, 2))
    self._clean_parameters()

calculate_parameters(frames_A, frames_B, is_step=False)

Calculate the parameters between each base pair and mean reference frames.

Assumes frames are of shape (n_frames, n_residues, 4, 3) where the last two dimensions are the base triads. The base triads consist of an origin (first index) and three vectors (latter 3 indices) representing the base frame. With the order of the vectors being: b_R, b_L, b_D, b_N.

Parameters:

Name Type Description Default
frames_A ndarray

Frames of shape (n_frames, n_residues, 4, 3) representing the base triads for chain A.

required
frames_B ndarray

Frames of shape (n_frames, n_residues, 4, 3) representing the base triads for chain B.

required
is_step bool

Flag indicating if the input is a single step or a trajectory. Defaults to False.

False
Notes

Note the vectors are stored rowwise in the base triads, and not the usual column representation of the rotation matrices.

Returns:

Name Type Description
params ndarray

The parameters of shape (n_frames, n_residues, 6) representing the relative translation and rotation between each base pair.

mean_reference_frames ndarray

The mean reference frames of shape (n_bp, n_frames, 4, 3) representing the mean reference frame of each base pair.

Source code in mdna/geometry.py
def calculate_parameters(self,frames_A, frames_B, is_step=False):
    """Calculate the parameters between each base pair and mean reference frames.

    Assumes frames are of shape (n_frames, n_residues, 4, 3) where the last two dimensions are the base triads.
    The base triads consist of an origin (first index) and three vectors (latter 3 indices) representing the base frame.
    With the order of the vectors being: b_R, b_L, b_D, b_N.

    Args:
        frames_A (ndarray): Frames of shape (n_frames, n_residues, 4, 3) representing the base triads for chain A.
        frames_B (ndarray): Frames of shape (n_frames, n_residues, 4, 3) representing the base triads for chain B.
        is_step (bool, optional): Flag indicating if the input is a single step or a trajectory. Defaults to False.

    Notes:
        Note the vectors are stored rowwise in the base triads, and not the usual column representation of the rotation matrices.

    Returns:
        params (ndarray): The parameters of shape (n_frames, n_residues, 6) representing the relative translation and rotation between each base pair.
        mean_reference_frames (ndarray): The mean reference frames of shape (n_bp, n_frames, 4, 3) representing the mean reference frame of each base pair.
    """

    # Reshape frames
    rotation_A, rotation_B, origin_A, origin_B, original_shape = self.reshape_input(frames_A,frames_B, is_step=is_step)

    # Compute parameters
    if not is_step:
        # Flip from row to column representation of the rotation matrices
        rotation_A = rotation_A.transpose(0,2,1)
        rotation_B = rotation_B.transpose(0,2,1)
        params, mean_origin, mean_rotation = self.compute_parameters(rotation_A, rotation_B, origin_A, origin_B)
    else:
        # Switch the input of the B and A triads to get the correct parameters
        params, mean_origin, mean_rotation = self.compute_parameters(rotation_B, rotation_A, origin_B, origin_A)

    # Reshape the parameters to the original shape
    params = params.reshape(original_shape[0], original_shape[1], 6).swapaxes(0, 1)

    # Collect mean reference frames from mid frames of each base pair
    mean_reference_frames = np.hstack((mean_origin[:, np.newaxis, :],mean_rotation)).reshape(original_shape)

    if is_step:
        # Creating an array of zeros with shape (10000, 1, 6)
        extra_column = np.zeros((params.shape[0], 1, 6))

        # Concatenating the existing array and the extra column along the second axis
        params = np.concatenate((extra_column,params), axis=1)

    # Return the parameters and the mean reference frames
    return  params, mean_reference_frames if not is_step else params

compute_parameters(rotation_A, rotation_B, origin_A, origin_B)

Calculate the parameters between each base pair and mean reference frames.

Parameters:

Name Type Description Default
rotation_A ndarray

Rotation matrices of shape (n, 3, 3) for the first triad.

required
rotation_B ndarray

Rotation matrices of shape (n, 3, 3) for the second triad.

required
origin_A ndarray

Origins of shape (n, 3) for the first triad.

required
origin_B ndarray

Origins of shape (n, 3) for the second triad.

required

Returns:

Name Type Description
rigid_parameters ndarray

The parameters of shape (n, 12) representing the relative translation and rotation between each base pair.

trans_mid ndarray

The mean translational vector of shape (n, 3) between the triads.

rotation_mid ndarray

The mean rotation matrix of shape (n, 3, 3) between the triads.

Source code in mdna/geometry.py
def compute_parameters(self, rotation_A, rotation_B, origin_A, origin_B):
    """Calculate the parameters between each base pair and mean reference frames.

    Args:
        rotation_A (ndarray): Rotation matrices of shape (n, 3, 3) for the first triad.
        rotation_B (ndarray): Rotation matrices of shape (n, 3, 3) for the second triad.
        origin_A (ndarray): Origins of shape (n, 3) for the first triad.
        origin_B (ndarray): Origins of shape (n, 3) for the second triad.

    Returns:
        rigid_parameters (ndarray): The parameters of shape (n, 12) representing the relative translation and rotation between each base pair.
        trans_mid (ndarray): The mean translational vector of shape (n, 3) between the triads.
        rotation_mid (ndarray): The mean rotation matrix of shape (n, 3, 3) between the triads.
    """

    # Linear interpolation of translations
    trans_mid = 0.5 * (origin_A + origin_B)

    # Relative translation
    trans_AB = origin_A - origin_B

    # Get relative rotation matrix of base pair
    rotation_BA = rotation_B.transpose(0,2,1) @ rotation_A  # returns shape (n, 3, 3)

    # Get rotation angles based on  rotation matrices
    rotation_angle_BA = RigidBody.extract_omega_values(rotation_BA)

    # Compute halfway rotation matrix and triad (mid frame)
    rotation_halfway = RigidBody.get_rotation_matrix(rotation_angle_BA * 0.5)

    # Get rotation matrix of base pair (aka mean rotation frame)
    rotation_mid = rotation_B @ rotation_halfway 

    # Get transaltional coordinate vector and convert to angstroms
    translational_parameters = np.einsum('ijk,ik->ij',rotation_mid.transpose(0,2,1), trans_AB) * 10

    # Get rotational parameters and convert to degrees
    rotational_parameters = np.rad2deg(np.einsum('ijk,ik->ij', rotation_BA.transpose(0,2,1), rotation_angle_BA))

    # Merge translational and rotational parameters
    rigid_parameters = np.hstack((translational_parameters, rotational_parameters))

    # Return the parameters and the mean reference frame
    return rigid_parameters, trans_mid, rotation_mid

get_base_reference_frames()

Compute reference frames for all residues in both strands.

Returns:

Name Type Description
frames dict

Mapping of MDTraj Residue to base vectors array of shape (n_frames, 4, 3).

Source code in mdna/geometry.py
def get_base_reference_frames(self):
    """Compute reference frames for all residues in both strands.

    Returns:
        frames (dict): Mapping of MDTraj Residue to base vectors array of shape ``(n_frames, 4, 3)``.
    """
    reference_frames = {} # Dictionary to store the base vectors for each residue
    for res in self.res_A + self.res_B:
        res_traj = self.traj.atom_slice([at.index for at in res.atoms])
        base_vectors = self.get_base_vectors(res_traj)
        reference_frames[res] = base_vectors # Store the base vectors for the residue index (with shape (4, n_frames, 3))
    return reference_frames

get_base_vectors(res)

Compute base reference vectors for a single residue.

Parameters:

Name Type Description Default
res Trajectory

Single-residue trajectory slice.

required

Returns:

Name Type Description
vectors ndarray

Base vectors of shape (n_frames, 4, 3) ordered as [b_R, b_L, b_D, b_N].

Source code in mdna/geometry.py
def get_base_vectors(self, res):
    """Compute base reference vectors for a single residue.

    Args:
        res (md.Trajectory): Single-residue trajectory slice.

    Returns:
        vectors (numpy.ndarray): Base vectors of shape ``(n_frames, 4, 3)``
            ordered as ``[b_R, b_L, b_D, b_N]``.
    """
    ref_base = ReferenceBase(res)
    base_vectors = np.array([ref_base.b_R, ref_base.b_L, ref_base.b_D, ref_base.b_N]).swapaxes(0,1)
    if not self.fit_reference:
        return base_vectors
    return self._get_fitted_base_vectors(res, ref_base, base_vectors)

get_parameter(name='twist')

Get a single named parameter across all frames.

Parameters:

Name Type Description Default
name str

Parameter name. One of: shear, stretch, stagger, buckle, propeller, opening, shift, slide, rise, tilt, roll, twist.

'twist'

Returns:

Type Description
ndarray

numpy.ndarray: Parameter values of shape (n_frames, n_bp).

Raises:

Type Description
ValueError

If the parameter name is not recognised.

Source code in mdna/geometry.py
def get_parameter(self, name='twist') -> np.ndarray:
    """Get a single named parameter across all frames.

    Args:
        name (str): Parameter name. One of: shear, stretch, stagger, buckle,
            propeller, opening, shift, slide, rise, tilt, roll, twist.

    Returns:
        numpy.ndarray: Parameter values of shape ``(n_frames, n_bp)``.

    Raises:
        ValueError: If the parameter name is not recognised.
    """

    if name not in self.names:
        raise ValueError(f"Parameter {name} not found.")
    return self.parameters[:,:,self.names.index(name)]

get_parameters(step=False, base=False)

Return computed rigid base parameters.

Parameters:

Name Type Description Default
step bool

If True, return only step parameters.

False
base bool

If True, return only base-pair parameters.

False

Returns:

Name Type Description
result tuple[ndarray, list[str]]

Parameters of shape (n_frames, n_bp, n_params) and corresponding names.

Raises:

Type Description
ValueError

If both step and base are True.

Source code in mdna/geometry.py
def get_parameters(self, step=False, base=False):
    """Return computed rigid base parameters.

    Args:
        step (bool): If True, return only step parameters.
        base (bool): If True, return only base-pair parameters.

    Returns:
        result (tuple[numpy.ndarray, list[str]]): Parameters of shape
            ``(n_frames, n_bp, n_params)`` and corresponding names.

    Raises:
        ValueError: If both *step* and *base* are True.
    """
    if step and not base:
        return self.step_params, self.step_parameter_names
    elif base and not step:
        return self.bp_params, self.base_parameter_names
    elif not step and not base:
        return self.parameters, self.names
    raise ValueError("Use only one of step=True or base=True, or neither.")

get_residues(chain_index, reverse=False)

Get residues from a specified chain.

Parameters:

Name Type Description Default
chain_index int

Index of the chain in the topology.

required
reverse bool

If True, return residues in reverse order (used for the anti-sense strand).

False

Returns:

Name Type Description
residues list

MDTraj Residue objects.

Raises:

Type Description
IndexError

If chain_index is out of range.

Source code in mdna/geometry.py
def get_residues(self, chain_index, reverse=False):
    """Get residues from a specified chain.

    Args:
        chain_index (int): Index of the chain in the topology.
        reverse (bool): If True, return residues in reverse order (used for the anti-sense strand).

    Returns:
        residues (list): MDTraj Residue objects.

    Raises:
        IndexError: If *chain_index* is out of range.
    """
    if chain_index >= len(self.top._chains):
        raise IndexError("Chain index out of range.")
    chain = self.top._chains[chain_index]
    residues = chain._residues
    return list(reversed(residues)) if reverse else residues

load_reference_bases()

Load canonical reference base structures from bundled HDF5 files.

Returns:

Name Type Description
bases dict[str, Trajectory]

Mapping of base letter (A, C, G, T) to single-frame trajectory.

Source code in mdna/geometry.py
def load_reference_bases(self):
    """Load canonical reference base structures from bundled HDF5 files.

    Returns:
        bases (dict[str, md.Trajectory]): Mapping of base letter (A, C, G, T) to single-frame trajectory.
    """
    bases = ['C', 'G', 'T', 'A']
    return {base: md.load_hdf5(get_data_file_path(f'./atomic/bases/BDNA_{base}.h5')) for base in bases}

plot_parameters(fig=None, ax=None, mean=True, std=True, figsize=[10, 3.5], save=False, step=True, base=True, base_color='cornflowerblue', step_color='coral')

Plot the rigid base parameters of the DNA structure.

Parameters:

Name Type Description Default
fig Figure

Existing figure.

None
ax ndarray

Array of axes to plot on.

None
mean bool

Plot the mean line.

True
std bool

Plot the standard deviation band.

True
figsize list

Figure size [width, height].

[10, 3.5]
save bool

If True, save the figure to parameters.png.

False
step bool

Include step parameters.

True
base bool

Include base-pair parameters.

True
base_color str

Color for base-pair parameter plots.

'cornflowerblue'
step_color str

Color for step parameter plots.

'coral'

Returns:

Name Type Description
result tuple

(fig, ax) matplotlib figure and axes array.

Source code in mdna/geometry.py
def plot_parameters(self, fig=None, ax=None, mean=True, std=True, figsize=[10, 3.5], save=False, step=True, base=True, base_color='cornflowerblue', step_color='coral'):
    """Plot the rigid base parameters of the DNA structure.

    Args:
        fig (matplotlib.figure.Figure, optional): Existing figure.
        ax (numpy.ndarray, optional): Array of axes to plot on.
        mean (bool): Plot the mean line.
        std (bool): Plot the standard deviation band.
        figsize (list): Figure size ``[width, height]``.
        save (bool): If True, save the figure to ``parameters.png``.
        step (bool): Include step parameters.
        base (bool): Include base-pair parameters.
        base_color (str): Color for base-pair parameter plots.
        step_color (str): Color for step parameter plots.

    Returns:
        result (tuple): ``(fig, ax)`` matplotlib figure and axes array.
    """

    import matplotlib.pyplot as plt

    cols = step + base

    if fig is None and ax is None:
        fig,ax = plt.subplots(cols,6, figsize=[12,2*cols])
        ax = ax.flatten()
    if step and not base:
        names = self.step_parameter_names
    elif base and not step:
        names = self.base_parameter_names
    elif base and step:
        names = self.names

    for _,name in enumerate(names):
        if name in self.step_parameter_names:
            color = step_color
        else:
            color = base_color
        para = self.get_parameter(name)
        mean = np.mean(para, axis=0)
        std = np.std(para, axis=0)
        x = range(len(mean))
        #ax[_].errorbar(x,mean, yerr=std, fmt='-', color=color)
        ax[_].fill_between(x, mean-std, mean+std, color=color, alpha=0.2)
        ax[_].plot(mean, color=color,lw=1)    
        ax[_].scatter(x=x,y=mean,color=color,s=10)
        ax[_].set_title(name)

    fig.tight_layout()
    if save:
        fig.savefig('parameters.png')
    return fig, ax 

reshape_input(input_A, input_B, is_step=False)

Reshape the input to the correct format for the calculations.

Args: input_A (ndarray): Input array for the first triad. input_B (ndarray): Input array for the second triad. is_step (bool, optional): Flag indicating if the input is a single step or a trajectory. Defaults to False.

Returns: rotation_A (ndarray): Rotation matrices of shape (n, 3, 3) for the first triad. rotation_B (ndarray): Rotation matrices of shape (n, 3, 3) for the second triad. origin_A (ndarray): Origins of shape (n, 3) for the first triad. origin_B (ndarray): Origins of shape (n, 3) for the second triad. original_shape (tuple): The original shape of the input.

Source code in mdna/geometry.py
def reshape_input(self,input_A,input_B,is_step=False):

    """Reshape the input to the correct format for the calculations.

    Args:
    input_A (ndarray): Input array for the first triad.
    input_B (ndarray): Input array for the second triad.
    is_step (bool, optional): Flag indicating if the input is a single step or a trajectory. Defaults to False.

    Returns:
    rotation_A (ndarray): Rotation matrices of shape (n, 3, 3) for the first triad.
    rotation_B (ndarray): Rotation matrices of shape (n, 3, 3) for the second triad.
    origin_A (ndarray): Origins of shape (n, 3) for the first triad.
    origin_B (ndarray): Origins of shape (n, 3) for the second triad.
    original_shape (tuple): The original shape of the input.
    """

    # Store original shape
    original_shape = input_A.shape

    # Flatten frames to compute rotation matrices for each time step simultaneously
    input_A_ = input_A.reshape(-1,original_shape[-2],original_shape[-1])  # shape (n, 4, 3)
    input_B_ = input_B.reshape(-1,original_shape[-2],original_shape[-1])  # shape (n, 4, 3)

    # Extract the triads without origin (rotation matrices)
    rotation_A = input_A_[:,1:]  # shape (n, 3, 3)
    rotation_B = input_B_[:,1:]  # shape (n, 3, 3)

    if not is_step:
        # flip (connecting the backbones) and the (baseplane normals).
        # so the second and third vector b_L, b_N
        rotation_B[:,[1,2]] *= -1

    # Extract origins of triads
    origin_A = input_A_[:,0]  # shape (n, 3)
    origin_B = input_B_[:,0]  # shape (n, 3)

    return rotation_A, rotation_B, origin_A, origin_B, original_shape