from pathlib import Path
import warnings
import copy
import json
from functools import wraps
from inspect import signature
from collections.abc import Iterable
from typing import (
Any,
List,
Dict,
Tuple,
Union,
Optional,
Literal,
Callable,
TYPE_CHECKING,
)
# OAT IMPORTS
try:
from oxDNA_analysis_tools.external_force_utils.force_reader import write_force_file
from oxDNA_analysis_tools.external_force_utils.forces import mutual_trap
from oxDNA_analysis_tools.UTILS.RyeReader import (
get_confs,
describe,
strand_describe,
inbox,
)
from oxDNA_analysis_tools.oxDNA_PDB import oxDNA_PDB
oat_installed = True
except ImportError:
oat_installed = False
# pyFuRNAce IMPORTS
from .symbols import (
pair_map_to_dot_bracket,
dot_bracket_to_pair_map,
Node,
tree_to_dot_bracket,
dot_bracket_to_tree,
accept_symbol,
base_pairing,
bp_symbols,
rotate_90,
MotifStructureError,
folding_barriers as fold_bar,
)
from .callback import Callback
from .sequence import Sequence
from .strand import Strand, StrandsBlock
from .basepair import BasePair
from .position import Position, Direction
if TYPE_CHECKING: # only for type checkers / linters, not at runtime
from .origami import Origami
[docs]
class Motif(Callback):
"""
Represents a structural motif in an RNA Origami design.
A `Motif` is composed of multiple `Strand` objects, defining an RNA secondary
structure with base pair interactions. It supports coordinate locking,
strand joining, sequence assignment, and 2D motif visualization.
Parameters
----------
*strands : list of Strand, optional
A list of `Strand` objects to initialize the motif.
basepair : BasePair, dict, optional
A dictionary defining base pair connections, with `Position` objects as keys
and values representing paired positions.
structure : str, optional
A dot-bracket notation representation of the motif's structure.
If this is provided, the `basepair` parameter is ignored.
autopairing : bool, default True
If True, automatically pairs bases based on complementary interactions.
If a basepair or structure is given, autopairing is turned off.
lock_coords : bool, default True
If True, locks the coordinate system for all strands in the motif.
When a strand coordinate transformed, all other strands are transformed.
join : bool, default True
If True, attempts to merge consecutive strands where possible.
copy : bool, default False
If True, creates copies of the input strands instead of using references.
**kwargs : dict
Additional arguments to pass to the parent `Callback` class.
Attributes
----------
autopairing : bool
Indicates whether the motif automatically pairs bases.
basepair : BasePair
The base-pairing map of the motif.
A double dictionary with `Position` objects as keys and values
representing paired positions in the 2D representation.
junctions : dict
A dictionary with 2D Direction as key and Positions of the junctions as values.
lock_coords : bool
Indicates whether the strands coordinates are part of the same coordinate
system.
max_pos : tuple of int
The maximum x, y coordinates occupied by the motif.
min_pos : tuple of int
The minimum x, y coordinates occupied by the motif.
num_char : int
The maximum length of the motif lines.
num_lines : int
The number of lines in the motif structure.
pair_map : dict
A mapping of paired bases indices (alternative to dot-bracket notation).
positions : tuple of Position
A tuple of the positions of each character of each strand in 2D space.
seq_positions : tuple of Position
The positions of each nucleotide in the motif sequence (x,y coordinates).
Note: the sequence is always 5' to 3' in the motif.
sequence : Sequence
The nucleotide sequence of the motif. It's the concatenation of the sequence.
of each strand, with 5' to 3' directionality.
strands : list of Strand
A list of strands composing the motif.
structure : str
The motif structure represented in dot-bracket notation.
Notes
-----
- The motif is built from strands and supports strand merging.
- Base pairs can be set manually or inferred automatically.
- Supports transformations, file export, and visual representation.
"""
def __init__(
self,
*strands: List[Strand],
basepair: Optional[Dict[Tuple[int, int], Tuple[int, int]]] = None,
structure: Optional[str] = None,
autopairing: bool = True,
lock_coords: bool = True,
join: bool = True,
copy: bool = False,
**kwargs,
) -> None:
"""
Initialize a Motif instance.
Parameters
----------
*strands : list of Strand
List of strands to add to the motif.
basepair : dict, optional
The base-pairing map of the motif.
A double dictionary with `Position` objects as keys and values
representing paired positions in the 2D representation.
structure : str, optional
The structure of the motif in dot-bracket notation.
If provided, the `basepair` parameter is ignored.
autopairing : bool, default True
Whether to enable automatic base pairing.
If a basepair or structure is given, autopairing is turned off.
lock_coords : bool, default True
Whether to lock coordinates in the same coordinate system.
join : bool, default True
Whether to join consecutive strands.
copy : bool, default False
Whether to copy strands.
"""
super().__init__(**kwargs)
### Initialize the attributes
self._positions = None
self._seq_positions = None
self._max_pos = None
self._min_pos = None
self._junctions = {}
self._sequence = Sequence()
self._pair_map = BasePair()
self._structure = None
self._strands = []
self._lock_coords = lock_coords
self._strands_block = StrandsBlock()
### STRANDS INITIALIZATION
if (
len(strands) == 1
and isinstance(strands[0], Iterable)
and not isinstance(strands[0], Strand)
):
# if a single list of strands is passed, unpack it
strands = tuple(strands[0])
extra = kwargs.pop("strands", tuple())
if isinstance(extra, Strand):
strands += (extra,)
elif isinstance(extra, Iterable):
strands += tuple(extra)
else:
raise TypeError(
"`strands` keyword argument must be a Strand or an "
"iterable of Strands."
)
if not all(isinstance(s, Strand) for s in strands):
raise ValueError(
"All the elements in the strands input must be of type"
f" Strand. Got types {[type(s) for s in strands]}."
)
### COPY/REGISTER STRANDS
if copy:
strands = self.copy_strands_preserve_blocks(strands, self)
else:
for s in strands:
s.register_callback(self._updated_strands)
### JOIN STRANDS
if join:
strands = self.join_strands(strands=strands)
self._strands = list(strands) # set the strands
### LOCK COORDINATES
if self.lock_coords:
# IMPORTANT: look at the _coords to avoid locking strands
# that don't have coordinates
# (therefore should not be locked)
self._strands_block = StrandsBlock(*[s for s in self._strands])
# if not s._coords.is_empty()])
### BASEPAIR/DOT-BRACKET INITIALIZATION
# prioritize the structure
if structure:
self.structure = structure
autopairing = False
else:
# if basepair is given, set off autopairing
if basepair:
autopairing = False
else:
autopairing = autopairing
basepair = BasePair()
# initialize basepair property
self.basepair = BasePair(basepair, callback=self._updated_basepair)
# set autopairing
self._autopairing = autopairing
def __str__(self) -> str:
"""Return the blueprint of the motif."""
if not self:
return ""
# create the canvas
canvas_repr = [" " * self.num_char] * self.num_lines
# draw each strand to the canvas
for strand in self:
strand.draw(canvas_repr, return_string=False)
# add base pairing at position (if the base pairing position is free)
for pos1, pos2 in self.basepair.items():
# select the horizontal basepair symbol
if pos1[1] - pos2[1] == 0 and abs(pos1[0] - pos2[0]) == 2:
pos = ((pos1[0] + pos2[0]) // 2, pos1[1])
sym = "="
# select the vertical basepair symbol
elif pos1[0] - pos2[0] == 0 and abs(pos1[1] - pos2[1]) == 2:
pos = (pos1[0], (pos1[1] + pos2[1]) // 2)
sym = "┊"
# skip the basepair if it is not horizontal or vertical
else:
continue
# if the position is free, add the basepair symbol
if (
pos[1] < len(canvas_repr)
and pos[0] < len(canvas_repr[0])
and canvas_repr[pos[1]][pos[0]] == " "
):
canvas_repr[pos[1]] = (
canvas_repr[pos[1]][: pos[0]]
+ sym
+ canvas_repr[pos[1]][pos[0] + 1 :]
)
# send a warning if the position is already occupied
else:
warnings.warn(
f"Hidden basepair at position {pos}."
f" Trying to pair {pos1} with {pos2}.",
stacklevel=2,
)
return "\n".join(canvas_repr)
def __repr__(self) -> str:
"""Return the list of strands."""
return str(list(self))
def __getitem__(self, idx: int) -> "Strand":
"""Get the strand at index."""
return self._strands[idx]
def __setitem__(self, idx: int, strand: "Strand") -> None:
"""Set the strand at index and try to join."""
if not isinstance(strand, Strand):
raise ValueError(f"{strand} is not a Strand object.")
# register the callback to update the motif
strand.register_callback(self._updated_strands)
self._strands[idx] = strand
# indicate that the strands have been updated
self._strands = self.join_strands(self._strands)
if self.lock_coords:
self._strands_block = StrandsBlock(*[s for s in self._strands])
self._updated_strands()
def __len__(self) -> int:
"""Get the number of strands."""
return len(self._strands)
def __add__(self, other: "Motif") -> "Motif":
"""
Add two motifs together by stacking them horizontally.
Parameters
----------
other : Motif
The motif to be added.
Returns
-------
Motif
The new motif resulting from the addition.
Raises
------
ValueError
If `other` is not a `Motif` instance.
"""
self._check_addition(other)
# edge cases
if not self and not other:
return Motif()
elif not self:
return other.copy()
elif not other:
return self.copy()
# create a copy of the motif to add.
# All operations will be performed on the copies
self_copy = self.copy()
other_copy = other.copy()
# align the motifs horizontally
self.align(self_copy, other_copy)
# get the x shifts to move the motifs
x_shifts = self.get_sequential_shift(
[self_copy, other_copy], position_based=False
)
# move motif to the right of the first motif
other_copy.shift((x_shifts[1], 0))
# if one of the two motifs doesn't use autopairing,
# set it false and update the basepair dictionary
if self_copy.autopairing and other_copy.autopairing:
new_basepair = BasePair()
else:
new_basepair = self_copy.basepair
new_basepair.update(other_copy.basepair)
# return the new motif collecting the strands
return Motif(strands=list(self_copy) + list(other_copy), basepair=new_basepair)
def __iadd__(self, other: "Motif") -> "Motif":
"""
Add another motif to the current motif in place and join the strands.
Parameters
----------
other : Motif
The motif to be added.
Returns
-------
Motif
The updated motif.
"""
self._check_addition(other)
# edge cases
if not other:
return self
# copy the other motif to prevent modifying it
other_copy = other.copy()
if self:
# align the motifs horizontally
self.align(self, other_copy)
x_shifts = self.get_sequential_shift(
[self, other_copy], position_based=False
)
# move motif to the right of the first motif
other_copy.shift((x_shifts[1], 0))
# if other has autopairing off, set it off
if not other_copy.autopairing:
self._autopairing = False
self._basepair.update(other_copy.basepair)
# add the other strands to self
for strand in other_copy:
self._strands.append(strand)
# join the strands
self._strands = self.join_strands(self._strands)
if self.lock_coords:
self._strands_block = StrandsBlock(*[s for s in self._strands])
for s in self:
s.register_callback(self._updated_strands)
self._updated_strands()
return self
def __radd__(self, other: "Motif") -> "Motif":
"""
Enable right addition for motifs.
"""
if other == 0:
return self
else:
return self.__add__(other)
def __bool__(self) -> bool:
"""True if the motif contains at least one valid strand, False otherwise."""
if not self._strands:
return False
for s in self._strands:
if s:
return True
return False
def __contains__(self, other: Union["Strand", "Sequence", str, "BasePair"]) -> bool:
"""Check if the motif contains a strand, sequence, or base pair."""
if isinstance(other, Strand):
return other in self._strands
elif isinstance(other, Sequence):
return other in self.sequence
elif isinstance(other, str):
return any(other in s for s in self._strands)
elif isinstance(other, BasePair):
return all(self.basepair.get(k) == v for k, v in other.items())
return False
def __eq__(self, other: Any) -> bool:
"""True if two motifs have equal strands and equal basepair."""
if not isinstance(other, Motif):
return False
elif len(self) != len(other):
return False
for s1, s2 in zip(self, other):
if s1 != s2:
return False
if self.basepair != other.basepair:
return False
return True
###
### PROPERTIES
###
@property
def autopairing(self) -> bool:
"""
Indicates whether the motif automatically pairs bases.
"""
return self._autopairing
@autopairing.setter
def autopairing(self, autopairing: bool) -> None:
"""
Set the autopairing property.
"""
self._autopairing = bool(autopairing)
if autopairing:
self._basepair = BasePair()
self._calculate_basepair()
@property
def basepair(self) -> BasePair:
"""
A dictionary with positions as key and the paired position as values.
"""
if self._autopairing and (
not self._positions or not self._sequence or not self._basepair
):
# calculate the basepair dictionary
self._calculate_basepair()
# reset the dot bracket
self._structure = None
return self._basepair
@basepair.setter
def basepair(
self, basepair_dict: Union[Dict[Position, Position], BasePair]
) -> None:
"""
Set the basepair dictionary and turn autopairing off.
Parameters
----------
basepair_dict : dict or BasePair
A dictionary with positions as key and the paired position as values.
"""
if not isinstance(basepair_dict, (dict, BasePair)):
raise ValueError(
f"{basepair_dict} must be a dictionary or a BasePair"
f" object. Got {type(basepair_dict)} instead."
)
try:
basepair_dict = {Position(k): Position(v) for k, v in basepair_dict.items()}
except Exception as e:
raise ValueError(
"Error converting the basepair dictionary to"
f" a dictionary of Position objects: {e}"
)
self._autopairing = False
self._basepair = BasePair(basepair_dict, callback=self._updated_basepair)
self._trigger_callbacks()
@property
def junctions(self) -> Dict[Tuple[int, int], Tuple[Tuple[int, int]]]:
"""
A dictionary with Direction as key (right: (1,0), bottom: (0,1),
left: (-1,0), top: (0,1)) and an ordered list of junction positon as value.
"""
### if the junctions are already calculated return them
if self._positions is None:
self._calculate_positions()
return self._junctions
@property
def lock_coords(self) -> bool:
"""
Boolean indicating if the coordinates are locked in the same block.
"""
return self._lock_coords
@lock_coords.setter
def lock_coords(self, lock_coords):
"""
Set wether the strands coordinates must be locked in the same block.
"""
self._lock_coords = bool(lock_coords)
@property
def max_pos(self) -> Tuple[int, int]:
"""
The maximum x, y coordinates occupied of the motif.
"""
if not self._positions:
self._calculate_positions()
return self._max_pos
@property
def min_pos(self) -> Tuple[int, int]:
"""
The minimum x, y coordinates occupied of the motif.
"""
if not self._positions:
self._calculate_positions()
return self._min_pos
@property
def num_char(self) -> int:
"""
The maximum length of the motif lines.
"""
if not self._positions:
self._calculate_positions()
return self._max_pos[0] + 1
@property
def num_lines(self) -> int:
"""
The number of lines in the motif structure.
"""
if not self._positions:
self._calculate_positions()
return self._max_pos[1] + 1
@property
def pair_map(self) -> Dict[int, Union[int, None]]:
"""
The dictionary of the paired indexes (alternative to the dot bracket
notation).
"""
if self._positions and self._pair_map:
return self._pair_map
pos_to_ind = {pos: ind for ind, pos in enumerate(self.seq_positions)}
self._pair_map = BasePair(
{
ind: pos_to_ind.get(self.basepair.get(pos))
for pos, ind in pos_to_ind.items()
}
)
return self._pair_map
@property
def positions(self) -> Tuple[Position]:
"""
A tuple of the positions of each character of each strand in 2D space
(x,y coordinates).
"""
if not self._positions:
self._calculate_positions()
return self._positions
@property
def sequence(self) -> str:
"""
Return the sequence of the motif.
"""
if self._sequence:
self._sequence
### calculate the sequence
tot_seq = ""
for s in self:
if not s.sequence: # skip the strands without sequence
continue
if s.directionality == "35":
tot_seq += str(s.sequence)[::-1]
else:
tot_seq += str(s.sequence)
tot_seq += "&"
self._sequence = Sequence(tot_seq.strip("&")) # remove separator at the end
return self._sequence
@sequence.setter
def sequence(self, seq_list: Union[str, List[str], Sequence] = None) -> None:
"""
Set the sequence of each strand the motif
Parameters
----------
seq_list : str or Sequence or list of str or Sequence
The list of sequences to set to the strands.
If a single string is passed, it is split by '&'.
Raises
------
ValueError
If the number of sequences is different from the number of strands.
"""
if not isinstance(seq_list, Iterable) or (
not isinstance(seq_list, (str, Sequence))
and not isinstance(seq_list, (tuple, list))
and any(not isinstance(s, (str, Sequence)) for s in seq_list)
):
raise ValueError(
f"{seq_list} must be a string, a Sequence object or "
"a list of strings or Sequence objects. "
f"Got {type(seq_list)} instead."
)
if isinstance(seq_list, (str, Sequence)):
seq_list = seq_list.split("&")
if len(seq_list) != len(self):
raise ValueError(
"The number of sequences must be equal to the number of"
f" strands. Got {len(seq_list)} sequences "
f" for {len(self)} strands."
)
seq_list = seq_list[:] # copy the list
for s in self:
seq = seq_list.pop(0)
direct = 1 if s.directionality == "53" else -1
s.sequence = seq[::direct]
@property
def seq_positions(self) -> Tuple[Position]:
"""
The positions of each nucleotide in the motif sequence (x,y coordinates).
The sequence has always the directionality 5' to 3'
"""
if not self._positions:
self._calculate_positions()
return self._seq_positions
@property
def strands(self) -> List["Strand"]:
"""
The list of strands in the motif.
"""
return self._strands
@property
def structure(self) -> str:
"""
Return the dot bracket representation of the motif.
"""
### if the dot bracket is already calculated return it
if self._structure:
return self._structure
# calculate the break points of the strands
break_points = [i for i, sym in enumerate(self.sequence) if sym == "&"]
### CREATE THE DOT BRACKET NOTATION ###
dotbracket = pair_map_to_dot_bracket(
self.pair_map, len(self.sequence.replace("&", ""))
)
### ADD THE BREAK POINTS ###
for i, bp in enumerate(break_points):
dotbracket = dotbracket[:bp] + "&" + dotbracket[bp:]
self._structure = dotbracket
return self._structure
@structure.setter
def structure(self, structure: str) -> None:
"""Set the dot bracket notation of the motif, it updates the basepair."""
if not isinstance(structure, str):
raise ValueError(
f"{structure} must be a string. " f"Got {type(structure)} instead."
)
if len(structure) != len(self.sequence):
raise ValueError(
f"The length of the dot bracket must be equal to the"
f" length of the sequence."
f"Got {len(structure)} for {len(self.sequence)}"
)
basepair_dict = BasePair()
# get the dictionary of paired indexes without the separation symbol
pair_map = dot_bracket_to_pair_map(structure.replace("&", ""))
# get the list of base positions in simple order
# iterate over the base positions
for index, pos in enumerate(self.seq_positions):
paired = pair_map[index]
# the index of the base position is paired to something
if paired is not None:
basepair_dict[Position(pos)] = Position(self.seq_positions[paired])
# set the basepair dictionary
self.basepair = basepair_dict
self._pair_map = pair_map
self._structure = structure
###
### CLASS METHODS
###
[docs]
@classmethod
def concat(
cls,
*motifs: List["Motif"],
axis: Literal[0, 1] = 1,
extend: bool = False,
copy: bool = True,
align: bool = True,
position_based: bool = False,
align_junctions: Optional[List[Tuple[int, int]]] = None,
unlock_strands: bool = False,
return_shifts: bool = False,
**kwargs,
) -> "Motif":
"""
Concatenate multiple motifs along a specified axis.
Parameters
----------
*motifs : list of Motif
List of motifs to concatenate.
axis : int, default 1
The numpy axis along which motifs are aligned.
1 for horizontal alignment, 0 for vertical alignment.
extend : bool, default False
Whether to extend junctions before concatenation.
copy : bool, default True
Whether to copy motifs before concatenation.
align : bool, default True
Whether to align motifs before concatenation.
position_based : bool, default False
Whether to align motifs based on positions instead of junctions.
align_junctions : list of tuple, optional
List of junction indices for alignment.
unlock_strands : bool, default False
Whether to unlock all strands, so they are not part
of the same Strand block (different coordinate systems).
return_shifts : bool, default False
Whether to return the shift values for each motif to be aligned
and concatenated. If True, a list of tuples with the shifts is returned,
along with the concatenated motif (useful for Origami alignment).
**kwargs : dict
Additional arguments to pass to the Motif constructor.
Returns
-------
Motif
The concatenated Motif object.
List[Tuple[int, int]], optional
The list of shifts applied to each motif during alignment.
Only returned if return_shifts is True.
"""
if (
len(motifs) == 1
and isinstance(motifs[0], Iterable)
and not isinstance(motifs[0], Motif)
):
# if a single list of motifs is passed, unpack it
motifs = tuple(motifs[0])
extra = tuple(kwargs.pop("motifs", tuple()))
if isinstance(extra, Motif):
motifs += (extra,)
elif isinstance(extra, Iterable):
motifs += tuple(extra)
# check if all the elements in the motifs input are of type Motif
if not all(isinstance(m, Motif) for m in motifs):
raise ValueError(
"All the elements in the motifs input must be of type"
f" Motif. Got types {[type(m) for m in motifs]}."
)
# make a motif list and remove the empty motifs
motifs = [m for m in motifs if m]
if copy:
motifs = [m.copy() for m in motifs]
if return_shifts:
# create a list of shifts for each motif
shifts = [Position.zero()] * len(motifs)
if align:
aligned = Motif.align(
*motifs,
axis=axis,
align_junctions=align_junctions,
return_shifts=return_shifts,
)
if return_shifts:
aligned, shifts = aligned
else:
aligned = motifs
# trick to handle all the axis at once
x_pos = axis
y_pos = int(not axis)
if extend:
# Extend the junctions
extend_until = [list(m.max_pos) for m in aligned]
max_extend = max(extend_until, key=lambda x: x[axis], default=[0, 0, 0])[
axis
]
for extend_to in extend_until:
extend_to[axis] = max_extend
aligned = [
m.extend_junctions(until=extend_to)
for m, extend_to in zip(aligned, extend_until)
]
# prepare the motif shifting them two by two
for ind, m1 in enumerate(aligned[:-1]):
m2 = aligned[ind + 1]
# calculate the shift based on the positions
if position_based:
max_pos_m1 = m1.max_pos[y_pos]
min_pos_m2 = m2.min_pos[y_pos]
# calculate the shift based on the junctions
else:
m1_junct = m1.junctions[Position((x_pos, y_pos))]
m2_junct = m2.junctions[Position((-x_pos, -y_pos))]
if m1_junct and m2_junct:
ind1, ind2 = 0, 0
if align_junctions and align_junctions[ind]:
ind1, ind2 = align_junctions[ind]
max_pos_m1 = m1_junct[ind1][y_pos]
min_pos_m2 = m2_junct[ind2][y_pos]
# No junctions, just place the motifs one after the other
else:
max_pos_m1 = m1.max_pos[y_pos]
min_pos_m2 = 0
if m2.min_pos[y_pos] > max_pos_m1:
# if the second motif is already shifted, don't shift
max_pos_m1 = -1
# Calculate the shift for the axis
shift_pos = Position(
(
x_pos * (max_pos_m1 - min_pos_m2 + 1),
y_pos * (max_pos_m1 - min_pos_m2 + 1),
)
)
# Apply the shift
m2.shift(shift_pos)
# update the shifts
if return_shifts:
shifts[ind + 1] = shifts[ind + 1] + shift_pos
### BASEPAIRS HANDLING
basepair = BasePair()
# if all the motifs have autopairing on, the new motif has autopairing on
autopairing = all([m.autopairing for m in aligned])
if not autopairing:
for m in aligned:
basepair.update(m.basepair)
### STRANDS LOCKING
if unlock_strands:
for m in aligned:
for s in m:
s.strands_block = None
# create a object of the class
new_motif = cls(**kwargs)
new_motif._autopairing = autopairing
new_motif._basepair = basepair
new_motif.replace_all_strands(
[strand for m in aligned for strand in m], copy=False, join=True
)
# used in Origami assembly
if return_shifts:
return new_motif, shifts
return new_motif
[docs]
@classmethod
def from_file(cls, file_path: str, **kwargs) -> "Motif":
"""
Create a Motif object from a text file containing a motif sketch.
Each Strand is read starting from the `5` symbol. If you want to add the 5'
terminal symbol, start the strand with `55`. Only one symbol should be placed
next to the `5` start of the strand in order to guess the right direction.
Alternatively, you can use symbols `^v><` to start a strand and indicate
the start direction (up, down, right, left); in this case you can place
multiple symbols next to the start symbol.
Parameters
----------
file_path : str
Path to the file containing the motif structure.
**kwargs : dict
Additional arguments to pass to the Motif constructor.
Returns
-------
Motif
The constructed Motif object.
"""
with open(file_path, "r", encoding="utf-8") as f:
motif_text = f.read()
return cls.from_text(motif_text, **kwargs)
[docs]
@classmethod
def from_json(cls, json_data: Union[str, Dict], **kwargs) -> "Motif":
"""
Create a Motif object from a JSON string or dictionary.
The json dictionary is parsed, and is consumed to create the Motif object.
Parameters
----------
json_data : str or dict
The JSON string or dictionary representing the motif.
**kwargs : dict
Additional arguments to pass to the Motif constructor.
Returns
-------
Motif
The constructed Motif object.
"""
from ... import motifs
if isinstance(json_data, str):
data = json.loads(json_data)
elif isinstance(json_data, dict):
data = json_data
else:
raise ValueError(
f"{json_data} must be a JSON string or dictionary."
f" Got {type(json_data)} instead."
)
# remove version if present
data.pop("pyfurnace_version", None)
# version tracking is useful for backward compatibility
# try to retrieve the motif data
mot_type = data.pop("motif_type", None)
search_cls = getattr(motifs, mot_type, None)
if search_cls:
cls = search_cls
strands = []
strands_blocks_id = {}
for strand_data in data.pop("strands", []):
sb_id = strand_data.get("strands_block_id", None)
strand = Strand.from_json(strand_data)
strands.append(strand)
# handle strands blocks
if sb_id is not None:
strands_blocks_id.setdefault(sb_id, []).append(strand)
# assign strands blocks
lock_coords = data.pop("lock_coords", True)
if not lock_coords:
for sb_strands in strands_blocks_id.values():
StrandsBlock(*sb_strands)
basepair = BasePair(
{
Position.from_json(k): Position.from_json(v)
for k, v in data.pop("basepair", {}).items()
}
)
new_motif = cls()
new_motif._lock_coords = lock_coords
new_motif.replace_all_strands(strands, copy=False, join=False)
new_motif.autopairing = data.pop("autopairing", True)
if basepair:
new_motif.basepair = basepair
# Assign any other remaining attributes
for key, value in data.items():
setattr(new_motif, key, value)
return new_motif
[docs]
@classmethod
def from_json_file(cls, file_path: str, **kwargs) -> "Motif":
"""
Create a Motif object from a JSON file.
Parameters
----------
file_path : str
Path to the JSON file representing the motif.
**kwargs : dict
Additional arguments to pass to the Motif constructor.
Returns
-------
Motif
The constructed Motif object.
"""
with open(file_path, "r", encoding="utf-8") as f:
json_data = json.load(f)
return cls.from_json(json_data, **kwargs)
[docs]
@classmethod
def from_list(cls, motif_list: List[str], **kwargs) -> "Motif":
"""
Create a Motif object from a list of strings representing a motif sketch.
Each Strand is read starting from the `5` symbol. If you want to add the 5'
terminal symbol, start the strand with `55`. Only one symbol should be placed
next to the `5` start of the strand in order to guess the right direction.
Alternatively, you can use symbols `^v><` to start a strand and indicate
the start direction (up, down, right, left); in this case you can place
multiple symbols next to the start symbol.
Parameters
----------
motif_list : list of str
The list of strings representing the motif.
**kwargs : dict
Additional arguments to pass to the Motif constructor.
Returns
-------
Motif
The constructed Motif object.
"""
### initialize the variables
strand_list = [] # list of strands
mapped_pos = set() # set of visited positions
### Get the maximum x and y positions
max_x = max([len(line) for line in motif_list], default=1)
max_y = len(motif_list) - 1
### Uniform the line length
for index, line in enumerate(motif_list):
diff = max_x - (len(line) - 1)
if diff:
motif_list[index] = line + " " * diff
### TRACE THE STRANDS STARTING WITH ^v><
for y, line in enumerate(motif_list):
for x, char in enumerate(line):
# check a start of a strand
if char in "^v><": # Special direction characters
if char == "^":
direction = (0, -1)
elif char == "v":
direction = (0, 1)
elif char == ">":
direction = (1, 0)
elif char == "<":
direction = (-1, 0)
# get the strand direction
start_pos = (x + direction[0], y + direction[1])
# get the strand characters
strand_chars = Motif.trace(
motif_list, start_pos, direction, limits=(max_x, max_y)
)
# create the Strand object
strand = Strand(
strand_chars,
directionality="53",
start=start_pos,
direction=direction,
)
# add the Strand to the list
strand_list.append(strand)
# update the visited positions
mapped_pos.update(set(strand.positions))
### TRACE THE STRANDS STARTING WITH 5
for y, line in enumerate(motif_list):
for x, char in enumerate(line):
# check a start of a strand
if char == "5" and (x, y) not in mapped_pos:
right_sym = None
bot_sym = None
left_sym = None
top_sym = None
# try to guess the direction based on the next symbol
if x + 1 <= max_x:
right_sym = motif_list[y][x + 1].capitalize()
if y + 1 <= max_y:
bot_sym = motif_list[y + 1][x].capitalize()
if x > 0:
left_sym = motif_list[y][x - 1].capitalize()
if y > 0:
top_sym = motif_list[y - 1][x].capitalize()
direction = None
if right_sym in accept_symbol and motif_list[y][x + 1] not in " 3":
direction = (1, 0)
elif bot_sym in accept_symbol and bot_sym not in " 3":
direction = (0, 1)
elif left_sym in accept_symbol and left_sym not in " 3":
direction = (-1, 0)
elif top_sym in accept_symbol and top_sym not in " 3":
direction = (0, -1)
if direction is None:
raise MotifStructureError(
f"Strand at position {x, y}" " has no direction."
)
# get the strand direction
start_pos = (x + direction[0], y + direction[1])
# get the strand characters
strand_chars = Motif.trace(
motif_list, start_pos, direction, limits=(max_x, max_y)
)
# create the Strand object
strand = Strand(
strand_chars,
directionality="53",
start=start_pos,
direction=direction,
)
# add the Strand to the list
strand_list.append(strand)
# update the visited positions
mapped_pos.update(set(strand.positions))
### TRACE THE BASEPAIRS ###
basepair = BasePair()
for y, line in enumerate(motif_list):
for x, sym in enumerate(line):
pos1 = None
# if found a basepair symbol, add the position to the dictionary
if sym in "┊!*:": # vertical basepair
pos1 = (x, y - 1)
pos2 = (x, y + 1)
elif sym in "=": # horizontal basepair
pos1 = (x - 1, y)
pos2 = (x + 1, y)
if pos1 is not None:
basepair[Position(pos1)] = Position(pos2)
return cls(strands=strand_list, basepair=basepair, **kwargs)
[docs]
@classmethod
def from_structure(
cls,
structure: Optional[Union[str, dict, BasePair, Node]] = None,
sequence: Optional[str] = None,
pk_energy=-8.5,
pk_denergy=0.5,
**kwargs,
) -> "Motif":
"""
Parse a structure or sequence representation to a Motif object.
If a structure is not provided, it is calculated from the sequence
with RNAfold. If a sequence is not provided, it is assumed to be
a sequence of 'N's of the same length as the structure.
Parameters
----------
structure : Union[str, dict, BasePair, Node]
The structure representation to convert.
sequence : str, optional
The sequence or sequence constraints of the motif.
pk_energy : float, optional
The energy of the pseudoknots (if present).
pk_denergy : float, optional
The energy tolerance of the pseudoknots (if present).
**kwargs : dict
Additional arguments to pass to the Motif constructor.
Returns
-------
Motif
The Motif object created from the structure representation.
"""
# import here to avoid circular imports
from .origami import Origami
from RNA import fold
if not structure:
# if only sequence is provided, fold it to get the structure
structure = fold(sequence)[0]
for i, sym in enumerate(sequence[::-1]):
if sym == "&":
structure = structure[:-i] + "&" + structure[-i:]
# input dot-bracket notation
if isinstance(structure, str):
node = dot_bracket_to_tree(structure, sequence=sequence)
pair_map = dot_bracket_to_pair_map(structure)
# input pair map
elif isinstance(structure, (BasePair, dict)):
pair_map = structure.copy()
node = dot_bracket_to_tree(
pair_map_to_dot_bracket(structure), sequence=sequence
)
structure = pair_map_to_dot_bracket(structure)
# input tree
elif isinstance(structure, Node):
node = structure
pair_map = dot_bracket_to_pair_map(tree_to_dot_bracket(node))
structure = tree_to_dot_bracket(node)
else:
raise ValueError(f"Invalid structure representation: {structure}")
if not sequence:
sequence = "".join("N" if sym != "&" else "&" for sym in structure)
else:
sequence = str(sequence).replace("T", "U").upper()
if isinstance(structure, str) and len(structure.strip("& ")) != len(
sequence.strip("& ")
):
raise ValueError(
f"The sequence length must be equal to the structure "
f"length. Got sequence len {len(sequence)} for structure"
f" len {len(structure)}."
)
# initialize the origami object
origami = Origami([[]], align="first", ss_assembly=True)
current_index = [0, 0]
def recursive_build_origami(node, insert_at=None, flip=False):
"""
Recursively build the origami from the tree representation.
"""
# nonlocal current_index
# initialize the variables
if insert_at is None:
insert_at = current_index
motif = None
### BASE CASES: sequence break, unpaired nucleotide, stem
if node.label == "&":
return
if node.label == "(":
motif = Motif(
Strand(node.seq if node.seq else "N"),
Strand(
sequence[pair_map[node.index]],
start=(0, 2),
directionality="35",
),
basepair={(0, 0): (0, 2)},
)
elif node.label == ".":
motif = Motif(
Strand(node.seq if node.seq else "N"), Strand("-", start=(0, 2))
)
# add the motif and update the current index
if motif:
origami.insert(insert_at, motif.flip(flip, flip))
current_index[1] += 1 # increment the x index
# recursive call for the children
if node.children:
child_inds = []
# check each child before recursive call
for i, child in enumerate(node.children):
insert_at = None
flip = False
# bulge after a stem
if child.label == "." and any(
c.label == "(" for c in node.children[:i]
):
insert_at = child_inds.pop()
flip = True
# sequence break + only unpaired
elif child.label in ".&" and all(
c.label in ".&" for c in node.children
):
if "&" in [c.label for c in node.children[:i]]:
insert_at = child_inds.pop()
flip = True
# sequence break or multiple stems
elif child.label == "&" or (
child.label == "("
and any(c.label == "(" for c in node.children[:i])
):
connect_down = Motif(
Strand("──"),
Strand("╮", start=(0, 2), directionality="35"),
Strand("╭", start=(1, 2), direction=(0, -1)),
)
connect_up = Motif(
Strand("││╰─", direction=(0, 1), directionality="35"),
Strand("╰", start=(1, 0), direction=(0, 1)),
)
if child_inds:
insert_connect = child_inds.pop()
else:
insert_connect = current_index
# insert the top connector
origami.insert(insert_connect, connect_down)
shift_x = sum(
[
m.num_char
for m in origami[insert_connect[0], : insert_connect[1]]
]
)
connect_up.shift((shift_x, 0))
origami.append([connect_up])
# increment the y index
current_index[0] += 1
# set the x index to the end of the line
current_index[1] = len(origami[-1])
for i in range(insert_connect[0] + 1, current_index[0]):
# add the vertical connector
origami.insert(
(i, 0),
Motif(
Strand("│", direction=(0, 1), directionality="35"),
Strand("│", direction=(0, 1), start=(1, 0)),
).shift((shift_x, 0)),
)
# shift all the motifs until you reach the first connector
for m in origami[i, 1:]:
m.shift((2, 0))
if "││╰─" in m:
break
if insert_at is None:
insert_at = current_index.copy()
child_inds.append(insert_at)
recursive_build_origami(child, insert_at=insert_at, flip=flip)
# this could not work in the case a stem doesn't end with at least
# one unpaired nucleotide, but that does never happen in natural
# structures, so we can ignore this case
if not any(c.children or c.label == "&" for c in node.children):
origami.append(Motif(Strand("╮│╯")))
current_index[1] -= 1 # decrement the x index
# call the recursive function
recursive_build_origami(node)
# get the motif from the origami object
motif = origami.assembled
### ADD THE PSEUDOKNOTS ###
seq_offset = 0
# dictionary with index as key and pseudoknot id as value
full_map = dict()
pair_map = dot_bracket_to_pair_map(structure.replace("&", ""))
# iterate over the subsequences
for i, struct in enumerate(structure.split("&")):
new_pk_info = {"id": [], "ind_fwd": [], "E": [], "dE": []}
j = 0
ss_len = len(struct)
# iterate over the subsequences structure
while j < ss_len:
sym = struct[j]
# found pseudoknot
if sym not in ".()" and (seq_offset + j) not in full_map:
# get the length of the pseudoknot
length = 1
while struct[j + length] == sym:
length += 1
# get the pseudoknot id of get a new one
if pair_map[seq_offset + j] in full_map:
pk_id = full_map[pair_map[seq_offset + j]] + "'"
else:
inds = [k.split("_")[1].strip("'") for k in full_map.values()]
pk_id = "100_" + str(int(max(inds, default="-1")) + 1)
# add the pseudoknot to the motif
new_pk_info["id"].append(pk_id)
new_pk_info["ind_fwd"].append((j, j + length - 1))
indices = range(seq_offset + j, seq_offset + j + length)
# update the full map
full_map.update({k: pk_id for k in indices})
new_pk_info["E"].append(pk_energy)
new_pk_info["dE"].append(pk_denergy)
j += length
j += 1
# add the pseudoknots info to the strand
motif[i].pk_info = new_pk_info
seq_offset += len(struct)
kwargs.setdefault("lock_coords", False)
obj = cls(**kwargs)
obj.replace_all_strands(motif, copy=False, join=False)
return obj
[docs]
@classmethod
def from_text(cls, motif_text: str, **kwargs) -> "Motif":
"""
Create a Motif object from a text string representing a motif sketch.
Each Strand is read starting from the `5` symbol. If you want to add the 5'
terminal symbol, start the strand with `55`. Only one symbol should be placed
next to the `5` start of the strand in order to guess the right direction.
Alternatively, you can use symbols `^v><` to start a strand and indicate
the start direction (up, down, right, left); in this case you can place
multiple symbols next to the start symbol.
Parameters
----------
motif_text : str
The motif structure in text format.
**kwargs : dict
Additional arguments to pass to the Motif constructor.
Returns
-------
Motif
The constructed Motif object.
"""
motif_list = [line for line in motif_text.split("\n")]
return cls.from_list(motif_list, **kwargs)
###
### STATIC METHODS
###
[docs]
@staticmethod
def align(
*motifs: List["Motif"],
axis: Literal[0, 1] = 1,
extend: bool = False,
align_junctions: Optional[List[Tuple[int, int]]] = None,
align_to: Literal["first", "last", "center"] = "first",
return_shifts: bool = False,
**kwargs,
) -> List["Motif"] | Tuple[List["Motif"], List[Tuple[int, int]]]:
"""
Align motifs along a given axis by shifting them (without concatenating).
Parameters
----------
*motifs : List[Motif]
List of motifs to align.
axis : int, default 1
The numpy axis along which motifs are aligned
(1 for horizontal, 0 for vertical).
extend : bool, default False
If True, junctions are extended to accommodate the shift.
align_junctions : Optional[List[Tuple[int, int]]], default None
List of tuples specifying which junctions to align.
align_to : str, default "first"
Specifies how to align the motifs. Options: "first", "last", "center".
"first" aligns the first junction of the first motif with the first junction
of the second motif. "last" aligns the last junction of the first motif
with the last junction of the second motif. "center" aligns the center
of the first motif with the center of the second motif.
return_shifts : bool, default False
If True, returns the shifts applied to each motif during alignment,
along with the aligned motifs (useful for Origami alignment).
If False, only the aligned motifs are returned.
**kwargs : dict
Parse if calling the motifs list explicitly.
Returns
-------
List[Motif]
The list of aligned motifs.
List[Tuple[int, int]], optional
The list of shifts applied to each motif during alignment.
Only returned if return_shifts is True.
"""
### get the alignment direction
if axis in (1, 0):
direction = Position((axis, int(not axis)))
else:
raise ValueError(
f"{axis} is not a valid value for the axis parameter."
" The axis parameter must be 0 or 1"
)
# the direction to shift is the opposite of the direction of the axis
shift_direction = Position((int(not axis), axis))
### check alignment type
if align_to not in ("first", "last", "center"):
raise ValueError(
f"{align_to} is not a valid value for the align_to"
"parameter. The align_to parameter must be"
' "first", "last" or "center"'
)
### check if the motifs are of the right type
if (
len(motifs) == 1
and isinstance(motifs[0], Iterable)
and not isinstance(motifs[0], Motif)
):
# an iterable of motifs is passed
motifs = tuple(motifs[0])
extra = tuple(kwargs.pop("motifs", tuple()))
if isinstance(extra, Motif):
motifs += (extra,)
elif isinstance(extra, Iterable):
motifs += tuple(extra)
# check if all the motifs are of type Motif
if not all(isinstance(m, Motif) for m in motifs):
raise ValueError("All the motifs must be of type Motif.")
# if shifts are requested, save them
if return_shifts:
shifts = [Position((0, 0)) for _ in motifs]
### align all the motifs
ind1 = 0
ind2 = 1
n_motifs = len(motifs)
# start with the assumption that all the motifs are connected
connected_motifs = [True] * n_motifs
while ind2 < n_motifs: # stop when the second motif is the last one
m1 = motifs[ind1]
m2 = motifs[ind2]
if align_junctions and align_junctions[ind1]:
# align the junction with the given index
junct_ind1, junct_ind2 = align_junctions[ind1]
else:
# align the first junction of the first motif
# with the first junction of the second motif
junct_ind1, junct_ind2 = 0, 0 # by default align the first junction
if align_to == "last":
junct_ind1, junct_ind2 = -1, -1
junctions1 = m1.junctions[direction]
junctions2 = m2.junctions[-direction]
### calculate the shift of the two motifs
# and shift the motif with the lower junction
if align_to == "center":
# if align_to is center align to the center of the motifs
shift = int(
(m1.max_pos[axis] + m1.min_pos[axis]) / 2
- (m2.max_pos[axis] + m2.min_pos[axis]) / 2
)
elif (
junctions1
and junctions2
and junct_ind1 < len(junctions1)
and junct_ind2 < len(junctions2)
):
# if junctions are detected, calculate the shift based on the junctions
shift = junctions1[junct_ind1][axis] - junctions2[junct_ind2][axis]
else: # nothing to align, the motifs are not connected
connected_motifs[ind1] = False # mark the first motif as not connected
shift = 0
shift_pos = shift_direction * shift
if shift > 0:
# shift only the newly appended motif
m2.shift(shift_pos, extend=extend)
# update the shifts
if return_shifts:
shifts[ind2] = shifts[ind2] + shift_pos
elif shift < 0:
shift_pos = -shift_pos
# shift the left motif and all the previous one if they are connected
for j in range(ind1, -1, -1):
if not connected_motifs[j]:
break
motifs[j].shift(shift_pos, extend=extend)
# update the shifts
if return_shifts:
shifts[j] = shifts[j] + shift_pos
# update the indices
ind1 += 1
ind2 += 1
if return_shifts:
# if the shifts are requested, return them
return motifs, shifts
# if the shifts are not requested, return the motifs
return motifs
[docs]
@staticmethod
def copy_strands_preserve_blocks(
strands: List["Strand"], motif: Optional["Motif"] = None
) -> List["Strand"]:
"""
Copy a list of strands while preserving strand blocks.
Ensures that strands that belong to the same motif remain linked together.
Parameters
----------
strands : list of Strand
The list of strands to be copied.
motif : Motif, optional
If provided, associates the strands with the given motif and registers
callback functions.
Returns
-------
list of Strand
A list of copied strands.
"""
# IMPORTANT: keep strand that are part of the same motif linked
# get the set of strands block id
motifs_id = {id(s.strands_block) for s in strands}
# for each strands block, make a new one and link to the old id
new_motifs_dict = {key: StrandsBlock() for key in motifs_id}
strands_copy = []
# copy all strands
for strand in strands:
# check that the strands are in the strand class
if not isinstance(strand, Strand):
raise ValueError(f"{strand} is not a Strand object.")
# collect the new strands block
new_strands_block = new_motifs_dict[id(strand.strands_block)]
# copy the strand and add the callback
if motif is not None:
copied = strand.copy(callback=motif._updated_strands)
else:
copied = strand.copy()
strands_copy.append(copied)
new_strands_block.add(copied) # add the origami to the new strands block
return strands_copy
[docs]
@staticmethod
def get_sequential_shift(
motifs: List["Motif"],
*args,
axis: Literal[0, 1] = 1,
position_based: bool = True,
) -> List[int]:
"""
Calculate the shift needed to align motifs sequentially.
Parameters
----------
motifs : List[Motif]
List of motifs to align.
*args: List[Motif]
Additional motifs to align.
axis : int, default 1
The numpy axis along which the shift is computed
(1 for horizontal, 0 for vertical).
position_based : bool, default True
If True, the shift is calculated based on motif positions
rather than junctions.
Returns
-------
List[int]
A list of shift integers for each motif along the axis.
Examples
--------
.. code-block:: text
m1:
--NN--
::
--NN--
m2:
--SK--
::
--SK--
Resulting shift: 2
[0, 2]
To have:
--NN----SK--
:: ::
--NN----SK--
"""
if isinstance(motifs, Motif):
motifs = [motifs]
# put the motifs in a list and remove the empty motifs
motifs = [m for m in motifs + list(args) if m]
# trick to handle all the axis at once
x_pos = axis
y_pos = int(not axis)
shifts = [0]
current_shift = 0
for ind, m1 in enumerate(motifs[:-1]):
m2 = motifs[ind + 1]
# calculate the shift based on the positions
if position_based:
max_pos_m1 = m1.max_pos[y_pos]
min_pos_m2 = m2.min_pos[y_pos]
# calculate the shift based on the junctions
else:
m1_junct = m1.junctions
m2_junct = m2.junctions
# if the junctions are detected, calculate the shift
# based on the junctions
junc_dir = Position((x_pos, y_pos))
if m1_junct[junc_dir] and m2_junct[-junc_dir]:
max_pos_m1 = max((pos[y_pos] for pos in m1_junct[junc_dir]))
min_pos_m2 = min((pos[y_pos] for pos in m2_junct[-junc_dir]))
else: # no junctions, just concatenate the motifs
max_pos_m1 = m1.max_pos[y_pos]
min_pos_m2 = 0
# update the shift
current_shift += max_pos_m1 - min_pos_m2 + 1
shifts.append(current_shift)
return shifts
[docs]
@staticmethod
def join_strands(strands: List["Strand"]) -> List["Strand"]:
"""
Attempt to join consecutive strands and return the list of joined strands.
Parameters
----------
strands : List[Strand]
The list of strands to be joined.
Returns
-------
List[Strand]
The list of joined strands.
"""
# remove the empty strands
strands = [s for s in strands if s]
# ordering strand is:
# - the strands with 5' end
# - strand with the lowest y start position
# - strand with the lowest x start position
strands = sorted(strands, key=lambda s: (-int("5" in s.strand), *s.start[::-1]))
# Comments for coders: I tried different ways to join the strands,
# especially building a graph of joinable strands then using Depth First Search
# to join the adjacent strands; but this version of the code is faster
joined_strands = set()
for ind1, s1 in enumerate(strands[:-1]):
# if the strand is empty, or is already joined skip it
if ind1 in joined_strands:
continue
# join the strand with the consecutive strands
# until no more strands are joined
while True:
current_len = len(joined_strands)
for ind2, s2 in enumerate(strands):
# skip s2 is the same strand or already joined
if ind1 == ind2 or ind2 in joined_strands:
continue
# skip if the motifs are not joinable for sure
if (
len(
set(
(
s1.prec_pos,
s1.start,
s1.end,
s1.next_pos,
s2.prec_pos,
s2.start,
s2.end,
s2.next_pos,
)
)
)
== 8
):
continue
# try to join the strands
joined = Strand.join_strands(s1, s2)
# the strands are joined
if joined is not None:
# add the index of strand2 to the joined strands
joined_strands.add(ind2)
# replace the first strand with the joined strand
strands[ind1] = joined
# update the first strand
s1 = joined
# if nothing is joined, break the loop
if current_len == len(joined_strands):
break
# return the strands excluded the joined ones
return [s for i, s in enumerate(strands) if i not in joined_strands]
[docs]
@staticmethod
def trace(
motif_list: List[str],
pos: Tuple[int, int],
direction: Tuple[int, int],
limits: Tuple[int, int],
) -> str:
"""
Trace strands in a motif representation in a recursive manner.
Parameters
----------
motif_list : list of str
The list of strings of the motif sketch.
pos : tuple of int
The starting position.
direction : tuple of int
The tracing direction.
limits : tuple of int
The boundary limits.
Returns
-------
str
The traced strand sequence.
"""
# pos is a tuple (x,y) of the character we are considering
# so it is a tuple (char_ind, line_ind)
x = pos[0]
y = pos[1]
# We reached an adge
if x > limits[0] or x == -1 or y > limits[1] or y == -1:
return ""
# Read the new symbol
sym = motif_list[y][x]
# Terminal symbols
if sym == "3":
return sym
elif sym == " " or sym not in accept_symbol:
return ""
# Direction turns
elif sym in "╰╮\\": # reverse the tuple
direction = direction[::-1]
elif sym in "╭╯/": # reverse the tuple and change sign
direction = (-direction[1], -direction[0])
# Error
elif sym in bp_symbols:
raise MotifStructureError("The strand leads to a base pairing")
# Go to the next position
pos = (pos[0] + direction[0], pos[1] + direction[1])
# Recursive call
return sym + Motif.trace(motif_list, pos, direction, limits)
###
### PROTECTED METHODS
###
def _calculate_basepair(self) -> None:
"""
Calculate and store base pairings in the `_basepair` attribute.
The method determines base pairing by identifying complementary bases
that are positioned one step away (horizontally or vertically). The
base pairing dictionary (using positions as key/value) is stored in
the `_basepair` attribute as a Basepair object.
"""
basepair = BasePair(callback=self._updated_basepair)
pos_to_ind = {pos: ind for ind, pos in enumerate(self.seq_positions)}
sequence = str(self.sequence).replace("&", "")
for pos1, ind1 in pos_to_ind.items():
base1 = sequence[ind1]
# skip already paired pos
if pos1 in basepair:
continue
# control if there is a complementary base in all the directions
for d in Direction:
# calculate the second position
pos2 = pos1 + d * 2
# skip the positions already paired or not in the base map
if pos2 in basepair or pos2 not in pos_to_ind:
continue
# if the basepair position is already taken, skip
bp_pos = pos1 + d
if bp_pos in self._positions:
continue
# check the bases can pair
base2 = sequence[pos_to_ind[pos2]]
if base2 in base_pairing[base1]:
basepair[Position(pos1)] = Position(pos2)
break
self._basepair = basepair
def _calculate_positions(self) -> None:
"""
Calculate the 2D properties of the strand.
This function calculates the properties:
- positions
- seq_positions
- max_pos
- min_pos
- junctions
"""
### initialize the variables
positions = ()
seq_positions = ()
max_pos = [0, 0]
min_pos = [float("inf"), float("inf")]
### initialize the junctions
junctions_dict = {direct: [] for direct in Direction}
for ind, s in enumerate(self):
if not s:
continue
positions += s.positions
seq_dir = 1 if s.directionality == "53" else -1
seq_positions += s._seq_positions[::seq_dir]
# update MAX/MIN positions
if s._max_pos[0] > max_pos[0]:
max_pos[0] = s._max_pos[0]
if s._max_pos[1] > max_pos[1]:
max_pos[1] = s._max_pos[1]
if s._min_pos[0] < min_pos[0]:
min_pos[0] = s._min_pos[0]
if s._min_pos[1] < min_pos[1]:
min_pos[1] = s._min_pos[1]
# skip if the the start is '5' or '3' and the strand is not just the symbol
if s[0] not in "35":
# invert the start direction to have the direction of the junction
junctions_dict[-s.direction].append(s.start)
# skip if the the end is '5' or '3' and the strand is not just the symbol
if s[-1] not in "35":
junctions_dict[s.end_direction].append(s.end)
### order the junctions
for key, val in junctions_dict.items():
# order the bottom/top junctions
if key in (Direction.DOWN, Direction.UP):
val.sort() # order the junctions in growing x and y
# order the left/right junctions
elif key in (Direction.LEFT, Direction.RIGHT):
val.sort(key=lambda pos: pos.swap_xy()) # order in growing y and x
#### convert the junctions to tuple
self._junctions = {key: tuple(val) for key, val in junctions_dict.items()}
# save the other positional properties
self._positions = positions
self._seq_positions = seq_positions
self._max_pos = Position(max_pos)
self._min_pos = Position((c if c != float("inf") else 0 for c in min_pos))
def _check_addition(
self, other: "Motif", direction: Union[Position, Tuple[int, int]] = None
) -> None:
"""
Check whether two motifs can be added together in a given direction.
The function ensures that the motifs have compatible junctions for addition.
It raises an error if they cannot be added.
Parameters
----------
other : Motif
The motif to be added.
direction : tuple of int, optional
The direction in which the motifs should be checked for addition.
Default is Direction(1, 0) (right).
Raises
------
ValueError
If `other` is not an instance of `Motif`.
MotifStructureError
If the motifs do not have compatible junctions.
"""
# if one of the two motifs is empty, return without error
if not self or not other:
return
if not isinstance(other, Motif):
raise ValueError(f"{other} is not a valid type for addition")
if direction is None:
direction = Direction.RIGHT
elif not isinstance(direction, (Direction, Position)):
direction = Position(direction)
# take the junctions of the left and right side of the motifs
Strand._check_position(input_pos_dir=direction, direction=True)
junction1 = self.junctions[direction]
junction2 = other.junctions[direction * -1]
### SANITY CHECKS
if not junction1 or not junction2:
raise MotifStructureError(
f"The motifs cannot be added in the direction {direction}, "
"the junctions are missing. Junctions"
f" motif1: {junction1}, Junctions motif2: {junction2}. If you want"
" to concatenate the motifs, use pf.Motif.concat() method."
)
def _updated_basepair(self, **kwargs) -> None:
"""
Update the base-pair dictionary when the sequence changes.
This is a callback function triggered when modifications occur to the sequence
or base-pair mappings, ensuring consistency in structural relationships.
Parameters
----------
**kwargs
Additional arguments that may be passed during the update process.
Notes
-----
This function resets the dot-bracket notation representation of the motif.
"""
self._structure = None # reset the dot bracket
self._pair_map = BasePair()
self._trigger_callbacks(**kwargs)
def _updated_strands(self, **kwargs) -> None:
"""
Update the motif when the strands are changed.
This is a callback function that is triggered when the strands of the motif
are modified, ensuring that cached properties (e.g., structure, base pairs)
remain consistent.
Parameters
----------
**kwargs
Additional arguments that may be passed during the update process.
Notes
-----
This method resets cached attributes such as:
- Sequence
- Position mappings
- Base-pair structures
"""
self._sequence = ""
self._positions = None
if self._autopairing:
self._basepair = BasePair()
self._structure = None
self._updated_basepair()
self._trigger_callbacks(**kwargs)
###
### METHODS
###
[docs]
def append(
self, strand: "Strand", join: bool = True, copy: bool = False
) -> "Motif":
"""
Append a strand to the motif.
Parameters
----------
strand : Strand
The strand to be added.
join : bool, default True
Whether to attempt joining the new strand with existing strands.
copy : bool, default False
If True, a copy of the strand is appended.
Returns
-------
Motif
The updated motif.
Raises
------
ValueError
If the provided `strand` is not a `Strand` instance.
"""
if not isinstance(strand, Strand):
raise ValueError(f"{strand} is not a Strand object.")
if copy:
strand = strand.copy()
strand.register_callback(self._updated_strands)
self._strands.append(strand)
if join:
self._strands = self.join_strands(self._strands)
if self.lock_coords:
self._strands_block = StrandsBlock(*[s for s in self._strands])
self._updated_strands()
return self
[docs]
def copy(self, callback=None, **kwargs) -> "Motif":
"""
Return a copy of the motif.
This method creates a custom deep copy of the current motif, preserving
its strand arrangements, base pairings, and additional attributes.
Parameters
----------
callback : callable, default None
A callback function to be registered in the copied motif.
**kwargs : dict
If the keyword 'deepcopy' with a list of attributes names is passed,
the attributes will deepcopyed instead referenced.
Same for the keyword 'copy' with a list of attributes names.
Returns
-------
Motif or subclass
A new instance of the motif or its subclass, with the same properties
and attributes as the original.
"""
new_motif = type(self).__new__(type(self))
# attributes that are calculated fresh or immutable that
# can be just reassigned
basic_properties = {
"_autopairing",
"_lock_coords",
"_structure",
"_positions",
"_seq_positions",
"_max_pos",
"_min_pos",
"_sequence",
}
for prop in basic_properties:
setattr(new_motif, prop, getattr(self, prop))
# attributes that have to be teatred differently
avoid_to_copy = {
"_basepair",
"_junctions",
"_pair_map",
"_strands_block",
"_strands",
"_callbacks",
}
new_motif._strands = self.copy_strands_preserve_blocks(self._strands, new_motif)
if self._lock_coords and new_motif._strands:
new_motif._strands_block = new_motif[0].strands_block
new_motif._junctions = {k: tuple(v) for k, v in self._junctions.items()}
new_motif._basepair = self._basepair.copy()
new_motif._pair_map = self._pair_map.copy()
new_motif._callbacks = [callback] if callback else []
### The other attributes should be freshly calculated
# or immutable, so just reassign them
for attr in self.__dict__:
# deepcopy the attribute if it is in the deepcopy list
if attr in kwargs.get("deepcopy", ()):
setattr(new_motif, attr, copy.deepcopy(getattr(self, attr)))
continue
# copy the attribute if it is in the copy list
elif attr in kwargs.get("copy", ()):
setattr(new_motif, attr, getattr(self, attr).copy())
continue
# check if the attribute is to avoid or already copied
if attr in avoid_to_copy or attr in basic_properties:
continue
# reference the attribute
setattr(new_motif, attr, getattr(self, attr))
return new_motif
[docs]
def extend_junctions(
self,
skip_axis: Literal[None, 0, 1] = None,
skip_directions: List[Direction] = None,
until: Optional[Position] = None,
) -> "Motif":
"""
Extend the junctions of the motif in the direction of the junctions.
Unless the skip_axis is specified, the junctions are extended
in all directions. The until parameter allows to specify
the position until which to extend the junctions in the positive direction.
Parameters
----------
skip_axis: int, optional
The numpy axis to skip when extending the junctions
(1 for horizontal, 0 for vertical).
skip_directions: List[Direction], optional
The list of directions to skip when extending the junctions.
until : Position, optional
The position until which to extend the junctions
If None, extend until the end of the motif in all directions
(to the origin and to the maximum position).
Returns
-------
Motif
The motif with the extended junctions.
Raises
------
ValueError
If the axis parameter is not None, 0 or 1.
MotifStructureError
If there is an error in extending the junction
"""
if skip_directions is None:
skip_directions = []
if skip_axis in (0, 1):
skip_directions.append((skip_axis, int(not skip_axis)))
skip_directions.append((-skip_axis, -int(not skip_axis)))
elif skip_axis:
raise ValueError(
f"{skip_axis} is not a valid value for the axis."
" You can skip the axis 0 or 1"
)
# make a dictionary from positions to index
index_map = {p: ind for ind, s in enumerate(self) for p in s.positions}
# create a dictionry from junction direction to strand index
j_to_ind = {
d: {index_map[pos] for pos in pos_list}
for d, pos_list in self.junctions.items()
if d not in skip_directions
}
if until is None:
until = self.max_pos
try:
# this code doens't check for eventual overlaps,
# so it can lead to MotifStructureError if the junctions are not extendable
for d, inds in j_to_ind.items():
for i in inds:
s = self[i]
if -1 in d:
s.extend(direction=d, check=False)
# if it's a positive direction and until is None, extend to max_pos
if 1 in d:
s.extend(direction=d, until=until, check=False)
# The following code works, it's more efficient, and check for overlaps,
# but it's hard to maintain, doesn't keep directionality check and understand.
# So better using the simpler version above.
# Here the previous code:
# axis = [d for d in Direction]
# for axis, sym in ((1, '─'), (0, '│')): # consider the two axis
# naxis = int(not axis)
# neg_direction = Position((- axis, - naxis))
# pos_direction = Position((axis, naxis))
# if neg_direction not in skip_directions:
# # consider all the junction positions in the negative direction
# for pos in juncts[neg_direction]:
# # if the position is not at the minimum border
# if pos[naxis] != 0:
# # add a strand from the position and going to the border
# extend_strand = Strand(sym * pos[naxis],
# start=(pos[0] * naxis,
# pos[1] * axis),
# direction=pos_direction)
# # check that the extension doesn't overlap
# # with other strands
# if not (set(extend_strand.positions)
# & set(index_map)):
# # get the strand at the position
# strand_at_pos = self[index_map[pos]]
# strand_at_pos.join(extend_strand)
# if pos_direction not in skip_directions:
# for pos in juncts[pos_direction]:
# strand_at_pos = self[index_map[pos]]
# max_pos = self.max_pos[naxis]
# # set the maximum position to the until parameter
# if until[naxis]:
# max_pos = until[naxis]
# if pos[naxis] < max_pos:
# # add a strand from the position and going to the border,
# extend_strand = Strand(sym * (max_pos - pos[naxis]),
# start=(pos[0] + 1 * axis,
# pos[1] + 1 * naxis),
# direction=pos_direction)
# # check that the extension doesn't overlap
# # with other strands
# if not (set(extend_strand.positions)
# & set(index_map)):
# strand_at_pos.join(extend_strand)
except MotifStructureError as e:
print(f"Error in extending junction at strand {s}. Full error:")
print("\t", e)
return self
[docs]
def flip(
self,
horizontally: bool = True,
vertically: bool = True,
strand_index: list = None,
reorder=False,
) -> "Motif":
"""
Flip the strands of the motif horizontally and/or vertically inplace.
Parameters
----------
horizontally : bool, default True
If True, flip the strands horizontally.
vertically : bool, default True
If True, flip the strands vertically.
strand_index : list, default None
The list of indices of the strands to flip.
If None, all the strands are flipped.
reorder : bool, default False
If True, reorder the strands after flipping,
so that the first strand is the one with the 5' end,
then the strand with the lowest y start position,
and finally the strand with the lowest x start position.
Returns
-------
Motif
The motif with the flipped strands.
"""
# save the initial index of character and lines,
# (it changes every time you change a strand otherwise)
idx_char = self.num_char - 1
idx_lines = self.num_lines - 1
# create new basepair dictionary in case autopairing is off
new_basepair_dict = BasePair()
for pos1, pos2 in self.basepair.items():
if horizontally:
pos1 = (idx_char - pos1[0], pos1[1]) # flip the horizontal position
pos2 = (idx_char - pos2[0], pos2[1]) # flip the horizontal position
if vertically:
pos1 = (pos1[0], idx_lines - pos1[1]) # flip the vertical position
pos2 = (pos2[0], idx_lines - pos2[1]) # flip the vertical position
new_basepair_dict[Position(pos1)] = Position(pos2)
self._basepair = new_basepair_dict # save the new basepair
for ind, strand in enumerate(self):
# flip only the strands in the strand_index list, if it is not None
if strand_index and ind not in strand_index:
continue
# flip the start position of the strands:
# the new start is the border - the previous position
new_start = list(strand.start)
if horizontally:
new_start[0] = idx_char - strand.start[0]
if vertically:
new_start[1] = idx_lines - strand.start[1]
strand.start = new_start
strand.flip(horizontally, vertically, flip_start=False)
if reorder:
# reorder the strands in order to have:
# - the strands with 5' end
# - strand with the lowest y start position
# - strand with the lowest x start position
self._strands.sort(key=lambda s: (-int("5" in s.strand), *s.start[::-1]))
return self
@wraps(fold_bar) # inherit the documentation from the function
def folding_barriers(self, kl_delay: int = 150) -> Tuple[str, int]:
return fold_bar(self.structure, kl_delay)
[docs]
def get_strand_index_map(self) -> Dict[Tuple[int, int], int]:
"""
Get a dictionary of positions as keys and the index of the
strand in the motif as values.
Returns
-------
Dict[Tuple[int, int], int]
A dictionary mapping positions to strand indexes
"""
return {pos: ind for ind, strand in enumerate(self) for pos in strand.positions}
[docs]
def get_position_map(self) -> Dict[Tuple[int, int], str]:
"""
Get a dictionary of positions as keys and the corresponding
characters as values.
Returns
-------
Dict[Tuple[int, int], str]
A dictionary mapping positions to characters.
"""
return {
pos: sym
for strand in self
for pos, sym in zip(strand.positions, strand.strand)
}
[docs]
def insert(
self, index: int, strand: "Strand", join: bool = True, copy: bool = False
) -> "Motif":
"""
Insert a strand at a given index in the motif.
Parameters
----------
index : int
The index at which to insert the strand.
strand : Strand
The strand to be inserted.
join : bool, default True
Whether to attempt joining the new strand with existing strands.
copy : bool, default False
If True, a copy of the strand is inserted.
Returns
-------
Motif
The updated motif.
Raises
------
ValueError
If `strand` is not a `Strand` instance.
"""
if not isinstance(strand, Strand):
raise ValueError(f"{strand} is not a Strand object.")
if copy:
strand = strand.copy()
strand.register_callback(self._updated_strands)
self._strands.insert(index, strand)
if join:
self._strands = self.join_strands(self._strands)
if self.lock_coords:
self._strands_block = StrandsBlock(*[s for s in self._strands])
self._updated_strands()
return self
[docs]
def pop(self, index: int = -1) -> "Strand":
"""
Remove and return the strand at the specified index.
Parameters
----------
index : int, default -1
The index of the strand to remove.
Returns
-------
Strand
The removed strand.
"""
strand = self._strands.pop(index)
# REMOVE THE PAIRS IN WHICH THE STRAND IS INVOLVED
new_basepair = BasePair()
for k, v in self._basepair.items():
if k in strand.positions or v in strand.positions:
continue
new_basepair[Position(k)] = Position(v)
self._basepair = new_basepair
strand._clear_callbacks()
self._updated_strands()
self._updated_basepair()
if self.lock_coords:
self._strands_block = StrandsBlock(*[s for s in self._strands])
return strand
[docs]
def replace_all_strands(
self, strands: List["Strand"], copy: bool = True, join: bool = False
) -> "Motif":
"""
Replace all the strands in the motif with the provided list of strands.
Parameters
----------
strands : List[Strand]
The list of strands to replace the current strands.
copy : bool, default True
If True, the strands are copied before replacing the current strands.
join : bool, default False
Whether to attempt joining the new strands.
Returns
-------
Motif
The updated motif.
Raises
------
ValueError
If any of the provided strands is not a `Strand` instance.
"""
# copy the strands if needed, registart callbacks
if copy:
new_strands = self.copy_strands_preserve_blocks(strands, self)
else:
new_strands = strands
for s in strands:
if not isinstance(s, Strand):
raise ValueError(f"{s} is not a Strand object.")
s.register_callback(self._updated_strands)
# join the strands if needed
if join:
new_strands = self.join_strands(new_strands)
self._strands = new_strands
if self.lock_coords:
self._strands_block = StrandsBlock(*[s for s in self._strands])
# this will also update the basepair dictionary
self._updated_strands()
return self
[docs]
def rotate(self, times: int = 1) -> "Motif":
"""
Rotate the motif 90 degrees clockwise.
Parameters
----------
times : int, default 1
The number of times to rotate the motif.
Returns
-------
Motif
The rotated motif.
"""
for _ in range(times % 4):
# collect the num_lines before is updated
num_lines = self.num_lines
for s in self:
sign = +1
# when rotating from vertical to horizontal: change sign
if s._direction[1]:
sign = -1
s._start = s._start.replace(x=num_lines - 1 - s.start[1], y=s.start[0])
s._direction = s._direction.replace(
x=sign * s._direction[1], y=sign * s._direction[0]
)
# adjust the sequence symbols
s.strand = s.strand.translate(rotate_90)
# rotate the basepair dictionary
if not self._autopairing and self._basepair:
new_bp = BasePair()
for k, v in self._basepair.items():
new_k = (num_lines - 1 - k[1], k[0])
new_v = (num_lines - 1 - v[1], v[0])
new_bp[Position(new_k)] = Position(new_v)
self._basepair = new_bp
return self
[docs]
def save_3d_model(
self: Union["Motif", "Origami"],
filename: str = "motif",
config: bool = True,
topology: bool = True,
forces: bool = False,
pk_forces: bool = False,
return_text: bool = False,
sequence: str = None,
pdb: bool = False,
box_size: Iterable[float] = (1000.0, 1000.0, 1000.0),
**kwargs,
) -> Optional[Tuple[str, str]]:
"""
Save the motif 3D structure in a file for structural 3D modeling.
This function save the 3D oxDNA motif representation in conformation and
topology xoDNA-format files, and optionally in PDB format.
It can also save additional force constraints for oxDNA simulation
and protein configurations if specified.
Parameters
----------
filename : str, optional, default="motif"
The base filepath for the generated oxDNA files (without extension).
config : bool, default=True
If True, generates a configuration (.dat) file with nucleotide positions.
topology : bool, default=True
If True, generates a topology (.top) file specifying strand connectivity.
forces : bool, default=False
If True, saves force constraints for base-pair interactions.
pk_forces : bool, default=False
If True, saves force constraints specifically for pseudoknots.
return_text : bool, default=False
If True, returns the generated oxDNA configuration and topology as strings
instead of writing to files (used for real-time visualization).
sequence : str, optional
If provided, uses the given sequence to generate the topology.
pdb : bool, optional, default=False
If True, exports a PDB (Protein Data Bank) file for visualization.
box_size : Iterable of float, default=(1000.0, 1000.0, 1000.0)
The dimensions of the simulation box (x, y, z) in oxDNA simulation units.
**kwargs
Additional arguments for customizing force constraints
and PDB export settings.
Returns
-------
Optional[Tuple[str, str]]
If `return_text` is True, returns a tuple containing:
- The configuration file content as a string.
- The topology file content as a string.
Otherwise, returns None.
Notes
-----
This function requires the oxDNA analysis tools package to be installed
for the generation of force constraints and pdb export.
"""
def get_kwargs_names(func):
sig = signature(func)
# Extract parameters that have default values (kwargs)
kwargs = [
param.name
for param in sig.parameters.values()
if param.default != param.empty
]
return kwargs
# remove the extension from the filename
filename = str(Path(filename).with_suffix("")) # remove the extension
strands = [s for s in self if s.sequence]
n_nucleotides = sum([len(s.sequence) for s in strands])
n_strands = len(strands)
# create the conformation and topology text
conf_text = (
f"t = 0\n" f"b = {box_size[0]} {box_size[1]} {box_size[2]}\n" f"E = 0 0 0\n"
)
topology_text = ""
### ADD THE STRANDS TO THE CONFORMATION AND TOPOLOGY TEXTS ###
for s in strands:
# check for the sequence direction
dir = 1 - 2 * (s.directionality == "35")
seq = str(s.sequence[::dir])
coord_array = s.coords[::dir]
# add the coordinates to the conformations text
if config:
for pos, a1, a3 in coord_array:
conf_text += (
f"{pos[0]} {pos[1]} {pos[2]} "
f"{a1[0]} {a1[1]} {a1[2]} "
f"{a3[0]} {a3[1]} {a3[2]}\n"
)
# add the sequence to the topology text
if topology:
topology_text += seq + " type=RNA circular=false \n"
# add the proteins to the conformation text
try:
for protein in s.coords.proteins:
if config:
for pos, a1, a3 in protein.coords:
conf_text += (
f"{pos[0]} {pos[1]} {pos[2]} "
f"{a1[0]} {a1[1]} {a1[2]} "
f"{a3[0]} {a3[1]} {a3[2]}\n"
)
# add the proteins to the topology text
if topology:
topology_text += (
f"{protein.sequence} " "type=peptide circular=false \n"
)
n_nucleotides += len(protein)
n_strands += 1
except Exception as e:
print("Problem with proteins", s, e)
if sequence:
topology_text = (
"\n".join(
[f"{seq} type=RNA circular=false" for seq in sequence.split("&")]
)
+ "\n"
)
topology_text = f"{n_nucleotides} {n_strands} 5->3\n" + topology_text
if return_text:
return conf_text, topology_text
# save the files
conf_file = f"{filename}.dat"
if config:
with open(conf_file, "w", encoding="utf-8") as f:
f.write(conf_text)
top_file = f"{filename}.top"
if topology:
with open(top_file, "w", encoding="utf-8") as f:
f.write(topology_text)
### save the external forces
if not forces and not pk_forces:
pass
elif not oat_installed:
warnings.warn(
"oxDNA_analysis_tools is not installed. " "Skipping force writing.",
UserWarning,
)
else:
trap_kw_names = get_kwargs_names(mutual_trap)
trap_kwargs = {k: v for k, v in kwargs.items() if k in trap_kw_names}
pair_map = dot_bracket_to_pair_map(self.structure.replace("&", ""))
trap_kwargs.setdefault("stiff", 0.09)
trap_kwargs.setdefault("r0", 1.2)
trap_kwargs.setdefault("PBC", True)
trap_kwargs.setdefault("rate", 0)
trap_kwargs.setdefault("stiff_rate", 0)
force_list = []
pk_force_list = []
ss = self.structure.replace("&", "")
i = 0
for n1, n2 in pair_map.items():
if n2 is None:
continue
i += 1
trap1 = mutual_trap(n1, n2, **trap_kwargs)
trap2 = mutual_trap(n2, n1, **trap_kwargs)
if forces:
force_list.append(trap1)
force_list.append(trap2)
if pk_forces and ss[n1] not in ".()":
pk_force_list.append(trap1)
pk_force_list.append(trap2)
if forces:
write_force_file(force_list, f"{filename}_forces.txt")
if pk_forces:
write_force_file(pk_force_list, f"{filename}_pk_forces.txt")
if pdb:
if not oat_installed:
warnings.warn(
"oxDNA_analysis_tools is not installed. " "Skipping PDB export.",
UserWarning,
)
else:
# Read oxDNA configuration
system, _ = strand_describe(top_file)
ti, di = describe(top_file, conf_file)
conf = get_confs(ti, di, 0, 1)[0]
conf = inbox(conf, center=True)
# remove the proteins from the configuration if no pdb files provided
if not kwargs.get("protein_pdb_files"):
strand_offset = 0
to_pop = []
conf_to_keep = []
for i, strand in enumerate(system.strands):
strand_end = strand_offset + strand.get_length()
if strand.type == "peptide":
to_pop.append(i)
else:
conf_to_keep.append((strand_offset, strand_end))
strand_offset = strand_end
for i in to_pop[::-1]:
system.strands.pop(i)
oxdna_pdb_kw_names = get_kwargs_names(oxDNA_PDB)
oxdna_pdb_kwargs = {
k: v for k, v in kwargs.items() if k in oxdna_pdb_kw_names
}
oxdna_pdb_kwargs.setdefault("uniform_residue_names", True)
oxDNA_PDB(conf, system, filename, **oxdna_pdb_kwargs)
[docs]
def save_fasta(self, filename: str = "motif") -> None:
"""
Save the motif sequences in a FASTA file.
Parameters
----------
filename : str, optional
The filepath to save (without extension). Default is 'motif'.
"""
path = Path(filename).with_suffix(".fasta")
name = path.stem
seqs = self.sequence.split("&")
dotb = self.structure.split("&")
with open(str(path), "w", encoding="utf-8") as f:
for i, seq in enumerate(seqs):
f.write(f">{name}_strand_{i}\n")
f.write(f"{seq}\n")
f.write(f"{dotb[i]}\n")
[docs]
def save_json(
self, filename: str = "motif", return_data: bool = False
) -> Optional[Dict[str, Any]]:
"""
Save the motif representation as a JSON file.
Parameters
----------
filename : str, default 'motif'
The filepath to save (without extension). Default is 'motif'.
return_data : bool, default False
If True, return the JSON data instead of saving to a file.
Returns
-------
dict, optional
The JSON data if `return_data` is True.
"""
from ... import __version__ # import here to avoid circular imports
json_data = {"pyfurnace_version": __version__}
json_data.update(self.to_json())
if return_data:
return json.dumps(json_data, indent=4)
path = Path(filename).with_suffix(".json")
with open(str(path), "w", encoding="utf-8") as f:
json.dump(json_data, f, indent=4)
[docs]
def save_text(self, filename: str = "motif") -> None:
"""
Save the motif representation as a text file.
Parameters
----------
filename_path : str, optional
The filepath to save (without extension). Default is 'motif'.
"""
path = Path(filename).with_suffix(".txt")
name = path.stem
with open(str(path), "w", encoding="utf-8") as f:
f.write(f">{str(name)}\n")
f.write(f"{self.sequence}\n")
f.write(f"{self.structure}\n\n")
f.write(str(self))
[docs]
def shift(self, shift_vect: Tuple[int, int], extend: bool = False) -> "Motif":
"""
Shift the motif of the given shift vector.
Parameters
----------
shift_vect : Tuple[int, int]
The (x, y) shift values.
extend : bool, default False
Whether to extend junctions when shifting
(in the direction opposite to the shifting direction).
Returns
-------
Motif
The shifted motif.
Raises
------
ValueError
If shifting moves strands to negative positions.
"""
Strand._check_position(input_pos_dir=shift_vect)
shift = Position(shift_vect)
min_pos = self.min_pos
# check if the shift will bring the strands to negative positions
if min_pos[0] + shift[0] < 0 or min_pos[1] + shift[1] < 0:
raise MotifStructureError(
f"The motif cannot be shifted. The strands cannot be drawn"
f" at negative positions. Attempt to draw the motif at "
f"position ({min_pos[0] + shift_vect[0]}, "
f"{min_pos[1] + shift_vect[1]})"
)
# shift the strands
for s in self:
# save the initial start and end positions
s_start = s.start
s_end = s.end
# shift the strand
s.shift(shift, check=False)
if extend:
# extend the strand in the opposite direction
s.extend(direction=s.end_direction, until=s_end, check=False)
s.extend(direction=-s.direction, until=s_start, check=False)
# shift every basepair too
if not self._autopairing:
self._basepair = self.basepair.shift(shift)
return self
[docs]
def sort(
self, key: Optional[Callable[["Strand"], Any]] = None, reverse: bool = False
) -> "Motif":
"""
Sort the strands in the motif.
Parameters
----------
key : function, optional
The function to use to sort the strands. If ``None``, the strands are
sorted by:
- strands with a 5' end first
- lowest starting y position
- lowest starting x position
reverse : bool, default False
If ``True``, sort in descending order.
Returns
-------
Motif
The motif with the sorted strands.
"""
if not key:
# sort the strand according to the lowest start position
def key(s):
return (-int("5" in s.strand), *s.start[::-1])
self._strands.sort(key=key, reverse=reverse)
return self
[docs]
def strip(self, skip_axis: Literal[None, 0, 1] = None) -> "Motif":
"""
Remove the empty lines/columns in the motif structure.
Parameters
----------
skip_axis: int, optional
The numpy axis to skip when stripping the motif
(1 for horizontal, 0 for vertical).
Returns
-------
Motif
The stripped motif.
"""
min_pos = self.min_pos
shift = (-min_pos[0] * int(skip_axis != 1), -min_pos[1] * int(skip_axis != 0))
self.shift(shift)
return self
[docs]
def to_json(self) -> Dict[str, Any]:
"""
Convert the Motif instance to a JSON-serializable dictionary.
Returns
-------
Dict[str, Any]
A dictionary representation of the Motif instance.
"""
motif_dict = {
"motif_type": self.__class__.__name__,
"strands": [s.to_json() for s in self],
"autopairing": self._autopairing,
"basepair": {k.to_json(): v.to_json() for k, v in self._basepair.items()},
"lock_coords": self._lock_coords,
}
dummy_motif = Motif()
for attr in self.__dict__:
if attr not in dummy_motif.__dict__:
# assuming the attribute is JSON serializable
motif_dict[attr] = getattr(self, attr)
return motif_dict