import copy
import json
from pathlib import Path
from functools import wraps
from typing import Any, Dict, List, Tuple, Union, Literal, Callable, Optional, Type
# pyFuRNAce IMPORTS
from .symbols import (
iupac_code,
rotate_dot_bracket,
pair_map_to_dot_bracket,
dot_bracket_to_pair_map,
Node,
tree_to_dot_bracket,
dot_bracket_to_tree,
dot_bracket_to_stacks,
folding_barriers,
)
from .position import Position, Direction
from .callback import Callback
from .sequence import Sequence
from .strand import Strand
from .basepair import BasePair
from .motif import Motif
[docs]
class Origami(Callback):
"""
A class for building and manipulating RNA Origami structures.
The Origami class organizes RNA `Motif` objects into a 2D matrix to
represent spatial arrangements of strands. It supports stacking motifs
horizontally and vertically, calculating vertical junctions and connections,
assembling the full origami structure in a motif object, and exporting
the full structure and sequence.
Parameters
----------
matrix : Motif or list of Motif or list of list of Motif
A motif or 1D/2D list of Motifs to initialize the origami layout.
*args : Motif or list of Motif
Additional motifs or rows of motifs to add to the matrix.
align : {'left', 'first', 'center'}, default='left'
How rows are vertically aligned.
Left: align each row to the left.
First: align to the first crossover in each row.
Center: align every row to the center of the widest row.
copy : bool, default=False
Whether to create a copy of the motifs before adding them to the origami.
ss_assembly : bool, default=False
Whether to assemble the 3D structure of the origami without locking the
coordinates of the motifs.
**kwargs : dict
Additional keyword arguments passed to the Callback base class.
Attributes
----------
align : str
Current vertical alignment mode ('left', 'first', 'center').
assembled : Motif
Combined Motif representing the full assembled Origami.
num_char : List[int]
Number of characters per line, used for alignment.
num_lines : int
Number of horizontal lines (rows) in the Origami.
num_motifs : int
Total number of motifs in the Origami.
pair_map : dict
Dictionary of paired nucleotide indices (alternative to dot-bracket notation).
pos_index_map : Dict[Position, Tuple[int, int]]
Map from character position (x, y) in the assembled motif to the
corresponding index in the original matrix (y, x).
index_shift_map : Dict[Position, Tuple[int, int]]
Map from motif matrix indexes (y, x) to spatial shifts (x, y).
pseudoknots : dict
Pseudoknot metadata including indices and energies.
sequence : Sequence
Full nucleotide sequence of the Origami.
seq_positions : Tuple[Position]
The positions of each nucleotide in the origami sequence (x,y coordinates).
Same as calling origami.assembled.seq_positions. Always 5' to 3'.
ss_assembly : bool
Whether to assemble the 3D structure of the origami without locking
the coordinates of the motifs.
strands : List[Strand]
List of individual strands in the Origami.
structure : str
Dot-bracket notation of the RNA secondary structure.
See Also
--------
Motif, Strand, Sequence
"""
[docs]
@classmethod
def from_structure(
    cls,
    structure: Optional[Union[str, dict, BasePair, Node]] = None,
    sequence: Optional[str] = None,
    pk_energy=-8.5,
    pk_denergy=0.5,
    motif_list: Optional[List[Motif]] = None,
    **kwargs,
) -> "Origami":
    """
    Parse a structure or sequence representation to an Origami object.

    If a structure is not provided, it is calculated from the sequence
    with RNAfold. If a sequence is not provided, it is assumed to be
    a sequence of 'N's of the same length as the structure.

    Parameters
    ----------
    structure : Union[str, dict, BasePair, Node]
        The structure representation to convert (dot-bracket string,
        pair map or tree).
    sequence : str, optional
        The sequence or sequence constraints of the motif.
    pk_energy : float, optional
        The energy of the pseudoknots (if present).
    pk_denergy : float, optional
        The energy tolerance of the pseudoknots (if present).
    motif_list : List[Motif], optional
        A list of specific motifs to look for while parsing the structure.
        When omitted (or empty), the motifs listed in ``aptamers_list``
        are used.
    **kwargs : dict
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    Origami
        The Origami object created from the structure representation.

    Raises
    ------
    ValueError
        If neither a structure nor a sequence is given, if the structure
        type is not supported, if sequence and structure lengths differ,
        or if ``motif_list`` contains objects that are not Motifs.
    """
    from ..motifs import Stem, aptamers, aptamers_list, Loop
    from ..utils import vertical_double_link, stem_cap_link

    if not structure:
        if not sequence:
            raise ValueError("Either a structure or a sequence must be provided.")
        # imported only here: folding is needed just for sequence-only input
        from RNA import fold

        # if only sequence is provided, fold it to get the structure
        structure = fold(sequence)[0]
        for i, sym in enumerate(sequence[::-1]):
            if sym == "&":
                # index from the left so a trailing "&" (i == 0) is
                # appended at the end instead of prepended
                cut = len(structure) - i
                structure = structure[:cut] + "&" + structure[cut:]

    if sequence:
        sequence = str(sequence).replace("T", "U").upper()

    def build_sequence():
        """Build a default sequence if it's not provided."""
        nonlocal sequence
        if not sequence:
            sequence = "".join("N" if sym != "&" else "&" for sym in structure)

    # input dot-bracket notation
    if isinstance(structure, str):
        build_sequence()
        node = dot_bracket_to_tree(structure, sequence=sequence)
        pair_map = dot_bracket_to_pair_map(structure)
    # input pair map
    elif isinstance(structure, (BasePair, dict)):
        pair_map = structure.copy()
        structure = pair_map_to_dot_bracket(structure)
        build_sequence()
        node = dot_bracket_to_tree(structure, sequence=sequence)
    # input tree
    elif isinstance(structure, Node):
        node = structure
        pair_map = dot_bracket_to_pair_map(tree_to_dot_bracket(node))
        structure = tree_to_dot_bracket(node)
        build_sequence()
    else:
        raise ValueError(f"Invalid structure representation: {structure}")

    if isinstance(structure, str) and len(structure.strip("& ")) != len(
        sequence.strip("& ")
    ):
        raise ValueError(
            f"The sequence length must be equal to the structure "
            f"length. Got sequence len {len(sequence)} for structure"
            f" len {len(structure)}."
        )

    if motif_list:
        # a user-supplied list is validated and then actually used
        if not all(isinstance(m, Motif) for m in motif_list):
            raise ValueError("The motif_list must contain only Motif objects.")
        motif_list = list(motif_list)
    else:
        motif_list = []
        for name in aptamers_list:
            motif = aptamers.__dict__[name]()
            # the motif is made of multiple strands,
            # the flipped version has a different tree and should be checked
            if len(motif.strands) > 1:
                motif_list.append(motif)
                motif_list.append(motif.copy().flip(reorder=True))
            # the loop convention is opened on the right side,
            # but the origami is built with the left side open
            elif isinstance(motif, Loop):
                motif_list.append(motif.flip())
            # just add the motif
            else:
                motif_list.append(motif)

    # NOTE: folding the aptamers with ViennaRNA to find them in a folded
    # tree was considered, but single aptamers don't fold in the same way
    # when they are embedded in a structure, so the motif trees are built
    # from the motifs' own structure instead.
    mot_trees = [
        dot_bracket_to_tree(m.structure, sequence=str(m.sequence))
        for m in motif_list
    ]

    # initialize the origami object
    origami = Origami([[]], align="first", ss_assembly=True)
    current_index = [0, 0]
    m_seq = ["", ""]

    def match_subtree(
        node: Node, motif_node: Node, depth: int = 0
    ) -> Optional[Union[Node, Tuple[Node, int]]]:
        """
        Recursively check if the motif tree matches the subtree rooted at node.

        Parameters
        ----------
        node : Node
            The root of the subtree to compare.
        motif_node : Node
            The root of the motif tree.
        depth : int, optional
            The current depth in the tree, used to track the last matched node.

        Returns
        -------
        None, Node or tuple
            None when the trees do not match; otherwise the deepest matched
            paired node (a ``(node, depth)`` tuple for the inner recursive
            calls and for a childless motif node).
        """
        if (
            motif_node.parent  # not valid for the root node
            and node.label != motif_node.label
        ):
            return None
        if (
            motif_node.parent  # not valid for the root node
            and motif_node.seq is not None
            and node.seq is not None
            and not (iupac_code[node.seq] & iupac_code[motif_node.seq])
        ):
            return None

        # sanitize the children comparison
        motif_child = [c for c in motif_node.children if c.label != "&"]
        node_child = [c for c in node.children if c.label != "&"]

        if not motif_child:
            return node, depth  # Leaf node matched
        if len(motif_child) != len(node_child):
            return None

        node_depths = [
            match_subtree(nc, mc, depth=depth + 1)
            for nc, mc in zip(node_child, motif_child)
        ]
        if not all(node_depths):
            return None

        # save the node at the maximum depth
        # exclude invalid nodes and nodes that are unpaired
        # (the successive tree can grow only from paired nodes)
        node, max_depth = max(
            node_depths,
            key=lambda x: x[1] if (x is not None and x[0].label != ".") else 0,
        )
        if depth == 0:
            # if we are at the root node, we return the node
            return node
        return node, max_depth

    def recursive_build_origami(node, insert_at=None, flip=False):
        """
        Recursively build the origami from the tree representation.

        Parameters
        ----------
        node : Node
            The current node in the tree representation.
        insert_at : Tuple[int, int], optional
            The position in the origami to insert the motif.
        flip : bool, optional
            Whether to flip the motif horizontally and vertically.
        """
        nonlocal m_seq
        # initialize the variables
        if insert_at is None:
            insert_at = current_index.copy()
        motif = None

        ### BASE CASE: sequence break
        if node.label == "&":
            return

        ### Check if the current node matches any motif in the motif list
        if node.parent:
            for tree_mot, mot in zip(mot_trees, motif_list):
                found_node = match_subtree(node.parent, tree_mot)
                if found_node:
                    # First, flush stems accumulated
                    # this cause problems with bulges before aptamers
                    # but too many edge cases to handle
                    if m_seq[0]:
                        if m_seq[0] == "N" * len(m_seq[0]):
                            stem = Stem(len(m_seq[0]))
                        else:
                            stem = Stem(sequence=m_seq[0])
                            stem[1].sequence = m_seq[1][::-1]
                        origami.insert(insert_at, stem.flip(flip, flip))
                        insert_at[1] += 1
                        current_index[1] += 1
                        m_seq = ["", ""]
                    motif = mot.copy()
                    node = found_node
                    break

        # find the index of the current node in the parent children
        if node.parent is not None:
            child_seq_ind = [c.index for c in node.parent.children]
            n_c_ind = child_seq_ind.index(node.index)

        if motif is not None:  # if a motif was found, insert it
            pass
        elif node.label == "(":
            m_seq[0] += node.seq if node.seq else "N"
            m_seq[1] += sequence[pair_map[node.index]]
            # if the next node is not a stem, create a stem motif
            if (
                not node.children
                or len(node.children) > 1
                or any(c.label != "(" for c in node.children)
            ):
                # add wobbles in the stem
                if m_seq[0] == "N" * len(m_seq[0]):
                    motif = Stem(len(m_seq[0]))
                else:
                    motif = Stem(sequence=m_seq[0])
                    motif[1].sequence = m_seq[1][::-1]
        elif node.label == ".":
            m_seq[0] += node.seq if node.seq else "N"
            # if the next adjacent node is not unpaired, create a motif
            if (
                n_c_ind == len(child_seq_ind) - 1
                or node.parent.children[n_c_ind + 1].label != "."
            ):
                motif = Motif(
                    Strand(m_seq[0]), Strand("-" * len(m_seq[0]), start=(0, 2))
                )

        # add the motif and update the current index
        if motif:
            origami.insert(insert_at, motif.flip(flip, flip))
            current_index[1] += 1  # increment the x index
            m_seq = ["", ""]  # reset the motif sequence

        # recursive call for the children
        if node.children:
            child_inds = []
            # check each child before recursive call
            for i, child in enumerate(node.children):
                insert_at = None
                flip = False
                # bulge after a stem
                if child.label == "." and any(
                    c.label == "(" for c in node.children[:i]
                ):
                    insert_at = child_inds.pop()
                    flip = True
                # sequence break + only unpaired
                elif child.label in ".&" and all(
                    c.label in ".&" for c in node.children
                ):
                    if "&" in [c.label for c in node.children[:i]]:
                        insert_at = child_inds.pop()
                        flip = True
                # sequence break or multiple stems
                elif child.label == "&" or (
                    child.label == "("
                    and any(c.label == "(" for c in node.children[:i])
                ):
                    connect_down = Motif(
                        Strand("──"),
                        Strand("╮", start=(0, 2), directionality="35"),
                        Strand("╭", start=(1, 2), direction=(0, -1)),
                    )
                    connect_up = stem_cap_link(vflip=True)
                    if child_inds:
                        insert_connect = child_inds.pop()
                    else:
                        insert_connect = current_index
                    # insert the top connector
                    origami.insert(insert_connect, connect_down)
                    shift_x = sum(
                        [
                            m.num_char
                            for m in origami[insert_connect[0], : insert_connect[1]]
                        ]
                    )
                    connect_up.shift((shift_x, 0))
                    origami.append([connect_up])
                    # increment the y index
                    current_index[0] += 1
                    # set the x index to the end of the line
                    current_index[1] = len(origami[-1])
                    for row_i in range(insert_connect[0] + 1, current_index[0]):
                        # add the vertical connector
                        origami.insert(
                            (row_i, 0), vertical_double_link().shift((shift_x, 0))
                        )
                        # shift all the motifs until you reach the first connector
                        for m in origami[row_i, 1:]:
                            m.shift((2, 0))
                            if "││╰─" in m:
                                break

                if insert_at is None:
                    insert_at = current_index.copy()
                child_inds.append(insert_at)
                recursive_build_origami(child, insert_at=insert_at, flip=flip)

            # this could not work in the case a stem doesn't end with at least
            # one unpaired nucleotide, but that does never happen in natural
            # structures, so we can ignore this case
            if not any(c.children or c.label == "&" for c in node.children):
                origami.append(Motif(Strand("╮│╯")))
                current_index[1] -= 1  # decrement the x index

    # call the recursive function
    recursive_build_origami(node)

    ### ADD THE PSEUDOKNOTS ###
    # dictionary with sequence index as key and pseudoknot id as value
    full_map = dict()
    struct = structure.replace("&", "")
    pair_map = dot_bracket_to_pair_map(struct)
    # map the sequence index to the slice
    pos_to_slice = origami.pos_index_map
    seq_positions = origami.seq_positions
    motif_shifts = origami.index_shift_map

    # iterate over the structure
    i = 0
    while i < len(struct):
        sym = struct[i]
        # regular symbol, or pseudoknot already registered
        if sym in ".()" or i in full_map:
            i += 1
            continue

        # get the length of the pseudoknot (bounded by the structure end)
        length = 1
        while i + length < len(struct) and struct[i + length] == sym:
            length += 1

        # get the pseudoknot id or get a new one
        if pair_map[i] in full_map:
            pk_id = full_map[pair_map[i]] + "'"
        else:
            # compare the ids numerically ("10" < "9" as strings)
            used_ids = (
                int(k.split("_")[1].strip("'")) for k in full_map.values()
            )
            pk_id = "1_" + str(max(used_ids, default=-1) + 1)

        # get the pseudoknot motif, strand and insert offset
        pos = seq_positions[i]
        motif_yx = pos_to_slice[pos]
        shift_yx = motif_shifts[motif_yx]
        motif = origami._matrix[motif_yx[0]][motif_yx[1]]
        original_pos = (pos[0] - shift_yx[0], pos[1] - shift_yx[1])
        strand_ind = next(
            ind for ind, s in enumerate(motif) if original_pos in s.seq_positions
        )
        seq_offset = motif[strand_ind].seq_positions.index(original_pos)

        # the strand-local indices go in the pk_info of the strand
        new_pk_info = {
            "id": [pk_id],
            "ind_fwd": [(seq_offset, seq_offset + length - 1)],
            "E": [pk_energy],
            "dE": [pk_denergy],
        }
        # full_map is looked up with global sequence indices (pair_map),
        # so it must be keyed by global indices as well
        full_map.update({k: pk_id for k in range(i, i + length)})
        # add the pseudoknots info to the strand
        motif[strand_ind].pk_info = new_pk_info
        # continue right after the run, without skipping a symbol
        i += length

    return origami
def __init__(
    self,
    matrix: Union[Motif, List[Motif], List[List[Motif]]] = None,
    *args: Union[Motif, List[Motif]],
    align: Literal["left", "first", "center"] = "left",
    copy: bool = False,
    ss_assembly: bool = False,
    **kwargs,
) -> None:
    """
    Initialize an Origami object with a 2D list of motifs.

    The row containers passed by the caller are never mutated: the origami
    always stores its own lists (the motifs themselves are shared unless
    ``copy`` is True).

    Parameters
    ----------
    matrix : Motif or list of Motif or list of list of Motif
        A motif or 1D/2D list of Motifs to initialize the origami layout.
    *args : Motif or list of Motif
        Additional motifs (appended to the last row) or rows of motifs
        (appended as new rows) to add to the matrix.
    align : {'left', 'first', 'center'}, default='left'
        How rows are vertically aligned.
        Left: align each row to the left.
        First: align to the first crossover in each row.
        Center: align every row to the center of the widest row.
    copy : bool, default=False
        Whether to create a copy of the motifs before adding them to the origami.
    ss_assembly : bool, default=False
        Whether to assemble the 3D structure of the origami without locking the
        coordinates of the motifs.
    **kwargs : dict
        Additional keyword arguments passed to the Callback base class.

    Raises
    ------
    ValueError
        If ``matrix`` or ``args`` contain something else than motifs or
        lists of motifs, or if ``align`` is not an accepted value.
    """
    ### validate the alignment first, before touching any motif
    if align not in ("left", "first", "center"):
        raise ValueError(
            f'"{align}" is not an accepted value for the'
            "align_type variable. The align_type variable"
            ' can only be: "left", "first" or "center".'
        )

    # initialize the callback
    Callback.__init__(self, **kwargs)

    # initialize the protected attributes
    self._pos_index_map = dict()
    self._index_shift_map = dict()
    self._assembled = None
    self._ss_assembly = bool(ss_assembly)
    self._pseudoknots = None
    self._align = align

    # initialize the matrix
    if not matrix:
        matrix = []

    ### CHECK THE MATRIX and normalize it to a list of lists
    # the matrix is a proper 2D list
    if (
        isinstance(matrix, (list, tuple))
        and all(isinstance(row, (list, tuple)) for row in matrix)
        and all(isinstance(m, Motif) for row in matrix for m in row)
    ):
        rows = [list(row) for row in matrix]
    # the matrix is a list of motifs: it becomes the only row
    elif isinstance(matrix, (list, tuple)) and all(
        isinstance(item, Motif) for item in matrix
    ):
        rows = [list(matrix)]
    # the matrix is a motif
    elif isinstance(matrix, Motif):
        rows = [[matrix]]
    # unsupported type for matrix
    else:
        raise ValueError(
            f"The matrix variable may only contain lists of motifs"
            f" or motifs, but it contains {type(matrix)}."
        )

    ### check the type of the args variable
    if args:
        # the args contains lines (lists of motifs), so args is a matrix
        if all(isinstance(item, (list, tuple)) for item in args) and all(
            isinstance(m, Motif) for item in args for m in item
        ):
            rows.extend(list(item) for item in args)
        # the args contains only motifs, so it's a line
        elif all(isinstance(item, Motif) for item in args):
            if not rows:
                # no row yet: the motifs start the first one
                rows.append([])
            rows[-1].extend(args)  # add the motifs to the last line
        else:
            raise ValueError(
                f"The args variable may only contain lists of"
                f" motifs or motifs, but it contains {type(args)}."
            )

    ### add the matrix to the object
    if copy:
        # make a copy of the motifs
        self._matrix = [[m.copy() for m in row] for row in rows]
    else:
        self._matrix = rows

    # register the callbacks
    for row in self._matrix:
        for m in row:
            if self._updated_motif not in m._callbacks:
                m.register_callback(self._updated_motif)
def __str__(self) -> str:
"""Return a string representation of the assmebled origami
(the origami motif)."""
return str(self.assembled)
def __repr__(self):
"""Return a string representation of the origami object, by
iterating through the matrix and calling the repr method of
each motif."""
reprs = ""
for line in self._matrix:
for item in line:
reprs += repr(item) + ", "
reprs += ";\n"
return reprs
def __getitem__(
self,
key: Union[
int, slice, Tuple[int, int], Tuple[slice, slice], Callable[[Motif], bool]
],
) -> Union[Motif, List[Motif], List[List[Motif]]]:
"""
Get motifs from the matrix using slicing or filtering.
Parameters
----------
key : int, slice, tuple or callable
If int or slice, returns the corresponding row(s) of motifs.
If tuple of two ints, returns the motif at that position.
If tuple of two slices, returns a submatrix of motifs.
If a function, returns a submatrix of motifs that satisfy the function.
If a motif class, returns a submatrix of motifs of that class.
If a motif instance, returns a submatrix of motifs equal to that instance.
Returns
-------
Union[Motif, List[Motif], List[List[Motif]]]
Retrieved motif(s).
Raises
------
TypeError
If the key is of unsupported type.
"""
### 2D slice
if isinstance(key, (tuple, list)):
y, x = key
# two slices, return a 2D list, a sub-origami
if isinstance(x, slice) and isinstance(y, slice):
return [line[x] for line in self._matrix[y]]
# two integers, return a single motif
if all(isinstance(i, int) for i in key):
return self._matrix[y][x]
# convert any index to a slice, then return a 2D list
if isinstance(y, int):
y = y % len(self._matrix)
y = slice(y, y + 1)
if isinstance(x, int):
x = x % len(self._matrix[y])
x = slice(x, x + 1)
return [m for row in self._matrix[y] for m in row[x]]
### 1D slice, return the row
elif isinstance(key, slice) or isinstance(key, int):
return self._matrix[key]
elif isinstance(key, type) and issubclass(key, Motif):
return [[m for m in row if isinstance(m, key)] for row in self._matrix]
elif isinstance(key, Motif):
return [[m for m in row if m == key] for row in self._matrix]
### Function, return a 2D list of motifs that satisfy the function
elif hasattr(key, "__call__"):
return [[m for m in row if key(m)] for row in self._matrix]
else:
raise TypeError(
"Index must be a single int/slice, "
"a tuple of (row, col) of int/slice, "
"or a function to screen the motifs, got: "
f"{key}, of type: {type(key)}"
)
def __setitem__(
    self,
    key: Union[
        int, slice, Tuple[int, int], Tuple[slice, slice], Callable[[Motif], bool]
    ],
    value: Union[Motif, List[Motif], List[List[Motif]]],
) -> None:
    """
    Set motif(s) at the specified position in the matrix, trying to
    match the shape of the value to the shape of the key. The value
    is always copied when added to the matrix to avoid reference
    problems when repeating motifs in the matrix.

    If the value is a single motif or a list of motifs, it will be
    set in the selected row(s). If the value is a 2D list of motifs, it will be
    set in the selected region of the matrix only if the selected region is a
    2D region (otherwise nothing is written).

    Parameters
    ----------
    key : int, slice, tuple, or callable
        If int or slice, sets the entire row(s).
        If tuple of two ints, sets a single motif at that position.
        If tuple of slices, sets a 2D region in the matrix.
        If a function, replaces motifs that satisfy the condition.
    value : Motif, list of Motif, or 2D list of Motif
        The motif(s) to insert. Must match the shape implied by `key`.

    Raises
    ------
    ValueError
        If the value contains invalid types.
    IndexError
        If a 2D value does not cover the selected region.
    TypeError
        If the key is of unsupported type.
    """
    ### CHECK THE DIMENSIONALITY OF THE VALUE
    # the value is a single motif
    if isinstance(value, Motif):
        value_dimension = 0
        value = [value]
    # value is a 1D list of motifs
    elif isinstance(value, list) and all(isinstance(item, Motif) for item in value):
        value_dimension = 1
    # value is a 2D list of motifs
    elif (
        isinstance(value, list)
        and all(isinstance(item, (list, tuple)) for item in value)
        and all(isinstance(m, Motif) for sublist in value for m in sublist)
    ):
        value_dimension = 2
    else:
        raise ValueError(
            f"Only motifs, lists of motifs, or 2D lists "
            f"of motifs can be added to the Origami, but "
            f"the object {value} was added."
        )

    ### CHECK THE DIMENSIONALITY OF THE KEY
    mask = None
    # if the key is a function, we need to create a mask which is a submatrix
    # with the slices of the motifs that satisfy the function
    if hasattr(key, "__call__"):
        mask = [
            [(i, slice(j, j + 1)) for j, m in enumerate(row) if key(m)]
            for i, row in enumerate(self._matrix)
        ]
    # the key is a single int, we select a row
    elif isinstance(key, int):
        key_dimension = 1
        y_int = key % len(self._matrix)
        y_slice = slice(y_int, y_int + 1)
        x_slice = slice(0, len(self._matrix[y_int]))
    # the key is a tuple of ints, we select a single motif
    elif isinstance(key, (tuple, list)) and all(isinstance(i, int) for i in key):
        key_dimension = 0
        # Convert the keys to a positive integer
        y_int = key[0] % len(self._matrix)
        x_int = key[1] % len(self._matrix[y_int])
        # Convert the keys to slices
        y_slice = slice(y_int, y_int + 1)
        x_slice = slice(x_int, x_int + 1)
    # the key is an int for the row and a slice of the columns,
    # so this is still a 1D region
    elif (
        isinstance(key, (tuple, list))
        and isinstance(key[0], int)
        and isinstance(key[1], slice)
    ):
        key_dimension = 1  # select a row
        y_int = key[0] % len(self._matrix)
        y_slice = slice(y_int, y_int + 1)
        x_slice = key[1]  # get the slice
    # key selects a submatrix, so this is a 2D region
    elif isinstance(key, (tuple, list)) and all(isinstance(i, slice) for i in key):
        key_dimension = 2  # select a 2D region
        y_slice = key[0]
        x_slice = key[1]
    # special case: vertical selection (theoretically a 1D region,
    # but for code purposes we need to treat it as a 2D region)
    elif (
        isinstance(key, (tuple, list))
        and isinstance(key[0], slice)
        and isinstance(key[1], int)
    ):
        key_dimension = 2  # still select a 2D region, but VERTICAL
        y_slice = key[0]
        # wrap the column on the first selected row; the slice start may be
        # None (e.g. `[:, 0]`), so the rows are resolved through the slice
        selected_rows = self._matrix[y_slice]
        width = len(selected_rows[0]) if selected_rows else 0
        x_int = key[1] % width if width else key[1]
        x_slice = slice(x_int, x_int + 1)
    else:
        raise TypeError(
            "Origami indexes can be: \n"
            "\t - a function to screen the motifs, \n"
            "\t - an int/slice to select a row, \n"
            "\t - a tuple of two int/slice to select a region. \n"
            f"Got: {key}, of type: {type(key)}"
        )

    ### APPLY THE MASK (if any)
    if mask is not None:
        for row_ind, row in enumerate(mask):
            # go right-to-left so that replacing one motif with several
            # does not shift the positions still to be processed
            for mot_ind, (i, j) in reversed(list(enumerate(row))):
                # just put in position for 0D/1D values
                if value_dimension in (0, 1):
                    self._matrix[i][j] = [m.copy() for m in value]
                # dimensionality 2: match the two submatrices
                elif value_dimension == 2:
                    # try to set the value matching the indices
                    try:
                        # copy the motif itself, not the wrapping list
                        self._matrix[i][j] = [value[row_ind][mot_ind].copy()]
                    except IndexError as e:
                        raise IndexError(
                            f"Error while setting the value to the Origami. "
                            f"The lists do not match. Origami indexes: y: {i}, "
                            f"x: {j}."
                        ) from e
    ### REDUCED ALL CASES TO A 2D SLICING
    else:
        for i, line in enumerate(self._matrix[y_slice]):
            # dimensionality 0 or 1: just set the value
            if value_dimension in (0, 1):
                line[x_slice] = [m.copy() for m in value]
            # dimensionality 2: match the two submatrices
            elif key_dimension == 2 and value_dimension == 2:
                # try to set the value matching the indices
                try:
                    line[x_slice] = [m.copy() for m in value[i]]
                except IndexError as e:
                    raise IndexError(
                        f"Error while setting the value to the Origami. "
                        f"The lists do not match. Origami indexes: "
                        f"y index: {i}, x slice: {x_slice}."
                    ) from e

    ### update the motif
    self._updated_motif()
def __delitem__(
self,
key: Union[
int,
slice,
Tuple[int, int],
Tuple[slice, slice],
Callable[[Motif], bool],
],
) -> None:
"""
Delete motifs/rows from the matrix using the same slicing conventions
as __getitem__ and __setitem__.
Supported keys
--------------
- int or slice: delete row(s)
- (int, int): delete a single motif
- (int, slice): delete a slice in a single row
- (slice, slice): delete a 2D submatrix (same x-slice for each row)
- (slice, int): delete a vertical selection (same column for each row)
- callable: delete all motifs for which key(motif) is True
"""
# Callable: delete everything that matches the predicate
if hasattr(key, "__call__"):
# collect indices first so we don't mutate while iterating
rows_to_delete: List[Tuple[int, List[int]]] = []
for i, row in enumerate(self._matrix):
cols = [j for j, m in enumerate(row) if key(m)]
if cols:
rows_to_delete.append((i, cols))
for i, cols in rows_to_delete:
row = self._matrix[i]
for j in sorted(cols, reverse=True):
m = row[j]
m._clear_callbacks()
del row[j]
self._updated_motif()
return
# 1D: entire row(s)
if isinstance(key, int):
if not self._matrix:
return
y_int = key % len(self._matrix)
for m in self._matrix[y_int]:
m._clear_callbacks()
del self._matrix[y_int]
self._updated_motif()
return
if isinstance(key, slice):
rows = self._matrix[key]
for row in rows:
for m in row:
m._clear_callbacks()
del self._matrix[key]
self._updated_motif()
return
# 2D: tuple / list of two indices
if isinstance(key, (tuple, list)) and len(key) == 2:
y, x = key
# (int, int): single motif
if isinstance(y, int) and isinstance(x, int):
if not self._matrix:
return
y_int = y % len(self._matrix)
if not self._matrix[y_int]:
return
x_int = x % len(self._matrix[y_int])
m = self._matrix[y_int][x_int]
m._clear_callbacks()
del self._matrix[y_int][x_int]
self._updated_motif()
return
# (int, slice): horizontal slice in one row
if isinstance(y, int) and isinstance(x, slice):
if not self._matrix:
return
y_int = y % len(self._matrix)
row = self._matrix[y_int]
motifs = row[x]
for m in motifs:
m._clear_callbacks()
del row[x]
self._updated_motif()
return
# (slice, int): vertical selection (same column across rows)
if isinstance(y, slice) and isinstance(x, int):
rows = self._matrix[y]
if not rows:
return
# mirror __setitem__ behaviour (modulo against len(self._matrix[y]))
x_int = x % len(rows)
for row in rows:
if not row:
continue
m = row[x_int]
m._clear_callbacks()
del row[x_int]
self._updated_motif()
return
# (slice, slice): 2D submatrix
if isinstance(y, slice) and isinstance(x, slice):
y_indices = range(*y.indices(len(self._matrix)))
for i in y_indices:
row = self._matrix[i]
motifs = row[x]
for m in motifs:
m._clear_callbacks()
del row[x]
self._updated_motif()
return
# If we get here, the key type is unsupported
raise TypeError(
"Origami indexes can be: \n"
"\t - a function to screen the motifs, \n"
"\t - an int/slice to select a row, \n"
"\t - a tuple of two int/slice to select a region. \n"
f"Got: {key}, of type: {type(key)}"
)
def __len__(self):
"""Get the number of rows in the origami."""
return len(self._matrix)
def __add__(self, other: "Origami") -> "Origami":
"""
Horizontally add another Origami to this Origami.
Parameters
----------
other : Origami
The origami to stack horizontally.
Returns
-------
Origami
A new Origami object with horizontally concatenated motifs.
"""
new_matrix = [[m.copy() for m in row] for row in self._matrix]
if not isinstance(other, Origami):
raise TypeError(
"Unsupported operand type(s) for +: "
"'Origami' and '{type(other).__name__}'"
)
# add extra rows to the new matrix
diff_len = len(other._matrix) - len(new_matrix)
if diff_len > 0:
new_matrix.extend([[] for _ in range(diff_len)])
for i, row in enumerate(other._matrix):
new_matrix[i].extend([m.copy() for m in row])
return Origami(
new_matrix, align=self.align, ss_assembly=self.ss_assembly, copy=False
)
def __bool__(self) -> bool:
"""Return False there are no motifs or all motifs are empty."""
if not self._matrix:
return False
for row in self:
for motif in row:
if motif:
return True
return False
###
### PROPERTIES
###
@property
def align(self) -> Literal["left", "first", "center"]:
"""
The alignment type of the rows of the origami.
"""
return self._align
@align.setter
def align(self, new_align):
"""
Set the alignment type of the rows of the origami.
Parameters
----------
new_align : {'left', 'first', 'center'}
The new alignment type for the origami.
When set to 'left', the motif rows are aligned to the left.
When set to 'first', the motifs rows are aligned to match the first
vertical junction.
When set to 'center', the motifs rows are aligned to the center.
"""
if new_align not in ("left", "first", "center"):
raise ValueError(
f'"{new_align}" is not an accepted value for '
"the align_type variable. The align_type variable "
'may only a string reading "left", "first" or "center".'
)
self._align = new_align
self._updated_motif()
@property
def assembled(self):
"""
The matrix of the origami with the motif shifted in the correct position
for the assembly. The assembled matrix contains rows with the vertical
connection motifs.
"""
if self._assembled is None:
self._assemble()
return self._assembled
@property
def num_char(self) -> List[int]:
"""
The number of characters in each line of the origami.
"""
if not self._matrix:
return 0
return [sum(m.num_char for m in line) for line in self._matrix]
@property
def num_lines(self) -> int:
"""
The number of lines in the origami.
"""
if not self._matrix:
return 0
return len(self._matrix)
@property
def num_motifs(self) -> int:
"""
The number of motifs in the origami.
"""
return sum(1 for line in self._matrix for item in line if item is not None)
@property
def pair_map(self) -> dict:
"""
The dictionary of the paired indexes (alternative to the dot bracket
notation).
"""
return self.assembled.pair_map
@property
def pos_index_map(self) -> dict:
"""
A dictionary with the symbols position (x, y) as keys and the
matrix index (y, x) of the motif that contains it as values.
"""
if self._assembled is None:
self._assemble()
return self._pos_index_map
@property
def index_shift_map(self) -> dict:
"""
A dictionary with the slice of the motif in the matrix as key (y, x)
and positional shift of the motif as values (y, x). The shift is the
difference between the position of the motif in the matrix and the position
of the motif in the assembled origami.
"""
if self._assembled is None:
self._assemble()
return self._index_shift_map
@property
def positions(self) -> List[Position]:
"""
The positions of the characters in the origami. Is the same as
calling the assembled.motif.positions.
"""
if self._assembled is None:
self._assemble()
return self.assembled.positions
@property
def pseudoknots(self) -> dict:
"""
A dictionary with the pseudoknot information.
The result is cached in `_pseudoknots`: a non-empty cached dictionary is
returned directly, otherwise the information is collected again from the
strands of the motifs in the matrix and stored in the cache.
The dictionary has pseudoknot IDs as keys and the pseudoknot information as
values.
The pseudoknot information is a dictionary with the following keys:
- ind_fwd: a list of tuples (start, end) with the indices of the forward
sequences of the pseudoknot
- ind_rev: a list of tuples (start, end) with the indices of the reverse
sequences of the pseudoknot
- E: the energy of the pseudoknot (mean of the values collected for the ID)
- dE: the energy tolerance of the pseudoknot (mean of the values collected
for the ID)
"""
# an empty cached dictionary is falsy, so it is recomputed on every access
if self._pseudoknots:
return self._pseudoknots
# A dictionary to store the pseudoknot information, with the pk_index as key
# and the pk information dict as value
pk_dict = dict()
# map each sequence position of the assembled origami to its sequence index
pos_to_ind = {pos: ind for ind, pos in enumerate(self.assembled.seq_positions)}
def add_pk(strand, pk_index, info_nr, shift, forward=True):
"""
Add entry number `info_nr` of the strand pseudoknot information to
pk_dict[pk_index], as a forward or reverse index pair.
"""
# get the pseudoknot information
pk_info = strand.pk_info
# add the pseudoknot information to the pk_dict
pk_dict.setdefault(
pk_index, {"ind_fwd": [], "ind_rev": [], "E": [], "dE": []}
)
pk_dict[pk_index]["E"].append(pk_info["E"][info_nr])
pk_dict[pk_index]["dE"].append(pk_info["dE"][info_nr])
# pick the reference nucleotide of the strand: the first sequence position,
# or the last one when the strand directionality is "35"
# NOTE(review): presumably this is the 5' end of the strand; confirm
start_pos_ind = 0
if strand.directionality == "35":
start_pos_ind = -1
pos = strand.seq_positions[start_pos_ind]
# sequence index (in the assembled origami) of the reference nucleotide,
# found by adding the motif shift to the strand-local position
offset_ind = pos_to_ind[(shift[0] + pos[0], shift[1] + pos[1])]
# get the start and end positions of the pseudoknot
# (read from the "ind_fwd" entry for forward and reverse pseudoknots alike)
pk_start, pk_end = pk_info["ind_fwd"][info_nr]
start_end_tuple = (offset_ind + pk_start, offset_ind + pk_end)
# add the start and end positions to the pk_dict
if forward:
pk_dict[pk_index]["ind_fwd"].append(start_end_tuple)
else:
pk_dict[pk_index]["ind_rev"].append(start_end_tuple)
pk_motifs = []
### collect the matrix indexes of the motifs whose strands expose `pk_info`
for i, line in enumerate(self._matrix):
for j, m in enumerate(line):
if any(hasattr(s, "pk_info") for s in m):
pk_motifs.append((i, j))
### Iterate through the strands of the motifs with pseudoknot information
for i, j in pk_motifs:
m = self._matrix[i][j]
shift = self.index_shift_map[(i, j)]
# keep only the strands with non-empty pseudoknot information
pk_strands = [s for s in m if s.pk_info]
pk_indexes = [pk_id for s in pk_strands for pk_id in s.pk_info["id"]]
### Adjust the pk_index for unique pseudoknots
# IDs starting with "0" are renamed "0_<n>", where n is one more than the
# number of keys starting with "0" already stored in pk_dict
if any(ind[0] == "0" for ind in pk_indexes): # new 0 pseudoknot
current_n_zero = sum(1 if key[0] == "0" else 0 for key in pk_dict)
pk_index_0 = "0_" + str(current_n_zero + 1)
# add the pseudoknots
for strand in pk_strands:
for info_nr, pk_index in enumerate(strand.pk_info["id"]):
# a trailing "'" in the ID marks the reverse side of the pseudoknot
reverse = pk_index[-1] == "'"
if pk_index[0] == "0":
pk_index = pk_index_0
elif reverse:
# drop the "'" so both sides are stored under the same key
pk_index = pk_index[:-1]
add_pk(strand, pk_index, info_nr, shift, forward=not reverse)
# replace the collected lists with the average energy and average tolerance
for pk in pk_dict.values():
pk["E"] = sum(pk["E"]) / len(pk["E"])
pk["dE"] = sum(pk["dE"]) / len(pk["dE"])
self._pseudoknots = pk_dict
return self._pseudoknots
@property
def sequence(self) -> "Sequence":
    """
    The sequence of the origami, as a Sequence.
    """
    return self.assembled.sequence

@sequence.setter
def sequence(self, new_seq):
    """
    Set the sequence of the origami.

    The new sequence is read along each strand of the assembled origami and
    split into the pieces that belong to the individual strands of the
    motifs in the matrix; each piece is then assigned to its strand.
    The '&' symbols are ignored.

    Parameters
    ----------
    new_seq : str or Sequence
        The new sequence. Without the '&' symbols it must be as long as the
        current sequence.

    Raises
    ------
    ValueError
        If `new_seq` is not a string or a Sequence, or if its length
        (excluding the '&' symbols) differs from the current one.
    """
    # check the type first: the string methods below would otherwise raise
    # an unrelated AttributeError for unsupported objects
    if not isinstance(new_seq, (str, Sequence)):
        raise ValueError(
            f"The new sequence must be a string or a Sequence object"
            f" with the same lenght of the current sequence. "
            f"Got type: {type(new_seq)}."
        )
    # remove the '&' symbol
    new_seq = new_seq.replace("&", "")
    current_seq = self.sequence.replace("&", "")
    if len(new_seq) != len(current_seq):
        raise ValueError(
            f"The new sequence must be a string or a Sequence object"
            f" with the same lenght of the current sequence "
            f"({len(current_seq)}). Got type: {type(new_seq)}; with "
            f"length: {len(new_seq)}, excluding the '&' symbols."
        )

    # adjust the offset if there are multiple strands
    offset = 0
    # read the maps once to avoid triggering the callback and origami assembly
    pos_to_slice = self.pos_index_map
    origami_motif = self.assembled
    motif_shifts = self.index_shift_map

    def assign(strand_id, collected_seq):
        """Write the collected bases to the matrix strand `strand_id`."""
        row, col, strand_ind = strand_id
        strand = self._matrix[row][col][strand_ind]
        # the bases are collected in the order of the assembled strand;
        # strands with "35" directionality receive them reversed
        if strand.directionality == "35":
            collected_seq = collected_seq[::-1]
        strand.sequence = collected_seq

    # iterate over the strands in the origami motif
    for s in origami_motif:
        # a tuple (row, column, strand index) identifying the strand of the
        # matrix motif that is currently being collected
        strand_ID = None
        new_strand_seq = ""
        # iterate over the nucleotides in the strand
        for ind, pos in enumerate(s.seq_positions):
            # matrix index of the motif that contains this nucleotide
            motif_yx = pos_to_slice[pos]
            # shift of that motif in the assembled origami
            shift_yx = motif_shifts[motif_yx]
            # remove the shift to get the position inside the motif
            original_pos = (pos[0] - shift_yx[0], pos[1] - shift_yx[1])
            motif = self._matrix[motif_yx[0]][motif_yx[1]]
            # index of the motif strand that contains the nucleotide
            strand_ind = next(
                i
                for i, sub in enumerate(motif)
                if original_pos in sub.seq_positions
            )
            current_ID = (motif_yx[0], motif_yx[1], strand_ind)
            if strand_ID is None:
                strand_ID = current_ID
            # a new strand starts here: store the bases collected so far
            # before moving on
            if strand_ID != current_ID:
                assign(strand_ID, new_strand_seq)
                new_strand_seq = ""
                strand_ID = current_ID
            new_strand_seq += new_seq[ind + offset]
        # store the last collected strand, with the same directionality
        # handling as all the previous ones
        if strand_ID is not None:
            assign(strand_ID, new_strand_seq)
        # update the offset
        offset += len(s.sequence)
@property
def seq_positions(self) -> Tuple[Position]:
    """
    The positions (x, y coordinates) of each nucleotide in the sequence of
    the assembled origami.
    The sequence always has the directionality 5' to 3'.
    """
    assembled_motif = self.assembled
    return assembled_motif.seq_positions
@property
def ss_assembly(self) -> bool:
    """
    Boolean indicating if the origami 3D structure is assembled without
    locking the coordinates of the motifs.
    """
    return True if self._ss_assembly else False

@ss_assembly.setter
def ss_assembly(self, new_ss_assembly):
    """
    Set the ss_assembly attribute to True or False.

    The value is converted to a boolean, and the cached properties are
    reset through `_updated_motif()`.
    """
    flag = bool(new_ss_assembly)
    self._ss_assembly = flag
    self._updated_motif()
@property
def strands(self) -> List[Strand]:
    """
    The strands of the origami, read from the assembled origami.
    """
    assembled_motif = self.assembled
    return assembled_motif.strands
@property
def structure(self) -> str:
    """
    The dot-bracket structure of the origami, read from the assembled
    origami.
    """
    assembled_motif = self.assembled
    return assembled_motif.structure
###
### CLASS METHODS
###
[docs]
@classmethod
def from_json(cls, json_data: Union[str, dict]) -> "Origami":
    """
    Create an Origami object from a JSON string or dictionary.

    A string is parsed with `json.loads`; a dictionary is used directly and
    is modified in place (the "pyfurnace_version" key is removed from it).

    Parameters
    ----------
    json_data : str or dict
        A JSON string or dictionary representing the origami.

    Returns
    -------
    Origami
        An Origami object created from the JSON data.

    Raises
    ------
    TypeError
        If `json_data` is neither a string nor a dictionary.
    """
    if isinstance(json_data, dict):
        json_dict = json_data
    elif isinstance(json_data, str):
        json_dict = json.loads(json_data)
    else:
        raise TypeError(
            f"json_data must be a string or a dictionary, "
            f"but got {type(json_data)}."
        )
    # the version is stored for backward compatibility tracking only
    json_dict.pop("pyfurnace_version", None)
    # rebuild the matrix of motifs, row by row
    matrix = [
        [Motif.from_json(motif_data) for motif_data in line]
        for line in json_dict.get("matrix", [])
    ]
    align = json_dict.get("align", "left")
    ss_assembly = json_dict.get("ss_assembly", False)
    return cls(matrix, align=align, ss_assembly=ss_assembly)
# create the origami from the json dictionary
[docs]
@classmethod
def from_json_file(cls, file_path: str) -> "Origami":
    """
    Create an Origami object from a JSON file.

    Parameters
    ----------
    file_path : str
        The path to the JSON file containing the origami data.

    Returns
    -------
    Origami
        An Origami object created from the JSON file.
    """
    # read with an explicit encoding, matching the one used by `save_json`,
    # so the result does not depend on the locale of the machine
    with open(file_path, "r", encoding="utf-8") as file:
        json_data = json.load(file)
    return cls.from_json(json_data)
###
### STATIC METHODS
###
@staticmethod
def _calculate_connections(
junctions1: dict,
junctions2: dict,
directionalities: List[str],
x_shift: Union[tuple[int, int], Position] = (0, 0),
start_y: int = 0,
) -> Tuple[Motif, Position]:
"""
Creates the connection between two consecutive rows of the origami.
The i-th DOWN junction of the first line is connected to the i-th UP
junction of the second line; extra junctions on either side are ignored.
Each connection is drawn as one strand, and connections that overlap
horizontally are drawn at different heights.
Parameters
----------
junctions1: dict
junctions of the first line (the `Direction.DOWN` entry is used)
junctions2: dict
junctions of the second line (the `Direction.UP` entry is used)
directionalities: list
the directionalities of the top junctions, one per connection
x_shift: tuple
The x shift of the junctions of the first and second line
start_y: int
The y position of the first line
Returns
-------
Tuple[Motif, Position]
The connection motifs and height of the vertical connections.
When one of the two lines has no junction to connect, an empty Motif
and the integer 0 are returned instead.
"""
### take the x coordinates of the junctions of the two lines
j1 = [pos[0] + x_shift[0] for pos in junctions1[Direction.DOWN]]
j2 = [pos[0] + x_shift[1] for pos in junctions2[Direction.UP]]
# a junction is missing, then no connection
if not j2 or not j1:
return Motif(), 0
# the number of connections is the minimum of the two junctions
n_connect = min((len(j1), len(j2)))
j1 = j1[:n_connect]
j2 = j2[:n_connect]
### CREATE THE CONNECTIONS
# a dictionary with the connected pair index as key and
# a set of crossed pair indexes as value
closed_crossings = dict()
# the positions that should be connected
pairs = list(zip(j1, j2))
for ind, (x1, x2) in enumerate(pairs):
# initialize the crossed pair indexes
closed_crossings[ind] = set()
# the minimum x position to connect in this pair
x_min = min(x1, x2)
# the maximum x position to connect in this pair
x_max = max(x1, x2)
# the crossed pairs are pairs that have at least one position between
# the minimum and maximum positions and are not already connected
# (the current pair is already a key, so it is never in its own set)
crossed = {
i
for i, x12 in enumerate(pairs)
if (
i not in closed_crossings
and (x_min <= x12[0] <= x_max or x_min <= x12[1] <= x_max)
)
}
# update the crossed pairs for this connection
closed_crossings[ind].update(crossed)
### CHECK FOR NESTED CROSSINGS
# if pair #1 crosses pair #2; then pair #2 crosses pair #3 and pair #4
# the shift of pair #1 has to take into account also pair #3 and pair #4
# go through the connected pairs
for key1 in list(closed_crossings.keys()):
for key2 in list(closed_crossings.keys()):
# if the second pair is in the crossed pairs of the first pair
if key2 in closed_crossings[key1]:
closed_crossings[key1].update(closed_crossings[key2])
# calculate the maximum number of crossings
max_crossing = max(len(crossed) for crossed in closed_crossings.values())
### MAKE THE STRANDS
strands = []
for ind, (x1, x2) in enumerate(pairs):
# the number of crossings for this pair
n_crossings = len(closed_crossings[ind])
# every branch below produces max_crossing + 1 symbols that are not "─"
if x1 < x2: # the first motif is on the left
strand = (
"│" * n_crossings
+ "╰"
+ "─" * (x2 - x1 - 1)
+ "╮"
+ "│" * (max_crossing - n_crossings)
)
elif x1 > x2: # the first motif is on the right
strand = (
"│" * (max_crossing - n_crossings)
+ "╯"
+ "─" * (x1 - x2 - 1)
+ "╭"
+ "│" * n_crossings
)
else: # the motifs are on the same position vertically
strand = "│" * (max_crossing + 1)
# can add the symbol "^" for retrocompatibility with ROAD
# instead add arrows for the directionality of the strand
# if you do this, increase the max_crossing by 1
strand += "↑"
# the strand starts at the top junction and is drawn going down
strands.append(
Strand(
strand,
directionality=directionalities[ind],
start=(x1, start_y),
direction=Direction.DOWN,
)
)
# Extra +1 to the max_crossing to add the symbol "^" or "↑"
connection_height = Position((0, max_crossing + 1 + 1))
return Motif(strands, join=False), connection_height
###
### PROTECTED METHODS
###
def _assemble(self) -> List[List[Motif]]:
"""
Assemble the origami by shifting the motifs in the correct position,
concatenating the motifs in the lines, and creating the connection motifs.
The results are stored in `_assembled`, `_index_shift_map` and
`_pos_index_map`; nothing is returned, despite the return annotation.
Empty motifs are removed from the matrix as a side effect.
"""
### Screen the matrix to remove the empty motifs
self._matrix = [[m for m in line if m] for line in self._matrix]
### initialize the variables
# motif_lines: one concatenated motif per row of the matrix
# shifts: the total shift of each motif, with the same layout as the matrix
# align_shifts: the horizontal shift applied to each whole row
motif_lines = []
shifts = [[Position.zero() for _ in line] for line in self._matrix]
align_shifts = [Position.zero() for _ in range(self.num_lines)]
### Center the rows, can precompute the shift
if self.align == "center":
# take the maximum center position of the motifs in all lines
max_center = max([num_char // 2 for num_char in self.num_char], default=0)
# shift to the right to align the center of the motifs
align_shifts = [
Position((max_center - num_char // 2, 0)) for num_char in self.num_char
]
for ind, line in enumerate(self._matrix):
# create the line, working on copies of the motifs
mot_line, vh_shifts = Motif.concat(
line,
copy=True,
align=True,
extend=True,
return_shifts=True,
unlock_strands=self._ss_assembly,
lock_coords=False,
)
mot_line.shift(align_shifts[ind])
# accumulate the concatenation shift and the row alignment shift
shifts[ind] = [
h + vh_shifts[i] + align_shifts[ind] for i, h in enumerate(shifts[ind])
]
motif_lines.append(mot_line)
# shift the motif horizontally to align the first junction
if self._align == "first":
_, h_shifts = Motif.align(
motif_lines,
axis=0,
return_shifts=True,
)
shifts = [[h + h_shifts[i] for h in line] for i, line in enumerate(shifts)]
### calculate the junctions
# a connection motif is inserted between each pair of consecutive rows,
# so the index advances by two at each step
ind1 = 0
while ind1 < len(motif_lines) - 1:
# get the motifs
top_motif = motif_lines[ind1]
bot_motif = motif_lines[ind1 + 1]
# get the junctions of the motifs
j1 = top_motif.junctions
j2 = bot_motif.junctions
# get the directionalities of the top junction
mot_to_strand = motif_lines[ind1].get_strand_index_map()
directs = []
for pos in j1[Direction.DOWN]:
strand = top_motif[mot_to_strand[pos]]
# the strand directionality is kept when the junction is the strand end
# pointing down, and reversed otherwise
if pos == strand.end and strand.end_direction == Direction.DOWN:
directs.append(strand.directionality)
else:
directs.append(strand.directionality[::-1])
# create the connection motifs
m_connect, _ = self._calculate_connections(j1, j2, directs)
# intercalate the connections into the motif lines
motif_lines.insert(ind1 + 1, m_connect)
ind1 += 2
# assemble the origami, piece by piece
mot = Motif()
for ind, line in enumerate(motif_lines):
# add the line to the motif
mot, v_shifts = Motif.concat(
mot,
line,
axis=0,
copy=False,
align=False,
position_based=True,
return_shifts=True,
unlock_strands=self._ss_assembly,
lock_coords=False,
)
# even indexes are the rows of the matrix, odd indexes the connections
if len(v_shifts) > 1 and ind % 2 == 0:
# add the vertical shift to the horizontal shift
shifts[ind // 2] = [h + v_shifts[1] for h in shifts[ind // 2]]
self._assembled = mot
# matrix index (row, column) of each motif -> total shift of the motif
self._index_shift_map = {
(i, j): shift
for i, line in enumerate(shifts)
for j, shift in enumerate(line)
}
# shifted position of each symbol -> matrix index of the motif containing it
self._pos_index_map = {
(pos + shifts[i][j]): (i, j)
for i, line in enumerate(self._matrix)
for j, m in enumerate(line)
for pos in m.positions
}
# invert the assembled strands that have the "35" directionality
for s in self._assembled:
if s.directionality == "35":
s.invert()
def _updated_motif(self, **kwargs) -> None:
    """
    Discard the cached assembled origami and pseudoknot information, then
    trigger the registered callbacks.

    Parameters
    ----------
    **kwargs : dict
        Optional keyword arguments passed to callbacks.
    """
    for cached_attribute in ("_assembled", "_pseudoknots"):
        setattr(self, cached_attribute, None)
    self._trigger_callbacks(**kwargs)
###
### METHODS
###
[docs]
def append(self, item: Union[Motif, List[Motif]], copy: bool = True) -> None:
    """
    Append a Motif or a list of Motifs to the end of the matrix.

    If the item is a single Motif, it is appended to the last line of the
    matrix (a first line is created when the matrix is empty).
    If the item is a list or tuple of Motifs, it is appended as a new line
    in the matrix. The new line is always stored as a list, so that motifs
    can be appended to it later.

    Parameters
    ----------
    item : Motif or list of Motif
        The motif(s) to append.
    copy : bool, default=True
        Whether to copy motifs before appending.

    Raises
    ------
    TypeError
        If `item` is not a Motif or a list of Motifs.
    """
    if isinstance(item, Motif):
        if not self._matrix:
            self._matrix.append([])
        if copy:
            item = item.copy()
        # update the callbacks:
        if self._updated_motif not in item._callbacks:
            item.register_callback(self._updated_motif)
        self._matrix[-1].append(item)

    elif isinstance(item, (list, tuple)) and all(
        isinstance(m, Motif) for m in item
    ):
        if copy:
            item = [m.copy() for m in item]
        elif not isinstance(item, list):
            # a tuple row would be immutable: later single-motif appends
            # to this line would fail
            item = list(item)
        # update the callbacks:
        for m in item:
            if self._updated_motif not in m._callbacks:
                m.register_callback(self._updated_motif)
        self._matrix.append(item)

    else:
        raise TypeError(
            f"Only motifs or lists of motifs can be added to the "
            f"Origami, but the object {item} was added."
        )
    self._updated_motif()
[docs]
def barrier_repr(
    self,
    kl_delay: int = 150,
    barriers: Optional[str] = None,
    return_list: bool = False,
) -> Union[str, List[str]]:
    """
    Overlay folding barrier characters onto the text representation of the
    origami.

    The character at the position of each nucleotide is replaced by the
    barrier character with the same sequence index.

    Parameters
    ----------
    kl_delay : int, default=150
        Delay parameter passed to the folding barrier calculation.
    barriers : str, optional
        Precomputed folding barrier string. If None, it is computed from
        the assembled origami.
    return_list : bool, default=False
        Whether to return the result as a list of lines instead of a string.

    Returns
    -------
    str or list of str
        The annotated structure as a single string or a list of lines.
    """
    motif = self.assembled
    rows = str(self).split("\n")
    if barriers is None:
        barrier_result = motif.folding_barriers(kl_delay=kl_delay)
        barriers = barrier_result[0]
    for base_index, (x, y) in enumerate(motif.seq_positions):
        row = rows[y]
        # swap the single character at column x with the barrier symbol
        rows[y] = row[:x] + barriers[base_index] + row[x + 1 :]
    return rows if return_list else "\n".join(rows)
[docs]
def clear_sequence(
    self,
    clear_kissing_loops: bool = True,
    clear_tetraloops: bool = False,
) -> None:
    """
    Clear the sequences of all stems in the origami, and optionally of the
    kissing loops and tetraloops.

    Parameters
    ----------
    clear_kissing_loops : bool, default=True
        Whether to replace the sequences of kissing loops with "N".
    clear_tetraloops : bool, default=False
        Whether to replace the sequences of tetraloops with "N".

    Returns
    -------
    None
    """
    # import here to avoid circular imports
    from ..motifs import Stem, KissingLoop, TetraLoop

    for line in self._matrix:
        for motif in line:
            if isinstance(motif, Stem):
                # NOTE(review): presumably the `length` setter rebuilds the
                # stem with its default sequence; confirm in Stem
                motif.length = motif.length
                continue
            if clear_kissing_loops and isinstance(motif, KissingLoop):
                n_bases = len(motif.get_kissing_sequence())
                motif.set_sequence("N" * n_bases)
                continue
            if clear_tetraloops and isinstance(motif, TetraLoop):
                n_bases = len(motif.sequence)
                motif.set_sequence("N" * n_bases)
[docs]
def copy(self) -> "Origami":
    """
    Create a deep copy of the Origami.

    The new instance is created without calling `__init__`; the motifs of
    the matrix are copied and bound to the update callback of the copy.

    Returns
    -------
    Origami
        A new instance identical to the current one.
    """
    clone = Origami.__new__(Origami)

    # make sure to register the callback in all the motifs
    # as a failsafe mechanism. This is needed in case the motifs were
    # modified at the line level, like origami[0][0] = new_motif
    own_callback = self._updated_motif
    for m in (m for line in self._matrix for m in line):
        if own_callback not in m._callbacks:
            m.register_callback(own_callback)

    # prepare the new attributes
    copied_matrix = []
    for line in self._matrix:
        copied_matrix.append([m.copy(callback=clone._updated_motif) for m in line])
    clone._matrix = copied_matrix
    clone._align = self._align
    clone._ss_assembly = self._ss_assembly
    clone._assembled = None if not self._assembled else self._assembled.copy()
    clone._pos_index_map = dict(self._pos_index_map.items())
    clone._index_shift_map = dict(self._index_shift_map.items())
    clone._pseudoknots = copy.deepcopy(self._pseudoknots)
    return clone
[docs]
def duplicate_line(self, idx: int, insert_idx: Optional[int] = None) -> None:
    """
    Duplicate a line of motifs and insert the copy into the matrix.

    Parameters
    ----------
    idx : int
        Index of the line to duplicate.
    insert_idx : int, optional
        Line index at which to insert the duplicated line. If None, it will
        be added at the end.

    Raises
    ------
    ValueError
        If the given `idx` is not an integer.
    """
    if not isinstance(idx, int):
        raise ValueError(f"The index must be an integer, but {idx} was given.")
    duplicated = []
    for m in self._matrix[idx]:
        # each copy reports its changes to this origami
        duplicated.append(m.copy(callback=self._updated_motif))
    target = len(self._matrix) if insert_idx is None else insert_idx
    self._matrix.insert(target, duplicated)
    self._updated_motif()
# inherit the documentation from the function
@wraps(Motif.folding_barriers)
def folding_barriers(self, kl_delay: int = 150) -> Tuple[str, int]:
    # the documentation is taken from Motif.folding_barriers through `wraps`;
    # the calculation itself is delegated to the assembled origami
    assembled_motif = self.assembled
    return assembled_motif.folding_barriers(kl_delay=kl_delay)
[docs]
def get_motif_at_position(self, position: Position) -> Motif:
    """
    Get a motif from its position in 2D coordinates.

    Parameters
    ----------
    position : Position
        Global coordinate (x, y) in the assembled structure. A tuple of
        integers with at least two coordinates is also accepted and is
        converted to a Position.

    Returns
    -------
    Motif
        The motif located at the position.

    Raises
    ------
    ValueError
        If the position is not valid or not found in the map.
    """
    if not isinstance(position, Position):
        # a plain (x, y) tuple has exactly two coordinates, so two must be
        # accepted here
        is_coordinate_tuple = (
            isinstance(position, tuple)
            and len(position) >= 2
            and all(isinstance(coord, int) for coord in position)
        )
        if not is_coordinate_tuple:
            raise ValueError(
                f"The position must be a Position object or a tuple of integers,"
                f" but {position} was given (type: {type(position)})."
            )
        position = Position(position)
    index_map = self.pos_index_map
    if position not in index_map:
        raise ValueError(
            f"The position {position} is not in the map" f" of the origami."
        )
    return self[index_map[position]]
[docs]
def get_motif_at_seq_index(self, index: int) -> Motif:
    """
    Get the motif that contains the given sequence index.

    Parameters
    ----------
    index : int
        Sequence index in the full assembled sequence.

    Returns
    -------
    Motif
        Motif containing the base at the given index.
    """
    matrix_index = self.get_slice_at_seq_index(index)
    return self[matrix_index]
[docs]
def get_motif_type(self, motif_type: type) -> List[Motif]:
    """
    Get all motifs in the Origami that match the given type.

    Parameters
    ----------
    motif_type : type
        Motif subclass/type to filter.

    Returns
    -------
    List[Motif]
        All motifs that are instances of the specified type, in matrix
        order (row by row).
    """
    matching = []
    for line in self._matrix:
        matching.extend(m for m in line if isinstance(m, motif_type))
    return matching
[docs]
def get_slice_at_seq_index(self, index: int) -> Tuple[int, int]:
    """
    Get matrix coordinates of the motif containing the given sequence index.

    Parameters
    ----------
    index : int
        Index of the nucleotide in the full sequence. The index counts the
        nucleotides only, in the order of the sequence positions of the
        assembled origami.

    Returns
    -------
    Tuple[int, int]
        (row, column) coordinates in the matrix.

    Raises
    ------
    ValueError
        If the index is not an integer, is negative, or is not lower than
        the number of nucleotides.
    """
    seq_positions = self.assembled.seq_positions
    # validate against the number of nucleotides: this is the range that
    # the index can actually address
    n_bases = len(seq_positions)
    if not isinstance(index, int) or not 0 <= index < n_bases:
        raise ValueError(
            f"The sequence index must be a non-negative integer lower than "
            f"the length of the sequence ({n_bases}), "
            f"but {index} (type: {type(index)}) was given."
        )
    # direct lookup: nucleotide index -> position -> matrix index
    return self.pos_index_map[seq_positions[index]]
[docs]
def improve_folding_pathway(self, kl_delay: int = 150) -> "Origami":
"""
Suggest a better folding pathway by circularly shifting the structure.
This method attempts to find a better folding pathway by shifting the
structure to a new position. IMPORTANT: this method is designed for
simple origami blueprints based on DAE crossovers and may not work
correctly for different structures.
The method works on a copy: the current origami is not modified.
Parameters
----------
kl_delay : int, default=150
Delay parameter passed to the folding barrier calculations.
Returns
-------
Origami
A new Origami object with an optimized folding pathway.
"""
# import here to avoid circular imports
from ..motifs import Stem
from ..utils import start_end_stem
# remove the motif that starts the Origami
# (the first motif for which `"5" in m` is true)
ori = self.copy()
start_ind = ori.index(lambda m: "5" in m)
if start_ind:
ori.pop(start_ind[0])
# calculate the folding barriers of the origami without the start motif
start_barrier = ori.folding_barriers(kl_delay=kl_delay)[1]
### Check the folding barriers of starting in each possible stem
### of at least 5 bases, assuming the motif has a length property
# initialize the structures
db, stacks = dot_bracket_to_stacks(ori.structure)
min_bar = start_barrier
best_middle = 0
# map the sequence index to the slice
ind_to_slice = {
ind: ori.pos_index_map[pos]
for ind, pos in enumerate(ori.assembled.seq_positions)
}
# rotate the dot-bracket structure
# (the loop variable reuses the name `db`; the zip is built before that)
for db, (start, end) in zip(db, stacks):
if db in "()" and (end - start) > 4:
middle = (start + end) // 2
new_strucutre = rotate_dot_bracket(ori.structure, middle)
_, new_bar = folding_barriers(
kl_delay=kl_delay, structure=new_strucutre
)
# save the best folding barrier
if new_bar < min_bar:
min_bar = new_bar
best_middle = middle
# replace the starting motif, check the two possible orientations
for flip in range(2):
# create a copy of the origami, get the slice and the motif
ori_copy = ori.copy()
start_slice = ind_to_slice[best_middle]
m = ori[start_slice]
### IMPORTANT REMARK:
### THIS ASSUMES THE MOTIF HAS A LENGTH PROPERTY
# split the motif in two stems with the start/end motif in between;
# the two stem lengths add up to the original length
stem_1 = Stem(m.length // 2)
start_end = start_end_stem()
if flip:
start_end.flip()
stem2 = Stem(m.length - stem_1.length)
ori_copy[start_slice] = [stem_1, start_end, stem2]
# This is the good origami, save it
if ori_copy.folding_barriers(kl_delay=kl_delay)[1] == min_bar:
ori = ori_copy
break
# when neither orientation reaches `min_bar`, the copy without the start
# motif is returned as it is
return ori
[docs]
def index(
    self,
    condition: Union[Callable[[Motif], bool], Motif, Type[Motif]],
    return_matrix_format: bool = False,
) -> Union[List[Tuple[int, int]], List[List[int]]]:
    """
    Find the matrix coordinates of motifs that satisfy a given condition.

    Parameters
    ----------
    condition : Callable[[Motif], bool] or Motif or Type[Motif]
        - A function that takes a Motif and returns True if it matches,
        - a Motif instance to match directly (==, e.g. `pf.Tetraloop()`),
        - or a Motif *class* to match by isinstance (e.g. `pf.Stem`).
    return_matrix_format : bool, default=False
        If True, returns one list per row of the matrix, containing the
        column indices of the matching motifs of that row.
        Otherwise returns a flat list of (row, column) tuples.

    Returns
    -------
    Union[List[Tuple[int, int]], List[List[int]]]
        List of (row, column) indices of matching motifs.
        If `return_matrix_format` is True,
        returns a list of lists (row-wise) of indices (column-wise).

    Raises
    ------
    ValueError
        If `condition` is not a callable, a Motif instance, or a Motif subclass.
    """
    if isinstance(condition, Motif):
        reference = condition

        def matches(m: Motif) -> bool:
            return m == reference

    elif isinstance(condition, type) and issubclass(condition, Motif):
        wanted_type = condition

        def matches(m: Motif) -> bool:
            return isinstance(m, wanted_type)

    elif hasattr(condition, "__call__"):
        # a user-supplied predicate is applied to the motifs as it is
        matches = condition

    else:
        raise ValueError(
            f"The condition must be a function or a Motif "
            f"object, but {condition} was given."
        )

    if not return_matrix_format:
        flat_hits = []
        for y, line in enumerate(self._matrix):
            for x, m in enumerate(line):
                if matches(m):
                    flat_hits.append((y, x))
        return flat_hits

    row_hits = []
    for line in self._matrix:
        row_hits.append([x for x, m in enumerate(line) if matches(m)])
    return row_hits
[docs]
def insert(
    self,
    idx: Union[int, slice, Tuple[int, int]],
    item: Union[Motif, List[Motif]],
    copy: bool = True,
) -> None:
    """
    Insert a Motif or list of Motifs at a specific position.

    With a single index, the item is inserted as a new line of the matrix
    (a single Motif becomes a line of one motif). With a (row, col) pair,
    the motif(s) are inserted into the existing row, starting at `col`.
    Nothing is copied or registered when the item or the index is invalid.

    Parameters
    ----------
    idx : int, slice, tuple of int
        The index or coordinate at which to insert the item(s).
    item : Motif or list of Motif
        The motif(s) to insert.
    copy : bool, default=True
        Whether to copy motifs before inserting.

    Raises
    ------
    ValueError
        If the index or item is invalid.
    """
    ### check the item variable
    if isinstance(item, (list, tuple)) and all(isinstance(m, Motif) for m in item):
        dimension = 2
    elif isinstance(item, Motif):
        dimension = 1
    else:
        raise ValueError(
            f"Only motifs or lists of motifs can be added to the "
            f"Origami, but the object type {type(item)} was added."
        )

    ### check the index before touching the motifs
    line_index = isinstance(idx, (int, slice))
    cell_index = isinstance(idx, (tuple, list)) and len(idx) == 2
    if not (line_index or cell_index):
        raise ValueError(
            f"Index must be a single index or a tuple of "
            f"(row, col) or a list of two indices. "
            f" Got {idx} instead."
        )

    ### copy the motifs and update the callbacks
    if dimension == 2:
        if copy:
            item = [m.copy() for m in item]
        elif not isinstance(item, list):
            # a tuple row would be immutable once stored in the matrix
            item = list(item)
        new_motifs = item
    else:
        if copy:
            item = item.copy()
        new_motifs = [item]
    for m in new_motifs:
        if self._updated_motif not in m._callbacks:
            m.register_callback(self._updated_motif)

    ### add the item to the matrix according to the index
    if line_index:
        new_line = item if dimension == 2 else [item]
        self._matrix.insert(idx, new_line)
    else:
        row = self._matrix[idx[0]]
        for i, m in enumerate(new_motifs):
            row.insert(idx[1] + i, m)
    self._updated_motif()
[docs]
def pop(self, idx: Union[int, slice, Tuple[int, int]]) -> Union[Motif, List[Motif]]:
    """
    Remove and return a motif or line of motifs at the given index.

    A single index removes a whole line of the matrix; a (row, col) pair
    removes one motif from a line. The callbacks of the removed motifs are
    cleared. Nothing is removed, and None is returned, when the matrix (or
    the addressed row) is empty.

    Parameters
    ----------
    idx : int, slice, tuple of int
        The index or coordinates to remove from.

    Returns
    -------
    Motif or list of Motif
        The removed motif(s).

    Raises
    ------
    ValueError
        If the index is not valid.
    """
    if isinstance(idx, (int, slice)):
        if not self._matrix:
            return
        popped = self._matrix.pop(idx)
    elif isinstance(idx, (tuple, list)) and len(idx) == 2:
        if not self._matrix[idx[0]]:
            return
        popped = self._matrix[idx[0]].pop(idx[1])
    else:
        raise ValueError(
            f"Index must be a single index or a tuple of "
            f"(row, col) or a list of two indices. "
            f" Got {idx} instead."
        )

    # remove the callbacks from everything that left the matrix
    removed_motifs = popped if isinstance(popped, list) else [popped]
    for m in removed_motifs:
        m._clear_callbacks()
    self._updated_motif()
    return popped
[docs]
def reload(self) -> None:
    """
    Discard the cached results and assemble the origami again.

    `_updated_motif()` is called first (it resets the cached values and
    triggers the callbacks), then `_assemble()` rebuilds the assembled
    origami.
    """
    # step 1: invalidate the caches and notify the callbacks
    self._updated_motif()
    # step 2: rebuild right away instead of waiting for the next access
    self._assemble()
[docs]
def remove(self, motif: Motif) -> None:
    """
    Remove a specific motif from the matrix.

    Only the first matching motif of the first line that contains a match
    is removed. The callbacks are cleared on the motif that was stored in
    the matrix, which may be a different object that compares equal to the
    given one. The cached properties are reset even if nothing is removed.

    Parameters
    ----------
    motif : Motif
        The motif to remove.
    """
    for line in self._matrix:
        if motif in line:
            # take out the stored element and clear the callbacks of that
            # object, not of the (possibly distinct) argument
            stored = line.pop(line.index(motif))
            stored._clear_callbacks()
            break
    self._updated_motif()
# inherit the documentation from the function
@wraps(Motif.save_3d_model)
def save_3d_model(self, *args, **kwargs) -> Optional[Tuple[str, str]]:
    # the documentation is taken from Motif.save_3d_model through `wraps`;
    # all the arguments are forwarded to the assembled origami
    assembled_motif = self.assembled
    return assembled_motif.save_3d_model(*args, **kwargs)
[docs]
def save_fasta(self, filename: str, return_text: bool = False) -> Optional[str]:
    """
    Save the sequence of the Origami to a FASTA file.

    The text has three lines: the header (">" followed by the file name
    without suffix), the sequence and the dot-bracket structure.

    Parameters
    ----------
    filename : str
        Path to the output file. The suffix is replaced with ".fasta".
    return_text : bool, default=False
        If True, return the text instead of saving it to a file.

    Returns
    -------
    Optional[str]
        The FASTA text if return_text is True.
    """
    path = Path(filename).with_suffix(".fasta")
    header = f">{path.stem}\n"
    body = f"{self.sequence}\n" f"{self.structure}\n"
    text = header + body
    if return_text:
        return text
    with open(str(path), "w", encoding="utf-8") as f:
        f.write(text)
[docs]
def save_json(
    self, filename: str = "Origami", return_data: bool = False
) -> Optional[str]:
    """
    Save the Origami instance to a JSON file.

    The saved data is the dictionary returned by `to_json()`, together
    with the "pyfurnace_version" key.

    Parameters
    ----------
    filename : str, default 'Origami'
        Path to the output file. The suffix is replaced with ".json".
    return_data : bool, default False
        If True, return the JSON text instead of saving it to a file.

    Returns
    -------
    Optional[str]
        The JSON-formatted string (indented with 4 spaces) if return_data
        is True, otherwise None.
    """
    from ... import __version__  # import here to avoid circular imports

    path = Path(filename).with_suffix(".json")
    data = {"pyfurnace_version": __version__}
    data.update(self.to_json())
    if return_data:
        # the data is returned serialized, not as a dictionary
        return json.dumps(data, indent=4)
    with open(str(path), "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)
[docs]
def save_text(
    self, filename: str, to_road: bool = False, return_text: bool = False
) -> Optional[str]:
    """
    Save a text report of the Origami to a text file.

    The report contains the sequence, the dot-bracket structure, the
    pseudoknot information, the blueprint and the folding barriers
    representation.

    Parameters
    ----------
    filename : str
        Path to the output file. The suffix is replaced with ".txt".
    to_road : bool, default=False
        If True, the blueprint is written in the ROAD-compatible format.
    return_text : bool, default=False
        If True, return the text instead of saving it to a file.

    Returns
    -------
    Optional[str]
        The text if return_text is True.
    """
    path = Path(filename).with_suffix(".txt")
    name = path.stem
    # the sections are evaluated and joined in the order of the report
    sections = [
        f">{name}\n",
        f"Sequence:\n{self.sequence}\n",
        f"Structure:\n{self.structure}\n",
        f"Pseudoknots info:\n{self.pseudoknots}\n\n",
        f"Blueprint:\n\n",
        f"{self.to_road() if to_road else str(self)}\n\n",
        f"Folding Barriers:\n\n",
        f"{self.barrier_repr()}\n",
    ]
    text = "".join(sections)
    if return_text:
        return text
    with open(str(path), "w", encoding="utf-8") as f:
        f.write(text)
[docs]
def to_json(self) -> Dict[str, Any]:
    """
    Convert the Origami instance to a JSON-serializable dictionary.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys "matrix" (the `to_json()` output of each
        motif, row by row), "align" and "ss_assembly".
    """
    serialized_rows = []
    for line in self._matrix:
        serialized_rows.append([m.to_json() for m in line])
    return {
        "matrix": serialized_rows,
        "align": self._align,
        "ss_assembly": self._ss_assembly,
    }
[docs]
def to_road(self) -> str:
    """
    Try to convert the Origami's text representation into ROAD-compatible format.

    The conversion is a sequence of plain text replacements applied, in
    order, to `str(self)`.

    Returns
    -------
    str
        ROAD-compatible structure representation.
    """
    # the order matters: the longer patterns are replaced before the
    # shorter ones that they contain
    replacements = (
        ("↑", "^"),
        ("↓", "^"),
        ("│ ┊┊┊┊┊┊ │", "│ ****** │"),
        (" ┊┊ ", " !! "),
        (" ┊ ", " ! "),
    )
    road_text = str(self)
    for symbol, road_symbol in replacements:
        road_text = road_text.replace(symbol, road_symbol)
    return road_text