Source code for pyfurnace.design.motifs.stem

from typing import Optional, List, Union, Literal
import random
from ..core.symbols import nucl_to_pair
from ..core.sequence import Sequence
from ..core.coordinates_3d import Coords
from ..core.strand import Strand
from ..core.motif import Motif


class Stem(Motif):
    """
    Represents a helical RNA stem motif consisting of complementary
    base-paired strands.

    The `Stem` class models a double-stranded RNA region with optional support
    for wobble base pairs and short sequences set to strong bases. It can be
    created from a user-defined sequence or automatically generated based on a
    target length.

    Parameters
    ----------
    length : int, optional
        Number of base pairs in the stem. Ignored if `sequence` is provided.
    sequence : str or Sequence, optional
        Sequence for the top strand. If provided, the bottom strand is
        inferred as its reverse complement.
    wobble_interval : int, optional
        Spacing interval for inserting wobble base pairs, e.g., every N bases.
        Default is 5. Ignored if `sequence` is given. A value of 0 disables
        wobble insertion.
    wobble_tolerance : int, optional
        Maximum random deviation from the defined wobble interval.
        Default is 2.
    wobble_insert : {"middle", "start", "end"}, optional
        Positioning strategy for wobble base insertions. Default is "middle".
    strong_bases : bool, optional
        If True, use strong (GC) base pairing for short stems (≤ 3 bp).
        Default is True. The value is remembered and reused every time the
        strands are regenerated.
    **kwargs : dict, optional
        Additional keyword arguments passed to the `Motif` superclass.

    Attributes
    ----------
    length : int
        Signed length of the stem as stored by the motif; the number of base
        pairs is its absolute value.
    wobble_interval : int
        Interval used for wobble base insertion. Zero if sequence is specified.
    wobble_tolerance : int
        Allowed range of randomness around the wobble interval.
    wobble_insert : str
        Strategy for where to place wobble base pairs.
    strands : list of Strand
        Two strands forming the helical stem, including sequence and 3D
        coordinates.
    """

    def __init__(
        self,
        length: int = 0,
        sequence: Union[str, Sequence] = "",
        wobble_interval: int = 5,
        wobble_tolerance: int = 2,
        wobble_insert: Literal["middle", "start", "end"] = "middle",
        strong_bases: bool = True,
        **kwargs,
    ) -> None:
        """
        Initialize a Stem motif, representing a double-stranded helical region.

        Parameters
        ----------
        length : int, default 0
            Number of base pairs in the stem. Ignored if `sequence` is
            provided.
        sequence : str or Sequence, default ""
            Nucleotide sequence for the top strand. If provided, the wobble
            interval and tolerance are stored as 0.
        wobble_interval : int, default 5
            Number of bases between wobble base pair insertions.
        wobble_tolerance : int, default 2
            Random variation range for wobble base pair placement
            (0 to `wobble_tolerance`).
        wobble_insert : {"middle", "start", "end"}, default "middle"
            Strategy for wobble insertion.
        strong_bases : bool, default True
            If True, use strong bases (G or C) for stems shorter than 4 bases.
        **kwargs : dict, optional
            Additional arguments passed to the Motif superclass. If it
            contains ``strands``, those strands are used as they are instead
            of being generated.

        Raises
        ------
        TypeError
            If `length` is not an integer, if `sequence` is neither a string
            nor a Sequence, or if `wobble_interval` / `wobble_tolerance` is
            not a non-negative integer.
        ValueError
            If `wobble_insert` is not one of {"middle", "start", "end"}.

        Returns
        -------
        None
        """
        ### validate the input (same order as the checks were written before)
        self._check_wobble_insert(wobble_insert)
        self._check_non_negative_int(wobble_interval, "wobble frequency")
        self._check_non_negative_int(wobble_tolerance, "wobble tolerance")
        self._check_length(length)
        if not isinstance(sequence, (str, Sequence)):
            raise TypeError(
                f"The sequence of a stem must be a string or a Sequence "
                f"object, got {type(sequence)}."
            )

        ### set the private attributes
        # a user-defined sequence switches the wobble generation off
        self._wobble_interval = wobble_interval if not sequence else 0
        self._wobble_tolerance = wobble_tolerance if not sequence else 0
        self._wobble_insert = wobble_insert
        # remembered so that later regenerations (length / wobble setters)
        # keep honouring the user's choice instead of reverting to True
        self._strong_bases = strong_bases
        self._length = length
        if sequence:
            self._length = len(sequence) * getattr(self, "_sign", 1)

        ### If the user provides strands, use them directly
        if "strands" in kwargs:
            strands = kwargs.pop("strands")
        else:
            ### otherwise create the strands
            strands = self._create_strands(
                sequence=sequence,
                length=length,
                return_strands=True,
                strong_bases=strong_bases,
            )

        # join=False is always passed to Motif, overriding any user value
        kwargs["join"] = False
        # Initialize the motif
        super().__init__(strands=strands, **kwargs)

    ###
    ### VALIDATION HELPERS
    ###

    @staticmethod
    def _check_wobble_insert(value) -> None:
        """Raise ValueError unless `value` is 'middle', 'start' or 'end'."""
        if value not in ["middle", "start", "end"]:
            raise ValueError(
                f"Invalid value for wobble_insert: {value}. "
                "The value must be 'middle', 'start' or 'end'."
            )

    @staticmethod
    def _check_non_negative_int(value, what: str) -> None:
        """Raise TypeError unless `value` is an integer greater or equal to 0."""
        if not isinstance(value, int) or value < 0:
            raise TypeError(f"The {what} must be a positive integer, got {value}.")

    @staticmethod
    def _check_length(value) -> None:
        """Raise TypeError unless `value` is an integer."""
        if not isinstance(value, int):
            raise TypeError(f"The length of a stem must be an integer, got {value}.")

    ###
    ### PROPERTIES
    ###

    @property
    def length(self) -> int:
        """Number of nucleotides in a stem"""
        return self._length

    @length.setter
    def length(self, new_length: int) -> None:
        """Regenerate both strands for the new length (sequence is rebuilt)."""
        self._check_length(new_length)
        self._create_strands(length=new_length)

    @property
    def wobble_interval(self) -> int:
        """Interval between two wobble base pairs (0 means no wobble)."""
        return self._wobble_interval

    @wobble_interval.setter
    def wobble_interval(self, new_freq: int) -> None:
        """Set the wobble interval and regenerate the strands from the length."""
        self._check_non_negative_int(new_freq, "wobble frequency")
        self._wobble_interval = new_freq
        # update the sequence of the top strand and the bottom strand
        self.length = self._length

    @property
    def wobble_tolerance(self) -> int:
        """Maximum random deviation applied to the wobble interval."""
        return self._wobble_tolerance

    @wobble_tolerance.setter
    def wobble_tolerance(self, new_tolerance: int) -> None:
        """Set the wobble tolerance and regenerate the strands from the length."""
        self._check_non_negative_int(new_tolerance, "wobble tolerance")
        self._wobble_tolerance = new_tolerance
        # update the sequence of the top strand and the bottom strand
        self.length = self._length

    @property
    def wobble_insert(self) -> str:
        """Where the wobble base is placed: 'middle', 'start' or 'end'."""
        return self._wobble_insert

    @wobble_insert.setter
    def wobble_insert(self, new_insert: str) -> None:
        """Set the wobble placement and regenerate the strands from the length."""
        self._check_wobble_insert(new_insert)
        self._wobble_insert = new_insert
        # update the sequence of the top strand and the bottom strand
        self.length = self._length

    ###
    ### METHOD
    ###

    def set_up_sequence(self, new_seq: str) -> None:
        """Set the sequence of the top strand"""
        if not isinstance(new_seq, str):
            raise TypeError(f"The sequence of a stem must be a string, got {new_seq}.")
        self._create_strands(sequence=new_seq)

    def set_down_sequence(self, new_seq: str) -> None:
        """Set the sequence of the bottom strand"""
        # the top strand is the reverse complement of the bottom one
        self.set_up_sequence(new_seq.translate(nucl_to_pair)[::-1])

    def set_strong_bases(self, strong_bases: bool) -> None:
        """Set whether to use strong bases for short stems"""
        # store the choice so that it survives later regenerations
        self._strong_bases = strong_bases
        self._create_strands(length=self._length, strong_bases=strong_bases)

    def _wobble_sequence(self, seq_len: int) -> str:
        """
        Build a top-strand template of `seq_len` "N" symbols in which the
        wobble positions are marked with "K".

        The first and last symbols are never replaced. The distance between
        two wobble positions is `wobble_interval`, randomly varied by up to
        `wobble_tolerance` (never below 1) when the tolerance is not zero.
        """

        def next_interval() -> int:
            if self._wobble_tolerance == 0:
                return self._wobble_interval
            lowest = max(1, self._wobble_interval - self._wobble_tolerance)
            return random.randint(
                lowest, self._wobble_interval + self._wobble_tolerance
            )

        symbols = ["N"] * seq_len
        last = seq_len - 1
        interval = next_interval()
        i = 1  # the first and last nucleotides are always a normal nucleotide
        while i < last:
            if self._wobble_insert == "start":
                symbols[i] = "K"
            elif self._wobble_insert == "end" and i + interval < last:
                symbols[i + interval] = "K"
            elif self._wobble_insert == "middle" and i + interval // 2 < last:
                symbols[i + interval // 2] = "K"
            # jump to the start of the next window ...
            i += interval + 1
            # ... and draw a new (possibly random) interval for it
            interval = next_interval()
        return "".join(symbols)

    def _create_strands(
        self,
        sequence: Optional[str] = None,
        length: int = 0,
        return_strands: bool = False,
        compute_coords: bool = True,
        strong_bases: Optional[bool] = None,
    ) -> Optional[List[Strand]]:
        """
        Internal method to create the top and bottom strands for the stem
        motif.

        Parameters
        ----------
        sequence : str, optional
            Nucleotide sequence for the top strand. If provided, it takes
            priority over `length`.
        length : int, default 0
            Length of the stem in nucleotides, used if `sequence` is not
            provided.
        return_strands : bool, default False
            If True, return the generated strands instead of assigning them.
        compute_coords : bool, default True
            Whether to compute 3D coordinates for the strands.
        strong_bases : bool, optional
            Whether to enforce strong base pairs for very short sequences
            (3 symbols or fewer). If None (default), the value stored on the
            stem is used, falling back to True when none was stored.

        Returns
        -------
        list of Strand or None
            The generated strands if `return_strands` is True, otherwise None.
        """
        if strong_bases is None:
            strong_bases = getattr(self, "_strong_bases", True)

        ### Create the top and bottom 3D coordinates of the stem
        seq_len = len(sequence) if sequence else abs(length)
        if compute_coords:
            coords = Coords.compute_helix_from_nucl(
                (0, 0, 0),  # start position
                (1, 0, 0),  # base vector
                (0, 1, 0),  # normal vector
                length=seq_len,
                double=True,
            )
            # first half of the helix is the top strand, second half the bottom
            top_coord = Coords(coords[:seq_len])
            bot_coord = Coords(coords[seq_len:])
        else:
            top_coord = None
            bot_coord = None

        ### Choose the sequence of the top strand
        if sequence:
            # if a sequence is provided, it has the priority
            if not isinstance(sequence, Sequence):
                sequence = Sequence(sequence, directionality="53")
            self._length = seq_len * getattr(self, "_sign", 1)
            top_seq = sequence
        else:
            self._length = length
            if seq_len <= 3 and strong_bases:
                top_seq = "S" * seq_len
            elif self._wobble_interval:
                top_seq = self._wobble_sequence(seq_len)
            else:
                top_seq = "N" * seq_len

        ### Create the top and bottom strands
        strands = [
            Strand(top_seq, coords=top_coord),
            Strand(
                # the bottom strand is the reverse complement of the top one
                top_seq.translate(nucl_to_pair)[::-1],
                directionality="53",
                start=(seq_len - 1, 2),
                direction=(-1, 0),
                coords=bot_coord,
            ),
        ]

        if return_strands:
            return strands
        self.replace_all_strands(strands, copy=False, join=False)