Skip to content

Converters

Class Converter

Bases: object

Source code in genespeak/converters.py
class Converter(object):
    """Helpers for turning text into binary strings and DNA-base characters.

    A ``Converter`` owns a ``DNABaseEncoder`` (built from ``schema`` and
    ``binary_string_length``) and a text-encoding strategy resolved by name
    through ``get_strategy``.
    """

    # The Converter API will enforce the use of text-encoding strategy names
    # with hyphens when applicable:
    #   - example: "utf-8" instead of "utf8".
    # This makes sure of conveying that to the users.
    strategy_names = [s.replace("utf", "utf-") for s in AVAILABLE_STRATEGY_NAMES]
    strategies = TextEncodingStrategies()
    encoder: DNABaseEncoder

    def __init__(
        self,
        schema: str = "AGCT",
        binary_string_length: int = 8,
        strategy: str = "ascii",
    ):
        """Builds the encoder and resolves the text-encoding strategy.

        Args:
            schema: Forwarded to ``DNABaseEncoder`` as ``schema``.
            binary_string_length: Forwarded to ``DNABaseEncoder`` as
                ``binary_string_length``.
            strategy: Name of the text-encoding strategy, resolved with
                ``get_strategy``.
        """
        self.encoder = DNABaseEncoder(
            schema=schema, binary_string_length=binary_string_length
        )
        self.strategy = self.get_strategy(strategy_name=strategy)

    def __repr__(self):
        """Returns ``ClassName(schema=..., binary_string_length=..., strategy=...)``."""
        kwargs = dict(
            schema=self.encoder.schema,
            binary_string_length=self.encoder.binary_string_length,
            strategy=self.strategy.name,
        )
        kwargs_string = ", ".join([f"{k}={v}" for k, v in kwargs.items()])
        classname = self.__class__.__name__
        return f"{classname}({kwargs_string})"

    def get_encoded_text(self, text: str) -> List[int]:
        """Encodes each character of the text and returns a list.

        ``text`` is first coerced with ``str``; every character is then
        mapped to its Unicode code point with ``ord``.

        NOTE(review): ``Converter.strategy`` is not consulted here, the
        encoding is always plain ``ord``.
        """
        text = str(text)
        encoded_chr_list = [ord(x) for x in text]
        return encoded_chr_list

    def get_decoded_text(
        self, encoded_chr_list: List[int], as_list: bool = False
    ) -> Union[List[str], str]:
        """Decodes each character from an encoded-character-list (``encoded_chr_list``) and
        returns either a string (if ``as_list = False``) or
        a list of decoded strings (if ``as_list = True``).

        Every code point is decoded with ``chr``.

        NOTE(review): ``Converter.strategy`` is not consulted here, the
        decoding is always plain ``chr``.
        """

        if not (as_list):
            text = "".join(chr(x) for x in encoded_chr_list)
        else:
            text = [chr(x) for x in encoded_chr_list]

        return text

    def dec_to_bin(self, decimal_numbers: List[int]) -> List[int]:
        """Returns a list of binary numbers for a given list of
        POSITIVE decimal integers.

        Each result is the binary digits of the input read back as a
        base-10 ``int`` (for example ``5`` becomes ``101``).
        """
        # bin(x) yields "0b..."; the slice drops that two-character prefix.
        return [int(bin(x)[2:]) for x in decimal_numbers]

    def convert_info_to_2Nbit_binary(self, text: str, length2N: int = 8) -> List[str]:
        """Converts ``text`` into one zero-padded binary string per character.

        Each string is left-padded with ``0`` up to ``length2N`` characters.
        ``str.zfill`` never truncates, so a character whose binary form is
        longer than ``length2N`` yields a longer string.
        """
        # convert to list of ascii / encoded-characters
        text_encoded_chr = self.get_encoded_text(text)
        # convert to binary list of int
        text_encoded_bin = self.dec_to_bin(text_encoded_chr)
        # convert to list of binary-2N-bit str (just 2N digits: 0 or 1)
        text_encoded_bin_2Nbit = [str(x).zfill(length2N) for x in text_encoded_bin]

        return text_encoded_bin_2Nbit

    def get_bin2N_to_bin2(self, str_bin2N: str) -> List[str]:
        """Splits a binary string into consecutive two-character chunks."""
        str_bin2_list = self.split_text(str_bin2N, length=2)
        return str_bin2_list

    def get_bin2_to_bin2N(self, str_bin2: str, length2N: int = 8) -> List[str]:
        """Splits a binary string into consecutive ``length2N``-character chunks."""
        str_bin2N_list = self.split_text(str_bin2, length=length2N)
        return str_bin2N_list

    def split_text(self, text: str, length: int = 4) -> List[str]:
        """Splits ``text`` into consecutive chunks of ``length`` characters.

        Trailing characters that do not fill a complete chunk are dropped,
        because ``zip`` stops as soon as the shared iterator is exhausted.
        """
        # The same iterator object is repeated ``length`` times, so each
        # tuple produced by zip consumes ``length`` successive characters.
        split_text_list = list(map("".join, zip(*[iter(text)] * length)))
        return split_text_list

    def convert_to_dnabase(
        self, bin_str_list: List[str], strategy: Optional[str] = None
    ) -> Tuple[List[str], List[str]]:
        """Maps binary strings to DNA-base characters, two bits at a time.

        Args:
            bin_str_list: Binary strings; each is split into two-character
                chunks with ``get_bin2N_to_bin2``.
            strategy: Optional text-encoding strategy name. When ``None``
                the instance's own ``strategy`` is used; otherwise the name
                is resolved with ``get_strategy``.

        Returns:
            A tuple ``(dnabase_chr, dnabase_bin2)``: the characters looked up
            in ``self.encoder.bin2chr`` and the two-character binary chunks
            they were looked up from. A chunk missing from ``bin2chr`` yields
            ``None`` in ``dnabase_chr``.
        """
        # A list of two-character binary strings
        dnabase_bin2: List[str] = []

        # Determine text-encoding-strategy: fall back to the instance's
        # strategy only when no name is supplied. (The branches used to be
        # inverted, which passed ``None`` to ``get_strategy`` and ignored
        # any explicitly supplied name.)
        # NOTE(review): the resolved strategy is not used by the bit-pair
        # mapping below.
        if strategy is None:
            _strategy = self.strategy
        else:
            _strategy = self.get_strategy(strategy_name=strategy)

        for x_bin2N in bin_str_list:
            dnabase_bin2 += self.get_bin2N_to_bin2(x_bin2N)
        dnabase_chr = [self.encoder.bin2chr.get(x_bin2) for x_bin2 in dnabase_bin2]
        return (dnabase_chr, dnabase_bin2)  # type: ignore

    def get_strategy(self, strategy_name: str = "ascii"):
        """Returns strategy based on name (delegates to ``set_strategy``)."""
        return set_strategy(strategy_name=strategy_name)

dec_to_bin(decimal_numbers)

Returns a list of binary numbers for a given list of POSITIVE decimal integers.

Source code in genespeak/converters.py
def dec_to_bin(self, decimal_numbers: List[int]) -> List[int]:
    """Convert POSITIVE decimal integers into their binary-digit form.

    Each value is rendered with ``bin``, the ``0b`` prefix is removed, and
    the remaining digits are read back as a base-10 ``int`` (``5 -> 101``).
    """
    binary_numbers = []
    for number in decimal_numbers:
        digits = bin(number)[2:]  # strip the leading "0b"
        binary_numbers.append(int(digits))
    return binary_numbers

get_decoded_text(encoded_chr_list, as_list=False)

Decodes each character from an encoded-character-list (encoded_chr_list) and returns either a string (if as_list = False) or a list of decoded strings (if as_list = True).

The encoding/decoding strategy is picked from the Converter.strategy.

Source code in genespeak/converters.py
def get_decoded_text(
    self, encoded_chr_list: List[int], as_list: bool = False
) -> Union[List[str], str]:
    """Turn a list of character codes back into text.

    Every code in ``encoded_chr_list`` is decoded with ``chr``. The decoded
    characters are returned as a list when ``as_list`` is truthy, and joined
    into a single string otherwise.
    """
    decoded_chars = [chr(code) for code in encoded_chr_list]
    if as_list:
        return decoded_chars
    return "".join(decoded_chars)

get_encoded_text(text)

Encodes each character of the text and returns a list.

The encoding strategy is picked from the Converter.strategy.

Source code in genespeak/converters.py
def get_encoded_text(self, text: str) -> List[int]:
    """Return the code point of every character in ``text``.

    ``text`` is coerced with ``str`` first, so non-string input is encoded
    from its string representation. Each character is mapped with ``ord``.
    """
    return list(map(ord, str(text)))

get_strategy(strategy_name='ascii')

Returns strategy based on name.

Source code in genespeak/converters.py
def get_strategy(self, strategy_name: str = "ascii"):
    """Look up a strategy by name by delegating to ``set_strategy``."""
    strategy = set_strategy(strategy_name=strategy_name)
    return strategy

Function set_converter

Creates a Converter if no Converter is provided.

Source code in genespeak/converters.py
def set_converter(
    schema: str = "AGCT",
    binary_string_length: int = 8,
    strategy: str = "ascii",
    converter: Optional[Converter] = None,
) -> Converter:
    """Return ``converter`` when it is a ``Converter``; otherwise build one.

    If ``converter`` is ``None`` or is not a ``Converter`` instance, a new
    ``Converter`` is created from ``schema``, ``binary_string_length`` and
    ``strategy``. Those three arguments are ignored when a valid
    ``converter`` is supplied.
    """
    # isinstance(None, Converter) is False, so None also falls through to
    # the construction path below.
    if isinstance(converter, Converter):
        return converter

    return Converter(
        schema=schema,
        binary_string_length=binary_string_length,
        strategy=strategy,
    )

Last update: 2022-11-20
Back to top