Skip to content

raxmlng

pypythia.raxmlng.RAxMLNG

Class to interact with the RAxML-NG binary.

Parameters:

Name Type Description Default
exe_path Path

Path to the RAxML-NG executable. Defaults to the binary found in the PATH environment variable.

DEFAULT_RAXMLNG_EXE

Attributes:

Name Type Description
exe_path Path

Path to the RAxML-NG executable.

Source code in pypythia/raxmlng.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
class RAxMLNG:
    """Class to interact with the RAxML-NG binary.

    Args:
        exe_path (pathlib.Path, optional): Path to the RAxML-NG executable. Defaults to the binary found in the PATH environment variable.

    Attributes:
        exe_path (pathlib.Path): Path to the RAxML-NG executable.
    """

    def __init__(self, exe_path: Optional[pathlib.Path] = DEFAULT_RAXMLNG_EXE):
        self.exe_path = exe_path

    @staticmethod
    def _flags_from_kwargs(kwargs: dict) -> list[str]:
        """Translate keyword arguments into RAxML-NG command line tokens.

        Args:
            kwargs (dict): Mapping of flag name to flag value. A value of ``None`` marks a flag without a value.

        Returns:
            List of command line tokens: ``--key`` for a ``None`` value, ``--key`` followed by ``str(value)`` otherwise.
            Tokens appear in the iteration order of ``kwargs``.
        """
        flags = []
        for key, value in kwargs.items():
            flags.append(f"--{key}")
            if value is not None:
                flags.append(str(value))
        return flags

    def _base_cmd(
        self, msa_file: pathlib.Path, model: str, prefix: pathlib.Path, **kwargs
    ) -> list[str]:
        """Build the RAxML-NG command shared by all MSA-based invocations.

        Args:
            msa_file (pathlib.Path): Filepath pointing to the MSA file. Passed as absolute path via ``--msa``.
            model (str): Substitution model string. Passed unchanged via ``--model``.
            prefix (pathlib.Path): Prefix to use when running RAxML-NG. Passed as absolute path via ``--prefix``.
            **kwargs: Additional flags appended after the fixed arguments.
                Use "flag=value" for flags with a value and "flag=None" for flags without a value.

        Returns:
            The command as a list of strings, starting with the absolute path of the executable.
        """
        # Convert the extra flags first so the evaluation order matches the command layout below.
        additional_settings = self._flags_from_kwargs(kwargs)

        return [
            str(self.exe_path.absolute()),
            "--msa",
            str(msa_file.absolute()),
            "--model",
            model,
            "--prefix",
            str(prefix.absolute()),
            *additional_settings,
        ]

    def _run_alignment_parse(
        self, msa_file: pathlib.Path, model: str, prefix: pathlib.Path, **kwargs
    ) -> None:
        """Run RAxML-NG on the given MSA with the ``--parse`` flag set.

        Args:
            msa_file (pathlib.Path): Filepath pointing to the MSA file.
            model (str): Substitution model string.
            prefix (pathlib.Path): Prefix to use when running RAxML-NG.
            **kwargs: Additional flags forwarded to the command.
        """
        cmd = self._base_cmd(msa_file, model, prefix, parse=None, **kwargs)
        run_raxmlng_command(cmd)

    def _run_rfdist(
        self, trees_file: pathlib.Path, prefix: pathlib.Path, **kwargs
    ) -> None:
        """Run RAxML-NG with ``--rfdist`` on the given trees file.

        Args:
            trees_file (pathlib.Path): Filepath pointing to the file containing the trees. Passed as absolute path.
            prefix (pathlib.Path): Prefix to use when running RAxML-NG. Passed as absolute path via ``--prefix``.
            **kwargs: Additional flags appended after the fixed arguments.
        """
        additional_settings = self._flags_from_kwargs(kwargs)
        cmd = [
            str(self.exe_path.absolute()),
            "--rfdist",
            str(trees_file.absolute()),
            "--prefix",
            str(prefix.absolute()),
            *additional_settings,
        ]
        run_raxmlng_command(cmd)

    def infer_parsimony_trees(
        self,
        msa_file: pathlib.Path,
        model: str,
        prefix: pathlib.Path,
        n_trees: int = 24,
        **kwargs,
    ) -> pathlib.Path:
        """Method that infers n_trees using the RAxML-NG implementation of maximum parsimony.

        Args:
            msa_file (pathlib.Path): Filepath pointing to the MSA file.
            model (str): String representation of the substitution model to use. Needs to be a valid RAxML-NG model.
                For example "GTR+G" for DNA data or "LG+G" for protein data.
            prefix (pathlib.Path): Prefix to use when running RAxML-NG.
            n_trees (int): Number of trees to infer. Defaults to 24.
            **kwargs: Additional arguments to pass to the RAxML-NG command.
                The name of the kwarg needs to be a valid RAxML-NG flag.
                For flags with a value pass it like this: "flag=value", for flags without a value pass it like this: "flag=None".
                See https://github.com/amkozlov/raxml-ng for all options.

        Returns:
            Filepath pointing to the inferred maximum parsimony trees.
        """
        # The tripled braces render literal braces around the number, e.g. "pars{24}".
        cmd = self._base_cmd(
            msa_file, model, prefix, start=None, tree=f"pars{{{n_trees}}}", **kwargs
        )
        run_raxmlng_command(cmd)
        return pathlib.Path(f"{prefix}.raxml.startTree")

    def get_rfdistance_results(
        self, trees_file: pathlib.Path, prefix: Optional[pathlib.Path] = None, **kwargs
    ) -> tuple[float, float, float]:
        """Method that computes the number of unique topologies, relative RF-Distance, and absolute RF-Distance for the given set of trees.

        Args:
            trees_file (pathlib.Path): Filepath pointing to the file containing the trees.
            prefix (pathlib.Path, optional): Prefix to use when running RAxML-NG. Defaults to None. If None, a temporary directory is used.
            **kwargs: Additional arguments to pass to the RAxML-NG command.
                The name of the kwarg needs to be a valid RAxML-NG flag.
                For flags with a value pass it like this: "flag=value", for flags without a value pass it like this: "flag=None".
                See https://github.com/amkozlov/raxml-ng for all options.

        Returns:
            num_topos (float): Number of unique topologies of the given set of trees.
            rel_rfdist (float): Relative RF-Distance of the given set of trees. Computed as average over all pairwise RF-Distances. Value between 0.0 and 1.0.
            abs_rfdist (float): Absolute RF-Distance of the given set of trees.
        """
        # The temporary directory is removed when the block exits, so the log
        # has to be parsed before leaving the context manager.
        with TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            if not prefix:
                prefix = tmpdir / "rfdist"
            self._run_rfdist(trees_file, prefix, **kwargs)
            log_file = pathlib.Path(f"{prefix}.raxml.log")
            return _get_raxmlng_rfdist_results(log_file)

get_rfdistance_results(trees_file, prefix=None, **kwargs)

Method that computes the number of unique topologies, relative RF-Distance, and absolute RF-Distance for the given set of trees.

Parameters:

Name Type Description Default
trees_file Path

Filepath pointing to the file containing the trees.

required
prefix Path

Prefix to use when running RAxML-NG. Defaults to None. If None, a temporary directory is used.

None
**kwargs

Additional arguments to pass to the RAxML-NG command. The name of the kwarg needs to be a valid RAxML-NG flag. For flags with a value pass it like this: "flag=value", for flags without a value pass it like this: "flag=None". See https://github.com/amkozlov/raxml-ng for all options.

{}

Returns:

Name Type Description
num_topos float

Number of unique topologies of the given set of trees.

rel_rfdist float

Relative RF-Distance of the given set of trees. Computed as average over all pairwise RF-Distances. Value between 0.0 and 1.0.

abs_rfdist float

Absolute RF-Distance of the given set of trees.

Source code in pypythia/raxmlng.py
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def get_rfdistance_results(
    self, trees_file: pathlib.Path, prefix: pathlib.Path = None, **kwargs
) -> tuple[float, float, float]:
    """Compute topology and RF-Distance statistics for a set of trees.

    Runs the RAxML-NG RF-Distance computation on ``trees_file`` and parses the
    resulting ``<prefix>.raxml.log`` file.

    Args:
        trees_file (pathlib.Path): Filepath pointing to the file containing the trees.
        prefix (pathlib.Path, optional): Prefix to use when running RAxML-NG. Defaults to None.
            If None, the prefix ``rfdist`` inside a temporary directory is used.
        **kwargs: Additional arguments to pass to the RAxML-NG command.
            The name of the kwarg needs to be a valid RAxML-NG flag.
            Pass "flag=value" for flags with a value and "flag=None" for flags without a value.
            See https://github.com/amkozlov/raxml-ng for all options.

    Returns:
        num_topos (float): Number of unique topologies of the given set of trees.
        rel_rfdist (float): Relative RF-Distance of the given set of trees. Computed as average over all pairwise RF-Distances. Value between 0.0 and 1.0.
        abs_rfdist (float): Absolute RF-Distance of the given set of trees.
    """
    # The scratch directory is always created; it is only used when the caller
    # did not supply a prefix, and it is removed once the log has been parsed.
    with TemporaryDirectory() as scratch_dir:
        run_prefix = prefix if prefix else pathlib.Path(scratch_dir) / "rfdist"
        self._run_rfdist(trees_file, run_prefix, **kwargs)
        return _get_raxmlng_rfdist_results(pathlib.Path(f"{run_prefix}.raxml.log"))

infer_parsimony_trees(msa_file, model, prefix, n_trees=24, **kwargs)

Method that infers n_trees using the RAxML-NG implementation of maximum parsimony.

Parameters:

Name Type Description Default
msa_file Path

Filepath pointing to the MSA file.

required
model str

String representation of the substitution model to use. Needs to be a valid RAxML-NG model. For example "GTR+G" for DNA data or "LG+G" for protein data.

required
prefix Path

Prefix to use when running RAxML-NG.

required
n_trees int

Number of trees to infer. Defaults to 24.

24
**kwargs

Additional arguments to pass to the RAxML-NG command. The name of the kwarg needs to be a valid RAxML-NG flag. For flags with a value pass it like this: "flag=value", for flags without a value pass it like this: "flag=None". See https://github.com/amkozlov/raxml-ng for all options.

{}

Returns:

Type Description
Path

Filepath pointing to the inferred maximum parsimony trees.

Source code in pypythia/raxmlng.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def infer_parsimony_trees(
    self,
    msa_file: pathlib.Path,
    model: str,
    prefix: pathlib.Path,
    n_trees: int = 24,
    **kwargs,
) -> pathlib.Path:
    """Infer ``n_trees`` maximum parsimony trees with RAxML-NG.

    Args:
        msa_file (pathlib.Path): Filepath pointing to the MSA file.
        model (str): String representation of the substitution model to use. Needs to be a valid RAxML-NG model,
            for example "GTR+G" for DNA data or "LG+G" for protein data.
        prefix (pathlib.Path): Prefix to use when running RAxML-NG.
        n_trees (int): Number of trees to infer. Defaults to 24.
        **kwargs: Additional arguments to pass to the RAxML-NG command.
            The name of the kwarg needs to be a valid RAxML-NG flag.
            Pass "flag=value" for flags with a value and "flag=None" for flags without a value.
            See https://github.com/amkozlov/raxml-ng for all options.

    Returns:
        Filepath pointing to the inferred maximum parsimony trees (``<prefix>.raxml.startTree``).
    """
    # Tripled braces produce literal braces around the count, e.g. "pars{24}".
    tree_spec = f"pars{{{n_trees}}}"
    raxml_cmd = self._base_cmd(
        msa_file, model, prefix, start=None, tree=tree_spec, **kwargs
    )
    run_raxmlng_command(raxml_cmd)

    start_tree_file = pathlib.Path(f"{prefix}.raxml.startTree")
    return start_tree_file

pypythia.raxmlng.run_raxmlng_command(cmd)

Helper method to run a RAxML-NG command.

Parameters:

Name Type Description Default
cmd list

List of strings representing the RAxML-NG command to run.

required

Raises:

Type Description
RAxMLNGError

If the RAxML-NG command fails with a CalledProcessError.

RuntimeError

If the RAxML-NG command fails with any other error.

Source code in pypythia/raxmlng.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def run_raxmlng_command(cmd: list[str]) -> None:
    """Helper method to run a RAxML-NG command.

    The command is executed without a shell and its standard output is captured
    and discarded on success.

    Args:
        cmd (list): List of strings representing the RAxML-NG command to run.

    Raises:
        RAxMLNGError: If the RAxML-NG command fails with a CalledProcessError,
            i.e. the process exits with a non-zero return code.
        RuntimeError: If the RAxML-NG command fails with any other error,
            e.g. the executable cannot be started.
    """
    try:
        subprocess.check_output(cmd, encoding="utf-8")
    except subprocess.CalledProcessError as e:
        # Chain explicitly so the traceback reports the failed process as the
        # direct cause, matching the handler below.
        raise RAxMLNGError(subprocess_exception=e) from e
    except Exception as e:
        raise RuntimeError("Running RAxML-NG command failed.") from e