biomcmc-lib  0.1
low level library for phylogenetic analysis
topology_common.h
Go to the documentation of this file.
1 /*
2  * This file is part of biomcmc-lib, a low-level library for phylogenomic analysis.
3  * Copyright (C) 2019-today Leonardo de Oliveira Martins [ leomrtns at gmail.com; http://www.leomartins.org ]
4  *
5  * biomcmc is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
6  * License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8 
9  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
10  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
11  * details (file "COPYING" or http://www.gnu.org/copyleft/gpl.html).
12  */
13 
24 #ifndef _biomcmc_topology_common_h_
25 #define _biomcmc_topology_common_h_
26 
27 #include "bipartition.h"
28 #include "distance_matrix.h"
29 #include "empirical_frequency.h"
30 
31 typedef struct topol_node_struct* topol_node;
32 typedef struct topology_struct* topology;
33 
36 {
37  topol_node up, right, left, sister;
38  int id, level;
39  int mid[5];
40  bool internal,
41  u_done,
42  d_done;
44 };
45 
48 {
49  topol_node *nodelist;
50  double *blength;
51  int id;
52  topol_node root;
53  int nleaves;
54  int nnodes;
55  topol_node undo_prune;
56  topol_node undo_regraft;
57  bool undo_lca;
58  topol_node *postorder;
59  topol_node *undone;
60  int n_undone;
61  uint32_t hashID1, hashID2;
65  int *index;
66  bool quasirandom;
67 };
68 
69 
71 topology new_topology (int nleaves);
73 void topology_malloc_blength (topology tree);
75 void del_topology (topology topol);
76 
77 /* DEBUG function */
78 void debug_topol (topology tree);
79 
87 void copy_topology_from_topology (topology to_tree, topology from_tree);
88 
90 void update_topology_sisters (topology tree);
91 
94 void update_topology_traversal (topology tree);
95 
97 bool topology_is_equal (topology t1, topology t2);
98 
100 bool topology_is_equal_unrooted (topology t1, topology t2, bool use_root_later);
101 
103 void reorder_topology_leaves (topology tree);
104 
106 bool node1_is_child_of_node2 (topol_node node1, topol_node node2);
107 
115 char * topology_to_string_by_id (const topology tree, double *blen);
116 
124 char * topology_to_string_create_name (const topology tree, double *blen);
125 
134 char * topology_to_string_by_name (const topology tree, double *blen);
135 
147 void graphviz_file_topology (FILE * fout, char *label, const topology tree);
148 
163 void apply_spr_at_nodes (topology p, topol_node prune, topol_node regraft, bool update_done);
165 void apply_spr_at_nodes_LCAprune (topology tree, topol_node prune, topol_node regraft, bool update_done);
167 void apply_spr_at_nodes_notLCAprune (topology tree, topol_node prune, topol_node regraft, bool update_done);
169 void topology_undo_random_move (topology tree, bool update_done);
171 void clear_topology_flags (topology tree);
173 void raise_topology_flags (topology tree);
175 void topology_reset_random_move (topology tree);
176 
178 int copy_topology_to_intvector_by_postorder (topology tree, int *ivec);
180 int copy_intvector_to_topology_by_postorder (topology tree, int *ivec);
182 void copy_topology_to_intvector_by_id (topology tree, int *ivec);
184 void copy_intvector_to_topology_by_id (topology tree, int *ivec);
185 
186 #endif
double * blength
vector of nodes (the first are the leaves).
Definition: topology_common.h:50
void apply_spr_at_nodes(topology p, topol_node prune, topol_node regraft, bool update_done)
Apply one subtree prune-and-regraft (SPR branch swapping) operation at specified nodes.
Definition: topology_common.c:454
Creates a histogram of a vector, ordered by frequency.
char * topology_to_string_by_name(const topology tree, double *blen)
Print subtree in newick format to string using leaf names.
Definition: topology_common.c:387
bool topology_is_equal(topology t1, topology t2)
Compare two topologies based on bipartitions as clades (not on branch lengths)
Definition: topology_common.c:248
bool u_done
If internal node, TRUE; if leaf, FALSE.
Definition: topology_common.h:40
topol_node root
topology ID (should be updated by hand, e.g. by functions in topology_space.c)
Definition: topology_common.h:52
bool undo_lca
How to revert most recent SPR move (regraft node).
Definition: topology_common.h:57
void topology_undo_random_move(topology tree, bool update_done)
revert last SPR branch swapping
Definition: topology_common.c:648
void raise_topology_flags(topology tree)
reset all d_done and u_done booleans to "false" (when updating a model parameter with MTM) ...
Definition: topology_common.c:676
topol_node undo_prune
Number of nodes, including leaves ( for a binary rooted tree).
Definition: topology_common.h:55
bool d_done
Has the topology up this edge (eq. to node) changed? (needed in likelihood calc)
Definition: topology_common.h:40
void update_topology_traversal(topology tree)
Update topol_node::preorder, topol_node::postorder, topol_node::bipartition and order siblings by num...
Definition: topology_common.c:194
int id
Parent, children and sister nodes.
Definition: topology_common.h:38
void copy_intvector_to_topology_by_id(topology tree, int *ivec)
restore topological structure based on ID vector
Definition: topology_common.c:738
int * index
Taxon names (just a pointer; actual values are setup by newick_tree_struct or alignment_struct) ...
Definition: topology_common.h:65
void reorder_topology_leaves(topology tree)
Reorder char_vector_struct; leaf node ids (and bipartitions) must then follow this order...
Definition: topology_common.c:294
void update_topology_sisters(topology tree)
Update pointers to topol_node_struct::sister.
Definition: topology_common.c:180
vector of strings (char vectors) of variable length
Definition: char_vector.h:27
int ref_counter
zero if postorder[] vector needs update, one if we can use postdorder[] to traverse tree ...
Definition: topology_common.h:63
void topology_reset_random_move(topology tree)
revert last SPR branch swapping and clear flags (reject last proposal, in MCMC)
Definition: topology_common.c:683
bool traversal_updated
hash values of tree, ideally a unique value for each tree (collisions happen...)
Definition: topology_common.h:62
uint32_t hashID1
number of outdated nodes (which need likelihood calc etc) in topology_struct::undone.
Definition: topology_common.h:61
void copy_topology_from_topology(topology to_tree, topology from_tree)
Copy information from topology_struct.
Definition: topology_common.c:115
topol_node * undone
pointers to all internal nodes in postorder (from last to first is preorder)
Definition: topology_common.h:59
void graphviz_file_topology(FILE *fout, char *label, const topology tree)
Prints subtree in dot format to file.
Definition: topology_common.c:424
char_vector taxlabel
number of references of topology (how many places are pointing to it)
Definition: topology_common.h:64
distance matrix, that can be used in alignments and trees, and patristic-distance based species dista...
int copy_topology_to_intvector_by_postorder(topology tree, int *ivec)
store ID of each node's parent (in postorder) into vector, returning number of stored nodes ...
Definition: topology_common.c:690
void copy_topology_to_intvector_by_id(topology tree, int *ivec)
store ID of each node's parent into vector
Definition: topology_common.c:726
bipartition split
Has the topology down this edge (eq. to node) changed? (needed in likelihood calc) ...
Definition: topology_common.h:43
int n_undone
pointers to outdated nodes in postorder (from last to first is preorder)
Definition: topology_common.h:60
topol_node undo_regraft
How to revert most recent SPR move (prune node).
Definition: topology_common.h:56
bool topology_is_equal_unrooted(topology t1, topology t2, bool use_root_later)
Compare two topologies based on bipartitions neglecting root; boolean ask if split should be reverted...
Definition: topology_common.c:263
char * topology_to_string_create_name(const topology tree, double *blen)
Print subtree in newick format to string creating names (based on leaf IDs.)
Definition: topology_common.c:348
topology new_topology(int nleaves)
Allocate space for new topology_struct.
Definition: topology_common.c:32
void topology_malloc_blength(topology tree)
Allocate vector for branch lengths (3 vectors: mean, min and max values observed in topol_space colle...
Information of a node (binary tree).
Definition: topology_common.h:35
int id
Branch lengths, with mean, min, max vectors for topology_space.
Definition: topology_common.h:51
void clear_topology_flags(topology tree)
reset all d_done and u_done booleans to "true" (when rejecting a new state in MCMC) ...
Definition: topology_common.c:669
void del_topology(topology topol)
Free space allocated by topology_struct.
Definition: topology_common.c:92
void apply_spr_at_nodes_LCAprune(topology tree, topol_node prune, topol_node regraft, bool update_done)
Apply one SPR branch swapping at specified nodes when prune subtree is above prune node...
Definition: topology_common.c:461
Unary/binary operators on arbitrarily-sized bitstrings (strings of zeros and ones) like split biparti...
int nleaves
Pointer to root node.
Definition: topology_common.h:53
bool quasirandom
sandbox vector used in spr moves / quasirandom tree shuffle just to avoid recurrent allocation ...
Definition: topology_common.h:66
char * topology_to_string_by_id(const topology tree, double *blen)
Print subtree in newick format to string using leaf IDs.
Definition: topology_common.c:333
int copy_intvector_to_topology_by_postorder(topology tree, int *ivec)
restore topological structure based on postordered ID vector, returning number of restored nodes ...
Definition: topology_common.c:704
void apply_spr_at_nodes_notLCAprune(topology tree, topol_node prune, topol_node regraft, bool update_done)
Apply one SPR branch swapping at specified nodes when subtree to be pruned is below prune node...
Definition: topology_common.c:550
Bit-string representation of splits.
Definition: bipartition.h:30
int mid[5]
Node ID (values smaller than nleaves indicate leaves) and distance from root.
Definition: topology_common.h:39
Binary unrooted topology (rooted at leaf with ID zero)
Definition: topology_common.h:47
topol_node * postorder
revert SPR move is lca type or not
Definition: topology_common.h:58
bool node1_is_child_of_node2(topol_node node1, topol_node node2)
Boolean if node2 is on the path of node1 to the root.
Definition: topology_common.c:324
int nnodes
Number of leaves .
Definition: topology_common.h:54