biomcmc-lib  0.1
low level library for phylogenetic analysis
distance_matrix.h
Go to the documentation of this file.
1 /*
2  * This file is part of biomcmc-lib, a low-level library for phylogenomic analysis.
3  * Copyright (C) 2019-today Leonardo de Oliveira Martins [ leomrtns at gmail.com; http://www.leomartins.org ]
4  *
5  * biomcmc is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
6  * License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
10  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
11  * details (file "COPYING" or http://www.gnu.org/copyleft/gpl.html).
12  */
13 
21 #ifndef _biomcmc_distance_matrix_h_
22 #define _biomcmc_distance_matrix_h_
23 
24 #include "hashtable.h"
25 
27 typedef struct spdist_matrix_struct* spdist_matrix;
28 
30 {
31  int size;
32  double **d,
34  var_K2P_dist,
35  mean_JC_dist,
36  mean_R,
37  var_R,
38  freq[20];
39  double *fromroot;
40  int *idx, *i_l, *i_r;
42 };
43 
45 {
46  int size, n_missing;
47  double *mean, *min;
48  int *count;
51 };
52 
54 distance_matrix new_distance_matrix (int nseqs);
56 void zero_lower_distance_matrix (distance_matrix dist);
58 void transpose_distance_matrix (distance_matrix dist);
60 void del_distance_matrix (distance_matrix dist);
61 
62 spdist_matrix new_spdist_matrix (int n_species);
63 void zero_all_spdist_matrix (spdist_matrix dist);
64 void finalise_spdist_matrix (spdist_matrix dist);
65 void finalise_spdist_matrix_with_rescaling (spdist_matrix dist, double scale);
66 void complete_missing_spdist_from_global_spdist (spdist_matrix local, spdist_matrix global);
67 void copy_spdist_matrix_to_distance_matrix_upper (spdist_matrix spd, distance_matrix dist, bool use_means);
68 void del_spdist_matrix (spdist_matrix dist);
69 
70 
72 void fill_species_dists_from_gene_dists (distance_matrix spdist, distance_matrix gendist, int *sp_id, bool use_upper_gene);
74 void update_species_dists_from_spdist (distance_matrix global, distance_matrix local, int *spexist);
75 
76 int prepare_spdistmatrix_from_gene_species_map (spdist_matrix spdist, int *sp_id, int n_sp_id);
77 void fill_spdistmatrix_from_gene_dists (spdist_matrix spdist, distance_matrix gendist, int *sp_id, bool use_upper_gene);
79 void fill_spdistmatrix_from_gene_dist_vector (spdist_matrix spdist, double *gdist, int n_gdist, int *sp_id);
80 void update_spdistmatrix_from_spdistmatrix (spdist_matrix global, spdist_matrix local);
81 
82 #endif
int ref_counter
boolean marking if species is present at all in this matrix (this text documents species_present, not ref_counter)
Definition: distance_matrix.h:50
void zero_lower_distance_matrix(distance_matrix dist)
especially in gene/sptree distance methods (GLASS, STEAC, etc.), the lower triangle is used for means and the upper for ...
Definition: distance_matrix.c:44
bool * species_present
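boolean marking if species is present at all in this matrix (the text extracted here, "how many times this pairwise comparison appears (between or within loci)", documents count instead)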
Definition: distance_matrix.h:49
double mean_JC_dist
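average pairwise distance from JC model (the text extracted here, "variance in pairwise distance from K2P model", documents var_K2P_dist instead)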
Definition: distance_matrix.h:32
double mean_K2P_dist
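average pairwise distance from K2P model (the text extracted here, "pairwise distance matrix (upper) and ti/tv rate ratio (lower triangle) for K2P formula for alignments...", documents the matrix d instead)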
Definition: distance_matrix.h:32
double var_R
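variance in K2P transition/transversion ratio from pairwise distances (the text extracted here, "average K2P transition/transversion ratio from pairwise distances", documents mean_R instead)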
Definition: distance_matrix.h:32
void del_distance_matrix(distance_matrix dist)
releases memory allocated to distance_matrix (this structure has no smart ref_counter) ...
Definition: distance_matrix.c:66
distance_matrix new_distance_matrix(int nseqs)
creates new matrix of pairwise distances
Definition: distance_matrix.c:18
double ** d
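pairwise distance matrix (upper) and ti/tv rate ratio (lower triangle) for K2P formula for alignments... (the text extracted here, "number of sequences to calculate distances", documents size instead)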
Definition: distance_matrix.h:32
double mean_R
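average K2P transition/transversion ratio from pairwise distances (the text extracted here, "average pairwise distance from JC model", documents mean_JC_dist instead)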
Definition: distance_matrix.h:32
double freq[20]
empirical equilibrium frequencies (the text extracted here, "variance in K2P transition/transversion ratio from pairwise distances", documents var_R instead)
Definition: distance_matrix.h:32
void fill_species_dists_from_gene_dists(distance_matrix spdist, distance_matrix gendist, int *sp_id, bool use_upper_gene)
updates distances between species based on genes and gene-to-species mapping, with min on upper and m...
Definition: distance_matrix.c:215
void transpose_distance_matrix(distance_matrix dist)
swaps the lower and upper triangles of the matrix (since some functions like UPGMA expect upper...
Definition: distance_matrix.c:54
void zero_all_spdist_matrix(spdist_matrix dist)
Definition: distance_matrix.c:104
double var_K2P_dist
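variance in pairwise distance from K2P model (the text extracted here, "average pairwise distance from K2P model", documents mean_K2P_dist instead)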
Definition: distance_matrix.h:32
int * idx
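aux vectors for finding leaves spanned by subtrees on any node (the text extracted here, "distance from root (used to calculate distance between tree leaves)", documents fromroot instead)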
Definition: distance_matrix.h:40
void fill_spdistmatrix_from_gene_dist_vector(spdist_matrix spdist, double *gdist, int n_gdist, int *sp_id)
initialise spdist_matrix with patristic distances from gdist vector of size n_gdist (1D) ...
Definition: distance_matrix.c:295
double * fromroot
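distance from root (used to calculate distance between tree leaves) (the text extracted here, "empirical equilibrium frequencies", documents freq instead)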
Definition: distance_matrix.h:39
void update_species_dists_from_spdist(distance_matrix global, distance_matrix local, int *spexist)
update global (over loci) species distances based on local (within locus) species distances ...
Definition: distance_matrix.c:246
Definition: distance_matrix.h:44
Definition: distance_matrix.h:29
int * count
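how many times this pairwise comparison appears (between or within loci) (the text extracted here, "mean or min distances across possibilities (within loci)", documents mean and min instead)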
Definition: distance_matrix.h:48
int ref_counter
aux vectors for finding leaves spanned by subtrees on any node (this text documents idx, i_l and i_r, not ref_counter)
Definition: distance_matrix.h:41
double-hashing open-address hash table using strings as keys – also has a distance matrix that can be used in alignments and trees