biomcmc-lib  0.1
low level library for phylogenetic analysis
alignment.h
Go to the documentation of this file.
1 /*
2  * This file is part of biomcmc-lib, a low-level library for phylogenomic analysis.
3  * Copyright (C) 2019-today Leonardo de Oliveira Martins [ leomrtns at gmail.com; http://www.leomartins.org ]
4  *
5  * biomcmc is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
6  * License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
10  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
11  * details (file "COPYING" or http://www.gnu.org/copyleft/gpl.html).
12  */
13 
23 #ifndef _biomcmc_alignment_h_
24 #define _biomcmc_alignment_h_
25 
26 #include "distance_matrix.h"
27 #include "nexus_common.h"
28 
29 typedef struct alignment_struct* alignment;
30 
33 {
34  int ntax, nchar, npat;
39  int n_charset;
40  int *charset_start, *charset_end;
41  bool is_aligned;
42  int *site_pattern;
43  int *pattern_freq;
44  char *filename;
46 };
47 
49 alignment read_alignment_from_file (char *seqfilename);
51 alignment read_fasta_alignment_from_file (char *seqfilename);
53 alignment read_nexus_alignment_from_file (char *seqfilename);
55 void print_alignment_in_fasta_format (alignment align, FILE *stream);
57 void del_alignment (alignment align);
58 
63 
67 void store_likelihood_info_at_leaf (double **l, char *align, int n_pat, int n_state);
68 
69 #endif
70 
char_vector taxlabel
Taxon names from file.
Definition: alignment.h:36
distance_matrix new_distance_matrix_from_valid_matrix_elems(distance_matrix original, int *valid, int n_valid)
new matrix of pairwise distance by simply excluding original elements not present in valid[] ...
Definition: alignment.c:594
alignment read_alignment_from_file(char *seqfilename)
Reads DNA alignment (guess format between FASTA and NEXUS) from file and store info in alignment_struct.
Definition: alignment.c:47
alignment read_nexus_alignment_from_file(char *seqfilename)
Reads DNA NEXUS alignment from file and store info in alignment_struct.
Definition: alignment.c:142
Hash table (vector indexed by strings).
Definition: hashtable.h:42
bool is_aligned
FASTA files don't need to be aligned; NEXUS files do.
Definition: alignment.h:41
int * site_pattern
pattern, in alignment_struct::character, to which original site belongs.
Definition: alignment.h:42
hashtable taxlabel_hash
Lookup table with taxon names.
Definition: alignment.h:38
void del_alignment(alignment align)
Frees memory from alignment_struct.
Definition: alignment.c:295
char * filename
name of the original file, with extension removed
Definition: alignment.h:44
Data from alignment file.
Definition: alignment.h:32
vector of strings (char vectors) of variable length
Definition: char_vector.h:27
distance_matrix new_distance_matrix_from_alignment(alignment align)
creates and calculates matrix of pairwise distances based on alignment
Definition: alignment.c:604
char_vector character
Vector with aligned sequence for each taxon. (The ntax, nchar and npat fields hold the number of species, sites and patterns according to sequence file.)
Definition: alignment.h:35
int n_charset
Number of gene segments (ASSUMPTIONS BLOCK).
Definition: alignment.h:39
File handling functions for nexus format in general.
void store_likelihood_info_at_leaf(double **l, char *align, int n_pat, int n_state)
transform aligned sequence into likelihood for terminal taxa (e.g. A -> 0001, C-> 0010 etc) (e...
Definition: alignment.c:762
distance matrix, that can be used in alignments and trees, and patristic-distance based species distances.
alignment read_fasta_alignment_from_file(char *seqfilename)
Reads DNA FASTA alignment from file and store info in alignment_struct.
Definition: alignment.c:76
int * pattern_freq
if sequences are aligned, this is the frequency of each pattern.
Definition: alignment.h:43
void print_alignment_in_fasta_format(alignment align, FILE *stream)
Prints alignment to FILE stream in FASTA format (debug purposes).
Definition: alignment.c:242
int ref_counter
No description was generated for this field; the text "name of the original file, with extension removed" documents filename.
Definition: alignment.h:45
int * charset_start
Start and end of each gene segment (from 1...NCHAR) (ASSUMPTIONS BLOCK).
Definition: alignment.h:40
Definition: distance_matrix.h:29
char_vector taxshort
Alias (short version) for taxon names that can be used in newick trees.
Definition: alignment.h:37