biomcmc-lib  0.1
low level library for phylogenetic analysis
lowlevel.h
Go to the documentation of this file.
1 /*
2  *This file is part of biomcmc-lib, a low-level library for phylogenomic analysis.
3  * Copyright (C) 2019-today Leonardo de Oliveira Martins [ leomrtns at gmail.com; http://www.leomartins.org ]
4  *
5  * biomcmc is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
6  * License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
10  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
11  * details (file "COPYING" or http://www.gnu.org/copyleft/gpl.html).
12  */
13 
18 #ifndef _biomcmc_lowlevel_h_
19 #define _biomcmc_lowlevel_h_
20 
21 #include "config.h"
22 
23 #include <stdio.h> /* [ANSI C C89] */
24 #include <stdlib.h> /* random number, searching, sorting, EXIT_SUCCESS [ANSI C C89] */
25 #include <string.h> /* string manipulation [ANSI C C89] */
26 #include <stdarg.h> /* Access to va_arg, va_list [ANSI C C89] */
27 #include <stdint.h> /* standard integer types (int32_t typedef etc.) [C99]*/
28 #include <ctype.h> /* char operation functions (e.g. isspace() ), case convertion [ANSI C C89] */
29 #include <math.h> /* standard math functions (e.g. exp() ) [ANSI C C89] */
30 #include <float.h> /* DBL_MAX_EXP, DBL_EPSILON constants (to avoid underflow etc) */
31 #include <time.h> /* speed profiling(e.g. clock(), clock_gettime(), struct timespec ) [ANSI C C89] */
32 #include <unistd.h> /* system values checking at runtime (e.g. sysconf() ) [POSIX C] */
33 #include <sys/time.h> /* random seed (e.g. gettimeofday(), struct timeval) [POSIX C] */
34 #include <sys/times.h> /* speed profiling in clock ticks (e.g. times() ) [POSIX.1 (or GNU extension?)] */
35 #include <sys/types.h> /* pid_t for process ID, used together with unistd.h (may not be necessary) */
36 #include <unistd.h> /* getpid() function, used together with sys/types.h (may not be necessary) */
37 #include <assert.h>
38 //#include <sys/resource.h> // suggested by goptics (gpu), but don't seem needed
39 //#include <sys/stat.h> // suggested by goptics (gpu), but don't seem needed
40 
41 
42 #include <libgen.h> /* standard XPG basename() - the one provided by string.h is a GNU extension, fails on macOSX*/
43 
44 #ifdef _OPENMP
45 #include <omp.h> /* OpenMP parallel threading library when available */
46 #endif
47 
48 /* Global constants */
49 
50 #define EXP_1 2.71828182845904523536028747135266 /* Euler's number */
51 
52 #define true 1U
53 #define false 0U
55 #if defined(__GNUC__) && __GNUC__ >= 7
56  #define attribute_FALLTHROUGH __attribute__ ((fallthrough));
57 #else
58  #define attribute_FALLTHROUGH ((void)0);
59 #endif /* __GNUC__ >= 7 */
60 
61 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
62 #define MAX(x,y) (((x)>(y)) ? (x) : (y))
63 #define MOD(a) (((a)>0) ? (a) :(-a))
64 
65 
67 typedef unsigned char bool;
68 
69 typedef struct hungarian_struct* hungarian;
70 
72 {
73  int **cost;
74  int size,
75  initial_cost,
76  final_cost;
77  int *col_mate, *unchosen_row, *slack_row, *row_mate, *parent_row;
78  double **dcost, initial_dcost, final_dcost;
79  double *row_dec_d, *col_inc_d, *slack_d;
80  int *row_dec, *col_inc, *slack; /* aux vectors */
81  bool is_double;
82 };
83 
89 void *biomcmc_malloc (size_t size);
90 
97 void *biomcmc_realloc (void *ptr, size_t size);
98 
106 FILE *biomcmc_fopen (const char *path, const char *mode);
107 
113 void biomcmc_error (const char *template, ...);
114 
116 int compare_int_increasing (const void *a, const void *b);
117 int compare_int_decreasing (const void *a, const void *b);
118 int compare_uint64_increasing (const void *a, const void *b);
119 int compare_uint64_decreasing (const void *a, const void *b);
120 int compare_double_increasing (const void *a, const void *b);
121 int compare_double_decreasing (const void *a, const void *b);
122 
131 int biomcmc_getline (char **lineptr, size_t *n, FILE *stream);
132 
134 uint32_t biomcmc_levenshtein_distance (const char *s1, uint32_t n1, const char *s2, uint32_t n2, uint32_t cost_sub, uint32_t cost_indel, bool skip_borders);
135 
136 /* Hungarian method for bipartite matching (assignment) */
137 hungarian new_hungarian (int size, bool is_double);
138 void hungarian_reset (hungarian p);
139 void hungarian_update_cost (hungarian p, int row, int col, void *cost); /* pointer since we decide if int/double later */
140 void del_hungarian (hungarian p);
141 void hungarian_solve (hungarian p, int this_size);
142 
143 #endif
int initial_cost
assignment size. Cost is a square matrix, so size should be an overestimate where "missing" nodes are...
Definition: lowlevel.h:74
uint32_t biomcmc_levenshtein_distance(const char *s1, uint32_t n1, const char *s2, uint32_t n2, uint32_t cost_sub, uint32_t cost_indel, bool skip_borders)
edit distance between two sequences (slow), with option to allow one of sequences to terminate soon (...
Definition: lowlevel.c:166
void * biomcmc_realloc(void *ptr, size_t size)
Memory-safe realloc() function.
Definition: lowlevel.c:31
void * biomcmc_malloc(size_t size)
Memory-safe malloc() function.
Definition: lowlevel.c:23
Definition: lowlevel.h:71
void biomcmc_error(const char *template,...)
Prints error message and quits program.
Definition: lowlevel.c:52
FILE * biomcmc_fopen(const char *path, const char *mode)
Memory-safe fopen() function.
Definition: lowlevel.c:39
int compare_int_increasing(const void *a, const void *b)
Comparison between integers, doubles, etc. used by qsort()
Definition: lowlevel.c:67
int biomcmc_getline(char **lineptr, size_t *n, FILE *stream)
read file line-by-line (like homonymous function from GNU C library)
Definition: lowlevel.c:113
double ** dcost
col_mate[row] with column match for row
Definition: lowlevel.h:78
int * col_mate
our final cost is on rescaled cost matrix, therefore to restore the "classical" optimal cost one shou...
Definition: lowlevel.h:77
int final_cost
sum of lowest input cost values for each column. The hungarian method rescales them so that minimum p...
Definition: lowlevel.h:74
unsigned char bool
Mnemonic for boolean (char is smaller than int)
Definition: lowlevel.h:67
int size
cost matrix
Definition: lowlevel.h:74
double * row_dec_d
costs when working with float numbers instead of integers
Definition: lowlevel.h:79