biomcmc-lib  0.1
low-level library for phylogenetic analysis
kmerhash.h
Go to the documentation of this file.
1 /*
2  * This file is part of biomcmc-lib, a low-level library for phylogenomic analysis.
3  * Copyright (C) 2019-today Leonardo de Oliveira Martins [ leomrtns at gmail.com; http://www.leomartins.org ]
4  *
5  * biomcmc is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
6  * License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
10  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
11  * details (file "COPYING" or http://www.gnu.org/copyleft/gpl.html).
12  */
13 
19 #ifndef _biomcmc_kmerhash_h_
20 #define _biomcmc_kmerhash_h_
21 
22 #include "alignment.h"
23 
24 typedef struct kmer_params_struct* kmer_params; // pointer typedef: a kmer_params value is a pointer to struct kmer_params_struct
25 typedef struct kmerhash_struct* kmerhash; // pointer typedef: a kmerhash value is a pointer to struct kmerhash_struct
26 
27 extern const char *biomcmc_kmer_class_string[]; // declaration only: array of C strings whose size and contents are not given in this header
28 // NOTE(review): being an extern declaration, the array must be defined exactly once in a .c file — confirm that definition exists
29 
31 { // NOTE(review): presumably the body of struct kmer_params_struct; its header line (kmerhash.h:30) is absent from this listing
32  uint64_t mask1[7], mask2[7]; // two groups of seven 64-bit masks; presumably one mask per k-mer size — TODO confirm
33  uint8_t n1, n2, shift1[7], shift2[7], size[14], nbytes[14]; // size = how many bases are stored (if dense, x2); nbytes = how many bytes (uint8_t) fit; n1/n2 presumably count the used entries of the "1" and "2" groups (14 = 7 + 7) — TODO confirm
34  uint32_t seed[14]; // one 32-bit seed per size[] entry; presumably passed as the third argument of hashfunction — TODO confirm
35  uint64_t (*hashfunction) (const void *, const size_t, const uint32_t); // pointer to the hash routine: takes (const void *, size_t, uint32_t) and returns a 64-bit value
36  int dense; // NOTE(review): the brief "4bits per base or 2bits or 1 bit (GC content)" is shown for kmer_class_mode (kmerhash.h:37, not in this listing); it presumably belongs to this member — confirm
38  int ref_counter; // reference counter; presumably the number of owners sharing this object — TODO confirm against del_kmer_params()
39 };
40 
42 { // NOTE(review): presumably the body of struct kmerhash_struct; its header line (kmerhash.h:41) is absent from this listing
43  kmer_params p; // handle (pointer) to the k-mer parameter set
44  uint64_t *forward, *reverse; // presumably bit-encoded forward strand and reverse complement — TODO confirm
45  uint64_t *hash, *kmer; // NOTE(review): the brief "hash = 4mer, 8mer, etc. hashed ; kmer = original bitstring OR its complement, masked" is shown for n_hash (kmerhash.h:46); it presumably belongs to this line — confirm
46  int n_hash, n_f; // NOTE(review): the brief "n_f = 2 (128bits)" is shown for dna (kmerhash.h:47); it presumably belongs to this line — confirm
47  char *dna; // sequence pointer; presumably set by link_kmerhash_to_dna_sequence() — ownership not visible here
48  size_t i, n_dna; // presumably current position within dna and its length — TODO confirm
49  int ref_counter; // reference counter; presumably the number of owners sharing this object — TODO confirm against del_kmerhash()
50 };
51 
52 kmer_params new_kmer_params (int mode); // returns a kmer_params handle; the meaning of 'mode' is not visible here — presumably selects a k-mer class (cf. biomcmc_kmer_class_string); TODO confirm
53 void del_kmer_params (kmer_params p); // counterpart of new_kmer_params(); presumably decrements p->ref_counter and frees at zero — TODO confirm
54 kmerhash new_kmerhash (int mode); // returns a kmerhash handle; 'mode' is presumably forwarded to new_kmer_params() — TODO confirm
55 void link_kmerhash_to_dna_sequence (kmerhash kmer, char *dna, size_t dna_length); // takes a sequence pointer and its length; presumably stored in kmer->dna / kmer->n_dna without copying — TODO confirm ownership
56 void del_kmerhash (kmerhash kmer); // counterpart of new_kmerhash(); NOTE(review): NULL handling is not visible here
57 bool kmerhash_iterator (kmerhash kmer); // returns bool; presumably advances over the linked sequence and reports whether another k-mer is available — TODO confirm
58 
59 #endif
File handling functions and calculation of distances for sequence data in nexus format.
Definition: kmerhash.h:41
char * dna
n_f = 2 (128bits)
Definition: kmerhash.h:47
Definition: kmerhash.h:30
int n_hash
hash = 4mer, 8mer, etc. hashed ; kmer = original bitstring OR its complement, masked ...
Definition: kmerhash.h:46
int kmer_class_mode
4bits per base or 2bits or 1 bit (GC content)
Definition: kmerhash.h:37