#include "values/Similarity.hpp"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Alignment-style scoring constants. Nothing in this file references them;
// they are kept unchanged.
#define MATCH 1
#define MISMATCH -1
#define GAP -2

namespace Similarity {

namespace {

/// Number of raw bytes every metric below compares.
constexpr std::size_t kDnaBytes = sizeof(Dna);

/// Views a Dna object as its underlying byte sequence.
/// NOTE(review): this exposes the full object representation, so any padding
/// bytes inside Dna take part in the comparisons -- confirm Dna has none.
inline const std::uint8_t *raw_bytes(const Dna *d) {
    return reinterpret_cast<const std::uint8_t *>(d);
}

} // namespace

/// @brief Cosine similarity between the byte vectors of two Dna objects.
///
/// Each Dna is treated as a vector of sizeof(Dna) unsigned bytes.
///
/// @param d1 First Dna (must not be null).
/// @param d2 Second Dna (must not be null).
/// @return dot(d1, d2) / (|d1| * |d2|). Because all components are
///         non-negative the value lies in [0, 1] up to rounding. If both
///         inputs are all-zero the result is 1 (identical); if exactly one is
///         all-zero the result is 0. The unguarded division produced NaN in
///         those cases.
float cosine_similarity(Dna *d1, Dna *d2) {
    const std::uint8_t *a = raw_bytes(d1);
    const std::uint8_t *b = raw_bytes(d2);

    float dot = 0.0f;
    float sq_a = 0.0f;
    float sq_b = 0.0f;
    for (std::size_t i = 0; i < kDnaBytes; ++i) {
        // Products are computed in int (max 255 * 255) before widening.
        dot += static_cast<float>(a[i] * b[i]);
        sq_a += static_cast<float>(a[i] * a[i]);
        sq_b += static_cast<float>(b[i] * b[i]);
    }

    // A sum of squares is zero only when every byte is zero.
    if (sq_a == 0.0f || sq_b == 0.0f) {
        return (sq_a == sq_b) ? 1.0f : 0.0f;
    }
    return dot / (std::sqrt(sq_a) * std::sqrt(sq_b));
}

/// @brief Hamming-based similarity between two Dna objects.
///
/// Despite the name, the returned value is a similarity, not a distance.
///
/// @param d1 First Dna (must not be null).
/// @param d2 Second Dna (must not be null).
/// @return 1 - (number of differing byte positions / sizeof(Dna)), i.e. 1 for
///         byte-identical inputs and 0 when every byte differs.
float hamming_distance(Dna *d1, Dna *d2) {
    const std::uint8_t *a = raw_bytes(d1);
    const std::uint8_t *b = raw_bytes(d2);

    std::size_t mismatches = 0;
    for (std::size_t i = 0; i < kDnaBytes; ++i) {
        if (a[i] != b[i]) {
            ++mismatches;
        }
    }
    return 1.0f - static_cast<float>(mismatches) / static_cast<float>(kDnaBytes);
}

/// @brief Jaccard-style overlap score between two Dna objects.
///
/// The "intersection" is the number of byte positions in d1 whose value
/// occurs anywhere in d2; the "union" is 2 * sizeof(Dna) minus that count.
/// This is not a strict set-based Jaccard index, since duplicate byte values
/// in d1 are each counted.
///
/// @param d1 First Dna (must not be null).
/// @param d2 Second Dna (must not be null).
/// @return intersection / union, in [0, 1].
float jaccard_index(Dna *d1, Dna *d2) {
    const std::uint8_t *a = raw_bytes(d1);
    const std::uint8_t *b = raw_bytes(d2);

    // Record which byte values occur in d2 so each d1 byte is an O(1) lookup
    // instead of a rescan of d2; the resulting count is unchanged.
    bool present_in_b[256] = {};
    for (std::size_t j = 0; j < kDnaBytes; ++j) {
        present_in_b[b[j]] = true;
    }

    std::size_t intersection = 0;
    for (std::size_t i = 0; i < kDnaBytes; ++i) {
        if (present_in_b[a[i]]) {
            ++intersection;
        }
    }

    const std::size_t union_size = 2 * kDnaBytes - intersection;
    return static_cast<float>(intersection) / static_cast<float>(union_size);
}

/// @brief Levenshtein-based similarity between two Dna objects.
///
/// Computes the byte-level edit distance (insert, delete, substitute, each
/// with cost 1) between the two byte sequences. Despite the name, the
/// returned value is a similarity, not a distance.
///
/// @param d1 First Dna (must not be null).
/// @param d2 Second Dna (must not be null).
/// @return 1 - (edit distance / sizeof(Dna)), i.e. 1 for identical inputs.
float levenshtein_distance(Dna *d1, Dna *d2) {
    const std::uint8_t *a = raw_bytes(d1);
    const std::uint8_t *b = raw_bytes(d2);

    // Distances are kept in size_t: narrowing them to 8 bits gives wrong
    // results as soon as sizeof(Dna) exceeds 255. Only the previous and
    // current rows of the table are needed, and they are heap-allocated so a
    // large Dna cannot exhaust the stack.
    std::vector<std::size_t> prev(kDnaBytes + 1);
    std::vector<std::size_t> curr(kDnaBytes + 1);

    // Row 0: turning an empty prefix into the first j bytes takes j inserts.
    for (std::size_t j = 0; j <= kDnaBytes; ++j) {
        prev[j] = j;
    }

    for (std::size_t i = 1; i <= kDnaBytes; ++i) {
        // Column 0: turning the first i bytes into nothing takes i deletes.
        curr[0] = i;
        for (std::size_t j = 1; j <= kDnaBytes; ++j) {
            const std::size_t cost = (a[i - 1] == b[j - 1]) ? 0 : 1;
            curr[j] = std::min({prev[j] + 1,         // deletion
                                curr[j - 1] + 1,     // insertion
                                prev[j - 1] + cost}); // match / substitution
        }
        prev.swap(curr);
    }

    // After the final swap the last computed row lives in `prev`.
    const std::size_t distance = prev[kDnaBytes];
    return 1.0f - static_cast<float>(distance) / static_cast<float>(kDnaBytes);
}

/// @brief Average pairwise similarity over a collection of Dna objects.
///
/// Applies f to every unordered pair (i < j) of elements and averages the
/// results.
///
/// @param vec Elements to compare; passed to f by address.
/// @param f   Similarity function applied to each pair.
/// @return Mean of f over all pairs, or 0 when vec holds fewer than two
///         elements (there are no pairs, and dividing by zero gave NaN).
float calc_similarity(std::vector<Dna> &vec, simil_func f) {
    const std::size_t count = vec.size();
    if (count < 2) {
        return 0.0f;
    }

    const std::size_t num_pairs = (count * (count - 1)) / 2;

    float total_similarity = 0.0f;
    for (std::size_t i = 0; i < count; ++i) {
        for (std::size_t j = i + 1; j < count; ++j) {
            total_similarity += f(&vec[i], &vec[j]);
        }
    }
    return total_similarity / static_cast<float>(num_pairs);
}

} // namespace Similarity