diff --git a/shared/inc/values/Similarity.hpp b/shared/inc/values/Similarity.hpp index 30396f3..b0c0edb 100644 --- a/shared/inc/values/Similarity.hpp +++ b/shared/inc/values/Similarity.hpp @@ -6,9 +6,10 @@ namespace Similarity // float euclidean_distance(Dna *d1, Dna *d2); direct distance betwen vector. wont give 0 and 1 // float dot_product(Dna *d1, Dna *d2); doent return betwen 0 to 1 float cosine_similarity(Dna *d1, Dna *d2); + float cosine_similarity_int(Dna *d1, Dna *d2); /* cosine similarity over the Dna bytes after shifting each byte by -128 into int8_t */ float hamming_distance(Dna *d1, Dna *d2); - float jaccard_index(Dna *d1, Dna *d2); - float levenshtein_distance(Dna *d1, Dna *d2); + // float jaccard_index(Dna *d1, Dna *d2); // compares the union of genes, e.g. it checks whether the gene for the sky equals the gene for the leaf colour; we do not need that + // float levenshtein_distance(Dna *d1, Dna *d2); // removed because it returns the same data as hamming distance, which is simpler to compute // float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and aligment. Dont need its aligned alredy typedef float(simil_func)(Dna *d1, Dna *d2); diff --git a/shared/src/values/Similarity.cpp b/shared/src/values/Similarity.cpp index ee5be8d..7de0818 100644 --- a/shared/src/values/Similarity.cpp +++ b/shared/src/values/Similarity.cpp @@ -7,7 +7,10 @@ namespace Similarity { - + // todo: use int8_t instead of uint8_t and map data + // 0 -> -128 + // 255 -> 127 + // int8_t = uint8_t - 128 float cosine_similarity(Dna *d1, Dna *d2) { uint8_t *d1a = (uint8_t *)d1; @@ -28,6 +31,31 @@ namespace Similarity return dot_prod / (mag1 * mag2); } + float cosine_similarity_int(Dna *d1, Dna *d2) + { + auto map = [](uint8_t a) -> int8_t /* re-centres a byte: 0 -> -128, 255 -> 127 */ + { return a - 128; }; + + uint8_t *d1a = (uint8_t *)d1; + uint8_t *d2a = (uint8_t *)d2; + + float mag1 = 0.0f; + float mag2 = 0.0f; + float dot_prod = 0.0f; + for (size_t i = 0; i < sizeof(Dna); i++) /* visits every byte of the Dna object */ + { + int8_t a = map(d1a[i]); + int8_t b = map(d2a[i]); + dot_prod += a * b; + mag1 += a * a; + mag2 += b * b; + } + mag1 = sqrt(mag1); + mag2 = sqrt(mag2); + + 
return dot_prod / (mag1 * mag2); /* NOTE(review): evaluates 0/0 (NaN) when either Dna consists only of bytes equal to 128, because its mapped magnitude is 0 */ + } + float hamming_distance(Dna *d1, Dna *d2) { uint8_t *d1a = (uint8_t *)d1; @@ -43,59 +71,6 @@ namespace Similarity return 1 - (distance / sizeof(Dna)); } - float jaccard_index(Dna *d1, Dna *d2) - { - uint8_t *d1a = (uint8_t *)d1; - uint8_t *d2a = (uint8_t *)d2; - size_t intersection = 0; - size_t union_size = sizeof(Dna) + sizeof(Dna); - - for (size_t i = 0; i < sizeof(Dna); i++) - { - for (size_t j = 0; j < sizeof(Dna); j++) - { - if (d1a[i] == d2a[j]) - { - intersection++; - break; - } - } - } - - union_size -= intersection; - return (float)intersection / union_size; - } - - float levenshtein_distance(Dna *d1, Dna *d2) - { - auto min = [](uint8_t a, uint8_t b, uint8_t c) -> uint8_t - { - return (a < b ? (a < c ? a : c) : (b < c ? b : c)); - }; - - uint8_t *d1a = (uint8_t *)d1; - uint8_t *d2a = (uint8_t *)d2; - float matrix[sizeof(Dna) + 1][sizeof(Dna) + 1]; - for (size_t i = 0; i <= sizeof(Dna); i++) - { - matrix[i][0] = i; - } - for (size_t j = 0; j <= sizeof(Dna); j++) - { - matrix[0][j] = j; - } - for (size_t i = 1; i <= sizeof(Dna); i++) - { - for (size_t j = 1; j <= sizeof(Dna); j++) - { - uint8_t cost = (d1a[i - 1] == d2a[j - 1]) ? 
0 : 1; - matrix[i][j] = min(matrix[i - 1][j] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j - 1] + cost); - } - } - float ld = matrix[sizeof(Dna)][sizeof(Dna)]; - return 1 - (ld / sizeof(Dna)); - } - float calc_similarity(std::vector &vec, simil_func f) { size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2; diff --git a/view/inc/Vapp.hpp b/view/inc/Vapp.hpp index 219fd20..47d0457 100644 --- a/view/inc/Vapp.hpp +++ b/view/inc/Vapp.hpp @@ -45,5 +45,5 @@ private: int drawY = 0; void setUpManager(); - std::array simil; + std::array simil; }; \ No newline at end of file diff --git a/view/src/Vapp.cpp b/view/src/Vapp.cpp index 5fef5de..d94af93 100644 --- a/view/src/Vapp.cpp +++ b/view/src/Vapp.cpp @@ -102,9 +102,7 @@ void Vapp::update() simil[0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity); simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); - simil[2] = Similarity::calc_similarity(manager.vector, Similarity::jaccard_index); - simil[3] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance); - + simil[2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int); stageOfDrawing = DrawingStage::done; break; @@ -174,8 +172,7 @@ void Vapp::draw() ImGui::Begin("Status", &showStats); ImGui::LabelText("##sim1", "cosine_similarity: %f", simil[0]); ImGui::LabelText("##sim2", "hamming_distance: %f", simil[1]); - ImGui::LabelText("##sim3", "jaccard_index: %f", simil[2]); - ImGui::LabelText("##sim4", "levenshtein_distance: %f", simil[3]); + ImGui::LabelText("##sim3", "cosine_similarity_int: %f", simil[2]); ImGui::End(); }