From 266d250881eae2eb6825206b4b8cf5af24dc1411 Mon Sep 17 00:00:00 2001 From: Nikola Petrov Date: Sun, 9 Mar 2025 15:14:21 +0100 Subject: [PATCH] Update similarity calc --- app/src/App.cpp | 4 +- shared/inc/values/Similarity.hpp | 7 +-- shared/src/values/Similarity.cpp | 93 +++++++++++++++++++------------- view/inc/Vapp.hpp | 8 +-- view/src/Vapp.cpp | 17 +++--- 5 files changed, 73 insertions(+), 56 deletions(-) diff --git a/app/src/App.cpp b/app/src/App.cpp index 3835dba..cb1dda1 100644 --- a/app/src/App.cpp +++ b/app/src/App.cpp @@ -74,7 +74,7 @@ void App::init(int screenWidth, int screenHeight) } DnaStore::load(&manager); - simil = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); + simil = Similarity::calc_similarity(manager.vector); upTex(Liked::tbd); while (!canvas.tick(canvasTexure[TOP])) { @@ -128,7 +128,7 @@ void App::upTex(Liked liked) DnaStore::saveGen(&manager); DnaManager::newGen(&manager); DnaStore::saveVec(&manager); - simil = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); + simil = Similarity::calc_similarity(manager.vector); } DnaStore::saveData(&manager); } diff --git a/shared/inc/values/Similarity.hpp b/shared/inc/values/Similarity.hpp index b0c0edb..ed01179 100644 --- a/shared/inc/values/Similarity.hpp +++ b/shared/inc/values/Similarity.hpp @@ -5,13 +5,14 @@ namespace Similarity { // float euclidean_distance(Dna *d1, Dna *d2); direct distance betwen vector. wont give 0 and 1 // float dot_product(Dna *d1, Dna *d2); doent return betwen 0 to 1 - float cosine_similarity(Dna *d1, Dna *d2); - float cosine_similarity_int(Dna *d1, Dna *d2); + // float cosine_similarity(Dna *d1, Dna *d2); + // float cosine_similarity_int(Dna *d1, Dna *d2); float hamming_distance(Dna *d1, Dna *d2); + float hamming_distance_without_seeds(Dna *d1, Dna *d2); // float jaccard_index(Dna *d1, Dna *d2); // primerja unio genov naprimer gleda ce je gen za nebo isti z genom za barvo za liste, to nerabimo // float levenshtein_distance(Dna *d1, Dna *d2); // odstranjen ker mi vrne iste podatke kot hamming distance ki je bolj enostaven za izracun // float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and aligment. Dont need its aligned alredy typedef float(simil_func)(Dna *d1, Dna *d2); - float calc_similarity(std::vector &vec, simil_func f); + float calc_similarity(std::vector &vec, simil_func f = hamming_distance_without_seeds); } \ No newline at end of file diff --git a/shared/src/values/Similarity.cpp b/shared/src/values/Similarity.cpp index b36bd98..fcb5a05 100644 --- a/shared/src/values/Similarity.cpp +++ b/shared/src/values/Similarity.cpp @@ -7,50 +7,50 @@ namespace Similarity // 0 -> -128 // 255 -> 127 // int8_t = uint8_t - 128 - float cosine_similarity(Dna *d1, Dna *d2) - { - uint8_t *d1a = (uint8_t *)d1; - uint8_t *d2a = (uint8_t *)d2; + // float cosine_similarity(Dna *d1, Dna *d2) + // { + // uint8_t *d1a = (uint8_t *)d1; + // uint8_t *d2a = (uint8_t *)d2; - float mag1 = 0.0f; - float mag2 = 0.0f; - float dot_prod = 0.0f; - for (size_t i = 0; i < sizeof(Dna); i++) - { - dot_prod += d1a[i] * d2a[i]; - mag1 += d1a[i] * d1a[i]; - mag2 += d2a[i] * d2a[i]; - } - mag1 = sqrt(mag1); - mag2 = sqrt(mag2); + // float mag1 = 0.0f; + // float mag2 = 0.0f; + // float dot_prod = 0.0f; + // for (size_t i = 0; i < sizeof(Dna); i++) + // { + // dot_prod += d1a[i] * d2a[i]; + // mag1 += d1a[i] * d1a[i]; + // mag2 += d2a[i] * d2a[i]; + // } + // mag1 = sqrt(mag1); + // mag2 = sqrt(mag2); - return dot_prod / (mag1 * mag2); - } + // return dot_prod / (mag1 * mag2); + // } - float cosine_similarity_int(Dna *d1, Dna *d2) - { - auto map = [](uint8_t a) -> int8_t - { return a - 128; }; + // float cosine_similarity_int(Dna *d1, Dna *d2) + // { + // auto map = [](uint8_t a) -> int8_t + // { return a - 128; }; - uint8_t *d1a = (uint8_t *)d1; - uint8_t *d2a = (uint8_t *)d2; + // uint8_t *d1a = (uint8_t *)d1; + // uint8_t *d2a = (uint8_t *)d2; - float mag1 = 0.0f; - float mag2 = 0.0f; - float dot_prod = 0.0f; - for (size_t i = 0; i < sizeof(Dna); i++) - { - int8_t a = map(d1a[i]); - int8_t b = map(d2a[i]); - dot_prod += a * b; - mag1 += a * a; - mag2 += b * b; - } - mag1 = sqrt(mag1); - mag2 = sqrt(mag2); + // float mag1 = 0.0f; + // float mag2 = 0.0f; + // float dot_prod = 0.0f; + // for (size_t i = 0; i < sizeof(Dna); i++) + // { + // int8_t a = map(d1a[i]); + // int8_t b = map(d2a[i]); + // dot_prod += a * b; + // mag1 += a * a; + // mag2 += b * b; + // } + // mag1 = sqrt(mag1); + // mag2 = sqrt(mag2); - return dot_prod / (mag1 * mag2); - } + // return dot_prod / (mag1 * mag2); + // } float hamming_distance(Dna *d1, Dna *d2) { @@ -67,6 +67,23 @@ namespace Similarity return 1 - (distance / sizeof(Dna)); } + float hamming_distance_without_seeds(Dna *d1, Dna *d2) + { + constexpr size_t start = sizeof(uint128) * 3; + constexpr size_t end = sizeof(Dna); + uint8_t *d1a = (uint8_t *)d1; + uint8_t *d2a = (uint8_t *)d2; + float distance = 0; + for (size_t i = start; i < end; i++) + { + if (d1a[i] != d2a[i]) + { + distance++; + } + } + return 1 - (distance / (end - start)); + } + float calc_similarity(std::vector &vec, simil_func f) { size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2; diff --git a/view/inc/Vapp.hpp b/view/inc/Vapp.hpp index fe6b20d..5f9873d 100644 --- a/view/inc/Vapp.hpp +++ b/view/inc/Vapp.hpp @@ -14,6 +14,8 @@ enum DrawingStage done, }; +constexpr int numberOfFunc = 2; + class Vapp { public: @@ -45,9 +47,9 @@ private: int drawX = 0; int drawY = 0; void setUpManager(); - - std::array simil; - std::vector> similTable; + + std::array simil; + std::vector> similTable; void setUpTable(); }; \ No newline at end of file diff --git a/view/src/Vapp.cpp b/view/src/Vapp.cpp index 996d049..6551095 100644 --- a/view/src/Vapp.cpp +++ b/view/src/Vapp.cpp @@ -100,9 +100,8 @@ void Vapp::update() break; case DrawingStage::calSim: - simil[0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity); - simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); - simil[2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int); + simil[0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); + simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds); stageOfDrawing = DrawingStage::done; break; @@ -174,12 +173,11 @@ void Vapp::draw() if (showStats) { ImGui::Begin("Status", &showStats); - ImGui::LabelText("##sim1", "cosine_similarity: %f", simil[0]); - ImGui::LabelText("##sim2", "hamming_distance: %f", simil[1]); - ImGui::LabelText("##sim3", "cosine_similarity_int: %f", simil[2]); + ImGui::LabelText("##sim1", "hamming_distance: %f", simil[0]); + ImGui::LabelText("##sim2", "hamming_distance_without_seeds: %f", simil[1]); const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody; - const int columns = 4; + const int columns = numberOfFunc + 1; if (ImGui::BeginTable("table1", columns, flags)) { for (int row = 0; row < similTable.size(); row++) @@ -292,9 +290,8 @@ void Vapp::setUpTable() { similTable.emplace_back(); int s = similTable.size() - 1; - similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity); - similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); - similTable[s][2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int); + similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); + similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds); DnaManager::newGen(&manager); }