This commit is contained in:
2025-10-24 16:54:16 +02:00
parent 060b9f9182
commit 525936c6e7
4 changed files with 81 additions and 12 deletions

View File

@@ -4,7 +4,7 @@
namespace Similarity
{
float euclidean_distance(Dna *d1, Dna *d2);// direct distance between the vectors; won't give 0 and 1
// float dot_product(Dna *d1, Dna *d2); // doesn't return between 0 and 1
float dot_minmax(Dna *d1, Dna *d2); // byte-wise dot product divided by its maximum (sizeof(Dna) * 255 * 255), so the result stays between 0 and 1
float cosine_similarity(Dna *d1, Dna *d2);
float cosine_similarity_int(Dna *d1, Dna *d2);
float hamming_distance(Dna *d1, Dna *d2);

View File

@@ -2,10 +2,26 @@
#include <cmath>
#include <algorithm>
#include <numeric>
#include <raylib.h>
#include <chrono>
namespace Similarity
{
/**
 * @brief Normalised byte-wise dot product of two Dna objects.
 *
 * Both objects are read as raw arrays of sizeof(Dna) bytes. The products of
 * corresponding bytes are summed and the sum is divided by the largest value
 * it can reach (every byte equal to 255), so the result lies in [0, 1].
 *
 * @param d1 First Dna; dereferenced without a null check.
 * @param d2 Second Dna; dereferenced without a null check.
 * @return Sum of byte products divided by sizeof(Dna) * 255 * 255.
 */
float dot_minmax(Dna *d1, Dna *d2)
{
    const uint64_t max_sum = static_cast<uint64_t>(sizeof(Dna)) * 255u * 255u;
    const uint8_t *lhs = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *rhs = reinterpret_cast<const uint8_t *>(d2);

    // 64-bit accumulator to match max_sum: a 32-bit one silently wraps once
    // sizeof(Dna) grows past roughly 66,000 bytes (2^32 / (255 * 255)).
    uint64_t sum = 0;
    for (size_t i = 0; i < sizeof(Dna); ++i)
    {
        sum += static_cast<uint64_t>(lhs[i]) * static_cast<uint64_t>(rhs[i]);
    }

    // Divide in double precision, then narrow once to the float return type.
    return static_cast<float>(static_cast<double>(sum) / static_cast<double>(max_sum));
}
float euclidean_distance(Dna *d1, Dna *d2)
{
uint8_t *a = (uint8_t *)d1;
@@ -100,8 +116,41 @@ namespace Similarity
return 1 - (distance / (end - start));
}
/**
 * @brief Short label for a similarity function, used when logging.
 *
 * Compares the given function pointer against each known similarity function
 * in turn and returns the matching label.
 *
 * @param f Similarity function to identify.
 * @return A string literal naming the function, or "unknown" when the pointer
 *         matches none of the functions checked here.
 */
const char *nameofFunc(simil_func f)
{
    if (f == &Similarity::euclidean_distance)
        return "eucl";
    if (f == &Similarity::dot_minmax)
        return "dot";
    if (f == &Similarity::cosine_similarity)
        return "cos";
    if (f == &Similarity::cosine_similarity_int)
        return "cos_i";
    if (f == &Similarity::hamming_distance)
        return "hamming";
    if (f == &Similarity::levenshtein_distance)
        return "leven";

    // No match among the functions above.
    return "unknown";
}
float calc_similarity(std::vector<Dna> &vec, simil_func f)
{
auto start = std::chrono::high_resolution_clock::now();
size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2;
float total_similarity = 0.0;
@@ -113,6 +162,12 @@ namespace Similarity
}
}
float average_similarity = total_similarity / num_pairs;
auto stop = std::chrono::high_resolution_clock::now();
const auto int_ms = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
TraceLog(LOG_INFO, "%s, %d", nameofFunc(f), int_ms);
return average_similarity * 100.0f;
}
@@ -123,7 +178,7 @@ namespace Similarity
uint8_t *b = (uint8_t *)d2;
// Create a distance matrix
std::vector<std::vector<uint32_t>> dp(len + 1, std::vector<uint32_t>(len + 1, 0));
static std::vector<std::vector<uint32_t>> dp(len + 1, std::vector<uint32_t>(len + 1, 0));
// Initialize the first row and column
for (size_t i = 0; i <= len; ++i)

14
tmp.txt Normal file
View File

@@ -0,0 +1,14 @@
eucl cos cos_i hamming dot leven
91 117 181 87 41 105799
60 78 305 250 40 100331
61 78 121 105 40 97438
66 81 124 106 40 97529
60 78 127 108 40 96296
62 85 131 104 39 96456
61 81 125 106 40 96510
61 81 125 103 40 97253
61 81 125 78 40 97409
60 82 125 103 40 99816
62 81 128 81 40 98978
68 81 126 58 40 98289
61 88 130 60 39 99663

View File

@@ -105,8 +105,8 @@ void Vapp::update()
simil[1] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
simil[2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
simil[3] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
simil[4] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
simil[5] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
simil[4] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
simil[5] = Similarity::calc_similarity(manager.vector, Similarity::dot_minmax);
stageOfDrawing = DrawingStage::save;
break;
case DrawingStage::save:
@@ -185,8 +185,8 @@ void Vapp::draw()
ImGui::LabelText("##sim2", "cosine_similarity: %f", simil[1]);
ImGui::LabelText("##sim3", "cosine_similarity_int: %f", simil[2]);
ImGui::LabelText("##sim4", "hamming_distance: %f", simil[3]);
ImGui::LabelText("##sim5", "hamming_distance_without_seeds: %f", simil[4]);
ImGui::LabelText("##sim6", "levenshtein_distance: %f", simil[5]);
ImGui::LabelText("##sim5", "levenshtein_distance: %f", simil[4]);
ImGui::LabelText("##sim6", "dot_minmax: %f", simil[5]);
const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody;
const int columns = numberOfFunc + 1;
@@ -206,9 +206,9 @@ void Vapp::draw()
ImGui::TableSetColumnIndex(4);
ImGui::Text("hamming_distance");
ImGui::TableSetColumnIndex(5);
ImGui::Text("hamming_distance_without_seeds");
ImGui::TableSetColumnIndex(6);
ImGui::Text("levenshtein_distance");
ImGui::TableSetColumnIndex(6);
ImGui::Text("dot_minmax");
for (int row = 0; row < similTable.size(); row++)
{
@@ -325,8 +325,8 @@ void Vapp::setUpTable()
similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
similTable[s][2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
similTable[s][3] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
similTable[s][4] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
similTable[s][5] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
similTable[s][4] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
similTable[s][5] = Similarity::calc_similarity(manager.vector, Similarity::dot_minmax);
DnaManager::newGen(&manager);
}
else
@@ -341,8 +341,8 @@ void Vapp::setUpTable()
sprintf(buff, "%ld.txt", id);
std::ofstream file(buff);
file << "| index | euclidean_distance | cosine_similarity | cosine_similarity_int | hamming_distance | hamming_distance_without_seeds | levenshtein_distance |\n";
file << "| --- | --- | --- | --- | --- | --- | --- |\n";
file << "| index | euclidean_distance | cosine_similarity | cosine_similarity_int | hamming_distance | levenshtein_distance | dot_minmax |\n";
file << "| --- | --- | --- | --- | --- | --- | --- |\n";