This commit is contained in:
2025-10-24 16:54:16 +02:00
parent 060b9f9182
commit 525936c6e7
4 changed files with 81 additions and 12 deletions

View File

@@ -4,7 +4,7 @@
namespace Similarity
{
float euclidean_distance(Dna *d1, Dna *d2);// direct distance between the vectors; won't give 0 and 1
// float dot_product(Dna *d1, Dna *d2); // doesn't return between 0 and 1
float dot_minmax(Dna *d1, Dna *d2); // byte-wise dot product divided by its maximum possible value (sizeof(Dna) * 255 * 255), so nominally in [0, 1]
float cosine_similarity(Dna *d1, Dna *d2);
float cosine_similarity_int(Dna *d1, Dna *d2);
float hamming_distance(Dna *d1, Dna *d2);

View File

@@ -2,10 +2,26 @@
#include <cmath>
#include <algorithm>
#include <numeric>
#include <raylib.h>
#include <chrono>
namespace Similarity
{
// Dot product of two Dna objects, each viewed as a raw array of sizeof(Dna)
// unsigned bytes, divided by the largest value that dot product can take
// (every byte of both operands equal to 255).
//
// d1, d2: objects to compare; both are read byte-by-byte and not modified.
// Returns the scaled dot product as a float (0 when either operand is all
// zero bytes, 1 when every byte of both operands is 255).
//
// NOTE(review): every byte of the object representation is read, which
// includes padding bytes if Dna has any -- confirm Dna is tightly packed.
float dot_minmax(Dna *d1, Dna *d2)
{
    // Upper bound of the sum below; computed in 64 bits so it cannot wrap.
    constexpr uint64_t max_dot = static_cast<uint64_t>(sizeof(Dna)) * 255u * 255u;

    const uint8_t *a = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *b = reinterpret_cast<const uint8_t *>(d2);

    // 64-bit accumulator to match max_dot: a 32-bit sum would wrap once
    // sizeof(Dna) exceeds roughly 66 KB (2^32 / (255 * 255) bytes).
    uint64_t dot = 0;
    for (size_t i = 0; i < sizeof(Dna); ++i)
    {
        dot += static_cast<uint64_t>(a[i]) * b[i];
    }

    // Divide in double for precision, then narrow explicitly to the return type.
    return static_cast<float>(static_cast<double>(dot) / static_cast<double>(max_dot));
}
float euclidean_distance(Dna *d1, Dna *d2)
{
uint8_t *a = (uint8_t *)d1;
@@ -100,8 +116,41 @@ namespace Similarity
return 1 - (distance / (end - start));
}
// Maps a similarity function pointer to the short label used when logging.
// Returns "unknown" for any pointer that is not one of the functions
// compared against below.
const char *nameofFunc(simil_func f)
{
    // Guard clauses: the first matching function address decides the label.
    if (f == &Similarity::euclidean_distance)
        return "eucl";
    if (f == &Similarity::dot_minmax)
        return "dot";
    if (f == &Similarity::cosine_similarity)
        return "cos";
    if (f == &Similarity::cosine_similarity_int)
        return "cos_i";
    if (f == &Similarity::hamming_distance)
        return "hamming";
    if (f == &Similarity::levenshtein_distance)
        return "leven";

    // No known similarity function matched.
    return "unknown";
}
float calc_similarity(std::vector<Dna> &vec, simil_func f)
{
auto start = std::chrono::high_resolution_clock::now();
size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2;
float total_similarity = 0.0;
@@ -113,6 +162,12 @@ namespace Similarity
}
}
float average_similarity = total_similarity / num_pairs;
auto stop = std::chrono::high_resolution_clock::now();
const auto int_ms = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
TraceLog(LOG_INFO, "%s, %d", nameofFunc(f), int_ms);
return average_similarity * 100.0f;
}
@@ -123,7 +178,7 @@ namespace Similarity
uint8_t *b = (uint8_t *)d2;
// Create a distance matrix
std::vector<std::vector<uint32_t>> dp(len + 1, std::vector<uint32_t>(len + 1, 0));
static std::vector<std::vector<uint32_t>> dp(len + 1, std::vector<uint32_t>(len + 1, 0));
// Initialize the first row and column
for (size_t i = 0; i <= len; ++i)