Compare commits

...

2 Commits

Author SHA1 Message Date
60a053b929 Remove two similaritys 2025-02-07 21:37:50 +01:00
92acdf0f53 rm unused 2025-02-06 16:17:46 +01:00
5 changed files with 35 additions and 63 deletions

View File

@ -6,9 +6,10 @@ namespace Similarity
// float euclidean_distance(Dna *d1, Dna *d2); direct distance between vectors. won't give a value between 0 and 1 // float euclidean_distance(Dna *d1, Dna *d2); direct distance between vectors. won't give a value between 0 and 1
// float dot_product(Dna *d1, Dna *d2); doesn't return a value between 0 and 1 // float dot_product(Dna *d1, Dna *d2); doesn't return a value between 0 and 1
float cosine_similarity(Dna *d1, Dna *d2); float cosine_similarity(Dna *d1, Dna *d2);
float cosine_similarity_int(Dna *d1, Dna *d2);
float hamming_distance(Dna *d1, Dna *d2); float hamming_distance(Dna *d1, Dna *d2);
float jaccard_index(Dna *d1, Dna *d2); // float jaccard_index(Dna *d1, Dna *d2); // compares the union of genes, e.g. it checks whether the gene for the sky is the same as the gene for the leaf color; we don't need that
float levenshtein_distance(Dna *d1, Dna *d2); // float levenshtein_distance(Dna *d1, Dna *d2); // removed because it returns the same data as the Hamming distance, which is simpler to compute
// float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and alignment. Not needed, the data is already aligned // float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and alignment. Not needed, the data is already aligned
typedef float(simil_func)(Dna *d1, Dna *d2); typedef float(simil_func)(Dna *d1, Dna *d2);

View File

@ -7,7 +7,10 @@
namespace Similarity namespace Similarity
{ {
// todo: use int8_t instead of uint8_t and map data
// 0 -> -128
// 255 -> 127
// int8_t = uint8_t - 128
float cosine_similarity(Dna *d1, Dna *d2) float cosine_similarity(Dna *d1, Dna *d2)
{ {
uint8_t *d1a = (uint8_t *)d1; uint8_t *d1a = (uint8_t *)d1;
@ -28,6 +31,31 @@ namespace Similarity
return dot_prod / (mag1 * mag2); return dot_prod / (mag1 * mag2);
} }
// Cosine similarity of two Dna objects viewed as raw byte arrays of
// sizeof(Dna) bytes each. Every byte is shifted from the unsigned range
// [0, 255] to the signed range [-128, 127] (value - 128) before the dot
// product and magnitudes are accumulated.
//
// Returns dot(a, b) / (|a| * |b|). When either shifted vector has zero
// magnitude (all of its bytes equal 128) the cosine is undefined; 0 is
// returned in that case instead of the NaN a 0/0 division would produce.
float cosine_similarity_int(Dna *d1, Dna *d2)
{
    // Re-centre an unsigned byte around zero: 0 -> -128, 255 -> 127.
    auto map = [](uint8_t a) -> int8_t
    { return a - 128; };

    const uint8_t *d1a = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *d2a = reinterpret_cast<const uint8_t *>(d2);

    float mag1 = 0.0f;
    float mag2 = 0.0f;
    float dot_prod = 0.0f;

    for (size_t i = 0; i < sizeof(Dna); i++)
    {
        const int8_t a = map(d1a[i]);
        const int8_t b = map(d2a[i]);

        // int8_t operands are promoted to int, so the products cannot overflow.
        dot_prod += a * b;
        mag1 += a * a;
        mag2 += b * b;
    }

    // Sums of squares are zero only when every shifted component is zero.
    if (mag1 == 0.0f || mag2 == 0.0f)
    {
        return 0.0f;
    }

    mag1 = sqrt(mag1);
    mag2 = sqrt(mag2);

    return dot_prod / (mag1 * mag2);
}
float hamming_distance(Dna *d1, Dna *d2) float hamming_distance(Dna *d1, Dna *d2)
{ {
uint8_t *d1a = (uint8_t *)d1; uint8_t *d1a = (uint8_t *)d1;
@ -43,59 +71,6 @@ namespace Similarity
return 1 - (distance / sizeof(Dna)); return 1 - (distance / sizeof(Dna));
} }
// Jaccard-style overlap of two Dna objects viewed as raw byte arrays of
// sizeof(Dna) bytes each.
//
// "intersection" is the number of positions in d1 whose byte value occurs
// anywhere in d2 (position-independent; repeated values in d1 are each
// counted). "union" is 2 * sizeof(Dna) - intersection. The result is
// intersection / union.
//
// A 256-entry presence table replaces the nested scan over d2, giving the
// same count in linear instead of quadratic time.
float jaccard_index(Dna *d1, Dna *d2)
{
    const uint8_t *d1a = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *d2a = reinterpret_cast<const uint8_t *>(d2);

    // present_in_d2[v] is true when byte value v appears somewhere in d2.
    bool present_in_d2[256] = {};
    for (size_t j = 0; j < sizeof(Dna); j++)
    {
        present_in_d2[d2a[j]] = true;
    }

    size_t intersection = 0;
    for (size_t i = 0; i < sizeof(Dna); i++)
    {
        if (present_in_d2[d1a[i]])
        {
            intersection++;
        }
    }

    // intersection <= sizeof(Dna), so the union is always at least sizeof(Dna).
    const size_t union_size = sizeof(Dna) + sizeof(Dna) - intersection;

    return (float)intersection / union_size;
}
// Levenshtein (edit) distance between two Dna objects viewed as raw byte
// arrays of sizeof(Dna) bytes each, converted to a similarity:
// 1 - distance / sizeof(Dna).
//
// Distances are computed with size_t arithmetic, so values above 255 are
// not truncated, and only two rows of the dynamic-programming table are
// kept, so stack use grows linearly with sizeof(Dna) rather than
// quadratically.
float levenshtein_distance(Dna *d1, Dna *d2)
{
    auto min3 = [](size_t a, size_t b, size_t c) -> size_t
    {
        return (a < b ? (a < c ? a : c) : (b < c ? b : c));
    };

    const uint8_t *d1a = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *d2a = reinterpret_cast<const uint8_t *>(d2);

    size_t row_a[sizeof(Dna) + 1];
    size_t row_b[sizeof(Dna) + 1];
    size_t *prev = row_a; // distances for the prefix of d1 of length i - 1
    size_t *curr = row_b; // distances for the prefix of d1 of length i

    // Turning an empty prefix into the first j bytes of d2 costs j insertions.
    for (size_t j = 0; j <= sizeof(Dna); j++)
    {
        prev[j] = j;
    }

    for (size_t i = 1; i <= sizeof(Dna); i++)
    {
        // Turning the first i bytes of d1 into an empty prefix costs i deletions.
        curr[0] = i;

        for (size_t j = 1; j <= sizeof(Dna); j++)
        {
            const size_t cost = (d1a[i - 1] == d2a[j - 1]) ? 0 : 1;
            curr[j] = min3(prev[j] + 1,         // deletion
                           curr[j - 1] + 1,     // insertion
                           prev[j - 1] + cost); // match / substitution
        }

        // The row just filled becomes the previous row of the next iteration.
        size_t *tmp = prev;
        prev = curr;
        curr = tmp;
    }

    // After the final swap the last computed row is in prev.
    const float ld = static_cast<float>(prev[sizeof(Dna)]);

    return 1 - (ld / sizeof(Dna));
}
float calc_similarity(std::vector<Dna> &vec, simil_func f) float calc_similarity(std::vector<Dna> &vec, simil_func f)
{ {
size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2; size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2;

View File

@ -45,5 +45,5 @@ private:
int drawY = 0; int drawY = 0;
void setUpManager(); void setUpManager();
std::array<float, 4> simil; std::array<float, 3> simil;
}; };

View File

@ -102,9 +102,7 @@ void Vapp::update()
simil[0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity); simil[0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
simil[2] = Similarity::calc_similarity(manager.vector, Similarity::jaccard_index); simil[2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
simil[3] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
stageOfDrawing = DrawingStage::done; stageOfDrawing = DrawingStage::done;
break; break;
@ -174,8 +172,7 @@ void Vapp::draw()
ImGui::Begin("Status", &showStats); ImGui::Begin("Status", &showStats);
ImGui::LabelText("##sim1", "cosine_similarity: %f", simil[0]); ImGui::LabelText("##sim1", "cosine_similarity: %f", simil[0]);
ImGui::LabelText("##sim2", "hamming_distance: %f", simil[1]); ImGui::LabelText("##sim2", "hamming_distance: %f", simil[1]);
ImGui::LabelText("##sim3", "jaccard_index: %f", simil[2]); ImGui::LabelText("##sim3", "cosine_similarity_int: %f", simil[2]);
ImGui::LabelText("##sim4", "levenshtein_distance: %f", simil[3]);
ImGui::End(); ImGui::End();
} }

View File

@ -14,7 +14,6 @@ int main(int argc, char *argv[])
SetTargetFPS(60); SetTargetFPS(60);
rlImGuiSetup(true); rlImGuiSetup(true);
app.init(); app.init();
bool showDemoWindow = true;
ImGui::GetIO().ConfigFlags |= ImGuiConfigFlags_DockingEnable; ImGui::GetIO().ConfigFlags |= ImGuiConfigFlags_DockingEnable;
while (!WindowShouldClose()) while (!WindowShouldClose())