Update similarity calc

This commit is contained in:
Nikola Petrov 2025-03-09 15:14:21 +01:00
parent 1994e7ccb1
commit 266d250881
5 changed files with 73 additions and 56 deletions

View File

@ -74,7 +74,7 @@ void App::init(int screenWidth, int screenHeight)
} }
DnaStore::load(&manager); DnaStore::load(&manager);
simil = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); simil = Similarity::calc_similarity(manager.vector);
upTex(Liked::tbd); upTex(Liked::tbd);
while (!canvas.tick(canvasTexure[TOP])) while (!canvas.tick(canvasTexure[TOP]))
{ {
@ -128,7 +128,7 @@ void App::upTex(Liked liked)
DnaStore::saveGen(&manager); DnaStore::saveGen(&manager);
DnaManager::newGen(&manager); DnaManager::newGen(&manager);
DnaStore::saveVec(&manager); DnaStore::saveVec(&manager);
simil = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); simil = Similarity::calc_similarity(manager.vector);
} }
DnaStore::saveData(&manager); DnaStore::saveData(&manager);
} }

View File

@ -5,13 +5,14 @@ namespace Similarity
{ {
// float euclidean_distance(Dna *d1, Dna *d2); direct distance betwen vector. wont give 0 and 1 // float euclidean_distance(Dna *d1, Dna *d2); direct distance betwen vector. wont give 0 and 1
// float dot_product(Dna *d1, Dna *d2); doent return betwen 0 to 1 // float dot_product(Dna *d1, Dna *d2); doent return betwen 0 to 1
float cosine_similarity(Dna *d1, Dna *d2); // float cosine_similarity(Dna *d1, Dna *d2);
float cosine_similarity_int(Dna *d1, Dna *d2); // float cosine_similarity_int(Dna *d1, Dna *d2);
float hamming_distance(Dna *d1, Dna *d2); float hamming_distance(Dna *d1, Dna *d2);
float hamming_distance_without_seeds(Dna *d1, Dna *d2);
// float jaccard_index(Dna *d1, Dna *d2); // primerja unio genov naprimer gleda ce je gen za nebo isti z genom za barvo za liste, to nerabimo // float jaccard_index(Dna *d1, Dna *d2); // primerja unio genov naprimer gleda ce je gen za nebo isti z genom za barvo za liste, to nerabimo
// float levenshtein_distance(Dna *d1, Dna *d2); // odstranjen ker mi vrne iste podatke kot hamming distance ki je bolj enostaven za izracun // float levenshtein_distance(Dna *d1, Dna *d2); // odstranjen ker mi vrne iste podatke kot hamming distance ki je bolj enostaven za izracun
// float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and aligment. Dont need its aligned alredy // float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and aligment. Dont need its aligned alredy
typedef float(simil_func)(Dna *d1, Dna *d2); typedef float(simil_func)(Dna *d1, Dna *d2);
float calc_similarity(std::vector<Dna> &vec, simil_func f); float calc_similarity(std::vector<Dna> &vec, simil_func f = hamming_distance_without_seeds);
} }

View File

@ -7,50 +7,50 @@ namespace Similarity
// 0 -> -128 // 0 -> -128
// 255 -> 127 // 255 -> 127
// int8_t = uint8_t - 128 // int8_t = uint8_t - 128
float cosine_similarity(Dna *d1, Dna *d2) // float cosine_similarity(Dna *d1, Dna *d2)
{ // {
uint8_t *d1a = (uint8_t *)d1; // uint8_t *d1a = (uint8_t *)d1;
uint8_t *d2a = (uint8_t *)d2; // uint8_t *d2a = (uint8_t *)d2;
float mag1 = 0.0f; // float mag1 = 0.0f;
float mag2 = 0.0f; // float mag2 = 0.0f;
float dot_prod = 0.0f; // float dot_prod = 0.0f;
for (size_t i = 0; i < sizeof(Dna); i++) // for (size_t i = 0; i < sizeof(Dna); i++)
{ // {
dot_prod += d1a[i] * d2a[i]; // dot_prod += d1a[i] * d2a[i];
mag1 += d1a[i] * d1a[i]; // mag1 += d1a[i] * d1a[i];
mag2 += d2a[i] * d2a[i]; // mag2 += d2a[i] * d2a[i];
} // }
mag1 = sqrt(mag1); // mag1 = sqrt(mag1);
mag2 = sqrt(mag2); // mag2 = sqrt(mag2);
return dot_prod / (mag1 * mag2); // return dot_prod / (mag1 * mag2);
} // }
float cosine_similarity_int(Dna *d1, Dna *d2) // float cosine_similarity_int(Dna *d1, Dna *d2)
{ // {
auto map = [](uint8_t a) -> int8_t // auto map = [](uint8_t a) -> int8_t
{ return a - 128; }; // { return a - 128; };
uint8_t *d1a = (uint8_t *)d1; // uint8_t *d1a = (uint8_t *)d1;
uint8_t *d2a = (uint8_t *)d2; // uint8_t *d2a = (uint8_t *)d2;
float mag1 = 0.0f; // float mag1 = 0.0f;
float mag2 = 0.0f; // float mag2 = 0.0f;
float dot_prod = 0.0f; // float dot_prod = 0.0f;
for (size_t i = 0; i < sizeof(Dna); i++) // for (size_t i = 0; i < sizeof(Dna); i++)
{ // {
int8_t a = map(d1a[i]); // int8_t a = map(d1a[i]);
int8_t b = map(d2a[i]); // int8_t b = map(d2a[i]);
dot_prod += a * b; // dot_prod += a * b;
mag1 += a * a; // mag1 += a * a;
mag2 += b * b; // mag2 += b * b;
} // }
mag1 = sqrt(mag1); // mag1 = sqrt(mag1);
mag2 = sqrt(mag2); // mag2 = sqrt(mag2);
return dot_prod / (mag1 * mag2); // return dot_prod / (mag1 * mag2);
} // }
float hamming_distance(Dna *d1, Dna *d2) float hamming_distance(Dna *d1, Dna *d2)
{ {
@ -67,6 +67,23 @@ namespace Similarity
return 1 - (distance / sizeof(Dna)); return 1 - (distance / sizeof(Dna));
} }
// Byte-wise similarity of two Dna objects that ignores the leading
// sizeof(uint128) * 3 bytes of each object.
// Returns 1 minus the fraction of compared bytes that differ, so the result
// is 1 when every compared byte matches and 0 when none of them do.
// NOTE(review): the skipped prefix is presumably the three seed fields stored
// at the start of Dna - confirm against the struct layout.
float hamming_distance_without_seeds(Dna *d1, Dna *d2)
{
    constexpr size_t first = sizeof(uint128) * 3;
    constexpr size_t last = sizeof(Dna);
    const uint8_t *lhs = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *rhs = reinterpret_cast<const uint8_t *>(d2);
    float mismatches = 0;
    size_t pos = first;
    while (pos < last)
    {
        // Count one mismatch for every byte position where the objects differ.
        mismatches += (lhs[pos] == rhs[pos]) ? 0.0f : 1.0f;
        ++pos;
    }
    const float fraction = mismatches / (last - first);
    return 1 - fraction;
}
float calc_similarity(std::vector<Dna> &vec, simil_func f) float calc_similarity(std::vector<Dna> &vec, simil_func f)
{ {
size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2; size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2;

View File

@ -14,6 +14,8 @@ enum DrawingStage
done, done,
}; };
// Number of similarity measures stored per entry; used as the length of the
// std::array elements behind Vapp::simil and Vapp::similTable.
constexpr int numberOfFunc = 2;
class Vapp class Vapp
{ {
public: public:
@ -46,8 +48,8 @@ private:
int drawY = 0; int drawY = 0;
void setUpManager(); void setUpManager();
std::array<float, 3> simil; std::array<float, numberOfFunc> simil;
std::vector<std::array<float, 3>> similTable; std::vector<std::array<float, numberOfFunc>> similTable;
void setUpTable(); void setUpTable();
}; };

View File

@ -100,9 +100,8 @@ void Vapp::update()
break; break;
case DrawingStage::calSim: case DrawingStage::calSim:
simil[0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity); simil[0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
simil[2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
stageOfDrawing = DrawingStage::done; stageOfDrawing = DrawingStage::done;
break; break;
@ -174,12 +173,11 @@ void Vapp::draw()
if (showStats) if (showStats)
{ {
ImGui::Begin("Status", &showStats); ImGui::Begin("Status", &showStats);
ImGui::LabelText("##sim1", "cosine_similarity: %f", simil[0]); ImGui::LabelText("##sim1", "hamming_distance: %f", simil[0]);
ImGui::LabelText("##sim2", "hamming_distance: %f", simil[1]); ImGui::LabelText("##sim2", "hamming_distance_without_seeds: %f", simil[1]);
ImGui::LabelText("##sim3", "cosine_similarity_int: %f", simil[2]);
const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody; const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody;
const int columns = 4; const int columns = numberOfFunc + 1;
if (ImGui::BeginTable("table1", columns, flags)) if (ImGui::BeginTable("table1", columns, flags))
{ {
for (int row = 0; row < similTable.size(); row++) for (int row = 0; row < similTable.size(); row++)
@ -292,9 +290,8 @@ void Vapp::setUpTable()
{ {
similTable.emplace_back(); similTable.emplace_back();
int s = similTable.size() - 1; int s = similTable.size() - 1;
similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity); similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
similTable[s][2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
DnaManager::newGen(&manager); DnaManager::newGen(&manager);
} }