Update similarity calc
This commit is contained in:
parent
1994e7ccb1
commit
266d250881
@ -74,7 +74,7 @@ void App::init(int screenWidth, int screenHeight)
|
|||||||
}
|
}
|
||||||
|
|
||||||
DnaStore::load(&manager);
|
DnaStore::load(&manager);
|
||||||
simil = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
simil = Similarity::calc_similarity(manager.vector);
|
||||||
upTex(Liked::tbd);
|
upTex(Liked::tbd);
|
||||||
while (!canvas.tick(canvasTexure[TOP]))
|
while (!canvas.tick(canvasTexure[TOP]))
|
||||||
{
|
{
|
||||||
@ -128,7 +128,7 @@ void App::upTex(Liked liked)
|
|||||||
DnaStore::saveGen(&manager);
|
DnaStore::saveGen(&manager);
|
||||||
DnaManager::newGen(&manager);
|
DnaManager::newGen(&manager);
|
||||||
DnaStore::saveVec(&manager);
|
DnaStore::saveVec(&manager);
|
||||||
simil = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
simil = Similarity::calc_similarity(manager.vector);
|
||||||
}
|
}
|
||||||
DnaStore::saveData(&manager);
|
DnaStore::saveData(&manager);
|
||||||
}
|
}
|
||||||
|
@ -5,13 +5,14 @@ namespace Similarity
|
|||||||
{
|
{
|
||||||
// float euclidean_distance(Dna *d1, Dna *d2); direct distance between the vectors; won't give a value between 0 and 1
|
// float euclidean_distance(Dna *d1, Dna *d2); direct distance between the vectors; won't give a value between 0 and 1
|
||||||
// float dot_product(Dna *d1, Dna *d2); doesn't return a value between 0 and 1
|
// float dot_product(Dna *d1, Dna *d2); doesn't return a value between 0 and 1
|
||||||
float cosine_similarity(Dna *d1, Dna *d2);
|
// float cosine_similarity(Dna *d1, Dna *d2);
|
||||||
float cosine_similarity_int(Dna *d1, Dna *d2);
|
// float cosine_similarity_int(Dna *d1, Dna *d2);
|
||||||
float hamming_distance(Dna *d1, Dna *d2);
|
float hamming_distance(Dna *d1, Dna *d2);
|
||||||
|
float hamming_distance_without_seeds(Dna *d1, Dna *d2);
|
||||||
// float jaccard_index(Dna *d1, Dna *d2); // compares the union of genes, e.g. checks whether the gene for the sky is the same as the gene for the leaf color; we don't need that
|
// float jaccard_index(Dna *d1, Dna *d2); // compares the union of genes, e.g. checks whether the gene for the sky is the same as the gene for the leaf color; we don't need that
|
||||||
// float levenshtein_distance(Dna *d1, Dna *d2); // removed because it returns the same data as the Hamming distance, which is simpler to compute
|
// float levenshtein_distance(Dna *d1, Dna *d2); // removed because it returns the same data as the Hamming distance, which is simpler to compute
|
||||||
// float needleman_wunsch(Dna *d1, Dna *d2); used in bioinformatics for alignment. Not needed; the data is already aligned
|
// float needleman_wunsch(Dna *d1, Dna *d2); used in bioinformatics for alignment. Not needed; the data is already aligned
|
||||||
|
|
||||||
typedef float(simil_func)(Dna *d1, Dna *d2);
|
typedef float(simil_func)(Dna *d1, Dna *d2);
|
||||||
float calc_similarity(std::vector<Dna> &vec, simil_func f);
|
float calc_similarity(std::vector<Dna> &vec, simil_func f = hamming_distance_without_seeds);
|
||||||
}
|
}
|
@ -7,50 +7,50 @@ namespace Similarity
|
|||||||
// 0 -> -128
|
// 0 -> -128
|
||||||
// 255 -> 127
|
// 255 -> 127
|
||||||
// int8_t = uint8_t - 128
|
// int8_t = uint8_t - 128
|
||||||
float cosine_similarity(Dna *d1, Dna *d2)
|
// float cosine_similarity(Dna *d1, Dna *d2)
|
||||||
{
|
// {
|
||||||
uint8_t *d1a = (uint8_t *)d1;
|
// uint8_t *d1a = (uint8_t *)d1;
|
||||||
uint8_t *d2a = (uint8_t *)d2;
|
// uint8_t *d2a = (uint8_t *)d2;
|
||||||
|
|
||||||
float mag1 = 0.0f;
|
// float mag1 = 0.0f;
|
||||||
float mag2 = 0.0f;
|
// float mag2 = 0.0f;
|
||||||
float dot_prod = 0.0f;
|
// float dot_prod = 0.0f;
|
||||||
for (size_t i = 0; i < sizeof(Dna); i++)
|
// for (size_t i = 0; i < sizeof(Dna); i++)
|
||||||
{
|
// {
|
||||||
dot_prod += d1a[i] * d2a[i];
|
// dot_prod += d1a[i] * d2a[i];
|
||||||
mag1 += d1a[i] * d1a[i];
|
// mag1 += d1a[i] * d1a[i];
|
||||||
mag2 += d2a[i] * d2a[i];
|
// mag2 += d2a[i] * d2a[i];
|
||||||
}
|
// }
|
||||||
mag1 = sqrt(mag1);
|
// mag1 = sqrt(mag1);
|
||||||
mag2 = sqrt(mag2);
|
// mag2 = sqrt(mag2);
|
||||||
|
|
||||||
return dot_prod / (mag1 * mag2);
|
// return dot_prod / (mag1 * mag2);
|
||||||
}
|
// }
|
||||||
|
|
||||||
float cosine_similarity_int(Dna *d1, Dna *d2)
|
// float cosine_similarity_int(Dna *d1, Dna *d2)
|
||||||
{
|
// {
|
||||||
auto map = [](uint8_t a) -> int8_t
|
// auto map = [](uint8_t a) -> int8_t
|
||||||
{ return a - 128; };
|
// { return a - 128; };
|
||||||
|
|
||||||
uint8_t *d1a = (uint8_t *)d1;
|
// uint8_t *d1a = (uint8_t *)d1;
|
||||||
uint8_t *d2a = (uint8_t *)d2;
|
// uint8_t *d2a = (uint8_t *)d2;
|
||||||
|
|
||||||
float mag1 = 0.0f;
|
// float mag1 = 0.0f;
|
||||||
float mag2 = 0.0f;
|
// float mag2 = 0.0f;
|
||||||
float dot_prod = 0.0f;
|
// float dot_prod = 0.0f;
|
||||||
for (size_t i = 0; i < sizeof(Dna); i++)
|
// for (size_t i = 0; i < sizeof(Dna); i++)
|
||||||
{
|
// {
|
||||||
int8_t a = map(d1a[i]);
|
// int8_t a = map(d1a[i]);
|
||||||
int8_t b = map(d2a[i]);
|
// int8_t b = map(d2a[i]);
|
||||||
dot_prod += a * b;
|
// dot_prod += a * b;
|
||||||
mag1 += a * a;
|
// mag1 += a * a;
|
||||||
mag2 += b * b;
|
// mag2 += b * b;
|
||||||
}
|
// }
|
||||||
mag1 = sqrt(mag1);
|
// mag1 = sqrt(mag1);
|
||||||
mag2 = sqrt(mag2);
|
// mag2 = sqrt(mag2);
|
||||||
|
|
||||||
return dot_prod / (mag1 * mag2);
|
// return dot_prod / (mag1 * mag2);
|
||||||
}
|
// }
|
||||||
|
|
||||||
float hamming_distance(Dna *d1, Dna *d2)
|
float hamming_distance(Dna *d1, Dna *d2)
|
||||||
{
|
{
|
||||||
@ -67,6 +67,23 @@ namespace Similarity
|
|||||||
return 1 - (distance / sizeof(Dna));
|
return 1 - (distance / sizeof(Dna));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Byte-wise Hamming similarity of two Dna objects that ignores the leading
// `3 * sizeof(uint128)` bytes of each object.
//
// NOTE(review): the skipped prefix is presumably the three seed fields the
// function name refers to -- confirm against the Dna layout. Every remaining
// byte of the object representation is compared, which would include any
// padding bytes Dna may contain -- TODO confirm Dna has none.
//
// d1, d2: objects to compare; only read, and dereferenced unconditionally,
//         so neither may be null.
// Returns 1 minus the fraction of compared bytes that differ: 1.0 when all
// compared bytes are equal, 0.0 when every compared byte differs.
float hamming_distance_without_seeds(Dna *d1, Dna *d2)
{
    constexpr size_t start = sizeof(uint128) * 3;
    constexpr size_t end = sizeof(Dna);
    // Without this guard `end - start` would be zero or wrap around, and the
    // division below would produce a meaningless result.
    static_assert(start < end, "Dna must be larger than the skipped seed prefix");

    const uint8_t *lhs = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *rhs = reinterpret_cast<const uint8_t *>(d2);

    // Count in an integer and convert once at the end.
    size_t mismatches = 0;
    for (size_t i = start; i < end; ++i)
    {
        if (lhs[i] != rhs[i])
        {
            ++mismatches;
        }
    }
    return 1.0f - static_cast<float>(mismatches) / static_cast<float>(end - start);
}
|
||||||
|
|
||||||
float calc_similarity(std::vector<Dna> &vec, simil_func f)
|
float calc_similarity(std::vector<Dna> &vec, simil_func f)
|
||||||
{
|
{
|
||||||
size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2;
|
size_t num_pairs = (vec.size() * (vec.size() - 1)) / 2;
|
||||||
|
@ -14,6 +14,8 @@ enum DrawingStage
|
|||||||
done,
|
done,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
constexpr int numberOfFunc = 2;
|
||||||
|
|
||||||
class Vapp
|
class Vapp
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -46,8 +48,8 @@ private:
|
|||||||
int drawY = 0;
|
int drawY = 0;
|
||||||
void setUpManager();
|
void setUpManager();
|
||||||
|
|
||||||
std::array<float, 3> simil;
|
std::array<float, numberOfFunc> simil;
|
||||||
std::vector<std::array<float, 3>> similTable;
|
std::vector<std::array<float, numberOfFunc>> similTable;
|
||||||
|
|
||||||
void setUpTable();
|
void setUpTable();
|
||||||
};
|
};
|
@ -100,9 +100,8 @@ void Vapp::update()
|
|||||||
break;
|
break;
|
||||||
case DrawingStage::calSim:
|
case DrawingStage::calSim:
|
||||||
|
|
||||||
simil[0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
|
simil[0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
||||||
simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
|
||||||
simil[2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
|
|
||||||
stageOfDrawing = DrawingStage::done;
|
stageOfDrawing = DrawingStage::done;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -174,12 +173,11 @@ void Vapp::draw()
|
|||||||
if (showStats)
|
if (showStats)
|
||||||
{
|
{
|
||||||
ImGui::Begin("Status", &showStats);
|
ImGui::Begin("Status", &showStats);
|
||||||
ImGui::LabelText("##sim1", "cosine_similarity: %f", simil[0]);
|
ImGui::LabelText("##sim1", "hamming_distance: %f", simil[0]);
|
||||||
ImGui::LabelText("##sim2", "hamming_distance: %f", simil[1]);
|
ImGui::LabelText("##sim2", "hamming_distance_without_seeds: %f", simil[1]);
|
||||||
ImGui::LabelText("##sim3", "cosine_similarity_int: %f", simil[2]);
|
|
||||||
|
|
||||||
const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody;
|
const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody;
|
||||||
const int columns = 4;
|
const int columns = numberOfFunc + 1;
|
||||||
if (ImGui::BeginTable("table1", columns, flags))
|
if (ImGui::BeginTable("table1", columns, flags))
|
||||||
{
|
{
|
||||||
for (int row = 0; row < similTable.size(); row++)
|
for (int row = 0; row < similTable.size(); row++)
|
||||||
@ -292,9 +290,8 @@ void Vapp::setUpTable()
|
|||||||
{
|
{
|
||||||
similTable.emplace_back();
|
similTable.emplace_back();
|
||||||
int s = similTable.size() - 1;
|
int s = similTable.size() - 1;
|
||||||
similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
|
similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
||||||
similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
|
||||||
similTable[s][2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
|
|
||||||
|
|
||||||
DnaManager::newGen(&manager);
|
DnaManager::newGen(&manager);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user