Add similarities
This commit is contained in:
@@ -16,7 +16,7 @@
|
|||||||
#define DATA_FILE_NAME "DATA.bin"
|
#define DATA_FILE_NAME "DATA.bin"
|
||||||
#define VECTOR_FILE_NAME "VECTOR.bin"
|
#define VECTOR_FILE_NAME "VECTOR.bin"
|
||||||
#define GEN_FILE_PATTRN "gen/%04d.bin"
|
#define GEN_FILE_PATTRN "gen/%04d.bin"
|
||||||
#define HOST_NAME "petrovv.com"
|
#define HOST_NAME "localhost"
|
||||||
|
|
||||||
void DnaStore::load(DnaManagerData *data)
|
void DnaStore::load(DnaManagerData *data)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -9,8 +9,6 @@
|
|||||||
#define MAX_DEPTH 8
|
#define MAX_DEPTH 8
|
||||||
#define MAX_POSIBLE_DEPTH 11
|
#define MAX_POSIBLE_DEPTH 11
|
||||||
static_assert(MAX_DEPTH <= MAX_POSIBLE_DEPTH);
|
static_assert(MAX_DEPTH <= MAX_POSIBLE_DEPTH);
|
||||||
static_assert(180 == sizeof(Dna));
|
|
||||||
constexpr int SIZE_OF_DNA = sizeof(Dna);
|
|
||||||
|
|
||||||
struct Branch
|
struct Branch
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -3,14 +3,14 @@
|
|||||||
|
|
||||||
namespace Similarity
|
namespace Similarity
|
||||||
{
|
{
|
||||||
// float euclidean_distance(Dna *d1, Dna *d2); direct distance betwen vector. wont give 0 and 1
|
float euclidean_distance(Dna *d1, Dna *d2); // Euclidean distance between the raw bytes of the two Dna objects, divided by the largest possible distance (0 = identical)
|
||||||
// float dot_product(Dna *d1, Dna *d2); doent return betwen 0 to 1
|
// float dot_product(Dna *d1, Dna *d2); // doesn't return a value between 0 and 1
|
||||||
// float cosine_similarity(Dna *d1, Dna *d2);
|
float cosine_similarity(Dna *d1, Dna *d2);
|
||||||
// float cosine_similarity_int(Dna *d1, Dna *d2);
|
float cosine_similarity_int(Dna *d1, Dna *d2);
|
||||||
float hamming_distance(Dna *d1, Dna *d2);
|
float hamming_distance(Dna *d1, Dna *d2);
|
||||||
float hamming_distance_without_seeds(Dna *d1, Dna *d2);
|
float hamming_distance_without_seeds(Dna *d1, Dna *d2);
|
||||||
// float jaccard_index(Dna *d1, Dna *d2); // primerja unio genov naprimer gleda ce je gen za nebo isti z genom za barvo za liste, to nerabimo
|
// float jaccard_index(Dna *d1, Dna *d2); // compares the union of genes, e.g. checks whether the gene for the sky is the same as the gene for the leaf colour; we don't need that
|
||||||
// float levenshtein_distance(Dna *d1, Dna *d2); // odstranjen ker mi vrne iste podatke kot hamming distance ki je bolj enostaven za izracun
|
float levenshtein_distance(Dna *d1, Dna *d2); // NOTE(review): was previously disabled because it reportedly returns the same data as hamming_distance, which is simpler to compute - confirm it is still worth running
|
||||||
// float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and aligment. Dont need its aligned alredy
|
// float needleman_wunsch(Dna *d1, Dna *d2); // used in bioinformatics for sequence alignment; not needed, the data is already aligned
|
||||||
|
|
||||||
typedef float(simil_func)(Dna *d1, Dna *d2);
|
typedef float(simil_func)(Dna *d1, Dna *d2);
|
||||||
|
|||||||
@@ -1,56 +1,72 @@
|
|||||||
#include "values/Similarity.hpp"
|
#include "values/Similarity.hpp"
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <numeric>
|
||||||
|
#include <raylib.h>
|
||||||
|
|
||||||
namespace Similarity
|
namespace Similarity
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/// Normalised Euclidean distance between the raw bytes of two Dna objects.
///
/// Both objects are read as sizeof(Dna) unsigned bytes.
///
/// @param d1 first Dna object
/// @param d2 second Dna object
/// @return the Euclidean distance divided by the largest distance two byte
///         vectors of this length can have (every byte differing by 255);
///         0 means the objects are byte-identical.
float euclidean_distance(Dna *d1, Dna *d2)
{
    const uint8_t *lhs = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *rhs = reinterpret_cast<const uint8_t *>(d2);
    const size_t byte_count = sizeof(Dna);

    float squared_sum = 0.0f;
    size_t idx = 0;
    while (idx < byte_count)
    {
        const float delta = static_cast<float>(lhs[idx]) - static_cast<float>(rhs[idx]);
        squared_sum += delta * delta;
        ++idx;
    }

    // Upper bound of the distance: all bytes differ by the full 0..255 range.
    const float largest = 255.0f * std::sqrt(static_cast<float>(byte_count));
    return std::sqrt(squared_sum) / largest;
}
|
||||||
|
|
||||||
// todo: use int8_t insted of uint8_t and map data
|
// TODO: use int8_t instead of uint8_t and map the data
|
||||||
// 0 -> -128
|
// 0 -> -128
|
||||||
// 255 -> 127
|
// 255 -> 127
|
||||||
// int8_t = uint8_t - 128
|
// int8_t = uint8_t - 128
|
||||||
// float cosine_similarity(Dna *d1, Dna *d2)
|
/// Cosine similarity between the raw bytes of two Dna objects.
///
/// Both objects are read as sizeof(Dna) unsigned bytes and treated as
/// vectors with non-negative components.
///
/// @param d1 first Dna object
/// @param d2 second Dna object
/// @return dot(d1, d2) / (|d1| * |d2|), nominally in [0, 1] because every
///         component is non-negative. If a vector has zero magnitude the
///         cosine is undefined; instead of NaN this returns 1 when both
///         vectors are all-zero (they are identical) and 0 otherwise.
float cosine_similarity(Dna *d1, Dna *d2)
{
    const uint8_t *lhs = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *rhs = reinterpret_cast<const uint8_t *>(d2);

    float dot_prod = 0.0f;
    float sq_mag1 = 0.0f;
    float sq_mag2 = 0.0f;
    for (size_t i = 0; i < sizeof(Dna); ++i)
    {
        const float x = static_cast<float>(lhs[i]);
        const float y = static_cast<float>(rhs[i]);
        dot_prod += x * y;
        sq_mag1 += x * x;
        sq_mag2 += y * y;
    }

    // A sum of squares is exactly zero only when every byte is zero, so an
    // exact comparison is safe here. Without this guard the division below
    // yields NaN, which would poison any average built from the result.
    if (sq_mag1 == 0.0f || sq_mag2 == 0.0f)
    {
        return (sq_mag1 == sq_mag2) ? 1.0f : 0.0f;
    }

    return dot_prod / (std::sqrt(sq_mag1) * std::sqrt(sq_mag2));
}
|
||||||
|
|
||||||
// float cosine_similarity_int(Dna *d1, Dna *d2)
|
/// Cosine similarity between two Dna objects with bytes re-centred around 0.
///
/// Both objects are read as sizeof(Dna) unsigned bytes; each byte is mapped
/// to the signed range by subtracting 128 (0 -> -128, 255 -> 127) before the
/// cosine is computed, so the result can be negative.
///
/// @param d1 first Dna object
/// @param d2 second Dna object
/// @return dot(a, b) / (|a| * |b|) of the re-centred vectors, nominally in
///         [-1, 1]. If a re-centred vector has zero magnitude (every byte is
///         128) the cosine is undefined; instead of NaN this returns 1 when
///         both vectors are zero (they are identical) and 0 otherwise.
float cosine_similarity_int(Dna *d1, Dna *d2)
{
    // Shift an unsigned byte into the signed range [-128, 127].
    const auto recentre = [](uint8_t value) -> int
    { return static_cast<int>(value) - 128; };

    const uint8_t *lhs = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *rhs = reinterpret_cast<const uint8_t *>(d2);

    float dot_prod = 0.0f;
    float sq_mag1 = 0.0f;
    float sq_mag2 = 0.0f;
    for (size_t i = 0; i < sizeof(Dna); ++i)
    {
        const int a = recentre(lhs[i]);
        const int b = recentre(rhs[i]);
        dot_prod += static_cast<float>(a * b);
        sq_mag1 += static_cast<float>(a * a);
        sq_mag2 += static_cast<float>(b * b);
    }

    // A sum of squares is exactly zero only when every component is zero.
    // Guarding here avoids a 0/0 division that would return NaN.
    if (sq_mag1 == 0.0f || sq_mag2 == 0.0f)
    {
        return (sq_mag1 == sq_mag2) ? 1.0f : 0.0f;
    }

    return dot_prod / (std::sqrt(sq_mag1) * std::sqrt(sq_mag2));
}
|
|
||||||
|
|
||||||
float hamming_distance(Dna *d1, Dna *d2)
|
float hamming_distance(Dna *d1, Dna *d2)
|
||||||
{
|
{
|
||||||
@@ -99,4 +115,40 @@ namespace Similarity
|
|||||||
float average_similarity = total_similarity / num_pairs;
|
float average_similarity = total_similarity / num_pairs;
|
||||||
return average_similarity * 100.0f;
|
return average_similarity * 100.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Levenshtein (edit-distance) similarity between the raw bytes of two Dna
/// objects.
///
/// Both objects are read as byte strings of length sizeof(Dna). The edit
/// distance counts single-byte insertions, deletions and substitutions.
///
/// @param d1 first Dna object
/// @param d2 second Dna object
/// @return 1 - distance / sizeof(Dna). Two equal-length strings are never
///         more than `len` substitutions apart, so the result spans the full
///         [0, 1] range: 1 for byte-identical objects, 0 when the distance is
///         maximal. (Dividing by 2 * len, as was done before, kept the result
///         in [0.5, 1].)
float levenshtein_distance(Dna *d1, Dna *d2)
{
    constexpr size_t len = sizeof(Dna);
    const uint8_t *a = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *b = reinterpret_cast<const uint8_t *>(d2);

    // Only the previous and the current row of the distance matrix are ever
    // read, so two rolling rows replace the full (len + 1)^2 table.
    std::vector<uint32_t> prev(len + 1);
    std::vector<uint32_t> curr(len + 1);

    // Row 0: turning an empty prefix into b[0..j) takes j insertions.
    for (size_t j = 0; j <= len; ++j)
    {
        prev[j] = static_cast<uint32_t>(j);
    }

    for (size_t i = 1; i <= len; ++i)
    {
        // Column 0: turning a[0..i) into an empty prefix takes i deletions.
        curr[0] = static_cast<uint32_t>(i);
        for (size_t j = 1; j <= len; ++j)
        {
            const uint32_t cost = (a[i - 1] == b[j - 1]) ? 0u : 1u;
            curr[j] = std::min({
                prev[j] + 1,       // deletion
                curr[j - 1] + 1,   // insertion
                prev[j - 1] + cost // substitution
            });
        }
        prev.swap(curr);
    }

    // After the final swap `prev` holds the last computed row.
    return 1.0f - static_cast<float>(prev[len]) / static_cast<float>(len);
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ enum DrawingStage
|
|||||||
done,
|
done,
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr int numberOfFunc = 2;
|
constexpr int numberOfFunc = 6;
|
||||||
|
|
||||||
class Vapp
|
class Vapp
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ const char select_user_id[] = "SELECT USER_ID FROM user_table GROUP BY USER_ID;"
|
|||||||
|
|
||||||
constexpr int sizeOfCanvas = 1000;
|
constexpr int sizeOfCanvas = 1000;
|
||||||
|
|
||||||
void Vapp::init(char* filename)
|
void Vapp::init(char *filename)
|
||||||
{
|
{
|
||||||
bigTexture = LoadRenderTexture(sizeOfCanvas * 4, sizeOfCanvas * 4);
|
bigTexture = LoadRenderTexture(sizeOfCanvas * 4, sizeOfCanvas * 4);
|
||||||
treeTexture = LoadRenderTexture(sizeOfCanvas, sizeOfCanvas);
|
treeTexture = LoadRenderTexture(sizeOfCanvas, sizeOfCanvas);
|
||||||
@@ -100,8 +100,12 @@ void Vapp::update()
|
|||||||
break;
|
break;
|
||||||
case DrawingStage::calSim:
|
case DrawingStage::calSim:
|
||||||
|
|
||||||
simil[0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
simil[0] = Similarity::calc_similarity(manager.vector, Similarity::euclidean_distance);
|
||||||
simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
|
simil[1] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
|
||||||
|
simil[2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
|
||||||
|
simil[3] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
||||||
|
simil[4] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
|
||||||
|
simil[5] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
|
||||||
stageOfDrawing = DrawingStage::done;
|
stageOfDrawing = DrawingStage::done;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -173,13 +177,35 @@ void Vapp::draw()
|
|||||||
if (showStats)
|
if (showStats)
|
||||||
{
|
{
|
||||||
ImGui::Begin("Status", &showStats);
|
ImGui::Begin("Status", &showStats);
|
||||||
ImGui::LabelText("##sim1", "hamming_distance: %f", simil[0]);
|
ImGui::LabelText("##sim1", "euclidean_distance: %f", simil[0]);
|
||||||
ImGui::LabelText("##sim2", "hamming_distance_without_seeds: %f", simil[1]);
|
ImGui::LabelText("##sim2", "cosine_similarity: %f", simil[1]);
|
||||||
|
ImGui::LabelText("##sim3", "cosine_similarity_int: %f", simil[2]);
|
||||||
|
ImGui::LabelText("##sim4", "hamming_distance: %f", simil[3]);
|
||||||
|
ImGui::LabelText("##sim5", "hamming_distance_without_seeds: %f", simil[4]);
|
||||||
|
ImGui::LabelText("##sim6", "levenshtein_distance: %f", simil[5]);
|
||||||
|
|
||||||
const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody;
|
const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody;
|
||||||
const int columns = numberOfFunc + 1;
|
const int columns = numberOfFunc + 1;
|
||||||
if (ImGui::BeginTable("table1", columns, flags))
|
if (ImGui::BeginTable("table1", columns, flags))
|
||||||
{
|
{
|
||||||
|
|
||||||
|
ImGui::TableNextRow();
|
||||||
|
|
||||||
|
ImGui::TableSetColumnIndex(0);
|
||||||
|
ImGui::Text("index");
|
||||||
|
ImGui::TableSetColumnIndex(1);
|
||||||
|
ImGui::Text("euclidean_distance");
|
||||||
|
ImGui::TableSetColumnIndex(2);
|
||||||
|
ImGui::Text("cosine_similarity");
|
||||||
|
ImGui::TableSetColumnIndex(3);
|
||||||
|
ImGui::Text("cosine_similarity_int");
|
||||||
|
ImGui::TableSetColumnIndex(4);
|
||||||
|
ImGui::Text("hamming_distance");
|
||||||
|
ImGui::TableSetColumnIndex(5);
|
||||||
|
ImGui::Text("hamming_distance_without_seeds");
|
||||||
|
ImGui::TableSetColumnIndex(6);
|
||||||
|
ImGui::Text("levenshtein_distance");
|
||||||
|
|
||||||
for (int row = 0; row < similTable.size(); row++)
|
for (int row = 0; row < similTable.size(); row++)
|
||||||
{
|
{
|
||||||
ImGui::TableNextRow();
|
ImGui::TableNextRow();
|
||||||
@@ -277,7 +303,7 @@ void Vapp::setUpTable()
|
|||||||
UiUnit unit = DnaManager::next(&manager);
|
UiUnit unit = DnaManager::next(&manager);
|
||||||
if ((unit.index != pos))
|
if ((unit.index != pos))
|
||||||
{
|
{
|
||||||
// DOTO: SET ERROR
|
// TODO: SET ERROR
|
||||||
TraceLog(LOG_ERROR, "LOADING DNA");
|
TraceLog(LOG_ERROR, "LOADING DNA");
|
||||||
sql::finalize(get_gen_stmt);
|
sql::finalize(get_gen_stmt);
|
||||||
return;
|
return;
|
||||||
@@ -290,9 +316,13 @@ void Vapp::setUpTable()
|
|||||||
{
|
{
|
||||||
similTable.emplace_back();
|
similTable.emplace_back();
|
||||||
int s = similTable.size() - 1;
|
int s = similTable.size() - 1;
|
||||||
similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
|
||||||
similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
|
|
||||||
|
|
||||||
|
similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::euclidean_distance);
|
||||||
|
similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
|
||||||
|
similTable[s][2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
|
||||||
|
similTable[s][3] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
|
||||||
|
similTable[s][4] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
|
||||||
|
similTable[s][5] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
|
||||||
DnaManager::newGen(&manager);
|
DnaManager::newGen(&manager);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
Reference in New Issue
Block a user