Compare commits

5 Commits

Author SHA1 Message Date
060b9f9182 s 2025-10-22 19:07:59 +02:00
59fd47e684 Add print image 2025-10-22 18:47:05 +02:00
593b813988 Add similaritys 2025-10-22 17:56:26 +02:00
5f6305a2f2 s 2025-10-09 15:53:58 +02:00
dddf8ca632 t 2025-10-08 19:41:46 +02:00
6 changed files with 176 additions and 58 deletions

View File

@@ -16,7 +16,7 @@
#define DATA_FILE_NAME "DATA.bin" #define DATA_FILE_NAME "DATA.bin"
#define VECTOR_FILE_NAME "VECTOR.bin" #define VECTOR_FILE_NAME "VECTOR.bin"
#define GEN_FILE_PATTRN "gen/%04d.bin" #define GEN_FILE_PATTRN "gen/%04d.bin"
#define HOST_NAME "petrovv.com" #define HOST_NAME "localhost"
void DnaStore::load(DnaManagerData *data) void DnaStore::load(DnaManagerData *data)
{ {

View File

@@ -121,7 +121,7 @@ bool try_remove(std::filesystem::path path)
{ {
try try
{ {
std::filesystem::remove("old_build"); std::filesystem::remove(path);
return true; return true;
} }
catch (const std::exception &e) catch (const std::exception &e)

View File

@@ -3,14 +3,14 @@
namespace Similarity namespace Similarity
{ {
// float euclidean_distance(Dna *d1, Dna *d2); direct distance betwen vector. wont give 0 and 1 float euclidean_distance(Dna *d1, Dna *d2);// direct distance betwen vector. wont give 0 and 1
// float dot_product(Dna *d1, Dna *d2); doent return betwen 0 to 1 // float dot_product(Dna *d1, Dna *d2); // doent return betwen 0 to 1
// float cosine_similarity(Dna *d1, Dna *d2); float cosine_similarity(Dna *d1, Dna *d2);
// float cosine_similarity_int(Dna *d1, Dna *d2); float cosine_similarity_int(Dna *d1, Dna *d2);
float hamming_distance(Dna *d1, Dna *d2); float hamming_distance(Dna *d1, Dna *d2);
float hamming_distance_without_seeds(Dna *d1, Dna *d2); float hamming_distance_without_seeds(Dna *d1, Dna *d2);
// float jaccard_index(Dna *d1, Dna *d2); // primerja unio genov naprimer gleda ce je gen za nebo isti z genom za barvo za liste, to nerabimo // float jaccard_index(Dna *d1, Dna *d2); // primerja unio genov naprimer gleda ce je gen za nebo isti z genom za barvo za liste, to nerabimo
// float levenshtein_distance(Dna *d1, Dna *d2); // odstranjen ker mi vrne iste podatke kot hamming distance ki je bolj enostaven za izracun float levenshtein_distance(Dna *d1, Dna *d2); // odstranjen ker mi vrne iste podatke kot hamming distance ki je bolj enostaven za izracun
// float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and aligment. Dont need its aligned alredy // float needleman_wunsch(Dna *d1, Dna *d2); used for bioinformatics and aligment. Dont need its aligned alredy
typedef float(simil_func)(Dna *d1, Dna *d2); typedef float(simil_func)(Dna *d1, Dna *d2);

View File

@@ -1,56 +1,72 @@
#include "values/Similarity.hpp" #include "values/Similarity.hpp"
#include <cmath> #include <cmath>
#include <algorithm>
#include <numeric>
namespace Similarity namespace Similarity
{ {
/// Similarity score derived from the Euclidean distance between two Dna objects.
///
/// Both objects are read as `sizeof(Dna)` raw bytes. The distance between the
/// two byte vectors is divided by the largest distance possible for that many
/// bytes (every byte differing by 255), and the result is inverted so that
/// byte-identical inputs give 1 and maximally different inputs give 0.
///
/// @param d1 first Dna object, read as raw bytes
/// @param d2 second Dna object, read as raw bytes
/// @return 1 - (distance / max_distance)
float euclidean_distance(Dna *d1, Dna *d2)
{
    const size_t byte_count = sizeof(Dna);
    const uint8_t *lhs = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *rhs = reinterpret_cast<const uint8_t *>(d2);

    // Accumulate the squared per-byte differences.
    float squared_total = 0.0f;
    size_t idx = 0;
    while (idx < byte_count)
    {
        const float delta = static_cast<float>(lhs[idx]) - static_cast<float>(rhs[idx]);
        squared_total += delta * delta;
        ++idx;
    }

    // Upper bound: all byte_count components differ by the full 0..255 range.
    const float upper_bound = 255.0f * std::sqrt(static_cast<float>(byte_count));
    const float ratio = std::sqrt(squared_total) / upper_bound;
    return 1 - ratio;
}
// todo: use int8_t insted of uint8_t and map data // todo: use int8_t insted of uint8_t and map data
// 0 -> -128 // 0 -> -128
// 255 -> 127 // 255 -> 127
// int8_t = uint8_t - 128 // int8_t = uint8_t - 128
// float cosine_similarity(Dna *d1, Dna *d2) float cosine_similarity(Dna *d1, Dna *d2)
// { {
// uint8_t *d1a = (uint8_t *)d1; uint8_t *d1a = (uint8_t *)d1;
// uint8_t *d2a = (uint8_t *)d2; uint8_t *d2a = (uint8_t *)d2;
// float mag1 = 0.0f; float mag1 = 0.0f;
// float mag2 = 0.0f; float mag2 = 0.0f;
// float dot_prod = 0.0f; float dot_prod = 0.0f;
// for (size_t i = 0; i < sizeof(Dna); i++) for (size_t i = 0; i < sizeof(Dna); i++)
// { {
// dot_prod += d1a[i] * d2a[i]; dot_prod += d1a[i] * d2a[i];
// mag1 += d1a[i] * d1a[i]; mag1 += d1a[i] * d1a[i];
// mag2 += d2a[i] * d2a[i]; mag2 += d2a[i] * d2a[i];
// } }
// mag1 = sqrt(mag1); mag1 = sqrt(mag1);
// mag2 = sqrt(mag2); mag2 = sqrt(mag2);
// return dot_prod / (mag1 * mag2); return dot_prod / (mag1 * mag2);
// } }
// float cosine_similarity_int(Dna *d1, Dna *d2) float cosine_similarity_int(Dna *d1, Dna *d2)
// { {
// auto map = [](uint8_t a) -> int8_t auto map = [](uint8_t a) -> int8_t
// { return a - 128; }; { return a - 128; };
uint8_t *d1a = (uint8_t *)d1;
// uint8_t *d1a = (uint8_t *)d1; uint8_t *d2a = (uint8_t *)d2;
// uint8_t *d2a = (uint8_t *)d2; float mag1 = 0.0f;
float mag2 = 0.0f;
// float mag1 = 0.0f; float dot_prod = 0.0f;
// float mag2 = 0.0f; for (size_t i = 0; i < sizeof(Dna); i++)
// float dot_prod = 0.0f; {
// for (size_t i = 0; i < sizeof(Dna); i++) int8_t a = map(d1a[i]);
// { int8_t b = map(d2a[i]);
// int8_t a = map(d1a[i]); dot_prod += a * b;
// int8_t b = map(d2a[i]); mag1 += a * a;
// dot_prod += a * b; mag2 += b * b;
// mag1 += a * a; }
// mag2 += b * b; mag1 = sqrt(mag1);
// } mag2 = sqrt(mag2);
// mag1 = sqrt(mag1); return dot_prod / (mag1 * mag2);
// mag2 = sqrt(mag2); }
// return dot_prod / (mag1 * mag2);
// }
float hamming_distance(Dna *d1, Dna *d2) float hamming_distance(Dna *d1, Dna *d2)
{ {
@@ -99,4 +115,40 @@ namespace Similarity
float average_similarity = total_similarity / num_pairs; float average_similarity = total_similarity / num_pairs;
return average_similarity * 100.0f; return average_similarity * 100.0f;
} }
/// Similarity score derived from the Levenshtein (edit) distance between the
/// raw bytes of two Dna objects.
///
/// Both objects are read as `sizeof(Dna)` bytes. The edit distance uses unit
/// cost for insertion, deletion and substitution. Only the previous row of the
/// dynamic-programming table is ever read, so two rolling rows are kept
/// instead of a full (len+1) x (len+1) matrix; the computed distance is the same.
///
/// @param d1 first Dna object, read as raw bytes
/// @param d2 second Dna object, read as raw bytes
/// @return 1 - distance / (2 * sizeof(Dna))
///
/// NOTE(review): for two sequences of equal length the edit distance cannot
/// exceed `len`, so with the `2 * len` divisor the result appears to stay in
/// [0.5, 1]. Kept as-is to preserve the reported numbers — confirm intent.
float levenshtein_distance(Dna *d1, Dna *d2)
{
    const size_t len = sizeof(Dna);
    const uint8_t *a = reinterpret_cast<const uint8_t *>(d1);
    const uint8_t *b = reinterpret_cast<const uint8_t *>(d2);

    // prev holds row i-1 of the table, curr is row i being filled.
    std::vector<uint32_t> prev(len + 1);
    std::vector<uint32_t> curr(len + 1);

    // Row 0: turning an empty prefix into the first j bytes takes j insertions.
    for (size_t j = 0; j <= len; ++j)
    {
        prev[j] = static_cast<uint32_t>(j);
    }

    for (size_t i = 1; i <= len; ++i)
    {
        // Column 0: turning the first i bytes into an empty prefix takes i deletions.
        curr[0] = static_cast<uint32_t>(i);
        for (size_t j = 1; j <= len; ++j)
        {
            const uint32_t cost = (a[i - 1] == b[j - 1]) ? 0u : 1u;
            curr[j] = std::min({
                prev[j] + 1,       // deletion
                curr[j - 1] + 1,   // insertion
                prev[j - 1] + cost // substitution
            });
        }
        // The row just filled becomes the "previous" row for the next iteration.
        prev.swap(curr);
    }

    // After the final swap, prev is the last computed row.
    return 1 - (prev[len] / float(len + len));
}
} }

View File

@@ -11,10 +11,11 @@ enum DrawingStage
drawTree, drawTree,
drawBig, drawBig,
calSim, calSim,
save,
done, done,
}; };
constexpr int numberOfFunc = 2; constexpr int numberOfFunc = 6;
class Vapp class Vapp
{ {
@@ -52,4 +53,5 @@ private:
std::vector<std::array<float, numberOfFunc>> similTable; std::vector<std::array<float, numberOfFunc>> similTable;
void setUpTable(); void setUpTable();
void drawToFile();
}; };

View File

@@ -6,12 +6,13 @@
#include <rlImGui.h> #include <rlImGui.h>
#include <imgui.h> #include <imgui.h>
#include <raylib.h> #include <raylib.h>
#include <fstream>
const char select_user_id[] = "SELECT USER_ID FROM user_table GROUP BY USER_ID;"; const char select_user_id[] = "SELECT USER_ID FROM user_table GROUP BY USER_ID;";
constexpr int sizeOfCanvas = 1000; constexpr int sizeOfCanvas = 1000;
void Vapp::init(char* filename) void Vapp::init(char *filename)
{ {
bigTexture = LoadRenderTexture(sizeOfCanvas * 4, sizeOfCanvas * 4); bigTexture = LoadRenderTexture(sizeOfCanvas * 4, sizeOfCanvas * 4);
treeTexture = LoadRenderTexture(sizeOfCanvas, sizeOfCanvas); treeTexture = LoadRenderTexture(sizeOfCanvas, sizeOfCanvas);
@@ -100,11 +101,18 @@ void Vapp::update()
break; break;
case DrawingStage::calSim: case DrawingStage::calSim:
simil[0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance); simil[0] = Similarity::calc_similarity(manager.vector, Similarity::euclidean_distance);
simil[1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds); simil[1] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
simil[2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
simil[3] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
simil[4] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
simil[5] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
stageOfDrawing = DrawingStage::save;
break;
case DrawingStage::save:
drawToFile();
stageOfDrawing = DrawingStage::done; stageOfDrawing = DrawingStage::done;
break; break;
case DrawingStage::done: case DrawingStage::done:
enableAll = true; enableAll = true;
break; break;
@@ -173,13 +181,35 @@ void Vapp::draw()
if (showStats) if (showStats)
{ {
ImGui::Begin("Status", &showStats); ImGui::Begin("Status", &showStats);
ImGui::LabelText("##sim1", "hamming_distance: %f", simil[0]); ImGui::LabelText("##sim1", "euclidean_distance: %f", simil[0]);
ImGui::LabelText("##sim2", "hamming_distance_without_seeds: %f", simil[1]); ImGui::LabelText("##sim2", "cosine_similarity: %f", simil[1]);
ImGui::LabelText("##sim3", "cosine_similarity_int: %f", simil[2]);
ImGui::LabelText("##sim4", "hamming_distance: %f", simil[3]);
ImGui::LabelText("##sim5", "hamming_distance_without_seeds: %f", simil[4]);
ImGui::LabelText("##sim6", "levenshtein_distance: %f", simil[5]);
const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody; const ImGuiTableFlags flags = ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_Resizable | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_ContextMenuInBody;
const int columns = numberOfFunc + 1; const int columns = numberOfFunc + 1;
if (ImGui::BeginTable("table1", columns, flags)) if (ImGui::BeginTable("table1", columns, flags))
{ {
ImGui::TableNextRow();
ImGui::TableSetColumnIndex(0);
ImGui::Text("index");
ImGui::TableSetColumnIndex(1);
ImGui::Text("euclidean_distance");
ImGui::TableSetColumnIndex(2);
ImGui::Text("cosine_similarity");
ImGui::TableSetColumnIndex(3);
ImGui::Text("cosine_similarity_int");
ImGui::TableSetColumnIndex(4);
ImGui::Text("hamming_distance");
ImGui::TableSetColumnIndex(5);
ImGui::Text("hamming_distance_without_seeds");
ImGui::TableSetColumnIndex(6);
ImGui::Text("levenshtein_distance");
for (int row = 0; row < similTable.size(); row++) for (int row = 0; row < similTable.size(); row++)
{ {
ImGui::TableNextRow(); ImGui::TableNextRow();
@@ -277,7 +307,7 @@ void Vapp::setUpTable()
UiUnit unit = DnaManager::next(&manager); UiUnit unit = DnaManager::next(&manager);
if ((unit.index != pos)) if ((unit.index != pos))
{ {
// DOTO: SET ERROR // TODO: SET ERROR
TraceLog(LOG_ERROR, "LOADING DNA"); TraceLog(LOG_ERROR, "LOADING DNA");
sql::finalize(get_gen_stmt); sql::finalize(get_gen_stmt);
return; return;
@@ -290,9 +320,13 @@ void Vapp::setUpTable()
{ {
similTable.emplace_back(); similTable.emplace_back();
int s = similTable.size() - 1; int s = similTable.size() - 1;
similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
similTable[s][0] = Similarity::calc_similarity(manager.vector, Similarity::euclidean_distance);
similTable[s][1] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity);
similTable[s][2] = Similarity::calc_similarity(manager.vector, Similarity::cosine_similarity_int);
similTable[s][3] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance);
similTable[s][4] = Similarity::calc_similarity(manager.vector, Similarity::hamming_distance_without_seeds);
similTable[s][5] = Similarity::calc_similarity(manager.vector, Similarity::levenshtein_distance);
DnaManager::newGen(&manager); DnaManager::newGen(&manager);
} }
else else
@@ -301,7 +335,37 @@ void Vapp::setUpTable()
} }
sql::reset(get_gen_stmt); sql::reset(get_gen_stmt);
int64_t id = ids[selected_id_index];
char buff[50];
sprintf(buff, "%ld.txt", id);
std::ofstream file(buff);
file << "| index | euclidean_distance | cosine_similarity | cosine_similarity_int | hamming_distance | hamming_distance_without_seeds | levenshtein_distance |\n";
file << "| --- | --- | --- | --- | --- | --- | --- |\n";
for (size_t i = 0; i < similTable.size(); i++)
{
file << "|" << i << "|";
for (size_t j = 0; j < similTable[i].size(); j++)
{
file << similTable[i][j] << "|";
}
file << "\n";
}
} }
sql::finalize(get_gen_stmt); sql::finalize(get_gen_stmt);
} }
// Exports the contents of bigTexture as a PNG.
// The file name is "<id>_<selected_gen>.png", a relative path, where <id> is
// ids[selected_id_index].
void Vapp::drawToFile()
{
    int64_t id = ids[selected_id_index];
    char buff[50];
    // int64_t is not `long` on every platform, so "%ld" can mismatch the
    // argument; cast to long long and print with "%lld". snprintf bounds the
    // write to the buffer size.
    snprintf(buff, sizeof(buff), "%lld_%d.png", static_cast<long long>(id), selected_gen);

    // Read the texture back into an Image, write it out, then release the copy.
    Image image = LoadImageFromTexture(bigTexture.texture);
    ExportImage(image, buff);
    UnloadImage(image);
}