parallelize synapse generation using OpenMP

- only tested on Debian Linux with a 16-thread amd64 CPU
- Windows may need DLLs to be shipped with the EXE
- using `-fopenmp` should be made optional via qmake somehow
  (without the flag, parallelization pragmas are ignored
  and it reverts to serial operation)
- assumes `status::update()`, among other code, is thread-safe; this has not
  been verified, although it seems to work on my machine without issues
This commit is contained in:
Claude Heiland-Allen 2022-09-27 13:19:04 +01:00
parent 79fef7cd66
commit 8aa0dee3d1
2 changed files with 17 additions and 12 deletions

View File

@ -16,6 +16,7 @@
#include <iostream> #include <iostream>
#include <algorithm> #include <algorithm>
#include <atomic>
#include <sndfile.h> #include <sndfile.h>
#include <float.h> #include <float.h>
#include <spiralcore/audio.h> #include <spiralcore/audio.h>
@ -242,10 +243,12 @@ void brain::build_synapses_thresh(search_params &params, double thresh) {
m_average_error = calc_average_diff(params)*thresh; m_average_error = calc_average_diff(params)*thresh;
double err = m_average_error*thresh; double err = m_average_error*thresh;
u32 brain_size = m_blocks.size(); u32 brain_size = m_blocks.size();
u32 outer_index = 0; std::atomic<u32> progress{0};
for (auto &i : m_blocks) { #pragma omp parallel for
for (u32 outer_index = 0; outer_index < brain_size; ++outer_index) {
auto &i = m_blocks[outer_index];
u32 index = 0; u32 index = 0;
status::update("building synapses %d%%",(int)(outer_index/(float)brain_size*100)); status::update("building synapses %d%%",(int)(progress/(float)brain_size*100));
for (auto &j : m_blocks) { for (auto &j : m_blocks) {
if (index!=outer_index) { if (index!=outer_index) {
// collect connections that are under threshold in closeness // collect connections that are under threshold in closeness
@ -256,30 +259,32 @@ void brain::build_synapses_thresh(search_params &params, double thresh) {
} }
++index; ++index;
} }
++outer_index; ++progress;
} }
} }
void brain::build_synapses_fixed(search_params &params) { void brain::build_synapses_fixed(search_params &params) {
//m_average_error = calc_average_diff(params)*thresh; //m_average_error = calc_average_diff(params)*thresh;
u32 brain_size = m_blocks.size(); u32 brain_size = m_blocks.size();
u32 outer_index = 0;
u32 num_synapses = NUM_FIXED_SYNAPSES; u32 num_synapses = NUM_FIXED_SYNAPSES;
if (num_synapses>=m_blocks.size()) num_synapses=m_blocks.size()-1; if (num_synapses>=m_blocks.size()) num_synapses=m_blocks.size()-1;
// need to stop the progress updates flooding osc // need to stop the progress updates flooding osc
u32 update_period = 100; u32 update_period = 100;
u32 update_tick = 0; std::atomic<u32> update_tick{0};
std::atomic<u32> progress{0};
for (auto &i:m_blocks) { #pragma omp parallel for
for (u32 outer_index = 0; outer_index < brain_size; ++outer_index) {
auto &i = m_blocks[outer_index];
if (update_tick>update_period) { if (update_tick>update_period) {
status::update("building synapses %d%%",(int)(outer_index/(float)brain_size*100)); status::update("building synapses %d%%",(int)(progress/(float)brain_size*100));
update_tick=0; update_tick=0;
} }
update_tick++; update_tick++;
u32 index = 0; u32 index = 0;
vector<pair<u32,double>> collect; vector<pair<u32,double>> collect;
collect.reserve(brain_size);
// collect comparisons to all other blocks // collect comparisons to all other blocks
for (auto &j:m_blocks) { for (auto &j:m_blocks) {
@ -304,7 +309,7 @@ void brain::build_synapses_fixed(search_params &params) {
i.get_synapse().push_back(collect[n].first); i.get_synapse().push_back(collect[n].first);
} }
++outer_index; ++progress;
} }
status::update("Done: %d synapses grown for %d blocks",num_synapses*brain_size,brain_size); status::update("Done: %d synapses grown for %d blocks",num_synapses*brain_size,brain_size);
} }

View File

@ -44,9 +44,9 @@ SOURCES += app/MainWindow.cpp \
INCLUDEPATH += brain/src INCLUDEPATH += brain/src
INCLUDEPATH += /usr/local/include INCLUDEPATH += /usr/local/include
INCLUDEPATH += /opt/homebrew/include INCLUDEPATH += /opt/homebrew/include
LIBS += -L.. -L/usr/local/lib -L/opt/homebrew/lib -lportaudio -lfftw3 -lsndfile -llo -ldl -lpthread -lm LIBS += -L.. -L/usr/local/lib -L/opt/homebrew/lib -lportaudio -lfftw3 -lsndfile -llo -ldl -lpthread -lm -fopenmp
QMAKE_CXXFLAGS += -O3 -Wall -Wno-unused -std=c++11 QMAKE_CXXFLAGS += -O3 -fopenmp -Wall -Wno-unused -std=c++11
# assets # assets
RESOURCES = app/samplebrain.qrc RESOURCES = app/samplebrain.qrc