I am trying to write a multithreaded logger. In the logger's destructor I call join() on the worker thread, and sometimes it throws an error and sometimes it doesn't. I was able to find an example where the issue was in the makefile, but that one seemed to fail every time, and I assume I am compiling correctly if most of the time everything works. Another question I found had an error in the C++ itself, but as far as I can tell I don't delete anything prematurely, since I wait in the destructor for the worker thread to finish. The code is quite long, so if you prefer you can grab it from my github.
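For orientation, here is a heavily simplified sketch of the pattern (not my real code; the names are made up): the constructor launches a worker thread that sleeps on a condition variable, and the destructor sets a done flag, notifies, and joins.
#include <condition_variable>
#include <mutex>
#include <thread>

class Worker {
public:
  Worker() : m_thread([this] { loop(); }) {}
  ~Worker() {
    {
      std::lock_guard<std::mutex> lock(m_mtx);
      m_done = true;
    }
    m_cv.notify_one();
    if (m_thread.joinable())
      m_thread.join();   // the join that sometimes throws in my real code
  }
private:
  void loop() {
    std::unique_lock<std::mutex> lock(m_mtx);
    while (!m_done) {
      m_cv.wait(lock);   // re-checks m_done after every wakeup
      // ... drain a queue and write it to disk ...
    }
  }
  bool m_done = false;
  std::mutex m_mtx;
  std::condition_variable m_cv;
  std::thread m_thread;
};

int main() {
  Worker w;   // worker thread starts here and is joined when w is destroyed
  return 0;
}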
logger.cc
#include <string>
#include <sstream>
#include <algorithm>
#include <iterator>
#include <iostream>
#include <chrono>
#include "Logger.hh"
using namespace std::chrono;
int main() {
std::string lyrics = "Row, row, row your boat Gently down the stream, Merrily merrily, merrily, merrily Life is but a dream";
std::istringstream iss(lyrics);
std::vector<std::string> lyric_vec(std::istream_iterator<std::string>{iss},
std::istream_iterator<std::string>{});
std::ofstream mystream("/tmp/logger1.log", std::ios::out);
auto start1 = high_resolution_clock::now();
for (auto& lyric : lyric_vec) {
mystream << lyric << std::endl;
mystream.flush();
}
mystream.close();
std::cout
<< duration_cast<nanoseconds>(high_resolution_clock::now()-start1).count()
<< std::endl;
matan::Logger bunchLogger("/tmp/logger2.log");
auto start2 = high_resolution_clock::now();
for (auto& lyric : lyric_vec) {
bunchLogger << lyric << std::endl;
}
std::cout
<< duration_cast<nanoseconds>(high_resolution_clock::now()-start2).count()
<< std::endl;
return 0;
}
Logger.hh
#pragma once
#include "AsyncWorker.hh"
#include <fstream>
#include <string>
/*
* Holds the queue of log messages and the file to write them to.
*/
namespace matan {
class Logger : public AsyncWorker {
/*
* Logger intended to prevent IO from blocking. It pushes the actual writing
* to disk onto a separate thread.
*
* Single writer. To make the interface similar to std::cout we need to allow
* separate calls to operator<<. For this to be multi-writer, each operator<<
* call would need to contain a complete element, as opposed to building it
* up within m_buf and only flushing it later. (Please note this issue would
* exist even if we actually flushed on every call to flush().)
*/
public:
Logger(const std::string& ofname);
virtual ~Logger();
Logger& operator<<(const std::string& str) {m_buf += str; return *this;}
Logger& operator<<(const char* c) { m_buf += c; return *this; }
Logger& operator<<(char c) { m_buf += c; return *this; }
Logger& operator<<(Logger& (*pf)(Logger&)) {return pf(*this);}
void flush();
private:
void doFlush();
virtual void doit();
virtual bool shouldSleep() { return m_contents.empty(); }
std::string m_buf;
std::ofstream m_ofstream;
BunchQueue<TakerQueue<std::string>> m_contents;
/*
* I'm guessing that one page in memory is 4KB, that it will be fastest if I
* can stay on one page (I need to pick a threshold somehow), and that most
* log lines will be under 1024 characters, so a 3KB threshold should usually
* keep the buffer within a single page.
*/
static constexpr std::size_t MAX_LEN = 3 * 1024;
};
} // matan
namespace std {
inline matan::Logger& endl(matan::Logger& logger) {
logger << '\n';
logger.flush();
return logger;
}
}
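As an aside, the std::endl overload above is picked up through the function-pointer overload of operator<<, the same manipulator trick std::ostream uses. A minimal standalone sketch of just that mechanism (MiniLog and everything in it is made up for illustration):
#include <iostream>
#include <string>

struct MiniLog {
  std::string buf;
  MiniLog& operator<<(const std::string& s) { buf += s; return *this; }
  // The function-pointer overload is what makes `log << endl` compile.
  MiniLog& operator<<(MiniLog& (*manip)(MiniLog&)) { return manip(*this); }
};

// A manipulator is just a function from MiniLog& to MiniLog&.
inline MiniLog& endl(MiniLog& log) {
  log << "\n";            // a real logger would also flush here
  return log;
}

int main() {
  MiniLog log;
  log << "hello" << endl; // the second << resolves to the function-pointer overload
  std::cout << log.buf;
  return 0;
}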
Logger.cc
#include "Logger.hh"
namespace matan {
/********************** Logger *******************************/
Logger::Logger(const std::string& ofname) :
AsyncWorker(),
m_ofstream(ofname, std::ios::out) {}
Logger::~Logger() {
std::cout << "~Logger" << std::endl;
m_bDone = true;
std::cout << "m_bDone" << std::endl;
doFlush();
std::cout << "pre-join" << std::endl;
if (m_worker.joinable()) {
std::cout << "try-join" << std::endl;
m_worker.join(); //Perhaps I should detach?
std::cout << "did-join" << std::endl;
}
std::cout << "post-join" << std::endl;
m_ofstream.close();
}
void Logger::flush() {
if (m_buf.size() > MAX_LEN) {
doFlush();
}
}
void Logger::doFlush() {
std::cout << "doFlush" << std::endl;
m_contents.push_back(m_buf);
notifyNewEle();
m_buf.clear();
}
void Logger::doit() {
std::cout << "doit" << std::endl;
for (const auto& line : m_contents.takeQueue()) {
m_ofstream << line;
m_ofstream.flush();
}
}
} // matan
AsyncWorker.hh
#pragma once
#include "BunchQueue.hh"
#include <condition_variable>
#include <thread>
#include <iostream>
namespace matan {
class AsyncWorker {
/*
* A class that allows a process to contain a worker thread that follows a
* simple loop. The expected usage is that there is some queue-like data
* structure the owners write to, and the worker thread runs over each
* element, performing some operation defined by doit().
*
* This class can support multiple writers and a single reader (the internal
* thread), but the implementation details of the derived class determine
* whether multiple writers are actually safe.
*/
public:
AsyncWorker() : m_worker([this]() { this->workerLoop();}) {}
virtual ~AsyncWorker() = 0;
protected:
void workerLoop() {
std::unique_lock<std::mutex> lock(m_mtx);
lock.unlock();
while (true) {
lock.lock();
if (shouldSleep()) {
m_shouldDoit.wait(lock);
}
lock.unlock();
doit();
if (m_bDone) {
break;
}
doit();
}
}
/*
* doit() is the function we actually want the worker thread to perform.
* I assume that each doit() call is enough to completely consume the
* contents of the worker thread's "queue."
*/
virtual void doit() = 0;
/*
* Checks whether there is any work for the worker thread to do; if not,
* the worker loop puts the thread to sleep.
*/
virtual bool shouldSleep() = 0;
/*
* Need to prevent this interleaving (in this order) so as not to miss a
* notify while the worker thread is deciding to wait (the standard
* condition_variable pattern is sketched after this header):
*
* worker_thread: m_contents.empty() == true
* writer_thread: m_contents.push_back(m_buf)
* writer_thread: m_shouldDoit.notify_one()
* worker_thread: m_shouldDoit.wait(lock)
*/
void notifyNewEle() {
m_mtx.lock();
m_shouldDoit.notify_one();
m_mtx.unlock();
}
bool m_bDone = false;
std::thread m_worker;
private:
std::mutex m_mtx;
std::condition_variable m_shouldDoit;
};
inline AsyncWorker::~AsyncWorker() {}
} // matan
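The interleaving in the notifyNewEle() comment above is the classic lost-wakeup problem. For comparison, here is the textbook pattern I was trying to follow: the writer publishes under the mutex and then notifies, and the worker waits with a predicate, so a notification can never slip between the empty-check and the wait. This is a standalone sketch with made-up names (queue_, done), not my actual members.
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>

std::mutex mtx;
std::condition_variable cv;
std::queue<std::string> queue_;
bool done = false;

void writer(const std::string& msg) {
  {
    std::lock_guard<std::mutex> lock(mtx);
    queue_.push(msg);       // publish under the mutex
  }
  cv.notify_one();
}

void worker() {
  std::unique_lock<std::mutex> lock(mtx);
  while (!done) {
    // The predicate is re-checked under the lock, so a notify sent before
    // the wait starts cannot be missed.
    cv.wait(lock, [] { return done || !queue_.empty(); });
    while (!queue_.empty()) {
      std::cout << queue_.front();
      queue_.pop();
    }
  }
}

int main() {
  std::thread t(worker);
  writer("hello\n");
  {
    std::lock_guard<std::mutex> lock(mtx);
    done = true;
  }
  cv.notify_one();
  t.join();
  return 0;
}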
BunchQueue.hh
#pragma once
#include <stdlib.h>
#include <string.h>
#include <mutex>
#include <vector>
#include <utility>
#include <type_traits>
#include "general.hh"
namespace matan {
template <typename T>
class BaseQueue {
public:
typedef T value_type;
BaseQueue(size_t initCapacity) : m_capacity(initCapacity) {}
void reset() { m_size=0; }
size_t size() const { return m_size; }
T* begin() { return m_vec; }
const T* begin() const { return m_vec; }
T* end() { return m_vec+m_size; }
const T* end() const { return m_vec+m_size; }
protected:
T* m_vec = nullptr;
size_t m_capacity = 0;
size_t m_size = 0;
};
template <typename T>
class TakerQueue : public BaseQueue<T> {
/*
* A vector-like container, except that push_back always moves from its
* argument (hence the non-const reference).
*/
public:
TakerQueue(size_t initCapacity = 1) : BaseQueue<T>(initCapacity) {
this->m_vec = new T[this->m_capacity];
}
void push_back(T& t) {
if (unlikely(this->m_size >= this->m_capacity)) {
this->m_capacity = this->m_capacity << 1;
T* oldVec = this->m_vec;
this->m_vec = new T[this->m_capacity];
for (size_t i = 0; i < this->m_size; i++) {
new (this->m_vec+i) T(std::move(oldVec[i]));
}
delete[] oldVec;
}
new (this->m_vec+this->m_size) T(std::move(t));
++(this->m_size);
}
};
template <typename T>
class ShallowQueue : public BaseQueue<T>{
//TODO: figure out the correct concept to use to guarantee T is trivially movable at compile time
/*
* A queue that, instead of freeing and allocating memory constantly,
* simply reuses the same memory, overwriting the appropriate values.
*
* Its use case is to be filled, then iterated through, and then reset.
*
* Meant for use with trivial classes, specifically structs used as
* messages. The use of memcpy means I am not actually constructing an
* object in place, just taking a shallow copy, and realloc-style growth
* (as in vectors) is only valid for a trivially movable object.
*
*/
public:
ShallowQueue(size_t initCapacity = 1) : BaseQueue<T>(initCapacity) {
this->m_vec = (T*) malloc(sizeof(T)*(this->m_capacity));
}
void push_back(const T& msg) {
if (unlikely(this->m_size >= this->m_capacity)) {
this->m_capacity = this->m_capacity << 1;
this->m_vec = (T*) realloc(this->m_vec, sizeof(T)*this->m_capacity);
}
memcpy(this->m_vec+this->m_size, &msg, sizeof(T));
++(this->m_size);
}
};
template <typename Queue>
class BunchQueue {
/*
* Multi writer, single reader.
*
* Instead of popping off individual messages, the reader takes all of them
* at once. This works well if the architecture is focused on bunches.
* It is also good memory-wise because it means fewer allocations and frees,
* and allows contiguous memory access when reading through the messages.
* The drawback is a relatively large memory footprint, with one extra
* queue just sitting around. It works best if you are not RAM bound and can
* expect fairly consistent bunch sizes. (A small usage sketch follows this
* header.)
*/
public:
BunchQueue(size_t initCapacity = 1) :
m_queueA(initCapacity), m_queueB(initCapacity) {
}
void push_back(const typename Queue::value_type& msg) {
m_mtx.lock();
auto& q = getQueue();
q.push_back(msg);
m_mtx.unlock();
}
void push_back(typename Queue::value_type& msg) {
m_mtx.lock();
auto& q = getQueue();
q.push_back(msg);
m_mtx.unlock();
}
const Queue& takeQueue() {
m_mtx.lock();
auto q = &(getQueue());
m_whichQueue = !m_whichQueue;
getQueue().reset();
m_mtx.unlock();
return *q;
}
bool empty() { return m_queueA.size() == 0 && m_queueB.size() == 0; }
private:
bool m_whichQueue = true;
std::mutex m_mtx;
Queue m_queueA;
Queue m_queueB;
Queue& getQueue() { return m_whichQueue ? m_queueA : m_queueB; };
};
template <typename Msg>
using MessageQueue = BunchQueue<ShallowQueue<Msg>>;
} //namespace matan
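To make the intended push_back/takeQueue cycle concrete, here is a small single-threaded usage sketch. It only illustrates the API, not the multi-writer locking, and it assumes general.hh provides the unlikely macro the queues use.
#include <iostream>
#include <string>
#include "BunchQueue.hh"

int main() {
  matan::BunchQueue<matan::TakerQueue<std::string>> queue;

  // Writer side: push individual messages. Note that TakerQueue::push_back
  // moves from its argument, so a and b are left in a moved-from state.
  std::string a = "first\n";
  std::string b = "second\n";
  queue.push_back(a);
  queue.push_back(b);

  // Reader side: take the whole bunch at once and iterate over it, while any
  // new writes would go to the other internal queue.
  for (const auto& msg : queue.takeQueue())
    std::cout << msg;

  return 0;
}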
makefile
CC = clang++
CFLAGS = -g -Wall -std=c++1z -pthread
BINDIR = bin
logger: Logger.o logger.cc
$(CC) $(CFLAGS) Logger.o logger.cc -o $(BINDIR)/logger
Logger.o: BunchQueue.hh AsyncWorker.hh Logger.hh Logger.cc
$(CC) $(CFLAGS) -c Logger.cc
Compiler and OS:
clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
Ubuntu 16.04