lundi 26 juin 2017

std::thread only sometimes "terminate called after throwing an instance of 'std::system_error'"

I am trying to write a multithreaded logger. In the destructor for the logger I call to join on the worker thread, and some of the times it throws an error, and sometimes not. I was able to find an example where the issue was in the makefile, but that one seemed to always have the issue, and I assume I compile correctly if most of the time everything works. This questions had an error in the c++, but as far as I can tell I don't delete anything, since I wait in the destructor for the wait. The code is quite long so if you prefer you can grab it from my github.

logger.cc

#include <string>
#include <sstream>
#include <algorithm>
#include <iterator>
#include <iostream>
#include <chrono>
#include "Logger.hh"

using namespace std::chrono;

int main() {
  std::string lyrics = "Row, row, row your boat Gently down the stream, Merrily merrily, merrily, merrily Life is but a dream";
  std::istringstream iss(lyrics);
  std::vector<std::string> lyric_vec(std::istream_iterator<std::string>{iss},
                                     std::istream_iterator<std::string>{});
  std::ofstream mystream("/tmp/logger1.log", std::ios::out);
  auto start1 = high_resolution_clock::now();
  for (auto& lyric : lyric_vec) {
    mystream << lyric << std::endl;
    mystream.flush();
  }
  mystream.close();
  std::cout
    << duration_cast<nanoseconds>(high_resolution_clock::now()-start1).count()
    << std::endl;

  matan::Logger bunchLogger("/tmp/logger2.log");
  auto start2 = high_resolution_clock::now();
  for (auto& lyric : lyric_vec) {
    bunchLogger << lyric << std::endl;
  }
  std::cout
    << duration_cast<nanoseconds>(high_resolution_clock::now()-start2).count()
    << std::endl;

  return 0;
}

Logger.hh

#pragma once

#include "AsyncWorker.hh"
#include <fstream>
#include <string>

/*
 * Has LogQueue, file to write to.
 */
namespace matan {

class Logger : public AsyncWorker {
  /*
   * Logger intended to prevent IO from blocking. Pushes the actual writing
   * to disk onto a separate thread.
   *
   * Single writer. To make the interface similar to std::cout we need to allow
   * separate calls to operator<<. For this to be multi writer we would need
   * each operator<< call to contain a complete elements, as opposed to
   * building it within m_buf and only later flushing it. (Please note this
   * issue would exist even if we actually flushed on every call to flush()).
   */
public:
  Logger(const std::string& ofname);
  virtual ~Logger();
  Logger& operator<<(const std::string& str) {m_buf += str; return *this;}
  Logger& operator<<(const char* c) { m_buf += c; return *this; }
  Logger& operator<<(char c) { m_buf += c; return *this; }
  Logger& operator<<(Logger& (*pf)(Logger&)) {return pf(*this);}
  void flush();

private:
  void doFlush();
  virtual void doit();
  virtual bool shouldSleep() { return m_contents.empty(); }

  std::string m_buf;
  std::ofstream m_ofstream;
  BunchQueue<TakerQueue<std::string>> m_contents;
  /*
   * I'm making a guess here that one page in memory is 4KB and that it will
   * be fastest if I can stay on one page (I need to pick a threshold
   * somehow) and that most logs will be less than 1024 characters.
   */
  static constexpr std::size_t MAX_LEN = 3 * 1024;
};

} // matan

namespace std {

inline matan::Logger& endl(matan::Logger& logger) {
  logger << '\n';
  logger.flush();
  return logger;
}

}

Logger.cc

#include "Logger.hh"


namespace matan {


/**********************    BunchLogger    *******************************/


Logger::Logger(const std::string& ofname) :
    AsyncWorker(),
    m_ofstream(ofname, std::ios::out) {}

Logger::~Logger() {
  std::cout << "~Logger" << std::endl;
  m_bDone = true;
  std::cout << "m_bDone" << std::endl;
  doFlush();
  std::cout << "pre-join" << std::endl;
  if (m_worker.joinable()) {
    std::cout << "try-join" << std::endl;
    m_worker.join(); //Perhaps I should detach?
    std::cout << "did-join" << std::endl;
  }
  std::cout << "post-join" << std::endl;
  m_ofstream.close();
}

void Logger::flush() {
  if (m_buf.size() > MAX_LEN) {
    doFlush();
  }
}

void Logger::doFlush() {
  std::cout << "doFlush" << std::endl;
  m_contents.push_back(m_buf);
  notifyNewEle();
  m_buf.clear();
}

void Logger::doit() {
  std::cout << "doit" << std::endl;
  for (const auto& line : m_contents.takeQueue()) {
    m_ofstream << line;
    m_ofstream.flush();
  }
}

} // matan

AsyncWorker.hh

#pragma once

#include "BunchQueue.hh"
#include <condition_variable>
#include <thread>

#include <iostream>

namespace matan {

class AsyncWorker {
  /*
   * A class to allow for a process to contain a worker thread that follows
   * a simple loop. The expected usage is for there to be some sort of queue
   * like data structure that the owners write to, and the worker thread
   * will run over each element performing some operation defined by doit.
   *
   * This class is capable of Multi writer, single reader (the internal thread).
   * but the details of implementation will determine the reality of if you
   * can take multiple writers.
   */
public:
  AsyncWorker() : m_worker([this]() { this->workerLoop();}) {}
  virtual ~AsyncWorker() = 0;

protected:
  void workerLoop() {
    std::unique_lock<std::mutex> lock(m_mtx);
    lock.unlock();
    while (true) {
      lock.lock();
      if (shouldSleep()) {
        m_shouldDoit.wait(lock);
      }
      lock.unlock();
      doit();
      if (m_bDone) {
        break;
      }
      doit();
    }
  }

  /*
   * doit is the function that we actual want the worker thread to perform.
   * I assume that each doit call is enough to completely utilize all the
   * contents on the worker threads "queue."
   */
  virtual void doit() = 0;

  /*
   * Checks if there is any work for the worker thread to do, and if not puts
   * the thread to sleep.
   */
  virtual bool shouldSleep() = 0;

  /*
   * Need to prevent this situation (in this order) so as not to miss a
   * notify when deciding to wait.
   *
   * flush_thread: m_logs.empty() == true
   * write_thread: m_logs.push_back(m_buf)
   * write_thread: m_shouldWrite.notify_one()
   * flush_thread: m_shouldWrite.wait(lock)
   */
  void notifyNewEle() {
    m_mtx.lock();
    m_shouldDoit.notify_one();
    m_mtx.unlock();
  }

  bool m_bDone = false;
  std::thread m_worker;

private:
  std::mutex m_mtx;
  std::condition_variable m_shouldDoit;
};

inline AsyncWorker::~AsyncWorker() {}

} // matan

BunchQueue.hh

#pragma once

#include <stdlib.h>
#include <string.h>
#include <mutex>
#include <vector>
#include <utility>
#include <type_traits>

#include "general.hh"

namespace matan {

template <typename T>
class BaseQueue {
public:
  typedef T value_type;
  BaseQueue(size_t initCapacity) : m_capacity(initCapacity) {}
  void reset() { m_size=0; }
  size_t size() const { return m_size; }
  T* begin() { return m_vec; }
  const T* begin() const { return m_vec; }
  T* end() { return m_vec+m_size; }
  const T* end() const { return m_vec+m_size; }

protected:
  T* m_vec = nullptr;
  size_t m_capacity = 0;
  size_t m_size = 0;
};

template <typename T>
class TakerQueue : public BaseQueue<T> {
  /*
   * A vector, but you have to use std::move
   */
public:
  TakerQueue(size_t initCapacity = 1) :  BaseQueue<T>(initCapacity) {
    this->m_vec = new T[this->m_capacity];
  }
  void push_back(T& t) {
    if (unlikely(this->m_size >= this->m_capacity)) {
      this->m_capacity = this->m_capacity << 1;
      T* oldVec = this->m_vec;
      this->m_vec = new T[this->m_capacity];
      for (size_t i = 0; i < this->m_size; i++) {
        new (this->m_vec+i) T(std::move(oldVec[i]));
      }
      delete[] oldVec;
    }
    new (this->m_vec+this->m_size) T(std::move(t));
    ++(this->m_size);
  }
};


template <typename T>
class ShallowQueue : public BaseQueue<T>{
  //TODO: figure out the correct concept to use to guarantee T is trivially movable at compile time
  /*
   * A queue that instead of freeing and allocating memory constantly
   * simply reuses the same memory overwriting the appropriate values.
   *
   * It's use case is to be filled, then iterated through, and then reset.
   *
   * Meant for usage with trivial classes, specifically structs as
   * messages. The use of memcpy means I am not actually constructing
   * an object in place, but just taking a shallow copy,
   * and the use of realloc  in vectors is only be valid for a trivially movable
   * object.
   *
   */
public:
  ShallowQueue(size_t initCapacity = 1) : BaseQueue<T>(initCapacity) {
    this->m_vec = (T*) malloc(sizeof(T)*(this->m_capacity));
  }
  void push_back(const T& msg) {
    if (unlikely(this->m_size >= this->m_capacity)) {
      this->m_capacity = this->m_capacity << 1;
      this->m_vec = (T*) realloc(this->m_vec, sizeof(T)*this->m_capacity);
    }
    memcpy(this->m_vec+this->m_size, &msg, sizeof(T));
    ++(this->m_size);
  }
};

template <typename Queue>
class BunchQueue {
  /*
   * Multi writer single reader.
   *
   * Instead of popping off individual messages the reader takes all of them
   * at once. This works well if the architecture is focused on bunches.
   * Also good memory wise because it means fewer allocations and frees and
   * allows for adjacent memory access when reading through the messages.
   * Drawback is that you have a relatively large memory footprint with 1
   * vector just sitting around. Works best if you are not RAM bound and can
   * expect fairly consistent bunch sizes.
   */
public:
  BunchQueue(size_t initCapacity = 1) :
    m_queueA(initCapacity), m_queueB(initCapacity) {
  }
  void push_back(const typename Queue::value_type& msg) {
    m_mtx.lock();
    auto& q = getQueue();
    q.push_back(msg);
    m_mtx.unlock();
  }

  void push_back(typename Queue::value_type& msg) {
    m_mtx.lock();
    auto& q = getQueue();
    q.push_back(msg);
    m_mtx.unlock();
  }

  const Queue& takeQueue() {
    m_mtx.lock();
    auto q = &(getQueue());
    m_whichQueue = !m_whichQueue;
    getQueue().reset();
    m_mtx.unlock();
    return *q;
  }

  bool empty() { return m_queueA.size() == 0 && m_queueB.size() == 0; }

private:
  bool m_whichQueue = true;
  std::mutex m_mtx;
  Queue m_queueA;
  Queue m_queueB;
  Queue& getQueue() { return m_whichQueue ? m_queueA : m_queueB; };
};

template <typename Msg>
using MessageQueue = BunchQueue<ShallowQueue<Msg>>;

} //namespace matan

makefile

    CC = clang++
    CFLAGS = -g -Wall -std=c++1z -pthread
    BINDIR = bin

    logger: Logger.o logger.cc
                    $(CC) $(CFLAGS) Logger.o logger.cc -o $(BINDIR)/logger

    Logger.o: BunchQueue.hh AsyncWorker.hh Logger.hh Logger.cc
                    $(CC) $(CFLAGS) -c Logger.cc


clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin

Ubuntu 16.04

Aucun commentaire:

Enregistrer un commentaire