I need to calculate the length of the longest sequence of zero bytes in a binary file as fast as possible. I have a basic implementation in C++ below:
#include <iostream>
#include <fstream>
#include <algorithm>
#include <string>
int get_max_zero_streak(std::string fname)
{
std::ifstream myfile(fname, std::ios_base::binary);
int length = 0;
int streak = 0;
while(myfile)
{
unsigned char x = myfile.get(); // unsigned 8 bit integer
if(x == 0)
{
streak += 1;
}
else
{
length = std::max(length, streak);
streak = 0;
}
}
return length;
}
int main()
{
std::cout << get_max_zero_streak("000_c.aep") << std::endl;
std::cout << get_max_zero_streak("000_g1.aep") << std::endl;
std::cout << get_max_zero_streak("000_g2.aep") << std::endl;
std::cout << get_max_zero_streak("001_c.aep") << std::endl;
std::cout << get_max_zero_streak("001_g1.aep") << std::endl;
std::cout << get_max_zero_streak("001_g2.aep") << std::endl;
std::cout << get_max_zero_streak("002_c.aep") << std::endl;
std::cout << get_max_zero_streak("002_g1.aep") << std::endl;
std::cout << get_max_zero_streak("002_g2.aep") << std::endl;
return 0;
}
This is OK on smaller files, but is extraordinarily slow on larger files (such as 50 GB+). Is there a more efficient way to write this, or is parallelizing it my only hope? I'm reading the files from an NVMe SSD, so I don't think the drive read speed is the limitation.
Aucun commentaire:
Enregistrer un commentaire