import datetime
import functools
import pathlib
import random
import time
import pandas as pd
def measure_and_log_running_time(func):
    """Decorate *func* so that every completed call reports its running time.

    The report reads ``"<name>() running time <H:MM:SS>"`` with the duration
    truncated to whole seconds.  It goes to ``logger.info`` when the call
    passes a truthy ``logger`` keyword argument and to ``print`` otherwise.
    The ``logger`` keyword is only inspected, not consumed: it is forwarded
    to *func* together with every other argument.

    Returns the wrapper; the wrapper returns whatever *func* returns.
    Nothing is reported when *func* raises.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Pull the logger out of the function's keyword arguments.
        logger = kwargs.get('logger')
        # Measure with a monotonic clock: time.time() follows the system
        # clock, so an adjustment during the call would skew the duration.
        started = time.perf_counter()
        ans = func(*args, **kwargs)
        running_time = datetime.timedelta(seconds=int(time.perf_counter() - started))
        # Report the running time.
        message = f"{func.__name__}() running time {running_time}"
        if logger:
            logger.info(message)
        else:
            print(message)
        return ans
    return wrapper
def format_size_in_bytes_with_si_prefix(value, *, unit_full=False, prefix_full=False):
    """Format a byte count as a two-decimal number with a prefixed unit.

    *value* is divided by 1024 until it drops below 1024 or the largest
    known prefix (tera) is reached; anything bigger is expressed in tera
    units instead of running past the prefix table.

    Args:
        value: size in bytes.
        unit_full: use ``"byte"`` instead of ``"B"``.
        prefix_full: use ``"kilo"``, ``"mega"``, ... instead of ``"K"``, ``"M"``, ...

    Returns:
        A string such as ``"1.05 GB"`` or ``"1.50 kilobyte"``.
    """
    unit = ('B', 'byte')
    prefix_list = (
        ('', ''),
        ('K', 'kilo'),
        ('M', 'mega'),
        ('G', 'giga'),
        ('T', 'tera'),
    )
    idx = 0
    last_idx = len(prefix_list) - 1
    # Stop at the last prefix: without the bound, values >= 1024**5 would
    # index past the table and raise IndexError.
    while value >= 1024 and idx < last_idx:
        idx += 1
        value /= 1024
    return f"{value:.2f} {prefix_list[idx][int(prefix_full)]}{unit[int(unit_full)]}"
@measure_and_log_running_time
def generate_lines(n):
    """Return a list of *n* random lines of lowercase hexadecimal digits.

    Each line gets its own random width from 10 to 99 digits, is zero-padded
    to that width and ends with a newline.
    """
    # The generator is consumed lazily, so for every line the width is drawn
    # first and the line's value second.
    widths = (random.randrange(10, 100) for _ in range(n))
    return [f"{random.randrange(16 ** width):0{width}x}\n" for width in widths]
@measure_and_log_running_time
def write_lines(lines, filename):
    """Write *lines* to *filename* as UTF-8 text and return the file size in bytes.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    The lines are written as given; no line terminators are added.
    """
    target = pathlib.Path(filename)
    with target.open(mode='w', encoding='UTF-8') as sink:
        sink.writelines(lines)
    # The size is read back from the file system after the file is closed.
    return target.stat().st_size
@measure_and_log_running_time
def generate_sample_data():
    """Create the two sample files, a.txt and b.txt, in the working directory.

    For each file the name is printed, 20 million random lines are generated
    and written, and the resulting file size is printed in readable form.
    """
    for filename in ('a.txt', 'b.txt'):
        print(filename)
        size_in_bytes = write_lines(generate_lines(20_000_000), filename)
        print(format_size_in_bytes_with_si_prefix(size_in_bytes))
@measure_and_log_running_time
def process_sample_data(filename_1, filename_2, filename_3):
    """Write to *filename_3* the lines that *filename_1* and *filename_2* share.

    Both inputs are read as headerless single-column CSV files and joined
    with an inner merge on that column; the result is written without index
    or header.

    Args:
        filename_1: path of the first input file.
        filename_2: path of the second input file.
        filename_3: path of the output file (overwritten).
    """
    column = 'single_data_column'
    # Read every line as text. With dtype inference a line made only of
    # decimal digits would be parsed as a number, which drops leading zeros
    # and can give the two frames incompatible key dtypes for the merge.
    df1 = pd.read_csv(filename_1, header=None, names=[column], dtype=str)
    df2 = pd.read_csv(filename_2, header=None, names=[column], dtype=str)
    pd.merge(
        df1,
        df2,
        how='inner',
        left_on=[column],
        right_on=[column],
    ).to_csv(
        filename_3,
        index=False,
        header=False,
    )
# Script entry point. The bare string literals below are no-op statements;
# they appear to record the console output of earlier runs.
if __name__ == '__main__':
    # Sample-data generation is switched off; enable it to (re)create a.txt and b.txt.
    # generate_sample_data()
    """
a.txt
generate_lines() running time 0:00:38
write_lines() running time 0:00:05
1.05 GB
b.txt
generate_lines() running time 0:00:38
write_lines() running time 0:00:05
1.05 GB
generate_sample_data() running time 0:01:29
"""
    # Write the lines common to a.txt and b.txt into c.txt.
    process_sample_data('a.txt', 'b.txt', 'c.txt')
    """
process_sample_data() running time 0:00:45
"""
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include "profiler.h"
// Writes to filename_3 every line of filename_1 that also occurs in filename_2.
// Lines are emitted in sorted order, and a line repeated in filename_1 is
// emitted once per occurrence. If a file cannot be opened, a message is
// written to std::cerr and the function returns without producing output.
// The total time and the time of each phase are logged to std::cout.
void process_sample_data(
        const std::string& filename_1,
        const std::string& filename_2,
        const std::string& filename_3) {
    LOG_RUNNING_TIME(std::cout, "process_sample_data running time"); // profiler.h

    std::ifstream file_1(filename_1);
    std::ifstream file_2(filename_2);
    if (!file_1 || !file_2) {
        std::cerr << "process_sample_data: cannot open input file" << "\n";
        return;
    }
    std::ofstream file_3(filename_3);
    if (!file_3) {
        std::cerr << "process_sample_data: cannot open output file" << "\n";
        return;
    }

    // The first file keeps duplicates (they are echoed to the output); the
    // second file is only tested for membership, so a plain set is enough.
    std::multiset<std::string> lines_1;
    std::set<std::string> lines_2;
    std::string line;
    {
        LOG_RUNNING_TIME(std::cout, "file_1"); // profiler.h
        while (std::getline(file_1, line)) {
            lines_1.insert(line);
        }
    }
    {
        LOG_RUNNING_TIME(std::cout, "file_2"); // profiler.h
        while (std::getline(file_2, line)) {
            lines_2.insert(line);
        }
    }
    {
        LOG_RUNNING_TIME(std::cout, "comparison"); // profiler.h
        for (const auto& line_1 : lines_1) {
            if (lines_2.find(line_1) != lines_2.end()) {
                file_3 << line_1 << "\n";
            }
        }
    }
    // The streams close themselves when they go out of scope.
}
// Runs the comparison on the sample files at fixed absolute paths.
int main() {
    const std::string first_input = "C:/Users/fpn/PycharmProjects/nnf1_project/a.txt";
    const std::string second_input = "C:/Users/fpn/PycharmProjects/nnf1_project/b.txt";
    const std::string output = "C:/Users/fpn/PycharmProjects/nnf1_project/c.txt";

    process_sample_data(first_input, second_input, output);
    /*
    Timings recorded from a run:
    file_1: 40958 ms
    file_2: 41392 ms
    comparison: 15027 ms
    process_sample_data running time: 118150 ms
    */
    return 0;
}
#pragma once
#include <chrono>
#include <string>
// NOTE(review): these using-directives sit in a header (#pragma once above),
// so they take effect in every file that includes it.
using namespace std;
using namespace std::chrono;
// Token-pasting helpers: UNIQ_ID expands to var_<line number>. The extra
// HELPER level lets __LINE__ expand before ## glues the tokens together.
#define UNIQ_ID_GLUE(x, y) x##y
#define UNIQ_ID_HELPER(prefix, id) UNIQ_ID_GLUE(prefix, id)
#define UNIQ_ID UNIQ_ID_HELPER(var_, __LINE__)
// Declares a RunningTimeLogger local named by UNIQ_ID, constructed from
// (stream, message). Two uses on the same source line in one scope would
// produce the same variable name.
#define LOG_RUNNING_TIME(stream, message) RunningTimeLogger UNIQ_ID(stream, message)
// Scope timer: remembers the steady-clock time at construction and, on
// destruction, writes "<message>: <elapsed> ms" (or just "<elapsed> ms" when
// the message is empty) to the given stream, followed by std::endl.
class RunningTimeLogger {
public:
    // new_stream must stay valid until this object is destroyed, because
    // only a reference to it is kept; the message is copied.
    explicit RunningTimeLogger(std::ostream& new_stream, const std::string& new_log_message)
        : stream(new_stream),
          log_message(new_log_message),
          start(std::chrono::steady_clock::now()) {}

    ~RunningTimeLogger() {
        const char* separator = log_message.empty() ? "" : ": ";
        stream << log_message << separator;
        const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now() - start);
        stream << elapsed.count() << " ms" << std::endl;
    }

private:
    std::ostream& stream;
    std::string log_message;
    std::chrono::steady_clock::time_point start;
};
`++i`, `vector` и индекс типа `size_t` - современный C++, новые стандарты, все дела. `new`). Собственно, вся инициализация происходит внутри функции `T create_t_1()`, укороченный альтернативный вариант - в `T create_t_2()`.
#include <iostream>
// Aggregate of raw pointers and plain values. Member order is p1, a, b, p2, c,
// which is the order aggregate initialization fills them in.
struct T {
    // Holder of a single int pointer; T::p1 points to one of these.
    struct _T_ {
        int* a;
    };
    // Plain value record; T::p2 points to one of these.
    struct _T {
        int a;
        int b;
        double c;
    };

    _T_* p1;
    float* a;
    int* b;
    _T* p2;
    int c;
};
// Builds a T step by step: every pointed-to object is allocated with new and
// kept in a named local before the aggregate is assembled. Nothing here
// frees those allocations.
T create_t_1() {
    auto* inner_value = new int {76};
    auto* holder = new T::_T_ {inner_value};
    auto* ratio = new float {85.012};
    auto* count = new int {100};
    auto* record = new T::_T {122, 152, 203.013};
    const int plain_value = 250;
    return T {holder, ratio, count, record, plain_value};
}
// Short form of the same construction: all allocations are written inline in
// the aggregate initializer, in member order. Nothing here frees them.
T create_t_2() {
    return T {
        new T::_T_ {new int {76}},
        new float {85.012},
        new int {100},
        new T::_T {122, 152, 203.013},
        250};
}
// Prints t to std::cout in two groups, each followed by an empty line:
// first the pointer values stored in p1->a, a and b, then the values they
// point to together with the fields of *p2 and c.
void print_t(const T& t) {
    // Stored addresses.
    std::cout << "&.p1.a = " << t.p1->a << "\n";
    std::cout << "&.a = " << t.a << "\n";
    std::cout << "&.b = " << t.b << "\n";
    std::cout << std::endl;

    // Pointed-to and plain values.
    std::cout << ".p1.a = " << *t.p1->a << "\n";
    std::cout << ".a = " << *t.a << "\n";
    std::cout << ".b = " << *t.b << "\n";
    std::cout << ".p2.a = " << t.p2->a << "\n";
    std::cout << ".p2.b = " << t.p2->b << "\n";
    std::cout << ".p2.c = " << t.p2->c << "\n";
    std::cout << ".c = " << t.c << "\n";
    std::cout << std::endl;
}
// Builds one T with each construction variant and prints both.
// The objects allocated by the create_t_* functions are not freed here.
int main() {
    T value_1 = create_t_1();
    print_t(value_1);
    // Use the short variant for the second value; calling create_t_1() twice
    // left create_t_2() unused.
    T value_2 = create_t_2();
    print_t(value_2);
    return 0;
}