#include <iostream>
#include <fstream>
#include <vector>
#include <string>
int main() {
std::string path_first;
std::string path_second;
std::string path_result;
std::cout << "First file path: ";
std::getline(std::cin, path_first);
std::cout << "Second file path: ";
std::getline(std::cin, path_second);
std::cout << "Result pass: ";
std::getline(std::cin, path_result);
std::ifstream file_first(path_first);
std::ifstream file_second(path_second);
std::ofstream file_result(path_result);
if (!file_first.is_open() or !file_second.is_open() or !file_result.is_open()) {
std::cout << "Invalid jobaniy blyat." << std::endl;
}
else {
std::vector<std::string> lines_first;
std::vector<std::string> lines_second;
std::string buff;
while (std::getline(file_first, buff)) lines_first.push_back(buff);
while (std::getline(file_second, buff)) lines_second.push_back(buff);
for (auto line_first : lines_first) {
for (auto line_second : lines_second) {
if (line_first == line_second) file_result << line_first << "\n";
}
}
}
file_first.close();
file_second.close();
file_result.close();
return 0;
}
import datetime
import functools
import pathlib
import random
import time
import pandas as pd
def measure_and_log_running_time(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
# вытаскиваем логгер из именованных аргументов функции
logger = kwargs.get('logger')
# замеряем время работы
t_1 = time.time()
ans = func(*args, **kwargs)
running_time = datetime.timedelta(seconds=int(time.time() - t_1))
# логируем время работы
if logger:
logger.info(f"{func.__name__}() running time {running_time}")
else:
print(f"{func.__name__}() running time {running_time}")
return ans
return wrapper
def format_size_in_bytes_with_si_prefix(value, *, unit_full=False, prefix_full=False):
unit = ('B', 'byte')
prefix_list = (
('', ''),
('K', 'kilo'),
('M', 'mega'),
('G', 'giga'),
('T', 'tera'))
idx = 0
while value >= 1024:
idx += 1
value /= 1024
return f"{value:.2f} {prefix_list[idx][int(prefix_full)]}{unit[int(unit_full)]}"
@measure_and_log_running_time
def generate_lines(n):
lines = list()
for dummy in range(n):
line_size = random.randrange(10, 100)
lines.append(f"{random.randrange(16 ** line_size):0{line_size}x}\n")
return lines
@measure_and_log_running_time
def write_lines(lines, filename):
with open(filename, mode='w', encoding='UTF-8') as fout:
fout.writelines(lines)
return pathlib.Path(filename).stat().st_size
@measure_and_log_running_time
def generate_sample_data():
print('a.txt')
print(format_size_in_bytes_with_si_prefix(write_lines(generate_lines(20_000_000), 'a.txt')))
print('b.txt')
print(format_size_in_bytes_with_si_prefix(write_lines(generate_lines(20_000_000), 'b.txt')))
@measure_and_log_running_time
def process_sample_data(filename_1, filename_2, filename_3):
df1 = pd.read_csv(filename_1, header=None, names=['single_data_column'])
df2 = pd.read_csv(filename_2, header=None, names=['single_data_column'])
pd.merge(
df1,
df2,
how='inner',
left_on=['single_data_column'],
right_on=['single_data_column']
).to_csv(
filename_3,
index=False,
header=False)
if __name__ == '__main__':
# generate_sample_data()
"""
a.txt
generate_lines() running time 0:00:38
write_lines() running time 0:00:05
1.05 GB
b.txt
generate_lines() running time 0:00:38
write_lines() running time 0:00:05
1.05 GB
generate_sample_data() running time 0:01:29
"""
process_sample_data('a.txt', 'b.txt', 'c.txt')
"""
process_sample_data() running time 0:00:45
"""
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include "profiler.h"
void process_sample_data(
const std::string& filename_1,
const std::string& filename_2,
const std::string& filename_3) {
LOG_RUNNING_TIME(std::cout, "process_sample_data running time"); // profiler.h
std::ifstream file_1(filename_1);
std::ifstream file_2(filename_2);
std::ofstream file_3(filename_3);
std::multiset<std::string> lines_1, lines_2;
std::string line;
{
LOG_RUNNING_TIME(std::cout, "file_1"); // profiler.h
while (std::getline(file_1, line)) {
lines_1.insert(line);
}
}
{
LOG_RUNNING_TIME(std::cout, "file_2"); // profiler.h
while (std::getline(file_2, line)) {
lines_2.insert(line);
}
}
{
LOG_RUNNING_TIME(std::cout, "comparison"); // profiler.h
for (const auto& line_1 : lines_1) {
if (lines_2.count(line_1) > 0) {
file_3 << line_1 << "\n";
}
}
}
file_1.close();
file_2.close();
file_3.close();
}
int main() {
process_sample_data(
"C:/Users/fpn/PycharmProjects/nnf1_project/a.txt",
"C:/Users/fpn/PycharmProjects/nnf1_project/b.txt",
"C:/Users/fpn/PycharmProjects/nnf1_project/c.txt");
/*
file_1: 40958 ms
file_2: 41392 ms
comparison: 15027 ms
process_sample_data running time: 118150 ms
*/
return 0;
}
#pragma once
#include <chrono>
#include <string>
using namespace std;
using namespace std::chrono;
#define UNIQ_ID_GLUE(x, y) x##y
#define UNIQ_ID_HELPER(prefix, id) UNIQ_ID_GLUE(prefix, id)
#define UNIQ_ID UNIQ_ID_HELPER(var_, __LINE__)
#define LOG_RUNNING_TIME(stream, message) RunningTimeLogger UNIQ_ID(stream, message)
class RunningTimeLogger {
public:
explicit RunningTimeLogger(ostream& new_stream, const string& new_log_message)
: stream(new_stream)
, log_message(new_log_message)
, start(steady_clock::now()) {
// empty block of code
}
~RunningTimeLogger() {
stream << log_message << (log_message.size() > 0 ? ": " : "")
<< duration_cast<milliseconds>(steady_clock::now() - start).count() << " ms" << endl;
}
private:
ostream& stream;
string log_message;
steady_clock::time_point start;
};
std::unordered_map<std::string, int>
вместо trie. std::unordered_set<std::string>
.std::unordered_set<std::string>
лучше заменить на std::unordered_set<const char*>
или std::unordered_set<std::string_view>
.