From 0dd5962b9d0215877bda64c58989673732736713 Mon Sep 17 00:00:00 2001
From: Stanislav Pastushenko
Date: Tue, 2 Dec 2025 18:41:12 +0100
Subject: [PATCH] goddamn it is fast now

---
Review notes (below the three-dash marker, so not part of the commit message):

* Recovered from an extract that had lost its line breaks and every `<...>`
  span. The hunk layout was rebuilt from the `@@` line counts. The following
  were restored by inference from how the values are used in the hunks;
  please confirm against the repository: `&mut BufReader<File>`,
  `&Arc<Mutex<f32>>`, `Vec<FilteredLine>`, `Vec<Vec<FilteredLine>>` and
  `&regex_matcher` (the extract showed `®ex_matcher`). The author e-mail on
  the `From:` header appears to have been lost the same way and is left out.
* read_line_range: the first `read_line` error (std reports invalid UTF-8 as
  an error) breaks out of the loop, so every later line of that range is
  dropped without any signal. The loop removed from search_lines only skipped
  the single line for which `LineIndex::read_line` returned `None`. Consider
  skipping just the bad line, or reading bytes and converting lossily.
* search_lines: `File::open(file_path).ok()?` inside `filter_map` silently
  drops a whole chunk when the open fails, so results can be partial.
* search_lines: `build_regex_matcher()` now runs once per chunk instead of
  once per search; hoist it out of the closure if the matcher can be shared
  across threads (its type is not visible in this patch).
* search_lines: the final `sort_by_key` looks redundant if rayon's `collect`
  into a `Vec` keeps chunk order; confirm before removing.

 Cargo.lock        | 46 ++++++++++++++++++++++++++++++++
 Cargo.toml        |  1 +
 src/line_index.rs | 36 +++++++++++++++++++++++++
 src/search.rs     | 68 +++++++++++++++++++++++++++++++++++++----------
 4 files changed, 137 insertions(+), 14 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index bd8fd64..f19ec45 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -798,6 +798,25 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.21"
@@ -1002,6 +1021,12 @@ dependencies = [
  "winit",
 ]
 
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
 [[package]]
 name = "emath"
 version = "0.29.1"
@@ -2545,6 +2570,26 @@ version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539"
 
+[[package]]
+name = "rayon"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "redox_syscall"
 version = "0.4.1"
@@ -2627,6 +2672,7 @@ name = "rlogg"
 version = "0.1.0"
 dependencies = [
  "eframe",
+ "rayon",
  "regex",
  "rfd",
  "serde",
diff --git a/Cargo.toml b/Cargo.toml
index 1b3c1f2..e755602 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,3 +16,4 @@ rfd = "0.15"
 regex = "1.11"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
+rayon = "1.10"
diff --git a/src/line_index.rs b/src/line_index.rs
index 35903e0..5c82156 100644
--- a/src/line_index.rs
+++ b/src/line_index.rs
@@ -51,4 +51,40 @@ impl LineIndex {
 
         Some(line.trim_end_matches(['\r', '\n']).to_string())
     }
+
+    /// Reads a range of lines efficiently by seeking once and reading sequentially
+    pub fn read_line_range(
+        &self,
+        file: &mut BufReader<File>,
+        start: usize,
+        end: usize,
+    ) -> Vec<(usize, String)> {
+        let end = end.min(self.total_lines);
+        if start >= end || start >= self.total_lines {
+            return Vec::new();
+        }
+
+        let mut results = Vec::with_capacity(end - start);
+
+        // Seek to the start position once
+        let start_pos = self.positions[start];
+        if file.seek(SeekFrom::Start(start_pos)).is_err() {
+            return results;
+        }
+
+        // Read lines sequentially without additional seeks
+        let mut line = String::new();
+        for line_num in start..end {
+            line.clear();
+            if file.read_line(&mut line).is_err() {
+                break;
+            }
+            if line.is_empty() {
+                break;
+            }
+            results.push((line_num, line.trim_end_matches(['\r', '\n']).to_string()));
+        }
+
+        results
+    }
 }
diff --git a/src/search.rs b/src/search.rs
index 1282d54..d97063d 100644
--- a/src/search.rs
+++ b/src/search.rs
@@ -1,6 +1,8 @@
+use rayon::prelude::*;
 use regex::Regex;
 use std::fs::File;
 use std::io::BufReader;
+use std::path::Path;
 use std::sync::{Arc, Mutex};
 use std::thread;
 
@@ -100,31 +102,69 @@ pub fn start_search(
 fn search_lines(
     params: &SearchParams,
     line_index: &LineIndex,
-    file_path: &std::path::Path,
+    file_path: &Path,
     progress: &Arc<Mutex<f32>>,
 ) -> Vec<FilteredLine> {
-    let mut filtered = Vec::new();
-    let regex_matcher = params.build_regex_matcher();
+    let total_lines = line_index.total_lines;
+    if total_lines == 0 {
+        return Vec::new();
+    }
 
-    if let Ok(file) = File::open(file_path) {
-        let mut file_handle = BufReader::new(file);
-        let total_lines = line_index.total_lines;
+    // Determine optimal chunk size based on total lines
+    // Aim for enough chunks to utilize all cores, but not too many to avoid overhead
+    let num_threads = rayon::current_num_threads();
+    let min_chunk_size = 1000; // Process at least 1000 lines per chunk
+    let chunk_size = (total_lines / (num_threads * 4)).max(min_chunk_size);
 
-        for line_num in 0..total_lines {
-            if let Some(content) = line_index.read_line(&mut file_handle, line_num) {
+    // Split line numbers into chunks
+    let chunks: Vec<(usize, usize)> = (0..total_lines)
+        .step_by(chunk_size)
+        .map(|start| {
+            let end = (start + chunk_size).min(total_lines);
+            (start, end)
+        })
+        .collect();
+
+    let total_chunks = chunks.len();
+    let processed_chunks = Arc::new(Mutex::new(0usize));
+
+    // Process chunks in parallel
+    let results: Vec<Vec<FilteredLine>> = chunks
+        .par_iter()
+        .filter_map(|(start, end)| {
+            // Each thread opens its own file handle
+            let file = File::open(file_path).ok()?;
+            let mut file_handle = BufReader::new(file);
+            let regex_matcher = params.build_regex_matcher();
+            let mut chunk_results = Vec::new();
+
+            // Read lines in this chunk efficiently (one seek, sequential reads)
+            let lines = line_index.read_line_range(&mut file_handle, *start, *end);
+
+            // Process each line
+            for (line_number, content) in lines {
                 if params.matches_line(&content, &regex_matcher) {
-                    filtered.push(FilteredLine {
-                        line_number: line_num,
+                    chunk_results.push(FilteredLine {
+                        line_number,
                         content,
                     });
                 }
             }
 
-            if line_num % 1000 == 0 {
-                *progress.lock().unwrap() = line_num as f32 / total_lines as f32;
+            // Update progress
+            {
+                let mut count = processed_chunks.lock().unwrap();
+                *count += 1;
+                *progress.lock().unwrap() = *count as f32 / total_chunks as f32;
             }
-        }
-    }
+
+            Some(chunk_results)
+        })
+        .collect();
+
+    // Flatten and sort results by line number
+    let mut filtered: Vec<FilteredLine> = results.into_iter().flatten().collect();
+    filtered.sort_by_key(|f| f.line_number);
 
     filtered
 }