goddamn it is fast now
This commit is contained in:
46
Cargo.lock
generated
46
Cargo.lock
generated
@@ -798,6 +798,25 @@ dependencies = [
|
|||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.8.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-utils"
|
name = "crossbeam-utils"
|
||||||
version = "0.8.21"
|
version = "0.8.21"
|
||||||
@@ -1002,6 +1021,12 @@ dependencies = [
|
|||||||
"winit",
|
"winit",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "emath"
|
name = "emath"
|
||||||
version = "0.29.1"
|
version = "0.29.1"
|
||||||
@@ -2545,6 +2570,26 @@ version = "0.6.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539"
|
checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon"
|
||||||
|
version = "1.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"rayon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-core"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redox_syscall"
|
name = "redox_syscall"
|
||||||
version = "0.4.1"
|
version = "0.4.1"
|
||||||
@@ -2627,6 +2672,7 @@ name = "rlogg"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"eframe",
|
"eframe",
|
||||||
|
"rayon",
|
||||||
"regex",
|
"regex",
|
||||||
"rfd",
|
"rfd",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@@ -16,3 +16,4 @@ rfd = "0.15"
|
|||||||
regex = "1.11"
|
regex = "1.11"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
|
rayon = "1.10"
|
||||||
|
|||||||
@@ -51,4 +51,40 @@ impl LineIndex {
|
|||||||
|
|
||||||
Some(line.trim_end_matches(['\r', '\n']).to_string())
|
Some(line.trim_end_matches(['\r', '\n']).to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads a range of lines efficiently by seeking once and reading sequentially
|
||||||
|
pub fn read_line_range(
|
||||||
|
&self,
|
||||||
|
file: &mut BufReader<File>,
|
||||||
|
start: usize,
|
||||||
|
end: usize,
|
||||||
|
) -> Vec<(usize, String)> {
|
||||||
|
let end = end.min(self.total_lines);
|
||||||
|
if start >= end || start >= self.total_lines {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut results = Vec::with_capacity(end - start);
|
||||||
|
|
||||||
|
// Seek to the start position once
|
||||||
|
let start_pos = self.positions[start];
|
||||||
|
if file.seek(SeekFrom::Start(start_pos)).is_err() {
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read lines sequentially without additional seeks
|
||||||
|
let mut line = String::new();
|
||||||
|
for line_num in start..end {
|
||||||
|
line.clear();
|
||||||
|
if file.read_line(&mut line).is_err() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if line.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
results.push((line_num, line.trim_end_matches(['\r', '\n']).to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
results
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
|
use rayon::prelude::*;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
|
use std::path::Path;
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use std::thread;
|
use std::thread;
|
||||||
|
|
||||||
@@ -100,31 +102,69 @@ pub fn start_search(
|
|||||||
fn search_lines(
|
fn search_lines(
|
||||||
params: &SearchParams,
|
params: &SearchParams,
|
||||||
line_index: &LineIndex,
|
line_index: &LineIndex,
|
||||||
file_path: &std::path::Path,
|
file_path: &Path,
|
||||||
progress: &Arc<Mutex<f32>>,
|
progress: &Arc<Mutex<f32>>,
|
||||||
) -> Vec<FilteredLine> {
|
) -> Vec<FilteredLine> {
|
||||||
let mut filtered = Vec::new();
|
let total_lines = line_index.total_lines;
|
||||||
let regex_matcher = params.build_regex_matcher();
|
if total_lines == 0 {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
if let Ok(file) = File::open(file_path) {
|
// Determine optimal chunk size based on total lines
|
||||||
let mut file_handle = BufReader::new(file);
|
// Aim for enough chunks to utilize all cores, but not too many to avoid overhead
|
||||||
let total_lines = line_index.total_lines;
|
let num_threads = rayon::current_num_threads();
|
||||||
|
let min_chunk_size = 1000; // Process at least 1000 lines per chunk
|
||||||
|
let chunk_size = (total_lines / (num_threads * 4)).max(min_chunk_size);
|
||||||
|
|
||||||
for line_num in 0..total_lines {
|
// Split line numbers into chunks
|
||||||
if let Some(content) = line_index.read_line(&mut file_handle, line_num) {
|
let chunks: Vec<(usize, usize)> = (0..total_lines)
|
||||||
|
.step_by(chunk_size)
|
||||||
|
.map(|start| {
|
||||||
|
let end = (start + chunk_size).min(total_lines);
|
||||||
|
(start, end)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let total_chunks = chunks.len();
|
||||||
|
let processed_chunks = Arc::new(Mutex::new(0usize));
|
||||||
|
|
||||||
|
// Process chunks in parallel
|
||||||
|
let results: Vec<Vec<FilteredLine>> = chunks
|
||||||
|
.par_iter()
|
||||||
|
.filter_map(|(start, end)| {
|
||||||
|
// Each thread opens its own file handle
|
||||||
|
let file = File::open(file_path).ok()?;
|
||||||
|
let mut file_handle = BufReader::new(file);
|
||||||
|
let regex_matcher = params.build_regex_matcher();
|
||||||
|
let mut chunk_results = Vec::new();
|
||||||
|
|
||||||
|
// Read lines in this chunk efficiently (one seek, sequential reads)
|
||||||
|
let lines = line_index.read_line_range(&mut file_handle, *start, *end);
|
||||||
|
|
||||||
|
// Process each line
|
||||||
|
for (line_number, content) in lines {
|
||||||
if params.matches_line(&content, &regex_matcher) {
|
if params.matches_line(&content, &regex_matcher) {
|
||||||
filtered.push(FilteredLine {
|
chunk_results.push(FilteredLine {
|
||||||
line_number: line_num,
|
line_number,
|
||||||
content,
|
content,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if line_num % 1000 == 0 {
|
// Update progress
|
||||||
*progress.lock().unwrap() = line_num as f32 / total_lines as f32;
|
{
|
||||||
|
let mut count = processed_chunks.lock().unwrap();
|
||||||
|
*count += 1;
|
||||||
|
*progress.lock().unwrap() = *count as f32 / total_chunks as f32;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
Some(chunk_results)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Flatten and sort results by line number
|
||||||
|
let mut filtered: Vec<FilteredLine> = results.into_iter().flatten().collect();
|
||||||
|
filtered.sort_by_key(|f| f.line_number);
|
||||||
|
|
||||||
filtered
|
filtered
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user