Speed up search dramatically by parallelizing line scanning across chunks

This commit is contained in:
2025-12-02 18:41:12 +01:00
parent a53ced9160
commit 0dd5962b9d
4 changed files with 137 additions and 14 deletions

View File

@@ -51,4 +51,40 @@ impl LineIndex {
Some(line.trim_end_matches(['\r', '\n']).to_string())
}
/// Reads the half-open line range `[start, end)` efficiently: one seek to
/// the byte offset of `start`, then purely sequential reads.
///
/// Returns `(line_number, content)` pairs with trailing `\r`/`\n` stripped.
/// The range is clamped to the indexed line count; an out-of-range `start`
/// or an empty range yields an empty vector. A read error or premature EOF
/// ends the scan early, returning whatever was collected so far.
pub fn read_line_range(
    &self,
    file: &mut BufReader<File>,
    start: usize,
    end: usize,
) -> Vec<(usize, String)> {
    let end = end.min(self.total_lines);
    if start >= end || start >= self.total_lines {
        return Vec::new();
    }
    let mut collected = Vec::with_capacity(end - start);
    // Single seek to the recorded byte offset of the first requested line;
    // every subsequent line is read sequentially from the buffered stream.
    if file.seek(SeekFrom::Start(self.positions[start])).is_err() {
        return collected;
    }
    // Reuse one buffer across iterations to avoid a fresh allocation per line.
    let mut buf = String::new();
    for number in start..end {
        buf.clear();
        match file.read_line(&mut buf) {
            // Non-empty read: strip the line terminator and keep the line.
            Ok(_) if !buf.is_empty() => {
                collected.push((number, buf.trim_end_matches(['\r', '\n']).to_string()));
            }
            // EOF (empty read) or I/O error: stop and return what we have.
            _ => break,
        }
    }
    collected
}
}

View File

@@ -1,6 +1,8 @@
use rayon::prelude::*;
use regex::Regex;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::thread;
@@ -100,31 +102,69 @@ pub fn start_search(
fn search_lines(
params: &SearchParams,
line_index: &LineIndex,
file_path: &std::path::Path,
file_path: &Path,
progress: &Arc<Mutex<f32>>,
) -> Vec<FilteredLine> {
let mut filtered = Vec::new();
let regex_matcher = params.build_regex_matcher();
let total_lines = line_index.total_lines;
if total_lines == 0 {
return Vec::new();
}
if let Ok(file) = File::open(file_path) {
let mut file_handle = BufReader::new(file);
let total_lines = line_index.total_lines;
// Determine optimal chunk size based on total lines
// Aim for enough chunks to utilize all cores, but not too many to avoid overhead
let num_threads = rayon::current_num_threads();
let min_chunk_size = 1000; // Process at least 1000 lines per chunk
let chunk_size = (total_lines / (num_threads * 4)).max(min_chunk_size);
for line_num in 0..total_lines {
if let Some(content) = line_index.read_line(&mut file_handle, line_num) {
// Split line numbers into chunks
let chunks: Vec<(usize, usize)> = (0..total_lines)
.step_by(chunk_size)
.map(|start| {
let end = (start + chunk_size).min(total_lines);
(start, end)
})
.collect();
let total_chunks = chunks.len();
let processed_chunks = Arc::new(Mutex::new(0usize));
// Process chunks in parallel
let results: Vec<Vec<FilteredLine>> = chunks
.par_iter()
.filter_map(|(start, end)| {
// Each thread opens its own file handle
let file = File::open(file_path).ok()?;
let mut file_handle = BufReader::new(file);
let regex_matcher = params.build_regex_matcher();
let mut chunk_results = Vec::new();
// Read lines in this chunk efficiently (one seek, sequential reads)
let lines = line_index.read_line_range(&mut file_handle, *start, *end);
// Process each line
for (line_number, content) in lines {
if params.matches_line(&content, &regex_matcher) {
filtered.push(FilteredLine {
line_number: line_num,
chunk_results.push(FilteredLine {
line_number,
content,
});
}
}
if line_num % 1000 == 0 {
*progress.lock().unwrap() = line_num as f32 / total_lines as f32;
// Update progress
{
let mut count = processed_chunks.lock().unwrap();
*count += 1;
*progress.lock().unwrap() = *count as f32 / total_chunks as f32;
}
}
}
Some(chunk_results)
})
.collect();
// Flatten and sort results by line number
let mut filtered: Vec<FilteredLine> = results.into_iter().flatten().collect();
filtered.sort_by_key(|f| f.line_number);
filtered
}