goddamn it is fast now
This commit is contained in:
@@ -51,4 +51,40 @@ impl LineIndex {
|
||||
|
||||
Some(line.trim_end_matches(['\r', '\n']).to_string())
|
||||
}
|
||||
|
||||
/// Reads a range of lines efficiently by seeking once and reading sequentially
|
||||
pub fn read_line_range(
|
||||
&self,
|
||||
file: &mut BufReader<File>,
|
||||
start: usize,
|
||||
end: usize,
|
||||
) -> Vec<(usize, String)> {
|
||||
let end = end.min(self.total_lines);
|
||||
if start >= end || start >= self.total_lines {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut results = Vec::with_capacity(end - start);
|
||||
|
||||
// Seek to the start position once
|
||||
let start_pos = self.positions[start];
|
||||
if file.seek(SeekFrom::Start(start_pos)).is_err() {
|
||||
return results;
|
||||
}
|
||||
|
||||
// Read lines sequentially without additional seeks
|
||||
let mut line = String::new();
|
||||
for line_num in start..end {
|
||||
line.clear();
|
||||
if file.read_line(&mut line).is_err() {
|
||||
break;
|
||||
}
|
||||
if line.is_empty() {
|
||||
break;
|
||||
}
|
||||
results.push((line_num, line.trim_end_matches(['\r', '\n']).to_string()));
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use rayon::prelude::*;
|
||||
use regex::Regex;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::path::Path;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
|
||||
@@ -100,31 +102,69 @@ pub fn start_search(
|
||||
fn search_lines(
|
||||
params: &SearchParams,
|
||||
line_index: &LineIndex,
|
||||
file_path: &std::path::Path,
|
||||
file_path: &Path,
|
||||
progress: &Arc<Mutex<f32>>,
|
||||
) -> Vec<FilteredLine> {
|
||||
let mut filtered = Vec::new();
|
||||
let regex_matcher = params.build_regex_matcher();
|
||||
let total_lines = line_index.total_lines;
|
||||
if total_lines == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
if let Ok(file) = File::open(file_path) {
|
||||
let mut file_handle = BufReader::new(file);
|
||||
let total_lines = line_index.total_lines;
|
||||
// Determine optimal chunk size based on total lines
|
||||
// Aim for enough chunks to utilize all cores, but not too many to avoid overhead
|
||||
let num_threads = rayon::current_num_threads();
|
||||
let min_chunk_size = 1000; // Process at least 1000 lines per chunk
|
||||
let chunk_size = (total_lines / (num_threads * 4)).max(min_chunk_size);
|
||||
|
||||
for line_num in 0..total_lines {
|
||||
if let Some(content) = line_index.read_line(&mut file_handle, line_num) {
|
||||
// Split line numbers into chunks
|
||||
let chunks: Vec<(usize, usize)> = (0..total_lines)
|
||||
.step_by(chunk_size)
|
||||
.map(|start| {
|
||||
let end = (start + chunk_size).min(total_lines);
|
||||
(start, end)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let total_chunks = chunks.len();
|
||||
let processed_chunks = Arc::new(Mutex::new(0usize));
|
||||
|
||||
// Process chunks in parallel
|
||||
let results: Vec<Vec<FilteredLine>> = chunks
|
||||
.par_iter()
|
||||
.filter_map(|(start, end)| {
|
||||
// Each thread opens its own file handle
|
||||
let file = File::open(file_path).ok()?;
|
||||
let mut file_handle = BufReader::new(file);
|
||||
let regex_matcher = params.build_regex_matcher();
|
||||
let mut chunk_results = Vec::new();
|
||||
|
||||
// Read lines in this chunk efficiently (one seek, sequential reads)
|
||||
let lines = line_index.read_line_range(&mut file_handle, *start, *end);
|
||||
|
||||
// Process each line
|
||||
for (line_number, content) in lines {
|
||||
if params.matches_line(&content, ®ex_matcher) {
|
||||
filtered.push(FilteredLine {
|
||||
line_number: line_num,
|
||||
chunk_results.push(FilteredLine {
|
||||
line_number,
|
||||
content,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if line_num % 1000 == 0 {
|
||||
*progress.lock().unwrap() = line_num as f32 / total_lines as f32;
|
||||
// Update progress
|
||||
{
|
||||
let mut count = processed_chunks.lock().unwrap();
|
||||
*count += 1;
|
||||
*progress.lock().unwrap() = *count as f32 / total_chunks as f32;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(chunk_results)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Flatten and sort results by line number
|
||||
let mut filtered: Vec<FilteredLine> = results.into_iter().flatten().collect();
|
||||
filtered.sort_by_key(|f| f.line_number);
|
||||
|
||||
filtered
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user