Skip to content

Commit 4ab49b6

Browse files
committed
Count matches during benchmark iterations, and chunk work among threads based on nucleotide (nt) counts
1 parent 7f7a7dd commit 4ab49b6

File tree

1 file changed

+215
-77
lines changed

1 file changed

+215
-77
lines changed

evals/src/sassy2/bench.rs

Lines changed: 215 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,48 @@ where
8080
(median, mean, std_dev, ci_lower, ci_upper)
8181
}
8282

83+
/// Run `f` `warmup` times untimed, then `iterations` timed runs.
///
/// Returns `((median, mean, std_dev, ci_lower, ci_upper), results)` where
/// all times are in milliseconds, the CI is a 95% normal-approximation
/// interval on the mean, and `results` holds each timed iteration's return
/// value (so callers can e.g. report match counts without re-running the
/// work after timing).
///
/// With `iterations == 0` all statistics are NaN and `results` is empty,
/// instead of panicking on an empty sample.
fn benchmark_with_stats_and_results<F, R>(
    f: &mut F,
    warmup: usize,
    iterations: usize,
) -> ((f64, f64, f64, f64, f64), Vec<R>)
where
    F: FnMut() -> R,
{
    for _ in 0..warmup {
        f();
    }

    let mut times = Vec::with_capacity(iterations);
    let mut results = Vec::with_capacity(iterations);
    for _ in 0..iterations {
        let start = Instant::now();
        let result = f();
        times.push(start.elapsed().as_nanos() as f64 / 1_000_000.0);
        results.push(result);
    }

    // Empty sample: indexing the median would panic and the divisions
    // below would be 0/0; report NaN statistics instead.
    if times.is_empty() {
        return ((f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::NAN), results);
    }

    // total_cmp is a total order on f64, so there is no unwrap/panic path
    // (unlike partial_cmp, which panics if a NaN ever slips in).
    times.sort_by(|a, b| a.total_cmp(b));
    let median = times[times.len() / 2];
    let mean = times.iter().sum::<f64>() / times.len() as f64;
    let variance = times.iter().map(|t| (t - mean).powi(2)).sum::<f64>() / times.len() as f64;
    let std_dev = variance.sqrt();

    // 95% confidence interval on the mean (normal approximation, z = 1.96).
    let n = iterations as f64;
    let standard_error = std_dev / n.sqrt();
    let z_score = 1.96;
    let margin = z_score * standard_error;
    let ci_lower = mean - margin;
    let ci_upper = mean + margin;

    ((median, mean, std_dev, ci_lower, ci_upper), results)
}
119+
83120
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
84-
fn measure_ipc_median<F: FnMut()>(f: &mut F, iterations: usize) -> f64 {
121+
fn measure_ipc_median<F, R>(f: &mut F, iterations: usize) -> f64
122+
where
123+
F: FnMut() -> R,
124+
{
85125
let mut ipc_values = Vec::with_capacity(iterations);
86126

87127
for _ in 0..iterations {
@@ -119,11 +159,74 @@ fn measure_ipc_median<F: FnMut()>(f: &mut F, iterations: usize) -> f64 {
119159

120160
// Non-Linux/x86_64 fallback: perf counters aren't available here, so IPC
// isn't measured. Run the workload once anyway (callers may rely on its
// side effects) and hand back the NO_IPC sentinel.
#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
fn measure_ipc_median<F, R>(f: &mut F, _iterations: usize) -> f64
where
    F: FnMut() -> R,
{
    let _ = f();
    NO_IPC
}
126169

170+
/// Print a one-line summary of how `texts` are partitioned into chunks.
///
/// `boundaries` are chunk edges over read indices (first element 0, last
/// element `texts.len()`), as produced by `balance_by_text_len`. For each
/// chunk we report its share of total nucleotides and its read count.
/// Panics (via the assert) if the boundaries do not cover every read
/// exactly once.
fn p_chunks<T: AsRef<[u8]>>(texts: &[T], boundaries: &[usize]) {
    use std::fmt::Write;

    let total: usize = texts.iter().map(|t| t.as_ref().len()).sum();
    let mut out = String::new();
    let mut total_reads = 0;
    for (i, w) in boundaries.windows(2).enumerate() {
        let chunk_len: usize = texts[w[0]..w[1]].iter().map(|t| t.as_ref().len()).sum();
        // Guard the all-empty-reads case so the percentage prints as 0.0
        // instead of NaN.
        let perc = if total == 0 {
            0.0
        } else {
            100.0 * chunk_len as f64 / total as f64
        };
        let n_reads = w[1] - w[0];
        total_reads += n_reads;
        // write! appends straight into the buffer, avoiding the
        // per-chunk intermediate String that format! + push_str allocated.
        let _ = write!(out, "chunk {} ({:.1}% nts) {} reads ", i, perc, n_reads);
    }
    assert_eq!(total_reads, texts.len(), "boundaries must cover all reads");
    println!("{}", out.trim_end());
}
187+
188+
fn balance_by_text_len<T: AsRef<[u8]>>(texts: &[T], threads: usize) -> Vec<usize> {
189+
let n = texts.len();
190+
if n == 0 {
191+
return vec![];
192+
}
193+
194+
// At least 1 at most n
195+
let num_chunks = threads.clamp(1, n);
196+
197+
// Only 1, we just return boundary for entire text slice
198+
if num_chunks == 1 {
199+
return vec![0, n];
200+
}
201+
202+
let mut cum = Vec::with_capacity(n + 1);
203+
cum.push(0usize);
204+
for t in texts {
205+
cum.push(cum.last().unwrap() + t.as_ref().len());
206+
}
207+
let total = *cum.last().unwrap();
208+
assert_eq!(total, texts.iter().map(|t| t.as_ref().len()).sum());
209+
assert!(total > 0);
210+
211+
let mut boundaries = Vec::with_capacity(num_chunks + 1);
212+
boundaries.push(0);
213+
214+
for k in 1..num_chunks {
215+
let target = (total * k) / num_chunks;
216+
// We find the index where the cumulative length is greater than or equal to our target
217+
// length
218+
let idx = match cum.binary_search_by(|&c| c.cmp(&target)) {
219+
Ok(i) => i,
220+
Err(i) => i,
221+
};
222+
boundaries.push(idx);
223+
}
224+
225+
boundaries.push(n);
226+
p_chunks(texts, &boundaries);
227+
boundaries
228+
}
229+
127230
/// Convert a byte count and a median runtime in milliseconds into
/// throughput in GB/s (decimal gigabytes).
fn calculate_throughput(total_bytes: usize, median_ms: f64) -> f64 {
    let seconds = median_ms / 1000.0;
    let bytes_per_second = total_bytes as f64 / seconds;
    bytes_per_second / 1_000_000_000.0
}
@@ -379,55 +482,65 @@ pub fn benchmark_individual_search_many_texts(
379482
Searcher::<Iupac>::new_fwd()
380483
};
381484

485+
let boundaries = if threads > 1 {
486+
balance_by_text_len(texts, threads)
487+
} else {
488+
vec![]
489+
};
490+
382491
let mut do_work = || {
383492
if threads <= 1 {
493+
let mut count = 0usize;
384494
for text in texts {
385495
let t = text.as_ref();
386496
for query in queries {
387-
let _matches = searcher.search(query, t, k);
388-
black_box(_matches);
497+
count += searcher.search(query, t, k).len();
389498
}
390499
}
500+
count
391501
} else {
502+
let num_chunks = boundaries.len().saturating_sub(1);
503+
if num_chunks == 0 {
504+
return 0usize;
505+
}
392506
let pool = rayon::ThreadPoolBuilder::new()
393507
.num_threads(threads)
394508
.build()
395509
.unwrap();
396-
let chunk_size = (texts.len() + threads - 1) / threads;
397510
pool.install(|| {
398-
texts.par_chunks(chunk_size).for_each(|chunk| {
399-
let mut worker = if USE_RC {
400-
Searcher::<Iupac>::new_rc()
401-
} else {
402-
Searcher::<Iupac>::new_fwd()
403-
};
404-
for text in chunk {
405-
let t = text.as_ref();
406-
for query in queries {
407-
let _matches = worker.search(query, t, k);
408-
black_box(_matches);
511+
(0..num_chunks)
512+
.into_par_iter()
513+
.map(|j| {
514+
let chunk = &texts[boundaries[j]..boundaries[j + 1]];
515+
let mut worker = if USE_RC {
516+
Searcher::<Iupac>::new_rc()
517+
} else {
518+
Searcher::<Iupac>::new_fwd()
519+
};
520+
let mut count = 0usize;
521+
for text in chunk {
522+
let t = text.as_ref();
523+
for query in queries {
524+
count += worker.search(query, t, k).len();
525+
}
409526
}
410-
}
411-
});
412-
});
527+
count
528+
})
529+
.sum()
530+
})
413531
}
414532
};
415533

416-
let (median, mean, std_dev, ci_lower, ci_upper) =
417-
benchmark_with_stats(&mut do_work, warmup, iterations);
534+
let ((median, mean, std_dev, ci_lower, ci_upper), match_counts) =
535+
benchmark_with_stats_and_results(&mut do_work, warmup, iterations);
418536

419537
let ipc = if measure_ipc {
420538
Some(measure_ipc_median(&mut do_work, iterations))
421539
} else {
422540
None
423541
};
424542

425-
let n_matches = texts.iter().fold(0usize, |acc, text| {
426-
acc + queries
427-
.iter()
428-
.map(|q| searcher.search(q, text.as_ref(), k).len())
429-
.sum::<usize>()
430-
});
543+
let n_matches = match_counts.first().copied().unwrap_or(0);
431544

432545
finish_results(
433546
median,
@@ -466,52 +579,64 @@ pub fn benchmark_tiling_many_texts(
466579
};
467580

468581
let encoded = Arc::new(searcher.encode_patterns(queries));
582+
let boundaries = if threads > 1 {
583+
balance_by_text_len(texts, threads)
584+
} else {
585+
vec![]
586+
};
587+
469588
let mut do_work = || {
470589
if threads <= 1 {
590+
let mut count = 0usize;
471591
for text in texts {
472-
let _matches = searcher.search_encoded_patterns(&*encoded, text.as_ref(), k);
473-
black_box(_matches);
592+
count += searcher
593+
.search_encoded_patterns(&*encoded, text.as_ref(), k)
594+
.len();
474595
}
596+
count
475597
} else {
598+
let num_chunks = boundaries.len().saturating_sub(1);
599+
if num_chunks == 0 {
600+
return 0usize;
601+
}
476602
let pool = rayon::ThreadPoolBuilder::new()
477603
.num_threads(threads)
478604
.build()
479605
.unwrap();
480-
let chunk_size = (texts.len() + threads - 1) / threads;
481606
pool.install(|| {
482-
texts.par_chunks(chunk_size).for_each(|chunk| {
483-
let encoded = Arc::clone(&encoded);
484-
let mut worker = if USE_RC {
485-
Searcher::<Iupac>::new_rc()
486-
} else {
487-
Searcher::<Iupac>::new_fwd()
488-
};
489-
for text in chunk {
490-
let _matches = worker.search_encoded_patterns(&*encoded, text.as_ref(), k);
491-
black_box(_matches);
492-
}
493-
});
494-
});
607+
(0..num_chunks)
608+
.into_par_iter()
609+
.map(|j| {
610+
let chunk = &texts[boundaries[j]..boundaries[j + 1]];
611+
let encoded = Arc::clone(&encoded);
612+
let mut worker = if USE_RC {
613+
Searcher::<Iupac>::new_rc()
614+
} else {
615+
Searcher::<Iupac>::new_fwd()
616+
};
617+
let mut count = 0usize;
618+
for text in chunk {
619+
count += worker
620+
.search_encoded_patterns(&*encoded, text.as_ref(), k)
621+
.len();
622+
}
623+
count
624+
})
625+
.sum()
626+
})
495627
}
496628
};
497629

498-
let (median, mean, std_dev, ci_lower, ci_upper) =
499-
benchmark_with_stats(&mut do_work, warmup, iterations);
630+
let ((median, mean, std_dev, ci_lower, ci_upper), match_counts) =
631+
benchmark_with_stats_and_results(&mut do_work, warmup, iterations);
500632

501633
let ipc = if measure_ipc {
502634
Some(measure_ipc_median(&mut do_work, iterations))
503635
} else {
504636
None
505637
};
506638

507-
let n_matches = texts
508-
.iter()
509-
.map(|t| {
510-
searcher
511-
.search_encoded_patterns(&*encoded, t.as_ref(), k)
512-
.len()
513-
})
514-
.sum();
639+
let n_matches = match_counts.first().copied().unwrap_or(0);
515640

516641
finish_results(
517642
median,
@@ -545,54 +670,67 @@ pub fn benchmark_edlib_many_texts(
545670
);
546671

547672
let edlib_config = Arc::new(get_edlib_config(k as i32, alphabet));
673+
let boundaries = if threads > 1 {
674+
balance_by_text_len(texts, threads)
675+
} else {
676+
vec![]
677+
};
678+
548679
let mut do_work = || {
549680
if threads <= 1 {
681+
let mut count = 0usize;
550682
for text in texts {
551683
let t = text.as_ref();
552684
for query in queries {
553-
let _result = run_edlib(query, t, &*edlib_config);
554-
black_box(_result);
685+
let result = run_edlib(query, t, &*edlib_config);
686+
if result.editDistance <= k as i32 && result.editDistance != -1 {
687+
count += 1;
688+
}
555689
}
556690
}
691+
count
557692
} else {
693+
let num_chunks = boundaries.len().saturating_sub(1);
694+
if num_chunks == 0 {
695+
return 0usize;
696+
}
558697
let pool = rayon::ThreadPoolBuilder::new()
559698
.num_threads(threads)
560699
.build()
561700
.unwrap();
562-
let chunk_size = (texts.len() + threads - 1) / threads;
563701
pool.install(|| {
564-
texts.par_chunks(chunk_size).for_each(|chunk| {
565-
let config = Arc::clone(&edlib_config);
566-
for text in chunk {
567-
let t = text.as_ref();
568-
for query in queries {
569-
let _result = run_edlib(query, t, &*config);
570-
black_box(_result);
702+
(0..num_chunks)
703+
.into_par_iter()
704+
.map(|j| {
705+
let chunk = &texts[boundaries[j]..boundaries[j + 1]];
706+
let config = Arc::clone(&edlib_config);
707+
let mut count = 0usize;
708+
for text in chunk {
709+
let t = text.as_ref();
710+
for query in queries {
711+
let result = run_edlib(query, t, &*config);
712+
if result.editDistance <= k as i32 && result.editDistance != -1 {
713+
count += 1;
714+
}
715+
}
571716
}
572-
}
573-
});
574-
});
717+
count
718+
})
719+
.sum()
720+
})
575721
}
576722
};
577723

578-
let (median, mean, std_dev, ci_lower, ci_upper) =
579-
benchmark_with_stats(&mut do_work, warmup, iterations);
724+
let ((median, mean, std_dev, ci_lower, ci_upper), match_counts) =
725+
benchmark_with_stats_and_results(&mut do_work, warmup, iterations);
580726

581727
let ipc = if measure_ipc {
582728
Some(measure_ipc_median(&mut do_work, iterations))
583729
} else {
584730
None
585731
};
586732

587-
let n_matches = texts.iter().fold(0usize, |acc, text| {
588-
acc + queries
589-
.iter()
590-
.filter(|q| {
591-
let result = run_edlib(q, text.as_ref(), &*edlib_config);
592-
result.editDistance <= k as i32 && result.editDistance != -1
593-
})
594-
.count()
595-
});
733+
let n_matches = match_counts.first().copied().unwrap_or(0);
596734

597735
finish_results(
598736
median,

0 commit comments

Comments
 (0)