|
80 | 80 | (median, mean, std_dev, ci_lower, ci_upper) |
81 | 81 | } |
82 | 82 |
|
/// Runs `f` for `warmup` untimed iterations, then `iterations` timed ones.
///
/// Returns `((median, mean, std_dev, ci_lower, ci_upper), results)`. The
/// timing statistics are in milliseconds; `results` collects the value
/// produced by each timed call so callers can inspect real outputs (e.g.
/// match counts) instead of discarding the work.
///
/// The interval is a 95% normal-approximation CI around the mean (z = 1.96).
/// The median is the upper middle element for even sample sizes, matching
/// `benchmark_with_stats`. Panics if `iterations` is zero.
fn benchmark_with_stats_and_results<F, R>(
    f: &mut F,
    warmup: usize,
    iterations: usize,
) -> ((f64, f64, f64, f64, f64), Vec<R>)
where
    F: FnMut() -> R,
{
    // Untimed warmup passes to settle caches and frequency scaling.
    (0..warmup).for_each(|_| {
        f();
    });

    let mut elapsed_ms = Vec::with_capacity(iterations);
    let mut outputs = Vec::with_capacity(iterations);
    for _ in 0..iterations {
        let t0 = Instant::now();
        let out = f();
        elapsed_ms.push(t0.elapsed().as_nanos() as f64 / 1_000_000.0);
        outputs.push(out);
    }

    elapsed_ms.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let median = elapsed_ms[elapsed_ms.len() / 2];

    let count = elapsed_ms.len() as f64;
    let mean = elapsed_ms.iter().sum::<f64>() / count;
    let variance = elapsed_ms.iter().map(|t| (t - mean).powi(2)).sum::<f64>() / count;
    let std_dev = variance.sqrt();

    // 95% CI under a normal approximation of the sampling distribution.
    let margin = 1.96 * std_dev / (iterations as f64).sqrt();

    ((median, mean, std_dev, mean - margin, mean + margin), outputs)
}
| 119 | + |
83 | 120 | #[cfg(all(target_os = "linux", target_arch = "x86_64"))] |
84 | | -fn measure_ipc_median<F: FnMut()>(f: &mut F, iterations: usize) -> f64 { |
| 121 | +fn measure_ipc_median<F, R>(f: &mut F, iterations: usize) -> f64 |
| 122 | +where |
| 123 | + F: FnMut() -> R, |
| 124 | +{ |
85 | 125 | let mut ipc_values = Vec::with_capacity(iterations); |
86 | 126 |
|
87 | 127 | for _ in 0..iterations { |
@@ -119,11 +159,74 @@ fn measure_ipc_median<F: FnMut()>(f: &mut F, iterations: usize) -> f64 { |
119 | 159 |
|
// If it's not linux we don't measure IPC and just return 0.0
#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
fn measure_ipc_median<Work, Out>(f: &mut Work, _iterations: usize) -> f64
where
    Work: FnMut() -> Out,
{
    // Hardware perf counters are unavailable on this target: run the workload
    // once so its side effects still happen, then report the NO_IPC sentinel.
    let _ = f();
    NO_IPC
}
126 | 169 |
|
/// Prints a one-line summary of how `texts` is partitioned by `boundaries`:
/// for each chunk, its share of the total nucleotides and its read count.
///
/// `boundaries` is a non-decreasing index list starting at 0 and ending at
/// `texts.len()` (as produced by `balance_by_text_len`); the assertion checks
/// that every read is covered exactly once.
fn p_chunks<T: AsRef<[u8]>>(texts: &[T], boundaries: &[usize]) {
    use std::fmt::Write as _;

    let total: usize = texts.iter().map(|t| t.as_ref().len()).sum();
    let mut out = String::new();
    let mut total_reads = 0;
    for (i, w) in boundaries.windows(2).enumerate() {
        let chunk_len: usize = texts[w[0]..w[1]].iter().map(|t| t.as_ref().len()).sum();
        let perc = 100.0 * chunk_len as f64 / total as f64;
        let n_reads = w[1] - w[0];
        total_reads += n_reads;
        // write! appends in place; the previous format!-then-push_str
        // allocated a temporary String per chunk. Writing to a String
        // cannot fail, so the unwrap is infallible.
        write!(out, "chunk {} ({:.1}% nts) {} reads ", i, perc, n_reads).unwrap();
    }
    assert_eq!(total_reads, texts.len());
    println!("{}", out.trim_end());
}
| 187 | + |
| 188 | +fn balance_by_text_len<T: AsRef<[u8]>>(texts: &[T], threads: usize) -> Vec<usize> { |
| 189 | + let n = texts.len(); |
| 190 | + if n == 0 { |
| 191 | + return vec![]; |
| 192 | + } |
| 193 | + |
| 194 | + // At least 1 at most n |
| 195 | + let num_chunks = threads.clamp(1, n); |
| 196 | + |
| 197 | + // Only 1, we just return boundary for entire text slice |
| 198 | + if num_chunks == 1 { |
| 199 | + return vec![0, n]; |
| 200 | + } |
| 201 | + |
| 202 | + let mut cum = Vec::with_capacity(n + 1); |
| 203 | + cum.push(0usize); |
| 204 | + for t in texts { |
| 205 | + cum.push(cum.last().unwrap() + t.as_ref().len()); |
| 206 | + } |
| 207 | + let total = *cum.last().unwrap(); |
| 208 | + assert_eq!(total, texts.iter().map(|t| t.as_ref().len()).sum()); |
| 209 | + assert!(total > 0); |
| 210 | + |
| 211 | + let mut boundaries = Vec::with_capacity(num_chunks + 1); |
| 212 | + boundaries.push(0); |
| 213 | + |
| 214 | + for k in 1..num_chunks { |
| 215 | + let target = (total * k) / num_chunks; |
| 216 | + // We find the index where the cumulative length is greater than or equal to our target |
| 217 | + // length |
| 218 | + let idx = match cum.binary_search_by(|&c| c.cmp(&target)) { |
| 219 | + Ok(i) => i, |
| 220 | + Err(i) => i, |
| 221 | + }; |
| 222 | + boundaries.push(idx); |
| 223 | + } |
| 224 | + |
| 225 | + boundaries.push(n); |
| 226 | + p_chunks(texts, &boundaries); |
| 227 | + boundaries |
| 228 | +} |
| 229 | + |
/// Converts a total byte count and a median runtime in milliseconds
/// into a throughput figure in GB/s.
fn calculate_throughput(total_bytes: usize, median_ms: f64) -> f64 {
    let seconds = median_ms / 1000.0;
    let bytes_per_second = total_bytes as f64 / seconds;
    bytes_per_second / 1_000_000_000.0
}
@@ -379,55 +482,65 @@ pub fn benchmark_individual_search_many_texts( |
379 | 482 | Searcher::<Iupac>::new_fwd() |
380 | 483 | }; |
381 | 484 |
|
| 485 | + let boundaries = if threads > 1 { |
| 486 | + balance_by_text_len(texts, threads) |
| 487 | + } else { |
| 488 | + vec![] |
| 489 | + }; |
| 490 | + |
382 | 491 | let mut do_work = || { |
383 | 492 | if threads <= 1 { |
| 493 | + let mut count = 0usize; |
384 | 494 | for text in texts { |
385 | 495 | let t = text.as_ref(); |
386 | 496 | for query in queries { |
387 | | - let _matches = searcher.search(query, t, k); |
388 | | - black_box(_matches); |
| 497 | + count += searcher.search(query, t, k).len(); |
389 | 498 | } |
390 | 499 | } |
| 500 | + count |
391 | 501 | } else { |
| 502 | + let num_chunks = boundaries.len().saturating_sub(1); |
| 503 | + if num_chunks == 0 { |
| 504 | + return 0usize; |
| 505 | + } |
392 | 506 | let pool = rayon::ThreadPoolBuilder::new() |
393 | 507 | .num_threads(threads) |
394 | 508 | .build() |
395 | 509 | .unwrap(); |
396 | | - let chunk_size = (texts.len() + threads - 1) / threads; |
397 | 510 | pool.install(|| { |
398 | | - texts.par_chunks(chunk_size).for_each(|chunk| { |
399 | | - let mut worker = if USE_RC { |
400 | | - Searcher::<Iupac>::new_rc() |
401 | | - } else { |
402 | | - Searcher::<Iupac>::new_fwd() |
403 | | - }; |
404 | | - for text in chunk { |
405 | | - let t = text.as_ref(); |
406 | | - for query in queries { |
407 | | - let _matches = worker.search(query, t, k); |
408 | | - black_box(_matches); |
| 511 | + (0..num_chunks) |
| 512 | + .into_par_iter() |
| 513 | + .map(|j| { |
| 514 | + let chunk = &texts[boundaries[j]..boundaries[j + 1]]; |
| 515 | + let mut worker = if USE_RC { |
| 516 | + Searcher::<Iupac>::new_rc() |
| 517 | + } else { |
| 518 | + Searcher::<Iupac>::new_fwd() |
| 519 | + }; |
| 520 | + let mut count = 0usize; |
| 521 | + for text in chunk { |
| 522 | + let t = text.as_ref(); |
| 523 | + for query in queries { |
| 524 | + count += worker.search(query, t, k).len(); |
| 525 | + } |
409 | 526 | } |
410 | | - } |
411 | | - }); |
412 | | - }); |
| 527 | + count |
| 528 | + }) |
| 529 | + .sum() |
| 530 | + }) |
413 | 531 | } |
414 | 532 | }; |
415 | 533 |
|
416 | | - let (median, mean, std_dev, ci_lower, ci_upper) = |
417 | | - benchmark_with_stats(&mut do_work, warmup, iterations); |
| 534 | + let ((median, mean, std_dev, ci_lower, ci_upper), match_counts) = |
| 535 | + benchmark_with_stats_and_results(&mut do_work, warmup, iterations); |
418 | 536 |
|
419 | 537 | let ipc = if measure_ipc { |
420 | 538 | Some(measure_ipc_median(&mut do_work, iterations)) |
421 | 539 | } else { |
422 | 540 | None |
423 | 541 | }; |
424 | 542 |
|
425 | | - let n_matches = texts.iter().fold(0usize, |acc, text| { |
426 | | - acc + queries |
427 | | - .iter() |
428 | | - .map(|q| searcher.search(q, text.as_ref(), k).len()) |
429 | | - .sum::<usize>() |
430 | | - }); |
| 543 | + let n_matches = match_counts.first().copied().unwrap_or(0); |
431 | 544 |
|
432 | 545 | finish_results( |
433 | 546 | median, |
@@ -466,52 +579,64 @@ pub fn benchmark_tiling_many_texts( |
466 | 579 | }; |
467 | 580 |
|
468 | 581 | let encoded = Arc::new(searcher.encode_patterns(queries)); |
| 582 | + let boundaries = if threads > 1 { |
| 583 | + balance_by_text_len(texts, threads) |
| 584 | + } else { |
| 585 | + vec![] |
| 586 | + }; |
| 587 | + |
469 | 588 | let mut do_work = || { |
470 | 589 | if threads <= 1 { |
| 590 | + let mut count = 0usize; |
471 | 591 | for text in texts { |
472 | | - let _matches = searcher.search_encoded_patterns(&*encoded, text.as_ref(), k); |
473 | | - black_box(_matches); |
| 592 | + count += searcher |
| 593 | + .search_encoded_patterns(&*encoded, text.as_ref(), k) |
| 594 | + .len(); |
474 | 595 | } |
| 596 | + count |
475 | 597 | } else { |
| 598 | + let num_chunks = boundaries.len().saturating_sub(1); |
| 599 | + if num_chunks == 0 { |
| 600 | + return 0usize; |
| 601 | + } |
476 | 602 | let pool = rayon::ThreadPoolBuilder::new() |
477 | 603 | .num_threads(threads) |
478 | 604 | .build() |
479 | 605 | .unwrap(); |
480 | | - let chunk_size = (texts.len() + threads - 1) / threads; |
481 | 606 | pool.install(|| { |
482 | | - texts.par_chunks(chunk_size).for_each(|chunk| { |
483 | | - let encoded = Arc::clone(&encoded); |
484 | | - let mut worker = if USE_RC { |
485 | | - Searcher::<Iupac>::new_rc() |
486 | | - } else { |
487 | | - Searcher::<Iupac>::new_fwd() |
488 | | - }; |
489 | | - for text in chunk { |
490 | | - let _matches = worker.search_encoded_patterns(&*encoded, text.as_ref(), k); |
491 | | - black_box(_matches); |
492 | | - } |
493 | | - }); |
494 | | - }); |
| 607 | + (0..num_chunks) |
| 608 | + .into_par_iter() |
| 609 | + .map(|j| { |
| 610 | + let chunk = &texts[boundaries[j]..boundaries[j + 1]]; |
| 611 | + let encoded = Arc::clone(&encoded); |
| 612 | + let mut worker = if USE_RC { |
| 613 | + Searcher::<Iupac>::new_rc() |
| 614 | + } else { |
| 615 | + Searcher::<Iupac>::new_fwd() |
| 616 | + }; |
| 617 | + let mut count = 0usize; |
| 618 | + for text in chunk { |
| 619 | + count += worker |
| 620 | + .search_encoded_patterns(&*encoded, text.as_ref(), k) |
| 621 | + .len(); |
| 622 | + } |
| 623 | + count |
| 624 | + }) |
| 625 | + .sum() |
| 626 | + }) |
495 | 627 | } |
496 | 628 | }; |
497 | 629 |
|
498 | | - let (median, mean, std_dev, ci_lower, ci_upper) = |
499 | | - benchmark_with_stats(&mut do_work, warmup, iterations); |
| 630 | + let ((median, mean, std_dev, ci_lower, ci_upper), match_counts) = |
| 631 | + benchmark_with_stats_and_results(&mut do_work, warmup, iterations); |
500 | 632 |
|
501 | 633 | let ipc = if measure_ipc { |
502 | 634 | Some(measure_ipc_median(&mut do_work, iterations)) |
503 | 635 | } else { |
504 | 636 | None |
505 | 637 | }; |
506 | 638 |
|
507 | | - let n_matches = texts |
508 | | - .iter() |
509 | | - .map(|t| { |
510 | | - searcher |
511 | | - .search_encoded_patterns(&*encoded, t.as_ref(), k) |
512 | | - .len() |
513 | | - }) |
514 | | - .sum(); |
| 639 | + let n_matches = match_counts.first().copied().unwrap_or(0); |
515 | 640 |
|
516 | 641 | finish_results( |
517 | 642 | median, |
@@ -545,54 +670,67 @@ pub fn benchmark_edlib_many_texts( |
545 | 670 | ); |
546 | 671 |
|
547 | 672 | let edlib_config = Arc::new(get_edlib_config(k as i32, alphabet)); |
| 673 | + let boundaries = if threads > 1 { |
| 674 | + balance_by_text_len(texts, threads) |
| 675 | + } else { |
| 676 | + vec![] |
| 677 | + }; |
| 678 | + |
548 | 679 | let mut do_work = || { |
549 | 680 | if threads <= 1 { |
| 681 | + let mut count = 0usize; |
550 | 682 | for text in texts { |
551 | 683 | let t = text.as_ref(); |
552 | 684 | for query in queries { |
553 | | - let _result = run_edlib(query, t, &*edlib_config); |
554 | | - black_box(_result); |
| 685 | + let result = run_edlib(query, t, &*edlib_config); |
| 686 | + if result.editDistance <= k as i32 && result.editDistance != -1 { |
| 687 | + count += 1; |
| 688 | + } |
555 | 689 | } |
556 | 690 | } |
| 691 | + count |
557 | 692 | } else { |
| 693 | + let num_chunks = boundaries.len().saturating_sub(1); |
| 694 | + if num_chunks == 0 { |
| 695 | + return 0usize; |
| 696 | + } |
558 | 697 | let pool = rayon::ThreadPoolBuilder::new() |
559 | 698 | .num_threads(threads) |
560 | 699 | .build() |
561 | 700 | .unwrap(); |
562 | | - let chunk_size = (texts.len() + threads - 1) / threads; |
563 | 701 | pool.install(|| { |
564 | | - texts.par_chunks(chunk_size).for_each(|chunk| { |
565 | | - let config = Arc::clone(&edlib_config); |
566 | | - for text in chunk { |
567 | | - let t = text.as_ref(); |
568 | | - for query in queries { |
569 | | - let _result = run_edlib(query, t, &*config); |
570 | | - black_box(_result); |
| 702 | + (0..num_chunks) |
| 703 | + .into_par_iter() |
| 704 | + .map(|j| { |
| 705 | + let chunk = &texts[boundaries[j]..boundaries[j + 1]]; |
| 706 | + let config = Arc::clone(&edlib_config); |
| 707 | + let mut count = 0usize; |
| 708 | + for text in chunk { |
| 709 | + let t = text.as_ref(); |
| 710 | + for query in queries { |
| 711 | + let result = run_edlib(query, t, &*config); |
| 712 | + if result.editDistance <= k as i32 && result.editDistance != -1 { |
| 713 | + count += 1; |
| 714 | + } |
| 715 | + } |
571 | 716 | } |
572 | | - } |
573 | | - }); |
574 | | - }); |
| 717 | + count |
| 718 | + }) |
| 719 | + .sum() |
| 720 | + }) |
575 | 721 | } |
576 | 722 | }; |
577 | 723 |
|
578 | | - let (median, mean, std_dev, ci_lower, ci_upper) = |
579 | | - benchmark_with_stats(&mut do_work, warmup, iterations); |
| 724 | + let ((median, mean, std_dev, ci_lower, ci_upper), match_counts) = |
| 725 | + benchmark_with_stats_and_results(&mut do_work, warmup, iterations); |
580 | 726 |
|
581 | 727 | let ipc = if measure_ipc { |
582 | 728 | Some(measure_ipc_median(&mut do_work, iterations)) |
583 | 729 | } else { |
584 | 730 | None |
585 | 731 | }; |
586 | 732 |
|
587 | | - let n_matches = texts.iter().fold(0usize, |acc, text| { |
588 | | - acc + queries |
589 | | - .iter() |
590 | | - .filter(|q| { |
591 | | - let result = run_edlib(q, text.as_ref(), &*edlib_config); |
592 | | - result.editDistance <= k as i32 && result.editDistance != -1 |
593 | | - }) |
594 | | - .count() |
595 | | - }); |
| 733 | + let n_matches = match_counts.first().copied().unwrap_or(0); |
596 | 734 |
|
597 | 735 | finish_results( |
598 | 736 | median, |
|
0 commit comments