Rollup merge of #99933 - alex:parallel-html-checking, r=Mark-Simulacrum

parallelize HTML checking tool

there's a lot of IO, so timings on my laptop are far from stable, but it seems to be considerably faster.

this step often appears to take 5+ minutes in CI, so hopefully this offers a speedup
This commit is contained in:
Matthias Krüger 2022-08-02 23:07:44 +02:00 committed by GitHub
commit 63cd10154d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 21 deletions

View file

@ -1788,6 +1788,7 @@ dependencies = [
name = "html-checker"
version = "0.1.0"
dependencies = [
"rayon",
"walkdir",
]

View file

@ -9,3 +9,4 @@ path = "main.rs"
[dependencies]
walkdir = "2"
rayon = "1.5"

View file

@ -1,3 +1,4 @@
use rayon::iter::{ParallelBridge, ParallelIterator};
use std::env;
use std::path::Path;
use std::process::{Command, Output};
@ -56,27 +57,30 @@ const DOCS_TO_CHECK: &[&str] =
// Returns the number of files read and the number of errors.
fn find_all_html_files(dir: &Path) -> (usize, usize) {
let mut files_read = 0;
let mut errors = 0;
for entry in walkdir::WalkDir::new(dir).into_iter().filter_entry(|e| {
e.depth() != 1
|| e.file_name()
.to_str()
.map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s))
.unwrap_or(false)
}) {
let entry = entry.expect("failed to read file");
if !entry.file_type().is_file() {
continue;
}
let entry = entry.path();
if entry.extension().and_then(|s| s.to_str()) == Some("html") {
errors += check_html_file(&entry);
files_read += 1;
}
}
(files_read, errors)
walkdir::WalkDir::new(dir)
.into_iter()
.filter_entry(|e| {
e.depth() != 1
|| e.file_name()
.to_str()
.map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s))
.unwrap_or(false)
})
.par_bridge()
.map(|entry| {
let entry = entry.expect("failed to read file");
if !entry.file_type().is_file() {
return (0, 0);
}
let entry = entry.path();
// (Number of files processed, number of errors)
if entry.extension().and_then(|s| s.to_str()) == Some("html") {
(1, check_html_file(&entry))
} else {
(0, 0)
}
})
.reduce(|| (0, 0), |a, b| (a.0 + b.0, a.1 + b.1))
}
/// Default `tidy` command for macOS is too old that it does not have `mute-id` and `mute` options.