bring over latest vorebot tweaks #4
1 changed file with 11 additions and 0 deletions
|
@@ -220,6 +220,17 @@ pub async fn web_parse(
|
||||||
// i guess we're good
|
// i guess we're good
|
||||||
driver.goto(url).await.map_err(|_| ())?;
|
driver.goto(url).await.map_err(|_| ())?;
|
||||||
|
|
||||||
|
let html_element = driver.find(By::Tag("html")).await.map_err(|_| ())?;
|
||||||
|
|
||||||
|
if let Some(lang) = html_element.attr("lang").await.ok().flatten() {
|
||||||
|
if !lang.starts_with("en") && !lang.starts_with("unknown") {
|
||||||
|
// i.e. non-english language
|
||||||
|
// fixme: remove this once we start expanding to non-english-speaking markets?
|
||||||
|
warn!("skipping {} due to {} language (currently prioritizing english", url, lang);
|
||||||
|
return Err(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let meta_elements = driver.find_all(By::Tag("meta")).await.map_err(|_| ())?;
|
let meta_elements = driver.find_all(By::Tag("meta")).await.map_err(|_| ())?;
|
||||||
|
|
||||||
let title = driver.title().await.map_err(|_| ())?;
|
let title = driver.title().await.map_err(|_| ())?;
|
||||||
|
|
Loading…
Add table
Reference in a new issue