From c754338bf4eab7dbc694df67ba4ccad1ef6cd319 Mon Sep 17 00:00:00 2001 From: husky Date: Fri, 14 Mar 2025 12:13:45 -0700 Subject: [PATCH] favor newer hostnames --- vorebot/src/webparse/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vorebot/src/webparse/mod.rs b/vorebot/src/webparse/mod.rs index d6d6580..ba84f74 100644 --- a/vorebot/src/webparse/mod.rs +++ b/vorebot/src/webparse/mod.rs @@ -67,7 +67,7 @@ pub async fn web_parse( .create_key_value(kv::Config { bucket: "hosts".to_string(), description: "prevent the same host from being scraped too quickly".to_string(), - max_age: Duration::from_secs(60 * 5), + max_age: Duration::from_secs(60 * 10), ..Default::default() }) .await; @@ -93,7 +93,7 @@ pub async fn web_parse( if let Ok(Some(host)) = hosts_bucket.get(hash.to_string()).await { let count = *host.first().unwrap_or(&0); - if count > 100 { + if count > 10 { warn!("scraping {} too quickly, avoiding for one minute", robots_url); return Err(()); }