forked from asklyphe-public/asklyphe
		
	Compare commits
	
		
			1 commit
		
	
	
		
			spellcheck
			...
			main
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 016e1957e3 | 
					 60 changed files with 305 additions and 3953 deletions
				
			
		| 
						 | 
				
			
			@ -1,16 +0,0 @@
 | 
			
		|||
inputs:
 | 
			
		||||
  service-name:
 | 
			
		||||
    description: 'name of the service to build and upload'
 | 
			
		||||
    required: true
 | 
			
		||||
runs:
 | 
			
		||||
  using: "composite"
 | 
			
		||||
  steps:
 | 
			
		||||
    - run: |
 | 
			
		||||
        mkdir -pv artifacts
 | 
			
		||||
        cargo build --release --bin ${{ inputs.service-name }}
 | 
			
		||||
        mv target/release/${{ inputs.service-name }} artifacts/
 | 
			
		||||
      shell: bash
 | 
			
		||||
    - uses: actions/upload-artifact@v3
 | 
			
		||||
      with:
 | 
			
		||||
        name: "${{ inputs.service-name }}"
 | 
			
		||||
        path: artifacts/
 | 
			
		||||
| 
						 | 
				
			
			@ -1,39 +0,0 @@
 | 
			
		|||
on: [push]
 | 
			
		||||
jobs:
 | 
			
		||||
  build-all-services:
 | 
			
		||||
    runs-on: docker
 | 
			
		||||
    container:
 | 
			
		||||
      image: rust:1-bookworm
 | 
			
		||||
    steps:
 | 
			
		||||
      - run: |
 | 
			
		||||
          apt-get update -qq
 | 
			
		||||
          apt-get install -y -qq nodejs git clang
 | 
			
		||||
      - uses: actions/checkout@v3
 | 
			
		||||
      - id: vorebot
 | 
			
		||||
        uses: ./.forgejo/build-service
 | 
			
		||||
        with:
 | 
			
		||||
          service-name: "vorebot"
 | 
			
		||||
      - id: asklyphe-auth-frontend
 | 
			
		||||
        uses: ./.forgejo/build-service
 | 
			
		||||
        with:
 | 
			
		||||
          service-name: "asklyphe-auth-frontend"
 | 
			
		||||
      - id: asklyphe-frontend
 | 
			
		||||
        uses: ./.forgejo/build-service
 | 
			
		||||
        with:
 | 
			
		||||
          service-name: "asklyphe-frontend"
 | 
			
		||||
      - id: authservice
 | 
			
		||||
        uses: ./.forgejo/build-service
 | 
			
		||||
        with:
 | 
			
		||||
          service-name: "authservice"
 | 
			
		||||
      - id: bingservice
 | 
			
		||||
        uses: ./.forgejo/build-service
 | 
			
		||||
        with:
 | 
			
		||||
          service-name: "bingservice"
 | 
			
		||||
      - id: googleservice
 | 
			
		||||
        uses: ./.forgejo/build-service
 | 
			
		||||
        with:
 | 
			
		||||
          service-name: "googleservice"
 | 
			
		||||
      - id: lyphedb
 | 
			
		||||
        uses: ./.forgejo/build-service
 | 
			
		||||
        with:
 | 
			
		||||
          service-name: "lyphedb"
 | 
			
		||||
							
								
								
									
										985
									
								
								Cargo.lock
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										985
									
								
								Cargo.lock
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							| 
						 | 
				
			
			@ -1,2 +1,2 @@
 | 
			
		|||
[workspace]
 | 
			
		||||
members = ["asklyphe-common", "asklyphe-frontend", "asklyphe-auth-frontend", "unit_converter", "authservice", "authservice/migration", "authservice/entity", "bingservice", "googleservice", "vorebot", "lyphedb", "lyphedb/ldbtesttool"]
 | 
			
		||||
members = ["asklyphe-common", "asklyphe-frontend", "searchservice", "asklyphe-auth-frontend", "unit_converter", "authservice", "authservice/migration", "authservice/entity", "bingservice", "googleservice"]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@
 | 
			
		|||
{% block title %}Login{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/auth.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/auth.css"/>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -11,10 +11,10 @@
 | 
			
		|||
    <a id="alyphebig" href="https://asklyphe.com/">
 | 
			
		||||
    <div id="lyphebig">
 | 
			
		||||
            <div class="bent-surrounding">
 | 
			
		||||
                <img id="lypheimg" src="/static/img/lyphebent.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
			
		||||
                <img id="lypheimg" src="/static/img/lyphebent.png" alt="image of lyphe, our mascot!">
 | 
			
		||||
            </div>
 | 
			
		||||
            <div id="lyphetitle">
 | 
			
		||||
                <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
			
		||||
                <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
			
		||||
                <p>the best search engine!</p>
 | 
			
		||||
            </div>
 | 
			
		||||
    </div>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@
 | 
			
		|||
{% block title %}Register{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/auth.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/auth.css"/>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -11,10 +11,10 @@
 | 
			
		|||
    <a id="alyphebig" href="https://asklyphe.com/">
 | 
			
		||||
        <div id="lyphebig">
 | 
			
		||||
            <div class="bent-surrounding">
 | 
			
		||||
                <img id="lypheimg" src="/static/img/lyphebent.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
			
		||||
                <img id="lypheimg" src="/static/img/lyphebent.png" alt="image of lyphe, our mascot!">
 | 
			
		||||
            </div>
 | 
			
		||||
            <div id="lyphetitle">
 | 
			
		||||
                <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
			
		||||
                <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
			
		||||
                <p>the best search engine!</p>
 | 
			
		||||
            </div>
 | 
			
		||||
        </div>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,7 +11,7 @@
 | 
			
		|||
              title="AskLyphe"
 | 
			
		||||
              href="/static/osd.xml" />
 | 
			
		||||
 | 
			
		||||
        <link rel="stylesheet" href="/static/shell.css?git={{ git_commit }}" />
 | 
			
		||||
        <link rel="stylesheet" href="/static/shell.css" />
 | 
			
		||||
 | 
			
		||||
        {% block head %}{% endblock %}
 | 
			
		||||
    </head>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@
 | 
			
		|||
{% block title %}Verify Email{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/auth.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/auth.css"/>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -11,10 +11,10 @@
 | 
			
		|||
    <a id="alyphebig" href="https://asklyphe.com/">
 | 
			
		||||
        <div id="lyphebig">
 | 
			
		||||
            <div class="bent-surrounding">
 | 
			
		||||
                <img id="lypheimg" src="/static/img/lyphebent.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
			
		||||
                <img id="lypheimg" src="/static/img/lyphebent.png" alt="image of lyphe, our mascot!">
 | 
			
		||||
            </div>
 | 
			
		||||
            <div id="lyphetitle">
 | 
			
		||||
                <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
			
		||||
                <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
			
		||||
                <p>the best search engine!</p>
 | 
			
		||||
            </div>
 | 
			
		||||
        </div>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,16 +11,10 @@ license-file = "LICENSE"
 | 
			
		|||
tokio = { version = "1.0", features = ["full"] }
 | 
			
		||||
chrono = "0.4.31"
 | 
			
		||||
serde = { version = "1.0", features = ["derive"] }
 | 
			
		||||
lyphedb = { path = "../lyphedb" }
 | 
			
		||||
foundationdb = { version = "0.8.0", features = ["embedded-fdb-include"], optional = true }
 | 
			
		||||
foundationdb = { version = "0.8.0", features = ["embedded-fdb-include"] }
 | 
			
		||||
log = "0.4.20"
 | 
			
		||||
rmp-serde = "1.1.2"
 | 
			
		||||
futures = "0.3.30"
 | 
			
		||||
async-nats = "0.38.0"
 | 
			
		||||
ulid = "1.1.0"
 | 
			
		||||
rand = "0.8.5"
 | 
			
		||||
percent-encoding = "2.3.1"
 | 
			
		||||
sha-rs = "0.1.0"
 | 
			
		||||
 | 
			
		||||
[features]
 | 
			
		||||
default = []
 | 
			
		||||
rand = "0.8.5"
 | 
			
		||||
| 
						 | 
				
			
			@ -1,112 +0,0 @@
 | 
			
		|||
use crate::ldb::{construct_path, hash, DBConn};
 | 
			
		||||
use log::{error, warn};
 | 
			
		||||
use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
			
		||||
 | 
			
		||||
pub const INCOMINGSTORE: &str = "incomingstore";
 | 
			
		||||
pub const OUTGOINGSTORE: &str = "outgoingstore";
 | 
			
		||||
 | 
			
		||||
pub async fn a_linksto_b(db: &DBConn, a: &str, b: &str) -> Result<(), ()> {
 | 
			
		||||
    let key_sets = vec![
 | 
			
		||||
        (
 | 
			
		||||
            construct_path(&[OUTGOINGSTORE, &hash(a)])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            a.as_bytes().to_vec(),
 | 
			
		||||
        ),
 | 
			
		||||
        (
 | 
			
		||||
            construct_path(&[OUTGOINGSTORE, &hash(a), &hash(b)])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            b.as_bytes().to_vec(),
 | 
			
		||||
        ),
 | 
			
		||||
        (
 | 
			
		||||
            construct_path(&[INCOMINGSTORE, &hash(b)])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            b.as_bytes().to_vec(),
 | 
			
		||||
        ),
 | 
			
		||||
        (
 | 
			
		||||
            construct_path(&[INCOMINGSTORE, &hash(b), &hash(a)])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            a.as_bytes().to_vec(),
 | 
			
		||||
        ),
 | 
			
		||||
    ];
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(
 | 
			
		||||
        KVList { kvs: key_sets },
 | 
			
		||||
        PropagationStrategy::OnRead,
 | 
			
		||||
    ));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Success => Ok(()),
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            error!("bad request for a_linksto_b");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            error!("not found for a_linksto_b");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn what_links_to_this(db: &DBConn, url: &str) -> Result<Vec<String>, ()> {
 | 
			
		||||
    let path = construct_path(&[INCOMINGSTORE, &hash(url)])
 | 
			
		||||
        .as_bytes()
 | 
			
		||||
        .to_vec();
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory { key: path }));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Entries(kvs) => Ok(kvs
 | 
			
		||||
            .kvs
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|v| String::from_utf8_lossy(&v.1).to_string())
 | 
			
		||||
            .collect()),
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            warn!("lyphedb responded with \"success\" to what_links_to_this, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            warn!("bad request for what_links_to_this");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => Ok(vec![]),
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn what_links_from_this(db: &DBConn, url: &str) -> Result<Vec<String>, ()> {
 | 
			
		||||
    let path = construct_path(&[OUTGOINGSTORE, &hash(url)])
 | 
			
		||||
        .as_bytes()
 | 
			
		||||
        .to_vec();
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory { key: path }));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Entries(kvs) => Ok(kvs
 | 
			
		||||
            .kvs
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|v| String::from_utf8_lossy(&v.1).to_string())
 | 
			
		||||
            .collect()),
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            warn!("lyphedb responded with \"success\" to what_links_from_this, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            warn!("bad request for what_links_from_this");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => Ok(vec![]),
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,63 +0,0 @@
 | 
			
		|||
use log::{error, warn};
 | 
			
		||||
use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
			
		||||
use crate::ldb::{construct_path, hash, DBConn};
 | 
			
		||||
 | 
			
		||||
pub const LINKSTORE: &str = "linkstore";
 | 
			
		||||
 | 
			
		||||
pub async fn add_url_to_linkwords(db: &DBConn, linkwords: &[&str], url: &str) -> Result<(), ()> {
 | 
			
		||||
    let mut key_sets = Vec::new();
 | 
			
		||||
    for linkword in linkwords {
 | 
			
		||||
        let path = construct_path(&[LINKSTORE, linkword, &hash(url)]).as_bytes().to_vec();
 | 
			
		||||
        key_sets.push((path, url.as_bytes().to_vec()));
 | 
			
		||||
    }
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(KVList {
 | 
			
		||||
        kvs: key_sets,
 | 
			
		||||
    }, PropagationStrategy::OnRead));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            Ok(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            error!("bad request for add_url_to_linkwords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            error!("not found for add_url_to_linkwords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn get_urls_from_linkword(db: &DBConn, keyword: &str) -> Result<Vec<String>, ()> {
 | 
			
		||||
    let path = construct_path(&[LINKSTORE, keyword]).as_bytes().to_vec();
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory {
 | 
			
		||||
        key: path,
 | 
			
		||||
    }));
 | 
			
		||||
    
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Entries(kvs) => {
 | 
			
		||||
            Ok(kvs.kvs.into_iter().map(|v| String::from_utf8_lossy(&v.1).to_string()).collect())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            warn!("lyphedb responded with \"success\" to get_urls_from_linkwords, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            warn!("bad request for get_urls_from_linkwords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            Ok(vec![])
 | 
			
		||||
        }
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,63 +0,0 @@
 | 
			
		|||
use log::{error, warn};
 | 
			
		||||
use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
			
		||||
use crate::ldb::{construct_path, hash, DBConn};
 | 
			
		||||
 | 
			
		||||
pub const METASTORE: &str = "metastore";
 | 
			
		||||
 | 
			
		||||
pub async fn add_url_to_metawords(db: &DBConn, metawords: &[&str], url: &str) -> Result<(), ()> {
 | 
			
		||||
    let mut key_sets = Vec::new();
 | 
			
		||||
    for metaword in metawords {
 | 
			
		||||
        let path = construct_path(&[METASTORE, metaword, &hash(url)]).as_bytes().to_vec();
 | 
			
		||||
        key_sets.push((path, url.as_bytes().to_vec()));
 | 
			
		||||
    }
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(KVList {
 | 
			
		||||
        kvs: key_sets,
 | 
			
		||||
    }, PropagationStrategy::OnRead));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            Ok(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            error!("bad request for add_url_to_metawords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            error!("not found for add_url_to_metawords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn get_urls_from_metaword(db: &DBConn, metaword: &str) -> Result<Vec<String>, ()> {
 | 
			
		||||
    let path = construct_path(&[METASTORE, metaword]).as_bytes().to_vec();
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory {
 | 
			
		||||
        key: path,
 | 
			
		||||
    }));
 | 
			
		||||
    
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Entries(kvs) => {
 | 
			
		||||
            Ok(kvs.kvs.into_iter().map(|v| String::from_utf8_lossy(&v.1).to_string()).collect())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            warn!("lyphedb responded with \"success\" to get_urls_from_metawords, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            warn!("bad request for get_urls_from_metawords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            Ok(vec![])
 | 
			
		||||
        }
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,63 +0,0 @@
 | 
			
		|||
pub mod wordstore;
 | 
			
		||||
pub mod sitestore;
 | 
			
		||||
pub mod linkstore;
 | 
			
		||||
pub mod metastore;
 | 
			
		||||
pub mod titlestore;
 | 
			
		||||
pub mod linkrelstore;
 | 
			
		||||
 | 
			
		||||
use std::hash::{DefaultHasher, Hasher};
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use std::sync::atomic::AtomicU64;
 | 
			
		||||
use futures::StreamExt;
 | 
			
		||||
use log::warn;
 | 
			
		||||
use lyphedb::LDBNatsMessage;
 | 
			
		||||
use percent_encoding::{percent_encode, AsciiSet, CONTROLS, NON_ALPHANUMERIC};
 | 
			
		||||
use sha_rs::{Sha, Sha256};
 | 
			
		||||
 | 
			
		||||
static NEXT_REPLY_ID: AtomicU64 = AtomicU64::new(0);
 | 
			
		||||
 | 
			
		||||
pub const NOT_ALLOWED_ASCII: AsciiSet = CONTROLS.add(b' ').add(b'/').add(b'.').add(b'\\');
 | 
			
		||||
 | 
			
		||||
pub fn hash(str: &str) -> String {
 | 
			
		||||
    let hasher = Sha256::new();
 | 
			
		||||
    hasher.digest(str.as_bytes())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn construct_path(path_elements: &[&str]) -> String {
 | 
			
		||||
    let mut buf = String::new();
 | 
			
		||||
    buf.push_str("ASKLYPHE/");
 | 
			
		||||
    for el in path_elements {
 | 
			
		||||
        buf.push_str(&percent_encode(el.as_bytes(), &NOT_ALLOWED_ASCII).to_string());
 | 
			
		||||
        buf.push('/');
 | 
			
		||||
    }
 | 
			
		||||
    buf.pop();
 | 
			
		||||
    buf
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
pub struct DBConn {
 | 
			
		||||
    nats: async_nats::Client,
 | 
			
		||||
    name: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl DBConn {
 | 
			
		||||
    pub fn new(nats: async_nats::Client, name: impl ToString) -> DBConn {
 | 
			
		||||
        DBConn { nats, name: name.to_string() }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    pub async fn query(&self, message: LDBNatsMessage) -> LDBNatsMessage {
 | 
			
		||||
        let data = rmp_serde::to_vec(&message).unwrap();
 | 
			
		||||
        let replyto = format!("ldb-reply-{}", NEXT_REPLY_ID.fetch_add(1, std::sync::atomic::Ordering::SeqCst));
 | 
			
		||||
        let mut subscriber = self.nats.subscribe(replyto.clone()).await.expect("NATS ERROR");
 | 
			
		||||
        self.nats.publish_with_reply(self.name.clone(), replyto, data.into()).await.expect("NATS ERROR");
 | 
			
		||||
        if let Some(reply) = subscriber.next().await {
 | 
			
		||||
            let reply = rmp_serde::from_slice::<LDBNatsMessage>(&reply.payload);
 | 
			
		||||
            if reply.is_err() {
 | 
			
		||||
                warn!("DECODED BAD MESSAGE FROM LYPHEDB: {}", reply.err().unwrap());
 | 
			
		||||
                return LDBNatsMessage::NotFound;
 | 
			
		||||
            }
 | 
			
		||||
            return reply.unwrap();
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,178 +0,0 @@
 | 
			
		|||
use crate::ldb::{construct_path, hash, DBConn};
 | 
			
		||||
use log::{error, warn};
 | 
			
		||||
use lyphedb::{KVList, KeyDirectory, KeyList, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
			
		||||
 | 
			
		||||
pub const SITESTORE: &str = "sitestore";
 | 
			
		||||
 | 
			
		||||
pub const TITLE: &str = "title";
 | 
			
		||||
pub const DESCRIPTION: &str = "desc";
 | 
			
		||||
pub const KEYWORDS: &str = "keywords";
 | 
			
		||||
pub const PAGE_TEXT_RANKING: &str = "ptranks";
 | 
			
		||||
pub const PAGE_TEXT_RAW: &str = "textraw";
 | 
			
		||||
pub const DAMPING: &str = "damping";
 | 
			
		||||
 | 
			
		||||
pub async fn add_website(
 | 
			
		||||
    db: &DBConn,
 | 
			
		||||
    url: &str,
 | 
			
		||||
    title: Option<String>,
 | 
			
		||||
    description: Option<String>,
 | 
			
		||||
    keywords: Option<Vec<String>>,
 | 
			
		||||
    page_text_ranking: &[(String, f64)],
 | 
			
		||||
    page_text_raw: String,
 | 
			
		||||
    damping: f64,
 | 
			
		||||
) -> Result<(), ()> {
 | 
			
		||||
    let keyurl = hash(url);
 | 
			
		||||
    let mut kvs = vec![
 | 
			
		||||
        (
 | 
			
		||||
            construct_path(&[SITESTORE, &keyurl]).as_bytes().to_vec(),
 | 
			
		||||
            url.as_bytes().to_vec(),
 | 
			
		||||
        ),
 | 
			
		||||
        (
 | 
			
		||||
            construct_path(&[SITESTORE, &keyurl, PAGE_TEXT_RANKING])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            rmp_serde::to_vec(page_text_ranking).unwrap(),
 | 
			
		||||
        ),
 | 
			
		||||
        (
 | 
			
		||||
            construct_path(&[SITESTORE, &keyurl, PAGE_TEXT_RAW])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            page_text_raw.as_bytes().to_vec(),
 | 
			
		||||
        ),
 | 
			
		||||
        (
 | 
			
		||||
            construct_path(&[SITESTORE, &keyurl, DAMPING])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            damping.to_be_bytes().to_vec(),
 | 
			
		||||
        ),
 | 
			
		||||
    ];
 | 
			
		||||
 | 
			
		||||
    if let Some(title) = title {
 | 
			
		||||
        kvs.push((
 | 
			
		||||
            construct_path(&[SITESTORE, &keyurl, TITLE]).as_bytes().to_vec(),
 | 
			
		||||
            title.as_bytes().to_vec(),
 | 
			
		||||
        ))
 | 
			
		||||
    }
 | 
			
		||||
    if let Some(description) = description {
 | 
			
		||||
        kvs.push((
 | 
			
		||||
            construct_path(&[SITESTORE, &keyurl, DESCRIPTION])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            description.as_bytes().to_vec(),
 | 
			
		||||
        ))
 | 
			
		||||
    }
 | 
			
		||||
    if let Some(keywords) = keywords {
 | 
			
		||||
        kvs.push((
 | 
			
		||||
            construct_path(&[SITESTORE, &keyurl, KEYWORDS])
 | 
			
		||||
                .as_bytes()
 | 
			
		||||
                .to_vec(),
 | 
			
		||||
            rmp_serde::to_vec(&keywords).unwrap(),
 | 
			
		||||
        ))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(
 | 
			
		||||
        KVList { kvs },
 | 
			
		||||
        PropagationStrategy::OnRead,
 | 
			
		||||
    ));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Success => Ok(()),
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            error!("bad request for add_website");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            error!("not found for add_website");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Default, Debug, Clone)]
 | 
			
		||||
pub struct WebsiteData {
 | 
			
		||||
    pub title: Option<String>,
 | 
			
		||||
    pub description: Option<String>,
 | 
			
		||||
    pub keywords: Option<Vec<String>>,
 | 
			
		||||
    pub page_text_ranking: Vec<(String, f64)>,
 | 
			
		||||
    pub page_text_raw: String,
 | 
			
		||||
    pub damping: f64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn get_website(db: &DBConn, url: &str) -> Result<WebsiteData, ()> {
 | 
			
		||||
    let keyurl = hash(url);
 | 
			
		||||
    let keys = [
 | 
			
		||||
        construct_path(&[SITESTORE, &keyurl, TITLE]).as_bytes().to_vec(),
 | 
			
		||||
        construct_path(&[SITESTORE, &keyurl, DESCRIPTION]).as_bytes().to_vec(),
 | 
			
		||||
        construct_path(&[SITESTORE, &keyurl, KEYWORDS]).as_bytes().to_vec(),
 | 
			
		||||
        construct_path(&[SITESTORE, &keyurl, PAGE_TEXT_RANKING]).as_bytes().to_vec(),
 | 
			
		||||
        construct_path(&[SITESTORE, &keyurl, PAGE_TEXT_RAW]).as_bytes().to_vec(),
 | 
			
		||||
        construct_path(&[SITESTORE, &keyurl, DAMPING]).as_bytes().to_vec(),
 | 
			
		||||
    ].to_vec();
 | 
			
		||||
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeys(KeyList { keys }));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Entries(kvlist) => {
 | 
			
		||||
            let mut data = WebsiteData::default();
 | 
			
		||||
            for (key, value) in kvlist.kvs {
 | 
			
		||||
                let key = String::from_utf8_lossy(&key).to_string();
 | 
			
		||||
                match key.as_str() {
 | 
			
		||||
                    _ if key.ends_with(TITLE) => {
 | 
			
		||||
                        data.title = Some(String::from_utf8_lossy(&value).to_string());
 | 
			
		||||
                    }
 | 
			
		||||
                    _ if key.ends_with(DESCRIPTION) => {
 | 
			
		||||
                        data.description = Some(String::from_utf8_lossy(&value).to_string());
 | 
			
		||||
                    }
 | 
			
		||||
                    _ if key.ends_with(KEYWORDS) => {
 | 
			
		||||
                        let deser = rmp_serde::from_slice::<Vec<String>>(&value);
 | 
			
		||||
                        if let Err(e) = deser {
 | 
			
		||||
                            error!("bad keywords entry for {}, deser error: {:?}", key, e);
 | 
			
		||||
                        } else {
 | 
			
		||||
                            data.keywords = Some(deser.unwrap());
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    _ if key.ends_with(PAGE_TEXT_RANKING) => {
 | 
			
		||||
                        let deser = rmp_serde::from_slice::<Vec<(String, f64)>>(&value);
 | 
			
		||||
                        if let Err(e) = deser {
 | 
			
		||||
                            error!("bad page_text_ranking entry for {}, deser error: {:?}", key, e);
 | 
			
		||||
                        } else {
 | 
			
		||||
                            data.page_text_ranking = deser.unwrap();
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    _ if key.ends_with(PAGE_TEXT_RAW) => {
 | 
			
		||||
                        data.page_text_raw = String::from_utf8_lossy(&value).to_string();
 | 
			
		||||
                    }
 | 
			
		||||
                    _ if key.ends_with(DAMPING) => {
 | 
			
		||||
                        data.damping = f64::from_be_bytes(value.try_into().unwrap_or(0.85f64.to_be_bytes()));
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    _ => {
 | 
			
		||||
                        warn!("encountered weird returned key for get_website");
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            Ok(data)
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            warn!("lyphedb responded with \"success\" to get_website, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            error!("bad request for get_website");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            warn!("not found for get_website");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,63 +0,0 @@
 | 
			
		|||
use log::{error, warn};
 | 
			
		||||
use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
			
		||||
use crate::ldb::{construct_path, hash, DBConn};
 | 
			
		||||
 | 
			
		||||
pub const TITLESTORE: &str = "titlestore";
 | 
			
		||||
 | 
			
		||||
pub async fn add_url_to_titlewords(db: &DBConn, titlewords: &[&str], url: &str) -> Result<(), ()> {
 | 
			
		||||
    let mut key_sets = Vec::new();
 | 
			
		||||
    for titleword in titlewords {
 | 
			
		||||
        let path = construct_path(&[TITLESTORE, titleword, &hash(url)]).as_bytes().to_vec();
 | 
			
		||||
        key_sets.push((path, url.as_bytes().to_vec()));
 | 
			
		||||
    }
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(KVList {
 | 
			
		||||
        kvs: key_sets,
 | 
			
		||||
    }, PropagationStrategy::OnRead));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            Ok(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            error!("bad request for add_url_to_titlewords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            error!("not found for add_url_to_titlewords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn get_urls_from_titleword(db: &DBConn, titleword: &str) -> Result<Vec<String>, ()> {
 | 
			
		||||
    let path = construct_path(&[TITLESTORE, titleword]).as_bytes().to_vec();
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory {
 | 
			
		||||
        key: path,
 | 
			
		||||
    }));
 | 
			
		||||
    
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Entries(kvs) => {
 | 
			
		||||
            Ok(kvs.kvs.into_iter().map(|v| String::from_utf8_lossy(&v.1).to_string()).collect())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            warn!("lyphedb responded with \"success\" to get_urls_from_titlewords, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            warn!("bad request for get_urls_from_titlewords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            Ok(vec![])
 | 
			
		||||
        }
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,64 +0,0 @@
 | 
			
		|||
use log::{error, warn};
 | 
			
		||||
use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
			
		||||
use crate::ldb::{construct_path, hash, DBConn};
 | 
			
		||||
 | 
			
		||||
pub const WORDSTORE: &str = "wordstore";
 | 
			
		||||
 | 
			
		||||
pub async fn add_url_to_keywords(db: &DBConn, keywords: &[&str], url: &str) -> Result<(), ()> {
 | 
			
		||||
    let mut key_sets = Vec::new();
 | 
			
		||||
    for keyword in keywords {
 | 
			
		||||
        let path = construct_path(&[WORDSTORE, keyword, &hash(url)]).as_bytes().to_vec();
 | 
			
		||||
        let data = url.as_bytes().to_vec();
 | 
			
		||||
        key_sets.push((path, data));
 | 
			
		||||
    }
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(KVList {
 | 
			
		||||
        kvs: key_sets,
 | 
			
		||||
    }, PropagationStrategy::OnRead));
 | 
			
		||||
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            Ok(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            error!("bad request for add_url_to_keywords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            error!("not found for add_url_to_keywords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn get_urls_from_keyword(db: &DBConn, keyword: &str) -> Result<Vec<String>, ()> {
 | 
			
		||||
    let path = construct_path(&[WORDSTORE, keyword]).as_bytes().to_vec();
 | 
			
		||||
    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory {
 | 
			
		||||
        key: path,
 | 
			
		||||
    }));
 | 
			
		||||
    
 | 
			
		||||
    match db.query(cmd).await {
 | 
			
		||||
        LDBNatsMessage::Entries(kvs) => {
 | 
			
		||||
            Ok(kvs.kvs.into_iter().map(|v| String::from_utf8_lossy(&v.1).to_string()).collect())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::Success => {
 | 
			
		||||
            warn!("lyphedb responded with \"success\" to get_urls_from_keywords, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::BadRequest => {
 | 
			
		||||
            warn!("bad request for get_urls_from_keywords");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
        LDBNatsMessage::NotFound => {
 | 
			
		||||
            Ok(vec![])
 | 
			
		||||
        }
 | 
			
		||||
        _ => {
 | 
			
		||||
            warn!("lyphedb sent weird message as response, treating as error");
 | 
			
		||||
            Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -12,12 +12,8 @@
 | 
			
		|||
*/
 | 
			
		||||
 | 
			
		||||
pub mod nats;
 | 
			
		||||
#[cfg(feature = "foundationdb")]
 | 
			
		||||
pub mod db;
 | 
			
		||||
pub mod ldb;
 | 
			
		||||
 | 
			
		||||
pub use lyphedb;
 | 
			
		||||
#[cfg(feature = "foundationdb")]
 | 
			
		||||
pub use foundationdb;
 | 
			
		||||
 | 
			
		||||
pub fn add(left: usize, right: usize) -> usize {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,7 +18,7 @@ pub const VOREBOT_NEWHOSTNAME_SERVICE: &str = "websiteparse_highpriority";
 | 
			
		|||
pub const VOREBOT_SUGGESTED_SERVICE: &str = "websiteparse_highestpriority";
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Clone, Serialize, Deserialize)]
 | 
			
		||||
pub struct CrawlRequest {
 | 
			
		||||
pub struct WebParseRequest {
 | 
			
		||||
    pub url: String,
 | 
			
		||||
    pub damping: f64,
 | 
			
		||||
}
 | 
			
		||||
    pub damping_factor: f32,
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -36,7 +36,5 @@ once_cell = "1.19.0"
 | 
			
		|||
chrono = "0.4.33"
 | 
			
		||||
rand = "0.8.5"
 | 
			
		||||
url_encoded_data = "0.6.1"
 | 
			
		||||
strum = "0.27.1"
 | 
			
		||||
strum_macros = "0.27.1"
 | 
			
		||||
 | 
			
		||||
env_logger = "*"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14,7 +14,6 @@
 | 
			
		|||
pub mod searchbot;
 | 
			
		||||
pub mod wikipedia;
 | 
			
		||||
pub mod unit_converter;
 | 
			
		||||
pub mod spellcheck;
 | 
			
		||||
pub mod routes;
 | 
			
		||||
 | 
			
		||||
use std::{env, process};
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -343,45 +343,14 @@ pub async fn admin_invitecode(
 | 
			
		|||
        }
 | 
			
		||||
        
 | 
			
		||||
        let active_codes = match list_invite_codes(nats.clone(), token.clone(), false).await {
 | 
			
		||||
            Ok(mut v) => {
 | 
			
		||||
                for v in &mut v {
 | 
			
		||||
                    if let Some(used_by) = &v.used_by {
 | 
			
		||||
                        if used_by.len() > 32 {
 | 
			
		||||
                            v.used_by = Some(format!("{}...", &used_by[0..32]));
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    if v.creator.len() > 32 {
 | 
			
		||||
                        v.creator = format!("{}...", &v.creator[0..32]);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                v
 | 
			
		||||
            },
 | 
			
		||||
            Ok(v) => v,
 | 
			
		||||
            Err(e) => {
 | 
			
		||||
                return e.into_response();
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
        
 | 
			
		||||
        let used_codes = match list_invite_codes(nats.clone(), token.clone(), true).await {
 | 
			
		||||
            Ok(v) => v.into_iter().map(|mut v| {
 | 
			
		||||
                if let Some(used_by) = &v.used_by {
 | 
			
		||||
                    if used_by.len() > 32 {
 | 
			
		||||
                        v.used_by = Some(format!("{}...", &used_by[0..32]));
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if v.creator.len() > 32 {
 | 
			
		||||
                    v.creator = format!("{}...", &v.creator[0..32]);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if v.used_at.is_none() {
 | 
			
		||||
                    v.used_at = Some(String::from("unset"));
 | 
			
		||||
                    v
 | 
			
		||||
                } else {
 | 
			
		||||
                    v
 | 
			
		||||
                }
 | 
			
		||||
            }).collect(),
 | 
			
		||||
            Ok(v) => v.into_iter().map(|mut v| if v.used_at.is_none() { v.used_at = Some(String::from("unset")); v } else { v }).collect(),
 | 
			
		||||
            Err(e) => {
 | 
			
		||||
                return e.into_response();
 | 
			
		||||
            }
 | 
			
		||||
| 
						 | 
				
			
			@ -657,4 +626,4 @@ pub async fn admin_user_list(
 | 
			
		|||
    } else {
 | 
			
		||||
        Redirect::to("/").into_response()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -27,7 +27,6 @@ use tracing::{debug, error};
 | 
			
		|||
use tracing::log::warn;
 | 
			
		||||
use crate::{Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
 | 
			
		||||
use crate::routes::index::FrontpageAnnouncement;
 | 
			
		||||
use crate::routes::Themes;
 | 
			
		||||
 | 
			
		||||
#[derive(Serialize, Debug)]
 | 
			
		||||
struct FullAnnouncement {
 | 
			
		||||
| 
						 | 
				
			
			@ -97,7 +96,7 @@ pub struct AnnouncementTemplate {
 | 
			
		|||
    built_on: String,
 | 
			
		||||
    year: String,
 | 
			
		||||
    alpha: bool,
 | 
			
		||||
    theme: Themes,
 | 
			
		||||
    theme: String,
 | 
			
		||||
    announcement: FullAnnouncement,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -110,10 +109,10 @@ pub async fn announcement_full(Extension(nats): Extension<Arc<jetstream::Context
 | 
			
		|||
            built_on: BUILT_ON.to_string(),
 | 
			
		||||
            year: YEAR.to_string(),
 | 
			
		||||
            alpha: ALPHA,
 | 
			
		||||
            theme: Themes::Default,
 | 
			
		||||
            theme: "default".to_string(),
 | 
			
		||||
            announcement,
 | 
			
		||||
        }.into_response()
 | 
			
		||||
    } else {
 | 
			
		||||
        StatusCode::NOT_FOUND.into_response()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -30,7 +30,7 @@ use tokio::sync::Mutex;
 | 
			
		|||
use tracing::error;
 | 
			
		||||
use tracing::log::warn;
 | 
			
		||||
use crate::{BUILT_ON, GIT_COMMIT, Opts, ALPHA, VERSION, WEBSITE_COUNT, YEAR};
 | 
			
		||||
use crate::routes::{authenticate_user, Themes, UserInfo};
 | 
			
		||||
use crate::routes::{authenticate_user, UserInfo};
 | 
			
		||||
 | 
			
		||||
#[derive(Serialize, Debug)]
 | 
			
		||||
pub struct FrontpageAnnouncement {
 | 
			
		||||
| 
						 | 
				
			
			@ -102,7 +102,7 @@ pub fn frontpage_error(error: &str, auth_url: String) -> FrontpageTemplate {
 | 
			
		|||
        year: YEAR.to_string(),
 | 
			
		||||
        alpha: ALPHA,
 | 
			
		||||
        count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
			
		||||
        theme: Themes::Default,
 | 
			
		||||
        theme: "default".to_string(),
 | 
			
		||||
        announcement: None,
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -185,7 +185,7 @@ pub struct FrontpageTemplate {
 | 
			
		|||
    year: String,
 | 
			
		||||
    alpha: bool,
 | 
			
		||||
    count: u64,
 | 
			
		||||
    theme: Themes,
 | 
			
		||||
    theme: String,
 | 
			
		||||
    announcement: Option<FrontpageAnnouncement>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -202,7 +202,7 @@ pub async fn frontpage(
 | 
			
		|||
        year: YEAR.to_string(),
 | 
			
		||||
        alpha: ALPHA,
 | 
			
		||||
        count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
			
		||||
        theme: Themes::Default,
 | 
			
		||||
        theme: "default".to_string(),
 | 
			
		||||
        announcement,
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -217,7 +217,7 @@ struct IndexTemplate {
 | 
			
		|||
    year: String,
 | 
			
		||||
    alpha: bool,
 | 
			
		||||
    count: u64,
 | 
			
		||||
    theme: Themes,
 | 
			
		||||
    theme: String,
 | 
			
		||||
    announcement: Option<FrontpageAnnouncement>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -234,7 +234,7 @@ pub async fn index(
 | 
			
		|||
                return (jar.remove("token"), frontpage_error(e.as_str(), opts.auth_url.clone())).into_response();
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
        let theme = info.get_theme();
 | 
			
		||||
        let theme = info.theme.clone();
 | 
			
		||||
        
 | 
			
		||||
        let announcement = latest_announcement(nats.clone()).await;
 | 
			
		||||
        
 | 
			
		||||
| 
						 | 
				
			
			@ -260,7 +260,7 @@ pub async fn index(
 | 
			
		|||
            year: YEAR.to_string(),
 | 
			
		||||
            alpha: ALPHA,
 | 
			
		||||
            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
			
		||||
            theme: Themes::Default,
 | 
			
		||||
            theme: "default".to_string(),
 | 
			
		||||
            announcement,
 | 
			
		||||
        }.into_response()
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,8 +10,7 @@
 | 
			
		|||
 *
 | 
			
		||||
 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
use std::fmt::Display;
 | 
			
		||||
use std::str::FromStr;
 | 
			
		||||
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use askama::Template;
 | 
			
		||||
use askama_axum::IntoResponse;
 | 
			
		||||
| 
						 | 
				
			
			@ -22,13 +21,7 @@ use asklyphe_common::nats::comms::ServiceResponse;
 | 
			
		|||
use async_nats::jetstream;
 | 
			
		||||
use axum::http::StatusCode;
 | 
			
		||||
use serde::Serialize;
 | 
			
		||||
use strum::IntoEnumIterator;
 | 
			
		||||
use strum_macros::EnumIter;
 | 
			
		||||
use time::macros::utc_datetime;
 | 
			
		||||
use time::{OffsetDateTime, UtcDateTime};
 | 
			
		||||
use tracing::{debug, error};
 | 
			
		||||
 | 
			
		||||
const RANDOM_THEME_EPOCH: UtcDateTime = utc_datetime!(2025-03-19 00:00);
 | 
			
		||||
use tracing::error;
 | 
			
		||||
 | 
			
		||||
pub mod search;
 | 
			
		||||
pub mod index;
 | 
			
		||||
| 
						 | 
				
			
			@ -37,102 +30,7 @@ pub mod user_settings;
 | 
			
		|||
pub mod admin;
 | 
			
		||||
pub mod announcement;
 | 
			
		||||
 | 
			
		||||
#[derive(Default, EnumIter, PartialEq, Eq, Copy, Clone)]
 | 
			
		||||
pub enum Themes {
 | 
			
		||||
    Classic,
 | 
			
		||||
    Dark,
 | 
			
		||||
    #[default]
 | 
			
		||||
    Default,
 | 
			
		||||
    Freaky,
 | 
			
		||||
    Gloss,
 | 
			
		||||
    Oled,
 | 
			
		||||
    Water,
 | 
			
		||||
    Random
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Themes {
 | 
			
		||||
    pub fn get_all_themes() -> Vec<Themes> {
 | 
			
		||||
        Self::iter().collect()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn display_name(&self) -> String {
 | 
			
		||||
        match self {
 | 
			
		||||
            Themes::Classic => {
 | 
			
		||||
                "classic".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Dark => {
 | 
			
		||||
                "dark theme".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Default => {
 | 
			
		||||
                "default theme".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Freaky => {
 | 
			
		||||
                "freaky".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Gloss => {
 | 
			
		||||
                "gloss".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Oled => {
 | 
			
		||||
                "lights out".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Water => {
 | 
			
		||||
                "water".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Random => {
 | 
			
		||||
                "random".to_string()
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn internal_name(&self) -> String {
 | 
			
		||||
        match self {
 | 
			
		||||
            Themes::Classic => {
 | 
			
		||||
                "classic".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Dark => {
 | 
			
		||||
                "dark".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Default => {
 | 
			
		||||
                "default".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Freaky => {
 | 
			
		||||
                "freaky".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Gloss => {
 | 
			
		||||
                "gloss".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Oled => {
 | 
			
		||||
                "oled".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Water => {
 | 
			
		||||
                "water".to_string()
 | 
			
		||||
            }
 | 
			
		||||
            Themes::Random => {
 | 
			
		||||
                "random".to_string()
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl FromStr for Themes {
 | 
			
		||||
    type Err = ();
 | 
			
		||||
 | 
			
		||||
    fn from_str(s: &str) -> Result<Self, Self::Err> {
 | 
			
		||||
        match s {
 | 
			
		||||
            "classic" => Ok(Themes::Classic),
 | 
			
		||||
            "dark" => Ok(Themes::Dark),
 | 
			
		||||
            "default" => Ok(Themes::Default),
 | 
			
		||||
            "freaky" => Ok(Themes::Freaky),
 | 
			
		||||
            "gloss" => Ok(Themes::Gloss),
 | 
			
		||||
            "oled" => Ok(Themes::Oled),
 | 
			
		||||
            "water" => Ok(Themes::Water),
 | 
			
		||||
            "random" => Ok(Themes::Random),
 | 
			
		||||
            _ => Err(())
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Serialize, Clone)]
 | 
			
		||||
#[derive(Serialize)]
 | 
			
		||||
pub struct UserInfo {
 | 
			
		||||
    pub username: String,
 | 
			
		||||
    pub email: String,
 | 
			
		||||
| 
						 | 
				
			
			@ -141,27 +39,6 @@ pub struct UserInfo {
 | 
			
		|||
    pub administrator: bool,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl UserInfo {
 | 
			
		||||
    pub fn get_theme(&self) -> Themes {
 | 
			
		||||
        let theme: Themes = self.theme.parse().unwrap_or_default();
 | 
			
		||||
 | 
			
		||||
        if theme.eq(&Themes::Random) {
 | 
			
		||||
            let possible_themes = Themes::get_all_themes();
 | 
			
		||||
            let current_day = UtcDateTime::now();
 | 
			
		||||
 | 
			
		||||
            let rand_value = (((current_day - RANDOM_THEME_EPOCH).as_seconds_f64() / 86400.0) % possible_themes.len() as f64) as usize;
 | 
			
		||||
            
 | 
			
		||||
            *possible_themes.get(rand_value).unwrap_or(&Themes::Default)
 | 
			
		||||
        } else {
 | 
			
		||||
            theme
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    pub fn get_true_theme(&self) -> Themes {
 | 
			
		||||
        self.theme.parse().unwrap()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn authenticate_user(nats: Arc<jetstream::Context>, token: String) -> Result<UserInfo, String> {
 | 
			
		||||
    let response = comms::query_service(
 | 
			
		||||
        comms::Query::AuthService(AuthServiceQuery {
 | 
			
		||||
| 
						 | 
				
			
			@ -237,4 +114,4 @@ pub struct NotFoundTemplate;
 | 
			
		|||
 | 
			
		||||
pub async fn not_found() -> impl IntoResponse {
 | 
			
		||||
    (StatusCode::NOT_FOUND, NotFoundTemplate).into_response()
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -12,14 +12,12 @@
 | 
			
		|||
*/
 | 
			
		||||
 | 
			
		||||
use crate::routes::index::frontpage_error;
 | 
			
		||||
use crate::routes::{authenticate_user, Themes, UserInfo};
 | 
			
		||||
use crate::routes::{authenticate_user, UserInfo};
 | 
			
		||||
use crate::searchbot::{gather_image_results, gather_search_results};
 | 
			
		||||
use crate::unit_converter;
 | 
			
		||||
use crate::unit_converter::UnitConversion;
 | 
			
		||||
use crate::wikipedia::WikipediaSummary;
 | 
			
		||||
use crate::{wikipedia, Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
 | 
			
		||||
use crate::spellcheck;
 | 
			
		||||
use crate::spellcheck::SpellCheckResults;
 | 
			
		||||
use askama::Template;
 | 
			
		||||
use asklyphe_common::nats;
 | 
			
		||||
use asklyphe_common::nats::bingservice::{
 | 
			
		||||
| 
						 | 
				
			
			@ -70,7 +68,6 @@ pub struct Complications {
 | 
			
		|||
    disabled: bool,
 | 
			
		||||
    wikipedia: Option<WikipediaSummary>,
 | 
			
		||||
    unit_converter: Option<UnitConversion>,
 | 
			
		||||
    spellcheck: Option<SpellCheckResults>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn search(
 | 
			
		||||
| 
						 | 
				
			
			@ -114,7 +111,7 @@ struct SearchTemplateJavascript {
 | 
			
		|||
    built_on: String,
 | 
			
		||||
    year: String,
 | 
			
		||||
    alpha: bool,
 | 
			
		||||
    theme: Themes,
 | 
			
		||||
    theme: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn search_js(
 | 
			
		||||
| 
						 | 
				
			
			@ -124,7 +121,7 @@ pub async fn search_js(
 | 
			
		|||
    Extension(opts): Extension<Opts>,
 | 
			
		||||
) -> impl IntoResponse {
 | 
			
		||||
    fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplateJavascript {
 | 
			
		||||
        let theme = info.get_theme();
 | 
			
		||||
        let theme = info.theme.clone();
 | 
			
		||||
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
			
		||||
        SearchTemplateJavascript {
 | 
			
		||||
            info,
 | 
			
		||||
| 
						 | 
				
			
			@ -161,27 +158,24 @@ pub async fn search_js(
 | 
			
		|||
        let mut complications = Complications::default();
 | 
			
		||||
        // todo: better way of specifying that user doesn't want complications
 | 
			
		||||
        if !query.contains("-complications") {
 | 
			
		||||
            /*let mut wikiquery = query.clone().to_lowercase();
 | 
			
		||||
            let mut wikiquery = query.clone().to_lowercase();
 | 
			
		||||
            wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
 | 
			
		||||
            wikiquery = wikiquery.replace(' ', "%20");
 | 
			
		||||
            // todo: proper url escaping
 | 
			
		||||
            let wikipedia_comp =
 | 
			
		||||
                tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
 | 
			
		||||
            complications.wikipedia = wikipedia_comp.await.unwrap_or_default();*/
 | 
			
		||||
            complications.wikipedia = wikipedia_comp.await.unwrap_or_default();
 | 
			
		||||
 | 
			
		||||
            let mut unit_query = query.clone().to_lowercase();
 | 
			
		||||
            unit_query = unit_query.replace("metre", "meter");
 | 
			
		||||
            let unit_comp = unit_converter::convert_unit(&unit_query);
 | 
			
		||||
            complications.unit_converter = unit_comp;
 | 
			
		||||
 | 
			
		||||
            let corrections = spellcheck::check(&query);
 | 
			
		||||
            complications.spellcheck = corrections;
 | 
			
		||||
        } else {
 | 
			
		||||
            complications.disabled = true;
 | 
			
		||||
            query = query.replace("-complications", "");
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let theme = info.get_theme();
 | 
			
		||||
        let theme = info.theme.clone();
 | 
			
		||||
        let querystr = url_encoded_data::stringify(&[("q", og_query.as_str())]);
 | 
			
		||||
        SearchTemplateJavascript {
 | 
			
		||||
            info,
 | 
			
		||||
| 
						 | 
				
			
			@ -223,7 +217,7 @@ pub struct SearchTemplate {
 | 
			
		|||
    pub built_on: String,
 | 
			
		||||
    pub year: String,
 | 
			
		||||
    pub alpha: bool,
 | 
			
		||||
    pub theme: Themes,
 | 
			
		||||
    pub theme: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn search_nojs(
 | 
			
		||||
| 
						 | 
				
			
			@ -233,7 +227,7 @@ pub async fn search_nojs(
 | 
			
		|||
    Extension(opts): Extension<Opts>,
 | 
			
		||||
) -> impl IntoResponse {
 | 
			
		||||
    fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplate {
 | 
			
		||||
        let theme = info.get_theme();
 | 
			
		||||
        let theme = info.theme.clone();
 | 
			
		||||
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
			
		||||
        SearchTemplate {
 | 
			
		||||
            info,
 | 
			
		||||
| 
						 | 
				
			
			@ -422,7 +416,7 @@ pub struct ImageSearchTemplate {
 | 
			
		|||
    pub built_on: String,
 | 
			
		||||
    pub year: String,
 | 
			
		||||
    pub alpha: bool,
 | 
			
		||||
    pub theme: Themes,
 | 
			
		||||
    pub theme: String,
 | 
			
		||||
}
 | 
			
		||||
pub async fn image_search(
 | 
			
		||||
    jar: CookieJar,
 | 
			
		||||
| 
						 | 
				
			
			@ -431,7 +425,7 @@ pub async fn image_search(
 | 
			
		|||
    Extension(opts): Extension<Opts>,
 | 
			
		||||
) -> impl IntoResponse {
 | 
			
		||||
    fn error_response(query: String, info: UserInfo, error: &str) -> ImageSearchTemplate {
 | 
			
		||||
        let theme = info.get_theme();
 | 
			
		||||
        let theme = info.theme.clone();
 | 
			
		||||
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
			
		||||
        ImageSearchTemplate {
 | 
			
		||||
            info,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -27,12 +27,49 @@ use serde::Deserialize;
 | 
			
		|||
use tokio::sync::Mutex;
 | 
			
		||||
use tracing::error;
 | 
			
		||||
use crate::{BUILT_ON, GIT_COMMIT, Opts, ALPHA, VERSION, WEBSITE_COUNT, YEAR};
 | 
			
		||||
use crate::routes::{authenticate_user, Themes, UserInfo};
 | 
			
		||||
use crate::routes::{authenticate_user, UserInfo};
 | 
			
		||||
 | 
			
		||||
pub struct Theme<'a> {
 | 
			
		||||
    pub value: &'a str,
 | 
			
		||||
    pub name: &'a str,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub static THEMES: &[Theme] = &[
 | 
			
		||||
    Theme {
 | 
			
		||||
        value: "default",
 | 
			
		||||
        name: "default theme",
 | 
			
		||||
    },
 | 
			
		||||
    Theme {
 | 
			
		||||
        value: "dark",
 | 
			
		||||
        name: "dark theme",
 | 
			
		||||
    },
 | 
			
		||||
    Theme {
 | 
			
		||||
        value: "oled",
 | 
			
		||||
        name: "lights out",
 | 
			
		||||
    },
 | 
			
		||||
    Theme {
 | 
			
		||||
        value: "classic",
 | 
			
		||||
        name: "classic",
 | 
			
		||||
    },
 | 
			
		||||
    Theme {
 | 
			
		||||
        value: "freaky",
 | 
			
		||||
        name: "freaky",
 | 
			
		||||
    },
 | 
			
		||||
    Theme {
 | 
			
		||||
        value: "water",
 | 
			
		||||
        name: "water",
 | 
			
		||||
    },
 | 
			
		||||
    Theme {
 | 
			
		||||
        value: "gloss",
 | 
			
		||||
        name: "gloss",
 | 
			
		||||
    },
 | 
			
		||||
];
 | 
			
		||||
 | 
			
		||||
#[derive(Template)]
 | 
			
		||||
#[template(path = "user_settings.html")]
 | 
			
		||||
pub struct SettingsTemplate {
 | 
			
		||||
    themes: Vec<Themes>,
 | 
			
		||||
    themes: &'static [Theme<'static>],
 | 
			
		||||
 | 
			
		||||
    error: Option<String>,
 | 
			
		||||
 | 
			
		||||
    info: UserInfo,
 | 
			
		||||
| 
						 | 
				
			
			@ -43,8 +80,7 @@ pub struct SettingsTemplate {
 | 
			
		|||
    year: String,
 | 
			
		||||
    alpha: bool,
 | 
			
		||||
    count: u64,
 | 
			
		||||
    theme: Themes,
 | 
			
		||||
    true_theme: Themes,
 | 
			
		||||
    theme: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn user_settings(
 | 
			
		||||
| 
						 | 
				
			
			@ -60,11 +96,11 @@ pub async fn user_settings(
 | 
			
		|||
                return (jar.remove("token"), crate::routes::index::frontpage_error(e.as_str(), opts.auth_url.clone())).into_response();
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
        let theme = info.get_theme();
 | 
			
		||||
        let theme = info.theme.clone();
 | 
			
		||||
        SettingsTemplate {
 | 
			
		||||
            themes: Themes::get_all_themes(),
 | 
			
		||||
            themes: THEMES,
 | 
			
		||||
            error: None,
 | 
			
		||||
            info: info.clone(),
 | 
			
		||||
            info,
 | 
			
		||||
            search_query: "".to_string(),
 | 
			
		||||
            version: VERSION.to_string(),
 | 
			
		||||
            git_commit: GIT_COMMIT.to_string(),
 | 
			
		||||
| 
						 | 
				
			
			@ -73,7 +109,6 @@ pub async fn user_settings(
 | 
			
		|||
            alpha: ALPHA,
 | 
			
		||||
            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
			
		||||
            theme,
 | 
			
		||||
            true_theme: info.get_true_theme(),
 | 
			
		||||
        }.into_response()
 | 
			
		||||
    } else {
 | 
			
		||||
        Redirect::temporary("/").into_response()
 | 
			
		||||
| 
						 | 
				
			
			@ -91,11 +126,11 @@ pub async fn theme_change_post(
 | 
			
		|||
    Extension(opts): Extension<Opts>,
 | 
			
		||||
    Form(input): Form<ThemeChangeForm>,
 | 
			
		||||
) -> impl IntoResponse {
 | 
			
		||||
    fn settings_error(info: UserInfo, theme: Themes, error: String) -> impl IntoResponse {
 | 
			
		||||
    fn settings_error(info: UserInfo, theme: String, error: String) -> impl IntoResponse {
 | 
			
		||||
        SettingsTemplate {
 | 
			
		||||
            themes: Themes::get_all_themes(),
 | 
			
		||||
            themes: THEMES,
 | 
			
		||||
            error: Some(error),
 | 
			
		||||
            info: info.clone(),
 | 
			
		||||
            info,
 | 
			
		||||
            search_query: "".to_string(),
 | 
			
		||||
            version: VERSION.to_string(),
 | 
			
		||||
            git_commit: GIT_COMMIT.to_string(),
 | 
			
		||||
| 
						 | 
				
			
			@ -104,7 +139,6 @@ pub async fn theme_change_post(
 | 
			
		|||
            alpha: ALPHA,
 | 
			
		||||
            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
			
		||||
            theme,
 | 
			
		||||
            true_theme: info.get_true_theme(),
 | 
			
		||||
        }.into_response()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -117,12 +151,10 @@ pub async fn theme_change_post(
 | 
			
		|||
            }
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let theme = info.get_theme();
 | 
			
		||||
 | 
			
		||||
        if let Some(theme_input) = &input.theme {
 | 
			
		||||
            if !Themes::get_all_themes().iter().map(|x| x.internal_name().to_string()).collect::<Vec<String>>().contains(&theme_input) {
 | 
			
		||||
                return settings_error(info, theme.clone(), "invalid input, please try again!".to_string()).into_response();
 | 
			
		||||
            }
 | 
			
		||||
        let theme = info.theme.clone();
 | 
			
		||||
        
 | 
			
		||||
        if !THEMES.iter().map(|v| v.value.to_string()).collect::<Vec<String>>().contains(&input.theme.clone().unwrap_or("default".to_string())) {
 | 
			
		||||
            return settings_error(info, theme, "invalid input, please try again!".to_string()).into_response();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let response = comms::query_service(comms::Query::AuthService(AuthServiceQuery {
 | 
			
		||||
| 
						 | 
				
			
			@ -168,4 +200,4 @@ pub async fn theme_change_post(
 | 
			
		|||
    } else {
 | 
			
		||||
        Redirect::to("/").into_response()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -341,7 +341,7 @@ pub async fn gather_search_results(nats: Arc<jetstream::Context>, query: &str, u
 | 
			
		|||
        search_results.remove(rm - i);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let theme = user_info.get_theme();
 | 
			
		||||
    let theme = user_info.theme.clone();
 | 
			
		||||
    let querystr = url_encoded_data::stringify(&[("q", query)]);
 | 
			
		||||
    SearchTemplate {
 | 
			
		||||
        info: user_info,
 | 
			
		||||
| 
						 | 
				
			
			@ -489,7 +489,7 @@ pub async fn gather_image_results(nats: Arc<jetstream::Context>, query: &str, us
 | 
			
		|||
        result.src = format!("/imgproxy?{}", url);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let theme = user_info.get_theme();
 | 
			
		||||
    let theme = user_info.theme.clone();
 | 
			
		||||
    ImageSearchTemplate {
 | 
			
		||||
        info: user_info,
 | 
			
		||||
        error: None,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,258 +0,0 @@
 | 
			
		|||
use once_cell::sync::Lazy;
 | 
			
		||||
use tracing::{debug, error};
 | 
			
		||||
use std::{cmp, mem};
 | 
			
		||||
use std::collections::BTreeMap;
 | 
			
		||||
use std::sync::Mutex;
 | 
			
		||||
 | 
			
		||||
// how to generate words.txt:
 | 
			
		||||
// clone https://github.com/en-wl/wordlist && cd wordlist
 | 
			
		||||
// make
 | 
			
		||||
// ./scowl wl --deaccent > words0.txt
 | 
			
		||||
// filtered with this python script:
 | 
			
		||||
// -----------------------------------
 | 
			
		||||
// with open("words0.txt", "r") as f:
 | 
			
		||||
// 	out = []
 | 
			
		||||
// 	for line in f:
 | 
			
		||||
// 		line = line.lower()
 | 
			
		||||
// 		if not line in out:
 | 
			
		||||
// 			out.append(line)
 | 
			
		||||
// 	out.sort()
 | 
			
		||||
// 	with open("words.txt", "w") as out_file:
 | 
			
		||||
// 		for line in out:
 | 
			
		||||
// 			out_file.write(f'{line}')
 | 
			
		||||
// ------------------------------------
 | 
			
		||||
// then use regex or similar to enclose every line in quotes and add comma, then add 'static KNOWN_WORDS: &[&str] = &[' to the start and '];' to the end
 | 
			
		||||
include!("./words.txt");
 | 
			
		||||
 | 
			
		||||
// a cache of misspelled words and the closest match in the database
 | 
			
		||||
static MATCH_CACHE: Lazy<Mutex<BTreeMap<String, Option<&str>>>> = Lazy::new(|| Mutex::new(BTreeMap::new()));
 | 
			
		||||
 | 
			
		||||
// max distance before no alternatives are considered
 | 
			
		||||
const MAX_DISTANCE: usize = 6;
 | 
			
		||||
// max input text size before spellcheck is not run. on my laptop 13,000 chars of input takes around 4 seconds so this should be fine
 | 
			
		||||
// update: got a larger word database and it doesn't take 4 seconds anymore lmao
 | 
			
		||||
// update 2: added binary search & caching and now 50000 chars takes ~2-4 seconds
 | 
			
		||||
const MAX_QUERY_WORDS: usize = 512;
 | 
			
		||||
// Not really a huge issue, just used to hopefully reduce the allocations made in levenshtein_distance & provide minor performance improvements
 | 
			
		||||
// not needed for now
 | 
			
		||||
// const MAX_WORD_SIZE: usize = 64;
 | 
			
		||||
 | 
			
		||||
pub type SpellCheckResults = Vec<SpellCheckResult>;
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub struct SpellCheckResult {
 | 
			
		||||
	pub orig: String,
 | 
			
		||||
	pub correction: &'static str,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn check(query: &String) -> Option<SpellCheckResults> {
 | 
			
		||||
	error!("Query: {}", query);
 | 
			
		||||
	/*let query: &str = {
 | 
			
		||||
		if query.len() > MAX_QUERY_SIZE {
 | 
			
		||||
			error!("Query is too large to be spell checked, only checking first {} chars", MAX_QUERY_SIZE);
 | 
			
		||||
			query.get(0..MAX_QUERY_SIZE).unwrap()
 | 
			
		||||
			// return None;
 | 
			
		||||
		} else {
 | 
			
		||||
			query
 | 
			
		||||
		}
 | 
			
		||||
	};*/
 | 
			
		||||
 | 
			
		||||
	// TODO: look into how 'wc -w' counts words and copy how it splits things
 | 
			
		||||
	let query_flattened = prepare(query);
 | 
			
		||||
	let words = query_flattened
 | 
			
		||||
		.split_whitespace()
 | 
			
		||||
		.filter(|word| word.len() > 0)
 | 
			
		||||
		// .filter(|word|)
 | 
			
		||||
		.collect::<Vec<_>>();
 | 
			
		||||
 | 
			
		||||
	error!("Words in query: {}", words.len());
 | 
			
		||||
 | 
			
		||||
	if (words.len() > MAX_QUERY_WORDS) {
 | 
			
		||||
		error!("{} is too many words in query to spell check", words.len());
 | 
			
		||||
		// return None;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	let mut distances: SpellCheckResults = vec![];
 | 
			
		||||
	for qword in words {
 | 
			
		||||
		// error!("Word: {}", qword);
 | 
			
		||||
		// error!("is known: {:?}", KNOWN_WORDS.binary_search(&qword));
 | 
			
		||||
		if KNOWN_WORDS.binary_search(&qword).is_ok() {
 | 
			
		||||
			// error!("Exact word match: {}", qword);
 | 
			
		||||
		} else {
 | 
			
		||||
			let mut cache = MATCH_CACHE.lock().unwrap();
 | 
			
		||||
			if cache.contains_key(qword) {
 | 
			
		||||
				// We don't need to tell the user if there is no suggestion for an unknown word
 | 
			
		||||
				if (cache.get(qword).unwrap().is_some()) {
 | 
			
		||||
					// TODO: don't push duplicate misspelled words
 | 
			
		||||
					distances.push(SpellCheckResult{orig: qword.to_owned(), correction: cache.get(qword).unwrap().unwrap()});
 | 
			
		||||
				}
 | 
			
		||||
			} else {
 | 
			
		||||
				let closest_match = KNOWN_WORDS.iter()
 | 
			
		||||
					.map(|kword| (kword, levenshtein_distance(&qword, &kword)))
 | 
			
		||||
					.min_by(|a, b| a.1.cmp(&b.1)).unwrap();
 | 
			
		||||
 | 
			
		||||
				assert!(closest_match.1 > 0, "Found exact match not caught by binary search, is the word database properly sorted?");
 | 
			
		||||
 | 
			
		||||
				if closest_match.1 <= MAX_DISTANCE {
 | 
			
		||||
					cache.insert(qword.to_owned(), Some(*closest_match.0));
 | 
			
		||||
					distances.push(SpellCheckResult{orig: qword.to_owned(), correction: *closest_match.0});
 | 
			
		||||
				} else {
 | 
			
		||||
					// even though there is no close enough match, cache it anyway so that it doesn't have to be looked up every time
 | 
			
		||||
					cache.insert(qword.to_owned(), None);
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		// error!("End");
 | 
			
		||||
	}
 | 
			
		||||
	error!("Spell check results:");
 | 
			
		||||
	for word in &distances {
 | 
			
		||||
		debug!("instead of '{}' did you mean '{}'?", word.orig, word.correction);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if distances.len() > 0 {
 | 
			
		||||
		Some(distances)
 | 
			
		||||
	} else {
 | 
			
		||||
		None
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
/*	let distances = prepare(query).split_whitespace()
 | 
			
		||||
		.filter(|qword| qword.len() > 0)
 | 
			
		||||
		.map(|qword| qword.to_lowercase())
 | 
			
		||||
		.map(
 | 
			
		||||
			|qword| {
 | 
			
		||||
				let mut exact_match = false;
 | 
			
		||||
				KNOWN_WORDS.iter()
 | 
			
		||||
					.map(|kword| kword.to_lowercase())
 | 
			
		||||
					.map(
 | 
			
		||||
						|kword|
 | 
			
		||||
						(qword.clone(), kword.clone(), levenshtein_distance(&qword, &kword)))
 | 
			
		||||
							// totally isn't jank at all and is the best solution totally
 | 
			
		||||
							.take_while(|val| if exact_match {false} else if val.2 == 0 {exact_match = true; true} else {true})
 | 
			
		||||
							// .map(|val| {error!("Val: {:?}", val); val})
 | 
			
		||||
							.min_by(|a, b| a.2.cmp(&b.2)).unwrap()
 | 
			
		||||
			})/*.filter_map(|word| word)*/.filter(|word| word.2 > 0 && word.2 <= MAX_DISTANCE)/*.filter(|(_, _, dist)| *dist > 0 && *dist <= MAX_DISTANCE)*/.map(|word| SpellCheckResult{orig: word.0, correction: word.1.to_owned().to_owned()})/*.filter(|(_, _, d)| *d > 0)*/
 | 
			
		||||
		.map(|word| {
 | 
			
		||||
			debug!("instead of '{}' did you mean '{}'?", word.orig, word.correction);
 | 
			
		||||
			word
 | 
			
		||||
		})
 | 
			
		||||
		.collect::<Vec<_>>();
 | 
			
		||||
	/*for word in &distances {
 | 
			
		||||
		debug!("instead of '{}' did you mean '{}'? (distance of )", word.0, word.1/*, word.2*/);
 | 
			
		||||
	}*/
 | 
			
		||||
 | 
			
		||||
	if distances.len() > 0 {
 | 
			
		||||
		Some(distances)
 | 
			
		||||
	} else {
 | 
			
		||||
		None
 | 
			
		||||
	}*/
 | 
			
		||||
	// None
 | 
			
		||||
	// vec![]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// TODO: handle symbols better, probably with a regex
 | 
			
		||||
fn prepare(s: &str) -> String {
 | 
			
		||||
	s.replace("\"", "")
 | 
			
		||||
		.replace(",", " ")
 | 
			
		||||
		.replace(":", " ")
 | 
			
		||||
		.replace(".", " ")
 | 
			
		||||
		.replace("/", " ")
 | 
			
		||||
		.replace("&", " ")
 | 
			
		||||
		.replace("!", " ")
 | 
			
		||||
		.replace("?", " ")
 | 
			
		||||
		/*.replace("'", "")*/
 | 
			
		||||
		.replace("0", "")
 | 
			
		||||
		.replace("1", "")
 | 
			
		||||
		.replace("2", "")
 | 
			
		||||
		.replace("3", "")
 | 
			
		||||
		.replace("4", "")
 | 
			
		||||
		.replace("5", "")
 | 
			
		||||
		.replace("6", "")
 | 
			
		||||
		.replace("7", "")
 | 
			
		||||
		.replace("8", "")
 | 
			
		||||
		.replace("9", "")
 | 
			
		||||
		.to_lowercase()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// cost of 2 for add/remove, cost of 1 for replace
 | 
			
		||||
fn levenshtein_distance(a: &str, other: &str) -> usize {
 | 
			
		||||
	// debug!("Self: '{}', Other: '{}'", a, other);
 | 
			
		||||
	// let mut dist: &mut [usize; MAX_WORD_SIZE] = &mut [0usize; MAX_WORD_SIZE];
 | 
			
		||||
	// let mut dist_prev: &mut [usize; MAX_WORD_SIZE] = &mut [0usize; MAX_WORD_SIZE];
 | 
			
		||||
 | 
			
		||||
	let mut dist = vec![0usize; other.len() + 1];
 | 
			
		||||
	let mut dist_prev = vec![0usize; other.len() + 1];
 | 
			
		||||
 | 
			
		||||
	for i in 0..=other.len() {
 | 
			
		||||
		dist_prev[i] = i;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for i in 1..=a.len() {
 | 
			
		||||
		dist[0] = i;
 | 
			
		||||
 | 
			
		||||
		for j in 1..=other.len() {
 | 
			
		||||
			if a.get(i - 1..i).unwrap() == other.get(j - 1..j).unwrap() {
 | 
			
		||||
				dist[j] = dist_prev[j - 1];
 | 
			
		||||
			} else {
 | 
			
		||||
				// TODO: make addition/subtraction 1 more expensive than replacement, presumably by adding '+ 1' to 2/3 of these
 | 
			
		||||
				// motivation: honex from bee movie script is turned into hone instead of honey, this will also generally improve results & is what wikipedia says to do (best reason)
 | 
			
		||||
				dist[j] = 1 + cmp::min(
 | 
			
		||||
					*dist.get(j - 1).unwrap() + 1,
 | 
			
		||||
					cmp::min(*dist_prev.get(j).unwrap() + 1, *dist_prev.get(j - 1).unwrap()));
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		mem::swap(&mut dist, &mut dist_prev);
 | 
			
		||||
	}
 | 
			
		||||
	dist_prev[other.len()]
 | 
			
		||||
 | 
			
		||||
	
 | 
			
		||||
 | 
			
		||||
	/*let mut distances = vec![vec![0usize; other.len() + 1]; a.len() + 1];
 | 
			
		||||
	for i in 1..=a.len() {
 | 
			
		||||
		distances[i][0] = i;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for j in 1..=other.len() {
 | 
			
		||||
		distances[0][j] = j;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*unsafe {
 | 
			
		||||
	for i in 1..=a.len() {
 | 
			
		||||
		for j in 1..=other.len() {
 | 
			
		||||
			if *a.get_unchecked(i - 1..i) == *other.get_unchecked(j - 1..j) {
 | 
			
		||||
				// 0
 | 
			
		||||
				distances[i][j] = *distances.get_unchecked(i - 1).get_unchecked(j - 1);
 | 
			
		||||
			} else {
 | 
			
		||||
				// 1
 | 
			
		||||
				distances[i][j] = 1 + cmp::min(
 | 
			
		||||
					(*distances.get_unchecked(i - 1).get_unchecked(j - 1)),
 | 
			
		||||
					cmp::min(
 | 
			
		||||
						(*distances.get_unchecked(i - 1).get_unchecked(j)),
 | 
			
		||||
						(*distances.get_unchecked(i).get_unchecked(j - 1))
 | 
			
		||||
					)
 | 
			
		||||
				);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	}*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	for i in 1..=a.len()  {
 | 
			
		||||
		for j in 1..=other.len() {
 | 
			
		||||
			if a.get(i - 1..i).unwrap() == other.get(j - 1..j).unwrap() {
 | 
			
		||||
				// 0
 | 
			
		||||
				distances[i][j] = *distances.get(i - 1).unwrap().get(j - 1).unwrap();
 | 
			
		||||
			} else {
 | 
			
		||||
				// 1
 | 
			
		||||
				distances[i][j] = 1 + cmp::min(
 | 
			
		||||
					(*distances.get(i - 1).unwrap().get(j - 1).unwrap()),
 | 
			
		||||
					cmp::min(
 | 
			
		||||
						(*distances.get(i - 1).unwrap().get(j).unwrap()),
 | 
			
		||||
						(*distances.get(i).unwrap().get(j - 1).unwrap())
 | 
			
		||||
					)
 | 
			
		||||
				);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	*distances.get(a.len()).unwrap().get(other.len()).unwrap()*/
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1,11 +0,0 @@
 | 
			
		|||
.pagecontent {
 | 
			
		||||
    background: url("/static/snow.gif");
 | 
			
		||||
    border: 3px inset black;
 | 
			
		||||
    color: black;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@media screen and (max-width: 800px) {
 | 
			
		||||
    .pagecontent {
 | 
			
		||||
        width: calc(100% - 4px);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -2,6 +2,7 @@
 | 
			
		|||
    background: url("/static/snow.gif");
 | 
			
		||||
    border: 3px inset black;
 | 
			
		||||
    color: black;
 | 
			
		||||
    box-sizing: border-box;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@media screen and (max-width: 800px) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,7 @@
 | 
			
		|||
.settings-area {
 | 
			
		||||
    background: url("/static/snow.gif");
 | 
			
		||||
    border: 1px solid white;
 | 
			
		||||
    box-sizing: border-box;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@media screen and (max-width: 800px) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,6 @@
 | 
			
		|||
{% extends "admin/shell.html" %}
 | 
			
		||||
 | 
			
		||||
{% block title %}Invite Codes{% endblock %}
 | 
			
		||||
{% block title %}Home{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
{% endblock %}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@
 | 
			
		|||
{% block title %}{{ announcement.title }}{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/announcement.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/announcement.css"/>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -12,11 +12,11 @@
 | 
			
		|||
        <a href="/" class="lyphe">
 | 
			
		||||
            <div id="lyphesmall">
 | 
			
		||||
                <div class="bent-surrounding">
 | 
			
		||||
                    <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png?git={{ git_commit }}"
 | 
			
		||||
                    <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png"
 | 
			
		||||
                         alt="image of lyphe, our mascot!">
 | 
			
		||||
                </div>
 | 
			
		||||
                <div id="lyphetitlenav">
 | 
			
		||||
                    <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask lyphe!"/>
 | 
			
		||||
                    <img src="/static/img/logo.png" alt="ask lyphe!"/>
 | 
			
		||||
                </div>
 | 
			
		||||
            </div>
 | 
			
		||||
        </a>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,8 +3,8 @@
 | 
			
		|||
{% block title %}the best search engine{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/frontpage.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/inline-announcement.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme}}/frontpage.css"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme}}/inline-announcement.css"/>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -17,10 +17,10 @@
 | 
			
		|||
    {% endmatch %}
 | 
			
		||||
    <div id="lyphebig">
 | 
			
		||||
        <div class="bent-surrounding">
 | 
			
		||||
            <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
			
		||||
            <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png" alt="image of lyphe, our mascot!">
 | 
			
		||||
        </div>
 | 
			
		||||
        <div id="lyphetitle">
 | 
			
		||||
            <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
			
		||||
            <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
			
		||||
            <p>a user-first, customizable, useful, and fun search engine!</p>
 | 
			
		||||
        </div>
 | 
			
		||||
    </div>
 | 
			
		||||
| 
						 | 
				
			
			@ -67,7 +67,7 @@
 | 
			
		|||
        <div class="pagegradient">
 | 
			
		||||
            <div id="userfirst" class="feature">
 | 
			
		||||
                <h1>user-first</h1>
 | 
			
		||||
                <img src="/static/lyphe.png?git={{ git_commit }}" alt="lorem ipsum dolor sit amet"/>
 | 
			
		||||
                <img src="/static/lyphe.png" alt="lorem ipsum dolor sit amet"/>
 | 
			
		||||
                <p>
 | 
			
		||||
                    we will never serve a single ad.
 | 
			
		||||
                    <br/>
 | 
			
		||||
| 
						 | 
				
			
			@ -90,7 +90,7 @@
 | 
			
		|||
            </div>
 | 
			
		||||
            <div id="customizable" class="feature">
 | 
			
		||||
                <h1>customizable</h1>
 | 
			
		||||
                <img src="/static/lyphe.png?git={{ git_commit }}" alt="lorem ipsum dolor sit amet"/>
 | 
			
		||||
                <img src="/static/lyphe.png" alt="lorem ipsum dolor sit amet"/>
 | 
			
		||||
                <p>
 | 
			
		||||
                    we aim to design askLyphe in a way that makes personalizing your search results easy and fun!
 | 
			
		||||
                    <br/>
 | 
			
		||||
| 
						 | 
				
			
			@ -102,7 +102,7 @@
 | 
			
		|||
            </div>
 | 
			
		||||
            <div id="useful-fun" class="feature">
 | 
			
		||||
                <h1>useful, but fun!</h1>
 | 
			
		||||
                <img src="/static/lyphe.png?git={{ git_commit }}" alt="lorem ipsum dolor sit amet"/>
 | 
			
		||||
                <img src="/static/lyphe.png" alt="lorem ipsum dolor sit amet"/>
 | 
			
		||||
                <p>
 | 
			
		||||
                    our search engine does not rely on (however may still include, for completeness) results from
 | 
			
		||||
                    Google, Bing, or any other search engine out there.
 | 
			
		||||
| 
						 | 
				
			
			@ -135,7 +135,7 @@
 | 
			
		|||
            </div>
 | 
			
		||||
            <div id="fourth-box" class="feature">
 | 
			
		||||
                <h1>fourth box!</h1>
 | 
			
		||||
                <img src="/static/lyphe.png?git={{ git_commit }}" alt="lorem ipsum dolor sit amet"/>
 | 
			
		||||
                <img src="/static/lyphe.png" alt="lorem ipsum dolor sit amet"/>
 | 
			
		||||
                <p>
 | 
			
		||||
                    i haven't decided what to put here yet! we're still in alpha so i'm sure i'll come up with something
 | 
			
		||||
                    eventually, but i wanted this fourth box because i feel like the design flows nicer with it (:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,9 +3,9 @@
 | 
			
		|||
{% block title %}Home{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/home.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/home.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/inline-announcement.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/home.css"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme}}/home.css"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme}}/inline-announcement.css"/>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -15,10 +15,10 @@
 | 
			
		|||
    <div class="pagecontent">
 | 
			
		||||
        <div id="lyphebig">
 | 
			
		||||
            <div class="bent-surrounding">
 | 
			
		||||
                <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
			
		||||
                <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png" alt="image of lyphe, our mascot!">
 | 
			
		||||
            </div>
 | 
			
		||||
            <div id="lyphetitle">
 | 
			
		||||
                <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
			
		||||
                <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
			
		||||
                <h2>the best search engine!</h2>
 | 
			
		||||
            </div>
 | 
			
		||||
        </div>
 | 
			
		||||
| 
						 | 
				
			
			@ -71,4 +71,4 @@
 | 
			
		|||
    </div>
 | 
			
		||||
    {% include "ui/footer.html" %}
 | 
			
		||||
</div>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
{% endblock %}
 | 
			
		||||
| 
						 | 
				
			
			@ -3,9 +3,9 @@
 | 
			
		|||
{% block title %}Images - {{ search_query }}{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/imagesearch.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/imagesearch.css?git={{ git_commit }}"/>
 | 
			
		||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css?git={{ git_commit }}"/>{% endif %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/imagesearch.css"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme}}/imagesearch.css"/>
 | 
			
		||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css"/>{% endif %}
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -16,10 +16,10 @@
 | 
			
		|||
 | 
			
		||||
    <div class="index-bar">
 | 
			
		||||
        <a href="{{ websearch_url }}">
 | 
			
		||||
            <img id="websearch-img" src="/static/img/websearch.png?git={{ git_commit }}" alt="search the web"/>
 | 
			
		||||
            <img id="websearch-img" src="/static/img/websearch.png" alt="search the web"/>
 | 
			
		||||
        </a>
 | 
			
		||||
        <a href="{{ imagesearch_url }}">
 | 
			
		||||
            <img id="imagesearch-img-selected" src="/static/img/imagesearch_selected.png?git={{ git_commit }}" alt="search for images"/>
 | 
			
		||||
            <img id="imagesearch-img-selected" src="/static/img/imagesearch_selected.png" alt="search for images"/>
 | 
			
		||||
        </a>
 | 
			
		||||
    </div>
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -61,4 +61,4 @@
 | 
			
		|||
 | 
			
		||||
    {% include "ui/footer.html" %}
 | 
			
		||||
</div>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
{% endblock %}
 | 
			
		||||
| 
						 | 
				
			
			@ -3,9 +3,9 @@
 | 
			
		|||
{% block title %}{{ search_query }}{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/search.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/search.css?git={{ git_commit }}"/>
 | 
			
		||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css?git={{ git_commit }}"/>{% endif %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/search.css"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme}}/search.css"/>
 | 
			
		||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css"/>{% endif %}
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -16,10 +16,10 @@
 | 
			
		|||
 | 
			
		||||
    <div class="index-bar">
 | 
			
		||||
        <a href="{{ websearch_url }}">
 | 
			
		||||
            <img id="websearch-img-selected" src="/static/img/websearch_selected.png?git={{ git_commit }}" alt="search the web"/>
 | 
			
		||||
            <img id="websearch-img-selected" src="/static/img/websearch_selected.png" alt="search the web"/>
 | 
			
		||||
        </a>
 | 
			
		||||
        <a href="{{ imagesearch_url }}">
 | 
			
		||||
            <img id="imagesearch-img" src="/static/img/imagesearch.png?git={{ git_commit }}" alt="search for images"/>
 | 
			
		||||
            <img id="imagesearch-img" src="/static/img/imagesearch.png" alt="search for images"/>
 | 
			
		||||
        </a>
 | 
			
		||||
    </div>
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -97,4 +97,4 @@
 | 
			
		|||
 | 
			
		||||
    {% include "ui/footer.html" %}
 | 
			
		||||
</div>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
{% endblock %}
 | 
			
		||||
| 
						 | 
				
			
			@ -3,10 +3,10 @@
 | 
			
		|||
{% block title %}{{ search_query }}{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/search.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/search.css?git={{ git_commit }}"/>
 | 
			
		||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css?git={{ git_commit }}"/>{% endif %}
 | 
			
		||||
<script src="/static/js/search.js?git={{ git_commit }}" defer></script>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/search.css"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme}}/search.css"/>
 | 
			
		||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css"/>{% endif %}
 | 
			
		||||
<script src="/static/js/search.js" defer></script>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -17,10 +17,10 @@
 | 
			
		|||
 | 
			
		||||
    <div class="index-bar">
 | 
			
		||||
        <a href="{{ websearch_url }}">
 | 
			
		||||
            <img id="websearch-img-selected" src="/static/img/websearch_selected.png?git={{ git_commit }}" alt="search the web"/>
 | 
			
		||||
            <img id="websearch-img-selected" src="/static/img/websearch_selected.png" alt="search the web"/>
 | 
			
		||||
        </a>
 | 
			
		||||
        <a href="{{ imagesearch_url }}">
 | 
			
		||||
            <img id="imagesearch-img" src="/static/img/imagesearch.png?git={{ git_commit }}" alt="search for images"/>
 | 
			
		||||
            <img id="imagesearch-img" src="/static/img/imagesearch.png" alt="search for images"/>
 | 
			
		||||
        </a>
 | 
			
		||||
    </div>
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -76,4 +76,4 @@
 | 
			
		|||
 | 
			
		||||
    {% include "ui/footer.html" %}
 | 
			
		||||
</div>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
{% endblock %}
 | 
			
		||||
| 
						 | 
				
			
			@ -11,12 +11,12 @@
 | 
			
		|||
              title="AskLyphe"
 | 
			
		||||
              href="/static/osd.xml" />
 | 
			
		||||
 | 
			
		||||
        <link rel="stylesheet" href="/static/themes/default/shell.css?git={{ git_commit }}" />
 | 
			
		||||
        <link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/shell.css?git={{ git_commit }}" />
 | 
			
		||||
        <link rel="stylesheet" href="/static/themes/default/shell.css" />
 | 
			
		||||
        <link rel="stylesheet" href="/static/themes/{{theme}}/shell.css" />
 | 
			
		||||
 | 
			
		||||
        {% block head %}{% endblock %}
 | 
			
		||||
    </head>
 | 
			
		||||
    <body>
 | 
			
		||||
        {% block page %}{% endblock %}
 | 
			
		||||
    </body>
 | 
			
		||||
</html>
 | 
			
		||||
</html>
 | 
			
		||||
| 
						 | 
				
			
			@ -3,8 +3,8 @@
 | 
			
		|||
{% block title %}{{ search_query }}{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block head %}
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/settings.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/settings.css?git={{ git_commit }}"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/default/settings.css"/>
 | 
			
		||||
<link rel="stylesheet" href="/static/themes/{{theme}}/settings.css"/>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
 | 
			
		||||
{% block page %}
 | 
			
		||||
| 
						 | 
				
			
			@ -43,15 +43,16 @@
 | 
			
		|||
                <div class="settings-row">
 | 
			
		||||
                    <div id="theme" class="settings-section">
 | 
			
		||||
                        <h2>theme</h2>
 | 
			
		||||
                        <p>your current theme is: "{{true_theme.display_name()}}"</p>
 | 
			
		||||
                        {% if true_theme.internal_name() != theme.internal_name() %}
 | 
			
		||||
                        <p>today's random theme is {{ theme.display_name() }}</p>
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                        {% for t in themes %}
 | 
			
		||||
                        {%if theme==t.value%}
 | 
			
		||||
                        <p>your current theme is: "{{t.name}}"</p>
 | 
			
		||||
                        {%endif%}
 | 
			
		||||
                        {% endfor %}
 | 
			
		||||
                        <form action="/user_settings/set_theme" method="post">
 | 
			
		||||
                            <label for="theme-selector">theme</label>
 | 
			
		||||
                            <select name="theme" id="theme-selector">
 | 
			
		||||
                                {% for t in themes %}
 | 
			
		||||
                                <option value="{{t.internal_name()}}" {%if true_theme.internal_name()==t.internal_name()%}selected{%endif%}>{{t.display_name()}}</option>
 | 
			
		||||
                                <option value="{{t.value}}" {%if theme==t.value%}selected{%endif%}>{{t.name}}</option>
 | 
			
		||||
                                {% endfor %}
 | 
			
		||||
                            </select>
 | 
			
		||||
                            <button type="submit" id="theme-submit">change theme!</button>
 | 
			
		||||
| 
						 | 
				
			
			@ -64,4 +65,4 @@
 | 
			
		|||
 | 
			
		||||
    {% include "ui/footer.html" %}
 | 
			
		||||
</div>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
{% endblock %}
 | 
			
		||||
| 
						 | 
				
			
			@ -457,7 +457,6 @@ pub async fn user_count(db: &DatabaseConnection) -> Result<usize, FetchUserError
 | 
			
		|||
/// returns the number of users in the database who are admins
 | 
			
		||||
pub async fn admin_count(db: &DatabaseConnection) -> Result<usize, FetchUserError> {
 | 
			
		||||
    // dont fucking touch this, i don't know why it works but it does, it's actually evil
 | 
			
		||||
    // note: doesn't work
 | 
			
		||||
    Ok(user::Entity::find().filter(user::Column::Flags.into_expr().binary(BinOper::LShift, Expr::value(63 - 2)).lt(1 << (63 - 2)))
 | 
			
		||||
        .count(db).await.map_err(|e| {
 | 
			
		||||
        error!("DATABASE ERROR WHILE ADMINCOUNT: {e}");
 | 
			
		||||
| 
						 | 
				
			
			@ -483,4 +482,4 @@ pub async fn email_list(db: &DatabaseConnection) -> Result<Vec<(String, String)>
 | 
			
		|||
    })?.into_iter().map(|v| {
 | 
			
		||||
        (v.username, v.email)
 | 
			
		||||
    }).collect())
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -15,7 +15,6 @@ mod process;
 | 
			
		|||
pub mod db;
 | 
			
		||||
mod email;
 | 
			
		||||
 | 
			
		||||
use std::env;
 | 
			
		||||
use std::string::ToString;
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
 | 
			
		||||
| 
						 | 
				
			
			@ -37,8 +36,6 @@ pub static SMTP_URL: Lazy<String> = Lazy::new(|| std::env::var("SMTP_URL").expec
 | 
			
		|||
pub static SMTP_USERNAME: Lazy<String> = Lazy::new(|| std::env::var("SMTP_USERNAME").expect("NO SMTP_USERNAME DEFINED"));
 | 
			
		||||
pub static SMTP_PASSWORD: Lazy<String> = Lazy::new(|| std::env::var("SMTP_PASSWORD").expect("NO SMTP_PASSWORD DEFINED"));
 | 
			
		||||
 | 
			
		||||
pub static AUTH_URL: Lazy<String> = Lazy::new(|| std::env::var("AUTH_URL").unwrap_or("https://auth.asklyphe.com".to_string()));
 | 
			
		||||
 | 
			
		||||
pub static PROCESSES_HANDLED: AtomicU64 = AtomicU64::new(0);
 | 
			
		||||
pub static LAST_MESSAGE: AtomicI64 = AtomicI64::new(0);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -21,7 +21,7 @@ use crate::db::{invite_code, session, user};
 | 
			
		|||
use crate::db::invite_code::ConsumeInviteCodeError;
 | 
			
		||||
use crate::db::session::{CreateSessionError, DeleteSessionError, FetchSessionError};
 | 
			
		||||
use crate::db::user::{CreateUserError, DeleteUserError, EmailChangeError, FetchUserError, RegisterVerificationCodeError, VerifyEmailPassComboError, VerifyVerificationCodeError};
 | 
			
		||||
use crate::{email, AUTH_URL};
 | 
			
		||||
use crate::email;
 | 
			
		||||
 | 
			
		||||
fn generate_email_verification_code() -> String {
 | 
			
		||||
    rand::thread_rng()
 | 
			
		||||
| 
						 | 
				
			
			@ -121,7 +121,7 @@ pub async fn register_request(db: &DatabaseConnection, request: RegisterRequest)
 | 
			
		|||
    }
 | 
			
		||||
 | 
			
		||||
    debug!("verification code for {} is \"{}\"", request.username, verification_code);
 | 
			
		||||
    email::send_verification_code_email(&request.email, &request.username, format!("{}/verify?username={}&token={}", AUTH_URL.as_str(), request.username, verification_code).as_str());
 | 
			
		||||
    email::send_verification_code_email(&request.email, &request.username, format!("https://auth.asklyphe.com/verify?username={}&token={}", request.username, verification_code).as_str());
 | 
			
		||||
 | 
			
		||||
    RegisterResponse::Success
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										30
									
								
								lyphedb/.gitignore
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										30
									
								
								lyphedb/.gitignore
									
										
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -1,30 +0,0 @@
 | 
			
		|||
# ---> Rust
 | 
			
		||||
# Generated by Cargo
 | 
			
		||||
# will have compiled files and executables
 | 
			
		||||
debug/
 | 
			
		||||
target/
 | 
			
		||||
 | 
			
		||||
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 | 
			
		||||
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
 | 
			
		||||
Cargo.lock
 | 
			
		||||
 | 
			
		||||
# These are backup files generated by rustfmt
 | 
			
		||||
**/*.rs.bk
 | 
			
		||||
 | 
			
		||||
# MSVC Windows builds of rustc generate these, which store debugging information
 | 
			
		||||
*.pdb
 | 
			
		||||
 | 
			
		||||
# RustRover
 | 
			
		||||
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 | 
			
		||||
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 | 
			
		||||
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
 | 
			
		||||
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 | 
			
		||||
#.idea/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Added by cargo
 | 
			
		||||
 | 
			
		||||
/target
 | 
			
		||||
 | 
			
		||||
# don't commit employee test configs (:
 | 
			
		||||
husky_config.toml
 | 
			
		||||
| 
						 | 
				
			
			@ -1,18 +0,0 @@
 | 
			
		|||
[package]
 | 
			
		||||
name = "lyphedb"
 | 
			
		||||
version = "0.1.0"
 | 
			
		||||
edition = "2024"
 | 
			
		||||
 | 
			
		||||
[dependencies]
 | 
			
		||||
log = "0.4.20"
 | 
			
		||||
rmp-serde = "1.1.2"
 | 
			
		||||
futures = "0.3.30"
 | 
			
		||||
async-nats = "0.38.0"
 | 
			
		||||
tokio = { version = "1.0", features = ["full"] }
 | 
			
		||||
chrono = "0.4.31"
 | 
			
		||||
serde = { version = "1.0", features = ["derive"] }
 | 
			
		||||
rand = "0.9.0"
 | 
			
		||||
toml = "0.8.20"
 | 
			
		||||
env_logger = "0.11.6"
 | 
			
		||||
once_cell = "1.20.3"
 | 
			
		||||
percent-encoding = "2.3.1"
 | 
			
		||||
| 
						 | 
				
			
			@ -1,10 +0,0 @@
 | 
			
		|||
name = "lyphedb-test"
 | 
			
		||||
write = true
 | 
			
		||||
master = "/lyphedb_master"
 | 
			
		||||
ram_limit = "1mb" # supported suffixes: b,kb,mb,gb
 | 
			
		||||
gc_limit = "512kb"
 | 
			
		||||
avg_entry_size = "1kb"
 | 
			
		||||
log = "debug"
 | 
			
		||||
nats_cert = "nats/nats.cert"
 | 
			
		||||
nats_key = "nats/nats.pem"
 | 
			
		||||
nats_url = "127.0.0.1:4222"
 | 
			
		||||
| 
						 | 
				
			
			@ -1,14 +0,0 @@
 | 
			
		|||
[package]
 | 
			
		||||
name = "ldbtesttool"
 | 
			
		||||
version = "0.1.0"
 | 
			
		||||
edition = "2024"
 | 
			
		||||
 | 
			
		||||
[dependencies]
 | 
			
		||||
rmp-serde = "1.1.2"
 | 
			
		||||
futures = "0.3.30"
 | 
			
		||||
async-nats = "0.38.0"
 | 
			
		||||
tokio = { version = "1.0", features = ["full"] }
 | 
			
		||||
serde = { version = "1.0", features = ["derive"] }
 | 
			
		||||
lyphedb = { path = "../" }
 | 
			
		||||
toml = "0.8.20"
 | 
			
		||||
rand = "0.9.0"
 | 
			
		||||
| 
						 | 
				
			
			@ -1,40 +0,0 @@
 | 
			
		|||
use serde::Deserialize;
 | 
			
		||||
 | 
			
		||||
#[derive(Deserialize)]
 | 
			
		||||
struct ConfigFile {
 | 
			
		||||
    pub name: String,
 | 
			
		||||
    pub nats_cert: String,
 | 
			
		||||
    pub nats_key: String,
 | 
			
		||||
    pub nats_url: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
pub struct LypheDBConfig {
 | 
			
		||||
    pub name: String,
 | 
			
		||||
    pub nats_cert: String,
 | 
			
		||||
    pub nats_key: String,
 | 
			
		||||
    pub nats_url: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn load_config(path: &str) -> LypheDBConfig {
 | 
			
		||||
    let s = std::fs::read_to_string(path);
 | 
			
		||||
    if let Err(e) = s {
 | 
			
		||||
        panic!("failed to read config file: {}", e);
 | 
			
		||||
    }
 | 
			
		||||
    let s = s.unwrap();
 | 
			
		||||
    let cnf = toml::from_str::<ConfigFile>(&s);
 | 
			
		||||
    if let Err(e) = cnf {
 | 
			
		||||
        panic!("failed to parse config file: {}", e);
 | 
			
		||||
    }
 | 
			
		||||
    let cnf = cnf.unwrap();
 | 
			
		||||
 | 
			
		||||
    // quick checks and conversions
 | 
			
		||||
    let mut config = LypheDBConfig {
 | 
			
		||||
        name: cnf.name,
 | 
			
		||||
        nats_cert: cnf.nats_cert,
 | 
			
		||||
        nats_key: cnf.nats_key,
 | 
			
		||||
        nats_url: cnf.nats_url,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    config
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,114 +0,0 @@
 | 
			
		|||
use crate::config::load_config;
 | 
			
		||||
use lyphedb::{KVList, KeyList, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
			
		||||
use std::collections::BTreeMap;
 | 
			
		||||
use async_nats::Message;
 | 
			
		||||
use futures::StreamExt;
 | 
			
		||||
use tokio::sync::mpsc;
 | 
			
		||||
 | 
			
		||||
mod config;
 | 
			
		||||
 | 
			
		||||
#[tokio::main]
 | 
			
		||||
async fn main() {
 | 
			
		||||
    let config = load_config(&std::env::args().nth(1).expect("please specify config file"));
 | 
			
		||||
    let nats = async_nats::ConnectOptions::new()
 | 
			
		||||
        .add_client_certificate(
 | 
			
		||||
            config.nats_cert.as_str().into(),
 | 
			
		||||
            config.nats_key.as_str().into(),
 | 
			
		||||
        )
 | 
			
		||||
        .connect(config.nats_url.as_str())
 | 
			
		||||
        .await;
 | 
			
		||||
    if let Err(e) = nats {
 | 
			
		||||
        eprintln!("FATAL ERROR, COULDN'T CONNECT TO NATS: {}", e);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    let nats = nats.unwrap();
 | 
			
		||||
 | 
			
		||||
    // test 1: create 10000 keys, send in chunks of 10
 | 
			
		||||
    println!("test 1: create 10_000 keys, send in chunks of 100");
 | 
			
		||||
    let (key_tx, mut key_rx) = mpsc::unbounded_channel();
 | 
			
		||||
    let start = std::time::Instant::now();
 | 
			
		||||
    let mut tasks = vec![];
 | 
			
		||||
    for _ in 0..(10_000 / 100) {
 | 
			
		||||
        let name = config.name.clone();
 | 
			
		||||
        let nats = nats.clone();
 | 
			
		||||
        let key_tx = key_tx.clone();
 | 
			
		||||
        tasks.push(tokio::spawn(async move {
 | 
			
		||||
            let mut keys = BTreeMap::new();
 | 
			
		||||
            for _ in 0..100 {
 | 
			
		||||
                let key = rand::random::<[u8; 16]>();
 | 
			
		||||
                let data = rand::random::<[u8; 16]>();
 | 
			
		||||
                key_tx.send((key, data)).unwrap();
 | 
			
		||||
                keys.insert(key, data);
 | 
			
		||||
            }
 | 
			
		||||
            let data = rmp_serde::to_vec(&LDBNatsMessage::Command(LypheDBCommand::SetKeys(
 | 
			
		||||
                KVList {
 | 
			
		||||
                    kvs: keys
 | 
			
		||||
                        .into_iter()
 | 
			
		||||
                        .map(|(k, v)| (k.to_vec(), v.to_vec()))
 | 
			
		||||
                        .collect(),
 | 
			
		||||
                },
 | 
			
		||||
                PropagationStrategy::Immediate,
 | 
			
		||||
            )))
 | 
			
		||||
                .unwrap();
 | 
			
		||||
            let replyto_sub = "lyphedb_test_1".to_string();
 | 
			
		||||
            let mut subscriber = nats.subscribe(replyto_sub.clone()).await.unwrap();
 | 
			
		||||
            nats.publish_with_reply(name.clone(), replyto_sub, data.into()).await.unwrap();
 | 
			
		||||
            if let Some(reply) = subscriber.next().await {
 | 
			
		||||
                let reply = rmp_serde::from_slice::<LDBNatsMessage>(&reply.payload).unwrap();
 | 
			
		||||
                if let LDBNatsMessage::Success = reply {} else {
 | 
			
		||||
                    eprintln!("failure");
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }));
 | 
			
		||||
    }
 | 
			
		||||
    for task in tasks {
 | 
			
		||||
        task.await.unwrap();
 | 
			
		||||
    }
 | 
			
		||||
    let end = std::time::Instant::now();
 | 
			
		||||
    println!("test 1: {}ms", end.duration_since(start).as_millis());
 | 
			
		||||
    let mut our_copy = BTreeMap::new();
 | 
			
		||||
    let mut buffer = vec![];
 | 
			
		||||
    key_rx.recv_many(&mut buffer, 10_000).await;
 | 
			
		||||
    for (k, v) in buffer {
 | 
			
		||||
        our_copy.insert(k, v);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // test 2: read back all keys and check for accuracy
 | 
			
		||||
    println!("test 2: read back all keys and check for accuracy");
 | 
			
		||||
    let kv: Vec<_> = our_copy.into_iter().map(|(k, v)| (k.to_vec(), v.to_vec())).collect();
 | 
			
		||||
 | 
			
		||||
    let start = std::time::Instant::now();
 | 
			
		||||
    let mut tasks = vec![];
 | 
			
		||||
    for i in 0..100 {
 | 
			
		||||
        let batch = kv[i * 100..((i + 1) * 100).min(kv.len())].to_vec();
 | 
			
		||||
        let name = config.name.clone();
 | 
			
		||||
        let nats = nats.clone();
 | 
			
		||||
        tasks.push(tokio::spawn(async move {
 | 
			
		||||
            let data = rmp_serde::to_vec(&LDBNatsMessage::Command(LypheDBCommand::GetKeys(
 | 
			
		||||
                KeyList {
 | 
			
		||||
                    keys: batch.iter().map(|(k, _)| k.to_vec()).collect()
 | 
			
		||||
                }
 | 
			
		||||
            ))).unwrap();
 | 
			
		||||
            let replyto_sub = format!("lyphedb_test_2_{}", i);
 | 
			
		||||
            let mut subscriber = nats.subscribe(replyto_sub.clone()).await.unwrap();
 | 
			
		||||
            nats.publish_with_reply(name.clone(), replyto_sub, data.into()).await.unwrap();
 | 
			
		||||
            if let Some(reply) = subscriber.next().await {
 | 
			
		||||
                let reply = rmp_serde::from_slice::<LDBNatsMessage>(&reply.payload).unwrap();
 | 
			
		||||
                if let LDBNatsMessage::Entries(kvlist) = reply {
 | 
			
		||||
                    // check all keys
 | 
			
		||||
                    for (k1, v1) in batch {
 | 
			
		||||
                        let v2 = kvlist.kvs.iter().find(|(k, v)| k == &k1).expect("key not found");
 | 
			
		||||
                        assert_eq!(v1, v2.1);
 | 
			
		||||
                    }
 | 
			
		||||
                } else {
 | 
			
		||||
                    eprintln!("failure");
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }));
 | 
			
		||||
    }
 | 
			
		||||
    for task in tasks {
 | 
			
		||||
        task.await.unwrap();
 | 
			
		||||
    }
 | 
			
		||||
    let end = std::time::Instant::now();
 | 
			
		||||
    println!("test 2: {}ms", end.duration_since(start).as_millis());
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,129 +0,0 @@
 | 
			
		|||
use serde::Deserialize;
 | 
			
		||||
 | 
			
		||||
#[derive(Deserialize)]
 | 
			
		||||
struct ConfigFile {
 | 
			
		||||
    pub name: String,
 | 
			
		||||
    pub write: bool,
 | 
			
		||||
    pub master: String,
 | 
			
		||||
    pub ram_limit: String,
 | 
			
		||||
    pub gc_limit: String,
 | 
			
		||||
    pub avg_entry_size: String,
 | 
			
		||||
    pub log: String,
 | 
			
		||||
    pub nats_cert: String,
 | 
			
		||||
    pub nats_key: String,
 | 
			
		||||
    pub nats_url: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
pub struct LypheDBConfig {
 | 
			
		||||
    pub name: String,
 | 
			
		||||
    pub write: bool,
 | 
			
		||||
    pub master: String,
 | 
			
		||||
    pub ram_limit: usize,
 | 
			
		||||
    pub gc_limit: usize,
 | 
			
		||||
    pub avg_entry_size: usize,
 | 
			
		||||
    pub log: String,
 | 
			
		||||
    pub nats_cert: String,
 | 
			
		||||
    pub nats_key: String,
 | 
			
		||||
    pub nats_url: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn load_config(path: &str) -> LypheDBConfig {
 | 
			
		||||
    let s = std::fs::read_to_string(path);
 | 
			
		||||
    if let Err(e) = s {
 | 
			
		||||
        panic!("failed to read config file: {}", e);
 | 
			
		||||
    }
 | 
			
		||||
    let s = s.unwrap();
 | 
			
		||||
    let cnf = toml::from_str::<ConfigFile>(&s);
 | 
			
		||||
    if let Err(e) = cnf {
 | 
			
		||||
        panic!("failed to parse config file: {}", e);
 | 
			
		||||
    }
 | 
			
		||||
    let cnf = cnf.unwrap();
 | 
			
		||||
 | 
			
		||||
    // quick checks and conversions
 | 
			
		||||
    let mut config = LypheDBConfig {
 | 
			
		||||
        name: cnf.name,
 | 
			
		||||
        write: cnf.write,
 | 
			
		||||
        master: cnf.master,
 | 
			
		||||
        ram_limit: 0,
 | 
			
		||||
        gc_limit: 0,
 | 
			
		||||
        avg_entry_size: 0,
 | 
			
		||||
        log: cnf.log,
 | 
			
		||||
        nats_cert: cnf.nats_cert,
 | 
			
		||||
        nats_key: cnf.nats_key,
 | 
			
		||||
        nats_url: cnf.nats_url,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    if !(cnf.ram_limit.ends_with("b") &&
 | 
			
		||||
        (
 | 
			
		||||
            cnf.ram_limit.trim_end_matches("b").ends_with("k") ||
 | 
			
		||||
                cnf.ram_limit.trim_end_matches("b").ends_with("m") ||
 | 
			
		||||
                cnf.ram_limit.trim_end_matches("b").ends_with("g")
 | 
			
		||||
        )
 | 
			
		||||
    ) {
 | 
			
		||||
        panic!("invalid ram limit");
 | 
			
		||||
    }
 | 
			
		||||
    config.ram_limit = if cnf.ram_limit.ends_with("gb") {
 | 
			
		||||
        cnf.ram_limit.trim_end_matches("gb").parse::<usize>().unwrap() * 1024 * 1024 * 1024
 | 
			
		||||
    } else if cnf.ram_limit.ends_with("mb") {
 | 
			
		||||
        cnf.ram_limit.trim_end_matches("mb").parse::<usize>().unwrap() * 1024 * 1024
 | 
			
		||||
    } else if cnf.ram_limit.ends_with("kb") {
 | 
			
		||||
        cnf.ram_limit.trim_end_matches("kb").parse::<usize>().unwrap() * 1024
 | 
			
		||||
    } else {
 | 
			
		||||
        cnf.ram_limit.trim_end_matches("b").parse::<usize>().unwrap()
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    if !(cnf.gc_limit.ends_with("b") &&
 | 
			
		||||
        (
 | 
			
		||||
            cnf.gc_limit.trim_end_matches("b").ends_with("k") ||
 | 
			
		||||
                cnf.gc_limit.trim_end_matches("b").ends_with("m") ||
 | 
			
		||||
                cnf.gc_limit.trim_end_matches("b").ends_with("g")
 | 
			
		||||
        )
 | 
			
		||||
    ) {
 | 
			
		||||
        panic!("invalid ram limit");
 | 
			
		||||
    }
 | 
			
		||||
    config.gc_limit = if cnf.gc_limit.ends_with("gb") {
 | 
			
		||||
        cnf.gc_limit.trim_end_matches("gb").parse::<usize>().unwrap() * 1024 * 1024 * 1024
 | 
			
		||||
    } else if cnf.gc_limit.ends_with("mb") {
 | 
			
		||||
        cnf.gc_limit.trim_end_matches("mb").parse::<usize>().unwrap() * 1024 * 1024
 | 
			
		||||
    } else if cnf.gc_limit.ends_with("kb") {
 | 
			
		||||
        cnf.gc_limit.trim_end_matches("kb").parse::<usize>().unwrap() * 1024
 | 
			
		||||
    } else {
 | 
			
		||||
        cnf.gc_limit.trim_end_matches("b").parse::<usize>().unwrap()
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    if !(cnf.avg_entry_size.ends_with("b") &&
 | 
			
		||||
        (
 | 
			
		||||
            cnf.avg_entry_size.trim_end_matches("b").ends_with("k") ||
 | 
			
		||||
                cnf.avg_entry_size.trim_end_matches("b").ends_with("m") ||
 | 
			
		||||
                cnf.avg_entry_size.trim_end_matches("b").ends_with("g")
 | 
			
		||||
        )
 | 
			
		||||
    ) {
 | 
			
		||||
        panic!("invalid ram limit");
 | 
			
		||||
    }
 | 
			
		||||
    config.avg_entry_size = if cnf.avg_entry_size.ends_with("gb") {
 | 
			
		||||
        cnf.avg_entry_size.trim_end_matches("gb").parse::<usize>().unwrap() * 1024 * 1024 * 1024
 | 
			
		||||
    } else if cnf.avg_entry_size.ends_with("mb") {
 | 
			
		||||
        cnf.avg_entry_size.trim_end_matches("mb").parse::<usize>().unwrap() * 1024 * 1024
 | 
			
		||||
    } else if cnf.avg_entry_size.ends_with("kb") {
 | 
			
		||||
        cnf.avg_entry_size.trim_end_matches("kb").parse::<usize>().unwrap() * 1024
 | 
			
		||||
    } else {
 | 
			
		||||
        cnf.avg_entry_size.trim_end_matches("b").parse::<usize>().unwrap()
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    if config.avg_entry_size > config.ram_limit {
 | 
			
		||||
        panic!("avg entry size is larger than ram limit");
 | 
			
		||||
    }
 | 
			
		||||
    if config.gc_limit > config.ram_limit {
 | 
			
		||||
        panic!("gc limit is larger than ram limit");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if config.log.is_empty() {
 | 
			
		||||
        config.log = "info".to_string();
 | 
			
		||||
    }
 | 
			
		||||
    if config.log != "debug" && config.log != "info" && config.log != "warn" && config.log != "error" {
 | 
			
		||||
        panic!("invalid log level");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    config
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,321 +0,0 @@
 | 
			
		|||
use crate::config::LypheDBConfig;
 | 
			
		||||
use log::*;
 | 
			
		||||
use lyphedb::PropagationStrategy;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use std::borrow::Cow;
 | 
			
		||||
use std::collections::BTreeMap;
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use std::sync::atomic::{AtomicU64, Ordering};
 | 
			
		||||
use percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS};
 | 
			
		||||
use tokio::sync::RwLock;
 | 
			
		||||
pub const NOT_ALLOWED_ASCII: AsciiSet = CONTROLS.add(b' ').add(b'/').add(b'.').add(b'\\');
 | 
			
		||||
 | 
			
		||||
pub type LDBCache = BTreeMap<Arc<Vec<u8>>, Arc<Vec<u8>>>;
 | 
			
		||||
 | 
			
		||||
/// Reads will read from this cache
 | 
			
		||||
pub static PRIMARY_CACHE: Lazy<RwLock<LDBCache>> =
 | 
			
		||||
    Lazy::new(|| RwLock::new(BTreeMap::new()));
 | 
			
		||||
/// Writes will be written to this cache, and then depending on the propagation method, the
 | 
			
		||||
/// Primary cache will be set to a copy
 | 
			
		||||
pub static SECONDARY_CACHE: Lazy<RwLock<LDBCache>> =
 | 
			
		||||
    Lazy::new(|| RwLock::new(BTreeMap::new()));
 | 
			
		||||
/// how often are keys accessed? this will influence the garbage collector
 | 
			
		||||
pub static KEY_ACCESS_COUNTER: Lazy<RwLock<BTreeMap<Arc<Vec<u8>>, AtomicU64>>> =
 | 
			
		||||
    Lazy::new(|| RwLock::new(BTreeMap::new()));
 | 
			
		||||
 | 
			
		||||
pub fn key_to_path(config: &LypheDBConfig, key: &[u8]) -> PathBuf {
 | 
			
		||||
    let mut path = PathBuf::new();
 | 
			
		||||
    path.push(&config.master);
 | 
			
		||||
 | 
			
		||||
    let mut fnbuf = Vec::new();
 | 
			
		||||
    for (i, byte) in key.iter().enumerate() {
 | 
			
		||||
        if *byte == b'/' {
 | 
			
		||||
            let encoded = percent_encode(&fnbuf, &NOT_ALLOWED_ASCII).to_string();
 | 
			
		||||
            path.push(encoded);
 | 
			
		||||
            fnbuf.clear();
 | 
			
		||||
        } else {
 | 
			
		||||
            fnbuf.push(*byte);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    if !fnbuf.is_empty() {
 | 
			
		||||
        let encoded = percent_encode(&fnbuf, &NOT_ALLOWED_ASCII).to_string();
 | 
			
		||||
        path.push(encoded);
 | 
			
		||||
        fnbuf.clear();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    path.push(".self");
 | 
			
		||||
 | 
			
		||||
    path
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub enum OperationError {
 | 
			
		||||
    KeyCannotBeEmpty,
 | 
			
		||||
    FilesystemPermissionError,
 | 
			
		||||
    BadFilesystemEntry,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn set_key_disk(
 | 
			
		||||
    config: &LypheDBConfig,
 | 
			
		||||
    key: &[u8],
 | 
			
		||||
    value: &[u8],
 | 
			
		||||
) -> Result<(), OperationError> {
 | 
			
		||||
    if key.is_empty() {
 | 
			
		||||
        return Err(OperationError::KeyCannotBeEmpty);
 | 
			
		||||
    }
 | 
			
		||||
    let path = key_to_path(config, key);
 | 
			
		||||
    let directory = path.parent().ok_or(OperationError::KeyCannotBeEmpty)?;
 | 
			
		||||
    if let Ok(directory_exists) = directory.try_exists() {
 | 
			
		||||
        if !directory_exists {
 | 
			
		||||
            std::fs::create_dir_all(directory).map_err(|e| {
 | 
			
		||||
                warn!("couldn't create directory: {:?}", e);
 | 
			
		||||
                OperationError::FilesystemPermissionError
 | 
			
		||||
            })?;
 | 
			
		||||
        }
 | 
			
		||||
    } else {
 | 
			
		||||
        return Err(OperationError::FilesystemPermissionError);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::fs::write(path, value).map_err(|e| {
 | 
			
		||||
        warn!("couldn't write file: {:?}", e);
 | 
			
		||||
        OperationError::FilesystemPermissionError
 | 
			
		||||
    })?;
 | 
			
		||||
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn delete_key_disk(
 | 
			
		||||
    config: &LypheDBConfig,
 | 
			
		||||
    key: &[u8],
 | 
			
		||||
) -> Result<(), OperationError> {
 | 
			
		||||
    if key.is_empty() {
 | 
			
		||||
        return Err(OperationError::KeyCannotBeEmpty);
 | 
			
		||||
    }
 | 
			
		||||
    let path = key_to_path(config, key);
 | 
			
		||||
    if let Ok(exists) = path.try_exists() {
 | 
			
		||||
        if !exists {
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    std::fs::remove_file(path).map_err(|e| {
 | 
			
		||||
        warn!("couldn't remove file: {:?}", e);
 | 
			
		||||
        OperationError::FilesystemPermissionError
 | 
			
		||||
    })
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn get_key_disk(
 | 
			
		||||
    config: &LypheDBConfig,
 | 
			
		||||
    key: &[u8],
 | 
			
		||||
) -> Result<Option<Vec<u8>>, OperationError> {
 | 
			
		||||
    if key.is_empty() {
 | 
			
		||||
        return Err(OperationError::KeyCannotBeEmpty);
 | 
			
		||||
    }
 | 
			
		||||
    let path = key_to_path(config, key);
 | 
			
		||||
    if let Ok(exists) = path.try_exists() {
 | 
			
		||||
        if !exists {
 | 
			
		||||
            return Ok(None);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    let data = std::fs::read(path).map_err(|e| {
 | 
			
		||||
        warn!("couldn't read file: {:?}", e);
 | 
			
		||||
        OperationError::FilesystemPermissionError
 | 
			
		||||
    })?;
 | 
			
		||||
    Ok(Some(data))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// this function allows empty keys, so you can get all keys under root by doing
 | 
			
		||||
/// get_keys_under_keydir_disk(..., b"")
 | 
			
		||||
pub async fn get_keys_under_keydir_disk(
 | 
			
		||||
    config: &LypheDBConfig,
 | 
			
		||||
    keydir: &[u8],
 | 
			
		||||
) -> Result<Vec<Vec<u8>>, OperationError> {
 | 
			
		||||
    let path = key_to_path(config, keydir);
 | 
			
		||||
    let path = path.parent().expect("bad master");
 | 
			
		||||
    let mut keys = Vec::new();
 | 
			
		||||
    for entry in std::fs::read_dir(path).map_err(|e| {
 | 
			
		||||
        warn!("couldn't read directory: {:?}", e);
 | 
			
		||||
        OperationError::FilesystemPermissionError
 | 
			
		||||
    })? {
 | 
			
		||||
        let entry = entry.map_err(|e| {
 | 
			
		||||
            warn!("couldn't read directory entry: {:?}", e);
 | 
			
		||||
            OperationError::FilesystemPermissionError
 | 
			
		||||
        })?;
 | 
			
		||||
        let path = entry.path();
 | 
			
		||||
        let filename = path
 | 
			
		||||
            .to_str()
 | 
			
		||||
            .ok_or(OperationError::FilesystemPermissionError)?;
 | 
			
		||||
        if filename.ends_with(".self") {
 | 
			
		||||
            // this is a value file, ignore
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        let filename = filename.trim_start_matches(&config.master);
 | 
			
		||||
        let key = percent_decode_str(filename).collect();
 | 
			
		||||
        keys.push(key);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(keys)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn set_key(
 | 
			
		||||
    config: LypheDBConfig,
 | 
			
		||||
    key: &[u8],
 | 
			
		||||
    value: &[u8],
 | 
			
		||||
    strat: &PropagationStrategy,
 | 
			
		||||
) -> Result<(), OperationError> {
 | 
			
		||||
    let k1 = Arc::new(key.to_vec());
 | 
			
		||||
    let v1 = Arc::new(value.to_vec());
 | 
			
		||||
    let disk_task = {
 | 
			
		||||
        let k1 = k1.clone();
 | 
			
		||||
        let v1 = v1.clone();
 | 
			
		||||
        tokio::spawn(async move { set_key_disk(&config, &k1, &v1).await })
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    let prop_task = match strat {
 | 
			
		||||
        PropagationStrategy::Immediate => {
 | 
			
		||||
            let k1 = k1.clone();
 | 
			
		||||
            let v1 = v1.clone();
 | 
			
		||||
            tokio::spawn(async move {
 | 
			
		||||
                let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
			
		||||
                secondary_cache.insert(k1, v1);
 | 
			
		||||
                let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
			
		||||
                *primary_cache = secondary_cache.clone();
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
        PropagationStrategy::Timeout => {
 | 
			
		||||
            let k1 = k1.clone();
 | 
			
		||||
            let v1 = v1.clone();
 | 
			
		||||
            tokio::spawn(async move {
 | 
			
		||||
                tokio::spawn(async move {
 | 
			
		||||
                    {
 | 
			
		||||
                        let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
			
		||||
                        secondary_cache.insert(k1, v1);
 | 
			
		||||
                    }
 | 
			
		||||
                    tokio::spawn(async move {
 | 
			
		||||
                        let start = std::time::Instant::now();
 | 
			
		||||
                        loop {
 | 
			
		||||
                            if start.elapsed().as_secs() > 60 {
 | 
			
		||||
                                break;
 | 
			
		||||
                            }
 | 
			
		||||
                            let pc = PRIMARY_CACHE.try_write();
 | 
			
		||||
                            if pc.is_err() {
 | 
			
		||||
                                tokio::time::sleep(std::time::Duration::from_secs(10)).await;
 | 
			
		||||
                                continue;
 | 
			
		||||
                            } else {
 | 
			
		||||
                                break;
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                        let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
			
		||||
                        let secondary_cache = SECONDARY_CACHE.read().await;
 | 
			
		||||
                        *primary_cache = secondary_cache.clone();
 | 
			
		||||
                    });
 | 
			
		||||
                });
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
        PropagationStrategy::OnRead => {
 | 
			
		||||
            let k1 = k1.clone();
 | 
			
		||||
            tokio::spawn(async move {
 | 
			
		||||
                {
 | 
			
		||||
                    let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
			
		||||
                    secondary_cache.remove(&k1);
 | 
			
		||||
                }
 | 
			
		||||
                {
 | 
			
		||||
                    let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
			
		||||
                    primary_cache.remove(&k1);
 | 
			
		||||
                }
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    if let Ok(Err(e)) = disk_task.await {
 | 
			
		||||
        error!("couldn't set key on disk: {:?} ({:?})", e, key);
 | 
			
		||||
        // undo propagation
 | 
			
		||||
        prop_task.abort();
 | 
			
		||||
        {
 | 
			
		||||
            let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
			
		||||
            primary_cache.remove(&k1);
 | 
			
		||||
        }
 | 
			
		||||
        {
 | 
			
		||||
            let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
			
		||||
            secondary_cache.remove(&k1);
 | 
			
		||||
        }
 | 
			
		||||
        return Err(e);
 | 
			
		||||
    }
 | 
			
		||||
    let _ = prop_task.await;
 | 
			
		||||
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn get_key(config: LypheDBConfig, key: &[u8]) -> Result<Option<Vec<u8>>, OperationError> {
 | 
			
		||||
    let k1 = Arc::new(key.to_vec());
 | 
			
		||||
    {
 | 
			
		||||
        let k1 = k1.clone();
 | 
			
		||||
        tokio::spawn(async move {
 | 
			
		||||
            let mut access_counter = KEY_ACCESS_COUNTER.write().await;
 | 
			
		||||
            let counter = access_counter.entry(k1.clone()).or_insert(AtomicU64::new(0));
 | 
			
		||||
            if counter.load(Ordering::Relaxed) > 10000000 {
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
            counter.fetch_add(1, Ordering::SeqCst);
 | 
			
		||||
        });
 | 
			
		||||
    }
 | 
			
		||||
    let disk_task = {
 | 
			
		||||
        let k1 = k1.clone();
 | 
			
		||||
        tokio::spawn(async move { get_key_disk(&config, &k1).await })
 | 
			
		||||
    };
 | 
			
		||||
    {
 | 
			
		||||
        // read from cache
 | 
			
		||||
        let cache = PRIMARY_CACHE.read().await;
 | 
			
		||||
        if let Some(val) = cache.get(&k1) {
 | 
			
		||||
            disk_task.abort();
 | 
			
		||||
            return Ok(Some(val.to_vec()));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    //debug!("cache miss");
 | 
			
		||||
    if let Ok(result) = disk_task.await {
 | 
			
		||||
        if let Ok(Some(val)) = &result {
 | 
			
		||||
            let val = Arc::new(val.to_vec());
 | 
			
		||||
            tokio::spawn(async move {
 | 
			
		||||
                {
 | 
			
		||||
                    let mut cache = SECONDARY_CACHE.write().await;
 | 
			
		||||
                    cache.insert(k1.clone(), val.clone());
 | 
			
		||||
                }
 | 
			
		||||
                {
 | 
			
		||||
                    let secondary_cache = SECONDARY_CACHE.read().await;
 | 
			
		||||
                    let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
			
		||||
                    *primary_cache = secondary_cache.clone();
 | 
			
		||||
                }
 | 
			
		||||
                //debug!("cache insert");
 | 
			
		||||
            });
 | 
			
		||||
        }
 | 
			
		||||
        result
 | 
			
		||||
    } else {
 | 
			
		||||
        Err(OperationError::FilesystemPermissionError)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn delete_key(config: LypheDBConfig, key: &[u8]) -> Result<(), OperationError> {
 | 
			
		||||
    let k1 = Arc::new(key.to_vec());
 | 
			
		||||
    let disk_task = {
 | 
			
		||||
        let k1 = k1.clone();
 | 
			
		||||
        tokio::spawn(async move { delete_key_disk(&config, &k1).await })
 | 
			
		||||
    };
 | 
			
		||||
    let prop_task = {
 | 
			
		||||
        let k1 = k1.clone();
 | 
			
		||||
        tokio::spawn(async move {
 | 
			
		||||
            {
 | 
			
		||||
                let mut key_access_counter = KEY_ACCESS_COUNTER.write().await;
 | 
			
		||||
                key_access_counter.remove(&k1);
 | 
			
		||||
            }
 | 
			
		||||
            {
 | 
			
		||||
                let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
			
		||||
                secondary_cache.remove(&k1);
 | 
			
		||||
            }
 | 
			
		||||
            let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
			
		||||
            let secondary_cache = SECONDARY_CACHE.read().await;
 | 
			
		||||
            *primary_cache = secondary_cache.clone();
 | 
			
		||||
        })
 | 
			
		||||
    };
 | 
			
		||||
    prop_task.await.expect("couldn't delete key");
 | 
			
		||||
    disk_task.await.expect("couldn't delete key")
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,49 +0,0 @@
 | 
			
		|||
use serde::{Deserialize, Serialize};
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
			
		||||
pub enum LDBNatsMessage {
 | 
			
		||||
    Command(LypheDBCommand),
 | 
			
		||||
    Entries(KVList),
 | 
			
		||||
    Count(u64),
 | 
			
		||||
    Success,
 | 
			
		||||
    BadRequest,
 | 
			
		||||
    NotFound,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
			
		||||
pub enum PropagationStrategy {
 | 
			
		||||
    /// Reads will immediately be able to read this key's value as soon as it has been set
 | 
			
		||||
    Immediate,
 | 
			
		||||
    /// The value change will be queued along with others,
 | 
			
		||||
    /// then in a period of either inactivity or a maximum time, all changes will be immediately
 | 
			
		||||
    /// seen at once
 | 
			
		||||
    Timeout,
 | 
			
		||||
    /// The key will be removed from the cache, and thus the next read will cause a cache miss,
 | 
			
		||||
    /// and the value will be loaded from disk
 | 
			
		||||
    OnRead,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
			
		||||
pub enum LypheDBCommand {
 | 
			
		||||
    SetKeys(KVList, PropagationStrategy),
 | 
			
		||||
    GetKeys(KeyList),
 | 
			
		||||
    CountKeys(KeyDirectory),
 | 
			
		||||
    /// NOT RECURSIVE
 | 
			
		||||
    GetKeyDirectory(KeyDirectory),
 | 
			
		||||
    DeleteKeys(KeyList),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
			
		||||
pub struct KVList {
 | 
			
		||||
    pub kvs: Vec<(Vec<u8>, Vec<u8>)>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
			
		||||
pub struct KeyList {
 | 
			
		||||
    pub keys: Vec<Vec<u8>>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
			
		||||
pub struct KeyDirectory {
 | 
			
		||||
    pub key: Vec<u8>,
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,248 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
keys are stored on disk like this
 | 
			
		||||
<master>/<keyname>/.self
 | 
			
		||||
 | 
			
		||||
so if i stored the key "/this/is/a/key" with the data "hello world", it'd look like this
 | 
			
		||||
 | 
			
		||||
<master>/this/is/a/key/.self -> "hello world"
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
use crate::config::{LypheDBConfig, load_config};
 | 
			
		||||
use crate::dbimpl::{KEY_ACCESS_COUNTER, PRIMARY_CACHE, SECONDARY_CACHE};
 | 
			
		||||
use futures::StreamExt;
 | 
			
		||||
use log::{debug, error, info, warn};
 | 
			
		||||
use lyphedb::{KVList, LDBNatsMessage, LypheDBCommand};
 | 
			
		||||
 | 
			
		||||
mod config;
 | 
			
		||||
mod dbimpl;
 | 
			
		||||
 | 
			
		||||
pub async fn gc_thread(config: LypheDBConfig) {
 | 
			
		||||
    loop {
 | 
			
		||||
        tokio::time::sleep(std::time::Duration::from_secs(61)).await;
 | 
			
		||||
        {
 | 
			
		||||
            let cache = PRIMARY_CACHE.read().await;
 | 
			
		||||
            let cache_size = cache.len() * config.avg_entry_size;
 | 
			
		||||
            if cache_size > config.gc_limit {
 | 
			
		||||
                debug!("gc triggered, cache size: {} bytes", cache_size);
 | 
			
		||||
                let keycount_to_remove = cache.len() - config.gc_limit / config.avg_entry_size;
 | 
			
		||||
                drop(cache);
 | 
			
		||||
                let mut least_freq_keys = vec![];
 | 
			
		||||
                for (key, count) in KEY_ACCESS_COUNTER.read().await.iter() {
 | 
			
		||||
                    let count = count.load(std::sync::atomic::Ordering::Relaxed);
 | 
			
		||||
                    if least_freq_keys.len() < keycount_to_remove {
 | 
			
		||||
                        least_freq_keys.push((key.clone(), count));
 | 
			
		||||
                    } else {
 | 
			
		||||
                        for (other, oc) in least_freq_keys.iter_mut() {
 | 
			
		||||
                            if count < *oc {
 | 
			
		||||
                                *other = key.clone();
 | 
			
		||||
                                *oc = count;
 | 
			
		||||
                                break;
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                let mut cache = SECONDARY_CACHE.write().await;
 | 
			
		||||
                for (key, _) in least_freq_keys.iter() {
 | 
			
		||||
                    cache.remove(key);
 | 
			
		||||
                }
 | 
			
		||||
                let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
			
		||||
                *primary_cache = cache.clone();
 | 
			
		||||
                debug!(
 | 
			
		||||
                    "gc finished, cache size: {} bytes",
 | 
			
		||||
                    primary_cache.len() * config.avg_entry_size
 | 
			
		||||
                );
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub async fn precache_keys_until_limit(config: LypheDBConfig) {
 | 
			
		||||
    info!("precache started");
 | 
			
		||||
    let mut precache_count = 0;
 | 
			
		||||
    let mut precache_stack = dbimpl::get_keys_under_keydir_disk(&config, b"")
 | 
			
		||||
        .await
 | 
			
		||||
        .expect("couldn't get root keys");
 | 
			
		||||
    while let Some(precache_key) = precache_stack.pop() {
 | 
			
		||||
        {
 | 
			
		||||
            let cache = PRIMARY_CACHE.read().await;
 | 
			
		||||
            if cache.len() * config.avg_entry_size > config.gc_limit {
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            if cache.len() % 10000 == 0 {
 | 
			
		||||
                info!("precache size: {} mb", (cache.len() * config.avg_entry_size) / (1024*1024));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        let children = dbimpl::get_keys_under_keydir_disk(&config, &precache_key)
 | 
			
		||||
            .await
 | 
			
		||||
            .expect("couldn't get children of key");
 | 
			
		||||
        if !children.is_empty() {
 | 
			
		||||
            precache_stack.extend(children);
 | 
			
		||||
        } else {
 | 
			
		||||
            let _value = dbimpl::get_key(config.clone(), &precache_key)
 | 
			
		||||
                .await
 | 
			
		||||
                .expect("couldn't get value of key");
 | 
			
		||||
            precache_count += 1;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    info!("precache finished, {} values precached", precache_count);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[tokio::main]
 | 
			
		||||
async fn main() {
 | 
			
		||||
    let config = load_config(&std::env::args().nth(1).expect("please specify config file"));
 | 
			
		||||
    let logger = env_logger::builder()
 | 
			
		||||
        .filter_level(match config.log.as_str() {
 | 
			
		||||
            "debug" => log::LevelFilter::Debug,
 | 
			
		||||
            "info" => log::LevelFilter::Info,
 | 
			
		||||
            "warn" => log::LevelFilter::Warn,
 | 
			
		||||
            "error" => log::LevelFilter::Error,
 | 
			
		||||
            _ => unreachable!(),
 | 
			
		||||
        })
 | 
			
		||||
        .build();
 | 
			
		||||
    log::set_boxed_logger(Box::new(logger)).unwrap();
 | 
			
		||||
    log::set_max_level(match config.log.as_str() {
 | 
			
		||||
        "debug" => log::LevelFilter::Debug,
 | 
			
		||||
        "info" => log::LevelFilter::Info,
 | 
			
		||||
        "warn" => log::LevelFilter::Warn,
 | 
			
		||||
        "error" => log::LevelFilter::Error,
 | 
			
		||||
        _ => unreachable!(),
 | 
			
		||||
    });
 | 
			
		||||
    info!("lyphedb started");
 | 
			
		||||
 | 
			
		||||
    let nats = async_nats::ConnectOptions::new()
 | 
			
		||||
        .add_client_certificate(
 | 
			
		||||
            config.nats_cert.as_str().into(),
 | 
			
		||||
            config.nats_key.as_str().into(),
 | 
			
		||||
        )
 | 
			
		||||
        .connect(config.nats_url.as_str())
 | 
			
		||||
        .await;
 | 
			
		||||
    if let Err(e) = nats {
 | 
			
		||||
        error!("FATAL ERROR, COULDN'T CONNECT TO NATS: {}", e);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    let nats = nats.unwrap();
 | 
			
		||||
 | 
			
		||||
    let mut subscriber = nats
 | 
			
		||||
        .queue_subscribe(config.name.clone(), "lyphedb".to_string())
 | 
			
		||||
        .await
 | 
			
		||||
        .expect("couldn't subscribe to subject");
 | 
			
		||||
 | 
			
		||||
    info!("nats connected");
 | 
			
		||||
 | 
			
		||||
    tokio::spawn(precache_keys_until_limit(config.clone()));
 | 
			
		||||
    tokio::spawn(gc_thread(config.clone()));
 | 
			
		||||
 | 
			
		||||
    while let Some(msg) = subscriber.next().await {
 | 
			
		||||
        let config = config.clone();
 | 
			
		||||
        let nats = nats.clone();
 | 
			
		||||
        tokio::spawn(async move {
 | 
			
		||||
            async fn bad_request(nats: &async_nats::Client, replyto: async_nats::Subject) {
 | 
			
		||||
                let reply = rmp_serde::to_vec(&LDBNatsMessage::BadRequest).unwrap();
 | 
			
		||||
                if let Err(e) = nats.publish(replyto, reply.into()).await {
 | 
			
		||||
                    warn!("couldn't send reply: {:?}", e);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            let data = msg.payload.to_vec();
 | 
			
		||||
            let data = rmp_serde::from_slice::<LDBNatsMessage>(&data);
 | 
			
		||||
            if let Err(e) = data {
 | 
			
		||||
                warn!("couldn't deserialize message: {:?}", e);
 | 
			
		||||
                if let Some(replyto) = msg.reply {
 | 
			
		||||
                    bad_request(&nats, replyto).await;
 | 
			
		||||
                }
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
            let data = data.unwrap();
 | 
			
		||||
            match data {
 | 
			
		||||
                LDBNatsMessage::Command(cmd) => match cmd {
 | 
			
		||||
                    LypheDBCommand::SetKeys(kvlist, propstrat) => {
 | 
			
		||||
                        for (key, value) in kvlist.kvs {
 | 
			
		||||
                            if let Err(e) =
 | 
			
		||||
                                dbimpl::set_key(config.clone(), &key, &value, &propstrat).await
 | 
			
		||||
                            {
 | 
			
		||||
                                warn!("couldn't set key: {:?}", e);
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                        let reply = rmp_serde::to_vec(&LDBNatsMessage::Success).unwrap();
 | 
			
		||||
                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
			
		||||
                            warn!("couldn't send reply: {:?}", e);
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    LypheDBCommand::GetKeys(klist) => {
 | 
			
		||||
                        let mut reply = Vec::new();
 | 
			
		||||
                        for key in klist.keys {
 | 
			
		||||
                            if let Ok(Some(value)) = dbimpl::get_key(config.clone(), &key).await {
 | 
			
		||||
                                reply.push((key, value));
 | 
			
		||||
                            } else {
 | 
			
		||||
                                warn!("couldn't get key: {:?}", key);
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                        let reply =
 | 
			
		||||
                            rmp_serde::to_vec(&LDBNatsMessage::Entries(KVList { kvs: reply }))
 | 
			
		||||
                                .unwrap();
 | 
			
		||||
                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
			
		||||
                            warn!("couldn't send reply: {:?}", e);
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    LypheDBCommand::CountKeys(keydir) => {
 | 
			
		||||
                        let keys = dbimpl::get_keys_under_keydir_disk(&config, &keydir.key)
 | 
			
		||||
                            .await
 | 
			
		||||
                            .expect("couldn't get keys under keydir");
 | 
			
		||||
                        let mut count = 0;
 | 
			
		||||
                        for key in keys {
 | 
			
		||||
                            let value = dbimpl::get_key(config.clone(), &key)
 | 
			
		||||
                                .await
 | 
			
		||||
                                .expect("couldn't get value of key");
 | 
			
		||||
                            if value.is_some() {
 | 
			
		||||
                                count += 1;
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                        let reply = rmp_serde::to_vec(&LDBNatsMessage::Count(count)).unwrap();
 | 
			
		||||
                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
			
		||||
                            warn!("couldn't send reply: {:?}", e);
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    LypheDBCommand::GetKeyDirectory(keydir) => {
 | 
			
		||||
                        let mut reply = Vec::new();
 | 
			
		||||
                        let keys = dbimpl::get_keys_under_keydir_disk(&config, &keydir.key)
 | 
			
		||||
                            .await
 | 
			
		||||
                            .expect("couldn't get keys under keydir");
 | 
			
		||||
                        for key in keys {
 | 
			
		||||
                            let value = dbimpl::get_key(config.clone(), &key)
 | 
			
		||||
                                .await
 | 
			
		||||
                                .expect("couldn't get value of key");
 | 
			
		||||
                            if let Some(value) = value {
 | 
			
		||||
                                reply.push((key, value));
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                        let reply =
 | 
			
		||||
                            rmp_serde::to_vec(&LDBNatsMessage::Entries(KVList { kvs: reply }))
 | 
			
		||||
                                .unwrap();
 | 
			
		||||
                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
			
		||||
                            warn!("couldn't send reply: {:?}", e);
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    LypheDBCommand::DeleteKeys(klist) => {
 | 
			
		||||
                        for key in klist.keys {
 | 
			
		||||
                            if let Err(e) = dbimpl::delete_key(config.clone(), &key).await {
 | 
			
		||||
                                warn!("couldn't delete key: {:?}", e);
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                        let reply = rmp_serde::to_vec(&LDBNatsMessage::Success).unwrap();
 | 
			
		||||
                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
			
		||||
                            warn!("couldn't send reply: {:?}", e);
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                },
 | 
			
		||||
                _ => {
 | 
			
		||||
                    warn!("bad request, not command");
 | 
			
		||||
                    if let Some(replyto) = msg.reply {
 | 
			
		||||
                        bad_request(&nats, replyto).await;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        });
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    info!("lyphedb shutting down");
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -8,7 +8,7 @@ license-file = "LICENSE"
 | 
			
		|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
			
		||||
 | 
			
		||||
[dependencies]
 | 
			
		||||
asklyphe-common = { path = "../asklyphe-common", features = ["foundationdb"] }
 | 
			
		||||
asklyphe-common = { path = "../asklyphe-common" }
 | 
			
		||||
tokio = { version = "1.0", features = ["full"] }
 | 
			
		||||
serde = { version = "1.0", features = ["derive"] }
 | 
			
		||||
rmp-serde = "1.1.2"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -16,7 +16,7 @@ use astro_float::{BigFloat, RoundingMode};
 | 
			
		|||
use once_cell::sync::Lazy;
 | 
			
		||||
use std::collections::BTreeMap;
 | 
			
		||||
 | 
			
		||||
pub const PRECISION: usize = 2048;
 | 
			
		||||
pub const PRECISION: usize = 1024;
 | 
			
		||||
 | 
			
		||||
// length unit -> value in meters
 | 
			
		||||
pub static LENGTH_STORE: Lazy<BTreeMap<LengthUnit, BigFloat>> = Lazy::new(|| {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -15,7 +15,7 @@ use crate::length_units::{LengthUnit, LENGTH_NAMES, LENGTH_STORE, PRECISION};
 | 
			
		|||
use crate::unit_defs::{ConvertTo, MetricPrefix};
 | 
			
		||||
use astro_float::{BigFloat, Consts, Radix, RoundingMode};
 | 
			
		||||
 | 
			
		||||
pub const MAX_PRECISION: usize = 2048;
 | 
			
		||||
pub const MAX_PRECISION: usize = 1024;
 | 
			
		||||
 | 
			
		||||
pub mod length_units;
 | 
			
		||||
pub mod unit_defs;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										1
									
								
								vorebot/.gitignore
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								vorebot/.gitignore
									
										
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -1 +0,0 @@
 | 
			
		|||
/target
 | 
			
		||||
| 
						 | 
				
			
			@ -1,28 +0,0 @@
 | 
			
		|||
[package]
 | 
			
		||||
name = "vorebot"
 | 
			
		||||
version = "0.2.0"
 | 
			
		||||
edition = "2021"
 | 
			
		||||
 | 
			
		||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
			
		||||
 | 
			
		||||
[dependencies]
 | 
			
		||||
asklyphe-common = { path = "../asklyphe-common" }
 | 
			
		||||
tokio = { version = "1.0", features = ["full"] }
 | 
			
		||||
serde = { version = "1.0", features = ["derive"] }
 | 
			
		||||
rand = "0.8.5"
 | 
			
		||||
rmp-serde = "1.1.2"
 | 
			
		||||
base64 = "0.21.7"
 | 
			
		||||
image = "0.24.8"
 | 
			
		||||
isahc = "1.7.2"
 | 
			
		||||
ulid = "1.0.0"
 | 
			
		||||
async-nats = "0.38.0"
 | 
			
		||||
futures = "0.3.28"
 | 
			
		||||
chrono = "0.4.26"
 | 
			
		||||
once_cell = "1.18.0"
 | 
			
		||||
env_logger = "0.10.0"
 | 
			
		||||
log = "0.4.19"
 | 
			
		||||
mutex-timeouts = { version = "0.3.0", features = ["tokio"] }
 | 
			
		||||
prometheus_exporter = "0.8.5"
 | 
			
		||||
thirtyfour = "0.35.0"
 | 
			
		||||
stopwords = "0.1.1"
 | 
			
		||||
texting_robots = "0.2.2"
 | 
			
		||||
| 
						 | 
				
			
			@ -1,400 +0,0 @@
 | 
			
		|||
mod webparse;
 | 
			
		||||
 | 
			
		||||
use asklyphe_common::nats::vorebot::{
 | 
			
		||||
    VOREBOT_NEWHOSTNAME_SERVICE, VOREBOT_SERVICE, VOREBOT_SUGGESTED_SERVICE,
 | 
			
		||||
};
 | 
			
		||||
use async_nats::jetstream;
 | 
			
		||||
use async_nats::jetstream::consumer::PullConsumer;
 | 
			
		||||
use async_nats::jetstream::stream::RetentionPolicy;
 | 
			
		||||
use chrono::TimeZone;
 | 
			
		||||
use futures::StreamExt;
 | 
			
		||||
use log::{debug, error, info, warn};
 | 
			
		||||
use mutex_timeouts::tokio::MutexWithTimeoutAuto as Mutex;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use prometheus_exporter::prometheus::core::{AtomicF64, GenericGauge};
 | 
			
		||||
use prometheus_exporter::prometheus::{register_counter, register_gauge, Counter};
 | 
			
		||||
use std::cmp::max;
 | 
			
		||||
use std::collections::{BTreeMap, BTreeSet};
 | 
			
		||||
use std::hash::{DefaultHasher, Hasher};
 | 
			
		||||
use std::io::Read;
 | 
			
		||||
use std::iter::Iterator;
 | 
			
		||||
use std::str::FromStr;
 | 
			
		||||
use std::string::ToString;
 | 
			
		||||
use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
 | 
			
		||||
use async_nats::jetstream::kv;
 | 
			
		||||
use stopwords::{Language, Spark, Stopwords};
 | 
			
		||||
use thirtyfour::{CapabilitiesHelper, DesiredCapabilities, Proxy, WebDriver};
 | 
			
		||||
use thirtyfour::common::capabilities::firefox::FirefoxPreferences;
 | 
			
		||||
use tokio::task::JoinHandle;
 | 
			
		||||
use asklyphe_common::ldb::DBConn;
 | 
			
		||||
use asklyphe_common::nats::vorebot::CrawlRequest;
 | 
			
		||||
use crate::webparse::web_parse;
 | 
			
		||||
 | 
			
		||||
pub static NATS_URL: Lazy<String> =
 | 
			
		||||
    Lazy::new(|| std::env::var("NATS_URL").expect("NO NATS URL DEFINED"));
 | 
			
		||||
pub static NATS_PASSWORD: Lazy<Option<String>> = Lazy::new(|| {
 | 
			
		||||
    std::env::var("NATS_PASSWORD").ok()
 | 
			
		||||
});
 | 
			
		||||
pub static NATS_CERT: Lazy<String> = Lazy::new(|| std::env::var("NATS_CERT").expect("NO NATS_CERT DEFINED"));
 | 
			
		||||
pub static NATS_KEY: Lazy<String> = Lazy::new(|| std::env::var("NATS_KEY").expect("NO NATS_KEY DEFINED"));
 | 
			
		||||
pub static BROWSER_THREADS: Lazy<Vec<String>> =
 | 
			
		||||
    Lazy::new(|| std::env::var("BROWSER_THREADS").expect("PLEASE LIST BROWSER_THREADS").split(',').map(|v| v.to_string()).collect());
 | 
			
		||||
pub static BROWSER_PROXY: Lazy<Option<String>> = Lazy::new(|| {
 | 
			
		||||
    std::env::var("BROWSER_PROXY").ok()
 | 
			
		||||
});
 | 
			
		||||
pub static DB_NAME: Lazy<String> =
 | 
			
		||||
    Lazy::new(|| std::env::var("DB_NAME").expect("PLEASE ADD DB_NAME"));
 | 
			
		||||
 | 
			
		||||
// in minutes
 | 
			
		||||
const DEFAULT_CRAWLER_TIMEOUT: u64 = 5;
 | 
			
		||||
pub static CRAWLER_TIMEOUT: Lazy<u64> =
 | 
			
		||||
    Lazy::new(|| std::env::var("CRAWLER_TIMEOUT").map(|v| v.parse::<u64>().ok()).ok().flatten().unwrap_or(DEFAULT_CRAWLER_TIMEOUT));
 | 
			
		||||
 | 
			
		||||
pub static DOCUMENTS_CRAWLED: AtomicU64 = AtomicU64::new(0);
 | 
			
		||||
pub static LAST_MESSAGE: AtomicI64 = AtomicI64::new(0);
 | 
			
		||||
 | 
			
		||||
pub static LAST_TASK_COMPLETE: Lazy<Vec<Arc<AtomicI64>>> = Lazy::new(|| {
 | 
			
		||||
    let max_threads: usize = BROWSER_THREADS.len();
 | 
			
		||||
    let mut vals = vec![];
 | 
			
		||||
    for i in 0..max_threads {
 | 
			
		||||
        //    let db = Database::default().expect("couldn't connect to foundation db!");
 | 
			
		||||
        //    DBS.lock().await.push(Arc::new(db));
 | 
			
		||||
        vals.push(Arc::new(AtomicI64::new(chrono::Utc::now().timestamp())));
 | 
			
		||||
    }
 | 
			
		||||
    vals
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
pub static USER_AGENT: Lazy<String> = Lazy::new(|| {
 | 
			
		||||
    format!(
 | 
			
		||||
        "Vorebot/{} (compatible; Googlebot/2.1; +https://voremicrocomputers.com/crawler.html)",
 | 
			
		||||
        env!("CARGO_PKG_VERSION")
 | 
			
		||||
    )
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
#[tokio::main]
 | 
			
		||||
async fn main() {
 | 
			
		||||
    mutex_timeouts::tokio::GLOBAL_TOKIO_TIMEOUT.store(72, Ordering::Relaxed);
 | 
			
		||||
 | 
			
		||||
    env_logger::init();
 | 
			
		||||
    info!("began at {}", chrono::Utc::now().to_string());
 | 
			
		||||
 | 
			
		||||
    let mut nats = async_nats::ConnectOptions::new();
 | 
			
		||||
 | 
			
		||||
    if let Some(password) = NATS_PASSWORD.as_ref() {
 | 
			
		||||
        nats = nats.user_and_password("vorebot".to_string(), password.to_string());
 | 
			
		||||
    } else {
 | 
			
		||||
        nats = nats.add_client_certificate(NATS_CERT.as_str().into(), NATS_KEY.as_str().into());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let nats = nats.connect(NATS_URL.as_str())
 | 
			
		||||
        .await;
 | 
			
		||||
    if let Err(e) = nats {
 | 
			
		||||
        error!("FATAL ERROR, COULDN'T CONNECT TO NATS: {}", e);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    let nats = nats.unwrap();
 | 
			
		||||
    let dbconn = DBConn::new(nats.clone(), DB_NAME.to_string());
 | 
			
		||||
    let nats = jetstream::new(nats);
 | 
			
		||||
 | 
			
		||||
    // fixme: remove this once we have proper site suggestion interface
 | 
			
		||||
    let args = std::env::args().collect::<Vec<_>>();
 | 
			
		||||
    if let Some(suggestion_site) = args.get(1) {
 | 
			
		||||
        let damping = args.get(2).map(|v| v.parse::<f64>().expect("BAD FP")).unwrap_or(0.45);
 | 
			
		||||
        warn!("suggesting {} with damping {}", suggestion_site, damping);
 | 
			
		||||
 | 
			
		||||
        let ack = nats.publish(VOREBOT_SUGGESTED_SERVICE.to_string(), rmp_serde::to_vec(&CrawlRequest {
 | 
			
		||||
            url: suggestion_site.to_string(),
 | 
			
		||||
            damping,
 | 
			
		||||
        }).unwrap().into()).await.unwrap();
 | 
			
		||||
 | 
			
		||||
        ack.await.expect("FATAL ERROR");
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let mut tasks: Vec<(JoinHandle<()>, String, Arc<AtomicU64>)> = vec![];
 | 
			
		||||
    let mut available_browsers: Vec<String> = BROWSER_THREADS.clone();
 | 
			
		||||
 | 
			
		||||
    {
 | 
			
		||||
        loop {
 | 
			
		||||
            while tasks.len() < BROWSER_THREADS.len() {
 | 
			
		||||
                let nats = nats.clone();
 | 
			
		||||
                let browser = available_browsers.pop().expect("NO BROWSERS LEFT, THIS IS A FATAL BUG!");
 | 
			
		||||
                let db = dbconn.clone();
 | 
			
		||||
                let b = browser.clone();
 | 
			
		||||
                let last_parse = Arc::new(AtomicU64::new(SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs()));
 | 
			
		||||
                let lp = last_parse.clone();
 | 
			
		||||
                tasks.push((tokio::spawn(async move {
 | 
			
		||||
                    let browser = b;
 | 
			
		||||
                    info!("using {}", browser);
 | 
			
		||||
                    info!("crawler spawned");
 | 
			
		||||
 | 
			
		||||
                    /* normal priority */
 | 
			
		||||
                    let consumer: PullConsumer = nats.get_or_create_stream(jetstream::stream::Config {
 | 
			
		||||
                        name: VOREBOT_SERVICE.to_string(),
 | 
			
		||||
                        subjects: vec![VOREBOT_SERVICE.to_string()],
 | 
			
		||||
                        retention: RetentionPolicy::WorkQueue,
 | 
			
		||||
                        ..Default::default()
 | 
			
		||||
                    }).await
 | 
			
		||||
                        .expect("FATAL! FAILED TO SUBSCRIBE TO NATS!")
 | 
			
		||||
                        .get_or_create_consumer("parser", jetstream::consumer::pull::Config {
 | 
			
		||||
                            durable_name: Some("parser".to_string()),
 | 
			
		||||
                            filter_subject: VOREBOT_SERVICE.to_string(),
 | 
			
		||||
                            ..Default::default()
 | 
			
		||||
                        }).await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
			
		||||
                    let mut messages = consumer.messages().await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
			
		||||
 | 
			
		||||
                    /* higher priority (new hostnames) */
 | 
			
		||||
                    let higher_consumer: PullConsumer = nats.get_or_create_stream(jetstream::stream::Config {
 | 
			
		||||
                        name: VOREBOT_NEWHOSTNAME_SERVICE.to_string(),
 | 
			
		||||
                        subjects: vec![VOREBOT_NEWHOSTNAME_SERVICE.to_string()],
 | 
			
		||||
                        retention: RetentionPolicy::WorkQueue,
 | 
			
		||||
                        ..Default::default()
 | 
			
		||||
                    }).await
 | 
			
		||||
                        .expect("FATAL! FAILED TO SUBSCRIBE TO NATS!")
 | 
			
		||||
                        .get_or_create_consumer("highparser", jetstream::consumer::pull::Config {
 | 
			
		||||
                            durable_name: Some("highparser".to_string()),
 | 
			
		||||
                            filter_subject: VOREBOT_NEWHOSTNAME_SERVICE.to_string(),
 | 
			
		||||
                            ..Default::default()
 | 
			
		||||
                        }).await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
			
		||||
                    let mut high_messages = higher_consumer.messages().await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
			
		||||
 | 
			
		||||
                    /* highest priority (user-suggested) */
 | 
			
		||||
                    let highest_consumer: PullConsumer = nats.get_or_create_stream(jetstream::stream::Config {
 | 
			
		||||
                        name: VOREBOT_SUGGESTED_SERVICE.to_string(),
 | 
			
		||||
                        subjects: vec![VOREBOT_SUGGESTED_SERVICE.to_string()],
 | 
			
		||||
                        retention: RetentionPolicy::WorkQueue,
 | 
			
		||||
                        ..Default::default()
 | 
			
		||||
                    }).await
 | 
			
		||||
                        .expect("FATAL! FAILED TO SUBSCRIBE TO NATS!")
 | 
			
		||||
                        .get_or_create_consumer("highestparser", jetstream::consumer::pull::Config {
 | 
			
		||||
                            durable_name: Some("highestparser".to_string()),
 | 
			
		||||
                            filter_subject: VOREBOT_SUGGESTED_SERVICE.to_string(),
 | 
			
		||||
                            ..Default::default()
 | 
			
		||||
                        }).await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
			
		||||
                    let mut highest_messages = highest_consumer.messages().await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
			
		||||
 | 
			
		||||
                    let mut prefs = FirefoxPreferences::new();
 | 
			
		||||
                    prefs.set_user_agent(USER_AGENT.to_string()).unwrap();
 | 
			
		||||
                    let mut caps = DesiredCapabilities::firefox();
 | 
			
		||||
                    caps.set_preferences(prefs).unwrap();
 | 
			
		||||
                    if let Some(proxy) = BROWSER_PROXY.as_ref() {
 | 
			
		||||
                        caps.set_proxy(Proxy::Manual {
 | 
			
		||||
                            ftp_proxy: None,
 | 
			
		||||
                            http_proxy: Some(proxy.to_string()),
 | 
			
		||||
                            ssl_proxy: Some(proxy.to_string()),
 | 
			
		||||
                            socks_proxy: None,
 | 
			
		||||
                            socks_version: None,
 | 
			
		||||
                            socks_username: None,
 | 
			
		||||
                            socks_password: None,
 | 
			
		||||
                            no_proxy: None,
 | 
			
		||||
                        }).unwrap();
 | 
			
		||||
                    }
 | 
			
		||||
                    let driver = WebDriver::new(&browser, caps).await.unwrap();
 | 
			
		||||
                    info!("crawler ready");
 | 
			
		||||
 | 
			
		||||
                    loop {
 | 
			
		||||
                        lp.store(SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(), Ordering::Relaxed);
 | 
			
		||||
                        tokio::select! {
 | 
			
		||||
                            Some(highest) = StreamExt::next(&mut highest_messages) => {
 | 
			
		||||
                                if let Err(e) = highest {
 | 
			
		||||
                                    warn!("error when recv js message! {e}");
 | 
			
		||||
                                } else {
 | 
			
		||||
                                    let message = highest.unwrap();
 | 
			
		||||
                                    if let Err(e) = message.ack().await {
 | 
			
		||||
                                        warn!("failed acking message {e}");
 | 
			
		||||
                                    }
 | 
			
		||||
                                    let req = rmp_serde::from_slice::<CrawlRequest>(message.payload.as_ref());
 | 
			
		||||
                                    if let Err(e) = req {
 | 
			
		||||
                                        error!("BAD NATS REQUEST: {e}");
 | 
			
		||||
                                        continue;
 | 
			
		||||
                                    }
 | 
			
		||||
                                    let req = req.unwrap();
 | 
			
		||||
                                    info!("RECV USER SUGGESTION!");
 | 
			
		||||
                                    let now = chrono::Utc::now().timestamp();
 | 
			
		||||
                                    LAST_MESSAGE.store(now, Ordering::Relaxed);
 | 
			
		||||
                                    let nats = nats.clone();
 | 
			
		||||
 | 
			
		||||
                                    let mut bad = false;
 | 
			
		||||
 | 
			
		||||
                                    driver.in_new_tab(|| async {
 | 
			
		||||
                                        if web_parse(nats.clone(), db.clone(), &driver, &req.url, req.damping).await.is_err() {
 | 
			
		||||
                                            warn!("temporary failure detected in parsing, requeuing");
 | 
			
		||||
                                            nats.publish(VOREBOT_SERVICE.to_string(), rmp_serde::to_vec(&req).unwrap().into()).await.expect("FAILED TO REQUEUE");
 | 
			
		||||
                                            bad = true;
 | 
			
		||||
                                        }
 | 
			
		||||
                                        Ok(())
 | 
			
		||||
                                    }).await.unwrap();
 | 
			
		||||
 | 
			
		||||
                                    if bad {
 | 
			
		||||
                                        continue;
 | 
			
		||||
                                    }
 | 
			
		||||
 | 
			
		||||
                                    if DOCUMENTS_CRAWLED.load(Ordering::Relaxed) % 100 == 0 {
 | 
			
		||||
                                        DOCUMENTS_CRAWLED.fetch_add(1, Ordering::Relaxed);
 | 
			
		||||
                                        info!("crawled {} pages!", DOCUMENTS_CRAWLED.load(Ordering::Relaxed));
 | 
			
		||||
                                    }
 | 
			
		||||
                                }
 | 
			
		||||
                            }
 | 
			
		||||
                            Some(high) = StreamExt::next(&mut high_messages) => {
 | 
			
		||||
                                if let Err(e) = high {
 | 
			
		||||
                                    warn!("error when recv js message! {e}");
 | 
			
		||||
                                } else {
 | 
			
		||||
                                    let message = high.unwrap();
 | 
			
		||||
                                    if let Err(e) = message.ack().await {
 | 
			
		||||
                                        warn!("failed acking message {e}");
 | 
			
		||||
                                    }
 | 
			
		||||
                                    let req = rmp_serde::from_slice::<CrawlRequest>(message.payload.as_ref());
 | 
			
		||||
                                    if let Err(e) = req {
 | 
			
		||||
                                        error!("BAD NATS REQUEST: {e}");
 | 
			
		||||
                                        continue;
 | 
			
		||||
                                    }
 | 
			
		||||
                                    let req = req.unwrap();
 | 
			
		||||
                                    info!("RECV HIGH PRIORITY!");
 | 
			
		||||
                                    let now = chrono::Utc::now().timestamp();
 | 
			
		||||
                                    LAST_MESSAGE.store(now, Ordering::Relaxed);
 | 
			
		||||
                                    let nats = nats.clone();
 | 
			
		||||
                                    let mut bad = false;
 | 
			
		||||
 | 
			
		||||
                                    driver.in_new_tab(|| async {
 | 
			
		||||
                                        if web_parse(nats.clone(), db.clone(), &driver, &req.url, req.damping).await.is_err() {
 | 
			
		||||
                                            warn!("temporary failure detected in parsing, requeuing");
 | 
			
		||||
                                            nats.publish(VOREBOT_SERVICE.to_string(), rmp_serde::to_vec(&req).unwrap().into()).await.expect("FAILED TO REQUEUE");
 | 
			
		||||
                                            bad = true;
 | 
			
		||||
                                        }
 | 
			
		||||
                                        Ok(())
 | 
			
		||||
                                    }).await.unwrap();
 | 
			
		||||
 | 
			
		||||
                                    if bad {
 | 
			
		||||
                                        continue;
 | 
			
		||||
                                    }
 | 
			
		||||
 | 
			
		||||
                                    if DOCUMENTS_CRAWLED.load(Ordering::Relaxed) % 100 == 0 {
 | 
			
		||||
                                        DOCUMENTS_CRAWLED.fetch_add(1, Ordering::Relaxed);
 | 
			
		||||
                                        info!("crawled {} pages!", DOCUMENTS_CRAWLED.load(Ordering::Relaxed));
 | 
			
		||||
                                    }
 | 
			
		||||
                                }
 | 
			
		||||
                            }
 | 
			
		||||
                            Some(normal) = StreamExt::next(&mut messages) => {
 | 
			
		||||
                                if let Err(e) = normal {
 | 
			
		||||
                                    warn!("error when recv js message! {e}");
 | 
			
		||||
                                } else {
 | 
			
		||||
                                    let message = normal.unwrap();
 | 
			
		||||
                                    if let Err(e) = message.ack().await {
 | 
			
		||||
                                        warn!("failed acking message {e}");
 | 
			
		||||
                                    }
 | 
			
		||||
                                    let req = rmp_serde::from_slice::<CrawlRequest>(message.payload.as_ref());
 | 
			
		||||
                                    if let Err(e) = req {
 | 
			
		||||
                                        error!("BAD NATS REQUEST: {e}");
 | 
			
		||||
                                        continue;
 | 
			
		||||
                                    }
 | 
			
		||||
                                    let req = req.unwrap();
 | 
			
		||||
                                    let now = chrono::Utc::now().timestamp();
 | 
			
		||||
                                    LAST_MESSAGE.store(now, Ordering::Relaxed);
 | 
			
		||||
                                    let nats = nats.clone();
 | 
			
		||||
                                    
 | 
			
		||||
                                    let mut hash = DefaultHasher::new();
 | 
			
		||||
                                    hash.write(req.url.as_bytes());
 | 
			
		||||
                                    let hash = hash.finish();
 | 
			
		||||
 | 
			
		||||
                                    let dehomo_bucket = nats.get_key_value("dehomo").await;
 | 
			
		||||
                                    let dehomo_bucket = if dehomo_bucket.is_err() {
 | 
			
		||||
                                        let dehomo_bucket = nats.create_key_value(kv::Config {
 | 
			
		||||
                                            bucket: "dehomo".to_string(),
 | 
			
		||||
                                            description: "prevent the same url from being scraped again too quickly".to_string(),
 | 
			
		||||
                                            max_age: Duration::from_secs(60*60),
 | 
			
		||||
                                            ..Default::default()
 | 
			
		||||
                                        }).await;
 | 
			
		||||
                                        if let Err(e) = dehomo_bucket {
 | 
			
		||||
                                            panic!("FAILED TO CREATE DEHOMO BUCKET!!! {e}");
 | 
			
		||||
                                        } else {
 | 
			
		||||
                                            dehomo_bucket.unwrap()
 | 
			
		||||
                                        }
 | 
			
		||||
                                    } else {
 | 
			
		||||
                                        dehomo_bucket.unwrap()
 | 
			
		||||
                                    };
 | 
			
		||||
                                    if dehomo_bucket.get(hash.to_string()).await.ok().flatten().map(|v| *v.first().unwrap_or(&0) == 1).unwrap_or(false) {
 | 
			
		||||
                                        info!("too soon to scrape {}", req.url);
 | 
			
		||||
                                        continue;
 | 
			
		||||
                                    }
 | 
			
		||||
                                    
 | 
			
		||||
                                    let mut bad = false;
 | 
			
		||||
 | 
			
		||||
                                    driver.in_new_tab(|| async {
 | 
			
		||||
                                        if web_parse(nats.clone(), db.clone(), &driver, &req.url, req.damping).await.is_err() {
 | 
			
		||||
                                            warn!("temporary failure detected in parsing, requeuing");
 | 
			
		||||
                                            nats.publish(VOREBOT_SERVICE.to_string(), rmp_serde::to_vec(&req).unwrap().into()).await.expect("FAILED TO REQUEUE");
 | 
			
		||||
                                            bad = true;
 | 
			
		||||
                                        }
 | 
			
		||||
                                        Ok(())
 | 
			
		||||
                                    }).await.unwrap();
 | 
			
		||||
                                    
 | 
			
		||||
                                    if bad {
 | 
			
		||||
                                        continue;
 | 
			
		||||
                                    }
 | 
			
		||||
                                    
 | 
			
		||||
                                    dehomo_bucket.put(hash.to_string(), vec![1u8].into()).await.expect("failed to store dehomo");
 | 
			
		||||
 | 
			
		||||
                                    if DOCUMENTS_CRAWLED.load(Ordering::Relaxed) % 100 == 0 {
 | 
			
		||||
                                        DOCUMENTS_CRAWLED.fetch_add(1, Ordering::Relaxed);
 | 
			
		||||
                                        info!("crawled {} pages!", DOCUMENTS_CRAWLED.load(Ordering::Relaxed));
 | 
			
		||||
                                    }
 | 
			
		||||
                                }
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }),
 | 
			
		||||
                            browser, last_parse.clone()
 | 
			
		||||
                ));
 | 
			
		||||
                warn!("spawning new injest thread");
 | 
			
		||||
            }
 | 
			
		||||
            let mut tasks_to_remove = vec![];
 | 
			
		||||
            for task in tasks.iter() {
 | 
			
		||||
                if task.0.is_finished() {
 | 
			
		||||
                    tasks_to_remove.push(task.1.clone());
 | 
			
		||||
                    available_browsers.push(task.1.clone());
 | 
			
		||||
                }
 | 
			
		||||
                let last_parse = task.2.load(Ordering::Relaxed);
 | 
			
		||||
                if SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs() > (*CRAWLER_TIMEOUT * 60) + last_parse {
 | 
			
		||||
                    // task has been taking too long
 | 
			
		||||
                    warn!("task taking too long! aborting!");
 | 
			
		||||
                    task.0.abort();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            tasks.retain(|v| !tasks_to_remove.contains(&v.1));
 | 
			
		||||
            tokio::time::sleep(Duration::from_secs(3)).await;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//#[tokio::main]
 | 
			
		||||
//async fn main() {
 | 
			
		||||
//    mutex_timeouts::tokio::GLOBAL_TOKIO_TIMEOUT.store(72, Ordering::Relaxed);
 | 
			
		||||
//
 | 
			
		||||
//    env_logger::init();
 | 
			
		||||
//    info!("began at {}", chrono::Utc::now().to_string());
 | 
			
		||||
//
 | 
			
		||||
//    let nats = async_nats::connect(NATS_URL.as_str()).await;
 | 
			
		||||
//    if let Err(e) = nats {
 | 
			
		||||
//        error!("FATAL ERROR, COULDN'T CONNECT TO NATS: {}", e);
 | 
			
		||||
//        return;
 | 
			
		||||
//    }
 | 
			
		||||
//    let nats = nats.unwrap();
 | 
			
		||||
//
 | 
			
		||||
//    let dbconn = DBConn::new(nats.clone(), "lyphedb-test");
 | 
			
		||||
//
 | 
			
		||||
//    let nats = jetstream::new(nats);
 | 
			
		||||
//
 | 
			
		||||
//    let mut prefs = FirefoxPreferences::new();
 | 
			
		||||
//    prefs.set_user_agent(USER_AGENT.to_string()).unwrap();
 | 
			
		||||
//    let mut caps = DesiredCapabilities::firefox();
 | 
			
		||||
//    caps.set_preferences(prefs).unwrap();
 | 
			
		||||
//    let driver = WebDriver::new(&BROWSER_THREADS[0], caps).await.unwrap();
 | 
			
		||||
//
 | 
			
		||||
//    driver.in_new_tab(|| async {
 | 
			
		||||
//        web_parse(nats.clone(), dbconn.clone(), &driver, "https://asklyphe.com/", 0.85).await.expect("Failed to run web parse");
 | 
			
		||||
//
 | 
			
		||||
//        Ok(())
 | 
			
		||||
//    }).await.unwrap();
 | 
			
		||||
//}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,492 +0,0 @@
 | 
			
		|||
use crate::USER_AGENT;
 | 
			
		||||
use asklyphe_common::ldb::{linkrelstore, linkstore, metastore, sitestore, titlestore, wordstore, DBConn};
 | 
			
		||||
use async_nats::jetstream;
 | 
			
		||||
use async_nats::jetstream::kv;
 | 
			
		||||
use futures::AsyncReadExt;
 | 
			
		||||
use image::EncodableLayout;
 | 
			
		||||
use isahc::config::RedirectPolicy;
 | 
			
		||||
use isahc::prelude::Configurable;
 | 
			
		||||
use isahc::HttpClient;
 | 
			
		||||
use log::{debug, error, warn};
 | 
			
		||||
use std::collections::{BTreeMap, BTreeSet};
 | 
			
		||||
use std::hash::{DefaultHasher, Hasher};
 | 
			
		||||
use std::sync::atomic::AtomicBool;
 | 
			
		||||
use std::sync::{mpsc, Arc};
 | 
			
		||||
use std::time::Duration;
 | 
			
		||||
use stopwords::{Language, Spark, Stopwords};
 | 
			
		||||
use texting_robots::{get_robots_url, Robot};
 | 
			
		||||
use thirtyfour::{By, WebDriver};
 | 
			
		||||
use asklyphe_common::nats::vorebot::CrawlRequest;
 | 
			
		||||
use asklyphe_common::nats::vorebot::VOREBOT_SERVICE;
 | 
			
		||||
 | 
			
		||||
pub fn allowed_to_crawl(robotstxt: &[u8], url: &str) -> Result<bool, ()> {
 | 
			
		||||
    let robot1 = Robot::new("Vorebot", robotstxt);
 | 
			
		||||
    if let Err(e) = robot1 {
 | 
			
		||||
        warn!(
 | 
			
		||||
            "potentially malformed robots.txt ({}), not crawling {}",
 | 
			
		||||
            e, url
 | 
			
		||||
        );
 | 
			
		||||
        return Err(());
 | 
			
		||||
    }
 | 
			
		||||
    let robot1 = robot1.unwrap();
 | 
			
		||||
    Ok(robot1.allowed(url))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// returns Err if we cannot access a page, but the error associated with it seems temporary (i.e. it's worth trying again later)
 | 
			
		||||
// otherwise, returns Ok
 | 
			
		||||
pub async fn web_parse(
 | 
			
		||||
    nats: jetstream::Context,
 | 
			
		||||
    db: DBConn,
 | 
			
		||||
    driver: &WebDriver,
 | 
			
		||||
    url: &str,
 | 
			
		||||
    damping: f64,
 | 
			
		||||
) -> Result<(), ()> {
 | 
			
		||||
 | 
			
		||||
    driver.delete_all_cookies().await.map_err(|_| ())?;
 | 
			
		||||
    let robots_bucket = nats.get_key_value("robots").await;
 | 
			
		||||
    let robots_bucket = if robots_bucket.is_err() {
 | 
			
		||||
        let robots_bucket = nats
 | 
			
		||||
            .create_key_value(kv::Config {
 | 
			
		||||
                bucket: "robots".to_string(),
 | 
			
		||||
                description: "storage of robots.txt data for given hosts".to_string(),
 | 
			
		||||
                ..Default::default()
 | 
			
		||||
            })
 | 
			
		||||
            .await;
 | 
			
		||||
        if let Err(e) = robots_bucket {
 | 
			
		||||
            error!("could not create robots.txt bucket: {}", e);
 | 
			
		||||
            None
 | 
			
		||||
        } else {
 | 
			
		||||
            Some(robots_bucket.unwrap())
 | 
			
		||||
        }
 | 
			
		||||
    } else {
 | 
			
		||||
        robots_bucket.ok()
 | 
			
		||||
    };
 | 
			
		||||
    let hosts_bucket = nats.get_key_value("hosts").await;
 | 
			
		||||
    let hosts_bucket = if hosts_bucket.is_err() {
 | 
			
		||||
        let hosts_bucket = nats
 | 
			
		||||
            .create_key_value(kv::Config {
 | 
			
		||||
                bucket: "hosts".to_string(),
 | 
			
		||||
                description: "prevent the same host from being scraped too quickly".to_string(),
 | 
			
		||||
                max_age: Duration::from_secs(60 * 10),
 | 
			
		||||
                ..Default::default()
 | 
			
		||||
            })
 | 
			
		||||
            .await;
 | 
			
		||||
        if let Err(e) = hosts_bucket {
 | 
			
		||||
            error!("could not create hosts bucket: {}", e);
 | 
			
		||||
            return Err(());
 | 
			
		||||
        } else {
 | 
			
		||||
            hosts_bucket.unwrap()
 | 
			
		||||
        }
 | 
			
		||||
    } else {
 | 
			
		||||
        hosts_bucket.unwrap()
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    let robots_url = get_robots_url(url);
 | 
			
		||||
    if robots_url.is_err() {
 | 
			
		||||
        error!("could not get a robots.txt url from {}, not crawling", url);
 | 
			
		||||
        return Ok(());
 | 
			
		||||
    }
 | 
			
		||||
    let robots_url = robots_url.unwrap();
 | 
			
		||||
    let mut hash = DefaultHasher::new();
 | 
			
		||||
    hash.write(robots_url.as_bytes());
 | 
			
		||||
    let hash = hash.finish();
 | 
			
		||||
 | 
			
		||||
    if let Ok(Some(host)) = hosts_bucket.get(hash.to_string()).await {
 | 
			
		||||
        let count = *host.first().unwrap_or(&0);
 | 
			
		||||
        if count > 10 {
 | 
			
		||||
            warn!("scraping {} too quickly, avoiding for one minute", robots_url);
 | 
			
		||||
            return Err(());
 | 
			
		||||
        }
 | 
			
		||||
        hosts_bucket.put(hash.to_string(), vec![count + 1].into()).await.expect("COULDN'T INSERT INTO HOSTS BUCKET!");
 | 
			
		||||
    } else {
 | 
			
		||||
        hosts_bucket.put(hash.to_string(), vec![1].into()).await.expect("COULDN'T INSERT INTO HOSTS BUCKET!");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let mut skip_robots_check = false;
 | 
			
		||||
    if let Some(robots_bucket) = &robots_bucket {
 | 
			
		||||
        if let Ok(Some(entry)) = robots_bucket.get(hash.to_string()).await {
 | 
			
		||||
            if let Ok(res) = allowed_to_crawl(entry.as_bytes(), url) {
 | 
			
		||||
                if !res {
 | 
			
		||||
                    debug!("robots.txt does not allow us to crawl {}", url);
 | 
			
		||||
                    return Ok(());
 | 
			
		||||
                } else {
 | 
			
		||||
                    skip_robots_check = true;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if !skip_robots_check {
 | 
			
		||||
        // check manually
 | 
			
		||||
        debug!("checking new robots.txt \"{}\"", robots_url);
 | 
			
		||||
        let client = HttpClient::builder()
 | 
			
		||||
            .redirect_policy(RedirectPolicy::Limit(10))
 | 
			
		||||
            .timeout(Duration::from_secs(60))
 | 
			
		||||
            .build();
 | 
			
		||||
        if let Err(e) = client {
 | 
			
		||||
            error!("could not create new robots.txt httpclient: {}", e);
 | 
			
		||||
            return Err(());
 | 
			
		||||
        }
 | 
			
		||||
        let client = client.unwrap();
 | 
			
		||||
        let request = isahc::Request::get(&robots_url)
 | 
			
		||||
            .header("user-agent", USER_AGENT.as_str())
 | 
			
		||||
            .body(());
 | 
			
		||||
        if let Err(e) = request {
 | 
			
		||||
            error!("could not create robots.txt get request: {}", e);
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
        let request = request.unwrap();
 | 
			
		||||
        let response = client.send_async(request).await;
 | 
			
		||||
        if let Err(e) = response {
 | 
			
		||||
            warn!("could not get robots.txt page: {}", e);
 | 
			
		||||
            return Err(());
 | 
			
		||||
        }
 | 
			
		||||
        let mut response = response.unwrap();
 | 
			
		||||
        if response.status() == 429 {
 | 
			
		||||
            // too many requests
 | 
			
		||||
            warn!("too many requests for {}", robots_url);
 | 
			
		||||
            return Err(());
 | 
			
		||||
        }
 | 
			
		||||
        if response.status().is_server_error() {
 | 
			
		||||
            // don't crawl at the moment
 | 
			
		||||
            debug!("not crawling {} due to server error", robots_url);
 | 
			
		||||
            return Err(());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let mut body = "".to_string();
 | 
			
		||||
        if let Err(e) = response.body_mut().read_to_string(&mut body).await {
 | 
			
		||||
            warn!("could not read from robots.txt response: {}", e);
 | 
			
		||||
            return Err(());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if let Ok(res) = allowed_to_crawl(body.as_bytes(), url) {
 | 
			
		||||
            if let Some(robots_bucket) = &robots_bucket {
 | 
			
		||||
                if let Err(e) = robots_bucket
 | 
			
		||||
                    .put(hash.to_string(), body.as_bytes().to_vec().into())
 | 
			
		||||
                    .await
 | 
			
		||||
                {
 | 
			
		||||
                    warn!("could not put robots.txt data: {}", e);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if !res {
 | 
			
		||||
                debug!("robots.txt does not allow us to crawl {}", url);
 | 
			
		||||
                return Ok(());
 | 
			
		||||
            } else {
 | 
			
		||||
                // we're allowed to crawl!
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let start = std::time::Instant::now();
 | 
			
		||||
    debug!("handling request for {}", url);
 | 
			
		||||
 | 
			
		||||
    // check for bad status codes
 | 
			
		||||
    // fixme: i hate this solution, can we get something that actually checks the browser's request?
 | 
			
		||||
    let client = HttpClient::builder()
 | 
			
		||||
        .redirect_policy(RedirectPolicy::Limit(10))
 | 
			
		||||
        .timeout(Duration::from_secs(60))
 | 
			
		||||
        .build();
 | 
			
		||||
    if let Err(e) = client {
 | 
			
		||||
        error!("could not create new badstatuscode httpclient: {}", e);
 | 
			
		||||
        return Err(());
 | 
			
		||||
    }
 | 
			
		||||
    let client = client.unwrap();
 | 
			
		||||
    let request = isahc::Request::get(url)
 | 
			
		||||
        .header("user-agent", USER_AGENT.as_str())
 | 
			
		||||
        .body(());
 | 
			
		||||
    if let Err(e) = request {
 | 
			
		||||
        error!("could not create badstatuscode get request: {}", e);
 | 
			
		||||
        return Ok(());
 | 
			
		||||
    }
 | 
			
		||||
    let request = request.unwrap();
 | 
			
		||||
    let response = client.send_async(request).await;
 | 
			
		||||
    if let Err(e) = response {
 | 
			
		||||
        warn!("could not get badstatuscode page: {}", e);
 | 
			
		||||
        return Err(());
 | 
			
		||||
    }
 | 
			
		||||
    let mut response = response.unwrap();
 | 
			
		||||
    if response.status() == 429 {
 | 
			
		||||
        // too many requests
 | 
			
		||||
        warn!("too many requests for {}", url);
 | 
			
		||||
        return Err(());
 | 
			
		||||
    }
 | 
			
		||||
    if response.status().is_server_error() || response.status().is_client_error() {
 | 
			
		||||
        // don't crawl at the moment
 | 
			
		||||
        debug!("not crawling {} due to bad status code {}", url, response.status());
 | 
			
		||||
        return Err(());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // i guess we're good
 | 
			
		||||
    driver.goto(url).await.map_err(|_| ())?;
 | 
			
		||||
 | 
			
		||||
    let html_element = driver.find(By::Tag("html")).await.map_err(|_| ())?;
 | 
			
		||||
 | 
			
		||||
    if let Some(lang) = html_element.attr("lang").await.ok().flatten() {
 | 
			
		||||
        if !lang.starts_with("en") && !lang.starts_with("unknown") {
 | 
			
		||||
            // i.e. non-english language
 | 
			
		||||
            // fixme: remove this once we start expanding to non-english-speaking markets?
 | 
			
		||||
            warn!("skipping {} due to {} language (currently prioritizing english", url, lang);
 | 
			
		||||
            return Err(());
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let meta_elements = driver.find_all(By::Tag("meta")).await.map_err(|_| ())?;
 | 
			
		||||
 | 
			
		||||
    let title = driver.title().await.map_err(|_| ())?;
 | 
			
		||||
    let mut description = None;
 | 
			
		||||
    let mut keywords = vec![];
 | 
			
		||||
    for elem in meta_elements {
 | 
			
		||||
        if let Ok(Some(name)) = elem.attr("name").await {
 | 
			
		||||
            match name.as_str() {
 | 
			
		||||
                "description" => {
 | 
			
		||||
                    if let Ok(Some(content)) = elem.attr("content").await {
 | 
			
		||||
                        description = Some(content);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                "keywords" => {
 | 
			
		||||
                    if let Ok(Some(content)) = elem.attr("content").await {
 | 
			
		||||
                        keywords = content
 | 
			
		||||
                            .split(',')
 | 
			
		||||
                            .map(|v| v.to_lowercase())
 | 
			
		||||
                            .filter(|v| !v.is_empty())
 | 
			
		||||
                            .collect();
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                _ => {}
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let body = driver.find(By::Tag("body")).await.map_err(|_| ())?;
 | 
			
		||||
    let raw_page_content = body.text().await.map_err(|_| ())?;
 | 
			
		||||
 | 
			
		||||
    async fn gather_elements_with_multiplier(
 | 
			
		||||
        driver: &WebDriver,
 | 
			
		||||
        wordmap: &mut BTreeMap<String, f64>,
 | 
			
		||||
        stops: &BTreeSet<&&str>,
 | 
			
		||||
        elements: &[&str],
 | 
			
		||||
        multiplier: f64,
 | 
			
		||||
    ) {
 | 
			
		||||
        let mut elms = vec![];
 | 
			
		||||
        for tag in elements {
 | 
			
		||||
            elms.push(driver.find_all(By::Tag(*tag)).await);
 | 
			
		||||
        }
 | 
			
		||||
        let elms = elms.iter().flatten().flatten().collect::<Vec<_>>();
 | 
			
		||||
        let mut sentences = vec![];
 | 
			
		||||
        let mut sentence_set = BTreeSet::new();
 | 
			
		||||
 | 
			
		||||
        debug!("processing elements...");
 | 
			
		||||
        for node in elms {
 | 
			
		||||
            let _ = node.scroll_into_view().await;
 | 
			
		||||
            let boxmodel = node.rect().await;
 | 
			
		||||
            if boxmodel.is_err() {
 | 
			
		||||
                // not visible
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            let boxmodel = boxmodel.unwrap();
 | 
			
		||||
            let current_text = node.text().await;
 | 
			
		||||
            if current_text.is_err() {
 | 
			
		||||
                // no text on this node
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            let current_text = current_text.unwrap().trim().to_string();
 | 
			
		||||
            if current_text.is_empty() {
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            let sqs = (boxmodel.width * boxmodel.height).max(1.0); // no 0 divides pls (:
 | 
			
		||||
            let ccount = current_text.chars().count() as f64;
 | 
			
		||||
            let cssq = if ccount > 0.0 { sqs / ccount } else { 0.0 };
 | 
			
		||||
            if sentence_set.contains(¤t_text) {
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            sentence_set.insert(current_text.clone());
 | 
			
		||||
            sentences.push((current_text, cssq));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for (sentence, cssq) in sentences {
 | 
			
		||||
            let mut cssq = (cssq / 500.0).powi(2) * multiplier;
 | 
			
		||||
            for word in sentence.split_whitespace() {
 | 
			
		||||
                let word = word
 | 
			
		||||
                    .to_lowercase()
 | 
			
		||||
                    .trim_end_matches(|v: char| v.is_ascii_punctuation())
 | 
			
		||||
                    .to_string();
 | 
			
		||||
                if stops.contains(&word.as_str()) {
 | 
			
		||||
                    // less valuable
 | 
			
		||||
                    cssq /= 100.0;
 | 
			
		||||
                }
 | 
			
		||||
                if let Some(wentry) = wordmap.get_mut(&word) {
 | 
			
		||||
                    *wentry += cssq;
 | 
			
		||||
                } else {
 | 
			
		||||
                    if word.is_empty() {
 | 
			
		||||
                        continue;
 | 
			
		||||
                    }
 | 
			
		||||
                    wordmap.insert(word.to_string(), cssq);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let mut wordmap: BTreeMap<String, f64> = BTreeMap::new();
 | 
			
		||||
    let stops: BTreeSet<_> = Spark::stopwords(Language::English)
 | 
			
		||||
        .unwrap()
 | 
			
		||||
        .iter()
 | 
			
		||||
        .collect();
 | 
			
		||||
 | 
			
		||||
    debug!("headers...");
 | 
			
		||||
    gather_elements_with_multiplier(driver, &mut wordmap, &stops, &["h1","h2","h3","h4","h5","h6"], 3.0)
 | 
			
		||||
        .await;
 | 
			
		||||
 | 
			
		||||
    debug!("paragraphs...");
 | 
			
		||||
    gather_elements_with_multiplier(driver, &mut wordmap, &stops, &["p","div"], 1.0).await;
 | 
			
		||||
 | 
			
		||||
    let mut wordmap = wordmap.into_iter().collect::<Vec<_>>();
 | 
			
		||||
    wordmap.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
 | 
			
		||||
 | 
			
		||||
    let mut db_error_so_requeue_anyways = false;
 | 
			
		||||
 | 
			
		||||
    let words = wordmap
 | 
			
		||||
        .iter()
 | 
			
		||||
        .map(|(word, _)| word.as_str())
 | 
			
		||||
        .collect::<Vec<_>>();
 | 
			
		||||
    #[allow(clippy::collapsible_if)]
 | 
			
		||||
    if !words.is_empty() {
 | 
			
		||||
        if wordstore::add_url_to_keywords(&db, &words, url)
 | 
			
		||||
            .await
 | 
			
		||||
            .is_err()
 | 
			
		||||
        {
 | 
			
		||||
            warn!("couldn't add {} to keywords!", url);
 | 
			
		||||
            db_error_so_requeue_anyways = true;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let mut metawords = keywords.iter().map(|v| v.as_str()).collect::<Vec<_>>();
 | 
			
		||||
    let desc2 = description.clone();
 | 
			
		||||
    let desc2 = desc2.map(|v| {
 | 
			
		||||
        v.to_lowercase()
 | 
			
		||||
            .split_whitespace()
 | 
			
		||||
            .map(String::from)
 | 
			
		||||
            .collect::<Vec<_>>()
 | 
			
		||||
    });
 | 
			
		||||
    if let Some(description) = &desc2 {
 | 
			
		||||
        for word in description {
 | 
			
		||||
            let word = word.trim_end_matches(|v: char| v.is_ascii_punctuation());
 | 
			
		||||
            if word.is_empty() {
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            metawords.push(word);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    #[allow(clippy::collapsible_if)]
 | 
			
		||||
    if !metawords.is_empty() {
 | 
			
		||||
        if metastore::add_url_to_metawords(&db, &metawords, url)
 | 
			
		||||
            .await
 | 
			
		||||
            .is_err()
 | 
			
		||||
        {
 | 
			
		||||
            warn!("couldn't add {} to metawords!", url);
 | 
			
		||||
            db_error_so_requeue_anyways = true;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let mut titlewords = vec![];
 | 
			
		||||
    let title2 = title.clone();
 | 
			
		||||
    let title2 = title2.to_lowercase();
 | 
			
		||||
    for word in title2.split_whitespace() {
 | 
			
		||||
        let word = word.trim_end_matches(|v: char| v.is_ascii_punctuation());
 | 
			
		||||
        if word.is_empty() {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        titlewords.push(word);
 | 
			
		||||
    }
 | 
			
		||||
    #[allow(clippy::collapsible_if)]
 | 
			
		||||
    if !titlewords.is_empty() {
 | 
			
		||||
        if titlestore::add_url_to_titlewords(&db, &titlewords, url)
 | 
			
		||||
            .await
 | 
			
		||||
            .is_err()
 | 
			
		||||
        {
 | 
			
		||||
            warn!("couldn't add {} to titlewords!", url);
 | 
			
		||||
            db_error_so_requeue_anyways = true;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if sitestore::add_website(
 | 
			
		||||
        &db,
 | 
			
		||||
        url,
 | 
			
		||||
        Some(title),
 | 
			
		||||
        description,
 | 
			
		||||
        if keywords.is_empty() {
 | 
			
		||||
            None
 | 
			
		||||
        } else {
 | 
			
		||||
            Some(keywords)
 | 
			
		||||
        },
 | 
			
		||||
        &wordmap,
 | 
			
		||||
        raw_page_content,
 | 
			
		||||
        damping
 | 
			
		||||
    )
 | 
			
		||||
    .await
 | 
			
		||||
    .is_err()
 | 
			
		||||
    {
 | 
			
		||||
        warn!("couldn't add {} to sitestore!", url);
 | 
			
		||||
        db_error_so_requeue_anyways = true;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    debug!("finished with main site stuff for {}", url);
 | 
			
		||||
 | 
			
		||||
    let linkelms = driver.find_all(By::Tag("a")).await.map_err(|_| ())?;
 | 
			
		||||
 | 
			
		||||
    for linkelm in linkelms {
 | 
			
		||||
        if linkelm.scroll_into_view().await.is_err() {
 | 
			
		||||
            debug!("couldn't scroll into view!");
 | 
			
		||||
        }
 | 
			
		||||
        let href = linkelm.prop("href").await.map_err(|_| ())?;
 | 
			
		||||
        if href.is_none() {
 | 
			
		||||
            debug!("no href!");
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        let href = href.unwrap();
 | 
			
		||||
        if href.contains('#') {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        let linktext = linkelm.text().await.map_err(|_| ())?.to_lowercase();
 | 
			
		||||
        let linkimgs = linkelm.find_all(By::Tag("img")).await.map_err(|_| ())?;
 | 
			
		||||
        let mut alts = "".to_string();
 | 
			
		||||
        for img in linkimgs {
 | 
			
		||||
            if let Ok(Some(alt)) = img.attr("alt").await {
 | 
			
		||||
                alts.push_str(&alt);
 | 
			
		||||
                alts.push(' ');
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        let alts = alts.trim().to_lowercase();
 | 
			
		||||
        let mut linkwords = vec![];
 | 
			
		||||
        for word in linktext.split_whitespace() {
 | 
			
		||||
            let word = word.trim_end_matches(|v: char| v.is_ascii_punctuation());
 | 
			
		||||
            linkwords.push(word);
 | 
			
		||||
        }
 | 
			
		||||
        for word in alts.split_whitespace() {
 | 
			
		||||
            let word = word.trim_end_matches(|v: char| v.is_ascii_punctuation());
 | 
			
		||||
            linkwords.push(word);
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        #[allow(clippy::collapsible_if)]
 | 
			
		||||
        if !linkwords.is_empty() {
 | 
			
		||||
            if linkstore::add_url_to_linkwords(&db, &linkwords, &href).await.is_err() {
 | 
			
		||||
                warn!("couldn't add {} to linkwords!", url);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        if linkrelstore::a_linksto_b(&db, url, &href).await.is_err() {
 | 
			
		||||
            warn!("couldn't perform a_linksto_b (a {url} b {href})");
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        nats.publish(VOREBOT_SERVICE.to_string(), rmp_serde::to_vec(&CrawlRequest {
 | 
			
		||||
            url: href,
 | 
			
		||||
            damping: 0.85,
 | 
			
		||||
        }).unwrap().into()).await.unwrap();
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    let elapsed = start.elapsed().as_secs_f64();
 | 
			
		||||
    
 | 
			
		||||
    debug!("crawled {} in {} seconds", url, elapsed);
 | 
			
		||||
    
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
	Add table
		
		Reference in a new issue