Compare commits
	
		
			47 commits
		
	
	
		
			main
			...
			spellcheck
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| b91674678e | |||
| 26f5196138 | |||
| 623b068cef | |||
| 270698c762 | |||
| 7e7079dd42 | |||
| b4112c311c | |||
| 0c10b15447 | |||
| b86fd6b6bc | |||
| 6fa292d1de | |||
| b8a338d9ba | |||
| c2e28c3791 | |||
| 050d45ee2e | |||
| 39dfa6816e | |||
| 03314a53d1 | |||
| 1c6edf2039 | |||
| 95ba628934 | |||
| 96478fb5d2 | |||
| 74c65d993d | |||
| 9237d1a048 | |||
| 4be68e4ba1 | |||
| 2463986e8d | |||
| 38ef5a45f6 | |||
| 0f6f0deb9c | |||
| 9c159c7170 | |||
| d341c66390 | |||
| c754338bf4 | |||
| 56d07057c9 | |||
| 384464bdbc | |||
| d45f13f030 | |||
| 927ce9d3ed | |||
| 17054c0a9c | |||
| b6f5ae177a | |||
| 83053c57a8 | |||
| 1ad6dad9eb | |||
| 7de4cdc0d6 | |||
| ce60a00868 | |||
| 9f87a327e0 | |||
| f3ee26f24b | |||
| d6509cccbe | |||
| 1dc0a399a0 | |||
| 30b3c2352e | |||
| 949a49e5de | |||
| a0b31d3326 | |||
| a16782617a | |||
| 73463a8d21 | |||
| 906c2ed8df | |||
| b1202cc889 | 
					 60 changed files with 3958 additions and 310 deletions
				
			
		
							
								
								
									
										16
									
								
								.forgejo/build-service/action.yml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								.forgejo/build-service/action.yml
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,16 @@
 | 
				
			||||||
 | 
					inputs:
 | 
				
			||||||
 | 
					  service-name:
 | 
				
			||||||
 | 
					    description: 'name of the service to build and upload'
 | 
				
			||||||
 | 
					    required: true
 | 
				
			||||||
 | 
					runs:
 | 
				
			||||||
 | 
					  using: "composite"
 | 
				
			||||||
 | 
					  steps:
 | 
				
			||||||
 | 
					    - run: |
 | 
				
			||||||
 | 
					        mkdir -pv artifacts
 | 
				
			||||||
 | 
					        cargo build --release --bin ${{ inputs.service-name }}
 | 
				
			||||||
 | 
					        mv target/release/${{ inputs.service-name }} artifacts/
 | 
				
			||||||
 | 
					      shell: bash
 | 
				
			||||||
 | 
					    - uses: actions/upload-artifact@v3
 | 
				
			||||||
 | 
					      with:
 | 
				
			||||||
 | 
					        name: "${{ inputs.service-name }}"
 | 
				
			||||||
 | 
					        path: artifacts/
 | 
				
			||||||
							
								
								
									
										39
									
								
								.forgejo/workflows/build-all.yml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								.forgejo/workflows/build-all.yml
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,39 @@
 | 
				
			||||||
 | 
					on: [push]
 | 
				
			||||||
 | 
					jobs:
 | 
				
			||||||
 | 
					  build-all-services:
 | 
				
			||||||
 | 
					    runs-on: docker
 | 
				
			||||||
 | 
					    container:
 | 
				
			||||||
 | 
					      image: rust:1-bookworm
 | 
				
			||||||
 | 
					    steps:
 | 
				
			||||||
 | 
					      - run: |
 | 
				
			||||||
 | 
					          apt-get update -qq
 | 
				
			||||||
 | 
					          apt-get install -y -qq nodejs git clang
 | 
				
			||||||
 | 
					      - uses: actions/checkout@v3
 | 
				
			||||||
 | 
					      - id: vorebot
 | 
				
			||||||
 | 
					        uses: ./.forgejo/build-service
 | 
				
			||||||
 | 
					        with:
 | 
				
			||||||
 | 
					          service-name: "vorebot"
 | 
				
			||||||
 | 
					      - id: asklyphe-auth-frontend
 | 
				
			||||||
 | 
					        uses: ./.forgejo/build-service
 | 
				
			||||||
 | 
					        with:
 | 
				
			||||||
 | 
					          service-name: "asklyphe-auth-frontend"
 | 
				
			||||||
 | 
					      - id: asklyphe-frontend
 | 
				
			||||||
 | 
					        uses: ./.forgejo/build-service
 | 
				
			||||||
 | 
					        with:
 | 
				
			||||||
 | 
					          service-name: "asklyphe-frontend"
 | 
				
			||||||
 | 
					      - id: authservice
 | 
				
			||||||
 | 
					        uses: ./.forgejo/build-service
 | 
				
			||||||
 | 
					        with:
 | 
				
			||||||
 | 
					          service-name: "authservice"
 | 
				
			||||||
 | 
					      - id: bingservice
 | 
				
			||||||
 | 
					        uses: ./.forgejo/build-service
 | 
				
			||||||
 | 
					        with:
 | 
				
			||||||
 | 
					          service-name: "bingservice"
 | 
				
			||||||
 | 
					      - id: googleservice
 | 
				
			||||||
 | 
					        uses: ./.forgejo/build-service
 | 
				
			||||||
 | 
					        with:
 | 
				
			||||||
 | 
					          service-name: "googleservice"
 | 
				
			||||||
 | 
					      - id: lyphedb
 | 
				
			||||||
 | 
					        uses: ./.forgejo/build-service
 | 
				
			||||||
 | 
					        with:
 | 
				
			||||||
 | 
					          service-name: "lyphedb"
 | 
				
			||||||
							
								
								
									
										995
									
								
								Cargo.lock
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										995
									
								
								Cargo.lock
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							| 
						 | 
					@ -1,2 +1,2 @@
 | 
				
			||||||
[workspace]
 | 
					[workspace]
 | 
				
			||||||
members = ["asklyphe-common", "asklyphe-frontend", "searchservice", "asklyphe-auth-frontend", "unit_converter", "authservice", "authservice/migration", "authservice/entity", "bingservice", "googleservice"]
 | 
					members = ["asklyphe-common", "asklyphe-frontend", "asklyphe-auth-frontend", "unit_converter", "authservice", "authservice/migration", "authservice/entity", "bingservice", "googleservice", "vorebot", "lyphedb", "lyphedb/ldbtesttool"]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
{% block title %}Login{% endblock %}
 | 
					{% block title %}Login{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/auth.css"/>
 | 
					<link rel="stylesheet" href="/static/auth.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -11,10 +11,10 @@
 | 
				
			||||||
    <a id="alyphebig" href="https://asklyphe.com/">
 | 
					    <a id="alyphebig" href="https://asklyphe.com/">
 | 
				
			||||||
    <div id="lyphebig">
 | 
					    <div id="lyphebig">
 | 
				
			||||||
            <div class="bent-surrounding">
 | 
					            <div class="bent-surrounding">
 | 
				
			||||||
                <img id="lypheimg" src="/static/img/lyphebent.png" alt="image of lyphe, our mascot!">
 | 
					                <img id="lypheimg" src="/static/img/lyphebent.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <div id="lyphetitle">
 | 
					            <div id="lyphetitle">
 | 
				
			||||||
                <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
					                <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
				
			||||||
                <p>the best search engine!</p>
 | 
					                <p>the best search engine!</p>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
{% block title %}Register{% endblock %}
 | 
					{% block title %}Register{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/auth.css"/>
 | 
					<link rel="stylesheet" href="/static/auth.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -11,10 +11,10 @@
 | 
				
			||||||
    <a id="alyphebig" href="https://asklyphe.com/">
 | 
					    <a id="alyphebig" href="https://asklyphe.com/">
 | 
				
			||||||
        <div id="lyphebig">
 | 
					        <div id="lyphebig">
 | 
				
			||||||
            <div class="bent-surrounding">
 | 
					            <div class="bent-surrounding">
 | 
				
			||||||
                <img id="lypheimg" src="/static/img/lyphebent.png" alt="image of lyphe, our mascot!">
 | 
					                <img id="lypheimg" src="/static/img/lyphebent.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <div id="lyphetitle">
 | 
					            <div id="lyphetitle">
 | 
				
			||||||
                <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
					                <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
				
			||||||
                <p>the best search engine!</p>
 | 
					                <p>the best search engine!</p>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,7 +11,7 @@
 | 
				
			||||||
              title="AskLyphe"
 | 
					              title="AskLyphe"
 | 
				
			||||||
              href="/static/osd.xml" />
 | 
					              href="/static/osd.xml" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <link rel="stylesheet" href="/static/shell.css" />
 | 
					        <link rel="stylesheet" href="/static/shell.css?git={{ git_commit }}" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        {% block head %}{% endblock %}
 | 
					        {% block head %}{% endblock %}
 | 
				
			||||||
    </head>
 | 
					    </head>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
{% block title %}Verify Email{% endblock %}
 | 
					{% block title %}Verify Email{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/auth.css"/>
 | 
					<link rel="stylesheet" href="/static/auth.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -11,10 +11,10 @@
 | 
				
			||||||
    <a id="alyphebig" href="https://asklyphe.com/">
 | 
					    <a id="alyphebig" href="https://asklyphe.com/">
 | 
				
			||||||
        <div id="lyphebig">
 | 
					        <div id="lyphebig">
 | 
				
			||||||
            <div class="bent-surrounding">
 | 
					            <div class="bent-surrounding">
 | 
				
			||||||
                <img id="lypheimg" src="/static/img/lyphebent.png" alt="image of lyphe, our mascot!">
 | 
					                <img id="lypheimg" src="/static/img/lyphebent.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <div id="lyphetitle">
 | 
					            <div id="lyphetitle">
 | 
				
			||||||
                <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
					                <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
				
			||||||
                <p>the best search engine!</p>
 | 
					                <p>the best search engine!</p>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,10 +11,16 @@ license-file = "LICENSE"
 | 
				
			||||||
tokio = { version = "1.0", features = ["full"] }
 | 
					tokio = { version = "1.0", features = ["full"] }
 | 
				
			||||||
chrono = "0.4.31"
 | 
					chrono = "0.4.31"
 | 
				
			||||||
serde = { version = "1.0", features = ["derive"] }
 | 
					serde = { version = "1.0", features = ["derive"] }
 | 
				
			||||||
foundationdb = { version = "0.8.0", features = ["embedded-fdb-include"] }
 | 
					lyphedb = { path = "../lyphedb" }
 | 
				
			||||||
 | 
					foundationdb = { version = "0.8.0", features = ["embedded-fdb-include"], optional = true }
 | 
				
			||||||
log = "0.4.20"
 | 
					log = "0.4.20"
 | 
				
			||||||
rmp-serde = "1.1.2"
 | 
					rmp-serde = "1.1.2"
 | 
				
			||||||
futures = "0.3.30"
 | 
					futures = "0.3.30"
 | 
				
			||||||
async-nats = "0.38.0"
 | 
					async-nats = "0.38.0"
 | 
				
			||||||
ulid = "1.1.0"
 | 
					ulid = "1.1.0"
 | 
				
			||||||
rand = "0.8.5"
 | 
					rand = "0.8.5"
 | 
				
			||||||
 | 
					percent-encoding = "2.3.1"
 | 
				
			||||||
 | 
					sha-rs = "0.1.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[features]
 | 
				
			||||||
 | 
					default = []
 | 
				
			||||||
							
								
								
									
										112
									
								
								asklyphe-common/src/ldb/linkrelstore.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										112
									
								
								asklyphe-common/src/ldb/linkrelstore.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,112 @@
 | 
				
			||||||
 | 
					use crate::ldb::{construct_path, hash, DBConn};
 | 
				
			||||||
 | 
					use log::{error, warn};
 | 
				
			||||||
 | 
					use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub const INCOMINGSTORE: &str = "incomingstore";
 | 
				
			||||||
 | 
					pub const OUTGOINGSTORE: &str = "outgoingstore";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn a_linksto_b(db: &DBConn, a: &str, b: &str) -> Result<(), ()> {
 | 
				
			||||||
 | 
					    let key_sets = vec![
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            construct_path(&[OUTGOINGSTORE, &hash(a)])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            a.as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            construct_path(&[OUTGOINGSTORE, &hash(a), &hash(b)])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            b.as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            construct_path(&[INCOMINGSTORE, &hash(b)])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            b.as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            construct_path(&[INCOMINGSTORE, &hash(b), &hash(a)])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            a.as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					    ];
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(
 | 
				
			||||||
 | 
					        KVList { kvs: key_sets },
 | 
				
			||||||
 | 
					        PropagationStrategy::OnRead,
 | 
				
			||||||
 | 
					    ));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => Ok(()),
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            error!("bad request for a_linksto_b");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            error!("not found for a_linksto_b");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn what_links_to_this(db: &DBConn, url: &str) -> Result<Vec<String>, ()> {
 | 
				
			||||||
 | 
					    let path = construct_path(&[INCOMINGSTORE, &hash(url)])
 | 
				
			||||||
 | 
					        .as_bytes()
 | 
				
			||||||
 | 
					        .to_vec();
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory { key: path }));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Entries(kvs) => Ok(kvs
 | 
				
			||||||
 | 
					            .kvs
 | 
				
			||||||
 | 
					            .into_iter()
 | 
				
			||||||
 | 
					            .map(|v| String::from_utf8_lossy(&v.1).to_string())
 | 
				
			||||||
 | 
					            .collect()),
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            warn!("lyphedb responded with \"success\" to what_links_to_this, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            warn!("bad request for what_links_to_this");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => Ok(vec![]),
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn what_links_from_this(db: &DBConn, url: &str) -> Result<Vec<String>, ()> {
 | 
				
			||||||
 | 
					    let path = construct_path(&[OUTGOINGSTORE, &hash(url)])
 | 
				
			||||||
 | 
					        .as_bytes()
 | 
				
			||||||
 | 
					        .to_vec();
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory { key: path }));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Entries(kvs) => Ok(kvs
 | 
				
			||||||
 | 
					            .kvs
 | 
				
			||||||
 | 
					            .into_iter()
 | 
				
			||||||
 | 
					            .map(|v| String::from_utf8_lossy(&v.1).to_string())
 | 
				
			||||||
 | 
					            .collect()),
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            warn!("lyphedb responded with \"success\" to what_links_from_this, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            warn!("bad request for what_links_from_this");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => Ok(vec![]),
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										63
									
								
								asklyphe-common/src/ldb/linkstore.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								asklyphe-common/src/ldb/linkstore.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,63 @@
 | 
				
			||||||
 | 
					use log::{error, warn};
 | 
				
			||||||
 | 
					use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
				
			||||||
 | 
					use crate::ldb::{construct_path, hash, DBConn};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub const LINKSTORE: &str = "linkstore";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn add_url_to_linkwords(db: &DBConn, linkwords: &[&str], url: &str) -> Result<(), ()> {
 | 
				
			||||||
 | 
					    let mut key_sets = Vec::new();
 | 
				
			||||||
 | 
					    for linkword in linkwords {
 | 
				
			||||||
 | 
					        let path = construct_path(&[LINKSTORE, linkword, &hash(url)]).as_bytes().to_vec();
 | 
				
			||||||
 | 
					        key_sets.push((path, url.as_bytes().to_vec()));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(KVList {
 | 
				
			||||||
 | 
					        kvs: key_sets,
 | 
				
			||||||
 | 
					    }, PropagationStrategy::OnRead));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            Ok(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            error!("bad request for add_url_to_linkwords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            error!("not found for add_url_to_linkwords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn get_urls_from_linkword(db: &DBConn, keyword: &str) -> Result<Vec<String>, ()> {
 | 
				
			||||||
 | 
					    let path = construct_path(&[LINKSTORE, keyword]).as_bytes().to_vec();
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory {
 | 
				
			||||||
 | 
					        key: path,
 | 
				
			||||||
 | 
					    }));
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Entries(kvs) => {
 | 
				
			||||||
 | 
					            Ok(kvs.kvs.into_iter().map(|v| String::from_utf8_lossy(&v.1).to_string()).collect())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            warn!("lyphedb responded with \"success\" to get_urls_from_linkwords, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            warn!("bad request for get_urls_from_linkwords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            Ok(vec![])
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										63
									
								
								asklyphe-common/src/ldb/metastore.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								asklyphe-common/src/ldb/metastore.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,63 @@
 | 
				
			||||||
 | 
					use log::{error, warn};
 | 
				
			||||||
 | 
					use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
				
			||||||
 | 
					use crate::ldb::{construct_path, hash, DBConn};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub const METASTORE: &str = "metastore";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn add_url_to_metawords(db: &DBConn, metawords: &[&str], url: &str) -> Result<(), ()> {
 | 
				
			||||||
 | 
					    let mut key_sets = Vec::new();
 | 
				
			||||||
 | 
					    for metaword in metawords {
 | 
				
			||||||
 | 
					        let path = construct_path(&[METASTORE, metaword, &hash(url)]).as_bytes().to_vec();
 | 
				
			||||||
 | 
					        key_sets.push((path, url.as_bytes().to_vec()));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(KVList {
 | 
				
			||||||
 | 
					        kvs: key_sets,
 | 
				
			||||||
 | 
					    }, PropagationStrategy::OnRead));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            Ok(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            error!("bad request for add_url_to_metawords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            error!("not found for add_url_to_metawords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn get_urls_from_metaword(db: &DBConn, metaword: &str) -> Result<Vec<String>, ()> {
 | 
				
			||||||
 | 
					    let path = construct_path(&[METASTORE, metaword]).as_bytes().to_vec();
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory {
 | 
				
			||||||
 | 
					        key: path,
 | 
				
			||||||
 | 
					    }));
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Entries(kvs) => {
 | 
				
			||||||
 | 
					            Ok(kvs.kvs.into_iter().map(|v| String::from_utf8_lossy(&v.1).to_string()).collect())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            warn!("lyphedb responded with \"success\" to get_urls_from_metawords, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            warn!("bad request for get_urls_from_metawords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            Ok(vec![])
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										63
									
								
								asklyphe-common/src/ldb/mod.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								asklyphe-common/src/ldb/mod.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,63 @@
 | 
				
			||||||
 | 
					pub mod wordstore;
 | 
				
			||||||
 | 
					pub mod sitestore;
 | 
				
			||||||
 | 
					pub mod linkstore;
 | 
				
			||||||
 | 
					pub mod metastore;
 | 
				
			||||||
 | 
					pub mod titlestore;
 | 
				
			||||||
 | 
					pub mod linkrelstore;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use std::hash::{DefaultHasher, Hasher};
 | 
				
			||||||
 | 
					use std::sync::Arc;
 | 
				
			||||||
 | 
					use std::sync::atomic::AtomicU64;
 | 
				
			||||||
 | 
					use futures::StreamExt;
 | 
				
			||||||
 | 
					use log::warn;
 | 
				
			||||||
 | 
					use lyphedb::LDBNatsMessage;
 | 
				
			||||||
 | 
					use percent_encoding::{percent_encode, AsciiSet, CONTROLS, NON_ALPHANUMERIC};
 | 
				
			||||||
 | 
					use sha_rs::{Sha, Sha256};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static NEXT_REPLY_ID: AtomicU64 = AtomicU64::new(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub const NOT_ALLOWED_ASCII: AsciiSet = CONTROLS.add(b' ').add(b'/').add(b'.').add(b'\\');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn hash(str: &str) -> String {
 | 
				
			||||||
 | 
					    let hasher = Sha256::new();
 | 
				
			||||||
 | 
					    hasher.digest(str.as_bytes())
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn construct_path(path_elements: &[&str]) -> String {
 | 
				
			||||||
 | 
					    let mut buf = String::new();
 | 
				
			||||||
 | 
					    buf.push_str("ASKLYPHE/");
 | 
				
			||||||
 | 
					    for el in path_elements {
 | 
				
			||||||
 | 
					        buf.push_str(&percent_encode(el.as_bytes(), &NOT_ALLOWED_ASCII).to_string());
 | 
				
			||||||
 | 
					        buf.push('/');
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    buf.pop();
 | 
				
			||||||
 | 
					    buf
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone)]
 | 
				
			||||||
 | 
					pub struct DBConn {
 | 
				
			||||||
 | 
					    nats: async_nats::Client,
 | 
				
			||||||
 | 
					    name: String,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl DBConn {
 | 
				
			||||||
 | 
					    pub fn new(nats: async_nats::Client, name: impl ToString) -> DBConn {
 | 
				
			||||||
 | 
					        DBConn { nats, name: name.to_string() }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    pub async fn query(&self, message: LDBNatsMessage) -> LDBNatsMessage {
 | 
				
			||||||
 | 
					        let data = rmp_serde::to_vec(&message).unwrap();
 | 
				
			||||||
 | 
					        let replyto = format!("ldb-reply-{}", NEXT_REPLY_ID.fetch_add(1, std::sync::atomic::Ordering::SeqCst));
 | 
				
			||||||
 | 
					        let mut subscriber = self.nats.subscribe(replyto.clone()).await.expect("NATS ERROR");
 | 
				
			||||||
 | 
					        self.nats.publish_with_reply(self.name.clone(), replyto, data.into()).await.expect("NATS ERROR");
 | 
				
			||||||
 | 
					        if let Some(reply) = subscriber.next().await {
 | 
				
			||||||
 | 
					            let reply = rmp_serde::from_slice::<LDBNatsMessage>(&reply.payload);
 | 
				
			||||||
 | 
					            if reply.is_err() {
 | 
				
			||||||
 | 
					                warn!("DECODED BAD MESSAGE FROM LYPHEDB: {}", reply.err().unwrap());
 | 
				
			||||||
 | 
					                return LDBNatsMessage::NotFound;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            return reply.unwrap();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										178
									
								
								asklyphe-common/src/ldb/sitestore.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										178
									
								
								asklyphe-common/src/ldb/sitestore.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,178 @@
 | 
				
			||||||
 | 
					use crate::ldb::{construct_path, hash, DBConn};
 | 
				
			||||||
 | 
					use log::{error, warn};
 | 
				
			||||||
 | 
					use lyphedb::{KVList, KeyDirectory, KeyList, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub const SITESTORE: &str = "sitestore";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub const TITLE: &str = "title";
 | 
				
			||||||
 | 
					pub const DESCRIPTION: &str = "desc";
 | 
				
			||||||
 | 
					pub const KEYWORDS: &str = "keywords";
 | 
				
			||||||
 | 
					pub const PAGE_TEXT_RANKING: &str = "ptranks";
 | 
				
			||||||
 | 
					pub const PAGE_TEXT_RAW: &str = "textraw";
 | 
				
			||||||
 | 
					pub const DAMPING: &str = "damping";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn add_website(
 | 
				
			||||||
 | 
					    db: &DBConn,
 | 
				
			||||||
 | 
					    url: &str,
 | 
				
			||||||
 | 
					    title: Option<String>,
 | 
				
			||||||
 | 
					    description: Option<String>,
 | 
				
			||||||
 | 
					    keywords: Option<Vec<String>>,
 | 
				
			||||||
 | 
					    page_text_ranking: &[(String, f64)],
 | 
				
			||||||
 | 
					    page_text_raw: String,
 | 
				
			||||||
 | 
					    damping: f64,
 | 
				
			||||||
 | 
					) -> Result<(), ()> {
 | 
				
			||||||
 | 
					    let keyurl = hash(url);
 | 
				
			||||||
 | 
					    let mut kvs = vec![
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            construct_path(&[SITESTORE, &keyurl]).as_bytes().to_vec(),
 | 
				
			||||||
 | 
					            url.as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            construct_path(&[SITESTORE, &keyurl, PAGE_TEXT_RANKING])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            rmp_serde::to_vec(page_text_ranking).unwrap(),
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            construct_path(&[SITESTORE, &keyurl, PAGE_TEXT_RAW])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            page_text_raw.as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            construct_path(&[SITESTORE, &keyurl, DAMPING])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            damping.to_be_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ),
 | 
				
			||||||
 | 
					    ];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if let Some(title) = title {
 | 
				
			||||||
 | 
					        kvs.push((
 | 
				
			||||||
 | 
					            construct_path(&[SITESTORE, &keyurl, TITLE]).as_bytes().to_vec(),
 | 
				
			||||||
 | 
					            title.as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ))
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if let Some(description) = description {
 | 
				
			||||||
 | 
					        kvs.push((
 | 
				
			||||||
 | 
					            construct_path(&[SITESTORE, &keyurl, DESCRIPTION])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            description.as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        ))
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if let Some(keywords) = keywords {
 | 
				
			||||||
 | 
					        kvs.push((
 | 
				
			||||||
 | 
					            construct_path(&[SITESTORE, &keyurl, KEYWORDS])
 | 
				
			||||||
 | 
					                .as_bytes()
 | 
				
			||||||
 | 
					                .to_vec(),
 | 
				
			||||||
 | 
					            rmp_serde::to_vec(&keywords).unwrap(),
 | 
				
			||||||
 | 
					        ))
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(
 | 
				
			||||||
 | 
					        KVList { kvs },
 | 
				
			||||||
 | 
					        PropagationStrategy::OnRead,
 | 
				
			||||||
 | 
					    ));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => Ok(()),
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            error!("bad request for add_website");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            error!("not found for add_website");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Default, Debug, Clone)]
 | 
				
			||||||
 | 
					pub struct WebsiteData {
 | 
				
			||||||
 | 
					    pub title: Option<String>,
 | 
				
			||||||
 | 
					    pub description: Option<String>,
 | 
				
			||||||
 | 
					    pub keywords: Option<Vec<String>>,
 | 
				
			||||||
 | 
					    pub page_text_ranking: Vec<(String, f64)>,
 | 
				
			||||||
 | 
					    pub page_text_raw: String,
 | 
				
			||||||
 | 
					    pub damping: f64,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn get_website(db: &DBConn, url: &str) -> Result<WebsiteData, ()> {
 | 
				
			||||||
 | 
					    let keyurl = hash(url);
 | 
				
			||||||
 | 
					    let keys = [
 | 
				
			||||||
 | 
					        construct_path(&[SITESTORE, &keyurl, TITLE]).as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        construct_path(&[SITESTORE, &keyurl, DESCRIPTION]).as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        construct_path(&[SITESTORE, &keyurl, KEYWORDS]).as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        construct_path(&[SITESTORE, &keyurl, PAGE_TEXT_RANKING]).as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        construct_path(&[SITESTORE, &keyurl, PAGE_TEXT_RAW]).as_bytes().to_vec(),
 | 
				
			||||||
 | 
					        construct_path(&[SITESTORE, &keyurl, DAMPING]).as_bytes().to_vec(),
 | 
				
			||||||
 | 
					    ].to_vec();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeys(KeyList { keys }));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Entries(kvlist) => {
 | 
				
			||||||
 | 
					            let mut data = WebsiteData::default();
 | 
				
			||||||
 | 
					            for (key, value) in kvlist.kvs {
 | 
				
			||||||
 | 
					                let key = String::from_utf8_lossy(&key).to_string();
 | 
				
			||||||
 | 
					                match key.as_str() {
 | 
				
			||||||
 | 
					                    _ if key.ends_with(TITLE) => {
 | 
				
			||||||
 | 
					                        data.title = Some(String::from_utf8_lossy(&value).to_string());
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ if key.ends_with(DESCRIPTION) => {
 | 
				
			||||||
 | 
					                        data.description = Some(String::from_utf8_lossy(&value).to_string());
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ if key.ends_with(KEYWORDS) => {
 | 
				
			||||||
 | 
					                        let deser = rmp_serde::from_slice::<Vec<String>>(&value);
 | 
				
			||||||
 | 
					                        if let Err(e) = deser {
 | 
				
			||||||
 | 
					                            error!("bad keywords entry for {}, deser error: {:?}", key, e);
 | 
				
			||||||
 | 
					                        } else {
 | 
				
			||||||
 | 
					                            data.keywords = Some(deser.unwrap());
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ if key.ends_with(PAGE_TEXT_RANKING) => {
 | 
				
			||||||
 | 
					                        let deser = rmp_serde::from_slice::<Vec<(String, f64)>>(&value);
 | 
				
			||||||
 | 
					                        if let Err(e) = deser {
 | 
				
			||||||
 | 
					                            error!("bad page_text_ranking entry for {}, deser error: {:?}", key, e);
 | 
				
			||||||
 | 
					                        } else {
 | 
				
			||||||
 | 
					                            data.page_text_ranking = deser.unwrap();
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ if key.ends_with(PAGE_TEXT_RAW) => {
 | 
				
			||||||
 | 
					                        data.page_text_raw = String::from_utf8_lossy(&value).to_string();
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ if key.ends_with(DAMPING) => {
 | 
				
			||||||
 | 
					                        data.damping = f64::from_be_bytes(value.try_into().unwrap_or(0.85f64.to_be_bytes()));
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    _ => {
 | 
				
			||||||
 | 
					                        warn!("encountered weird returned key for get_website");
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Ok(data)
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            warn!("lyphedb responded with \"success\" to get_website, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            error!("bad request for get_website");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            warn!("not found for get_website");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										63
									
								
								asklyphe-common/src/ldb/titlestore.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								asklyphe-common/src/ldb/titlestore.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,63 @@
 | 
				
			||||||
 | 
					use log::{error, warn};
 | 
				
			||||||
 | 
					use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
				
			||||||
 | 
					use crate::ldb::{construct_path, hash, DBConn};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub const TITLESTORE: &str = "titlestore";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn add_url_to_titlewords(db: &DBConn, titlewords: &[&str], url: &str) -> Result<(), ()> {
 | 
				
			||||||
 | 
					    let mut key_sets = Vec::new();
 | 
				
			||||||
 | 
					    for titleword in titlewords {
 | 
				
			||||||
 | 
					        let path = construct_path(&[TITLESTORE, titleword, &hash(url)]).as_bytes().to_vec();
 | 
				
			||||||
 | 
					        key_sets.push((path, url.as_bytes().to_vec()));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(KVList {
 | 
				
			||||||
 | 
					        kvs: key_sets,
 | 
				
			||||||
 | 
					    }, PropagationStrategy::OnRead));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            Ok(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            error!("bad request for add_url_to_titlewords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            error!("not found for add_url_to_titlewords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn get_urls_from_titleword(db: &DBConn, titleword: &str) -> Result<Vec<String>, ()> {
 | 
				
			||||||
 | 
					    let path = construct_path(&[TITLESTORE, titleword]).as_bytes().to_vec();
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory {
 | 
				
			||||||
 | 
					        key: path,
 | 
				
			||||||
 | 
					    }));
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Entries(kvs) => {
 | 
				
			||||||
 | 
					            Ok(kvs.kvs.into_iter().map(|v| String::from_utf8_lossy(&v.1).to_string()).collect())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            warn!("lyphedb responded with \"success\" to get_urls_from_titlewords, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            warn!("bad request for get_urls_from_titlewords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            Ok(vec![])
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										64
									
								
								asklyphe-common/src/ldb/wordstore.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								asklyphe-common/src/ldb/wordstore.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,64 @@
 | 
				
			||||||
 | 
					use log::{error, warn};
 | 
				
			||||||
 | 
					use lyphedb::{KVList, KeyDirectory, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
				
			||||||
 | 
					use crate::ldb::{construct_path, hash, DBConn};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub const WORDSTORE: &str = "wordstore";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn add_url_to_keywords(db: &DBConn, keywords: &[&str], url: &str) -> Result<(), ()> {
 | 
				
			||||||
 | 
					    let mut key_sets = Vec::new();
 | 
				
			||||||
 | 
					    for keyword in keywords {
 | 
				
			||||||
 | 
					        let path = construct_path(&[WORDSTORE, keyword, &hash(url)]).as_bytes().to_vec();
 | 
				
			||||||
 | 
					        let data = url.as_bytes().to_vec();
 | 
				
			||||||
 | 
					        key_sets.push((path, data));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::SetKeys(KVList {
 | 
				
			||||||
 | 
					        kvs: key_sets,
 | 
				
			||||||
 | 
					    }, PropagationStrategy::OnRead));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            Ok(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            error!("bad request for add_url_to_keywords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            error!("not found for add_url_to_keywords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn get_urls_from_keyword(db: &DBConn, keyword: &str) -> Result<Vec<String>, ()> {
 | 
				
			||||||
 | 
					    let path = construct_path(&[WORDSTORE, keyword]).as_bytes().to_vec();
 | 
				
			||||||
 | 
					    let cmd = LDBNatsMessage::Command(LypheDBCommand::GetKeyDirectory(KeyDirectory {
 | 
				
			||||||
 | 
					        key: path,
 | 
				
			||||||
 | 
					    }));
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    match db.query(cmd).await {
 | 
				
			||||||
 | 
					        LDBNatsMessage::Entries(kvs) => {
 | 
				
			||||||
 | 
					            Ok(kvs.kvs.into_iter().map(|v| String::from_utf8_lossy(&v.1).to_string()).collect())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::Success => {
 | 
				
			||||||
 | 
					            warn!("lyphedb responded with \"success\" to get_urls_from_keywords, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::BadRequest => {
 | 
				
			||||||
 | 
					            warn!("bad request for get_urls_from_keywords");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        LDBNatsMessage::NotFound => {
 | 
				
			||||||
 | 
					            Ok(vec![])
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        _ => {
 | 
				
			||||||
 | 
					            warn!("lyphedb sent weird message as response, treating as error");
 | 
				
			||||||
 | 
					            Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -12,8 +12,12 @@
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub mod nats;
 | 
					pub mod nats;
 | 
				
			||||||
 | 
					#[cfg(feature = "foundationdb")]
 | 
				
			||||||
pub mod db;
 | 
					pub mod db;
 | 
				
			||||||
 | 
					pub mod ldb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub use lyphedb;
 | 
				
			||||||
 | 
					#[cfg(feature = "foundationdb")]
 | 
				
			||||||
pub use foundationdb;
 | 
					pub use foundationdb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn add(left: usize, right: usize) -> usize {
 | 
					pub fn add(left: usize, right: usize) -> usize {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,7 +18,7 @@ pub const VOREBOT_NEWHOSTNAME_SERVICE: &str = "websiteparse_highpriority";
 | 
				
			||||||
pub const VOREBOT_SUGGESTED_SERVICE: &str = "websiteparse_highestpriority";
 | 
					pub const VOREBOT_SUGGESTED_SERVICE: &str = "websiteparse_highestpriority";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
 | 
					#[derive(Debug, Clone, Serialize, Deserialize)]
 | 
				
			||||||
pub struct WebParseRequest {
 | 
					pub struct CrawlRequest {
 | 
				
			||||||
    pub url: String,
 | 
					    pub url: String,
 | 
				
			||||||
    pub damping_factor: f32,
 | 
					    pub damping: f64,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -36,5 +36,7 @@ once_cell = "1.19.0"
 | 
				
			||||||
chrono = "0.4.33"
 | 
					chrono = "0.4.33"
 | 
				
			||||||
rand = "0.8.5"
 | 
					rand = "0.8.5"
 | 
				
			||||||
url_encoded_data = "0.6.1"
 | 
					url_encoded_data = "0.6.1"
 | 
				
			||||||
 | 
					strum = "0.27.1"
 | 
				
			||||||
 | 
					strum_macros = "0.27.1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
env_logger = "*"
 | 
					env_logger = "*"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -14,6 +14,7 @@
 | 
				
			||||||
pub mod searchbot;
 | 
					pub mod searchbot;
 | 
				
			||||||
pub mod wikipedia;
 | 
					pub mod wikipedia;
 | 
				
			||||||
pub mod unit_converter;
 | 
					pub mod unit_converter;
 | 
				
			||||||
 | 
					pub mod spellcheck;
 | 
				
			||||||
pub mod routes;
 | 
					pub mod routes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use std::{env, process};
 | 
					use std::{env, process};
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -343,14 +343,45 @@ pub async fn admin_invitecode(
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        let active_codes = match list_invite_codes(nats.clone(), token.clone(), false).await {
 | 
					        let active_codes = match list_invite_codes(nats.clone(), token.clone(), false).await {
 | 
				
			||||||
            Ok(v) => v,
 | 
					            Ok(mut v) => {
 | 
				
			||||||
 | 
					                for v in &mut v {
 | 
				
			||||||
 | 
					                    if let Some(used_by) = &v.used_by {
 | 
				
			||||||
 | 
					                        if used_by.len() > 32 {
 | 
				
			||||||
 | 
					                            v.used_by = Some(format!("{}...", &used_by[0..32]));
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    if v.creator.len() > 32 {
 | 
				
			||||||
 | 
					                        v.creator = format!("{}...", &v.creator[0..32]);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                v
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
            Err(e) => {
 | 
					            Err(e) => {
 | 
				
			||||||
                return e.into_response();
 | 
					                return e.into_response();
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        let used_codes = match list_invite_codes(nats.clone(), token.clone(), true).await {
 | 
					        let used_codes = match list_invite_codes(nats.clone(), token.clone(), true).await {
 | 
				
			||||||
            Ok(v) => v.into_iter().map(|mut v| if v.used_at.is_none() { v.used_at = Some(String::from("unset")); v } else { v }).collect(),
 | 
					            Ok(v) => v.into_iter().map(|mut v| {
 | 
				
			||||||
 | 
					                if let Some(used_by) = &v.used_by {
 | 
				
			||||||
 | 
					                    if used_by.len() > 32 {
 | 
				
			||||||
 | 
					                        v.used_by = Some(format!("{}...", &used_by[0..32]));
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if v.creator.len() > 32 {
 | 
				
			||||||
 | 
					                    v.creator = format!("{}...", &v.creator[0..32]);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if v.used_at.is_none() {
 | 
				
			||||||
 | 
					                    v.used_at = Some(String::from("unset"));
 | 
				
			||||||
 | 
					                    v
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    v
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }).collect(),
 | 
				
			||||||
            Err(e) => {
 | 
					            Err(e) => {
 | 
				
			||||||
                return e.into_response();
 | 
					                return e.into_response();
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -27,6 +27,7 @@ use tracing::{debug, error};
 | 
				
			||||||
use tracing::log::warn;
 | 
					use tracing::log::warn;
 | 
				
			||||||
use crate::{Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
 | 
					use crate::{Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
 | 
				
			||||||
use crate::routes::index::FrontpageAnnouncement;
 | 
					use crate::routes::index::FrontpageAnnouncement;
 | 
				
			||||||
 | 
					use crate::routes::Themes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Serialize, Debug)]
 | 
					#[derive(Serialize, Debug)]
 | 
				
			||||||
struct FullAnnouncement {
 | 
					struct FullAnnouncement {
 | 
				
			||||||
| 
						 | 
					@ -96,7 +97,7 @@ pub struct AnnouncementTemplate {
 | 
				
			||||||
    built_on: String,
 | 
					    built_on: String,
 | 
				
			||||||
    year: String,
 | 
					    year: String,
 | 
				
			||||||
    alpha: bool,
 | 
					    alpha: bool,
 | 
				
			||||||
    theme: String,
 | 
					    theme: Themes,
 | 
				
			||||||
    announcement: FullAnnouncement,
 | 
					    announcement: FullAnnouncement,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -109,7 +110,7 @@ pub async fn announcement_full(Extension(nats): Extension<Arc<jetstream::Context
 | 
				
			||||||
            built_on: BUILT_ON.to_string(),
 | 
					            built_on: BUILT_ON.to_string(),
 | 
				
			||||||
            year: YEAR.to_string(),
 | 
					            year: YEAR.to_string(),
 | 
				
			||||||
            alpha: ALPHA,
 | 
					            alpha: ALPHA,
 | 
				
			||||||
            theme: "default".to_string(),
 | 
					            theme: Themes::Default,
 | 
				
			||||||
            announcement,
 | 
					            announcement,
 | 
				
			||||||
        }.into_response()
 | 
					        }.into_response()
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -30,7 +30,7 @@ use tokio::sync::Mutex;
 | 
				
			||||||
use tracing::error;
 | 
					use tracing::error;
 | 
				
			||||||
use tracing::log::warn;
 | 
					use tracing::log::warn;
 | 
				
			||||||
use crate::{BUILT_ON, GIT_COMMIT, Opts, ALPHA, VERSION, WEBSITE_COUNT, YEAR};
 | 
					use crate::{BUILT_ON, GIT_COMMIT, Opts, ALPHA, VERSION, WEBSITE_COUNT, YEAR};
 | 
				
			||||||
use crate::routes::{authenticate_user, UserInfo};
 | 
					use crate::routes::{authenticate_user, Themes, UserInfo};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Serialize, Debug)]
 | 
					#[derive(Serialize, Debug)]
 | 
				
			||||||
pub struct FrontpageAnnouncement {
 | 
					pub struct FrontpageAnnouncement {
 | 
				
			||||||
| 
						 | 
					@ -102,7 +102,7 @@ pub fn frontpage_error(error: &str, auth_url: String) -> FrontpageTemplate {
 | 
				
			||||||
        year: YEAR.to_string(),
 | 
					        year: YEAR.to_string(),
 | 
				
			||||||
        alpha: ALPHA,
 | 
					        alpha: ALPHA,
 | 
				
			||||||
        count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
					        count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
				
			||||||
        theme: "default".to_string(),
 | 
					        theme: Themes::Default,
 | 
				
			||||||
        announcement: None,
 | 
					        announcement: None,
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -185,7 +185,7 @@ pub struct FrontpageTemplate {
 | 
				
			||||||
    year: String,
 | 
					    year: String,
 | 
				
			||||||
    alpha: bool,
 | 
					    alpha: bool,
 | 
				
			||||||
    count: u64,
 | 
					    count: u64,
 | 
				
			||||||
    theme: String,
 | 
					    theme: Themes,
 | 
				
			||||||
    announcement: Option<FrontpageAnnouncement>,
 | 
					    announcement: Option<FrontpageAnnouncement>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -202,7 +202,7 @@ pub async fn frontpage(
 | 
				
			||||||
        year: YEAR.to_string(),
 | 
					        year: YEAR.to_string(),
 | 
				
			||||||
        alpha: ALPHA,
 | 
					        alpha: ALPHA,
 | 
				
			||||||
        count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
					        count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
				
			||||||
        theme: "default".to_string(),
 | 
					        theme: Themes::Default,
 | 
				
			||||||
        announcement,
 | 
					        announcement,
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -217,7 +217,7 @@ struct IndexTemplate {
 | 
				
			||||||
    year: String,
 | 
					    year: String,
 | 
				
			||||||
    alpha: bool,
 | 
					    alpha: bool,
 | 
				
			||||||
    count: u64,
 | 
					    count: u64,
 | 
				
			||||||
    theme: String,
 | 
					    theme: Themes,
 | 
				
			||||||
    announcement: Option<FrontpageAnnouncement>,
 | 
					    announcement: Option<FrontpageAnnouncement>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -234,7 +234,7 @@ pub async fn index(
 | 
				
			||||||
                return (jar.remove("token"), frontpage_error(e.as_str(), opts.auth_url.clone())).into_response();
 | 
					                return (jar.remove("token"), frontpage_error(e.as_str(), opts.auth_url.clone())).into_response();
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
        let theme = info.theme.clone();
 | 
					        let theme = info.get_theme();
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        let announcement = latest_announcement(nats.clone()).await;
 | 
					        let announcement = latest_announcement(nats.clone()).await;
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
| 
						 | 
					@ -260,7 +260,7 @@ pub async fn index(
 | 
				
			||||||
            year: YEAR.to_string(),
 | 
					            year: YEAR.to_string(),
 | 
				
			||||||
            alpha: ALPHA,
 | 
					            alpha: ALPHA,
 | 
				
			||||||
            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
					            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
				
			||||||
            theme: "default".to_string(),
 | 
					            theme: Themes::Default,
 | 
				
			||||||
            announcement,
 | 
					            announcement,
 | 
				
			||||||
        }.into_response()
 | 
					        }.into_response()
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,7 +10,8 @@
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 | 
					 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
 | 
					use std::fmt::Display;
 | 
				
			||||||
 | 
					use std::str::FromStr;
 | 
				
			||||||
use std::sync::Arc;
 | 
					use std::sync::Arc;
 | 
				
			||||||
use askama::Template;
 | 
					use askama::Template;
 | 
				
			||||||
use askama_axum::IntoResponse;
 | 
					use askama_axum::IntoResponse;
 | 
				
			||||||
| 
						 | 
					@ -21,7 +22,13 @@ use asklyphe_common::nats::comms::ServiceResponse;
 | 
				
			||||||
use async_nats::jetstream;
 | 
					use async_nats::jetstream;
 | 
				
			||||||
use axum::http::StatusCode;
 | 
					use axum::http::StatusCode;
 | 
				
			||||||
use serde::Serialize;
 | 
					use serde::Serialize;
 | 
				
			||||||
use tracing::error;
 | 
					use strum::IntoEnumIterator;
 | 
				
			||||||
 | 
					use strum_macros::EnumIter;
 | 
				
			||||||
 | 
					use time::macros::utc_datetime;
 | 
				
			||||||
 | 
					use time::{OffsetDateTime, UtcDateTime};
 | 
				
			||||||
 | 
					use tracing::{debug, error};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const RANDOM_THEME_EPOCH: UtcDateTime = utc_datetime!(2025-03-19 00:00);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub mod search;
 | 
					pub mod search;
 | 
				
			||||||
pub mod index;
 | 
					pub mod index;
 | 
				
			||||||
| 
						 | 
					@ -30,7 +37,102 @@ pub mod user_settings;
 | 
				
			||||||
pub mod admin;
 | 
					pub mod admin;
 | 
				
			||||||
pub mod announcement;
 | 
					pub mod announcement;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Serialize)]
 | 
					#[derive(Default, EnumIter, PartialEq, Eq, Copy, Clone)]
 | 
				
			||||||
 | 
					pub enum Themes {
 | 
				
			||||||
 | 
					    Classic,
 | 
				
			||||||
 | 
					    Dark,
 | 
				
			||||||
 | 
					    #[default]
 | 
				
			||||||
 | 
					    Default,
 | 
				
			||||||
 | 
					    Freaky,
 | 
				
			||||||
 | 
					    Gloss,
 | 
				
			||||||
 | 
					    Oled,
 | 
				
			||||||
 | 
					    Water,
 | 
				
			||||||
 | 
					    Random
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Themes {
 | 
				
			||||||
 | 
					    pub fn get_all_themes() -> Vec<Themes> {
 | 
				
			||||||
 | 
					        Self::iter().collect()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn display_name(&self) -> String {
 | 
				
			||||||
 | 
					        match self {
 | 
				
			||||||
 | 
					            Themes::Classic => {
 | 
				
			||||||
 | 
					                "classic".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Dark => {
 | 
				
			||||||
 | 
					                "dark theme".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Default => {
 | 
				
			||||||
 | 
					                "default theme".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Freaky => {
 | 
				
			||||||
 | 
					                "freaky".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Gloss => {
 | 
				
			||||||
 | 
					                "gloss".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Oled => {
 | 
				
			||||||
 | 
					                "lights out".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Water => {
 | 
				
			||||||
 | 
					                "water".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Random => {
 | 
				
			||||||
 | 
					                "random".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn internal_name(&self) -> String {
 | 
				
			||||||
 | 
					        match self {
 | 
				
			||||||
 | 
					            Themes::Classic => {
 | 
				
			||||||
 | 
					                "classic".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Dark => {
 | 
				
			||||||
 | 
					                "dark".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Default => {
 | 
				
			||||||
 | 
					                "default".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Freaky => {
 | 
				
			||||||
 | 
					                "freaky".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Gloss => {
 | 
				
			||||||
 | 
					                "gloss".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Oled => {
 | 
				
			||||||
 | 
					                "oled".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Water => {
 | 
				
			||||||
 | 
					                "water".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Themes::Random => {
 | 
				
			||||||
 | 
					                "random".to_string()
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl FromStr for Themes {
 | 
				
			||||||
 | 
					    type Err = ();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn from_str(s: &str) -> Result<Self, Self::Err> {
 | 
				
			||||||
 | 
					        match s {
 | 
				
			||||||
 | 
					            "classic" => Ok(Themes::Classic),
 | 
				
			||||||
 | 
					            "dark" => Ok(Themes::Dark),
 | 
				
			||||||
 | 
					            "default" => Ok(Themes::Default),
 | 
				
			||||||
 | 
					            "freaky" => Ok(Themes::Freaky),
 | 
				
			||||||
 | 
					            "gloss" => Ok(Themes::Gloss),
 | 
				
			||||||
 | 
					            "oled" => Ok(Themes::Oled),
 | 
				
			||||||
 | 
					            "water" => Ok(Themes::Water),
 | 
				
			||||||
 | 
					            "random" => Ok(Themes::Random),
 | 
				
			||||||
 | 
					            _ => Err(())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Serialize, Clone)]
 | 
				
			||||||
pub struct UserInfo {
 | 
					pub struct UserInfo {
 | 
				
			||||||
    pub username: String,
 | 
					    pub username: String,
 | 
				
			||||||
    pub email: String,
 | 
					    pub email: String,
 | 
				
			||||||
| 
						 | 
					@ -39,6 +141,27 @@ pub struct UserInfo {
 | 
				
			||||||
    pub administrator: bool,
 | 
					    pub administrator: bool,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl UserInfo {
 | 
				
			||||||
 | 
					    pub fn get_theme(&self) -> Themes {
 | 
				
			||||||
 | 
					        let theme: Themes = self.theme.parse().unwrap_or_default();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if theme.eq(&Themes::Random) {
 | 
				
			||||||
 | 
					            let possible_themes = Themes::get_all_themes();
 | 
				
			||||||
 | 
					            let current_day = UtcDateTime::now();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            let rand_value = (((current_day - RANDOM_THEME_EPOCH).as_seconds_f64() / 86400.0) % possible_themes.len() as f64) as usize;
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            *possible_themes.get(rand_value).unwrap_or(&Themes::Default)
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            theme
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    pub fn get_true_theme(&self) -> Themes {
 | 
				
			||||||
 | 
					        self.theme.parse().unwrap()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub async fn authenticate_user(nats: Arc<jetstream::Context>, token: String) -> Result<UserInfo, String> {
 | 
					pub async fn authenticate_user(nats: Arc<jetstream::Context>, token: String) -> Result<UserInfo, String> {
 | 
				
			||||||
    let response = comms::query_service(
 | 
					    let response = comms::query_service(
 | 
				
			||||||
        comms::Query::AuthService(AuthServiceQuery {
 | 
					        comms::Query::AuthService(AuthServiceQuery {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,12 +12,14 @@
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::routes::index::frontpage_error;
 | 
					use crate::routes::index::frontpage_error;
 | 
				
			||||||
use crate::routes::{authenticate_user, UserInfo};
 | 
					use crate::routes::{authenticate_user, Themes, UserInfo};
 | 
				
			||||||
use crate::searchbot::{gather_image_results, gather_search_results};
 | 
					use crate::searchbot::{gather_image_results, gather_search_results};
 | 
				
			||||||
use crate::unit_converter;
 | 
					use crate::unit_converter;
 | 
				
			||||||
use crate::unit_converter::UnitConversion;
 | 
					use crate::unit_converter::UnitConversion;
 | 
				
			||||||
use crate::wikipedia::WikipediaSummary;
 | 
					use crate::wikipedia::WikipediaSummary;
 | 
				
			||||||
use crate::{wikipedia, Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
 | 
					use crate::{wikipedia, Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
 | 
				
			||||||
 | 
					use crate::spellcheck;
 | 
				
			||||||
 | 
					use crate::spellcheck::SpellCheckResults;
 | 
				
			||||||
use askama::Template;
 | 
					use askama::Template;
 | 
				
			||||||
use asklyphe_common::nats;
 | 
					use asklyphe_common::nats;
 | 
				
			||||||
use asklyphe_common::nats::bingservice::{
 | 
					use asklyphe_common::nats::bingservice::{
 | 
				
			||||||
| 
						 | 
					@ -68,6 +70,7 @@ pub struct Complications {
 | 
				
			||||||
    disabled: bool,
 | 
					    disabled: bool,
 | 
				
			||||||
    wikipedia: Option<WikipediaSummary>,
 | 
					    wikipedia: Option<WikipediaSummary>,
 | 
				
			||||||
    unit_converter: Option<UnitConversion>,
 | 
					    unit_converter: Option<UnitConversion>,
 | 
				
			||||||
 | 
					    spellcheck: Option<SpellCheckResults>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub async fn search(
 | 
					pub async fn search(
 | 
				
			||||||
| 
						 | 
					@ -111,7 +114,7 @@ struct SearchTemplateJavascript {
 | 
				
			||||||
    built_on: String,
 | 
					    built_on: String,
 | 
				
			||||||
    year: String,
 | 
					    year: String,
 | 
				
			||||||
    alpha: bool,
 | 
					    alpha: bool,
 | 
				
			||||||
    theme: String,
 | 
					    theme: Themes,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub async fn search_js(
 | 
					pub async fn search_js(
 | 
				
			||||||
| 
						 | 
					@ -121,7 +124,7 @@ pub async fn search_js(
 | 
				
			||||||
    Extension(opts): Extension<Opts>,
 | 
					    Extension(opts): Extension<Opts>,
 | 
				
			||||||
) -> impl IntoResponse {
 | 
					) -> impl IntoResponse {
 | 
				
			||||||
    fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplateJavascript {
 | 
					    fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplateJavascript {
 | 
				
			||||||
        let theme = info.theme.clone();
 | 
					        let theme = info.get_theme();
 | 
				
			||||||
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
					        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
				
			||||||
        SearchTemplateJavascript {
 | 
					        SearchTemplateJavascript {
 | 
				
			||||||
            info,
 | 
					            info,
 | 
				
			||||||
| 
						 | 
					@ -158,24 +161,27 @@ pub async fn search_js(
 | 
				
			||||||
        let mut complications = Complications::default();
 | 
					        let mut complications = Complications::default();
 | 
				
			||||||
        // todo: better way of specifying that user doesn't want complications
 | 
					        // todo: better way of specifying that user doesn't want complications
 | 
				
			||||||
        if !query.contains("-complications") {
 | 
					        if !query.contains("-complications") {
 | 
				
			||||||
            let mut wikiquery = query.clone().to_lowercase();
 | 
					            /*let mut wikiquery = query.clone().to_lowercase();
 | 
				
			||||||
            wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
 | 
					            wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
 | 
				
			||||||
            wikiquery = wikiquery.replace(' ', "%20");
 | 
					            wikiquery = wikiquery.replace(' ', "%20");
 | 
				
			||||||
            // todo: proper url escaping
 | 
					            // todo: proper url escaping
 | 
				
			||||||
            let wikipedia_comp =
 | 
					            let wikipedia_comp =
 | 
				
			||||||
                tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
 | 
					                tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
 | 
				
			||||||
            complications.wikipedia = wikipedia_comp.await.unwrap_or_default();
 | 
					            complications.wikipedia = wikipedia_comp.await.unwrap_or_default();*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            let mut unit_query = query.clone().to_lowercase();
 | 
					            let mut unit_query = query.clone().to_lowercase();
 | 
				
			||||||
            unit_query = unit_query.replace("metre", "meter");
 | 
					            unit_query = unit_query.replace("metre", "meter");
 | 
				
			||||||
            let unit_comp = unit_converter::convert_unit(&unit_query);
 | 
					            let unit_comp = unit_converter::convert_unit(&unit_query);
 | 
				
			||||||
            complications.unit_converter = unit_comp;
 | 
					            complications.unit_converter = unit_comp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            let corrections = spellcheck::check(&query);
 | 
				
			||||||
 | 
					            complications.spellcheck = corrections;
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            complications.disabled = true;
 | 
					            complications.disabled = true;
 | 
				
			||||||
            query = query.replace("-complications", "");
 | 
					            query = query.replace("-complications", "");
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let theme = info.theme.clone();
 | 
					        let theme = info.get_theme();
 | 
				
			||||||
        let querystr = url_encoded_data::stringify(&[("q", og_query.as_str())]);
 | 
					        let querystr = url_encoded_data::stringify(&[("q", og_query.as_str())]);
 | 
				
			||||||
        SearchTemplateJavascript {
 | 
					        SearchTemplateJavascript {
 | 
				
			||||||
            info,
 | 
					            info,
 | 
				
			||||||
| 
						 | 
					@ -217,7 +223,7 @@ pub struct SearchTemplate {
 | 
				
			||||||
    pub built_on: String,
 | 
					    pub built_on: String,
 | 
				
			||||||
    pub year: String,
 | 
					    pub year: String,
 | 
				
			||||||
    pub alpha: bool,
 | 
					    pub alpha: bool,
 | 
				
			||||||
    pub theme: String,
 | 
					    pub theme: Themes,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub async fn search_nojs(
 | 
					pub async fn search_nojs(
 | 
				
			||||||
| 
						 | 
					@ -227,7 +233,7 @@ pub async fn search_nojs(
 | 
				
			||||||
    Extension(opts): Extension<Opts>,
 | 
					    Extension(opts): Extension<Opts>,
 | 
				
			||||||
) -> impl IntoResponse {
 | 
					) -> impl IntoResponse {
 | 
				
			||||||
    fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplate {
 | 
					    fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplate {
 | 
				
			||||||
        let theme = info.theme.clone();
 | 
					        let theme = info.get_theme();
 | 
				
			||||||
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
					        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
				
			||||||
        SearchTemplate {
 | 
					        SearchTemplate {
 | 
				
			||||||
            info,
 | 
					            info,
 | 
				
			||||||
| 
						 | 
					@ -416,7 +422,7 @@ pub struct ImageSearchTemplate {
 | 
				
			||||||
    pub built_on: String,
 | 
					    pub built_on: String,
 | 
				
			||||||
    pub year: String,
 | 
					    pub year: String,
 | 
				
			||||||
    pub alpha: bool,
 | 
					    pub alpha: bool,
 | 
				
			||||||
    pub theme: String,
 | 
					    pub theme: Themes,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
pub async fn image_search(
 | 
					pub async fn image_search(
 | 
				
			||||||
    jar: CookieJar,
 | 
					    jar: CookieJar,
 | 
				
			||||||
| 
						 | 
					@ -425,7 +431,7 @@ pub async fn image_search(
 | 
				
			||||||
    Extension(opts): Extension<Opts>,
 | 
					    Extension(opts): Extension<Opts>,
 | 
				
			||||||
) -> impl IntoResponse {
 | 
					) -> impl IntoResponse {
 | 
				
			||||||
    fn error_response(query: String, info: UserInfo, error: &str) -> ImageSearchTemplate {
 | 
					    fn error_response(query: String, info: UserInfo, error: &str) -> ImageSearchTemplate {
 | 
				
			||||||
        let theme = info.theme.clone();
 | 
					        let theme = info.get_theme();
 | 
				
			||||||
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
					        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
 | 
				
			||||||
        ImageSearchTemplate {
 | 
					        ImageSearchTemplate {
 | 
				
			||||||
            info,
 | 
					            info,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -27,49 +27,12 @@ use serde::Deserialize;
 | 
				
			||||||
use tokio::sync::Mutex;
 | 
					use tokio::sync::Mutex;
 | 
				
			||||||
use tracing::error;
 | 
					use tracing::error;
 | 
				
			||||||
use crate::{BUILT_ON, GIT_COMMIT, Opts, ALPHA, VERSION, WEBSITE_COUNT, YEAR};
 | 
					use crate::{BUILT_ON, GIT_COMMIT, Opts, ALPHA, VERSION, WEBSITE_COUNT, YEAR};
 | 
				
			||||||
use crate::routes::{authenticate_user, UserInfo};
 | 
					use crate::routes::{authenticate_user, Themes, UserInfo};
 | 
				
			||||||
 | 
					 | 
				
			||||||
pub struct Theme<'a> {
 | 
					 | 
				
			||||||
    pub value: &'a str,
 | 
					 | 
				
			||||||
    pub name: &'a str,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub static THEMES: &[Theme] = &[
 | 
					 | 
				
			||||||
    Theme {
 | 
					 | 
				
			||||||
        value: "default",
 | 
					 | 
				
			||||||
        name: "default theme",
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    Theme {
 | 
					 | 
				
			||||||
        value: "dark",
 | 
					 | 
				
			||||||
        name: "dark theme",
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    Theme {
 | 
					 | 
				
			||||||
        value: "oled",
 | 
					 | 
				
			||||||
        name: "lights out",
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    Theme {
 | 
					 | 
				
			||||||
        value: "classic",
 | 
					 | 
				
			||||||
        name: "classic",
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    Theme {
 | 
					 | 
				
			||||||
        value: "freaky",
 | 
					 | 
				
			||||||
        name: "freaky",
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    Theme {
 | 
					 | 
				
			||||||
        value: "water",
 | 
					 | 
				
			||||||
        name: "water",
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
    Theme {
 | 
					 | 
				
			||||||
        value: "gloss",
 | 
					 | 
				
			||||||
        name: "gloss",
 | 
					 | 
				
			||||||
    },
 | 
					 | 
				
			||||||
];
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Template)]
 | 
					#[derive(Template)]
 | 
				
			||||||
#[template(path = "user_settings.html")]
 | 
					#[template(path = "user_settings.html")]
 | 
				
			||||||
pub struct SettingsTemplate {
 | 
					pub struct SettingsTemplate {
 | 
				
			||||||
    themes: &'static [Theme<'static>],
 | 
					    themes: Vec<Themes>,
 | 
				
			||||||
 | 
					 | 
				
			||||||
    error: Option<String>,
 | 
					    error: Option<String>,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    info: UserInfo,
 | 
					    info: UserInfo,
 | 
				
			||||||
| 
						 | 
					@ -80,7 +43,8 @@ pub struct SettingsTemplate {
 | 
				
			||||||
    year: String,
 | 
					    year: String,
 | 
				
			||||||
    alpha: bool,
 | 
					    alpha: bool,
 | 
				
			||||||
    count: u64,
 | 
					    count: u64,
 | 
				
			||||||
    theme: String,
 | 
					    theme: Themes,
 | 
				
			||||||
 | 
					    true_theme: Themes,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub async fn user_settings(
 | 
					pub async fn user_settings(
 | 
				
			||||||
| 
						 | 
					@ -96,11 +60,11 @@ pub async fn user_settings(
 | 
				
			||||||
                return (jar.remove("token"), crate::routes::index::frontpage_error(e.as_str(), opts.auth_url.clone())).into_response();
 | 
					                return (jar.remove("token"), crate::routes::index::frontpage_error(e.as_str(), opts.auth_url.clone())).into_response();
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
        let theme = info.theme.clone();
 | 
					        let theme = info.get_theme();
 | 
				
			||||||
        SettingsTemplate {
 | 
					        SettingsTemplate {
 | 
				
			||||||
            themes: THEMES,
 | 
					            themes: Themes::get_all_themes(),
 | 
				
			||||||
            error: None,
 | 
					            error: None,
 | 
				
			||||||
            info,
 | 
					            info: info.clone(),
 | 
				
			||||||
            search_query: "".to_string(),
 | 
					            search_query: "".to_string(),
 | 
				
			||||||
            version: VERSION.to_string(),
 | 
					            version: VERSION.to_string(),
 | 
				
			||||||
            git_commit: GIT_COMMIT.to_string(),
 | 
					            git_commit: GIT_COMMIT.to_string(),
 | 
				
			||||||
| 
						 | 
					@ -109,6 +73,7 @@ pub async fn user_settings(
 | 
				
			||||||
            alpha: ALPHA,
 | 
					            alpha: ALPHA,
 | 
				
			||||||
            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
					            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
				
			||||||
            theme,
 | 
					            theme,
 | 
				
			||||||
 | 
					            true_theme: info.get_true_theme(),
 | 
				
			||||||
        }.into_response()
 | 
					        }.into_response()
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        Redirect::temporary("/").into_response()
 | 
					        Redirect::temporary("/").into_response()
 | 
				
			||||||
| 
						 | 
					@ -126,11 +91,11 @@ pub async fn theme_change_post(
 | 
				
			||||||
    Extension(opts): Extension<Opts>,
 | 
					    Extension(opts): Extension<Opts>,
 | 
				
			||||||
    Form(input): Form<ThemeChangeForm>,
 | 
					    Form(input): Form<ThemeChangeForm>,
 | 
				
			||||||
) -> impl IntoResponse {
 | 
					) -> impl IntoResponse {
 | 
				
			||||||
    fn settings_error(info: UserInfo, theme: String, error: String) -> impl IntoResponse {
 | 
					    fn settings_error(info: UserInfo, theme: Themes, error: String) -> impl IntoResponse {
 | 
				
			||||||
        SettingsTemplate {
 | 
					        SettingsTemplate {
 | 
				
			||||||
            themes: THEMES,
 | 
					            themes: Themes::get_all_themes(),
 | 
				
			||||||
            error: Some(error),
 | 
					            error: Some(error),
 | 
				
			||||||
            info,
 | 
					            info: info.clone(),
 | 
				
			||||||
            search_query: "".to_string(),
 | 
					            search_query: "".to_string(),
 | 
				
			||||||
            version: VERSION.to_string(),
 | 
					            version: VERSION.to_string(),
 | 
				
			||||||
            git_commit: GIT_COMMIT.to_string(),
 | 
					            git_commit: GIT_COMMIT.to_string(),
 | 
				
			||||||
| 
						 | 
					@ -139,6 +104,7 @@ pub async fn theme_change_post(
 | 
				
			||||||
            alpha: ALPHA,
 | 
					            alpha: ALPHA,
 | 
				
			||||||
            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
					            count: WEBSITE_COUNT.load(Ordering::Relaxed),
 | 
				
			||||||
            theme,
 | 
					            theme,
 | 
				
			||||||
 | 
					            true_theme: info.get_true_theme(),
 | 
				
			||||||
        }.into_response()
 | 
					        }.into_response()
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -151,10 +117,12 @@ pub async fn theme_change_post(
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let theme = info.theme.clone();
 | 
					        let theme = info.get_theme();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if !THEMES.iter().map(|v| v.value.to_string()).collect::<Vec<String>>().contains(&input.theme.clone().unwrap_or("default".to_string())) {
 | 
					        if let Some(theme_input) = &input.theme {
 | 
				
			||||||
            return settings_error(info, theme, "invalid input, please try again!".to_string()).into_response();
 | 
					            if !Themes::get_all_themes().iter().map(|x| x.internal_name().to_string()).collect::<Vec<String>>().contains(&theme_input) {
 | 
				
			||||||
 | 
					                return settings_error(info, theme.clone(), "invalid input, please try again!".to_string()).into_response();
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let response = comms::query_service(comms::Query::AuthService(AuthServiceQuery {
 | 
					        let response = comms::query_service(comms::Query::AuthService(AuthServiceQuery {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -341,7 +341,7 @@ pub async fn gather_search_results(nats: Arc<jetstream::Context>, query: &str, u
 | 
				
			||||||
        search_results.remove(rm - i);
 | 
					        search_results.remove(rm - i);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let theme = user_info.theme.clone();
 | 
					    let theme = user_info.get_theme();
 | 
				
			||||||
    let querystr = url_encoded_data::stringify(&[("q", query)]);
 | 
					    let querystr = url_encoded_data::stringify(&[("q", query)]);
 | 
				
			||||||
    SearchTemplate {
 | 
					    SearchTemplate {
 | 
				
			||||||
        info: user_info,
 | 
					        info: user_info,
 | 
				
			||||||
| 
						 | 
					@ -489,7 +489,7 @@ pub async fn gather_image_results(nats: Arc<jetstream::Context>, query: &str, us
 | 
				
			||||||
        result.src = format!("/imgproxy?{}", url);
 | 
					        result.src = format!("/imgproxy?{}", url);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let theme = user_info.theme.clone();
 | 
					    let theme = user_info.get_theme();
 | 
				
			||||||
    ImageSearchTemplate {
 | 
					    ImageSearchTemplate {
 | 
				
			||||||
        info: user_info,
 | 
					        info: user_info,
 | 
				
			||||||
        error: None,
 | 
					        error: None,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										258
									
								
								asklyphe-frontend/src/spellcheck.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										258
									
								
								asklyphe-frontend/src/spellcheck.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,258 @@
 | 
				
			||||||
 | 
					use once_cell::sync::Lazy;
 | 
				
			||||||
 | 
					use tracing::{debug, error};
 | 
				
			||||||
 | 
					use std::{cmp, mem};
 | 
				
			||||||
 | 
					use std::collections::BTreeMap;
 | 
				
			||||||
 | 
					use std::sync::Mutex;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// how to generate words.txt:
 | 
				
			||||||
 | 
					// clone https://github.com/en-wl/wordlist && cd wordlist
 | 
				
			||||||
 | 
					// make
 | 
				
			||||||
 | 
					// ./scowl wl --deaccent > words0.txt
 | 
				
			||||||
 | 
					// filtered with this python script:
 | 
				
			||||||
 | 
					// -----------------------------------
 | 
				
			||||||
 | 
					// with open("words0.txt", "r") as f:
 | 
				
			||||||
 | 
					// 	out = []
 | 
				
			||||||
 | 
					// 	for line in f:
 | 
				
			||||||
 | 
					// 		line = line.lower()
 | 
				
			||||||
 | 
					// 		if not line in out:
 | 
				
			||||||
 | 
					// 			out.append(line)
 | 
				
			||||||
 | 
					// 	out.sort()
 | 
				
			||||||
 | 
					// 	with open("words.txt", "w") as out_file:
 | 
				
			||||||
 | 
					// 		for line in out:
 | 
				
			||||||
 | 
					// 			out_file.write(f'{line}')
 | 
				
			||||||
 | 
					// ------------------------------------
 | 
				
			||||||
 | 
					// then use regex or similar to enclose every line in quotes and add comma, then add 'static KNOWN_WORDS: &[&str] = &[' to the start and '];' to the end
 | 
				
			||||||
 | 
					include!("./words.txt");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// a cache of misspelled words and the closest match in the database
 | 
				
			||||||
 | 
					static MATCH_CACHE: Lazy<Mutex<BTreeMap<String, Option<&str>>>> = Lazy::new(|| Mutex::new(BTreeMap::new()));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// max distance before no alternatives are considered
 | 
				
			||||||
 | 
					const MAX_DISTANCE: usize = 6;
 | 
				
			||||||
 | 
					// max input text size before spellcheck is not run. on my laptop 13,000 chars of input takes around 4 seconds so this should be fine
 | 
				
			||||||
 | 
					// update: got a larger word database and it doesn't take 4 seconds anymore lmao
 | 
				
			||||||
 | 
					// update 2: added binary search & caching and now 50000 chars takes ~2-4 seconds
 | 
				
			||||||
 | 
					const MAX_QUERY_WORDS: usize = 512;
 | 
				
			||||||
 | 
					// Not really a huge issue, just used to hopefully reduce the allocations made in levenshtein_distance & provide minor performance improvements
 | 
				
			||||||
 | 
					// not needed for now
 | 
				
			||||||
 | 
					// const MAX_WORD_SIZE: usize = 64;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub type SpellCheckResults = Vec<SpellCheckResult>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug)]
 | 
				
			||||||
 | 
					pub struct SpellCheckResult {
 | 
				
			||||||
 | 
						pub orig: String,
 | 
				
			||||||
 | 
						pub correction: &'static str,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn check(query: &String) -> Option<SpellCheckResults> {
 | 
				
			||||||
 | 
						error!("Query: {}", query);
 | 
				
			||||||
 | 
						/*let query: &str = {
 | 
				
			||||||
 | 
							if query.len() > MAX_QUERY_SIZE {
 | 
				
			||||||
 | 
								error!("Query is too large to be spell checked, only checking first {} chars", MAX_QUERY_SIZE);
 | 
				
			||||||
 | 
								query.get(0..MAX_QUERY_SIZE).unwrap()
 | 
				
			||||||
 | 
								// return None;
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								query
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						};*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// TODO: look into how 'wc -w' counts words and copy how it splits things
 | 
				
			||||||
 | 
						let query_flattened = prepare(query);
 | 
				
			||||||
 | 
						let words = query_flattened
 | 
				
			||||||
 | 
							.split_whitespace()
 | 
				
			||||||
 | 
							.filter(|word| word.len() > 0)
 | 
				
			||||||
 | 
							// .filter(|word|)
 | 
				
			||||||
 | 
							.collect::<Vec<_>>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						error!("Words in query: {}", words.len());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (words.len() > MAX_QUERY_WORDS) {
 | 
				
			||||||
 | 
							error!("{} is too many words in query to spell check", words.len());
 | 
				
			||||||
 | 
							// return None;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						let mut distances: SpellCheckResults = vec![];
 | 
				
			||||||
 | 
						for qword in words {
 | 
				
			||||||
 | 
							// error!("Word: {}", qword);
 | 
				
			||||||
 | 
							// error!("is known: {:?}", KNOWN_WORDS.binary_search(&qword));
 | 
				
			||||||
 | 
							if KNOWN_WORDS.binary_search(&qword).is_ok() {
 | 
				
			||||||
 | 
								// error!("Exact word match: {}", qword);
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								let mut cache = MATCH_CACHE.lock().unwrap();
 | 
				
			||||||
 | 
								if cache.contains_key(qword) {
 | 
				
			||||||
 | 
									// We don't need to tell the user if there is no suggestion for an unknown word
 | 
				
			||||||
 | 
									if (cache.get(qword).unwrap().is_some()) {
 | 
				
			||||||
 | 
										// TODO: don't push duplicate misspelled words
 | 
				
			||||||
 | 
										distances.push(SpellCheckResult{orig: qword.to_owned(), correction: cache.get(qword).unwrap().unwrap()});
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								} else {
 | 
				
			||||||
 | 
									let closest_match = KNOWN_WORDS.iter()
 | 
				
			||||||
 | 
										.map(|kword| (kword, levenshtein_distance(&qword, &kword)))
 | 
				
			||||||
 | 
										.min_by(|a, b| a.1.cmp(&b.1)).unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									assert!(closest_match.1 > 0, "Found exact match not caught by binary search, is the word database properly sorted?");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									if closest_match.1 <= MAX_DISTANCE {
 | 
				
			||||||
 | 
										cache.insert(qword.to_owned(), Some(*closest_match.0));
 | 
				
			||||||
 | 
										distances.push(SpellCheckResult{orig: qword.to_owned(), correction: *closest_match.0});
 | 
				
			||||||
 | 
									} else {
 | 
				
			||||||
 | 
										// even though there is no close enough match, cache it anyway so that it doesn't have to be looked up every time
 | 
				
			||||||
 | 
										cache.insert(qword.to_owned(), None);
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							// error!("End");
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						error!("Spell check results:");
 | 
				
			||||||
 | 
						for word in &distances {
 | 
				
			||||||
 | 
							debug!("instead of '{}' did you mean '{}'?", word.orig, word.correction);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if distances.len() > 0 {
 | 
				
			||||||
 | 
							Some(distances)
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							None
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*	let distances = prepare(query).split_whitespace()
 | 
				
			||||||
 | 
							.filter(|qword| qword.len() > 0)
 | 
				
			||||||
 | 
							.map(|qword| qword.to_lowercase())
 | 
				
			||||||
 | 
							.map(
 | 
				
			||||||
 | 
								|qword| {
 | 
				
			||||||
 | 
									let mut exact_match = false;
 | 
				
			||||||
 | 
									KNOWN_WORDS.iter()
 | 
				
			||||||
 | 
										.map(|kword| kword.to_lowercase())
 | 
				
			||||||
 | 
										.map(
 | 
				
			||||||
 | 
											|kword|
 | 
				
			||||||
 | 
											(qword.clone(), kword.clone(), levenshtein_distance(&qword, &kword)))
 | 
				
			||||||
 | 
												// totally isn't jank at all and is the best solution totally
 | 
				
			||||||
 | 
												.take_while(|val| if exact_match {false} else if val.2 == 0 {exact_match = true; true} else {true})
 | 
				
			||||||
 | 
												// .map(|val| {error!("Val: {:?}", val); val})
 | 
				
			||||||
 | 
												.min_by(|a, b| a.2.cmp(&b.2)).unwrap()
 | 
				
			||||||
 | 
								})/*.filter_map(|word| word)*/.filter(|word| word.2 > 0 && word.2 <= MAX_DISTANCE)/*.filter(|(_, _, dist)| *dist > 0 && *dist <= MAX_DISTANCE)*/.map(|word| SpellCheckResult{orig: word.0, correction: word.1.to_owned().to_owned()})/*.filter(|(_, _, d)| *d > 0)*/
 | 
				
			||||||
 | 
							.map(|word| {
 | 
				
			||||||
 | 
								debug!("instead of '{}' did you mean '{}'?", word.orig, word.correction);
 | 
				
			||||||
 | 
								word
 | 
				
			||||||
 | 
							})
 | 
				
			||||||
 | 
							.collect::<Vec<_>>();
 | 
				
			||||||
 | 
						/*for word in &distances {
 | 
				
			||||||
 | 
							debug!("instead of '{}' did you mean '{}'? (distance of )", word.0, word.1/*, word.2*/);
 | 
				
			||||||
 | 
						}*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if distances.len() > 0 {
 | 
				
			||||||
 | 
							Some(distances)
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							None
 | 
				
			||||||
 | 
						}*/
 | 
				
			||||||
 | 
						// None
 | 
				
			||||||
 | 
						// vec![]
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO: handle symbols better, probably with a regex
 | 
				
			||||||
 | 
					fn prepare(s: &str) -> String {
 | 
				
			||||||
 | 
						s.replace("\"", "")
 | 
				
			||||||
 | 
							.replace(",", " ")
 | 
				
			||||||
 | 
							.replace(":", " ")
 | 
				
			||||||
 | 
							.replace(".", " ")
 | 
				
			||||||
 | 
							.replace("/", " ")
 | 
				
			||||||
 | 
							.replace("&", " ")
 | 
				
			||||||
 | 
							.replace("!", " ")
 | 
				
			||||||
 | 
							.replace("?", " ")
 | 
				
			||||||
 | 
							/*.replace("'", "")*/
 | 
				
			||||||
 | 
							.replace("0", "")
 | 
				
			||||||
 | 
							.replace("1", "")
 | 
				
			||||||
 | 
							.replace("2", "")
 | 
				
			||||||
 | 
							.replace("3", "")
 | 
				
			||||||
 | 
							.replace("4", "")
 | 
				
			||||||
 | 
							.replace("5", "")
 | 
				
			||||||
 | 
							.replace("6", "")
 | 
				
			||||||
 | 
							.replace("7", "")
 | 
				
			||||||
 | 
							.replace("8", "")
 | 
				
			||||||
 | 
							.replace("9", "")
 | 
				
			||||||
 | 
							.to_lowercase()
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// cost of 2 for add/remove, cost of 1 for replace
 | 
				
			||||||
 | 
					fn levenshtein_distance(a: &str, other: &str) -> usize {
 | 
				
			||||||
 | 
						// debug!("Self: '{}', Other: '{}'", a, other);
 | 
				
			||||||
 | 
						// let mut dist: &mut [usize; MAX_WORD_SIZE] = &mut [0usize; MAX_WORD_SIZE];
 | 
				
			||||||
 | 
						// let mut dist_prev: &mut [usize; MAX_WORD_SIZE] = &mut [0usize; MAX_WORD_SIZE];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						let mut dist = vec![0usize; other.len() + 1];
 | 
				
			||||||
 | 
						let mut dist_prev = vec![0usize; other.len() + 1];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for i in 0..=other.len() {
 | 
				
			||||||
 | 
							dist_prev[i] = i;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for i in 1..=a.len() {
 | 
				
			||||||
 | 
							dist[0] = i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							for j in 1..=other.len() {
 | 
				
			||||||
 | 
								if a.get(i - 1..i).unwrap() == other.get(j - 1..j).unwrap() {
 | 
				
			||||||
 | 
									dist[j] = dist_prev[j - 1];
 | 
				
			||||||
 | 
								} else {
 | 
				
			||||||
 | 
									// TODO: make addition/subtraction 1 more expensive than replacement, presumably by adding '+ 1' to 2/3 of these
 | 
				
			||||||
 | 
									// motivation: honex from bee movie script is turned into hone instead of honey, this will also generally improve results & is what wikipedia says to do (best reason)
 | 
				
			||||||
 | 
									dist[j] = 1 + cmp::min(
 | 
				
			||||||
 | 
										*dist.get(j - 1).unwrap() + 1,
 | 
				
			||||||
 | 
										cmp::min(*dist_prev.get(j).unwrap() + 1, *dist_prev.get(j - 1).unwrap()));
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							mem::swap(&mut dist, &mut dist_prev);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						dist_prev[other.len()]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*let mut distances = vec![vec![0usize; other.len() + 1]; a.len() + 1];
 | 
				
			||||||
 | 
						for i in 1..=a.len() {
 | 
				
			||||||
 | 
							distances[i][0] = i;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for j in 1..=other.len() {
 | 
				
			||||||
 | 
							distances[0][j] = j;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*unsafe {
 | 
				
			||||||
 | 
						for i in 1..=a.len() {
 | 
				
			||||||
 | 
							for j in 1..=other.len() {
 | 
				
			||||||
 | 
								if *a.get_unchecked(i - 1..i) == *other.get_unchecked(j - 1..j) {
 | 
				
			||||||
 | 
									// 0
 | 
				
			||||||
 | 
									distances[i][j] = *distances.get_unchecked(i - 1).get_unchecked(j - 1);
 | 
				
			||||||
 | 
								} else {
 | 
				
			||||||
 | 
									// 1
 | 
				
			||||||
 | 
									distances[i][j] = 1 + cmp::min(
 | 
				
			||||||
 | 
										(*distances.get_unchecked(i - 1).get_unchecked(j - 1)),
 | 
				
			||||||
 | 
										cmp::min(
 | 
				
			||||||
 | 
											(*distances.get_unchecked(i - 1).get_unchecked(j)),
 | 
				
			||||||
 | 
											(*distances.get_unchecked(i).get_unchecked(j - 1))
 | 
				
			||||||
 | 
										)
 | 
				
			||||||
 | 
									);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						}*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for i in 1..=a.len()  {
 | 
				
			||||||
 | 
							for j in 1..=other.len() {
 | 
				
			||||||
 | 
								if a.get(i - 1..i).unwrap() == other.get(j - 1..j).unwrap() {
 | 
				
			||||||
 | 
									// 0
 | 
				
			||||||
 | 
									distances[i][j] = *distances.get(i - 1).unwrap().get(j - 1).unwrap();
 | 
				
			||||||
 | 
								} else {
 | 
				
			||||||
 | 
									// 1
 | 
				
			||||||
 | 
									distances[i][j] = 1 + cmp::min(
 | 
				
			||||||
 | 
										(*distances.get(i - 1).unwrap().get(j - 1).unwrap()),
 | 
				
			||||||
 | 
										cmp::min(
 | 
				
			||||||
 | 
											(*distances.get(i - 1).unwrap().get(j).unwrap()),
 | 
				
			||||||
 | 
											(*distances.get(i).unwrap().get(j - 1).unwrap())
 | 
				
			||||||
 | 
										)
 | 
				
			||||||
 | 
									);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						*distances.get(a.len()).unwrap().get(other.len()).unwrap()*/
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										11
									
								
								asklyphe-frontend/static/themes/classic/imagesearch.css
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								asklyphe-frontend/static/themes/classic/imagesearch.css
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,11 @@
 | 
				
			||||||
 | 
					.pagecontent {
 | 
				
			||||||
 | 
					    background: url("/static/snow.gif");
 | 
				
			||||||
 | 
					    border: 3px inset black;
 | 
				
			||||||
 | 
					    color: black;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@media screen and (max-width: 800px) {
 | 
				
			||||||
 | 
					    .pagecontent {
 | 
				
			||||||
 | 
					        width: calc(100% - 4px);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -2,7 +2,6 @@
 | 
				
			||||||
    background: url("/static/snow.gif");
 | 
					    background: url("/static/snow.gif");
 | 
				
			||||||
    border: 3px inset black;
 | 
					    border: 3px inset black;
 | 
				
			||||||
    color: black;
 | 
					    color: black;
 | 
				
			||||||
    box-sizing: border-box;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@media screen and (max-width: 800px) {
 | 
					@media screen and (max-width: 800px) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,7 +1,6 @@
 | 
				
			||||||
.settings-area {
 | 
					.settings-area {
 | 
				
			||||||
    background: url("/static/snow.gif");
 | 
					    background: url("/static/snow.gif");
 | 
				
			||||||
    border: 1px solid white;
 | 
					    border: 1px solid white;
 | 
				
			||||||
    box-sizing: border-box;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@media screen and (max-width: 800px) {
 | 
					@media screen and (max-width: 800px) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,6 @@
 | 
				
			||||||
{% extends "admin/shell.html" %}
 | 
					{% extends "admin/shell.html" %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block title %}Home{% endblock %}
 | 
					{% block title %}Invite Codes{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
{% block title %}{{ announcement.title }}{% endblock %}
 | 
					{% block title %}{{ announcement.title }}{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/default/announcement.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/default/announcement.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -12,11 +12,11 @@
 | 
				
			||||||
        <a href="/" class="lyphe">
 | 
					        <a href="/" class="lyphe">
 | 
				
			||||||
            <div id="lyphesmall">
 | 
					            <div id="lyphesmall">
 | 
				
			||||||
                <div class="bent-surrounding">
 | 
					                <div class="bent-surrounding">
 | 
				
			||||||
                    <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png"
 | 
					                    <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png?git={{ git_commit }}"
 | 
				
			||||||
                         alt="image of lyphe, our mascot!">
 | 
					                         alt="image of lyphe, our mascot!">
 | 
				
			||||||
                </div>
 | 
					                </div>
 | 
				
			||||||
                <div id="lyphetitlenav">
 | 
					                <div id="lyphetitlenav">
 | 
				
			||||||
                    <img src="/static/img/logo.png" alt="ask lyphe!"/>
 | 
					                    <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask lyphe!"/>
 | 
				
			||||||
                </div>
 | 
					                </div>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
        </a>
 | 
					        </a>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,8 +3,8 @@
 | 
				
			||||||
{% block title %}the best search engine{% endblock %}
 | 
					{% block title %}the best search engine{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/{{theme}}/frontpage.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/frontpage.css?git={{ git_commit }}"/>
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/{{theme}}/inline-announcement.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/inline-announcement.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -17,10 +17,10 @@
 | 
				
			||||||
    {% endmatch %}
 | 
					    {% endmatch %}
 | 
				
			||||||
    <div id="lyphebig">
 | 
					    <div id="lyphebig">
 | 
				
			||||||
        <div class="bent-surrounding">
 | 
					        <div class="bent-surrounding">
 | 
				
			||||||
            <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png" alt="image of lyphe, our mascot!">
 | 
					            <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
        <div id="lyphetitle">
 | 
					        <div id="lyphetitle">
 | 
				
			||||||
            <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
					            <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
				
			||||||
            <p>a user-first, customizable, useful, and fun search engine!</p>
 | 
					            <p>a user-first, customizable, useful, and fun search engine!</p>
 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
| 
						 | 
					@ -67,7 +67,7 @@
 | 
				
			||||||
        <div class="pagegradient">
 | 
					        <div class="pagegradient">
 | 
				
			||||||
            <div id="userfirst" class="feature">
 | 
					            <div id="userfirst" class="feature">
 | 
				
			||||||
                <h1>user-first</h1>
 | 
					                <h1>user-first</h1>
 | 
				
			||||||
                <img src="/static/lyphe.png" alt="lorem ipsum dolor sit amet"/>
 | 
					                <img src="/static/lyphe.png?git={{ git_commit }}" alt="lorem ipsum dolor sit amet"/>
 | 
				
			||||||
                <p>
 | 
					                <p>
 | 
				
			||||||
                    we will never serve a single ad.
 | 
					                    we will never serve a single ad.
 | 
				
			||||||
                    <br/>
 | 
					                    <br/>
 | 
				
			||||||
| 
						 | 
					@ -90,7 +90,7 @@
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <div id="customizable" class="feature">
 | 
					            <div id="customizable" class="feature">
 | 
				
			||||||
                <h1>customizable</h1>
 | 
					                <h1>customizable</h1>
 | 
				
			||||||
                <img src="/static/lyphe.png" alt="lorem ipsum dolor sit amet"/>
 | 
					                <img src="/static/lyphe.png?git={{ git_commit }}" alt="lorem ipsum dolor sit amet"/>
 | 
				
			||||||
                <p>
 | 
					                <p>
 | 
				
			||||||
                    we aim to design askLyphe in a way that makes personalizing your search results easy and fun!
 | 
					                    we aim to design askLyphe in a way that makes personalizing your search results easy and fun!
 | 
				
			||||||
                    <br/>
 | 
					                    <br/>
 | 
				
			||||||
| 
						 | 
					@ -102,7 +102,7 @@
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <div id="useful-fun" class="feature">
 | 
					            <div id="useful-fun" class="feature">
 | 
				
			||||||
                <h1>useful, but fun!</h1>
 | 
					                <h1>useful, but fun!</h1>
 | 
				
			||||||
                <img src="/static/lyphe.png" alt="lorem ipsum dolor sit amet"/>
 | 
					                <img src="/static/lyphe.png?git={{ git_commit }}" alt="lorem ipsum dolor sit amet"/>
 | 
				
			||||||
                <p>
 | 
					                <p>
 | 
				
			||||||
                    our search engine does not rely on (however may still include, for completeness) results from
 | 
					                    our search engine does not rely on (however may still include, for completeness) results from
 | 
				
			||||||
                    Google, Bing, or any other search engine out there.
 | 
					                    Google, Bing, or any other search engine out there.
 | 
				
			||||||
| 
						 | 
					@ -135,7 +135,7 @@
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <div id="fourth-box" class="feature">
 | 
					            <div id="fourth-box" class="feature">
 | 
				
			||||||
                <h1>fourth box!</h1>
 | 
					                <h1>fourth box!</h1>
 | 
				
			||||||
                <img src="/static/lyphe.png" alt="lorem ipsum dolor sit amet"/>
 | 
					                <img src="/static/lyphe.png?git={{ git_commit }}" alt="lorem ipsum dolor sit amet"/>
 | 
				
			||||||
                <p>
 | 
					                <p>
 | 
				
			||||||
                    i haven't decided what to put here yet! we're still in alpha so i'm sure i'll come up with something
 | 
					                    i haven't decided what to put here yet! we're still in alpha so i'm sure i'll come up with something
 | 
				
			||||||
                    eventually, but i wanted this fourth box because i feel like the design flows nicer with it (:
 | 
					                    eventually, but i wanted this fourth box because i feel like the design flows nicer with it (:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,9 +3,9 @@
 | 
				
			||||||
{% block title %}Home{% endblock %}
 | 
					{% block title %}Home{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/default/home.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/default/home.css?git={{ git_commit }}"/>
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/{{theme}}/home.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/home.css?git={{ git_commit }}"/>
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/{{theme}}/inline-announcement.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/inline-announcement.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -15,10 +15,10 @@
 | 
				
			||||||
    <div class="pagecontent">
 | 
					    <div class="pagecontent">
 | 
				
			||||||
        <div id="lyphebig">
 | 
					        <div id="lyphebig">
 | 
				
			||||||
            <div class="bent-surrounding">
 | 
					            <div class="bent-surrounding">
 | 
				
			||||||
                <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png" alt="image of lyphe, our mascot!">
 | 
					                <img id="lypheimg" src="/static/img/lyphebent{% if alpha %}-alpha{% endif %}.png?git={{ git_commit }}" alt="image of lyphe, our mascot!">
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            <div id="lyphetitle">
 | 
					            <div id="lyphetitle">
 | 
				
			||||||
                <img src="/static/img/logo.png" alt="ask Lyphe!"/>
 | 
					                <img src="/static/img/logo.png?git={{ git_commit }}" alt="ask Lyphe!"/>
 | 
				
			||||||
                <h2>the best search engine!</h2>
 | 
					                <h2>the best search engine!</h2>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,9 +3,9 @@
 | 
				
			||||||
{% block title %}Images - {{ search_query }}{% endblock %}
 | 
					{% block title %}Images - {{ search_query }}{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/default/imagesearch.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/default/imagesearch.css?git={{ git_commit }}"/>
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/{{theme}}/imagesearch.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/imagesearch.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css"/>{% endif %}
 | 
					{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css?git={{ git_commit }}"/>{% endif %}
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -16,10 +16,10 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    <div class="index-bar">
 | 
					    <div class="index-bar">
 | 
				
			||||||
        <a href="{{ websearch_url }}">
 | 
					        <a href="{{ websearch_url }}">
 | 
				
			||||||
            <img id="websearch-img" src="/static/img/websearch.png" alt="search the web"/>
 | 
					            <img id="websearch-img" src="/static/img/websearch.png?git={{ git_commit }}" alt="search the web"/>
 | 
				
			||||||
        </a>
 | 
					        </a>
 | 
				
			||||||
        <a href="{{ imagesearch_url }}">
 | 
					        <a href="{{ imagesearch_url }}">
 | 
				
			||||||
            <img id="imagesearch-img-selected" src="/static/img/imagesearch_selected.png" alt="search for images"/>
 | 
					            <img id="imagesearch-img-selected" src="/static/img/imagesearch_selected.png?git={{ git_commit }}" alt="search for images"/>
 | 
				
			||||||
        </a>
 | 
					        </a>
 | 
				
			||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,9 +3,9 @@
 | 
				
			||||||
{% block title %}{{ search_query }}{% endblock %}
 | 
					{% block title %}{{ search_query }}{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/default/search.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/default/search.css?git={{ git_commit }}"/>
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/{{theme}}/search.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/search.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css"/>{% endif %}
 | 
					{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css?git={{ git_commit }}"/>{% endif %}
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -16,10 +16,10 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    <div class="index-bar">
 | 
					    <div class="index-bar">
 | 
				
			||||||
        <a href="{{ websearch_url }}">
 | 
					        <a href="{{ websearch_url }}">
 | 
				
			||||||
            <img id="websearch-img-selected" src="/static/img/websearch_selected.png" alt="search the web"/>
 | 
					            <img id="websearch-img-selected" src="/static/img/websearch_selected.png?git={{ git_commit }}" alt="search the web"/>
 | 
				
			||||||
        </a>
 | 
					        </a>
 | 
				
			||||||
        <a href="{{ imagesearch_url }}">
 | 
					        <a href="{{ imagesearch_url }}">
 | 
				
			||||||
            <img id="imagesearch-img" src="/static/img/imagesearch.png" alt="search for images"/>
 | 
					            <img id="imagesearch-img" src="/static/img/imagesearch.png?git={{ git_commit }}" alt="search for images"/>
 | 
				
			||||||
        </a>
 | 
					        </a>
 | 
				
			||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,10 +3,10 @@
 | 
				
			||||||
{% block title %}{{ search_query }}{% endblock %}
 | 
					{% block title %}{{ search_query }}{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/default/search.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/default/search.css?git={{ git_commit }}"/>
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/{{theme}}/search.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/search.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css"/>{% endif %}
 | 
					{% if search_query == "notnite" %}<link rel="stylesheet" href="/static/creature.css?git={{ git_commit }}"/>{% endif %}
 | 
				
			||||||
<script src="/static/js/search.js" defer></script>
 | 
					<script src="/static/js/search.js?git={{ git_commit }}" defer></script>
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -17,10 +17,10 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    <div class="index-bar">
 | 
					    <div class="index-bar">
 | 
				
			||||||
        <a href="{{ websearch_url }}">
 | 
					        <a href="{{ websearch_url }}">
 | 
				
			||||||
            <img id="websearch-img-selected" src="/static/img/websearch_selected.png" alt="search the web"/>
 | 
					            <img id="websearch-img-selected" src="/static/img/websearch_selected.png?git={{ git_commit }}" alt="search the web"/>
 | 
				
			||||||
        </a>
 | 
					        </a>
 | 
				
			||||||
        <a href="{{ imagesearch_url }}">
 | 
					        <a href="{{ imagesearch_url }}">
 | 
				
			||||||
            <img id="imagesearch-img" src="/static/img/imagesearch.png" alt="search for images"/>
 | 
					            <img id="imagesearch-img" src="/static/img/imagesearch.png?git={{ git_commit }}" alt="search for images"/>
 | 
				
			||||||
        </a>
 | 
					        </a>
 | 
				
			||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,8 +11,8 @@
 | 
				
			||||||
              title="AskLyphe"
 | 
					              title="AskLyphe"
 | 
				
			||||||
              href="/static/osd.xml" />
 | 
					              href="/static/osd.xml" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <link rel="stylesheet" href="/static/themes/default/shell.css" />
 | 
					        <link rel="stylesheet" href="/static/themes/default/shell.css?git={{ git_commit }}" />
 | 
				
			||||||
        <link rel="stylesheet" href="/static/themes/{{theme}}/shell.css" />
 | 
					        <link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/shell.css?git={{ git_commit }}" />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        {% block head %}{% endblock %}
 | 
					        {% block head %}{% endblock %}
 | 
				
			||||||
    </head>
 | 
					    </head>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,8 +3,8 @@
 | 
				
			||||||
{% block title %}{{ search_query }}{% endblock %}
 | 
					{% block title %}{{ search_query }}{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block head %}
 | 
					{% block head %}
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/default/settings.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/default/settings.css?git={{ git_commit }}"/>
 | 
				
			||||||
<link rel="stylesheet" href="/static/themes/{{theme}}/settings.css"/>
 | 
					<link rel="stylesheet" href="/static/themes/{{theme.internal_name()}}/settings.css?git={{ git_commit }}"/>
 | 
				
			||||||
{% endblock %}
 | 
					{% endblock %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{% block page %}
 | 
					{% block page %}
 | 
				
			||||||
| 
						 | 
					@ -43,16 +43,15 @@
 | 
				
			||||||
                <div class="settings-row">
 | 
					                <div class="settings-row">
 | 
				
			||||||
                    <div id="theme" class="settings-section">
 | 
					                    <div id="theme" class="settings-section">
 | 
				
			||||||
                        <h2>theme</h2>
 | 
					                        <h2>theme</h2>
 | 
				
			||||||
                        {% for t in themes %}
 | 
					                        <p>your current theme is: "{{true_theme.display_name()}}"</p>
 | 
				
			||||||
                        {%if theme==t.value%}
 | 
					                        {% if true_theme.internal_name() != theme.internal_name() %}
 | 
				
			||||||
                        <p>your current theme is: "{{t.name}}"</p>
 | 
					                        <p>today's random theme is {{ theme.display_name() }}</p>
 | 
				
			||||||
                        {%endif%}
 | 
					                        {% endif %}
 | 
				
			||||||
                        {% endfor %}
 | 
					 | 
				
			||||||
                        <form action="/user_settings/set_theme" method="post">
 | 
					                        <form action="/user_settings/set_theme" method="post">
 | 
				
			||||||
                            <label for="theme-selector">theme</label>
 | 
					                            <label for="theme-selector">theme</label>
 | 
				
			||||||
                            <select name="theme" id="theme-selector">
 | 
					                            <select name="theme" id="theme-selector">
 | 
				
			||||||
                                {% for t in themes %}
 | 
					                                {% for t in themes %}
 | 
				
			||||||
                                <option value="{{t.value}}" {%if theme==t.value%}selected{%endif%}>{{t.name}}</option>
 | 
					                                <option value="{{t.internal_name()}}" {%if true_theme.internal_name()==t.internal_name()%}selected{%endif%}>{{t.display_name()}}</option>
 | 
				
			||||||
                                {% endfor %}
 | 
					                                {% endfor %}
 | 
				
			||||||
                            </select>
 | 
					                            </select>
 | 
				
			||||||
                            <button type="submit" id="theme-submit">change theme!</button>
 | 
					                            <button type="submit" id="theme-submit">change theme!</button>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -457,6 +457,7 @@ pub async fn user_count(db: &DatabaseConnection) -> Result<usize, FetchUserError
 | 
				
			||||||
/// returns the number of users in the database who are admins
 | 
					/// returns the number of users in the database who are admins
 | 
				
			||||||
pub async fn admin_count(db: &DatabaseConnection) -> Result<usize, FetchUserError> {
 | 
					pub async fn admin_count(db: &DatabaseConnection) -> Result<usize, FetchUserError> {
 | 
				
			||||||
    // dont fucking touch this, i don't know why it works but it does, it's actually evil
 | 
					    // dont fucking touch this, i don't know why it works but it does, it's actually evil
 | 
				
			||||||
 | 
					    // note: doesn't work
 | 
				
			||||||
    Ok(user::Entity::find().filter(user::Column::Flags.into_expr().binary(BinOper::LShift, Expr::value(63 - 2)).lt(1 << (63 - 2)))
 | 
					    Ok(user::Entity::find().filter(user::Column::Flags.into_expr().binary(BinOper::LShift, Expr::value(63 - 2)).lt(1 << (63 - 2)))
 | 
				
			||||||
        .count(db).await.map_err(|e| {
 | 
					        .count(db).await.map_err(|e| {
 | 
				
			||||||
        error!("DATABASE ERROR WHILE ADMINCOUNT: {e}");
 | 
					        error!("DATABASE ERROR WHILE ADMINCOUNT: {e}");
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,6 +15,7 @@ mod process;
 | 
				
			||||||
pub mod db;
 | 
					pub mod db;
 | 
				
			||||||
mod email;
 | 
					mod email;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use std::env;
 | 
				
			||||||
use std::string::ToString;
 | 
					use std::string::ToString;
 | 
				
			||||||
use std::sync::Arc;
 | 
					use std::sync::Arc;
 | 
				
			||||||
use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
 | 
					use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
 | 
				
			||||||
| 
						 | 
					@ -36,6 +37,8 @@ pub static SMTP_URL: Lazy<String> = Lazy::new(|| std::env::var("SMTP_URL").expec
 | 
				
			||||||
pub static SMTP_USERNAME: Lazy<String> = Lazy::new(|| std::env::var("SMTP_USERNAME").expect("NO SMTP_USERNAME DEFINED"));
 | 
					pub static SMTP_USERNAME: Lazy<String> = Lazy::new(|| std::env::var("SMTP_USERNAME").expect("NO SMTP_USERNAME DEFINED"));
 | 
				
			||||||
pub static SMTP_PASSWORD: Lazy<String> = Lazy::new(|| std::env::var("SMTP_PASSWORD").expect("NO SMTP_PASSWORD DEFINED"));
 | 
					pub static SMTP_PASSWORD: Lazy<String> = Lazy::new(|| std::env::var("SMTP_PASSWORD").expect("NO SMTP_PASSWORD DEFINED"));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub static AUTH_URL: Lazy<String> = Lazy::new(|| std::env::var("AUTH_URL").unwrap_or("https://auth.asklyphe.com".to_string()));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub static PROCESSES_HANDLED: AtomicU64 = AtomicU64::new(0);
 | 
					pub static PROCESSES_HANDLED: AtomicU64 = AtomicU64::new(0);
 | 
				
			||||||
pub static LAST_MESSAGE: AtomicI64 = AtomicI64::new(0);
 | 
					pub static LAST_MESSAGE: AtomicI64 = AtomicI64::new(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -21,7 +21,7 @@ use crate::db::{invite_code, session, user};
 | 
				
			||||||
use crate::db::invite_code::ConsumeInviteCodeError;
 | 
					use crate::db::invite_code::ConsumeInviteCodeError;
 | 
				
			||||||
use crate::db::session::{CreateSessionError, DeleteSessionError, FetchSessionError};
 | 
					use crate::db::session::{CreateSessionError, DeleteSessionError, FetchSessionError};
 | 
				
			||||||
use crate::db::user::{CreateUserError, DeleteUserError, EmailChangeError, FetchUserError, RegisterVerificationCodeError, VerifyEmailPassComboError, VerifyVerificationCodeError};
 | 
					use crate::db::user::{CreateUserError, DeleteUserError, EmailChangeError, FetchUserError, RegisterVerificationCodeError, VerifyEmailPassComboError, VerifyVerificationCodeError};
 | 
				
			||||||
use crate::email;
 | 
					use crate::{email, AUTH_URL};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn generate_email_verification_code() -> String {
 | 
					fn generate_email_verification_code() -> String {
 | 
				
			||||||
    rand::thread_rng()
 | 
					    rand::thread_rng()
 | 
				
			||||||
| 
						 | 
					@ -121,7 +121,7 @@ pub async fn register_request(db: &DatabaseConnection, request: RegisterRequest)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    debug!("verification code for {} is \"{}\"", request.username, verification_code);
 | 
					    debug!("verification code for {} is \"{}\"", request.username, verification_code);
 | 
				
			||||||
    email::send_verification_code_email(&request.email, &request.username, format!("https://auth.asklyphe.com/verify?username={}&token={}", request.username, verification_code).as_str());
 | 
					    email::send_verification_code_email(&request.email, &request.username, format!("{}/verify?username={}&token={}", AUTH_URL.as_str(), request.username, verification_code).as_str());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RegisterResponse::Success
 | 
					    RegisterResponse::Success
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										30
									
								
								lyphedb/.gitignore
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								lyphedb/.gitignore
									
										
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,30 @@
 | 
				
			||||||
 | 
					# ---> Rust
 | 
				
			||||||
 | 
					# Generated by Cargo
 | 
				
			||||||
 | 
					# will have compiled files and executables
 | 
				
			||||||
 | 
					debug/
 | 
				
			||||||
 | 
					target/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 | 
				
			||||||
 | 
					# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
 | 
				
			||||||
 | 
					Cargo.lock
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# These are backup files generated by rustfmt
 | 
				
			||||||
 | 
					**/*.rs.bk
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# MSVC Windows builds of rustc generate these, which store debugging information
 | 
				
			||||||
 | 
					*.pdb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# RustRover
 | 
				
			||||||
 | 
					#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 | 
				
			||||||
 | 
					#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 | 
				
			||||||
 | 
					#  and can be added to the global gitignore or merged into this file.  For a more nuclear
 | 
				
			||||||
 | 
					#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 | 
				
			||||||
 | 
					#.idea/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Added by cargo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/target
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# don't commit employee test configs (:
 | 
				
			||||||
 | 
					husky_config.toml
 | 
				
			||||||
							
								
								
									
										18
									
								
								lyphedb/Cargo.toml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								lyphedb/Cargo.toml
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,18 @@
 | 
				
			||||||
 | 
					[package]
 | 
				
			||||||
 | 
					name = "lyphedb"
 | 
				
			||||||
 | 
					version = "0.1.0"
 | 
				
			||||||
 | 
					edition = "2024"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[dependencies]
 | 
				
			||||||
 | 
					log = "0.4.20"
 | 
				
			||||||
 | 
					rmp-serde = "1.1.2"
 | 
				
			||||||
 | 
					futures = "0.3.30"
 | 
				
			||||||
 | 
					async-nats = "0.38.0"
 | 
				
			||||||
 | 
					tokio = { version = "1.0", features = ["full"] }
 | 
				
			||||||
 | 
					chrono = "0.4.31"
 | 
				
			||||||
 | 
					serde = { version = "1.0", features = ["derive"] }
 | 
				
			||||||
 | 
					rand = "0.9.0"
 | 
				
			||||||
 | 
					toml = "0.8.20"
 | 
				
			||||||
 | 
					env_logger = "0.11.6"
 | 
				
			||||||
 | 
					once_cell = "1.20.3"
 | 
				
			||||||
 | 
					percent-encoding = "2.3.1"
 | 
				
			||||||
							
								
								
									
										10
									
								
								lyphedb/config.toml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								lyphedb/config.toml
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,10 @@
 | 
				
			||||||
 | 
					name = "lyphedb-test"
 | 
				
			||||||
 | 
					write = true
 | 
				
			||||||
 | 
					master = "/lyphedb_master"
 | 
				
			||||||
 | 
					ram_limit = "1mb" # supported suffixes: b,kb,mb,gb
 | 
				
			||||||
 | 
					gc_limit = "512kb"
 | 
				
			||||||
 | 
					avg_entry_size = "1kb"
 | 
				
			||||||
 | 
					log = "debug"
 | 
				
			||||||
 | 
					nats_cert = "nats/nats.cert"
 | 
				
			||||||
 | 
					nats_key = "nats/nats.pem"
 | 
				
			||||||
 | 
					nats_url = "127.0.0.1:4222"
 | 
				
			||||||
							
								
								
									
										14
									
								
								lyphedb/ldbtesttool/Cargo.toml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								lyphedb/ldbtesttool/Cargo.toml
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,14 @@
 | 
				
			||||||
 | 
					[package]
 | 
				
			||||||
 | 
					name = "ldbtesttool"
 | 
				
			||||||
 | 
					version = "0.1.0"
 | 
				
			||||||
 | 
					edition = "2024"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[dependencies]
 | 
				
			||||||
 | 
					rmp-serde = "1.1.2"
 | 
				
			||||||
 | 
					futures = "0.3.30"
 | 
				
			||||||
 | 
					async-nats = "0.38.0"
 | 
				
			||||||
 | 
					tokio = { version = "1.0", features = ["full"] }
 | 
				
			||||||
 | 
					serde = { version = "1.0", features = ["derive"] }
 | 
				
			||||||
 | 
					lyphedb = { path = "../" }
 | 
				
			||||||
 | 
					toml = "0.8.20"
 | 
				
			||||||
 | 
					rand = "0.9.0"
 | 
				
			||||||
							
								
								
									
										40
									
								
								lyphedb/ldbtesttool/src/config.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								lyphedb/ldbtesttool/src/config.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,40 @@
 | 
				
			||||||
 | 
					use serde::Deserialize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Deserialize)]
 | 
				
			||||||
 | 
					struct ConfigFile {
 | 
				
			||||||
 | 
					    pub name: String,
 | 
				
			||||||
 | 
					    pub nats_cert: String,
 | 
				
			||||||
 | 
					    pub nats_key: String,
 | 
				
			||||||
 | 
					    pub nats_url: String,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone)]
 | 
				
			||||||
 | 
					pub struct LypheDBConfig {
 | 
				
			||||||
 | 
					    pub name: String,
 | 
				
			||||||
 | 
					    pub nats_cert: String,
 | 
				
			||||||
 | 
					    pub nats_key: String,
 | 
				
			||||||
 | 
					    pub nats_url: String,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn load_config(path: &str) -> LypheDBConfig {
 | 
				
			||||||
 | 
					    let s = std::fs::read_to_string(path);
 | 
				
			||||||
 | 
					    if let Err(e) = s {
 | 
				
			||||||
 | 
					        panic!("failed to read config file: {}", e);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let s = s.unwrap();
 | 
				
			||||||
 | 
					    let cnf = toml::from_str::<ConfigFile>(&s);
 | 
				
			||||||
 | 
					    if let Err(e) = cnf {
 | 
				
			||||||
 | 
					        panic!("failed to parse config file: {}", e);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let cnf = cnf.unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // quick checks and conversions
 | 
				
			||||||
 | 
					    let mut config = LypheDBConfig {
 | 
				
			||||||
 | 
					        name: cnf.name,
 | 
				
			||||||
 | 
					        nats_cert: cnf.nats_cert,
 | 
				
			||||||
 | 
					        nats_key: cnf.nats_key,
 | 
				
			||||||
 | 
					        nats_url: cnf.nats_url,
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    config
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										114
									
								
								lyphedb/ldbtesttool/src/main.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								lyphedb/ldbtesttool/src/main.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,114 @@
 | 
				
			||||||
 | 
					use crate::config::load_config;
 | 
				
			||||||
 | 
					use lyphedb::{KVList, KeyList, LDBNatsMessage, LypheDBCommand, PropagationStrategy};
 | 
				
			||||||
 | 
					use std::collections::BTreeMap;
 | 
				
			||||||
 | 
					use async_nats::Message;
 | 
				
			||||||
 | 
					use futures::StreamExt;
 | 
				
			||||||
 | 
					use tokio::sync::mpsc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mod config;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[tokio::main]
 | 
				
			||||||
 | 
					async fn main() {
 | 
				
			||||||
 | 
					    let config = load_config(&std::env::args().nth(1).expect("please specify config file"));
 | 
				
			||||||
 | 
					    let nats = async_nats::ConnectOptions::new()
 | 
				
			||||||
 | 
					        .add_client_certificate(
 | 
				
			||||||
 | 
					            config.nats_cert.as_str().into(),
 | 
				
			||||||
 | 
					            config.nats_key.as_str().into(),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        .connect(config.nats_url.as_str())
 | 
				
			||||||
 | 
					        .await;
 | 
				
			||||||
 | 
					    if let Err(e) = nats {
 | 
				
			||||||
 | 
					        eprintln!("FATAL ERROR, COULDN'T CONNECT TO NATS: {}", e);
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let nats = nats.unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // test 1: create 10000 keys, send in chunks of 10
 | 
				
			||||||
 | 
					    println!("test 1: create 10_000 keys, send in chunks of 100");
 | 
				
			||||||
 | 
					    let (key_tx, mut key_rx) = mpsc::unbounded_channel();
 | 
				
			||||||
 | 
					    let start = std::time::Instant::now();
 | 
				
			||||||
 | 
					    let mut tasks = vec![];
 | 
				
			||||||
 | 
					    for _ in 0..(10_000 / 100) {
 | 
				
			||||||
 | 
					        let name = config.name.clone();
 | 
				
			||||||
 | 
					        let nats = nats.clone();
 | 
				
			||||||
 | 
					        let key_tx = key_tx.clone();
 | 
				
			||||||
 | 
					        tasks.push(tokio::spawn(async move {
 | 
				
			||||||
 | 
					            let mut keys = BTreeMap::new();
 | 
				
			||||||
 | 
					            for _ in 0..100 {
 | 
				
			||||||
 | 
					                let key = rand::random::<[u8; 16]>();
 | 
				
			||||||
 | 
					                let data = rand::random::<[u8; 16]>();
 | 
				
			||||||
 | 
					                key_tx.send((key, data)).unwrap();
 | 
				
			||||||
 | 
					                keys.insert(key, data);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            let data = rmp_serde::to_vec(&LDBNatsMessage::Command(LypheDBCommand::SetKeys(
 | 
				
			||||||
 | 
					                KVList {
 | 
				
			||||||
 | 
					                    kvs: keys
 | 
				
			||||||
 | 
					                        .into_iter()
 | 
				
			||||||
 | 
					                        .map(|(k, v)| (k.to_vec(), v.to_vec()))
 | 
				
			||||||
 | 
					                        .collect(),
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					                PropagationStrategy::Immediate,
 | 
				
			||||||
 | 
					            )))
 | 
				
			||||||
 | 
					                .unwrap();
 | 
				
			||||||
 | 
					            let replyto_sub = "lyphedb_test_1".to_string();
 | 
				
			||||||
 | 
					            let mut subscriber = nats.subscribe(replyto_sub.clone()).await.unwrap();
 | 
				
			||||||
 | 
					            nats.publish_with_reply(name.clone(), replyto_sub, data.into()).await.unwrap();
 | 
				
			||||||
 | 
					            if let Some(reply) = subscriber.next().await {
 | 
				
			||||||
 | 
					                let reply = rmp_serde::from_slice::<LDBNatsMessage>(&reply.payload).unwrap();
 | 
				
			||||||
 | 
					                if let LDBNatsMessage::Success = reply {} else {
 | 
				
			||||||
 | 
					                    eprintln!("failure");
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    for task in tasks {
 | 
				
			||||||
 | 
					        task.await.unwrap();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let end = std::time::Instant::now();
 | 
				
			||||||
 | 
					    println!("test 1: {}ms", end.duration_since(start).as_millis());
 | 
				
			||||||
 | 
					    let mut our_copy = BTreeMap::new();
 | 
				
			||||||
 | 
					    let mut buffer = vec![];
 | 
				
			||||||
 | 
					    key_rx.recv_many(&mut buffer, 10_000).await;
 | 
				
			||||||
 | 
					    for (k, v) in buffer {
 | 
				
			||||||
 | 
					        our_copy.insert(k, v);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // test 2: read back all keys and check for accuracy
 | 
				
			||||||
 | 
					    println!("test 2: read back all keys and check for accuracy");
 | 
				
			||||||
 | 
					    let kv: Vec<_> = our_copy.into_iter().map(|(k, v)| (k.to_vec(), v.to_vec())).collect();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let start = std::time::Instant::now();
 | 
				
			||||||
 | 
					    let mut tasks = vec![];
 | 
				
			||||||
 | 
					    for i in 0..100 {
 | 
				
			||||||
 | 
					        let batch = kv[i * 100..((i + 1) * 100).min(kv.len())].to_vec();
 | 
				
			||||||
 | 
					        let name = config.name.clone();
 | 
				
			||||||
 | 
					        let nats = nats.clone();
 | 
				
			||||||
 | 
					        tasks.push(tokio::spawn(async move {
 | 
				
			||||||
 | 
					            let data = rmp_serde::to_vec(&LDBNatsMessage::Command(LypheDBCommand::GetKeys(
 | 
				
			||||||
 | 
					                KeyList {
 | 
				
			||||||
 | 
					                    keys: batch.iter().map(|(k, _)| k.to_vec()).collect()
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            ))).unwrap();
 | 
				
			||||||
 | 
					            let replyto_sub = format!("lyphedb_test_2_{}", i);
 | 
				
			||||||
 | 
					            let mut subscriber = nats.subscribe(replyto_sub.clone()).await.unwrap();
 | 
				
			||||||
 | 
					            nats.publish_with_reply(name.clone(), replyto_sub, data.into()).await.unwrap();
 | 
				
			||||||
 | 
					            if let Some(reply) = subscriber.next().await {
 | 
				
			||||||
 | 
					                let reply = rmp_serde::from_slice::<LDBNatsMessage>(&reply.payload).unwrap();
 | 
				
			||||||
 | 
					                if let LDBNatsMessage::Entries(kvlist) = reply {
 | 
				
			||||||
 | 
					                    // check all keys
 | 
				
			||||||
 | 
					                    for (k1, v1) in batch {
 | 
				
			||||||
 | 
					                        let v2 = kvlist.kvs.iter().find(|(k, v)| k == &k1).expect("key not found");
 | 
				
			||||||
 | 
					                        assert_eq!(v1, v2.1);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    eprintln!("failure");
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    for task in tasks {
 | 
				
			||||||
 | 
					        task.await.unwrap();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let end = std::time::Instant::now();
 | 
				
			||||||
 | 
					    println!("test 2: {}ms", end.duration_since(start).as_millis());
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										129
									
								
								lyphedb/src/config.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								lyphedb/src/config.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,129 @@
 | 
				
			||||||
 | 
					use serde::Deserialize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Deserialize)]
 | 
				
			||||||
 | 
					struct ConfigFile {
 | 
				
			||||||
 | 
					    pub name: String,
 | 
				
			||||||
 | 
					    pub write: bool,
 | 
				
			||||||
 | 
					    pub master: String,
 | 
				
			||||||
 | 
					    pub ram_limit: String,
 | 
				
			||||||
 | 
					    pub gc_limit: String,
 | 
				
			||||||
 | 
					    pub avg_entry_size: String,
 | 
				
			||||||
 | 
					    pub log: String,
 | 
				
			||||||
 | 
					    pub nats_cert: String,
 | 
				
			||||||
 | 
					    pub nats_key: String,
 | 
				
			||||||
 | 
					    pub nats_url: String,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone)]
 | 
				
			||||||
 | 
					pub struct LypheDBConfig {
 | 
				
			||||||
 | 
					    pub name: String,
 | 
				
			||||||
 | 
					    pub write: bool,
 | 
				
			||||||
 | 
					    pub master: String,
 | 
				
			||||||
 | 
					    pub ram_limit: usize,
 | 
				
			||||||
 | 
					    pub gc_limit: usize,
 | 
				
			||||||
 | 
					    pub avg_entry_size: usize,
 | 
				
			||||||
 | 
					    pub log: String,
 | 
				
			||||||
 | 
					    pub nats_cert: String,
 | 
				
			||||||
 | 
					    pub nats_key: String,
 | 
				
			||||||
 | 
					    pub nats_url: String,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn load_config(path: &str) -> LypheDBConfig {
 | 
				
			||||||
 | 
					    let s = std::fs::read_to_string(path);
 | 
				
			||||||
 | 
					    if let Err(e) = s {
 | 
				
			||||||
 | 
					        panic!("failed to read config file: {}", e);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let s = s.unwrap();
 | 
				
			||||||
 | 
					    let cnf = toml::from_str::<ConfigFile>(&s);
 | 
				
			||||||
 | 
					    if let Err(e) = cnf {
 | 
				
			||||||
 | 
					        panic!("failed to parse config file: {}", e);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let cnf = cnf.unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // quick checks and conversions
 | 
				
			||||||
 | 
					    let mut config = LypheDBConfig {
 | 
				
			||||||
 | 
					        name: cnf.name,
 | 
				
			||||||
 | 
					        write: cnf.write,
 | 
				
			||||||
 | 
					        master: cnf.master,
 | 
				
			||||||
 | 
					        ram_limit: 0,
 | 
				
			||||||
 | 
					        gc_limit: 0,
 | 
				
			||||||
 | 
					        avg_entry_size: 0,
 | 
				
			||||||
 | 
					        log: cnf.log,
 | 
				
			||||||
 | 
					        nats_cert: cnf.nats_cert,
 | 
				
			||||||
 | 
					        nats_key: cnf.nats_key,
 | 
				
			||||||
 | 
					        nats_url: cnf.nats_url,
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if !(cnf.ram_limit.ends_with("b") &&
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            cnf.ram_limit.trim_end_matches("b").ends_with("k") ||
 | 
				
			||||||
 | 
					                cnf.ram_limit.trim_end_matches("b").ends_with("m") ||
 | 
				
			||||||
 | 
					                cnf.ram_limit.trim_end_matches("b").ends_with("g")
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					    ) {
 | 
				
			||||||
 | 
					        panic!("invalid ram limit");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    config.ram_limit = if cnf.ram_limit.ends_with("gb") {
 | 
				
			||||||
 | 
					        cnf.ram_limit.trim_end_matches("gb").parse::<usize>().unwrap() * 1024 * 1024 * 1024
 | 
				
			||||||
 | 
					    } else if cnf.ram_limit.ends_with("mb") {
 | 
				
			||||||
 | 
					        cnf.ram_limit.trim_end_matches("mb").parse::<usize>().unwrap() * 1024 * 1024
 | 
				
			||||||
 | 
					    } else if cnf.ram_limit.ends_with("kb") {
 | 
				
			||||||
 | 
					        cnf.ram_limit.trim_end_matches("kb").parse::<usize>().unwrap() * 1024
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        cnf.ram_limit.trim_end_matches("b").parse::<usize>().unwrap()
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if !(cnf.gc_limit.ends_with("b") &&
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            cnf.gc_limit.trim_end_matches("b").ends_with("k") ||
 | 
				
			||||||
 | 
					                cnf.gc_limit.trim_end_matches("b").ends_with("m") ||
 | 
				
			||||||
 | 
					                cnf.gc_limit.trim_end_matches("b").ends_with("g")
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					    ) {
 | 
				
			||||||
 | 
					        panic!("invalid ram limit");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    config.gc_limit = if cnf.gc_limit.ends_with("gb") {
 | 
				
			||||||
 | 
					        cnf.gc_limit.trim_end_matches("gb").parse::<usize>().unwrap() * 1024 * 1024 * 1024
 | 
				
			||||||
 | 
					    } else if cnf.gc_limit.ends_with("mb") {
 | 
				
			||||||
 | 
					        cnf.gc_limit.trim_end_matches("mb").parse::<usize>().unwrap() * 1024 * 1024
 | 
				
			||||||
 | 
					    } else if cnf.gc_limit.ends_with("kb") {
 | 
				
			||||||
 | 
					        cnf.gc_limit.trim_end_matches("kb").parse::<usize>().unwrap() * 1024
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        cnf.gc_limit.trim_end_matches("b").parse::<usize>().unwrap()
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if !(cnf.avg_entry_size.ends_with("b") &&
 | 
				
			||||||
 | 
					        (
 | 
				
			||||||
 | 
					            cnf.avg_entry_size.trim_end_matches("b").ends_with("k") ||
 | 
				
			||||||
 | 
					                cnf.avg_entry_size.trim_end_matches("b").ends_with("m") ||
 | 
				
			||||||
 | 
					                cnf.avg_entry_size.trim_end_matches("b").ends_with("g")
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					    ) {
 | 
				
			||||||
 | 
					        panic!("invalid ram limit");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    config.avg_entry_size = if cnf.avg_entry_size.ends_with("gb") {
 | 
				
			||||||
 | 
					        cnf.avg_entry_size.trim_end_matches("gb").parse::<usize>().unwrap() * 1024 * 1024 * 1024
 | 
				
			||||||
 | 
					    } else if cnf.avg_entry_size.ends_with("mb") {
 | 
				
			||||||
 | 
					        cnf.avg_entry_size.trim_end_matches("mb").parse::<usize>().unwrap() * 1024 * 1024
 | 
				
			||||||
 | 
					    } else if cnf.avg_entry_size.ends_with("kb") {
 | 
				
			||||||
 | 
					        cnf.avg_entry_size.trim_end_matches("kb").parse::<usize>().unwrap() * 1024
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        cnf.avg_entry_size.trim_end_matches("b").parse::<usize>().unwrap()
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if config.avg_entry_size > config.ram_limit {
 | 
				
			||||||
 | 
					        panic!("avg entry size is larger than ram limit");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if config.gc_limit > config.ram_limit {
 | 
				
			||||||
 | 
					        panic!("gc limit is larger than ram limit");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if config.log.is_empty() {
 | 
				
			||||||
 | 
					        config.log = "info".to_string();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if config.log != "debug" && config.log != "info" && config.log != "warn" && config.log != "error" {
 | 
				
			||||||
 | 
					        panic!("invalid log level");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    config
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										321
									
								
								lyphedb/src/dbimpl/mod.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										321
									
								
								lyphedb/src/dbimpl/mod.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,321 @@
 | 
				
			||||||
 | 
					use crate::config::LypheDBConfig;
 | 
				
			||||||
 | 
					use log::*;
 | 
				
			||||||
 | 
					use lyphedb::PropagationStrategy;
 | 
				
			||||||
 | 
					use once_cell::sync::Lazy;
 | 
				
			||||||
 | 
					use std::borrow::Cow;
 | 
				
			||||||
 | 
					use std::collections::BTreeMap;
 | 
				
			||||||
 | 
					use std::path::PathBuf;
 | 
				
			||||||
 | 
					use std::sync::Arc;
 | 
				
			||||||
 | 
					use std::sync::atomic::{AtomicU64, Ordering};
 | 
				
			||||||
 | 
					use percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS};
 | 
				
			||||||
 | 
					use tokio::sync::RwLock;
 | 
				
			||||||
 | 
					pub const NOT_ALLOWED_ASCII: AsciiSet = CONTROLS.add(b' ').add(b'/').add(b'.').add(b'\\');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub type LDBCache = BTreeMap<Arc<Vec<u8>>, Arc<Vec<u8>>>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Reads will read from this cache
 | 
				
			||||||
 | 
					pub static PRIMARY_CACHE: Lazy<RwLock<LDBCache>> =
 | 
				
			||||||
 | 
					    Lazy::new(|| RwLock::new(BTreeMap::new()));
 | 
				
			||||||
 | 
					/// Writes will be written to this cache, and then depending on the propagation method, the
 | 
				
			||||||
 | 
					/// Primary cache will be set to a copy
 | 
				
			||||||
 | 
					pub static SECONDARY_CACHE: Lazy<RwLock<LDBCache>> =
 | 
				
			||||||
 | 
					    Lazy::new(|| RwLock::new(BTreeMap::new()));
 | 
				
			||||||
 | 
					/// how often are keys accessed? this will influence the garbage collector
 | 
				
			||||||
 | 
					pub static KEY_ACCESS_COUNTER: Lazy<RwLock<BTreeMap<Arc<Vec<u8>>, AtomicU64>>> =
 | 
				
			||||||
 | 
					    Lazy::new(|| RwLock::new(BTreeMap::new()));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn key_to_path(config: &LypheDBConfig, key: &[u8]) -> PathBuf {
 | 
				
			||||||
 | 
					    let mut path = PathBuf::new();
 | 
				
			||||||
 | 
					    path.push(&config.master);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut fnbuf = Vec::new();
 | 
				
			||||||
 | 
					    for (i, byte) in key.iter().enumerate() {
 | 
				
			||||||
 | 
					        if *byte == b'/' {
 | 
				
			||||||
 | 
					            let encoded = percent_encode(&fnbuf, &NOT_ALLOWED_ASCII).to_string();
 | 
				
			||||||
 | 
					            path.push(encoded);
 | 
				
			||||||
 | 
					            fnbuf.clear();
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            fnbuf.push(*byte);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if !fnbuf.is_empty() {
 | 
				
			||||||
 | 
					        let encoded = percent_encode(&fnbuf, &NOT_ALLOWED_ASCII).to_string();
 | 
				
			||||||
 | 
					        path.push(encoded);
 | 
				
			||||||
 | 
					        fnbuf.clear();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    path.push(".self");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    path
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug)]
 | 
				
			||||||
 | 
					pub enum OperationError {
 | 
				
			||||||
 | 
					    KeyCannotBeEmpty,
 | 
				
			||||||
 | 
					    FilesystemPermissionError,
 | 
				
			||||||
 | 
					    BadFilesystemEntry,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn set_key_disk(
 | 
				
			||||||
 | 
					    config: &LypheDBConfig,
 | 
				
			||||||
 | 
					    key: &[u8],
 | 
				
			||||||
 | 
					    value: &[u8],
 | 
				
			||||||
 | 
					) -> Result<(), OperationError> {
 | 
				
			||||||
 | 
					    if key.is_empty() {
 | 
				
			||||||
 | 
					        return Err(OperationError::KeyCannotBeEmpty);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let path = key_to_path(config, key);
 | 
				
			||||||
 | 
					    let directory = path.parent().ok_or(OperationError::KeyCannotBeEmpty)?;
 | 
				
			||||||
 | 
					    if let Ok(directory_exists) = directory.try_exists() {
 | 
				
			||||||
 | 
					        if !directory_exists {
 | 
				
			||||||
 | 
					            std::fs::create_dir_all(directory).map_err(|e| {
 | 
				
			||||||
 | 
					                warn!("couldn't create directory: {:?}", e);
 | 
				
			||||||
 | 
					                OperationError::FilesystemPermissionError
 | 
				
			||||||
 | 
					            })?;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        return Err(OperationError::FilesystemPermissionError);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::fs::write(path, value).map_err(|e| {
 | 
				
			||||||
 | 
					        warn!("couldn't write file: {:?}", e);
 | 
				
			||||||
 | 
					        OperationError::FilesystemPermissionError
 | 
				
			||||||
 | 
					    })?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Ok(())
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn delete_key_disk(
 | 
				
			||||||
 | 
					    config: &LypheDBConfig,
 | 
				
			||||||
 | 
					    key: &[u8],
 | 
				
			||||||
 | 
					) -> Result<(), OperationError> {
 | 
				
			||||||
 | 
					    if key.is_empty() {
 | 
				
			||||||
 | 
					        return Err(OperationError::KeyCannotBeEmpty);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let path = key_to_path(config, key);
 | 
				
			||||||
 | 
					    if let Ok(exists) = path.try_exists() {
 | 
				
			||||||
 | 
					        if !exists {
 | 
				
			||||||
 | 
					            return Ok(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    std::fs::remove_file(path).map_err(|e| {
 | 
				
			||||||
 | 
					        warn!("couldn't remove file: {:?}", e);
 | 
				
			||||||
 | 
					        OperationError::FilesystemPermissionError
 | 
				
			||||||
 | 
					    })
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn get_key_disk(
 | 
				
			||||||
 | 
					    config: &LypheDBConfig,
 | 
				
			||||||
 | 
					    key: &[u8],
 | 
				
			||||||
 | 
					) -> Result<Option<Vec<u8>>, OperationError> {
 | 
				
			||||||
 | 
					    if key.is_empty() {
 | 
				
			||||||
 | 
					        return Err(OperationError::KeyCannotBeEmpty);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let path = key_to_path(config, key);
 | 
				
			||||||
 | 
					    if let Ok(exists) = path.try_exists() {
 | 
				
			||||||
 | 
					        if !exists {
 | 
				
			||||||
 | 
					            return Ok(None);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let data = std::fs::read(path).map_err(|e| {
 | 
				
			||||||
 | 
					        warn!("couldn't read file: {:?}", e);
 | 
				
			||||||
 | 
					        OperationError::FilesystemPermissionError
 | 
				
			||||||
 | 
					    })?;
 | 
				
			||||||
 | 
					    Ok(Some(data))
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// this function allows empty keys, so you can get all keys under root by doing
 | 
				
			||||||
 | 
					/// get_keys_under_keydir_disk(..., b"")
 | 
				
			||||||
 | 
					pub async fn get_keys_under_keydir_disk(
 | 
				
			||||||
 | 
					    config: &LypheDBConfig,
 | 
				
			||||||
 | 
					    keydir: &[u8],
 | 
				
			||||||
 | 
					) -> Result<Vec<Vec<u8>>, OperationError> {
 | 
				
			||||||
 | 
					    let path = key_to_path(config, keydir);
 | 
				
			||||||
 | 
					    let path = path.parent().expect("bad master");
 | 
				
			||||||
 | 
					    let mut keys = Vec::new();
 | 
				
			||||||
 | 
					    for entry in std::fs::read_dir(path).map_err(|e| {
 | 
				
			||||||
 | 
					        warn!("couldn't read directory: {:?}", e);
 | 
				
			||||||
 | 
					        OperationError::FilesystemPermissionError
 | 
				
			||||||
 | 
					    })? {
 | 
				
			||||||
 | 
					        let entry = entry.map_err(|e| {
 | 
				
			||||||
 | 
					            warn!("couldn't read directory entry: {:?}", e);
 | 
				
			||||||
 | 
					            OperationError::FilesystemPermissionError
 | 
				
			||||||
 | 
					        })?;
 | 
				
			||||||
 | 
					        let path = entry.path();
 | 
				
			||||||
 | 
					        let filename = path
 | 
				
			||||||
 | 
					            .to_str()
 | 
				
			||||||
 | 
					            .ok_or(OperationError::FilesystemPermissionError)?;
 | 
				
			||||||
 | 
					        if filename.ends_with(".self") {
 | 
				
			||||||
 | 
					            // this is a value file, ignore
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let filename = filename.trim_start_matches(&config.master);
 | 
				
			||||||
 | 
					        let key = percent_decode_str(filename).collect();
 | 
				
			||||||
 | 
					        keys.push(key);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Ok(keys)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn set_key(
 | 
				
			||||||
 | 
					    config: LypheDBConfig,
 | 
				
			||||||
 | 
					    key: &[u8],
 | 
				
			||||||
 | 
					    value: &[u8],
 | 
				
			||||||
 | 
					    strat: &PropagationStrategy,
 | 
				
			||||||
 | 
					) -> Result<(), OperationError> {
 | 
				
			||||||
 | 
					    let k1 = Arc::new(key.to_vec());
 | 
				
			||||||
 | 
					    let v1 = Arc::new(value.to_vec());
 | 
				
			||||||
 | 
					    let disk_task = {
 | 
				
			||||||
 | 
					        let k1 = k1.clone();
 | 
				
			||||||
 | 
					        let v1 = v1.clone();
 | 
				
			||||||
 | 
					        tokio::spawn(async move { set_key_disk(&config, &k1, &v1).await })
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let prop_task = match strat {
 | 
				
			||||||
 | 
					        PropagationStrategy::Immediate => {
 | 
				
			||||||
 | 
					            let k1 = k1.clone();
 | 
				
			||||||
 | 
					            let v1 = v1.clone();
 | 
				
			||||||
 | 
					            tokio::spawn(async move {
 | 
				
			||||||
 | 
					                let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                secondary_cache.insert(k1, v1);
 | 
				
			||||||
 | 
					                let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                *primary_cache = secondary_cache.clone();
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        PropagationStrategy::Timeout => {
 | 
				
			||||||
 | 
					            let k1 = k1.clone();
 | 
				
			||||||
 | 
					            let v1 = v1.clone();
 | 
				
			||||||
 | 
					            tokio::spawn(async move {
 | 
				
			||||||
 | 
					                tokio::spawn(async move {
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                        secondary_cache.insert(k1, v1);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    tokio::spawn(async move {
 | 
				
			||||||
 | 
					                        let start = std::time::Instant::now();
 | 
				
			||||||
 | 
					                        loop {
 | 
				
			||||||
 | 
					                            if start.elapsed().as_secs() > 60 {
 | 
				
			||||||
 | 
					                                break;
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            let pc = PRIMARY_CACHE.try_write();
 | 
				
			||||||
 | 
					                            if pc.is_err() {
 | 
				
			||||||
 | 
					                                tokio::time::sleep(std::time::Duration::from_secs(10)).await;
 | 
				
			||||||
 | 
					                                continue;
 | 
				
			||||||
 | 
					                            } else {
 | 
				
			||||||
 | 
					                                break;
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                        let secondary_cache = SECONDARY_CACHE.read().await;
 | 
				
			||||||
 | 
					                        *primary_cache = secondary_cache.clone();
 | 
				
			||||||
 | 
					                    });
 | 
				
			||||||
 | 
					                });
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        PropagationStrategy::OnRead => {
 | 
				
			||||||
 | 
					            let k1 = k1.clone();
 | 
				
			||||||
 | 
					            tokio::spawn(async move {
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                    secondary_cache.remove(&k1);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                    primary_cache.remove(&k1);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if let Ok(Err(e)) = disk_task.await {
 | 
				
			||||||
 | 
					        error!("couldn't set key on disk: {:?} ({:?})", e, key);
 | 
				
			||||||
 | 
					        // undo propagation
 | 
				
			||||||
 | 
					        prop_task.abort();
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
				
			||||||
 | 
					            primary_cache.remove(&k1);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
				
			||||||
 | 
					            secondary_cache.remove(&k1);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        return Err(e);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let _ = prop_task.await;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Ok(())
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn get_key(config: LypheDBConfig, key: &[u8]) -> Result<Option<Vec<u8>>, OperationError> {
 | 
				
			||||||
 | 
					    let k1 = Arc::new(key.to_vec());
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        let k1 = k1.clone();
 | 
				
			||||||
 | 
					        tokio::spawn(async move {
 | 
				
			||||||
 | 
					            let mut access_counter = KEY_ACCESS_COUNTER.write().await;
 | 
				
			||||||
 | 
					            let counter = access_counter.entry(k1.clone()).or_insert(AtomicU64::new(0));
 | 
				
			||||||
 | 
					            if counter.load(Ordering::Relaxed) > 10000000 {
 | 
				
			||||||
 | 
					                return;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            counter.fetch_add(1, Ordering::SeqCst);
 | 
				
			||||||
 | 
					        });
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let disk_task = {
 | 
				
			||||||
 | 
					        let k1 = k1.clone();
 | 
				
			||||||
 | 
					        tokio::spawn(async move { get_key_disk(&config, &k1).await })
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        // read from cache
 | 
				
			||||||
 | 
					        let cache = PRIMARY_CACHE.read().await;
 | 
				
			||||||
 | 
					        if let Some(val) = cache.get(&k1) {
 | 
				
			||||||
 | 
					            disk_task.abort();
 | 
				
			||||||
 | 
					            return Ok(Some(val.to_vec()));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    //debug!("cache miss");
 | 
				
			||||||
 | 
					    if let Ok(result) = disk_task.await {
 | 
				
			||||||
 | 
					        if let Ok(Some(val)) = &result {
 | 
				
			||||||
 | 
					            let val = Arc::new(val.to_vec());
 | 
				
			||||||
 | 
					            tokio::spawn(async move {
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    let mut cache = SECONDARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                    cache.insert(k1.clone(), val.clone());
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    let secondary_cache = SECONDARY_CACHE.read().await;
 | 
				
			||||||
 | 
					                    let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                    *primary_cache = secondary_cache.clone();
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                //debug!("cache insert");
 | 
				
			||||||
 | 
					            });
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        result
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        Err(OperationError::FilesystemPermissionError)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn delete_key(config: LypheDBConfig, key: &[u8]) -> Result<(), OperationError> {
 | 
				
			||||||
 | 
					    let k1 = Arc::new(key.to_vec());
 | 
				
			||||||
 | 
					    let disk_task = {
 | 
				
			||||||
 | 
					        let k1 = k1.clone();
 | 
				
			||||||
 | 
					        tokio::spawn(async move { delete_key_disk(&config, &k1).await })
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    let prop_task = {
 | 
				
			||||||
 | 
					        let k1 = k1.clone();
 | 
				
			||||||
 | 
					        tokio::spawn(async move {
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                let mut key_access_counter = KEY_ACCESS_COUNTER.write().await;
 | 
				
			||||||
 | 
					                key_access_counter.remove(&k1);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                let mut secondary_cache = SECONDARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                secondary_cache.remove(&k1);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
				
			||||||
 | 
					            let secondary_cache = SECONDARY_CACHE.read().await;
 | 
				
			||||||
 | 
					            *primary_cache = secondary_cache.clone();
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    prop_task.await.expect("couldn't delete key");
 | 
				
			||||||
 | 
					    disk_task.await.expect("couldn't delete key")
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										49
									
								
								lyphedb/src/lib.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								lyphedb/src/lib.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,49 @@
 | 
				
			||||||
 | 
					use serde::{Deserialize, Serialize};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
				
			||||||
 | 
					pub enum LDBNatsMessage {
 | 
				
			||||||
 | 
					    Command(LypheDBCommand),
 | 
				
			||||||
 | 
					    Entries(KVList),
 | 
				
			||||||
 | 
					    Count(u64),
 | 
				
			||||||
 | 
					    Success,
 | 
				
			||||||
 | 
					    BadRequest,
 | 
				
			||||||
 | 
					    NotFound,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
				
			||||||
 | 
					pub enum PropagationStrategy {
 | 
				
			||||||
 | 
					    /// Reads will immediately be able to read this key's value as soon as it has been set
 | 
				
			||||||
 | 
					    Immediate,
 | 
				
			||||||
 | 
					    /// The value change will be queued along with others,
 | 
				
			||||||
 | 
					    /// then in a period of either inactivity or a maximum time, all changes will be immediately
 | 
				
			||||||
 | 
					    /// seen at once
 | 
				
			||||||
 | 
					    Timeout,
 | 
				
			||||||
 | 
					    /// The key will be removed from the cache, and thus the next read will cause a cache miss,
 | 
				
			||||||
 | 
					    /// and the value will be loaded from disk
 | 
				
			||||||
 | 
					    OnRead,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
				
			||||||
 | 
					pub enum LypheDBCommand {
 | 
				
			||||||
 | 
					    SetKeys(KVList, PropagationStrategy),
 | 
				
			||||||
 | 
					    GetKeys(KeyList),
 | 
				
			||||||
 | 
					    CountKeys(KeyDirectory),
 | 
				
			||||||
 | 
					    /// NOT RECURSIVE
 | 
				
			||||||
 | 
					    GetKeyDirectory(KeyDirectory),
 | 
				
			||||||
 | 
					    DeleteKeys(KeyList),
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
				
			||||||
 | 
					pub struct KVList {
 | 
				
			||||||
 | 
					    pub kvs: Vec<(Vec<u8>, Vec<u8>)>,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
				
			||||||
 | 
					pub struct KeyList {
 | 
				
			||||||
 | 
					    pub keys: Vec<Vec<u8>>,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Clone, Debug, Serialize, Deserialize)]
 | 
				
			||||||
 | 
					pub struct KeyDirectory {
 | 
				
			||||||
 | 
					    pub key: Vec<u8>,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										248
									
								
								lyphedb/src/main.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										248
									
								
								lyphedb/src/main.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,248 @@
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					keys are stored on disk like this
 | 
				
			||||||
 | 
					<master>/<keyname>/.self
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					so if i stored the key "/this/is/a/key" with the data "hello world", it'd look like this
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					<master>/this/is/a/key/.self -> "hello world"
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use crate::config::{LypheDBConfig, load_config};
 | 
				
			||||||
 | 
					use crate::dbimpl::{KEY_ACCESS_COUNTER, PRIMARY_CACHE, SECONDARY_CACHE};
 | 
				
			||||||
 | 
					use futures::StreamExt;
 | 
				
			||||||
 | 
					use log::{debug, error, info, warn};
 | 
				
			||||||
 | 
					use lyphedb::{KVList, LDBNatsMessage, LypheDBCommand};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mod config;
 | 
				
			||||||
 | 
					mod dbimpl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn gc_thread(config: LypheDBConfig) {
 | 
				
			||||||
 | 
					    loop {
 | 
				
			||||||
 | 
					        tokio::time::sleep(std::time::Duration::from_secs(61)).await;
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            let cache = PRIMARY_CACHE.read().await;
 | 
				
			||||||
 | 
					            let cache_size = cache.len() * config.avg_entry_size;
 | 
				
			||||||
 | 
					            if cache_size > config.gc_limit {
 | 
				
			||||||
 | 
					                debug!("gc triggered, cache size: {} bytes", cache_size);
 | 
				
			||||||
 | 
					                let keycount_to_remove = cache.len() - config.gc_limit / config.avg_entry_size;
 | 
				
			||||||
 | 
					                drop(cache);
 | 
				
			||||||
 | 
					                let mut least_freq_keys = vec![];
 | 
				
			||||||
 | 
					                for (key, count) in KEY_ACCESS_COUNTER.read().await.iter() {
 | 
				
			||||||
 | 
					                    let count = count.load(std::sync::atomic::Ordering::Relaxed);
 | 
				
			||||||
 | 
					                    if least_freq_keys.len() < keycount_to_remove {
 | 
				
			||||||
 | 
					                        least_freq_keys.push((key.clone(), count));
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        for (other, oc) in least_freq_keys.iter_mut() {
 | 
				
			||||||
 | 
					                            if count < *oc {
 | 
				
			||||||
 | 
					                                *other = key.clone();
 | 
				
			||||||
 | 
					                                *oc = count;
 | 
				
			||||||
 | 
					                                break;
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                let mut cache = SECONDARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                for (key, _) in least_freq_keys.iter() {
 | 
				
			||||||
 | 
					                    cache.remove(key);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                let mut primary_cache = PRIMARY_CACHE.write().await;
 | 
				
			||||||
 | 
					                *primary_cache = cache.clone();
 | 
				
			||||||
 | 
					                debug!(
 | 
				
			||||||
 | 
					                    "gc finished, cache size: {} bytes",
 | 
				
			||||||
 | 
					                    primary_cache.len() * config.avg_entry_size
 | 
				
			||||||
 | 
					                );
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub async fn precache_keys_until_limit(config: LypheDBConfig) {
 | 
				
			||||||
 | 
					    info!("precache started");
 | 
				
			||||||
 | 
					    let mut precache_count = 0;
 | 
				
			||||||
 | 
					    let mut precache_stack = dbimpl::get_keys_under_keydir_disk(&config, b"")
 | 
				
			||||||
 | 
					        .await
 | 
				
			||||||
 | 
					        .expect("couldn't get root keys");
 | 
				
			||||||
 | 
					    while let Some(precache_key) = precache_stack.pop() {
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            let cache = PRIMARY_CACHE.read().await;
 | 
				
			||||||
 | 
					            if cache.len() * config.avg_entry_size > config.gc_limit {
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            if cache.len() % 10000 == 0 {
 | 
				
			||||||
 | 
					                info!("precache size: {} mb", (cache.len() * config.avg_entry_size) / (1024*1024));
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let children = dbimpl::get_keys_under_keydir_disk(&config, &precache_key)
 | 
				
			||||||
 | 
					            .await
 | 
				
			||||||
 | 
					            .expect("couldn't get children of key");
 | 
				
			||||||
 | 
					        if !children.is_empty() {
 | 
				
			||||||
 | 
					            precache_stack.extend(children);
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            let _value = dbimpl::get_key(config.clone(), &precache_key)
 | 
				
			||||||
 | 
					                .await
 | 
				
			||||||
 | 
					                .expect("couldn't get value of key");
 | 
				
			||||||
 | 
					            precache_count += 1;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    info!("precache finished, {} values precached", precache_count);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[tokio::main]
 | 
				
			||||||
 | 
					async fn main() {
 | 
				
			||||||
 | 
					    let config = load_config(&std::env::args().nth(1).expect("please specify config file"));
 | 
				
			||||||
 | 
					    let logger = env_logger::builder()
 | 
				
			||||||
 | 
					        .filter_level(match config.log.as_str() {
 | 
				
			||||||
 | 
					            "debug" => log::LevelFilter::Debug,
 | 
				
			||||||
 | 
					            "info" => log::LevelFilter::Info,
 | 
				
			||||||
 | 
					            "warn" => log::LevelFilter::Warn,
 | 
				
			||||||
 | 
					            "error" => log::LevelFilter::Error,
 | 
				
			||||||
 | 
					            _ => unreachable!(),
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					        .build();
 | 
				
			||||||
 | 
					    log::set_boxed_logger(Box::new(logger)).unwrap();
 | 
				
			||||||
 | 
					    log::set_max_level(match config.log.as_str() {
 | 
				
			||||||
 | 
					        "debug" => log::LevelFilter::Debug,
 | 
				
			||||||
 | 
					        "info" => log::LevelFilter::Info,
 | 
				
			||||||
 | 
					        "warn" => log::LevelFilter::Warn,
 | 
				
			||||||
 | 
					        "error" => log::LevelFilter::Error,
 | 
				
			||||||
 | 
					        _ => unreachable!(),
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					    info!("lyphedb started");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let nats = async_nats::ConnectOptions::new()
 | 
				
			||||||
 | 
					        .add_client_certificate(
 | 
				
			||||||
 | 
					            config.nats_cert.as_str().into(),
 | 
				
			||||||
 | 
					            config.nats_key.as_str().into(),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        .connect(config.nats_url.as_str())
 | 
				
			||||||
 | 
					        .await;
 | 
				
			||||||
 | 
					    if let Err(e) = nats {
 | 
				
			||||||
 | 
					        error!("FATAL ERROR, COULDN'T CONNECT TO NATS: {}", e);
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let nats = nats.unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut subscriber = nats
 | 
				
			||||||
 | 
					        .queue_subscribe(config.name.clone(), "lyphedb".to_string())
 | 
				
			||||||
 | 
					        .await
 | 
				
			||||||
 | 
					        .expect("couldn't subscribe to subject");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    info!("nats connected");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    tokio::spawn(precache_keys_until_limit(config.clone()));
 | 
				
			||||||
 | 
					    tokio::spawn(gc_thread(config.clone()));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    while let Some(msg) = subscriber.next().await {
 | 
				
			||||||
 | 
					        let config = config.clone();
 | 
				
			||||||
 | 
					        let nats = nats.clone();
 | 
				
			||||||
 | 
					        tokio::spawn(async move {
 | 
				
			||||||
 | 
					            async fn bad_request(nats: &async_nats::Client, replyto: async_nats::Subject) {
 | 
				
			||||||
 | 
					                let reply = rmp_serde::to_vec(&LDBNatsMessage::BadRequest).unwrap();
 | 
				
			||||||
 | 
					                if let Err(e) = nats.publish(replyto, reply.into()).await {
 | 
				
			||||||
 | 
					                    warn!("couldn't send reply: {:?}", e);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            let data = msg.payload.to_vec();
 | 
				
			||||||
 | 
					            let data = rmp_serde::from_slice::<LDBNatsMessage>(&data);
 | 
				
			||||||
 | 
					            if let Err(e) = data {
 | 
				
			||||||
 | 
					                warn!("couldn't deserialize message: {:?}", e);
 | 
				
			||||||
 | 
					                if let Some(replyto) = msg.reply {
 | 
				
			||||||
 | 
					                    bad_request(&nats, replyto).await;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                return;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            let data = data.unwrap();
 | 
				
			||||||
 | 
					            match data {
 | 
				
			||||||
 | 
					                LDBNatsMessage::Command(cmd) => match cmd {
 | 
				
			||||||
 | 
					                    LypheDBCommand::SetKeys(kvlist, propstrat) => {
 | 
				
			||||||
 | 
					                        for (key, value) in kvlist.kvs {
 | 
				
			||||||
 | 
					                            if let Err(e) =
 | 
				
			||||||
 | 
					                                dbimpl::set_key(config.clone(), &key, &value, &propstrat).await
 | 
				
			||||||
 | 
					                            {
 | 
				
			||||||
 | 
					                                warn!("couldn't set key: {:?}", e);
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        let reply = rmp_serde::to_vec(&LDBNatsMessage::Success).unwrap();
 | 
				
			||||||
 | 
					                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
				
			||||||
 | 
					                            warn!("couldn't send reply: {:?}", e);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    LypheDBCommand::GetKeys(klist) => {
 | 
				
			||||||
 | 
					                        let mut reply = Vec::new();
 | 
				
			||||||
 | 
					                        for key in klist.keys {
 | 
				
			||||||
 | 
					                            if let Ok(Some(value)) = dbimpl::get_key(config.clone(), &key).await {
 | 
				
			||||||
 | 
					                                reply.push((key, value));
 | 
				
			||||||
 | 
					                            } else {
 | 
				
			||||||
 | 
					                                warn!("couldn't get key: {:?}", key);
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        let reply =
 | 
				
			||||||
 | 
					                            rmp_serde::to_vec(&LDBNatsMessage::Entries(KVList { kvs: reply }))
 | 
				
			||||||
 | 
					                                .unwrap();
 | 
				
			||||||
 | 
					                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
				
			||||||
 | 
					                            warn!("couldn't send reply: {:?}", e);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    LypheDBCommand::CountKeys(keydir) => {
 | 
				
			||||||
 | 
					                        let keys = dbimpl::get_keys_under_keydir_disk(&config, &keydir.key)
 | 
				
			||||||
 | 
					                            .await
 | 
				
			||||||
 | 
					                            .expect("couldn't get keys under keydir");
 | 
				
			||||||
 | 
					                        let mut count = 0;
 | 
				
			||||||
 | 
					                        for key in keys {
 | 
				
			||||||
 | 
					                            let value = dbimpl::get_key(config.clone(), &key)
 | 
				
			||||||
 | 
					                                .await
 | 
				
			||||||
 | 
					                                .expect("couldn't get value of key");
 | 
				
			||||||
 | 
					                            if value.is_some() {
 | 
				
			||||||
 | 
					                                count += 1;
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        let reply = rmp_serde::to_vec(&LDBNatsMessage::Count(count)).unwrap();
 | 
				
			||||||
 | 
					                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
				
			||||||
 | 
					                            warn!("couldn't send reply: {:?}", e);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    LypheDBCommand::GetKeyDirectory(keydir) => {
 | 
				
			||||||
 | 
					                        let mut reply = Vec::new();
 | 
				
			||||||
 | 
					                        let keys = dbimpl::get_keys_under_keydir_disk(&config, &keydir.key)
 | 
				
			||||||
 | 
					                            .await
 | 
				
			||||||
 | 
					                            .expect("couldn't get keys under keydir");
 | 
				
			||||||
 | 
					                        for key in keys {
 | 
				
			||||||
 | 
					                            let value = dbimpl::get_key(config.clone(), &key)
 | 
				
			||||||
 | 
					                                .await
 | 
				
			||||||
 | 
					                                .expect("couldn't get value of key");
 | 
				
			||||||
 | 
					                            if let Some(value) = value {
 | 
				
			||||||
 | 
					                                reply.push((key, value));
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        let reply =
 | 
				
			||||||
 | 
					                            rmp_serde::to_vec(&LDBNatsMessage::Entries(KVList { kvs: reply }))
 | 
				
			||||||
 | 
					                                .unwrap();
 | 
				
			||||||
 | 
					                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
				
			||||||
 | 
					                            warn!("couldn't send reply: {:?}", e);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    LypheDBCommand::DeleteKeys(klist) => {
 | 
				
			||||||
 | 
					                        for key in klist.keys {
 | 
				
			||||||
 | 
					                            if let Err(e) = dbimpl::delete_key(config.clone(), &key).await {
 | 
				
			||||||
 | 
					                                warn!("couldn't delete key: {:?}", e);
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        let reply = rmp_serde::to_vec(&LDBNatsMessage::Success).unwrap();
 | 
				
			||||||
 | 
					                        if let Err(e) = nats.publish(msg.reply.unwrap(), reply.into()).await {
 | 
				
			||||||
 | 
					                            warn!("couldn't send reply: {:?}", e);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					                _ => {
 | 
				
			||||||
 | 
					                    warn!("bad request, not command");
 | 
				
			||||||
 | 
					                    if let Some(replyto) = msg.reply {
 | 
				
			||||||
 | 
					                        bad_request(&nats, replyto).await;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        });
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    info!("lyphedb shutting down");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -8,7 +8,7 @@ license-file = "LICENSE"
 | 
				
			||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
					# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[dependencies]
 | 
					[dependencies]
 | 
				
			||||||
asklyphe-common = { path = "../asklyphe-common" }
 | 
					asklyphe-common = { path = "../asklyphe-common", features = ["foundationdb"] }
 | 
				
			||||||
tokio = { version = "1.0", features = ["full"] }
 | 
					tokio = { version = "1.0", features = ["full"] }
 | 
				
			||||||
serde = { version = "1.0", features = ["derive"] }
 | 
					serde = { version = "1.0", features = ["derive"] }
 | 
				
			||||||
rmp-serde = "1.1.2"
 | 
					rmp-serde = "1.1.2"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -16,7 +16,7 @@ use astro_float::{BigFloat, RoundingMode};
 | 
				
			||||||
use once_cell::sync::Lazy;
 | 
					use once_cell::sync::Lazy;
 | 
				
			||||||
use std::collections::BTreeMap;
 | 
					use std::collections::BTreeMap;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub const PRECISION: usize = 1024;
 | 
					pub const PRECISION: usize = 2048;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// length unit -> value in meters
 | 
					// length unit -> value in meters
 | 
				
			||||||
pub static LENGTH_STORE: Lazy<BTreeMap<LengthUnit, BigFloat>> = Lazy::new(|| {
 | 
					pub static LENGTH_STORE: Lazy<BTreeMap<LengthUnit, BigFloat>> = Lazy::new(|| {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,7 +15,7 @@ use crate::length_units::{LengthUnit, LENGTH_NAMES, LENGTH_STORE, PRECISION};
 | 
				
			||||||
use crate::unit_defs::{ConvertTo, MetricPrefix};
 | 
					use crate::unit_defs::{ConvertTo, MetricPrefix};
 | 
				
			||||||
use astro_float::{BigFloat, Consts, Radix, RoundingMode};
 | 
					use astro_float::{BigFloat, Consts, Radix, RoundingMode};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub const MAX_PRECISION: usize = 1024;
 | 
					pub const MAX_PRECISION: usize = 2048;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub mod length_units;
 | 
					pub mod length_units;
 | 
				
			||||||
pub mod unit_defs;
 | 
					pub mod unit_defs;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										1
									
								
								vorebot/.gitignore
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								vorebot/.gitignore
									
										
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1 @@
 | 
				
			||||||
 | 
					/target
 | 
				
			||||||
							
								
								
									
										28
									
								
								vorebot/Cargo.toml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								vorebot/Cargo.toml
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,28 @@
 | 
				
			||||||
 | 
					[package]
 | 
				
			||||||
 | 
					name = "vorebot"
 | 
				
			||||||
 | 
					version = "0.2.0"
 | 
				
			||||||
 | 
					edition = "2021"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[dependencies]
 | 
				
			||||||
 | 
					asklyphe-common = { path = "../asklyphe-common" }
 | 
				
			||||||
 | 
					tokio = { version = "1.0", features = ["full"] }
 | 
				
			||||||
 | 
					serde = { version = "1.0", features = ["derive"] }
 | 
				
			||||||
 | 
					rand = "0.8.5"
 | 
				
			||||||
 | 
					rmp-serde = "1.1.2"
 | 
				
			||||||
 | 
					base64 = "0.21.7"
 | 
				
			||||||
 | 
					image = "0.24.8"
 | 
				
			||||||
 | 
					isahc = "1.7.2"
 | 
				
			||||||
 | 
					ulid = "1.0.0"
 | 
				
			||||||
 | 
					async-nats = "0.38.0"
 | 
				
			||||||
 | 
					futures = "0.3.28"
 | 
				
			||||||
 | 
					chrono = "0.4.26"
 | 
				
			||||||
 | 
					once_cell = "1.18.0"
 | 
				
			||||||
 | 
					env_logger = "0.10.0"
 | 
				
			||||||
 | 
					log = "0.4.19"
 | 
				
			||||||
 | 
					mutex-timeouts = { version = "0.3.0", features = ["tokio"] }
 | 
				
			||||||
 | 
					prometheus_exporter = "0.8.5"
 | 
				
			||||||
 | 
					thirtyfour = "0.35.0"
 | 
				
			||||||
 | 
					stopwords = "0.1.1"
 | 
				
			||||||
 | 
					texting_robots = "0.2.2"
 | 
				
			||||||
							
								
								
									
										400
									
								
								vorebot/src/main.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										400
									
								
								vorebot/src/main.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,400 @@
 | 
				
			||||||
 | 
					mod webparse;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use asklyphe_common::nats::vorebot::{
 | 
				
			||||||
 | 
					    VOREBOT_NEWHOSTNAME_SERVICE, VOREBOT_SERVICE, VOREBOT_SUGGESTED_SERVICE,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					use async_nats::jetstream;
 | 
				
			||||||
 | 
					use async_nats::jetstream::consumer::PullConsumer;
 | 
				
			||||||
 | 
					use async_nats::jetstream::stream::RetentionPolicy;
 | 
				
			||||||
 | 
					use chrono::TimeZone;
 | 
				
			||||||
 | 
					use futures::StreamExt;
 | 
				
			||||||
 | 
					use log::{debug, error, info, warn};
 | 
				
			||||||
 | 
					use mutex_timeouts::tokio::MutexWithTimeoutAuto as Mutex;
 | 
				
			||||||
 | 
					use once_cell::sync::Lazy;
 | 
				
			||||||
 | 
					use prometheus_exporter::prometheus::core::{AtomicF64, GenericGauge};
 | 
				
			||||||
 | 
					use prometheus_exporter::prometheus::{register_counter, register_gauge, Counter};
 | 
				
			||||||
 | 
					use std::cmp::max;
 | 
				
			||||||
 | 
					use std::collections::{BTreeMap, BTreeSet};
 | 
				
			||||||
 | 
					use std::hash::{DefaultHasher, Hasher};
 | 
				
			||||||
 | 
					use std::io::Read;
 | 
				
			||||||
 | 
					use std::iter::Iterator;
 | 
				
			||||||
 | 
					use std::str::FromStr;
 | 
				
			||||||
 | 
					use std::string::ToString;
 | 
				
			||||||
 | 
					use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
 | 
				
			||||||
 | 
					use std::sync::Arc;
 | 
				
			||||||
 | 
					use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
 | 
				
			||||||
 | 
					use async_nats::jetstream::kv;
 | 
				
			||||||
 | 
					use stopwords::{Language, Spark, Stopwords};
 | 
				
			||||||
 | 
					use thirtyfour::{CapabilitiesHelper, DesiredCapabilities, Proxy, WebDriver};
 | 
				
			||||||
 | 
					use thirtyfour::common::capabilities::firefox::FirefoxPreferences;
 | 
				
			||||||
 | 
					use tokio::task::JoinHandle;
 | 
				
			||||||
 | 
					use asklyphe_common::ldb::DBConn;
 | 
				
			||||||
 | 
					use asklyphe_common::nats::vorebot::CrawlRequest;
 | 
				
			||||||
 | 
					use crate::webparse::web_parse;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub static NATS_URL: Lazy<String> =
 | 
				
			||||||
 | 
					    Lazy::new(|| std::env::var("NATS_URL").expect("NO NATS URL DEFINED"));
 | 
				
			||||||
 | 
					pub static NATS_PASSWORD: Lazy<Option<String>> = Lazy::new(|| {
 | 
				
			||||||
 | 
					    std::env::var("NATS_PASSWORD").ok()
 | 
				
			||||||
 | 
					});
 | 
				
			||||||
 | 
					pub static NATS_CERT: Lazy<String> = Lazy::new(|| std::env::var("NATS_CERT").expect("NO NATS_CERT DEFINED"));
 | 
				
			||||||
 | 
					pub static NATS_KEY: Lazy<String> = Lazy::new(|| std::env::var("NATS_KEY").expect("NO NATS_KEY DEFINED"));
 | 
				
			||||||
 | 
					pub static BROWSER_THREADS: Lazy<Vec<String>> =
 | 
				
			||||||
 | 
					    Lazy::new(|| std::env::var("BROWSER_THREADS").expect("PLEASE LIST BROWSER_THREADS").split(',').map(|v| v.to_string()).collect());
 | 
				
			||||||
 | 
					pub static BROWSER_PROXY: Lazy<Option<String>> = Lazy::new(|| {
 | 
				
			||||||
 | 
					    std::env::var("BROWSER_PROXY").ok()
 | 
				
			||||||
 | 
					});
 | 
				
			||||||
 | 
					pub static DB_NAME: Lazy<String> =
 | 
				
			||||||
 | 
					    Lazy::new(|| std::env::var("DB_NAME").expect("PLEASE ADD DB_NAME"));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// in minutes
 | 
				
			||||||
 | 
					const DEFAULT_CRAWLER_TIMEOUT: u64 = 5;
 | 
				
			||||||
 | 
					pub static CRAWLER_TIMEOUT: Lazy<u64> =
 | 
				
			||||||
 | 
					    Lazy::new(|| std::env::var("CRAWLER_TIMEOUT").map(|v| v.parse::<u64>().ok()).ok().flatten().unwrap_or(DEFAULT_CRAWLER_TIMEOUT));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub static DOCUMENTS_CRAWLED: AtomicU64 = AtomicU64::new(0);
 | 
				
			||||||
 | 
					pub static LAST_MESSAGE: AtomicI64 = AtomicI64::new(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub static LAST_TASK_COMPLETE: Lazy<Vec<Arc<AtomicI64>>> = Lazy::new(|| {
 | 
				
			||||||
 | 
					    let max_threads: usize = BROWSER_THREADS.len();
 | 
				
			||||||
 | 
					    let mut vals = vec![];
 | 
				
			||||||
 | 
					    for i in 0..max_threads {
 | 
				
			||||||
 | 
					        //    let db = Database::default().expect("couldn't connect to foundation db!");
 | 
				
			||||||
 | 
					        //    DBS.lock().await.push(Arc::new(db));
 | 
				
			||||||
 | 
					        vals.push(Arc::new(AtomicI64::new(chrono::Utc::now().timestamp())));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    vals
 | 
				
			||||||
 | 
					});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub static USER_AGENT: Lazy<String> = Lazy::new(|| {
 | 
				
			||||||
 | 
					    format!(
 | 
				
			||||||
 | 
					        "Vorebot/{} (compatible; Googlebot/2.1; +https://voremicrocomputers.com/crawler.html)",
 | 
				
			||||||
 | 
					        env!("CARGO_PKG_VERSION")
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[tokio::main]
 | 
				
			||||||
 | 
					async fn main() {
 | 
				
			||||||
 | 
					    mutex_timeouts::tokio::GLOBAL_TOKIO_TIMEOUT.store(72, Ordering::Relaxed);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    env_logger::init();
 | 
				
			||||||
 | 
					    info!("began at {}", chrono::Utc::now().to_string());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut nats = async_nats::ConnectOptions::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if let Some(password) = NATS_PASSWORD.as_ref() {
 | 
				
			||||||
 | 
					        nats = nats.user_and_password("vorebot".to_string(), password.to_string());
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        nats = nats.add_client_certificate(NATS_CERT.as_str().into(), NATS_KEY.as_str().into());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let nats = nats.connect(NATS_URL.as_str())
 | 
				
			||||||
 | 
					        .await;
 | 
				
			||||||
 | 
					    if let Err(e) = nats {
 | 
				
			||||||
 | 
					        error!("FATAL ERROR, COULDN'T CONNECT TO NATS: {}", e);
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let nats = nats.unwrap();
 | 
				
			||||||
 | 
					    let dbconn = DBConn::new(nats.clone(), DB_NAME.to_string());
 | 
				
			||||||
 | 
					    let nats = jetstream::new(nats);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // fixme: remove this once we have proper site suggestion interface
 | 
				
			||||||
 | 
					    let args = std::env::args().collect::<Vec<_>>();
 | 
				
			||||||
 | 
					    if let Some(suggestion_site) = args.get(1) {
 | 
				
			||||||
 | 
					        let damping = args.get(2).map(|v| v.parse::<f64>().expect("BAD FP")).unwrap_or(0.45);
 | 
				
			||||||
 | 
					        warn!("suggesting {} with damping {}", suggestion_site, damping);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let ack = nats.publish(VOREBOT_SUGGESTED_SERVICE.to_string(), rmp_serde::to_vec(&CrawlRequest {
 | 
				
			||||||
 | 
					            url: suggestion_site.to_string(),
 | 
				
			||||||
 | 
					            damping,
 | 
				
			||||||
 | 
					        }).unwrap().into()).await.unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ack.await.expect("FATAL ERROR");
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut tasks: Vec<(JoinHandle<()>, String, Arc<AtomicU64>)> = vec![];
 | 
				
			||||||
 | 
					    let mut available_browsers: Vec<String> = BROWSER_THREADS.clone();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        loop {
 | 
				
			||||||
 | 
					            while tasks.len() < BROWSER_THREADS.len() {
 | 
				
			||||||
 | 
					                let nats = nats.clone();
 | 
				
			||||||
 | 
					                let browser = available_browsers.pop().expect("NO BROWSERS LEFT, THIS IS A FATAL BUG!");
 | 
				
			||||||
 | 
					                let db = dbconn.clone();
 | 
				
			||||||
 | 
					                let b = browser.clone();
 | 
				
			||||||
 | 
					                let last_parse = Arc::new(AtomicU64::new(SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs()));
 | 
				
			||||||
 | 
					                let lp = last_parse.clone();
 | 
				
			||||||
 | 
					                tasks.push((tokio::spawn(async move {
 | 
				
			||||||
 | 
					                    let browser = b;
 | 
				
			||||||
 | 
					                    info!("using {}", browser);
 | 
				
			||||||
 | 
					                    info!("crawler spawned");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    /* normal priority */
 | 
				
			||||||
 | 
					                    let consumer: PullConsumer = nats.get_or_create_stream(jetstream::stream::Config {
 | 
				
			||||||
 | 
					                        name: VOREBOT_SERVICE.to_string(),
 | 
				
			||||||
 | 
					                        subjects: vec![VOREBOT_SERVICE.to_string()],
 | 
				
			||||||
 | 
					                        retention: RetentionPolicy::WorkQueue,
 | 
				
			||||||
 | 
					                        ..Default::default()
 | 
				
			||||||
 | 
					                    }).await
 | 
				
			||||||
 | 
					                        .expect("FATAL! FAILED TO SUBSCRIBE TO NATS!")
 | 
				
			||||||
 | 
					                        .get_or_create_consumer("parser", jetstream::consumer::pull::Config {
 | 
				
			||||||
 | 
					                            durable_name: Some("parser".to_string()),
 | 
				
			||||||
 | 
					                            filter_subject: VOREBOT_SERVICE.to_string(),
 | 
				
			||||||
 | 
					                            ..Default::default()
 | 
				
			||||||
 | 
					                        }).await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
				
			||||||
 | 
					                    let mut messages = consumer.messages().await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    /* higher priority (new hostnames) */
 | 
				
			||||||
 | 
					                    let higher_consumer: PullConsumer = nats.get_or_create_stream(jetstream::stream::Config {
 | 
				
			||||||
 | 
					                        name: VOREBOT_NEWHOSTNAME_SERVICE.to_string(),
 | 
				
			||||||
 | 
					                        subjects: vec![VOREBOT_NEWHOSTNAME_SERVICE.to_string()],
 | 
				
			||||||
 | 
					                        retention: RetentionPolicy::WorkQueue,
 | 
				
			||||||
 | 
					                        ..Default::default()
 | 
				
			||||||
 | 
					                    }).await
 | 
				
			||||||
 | 
					                        .expect("FATAL! FAILED TO SUBSCRIBE TO NATS!")
 | 
				
			||||||
 | 
					                        .get_or_create_consumer("highparser", jetstream::consumer::pull::Config {
 | 
				
			||||||
 | 
					                            durable_name: Some("highparser".to_string()),
 | 
				
			||||||
 | 
					                            filter_subject: VOREBOT_NEWHOSTNAME_SERVICE.to_string(),
 | 
				
			||||||
 | 
					                            ..Default::default()
 | 
				
			||||||
 | 
					                        }).await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
				
			||||||
 | 
					                    let mut high_messages = higher_consumer.messages().await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    /* highest priority (user-suggested) */
 | 
				
			||||||
 | 
					                    let highest_consumer: PullConsumer = nats.get_or_create_stream(jetstream::stream::Config {
 | 
				
			||||||
 | 
					                        name: VOREBOT_SUGGESTED_SERVICE.to_string(),
 | 
				
			||||||
 | 
					                        subjects: vec![VOREBOT_SUGGESTED_SERVICE.to_string()],
 | 
				
			||||||
 | 
					                        retention: RetentionPolicy::WorkQueue,
 | 
				
			||||||
 | 
					                        ..Default::default()
 | 
				
			||||||
 | 
					                    }).await
 | 
				
			||||||
 | 
					                        .expect("FATAL! FAILED TO SUBSCRIBE TO NATS!")
 | 
				
			||||||
 | 
					                        .get_or_create_consumer("highestparser", jetstream::consumer::pull::Config {
 | 
				
			||||||
 | 
					                            durable_name: Some("highestparser".to_string()),
 | 
				
			||||||
 | 
					                            filter_subject: VOREBOT_SUGGESTED_SERVICE.to_string(),
 | 
				
			||||||
 | 
					                            ..Default::default()
 | 
				
			||||||
 | 
					                        }).await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
				
			||||||
 | 
					                    let mut highest_messages = highest_consumer.messages().await.expect("FATAL! FAILED TO SUBSCRIBE TO NATS!");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    let mut prefs = FirefoxPreferences::new();
 | 
				
			||||||
 | 
					                    prefs.set_user_agent(USER_AGENT.to_string()).unwrap();
 | 
				
			||||||
 | 
					                    let mut caps = DesiredCapabilities::firefox();
 | 
				
			||||||
 | 
					                    caps.set_preferences(prefs).unwrap();
 | 
				
			||||||
 | 
					                    if let Some(proxy) = BROWSER_PROXY.as_ref() {
 | 
				
			||||||
 | 
					                        caps.set_proxy(Proxy::Manual {
 | 
				
			||||||
 | 
					                            ftp_proxy: None,
 | 
				
			||||||
 | 
					                            http_proxy: Some(proxy.to_string()),
 | 
				
			||||||
 | 
					                            ssl_proxy: Some(proxy.to_string()),
 | 
				
			||||||
 | 
					                            socks_proxy: None,
 | 
				
			||||||
 | 
					                            socks_version: None,
 | 
				
			||||||
 | 
					                            socks_username: None,
 | 
				
			||||||
 | 
					                            socks_password: None,
 | 
				
			||||||
 | 
					                            no_proxy: None,
 | 
				
			||||||
 | 
					                        }).unwrap();
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    let driver = WebDriver::new(&browser, caps).await.unwrap();
 | 
				
			||||||
 | 
					                    info!("crawler ready");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    loop {
 | 
				
			||||||
 | 
					                        lp.store(SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(), Ordering::Relaxed);
 | 
				
			||||||
 | 
					                        tokio::select! {
 | 
				
			||||||
 | 
					                            Some(highest) = StreamExt::next(&mut highest_messages) => {
 | 
				
			||||||
 | 
					                                if let Err(e) = highest {
 | 
				
			||||||
 | 
					                                    warn!("error when recv js message! {e}");
 | 
				
			||||||
 | 
					                                } else {
 | 
				
			||||||
 | 
					                                    let message = highest.unwrap();
 | 
				
			||||||
 | 
					                                    if let Err(e) = message.ack().await {
 | 
				
			||||||
 | 
					                                        warn!("failed acking message {e}");
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                    let req = rmp_serde::from_slice::<CrawlRequest>(message.payload.as_ref());
 | 
				
			||||||
 | 
					                                    if let Err(e) = req {
 | 
				
			||||||
 | 
					                                        error!("BAD NATS REQUEST: {e}");
 | 
				
			||||||
 | 
					                                        continue;
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                    let req = req.unwrap();
 | 
				
			||||||
 | 
					                                    info!("RECV USER SUGGESTION!");
 | 
				
			||||||
 | 
					                                    let now = chrono::Utc::now().timestamp();
 | 
				
			||||||
 | 
					                                    LAST_MESSAGE.store(now, Ordering::Relaxed);
 | 
				
			||||||
 | 
					                                    let nats = nats.clone();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    let mut bad = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    driver.in_new_tab(|| async {
 | 
				
			||||||
 | 
					                                        if web_parse(nats.clone(), db.clone(), &driver, &req.url, req.damping).await.is_err() {
 | 
				
			||||||
 | 
					                                            warn!("temporary failure detected in parsing, requeuing");
 | 
				
			||||||
 | 
					                                            nats.publish(VOREBOT_SERVICE.to_string(), rmp_serde::to_vec(&req).unwrap().into()).await.expect("FAILED TO REQUEUE");
 | 
				
			||||||
 | 
					                                            bad = true;
 | 
				
			||||||
 | 
					                                        }
 | 
				
			||||||
 | 
					                                        Ok(())
 | 
				
			||||||
 | 
					                                    }).await.unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    if bad {
 | 
				
			||||||
 | 
					                                        continue;
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    if DOCUMENTS_CRAWLED.load(Ordering::Relaxed) % 100 == 0 {
 | 
				
			||||||
 | 
					                                        DOCUMENTS_CRAWLED.fetch_add(1, Ordering::Relaxed);
 | 
				
			||||||
 | 
					                                        info!("crawled {} pages!", DOCUMENTS_CRAWLED.load(Ordering::Relaxed));
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            Some(high) = StreamExt::next(&mut high_messages) => {
 | 
				
			||||||
 | 
					                                if let Err(e) = high {
 | 
				
			||||||
 | 
					                                    warn!("error when recv js message! {e}");
 | 
				
			||||||
 | 
					                                } else {
 | 
				
			||||||
 | 
					                                    let message = high.unwrap();
 | 
				
			||||||
 | 
					                                    if let Err(e) = message.ack().await {
 | 
				
			||||||
 | 
					                                        warn!("failed acking message {e}");
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                    let req = rmp_serde::from_slice::<CrawlRequest>(message.payload.as_ref());
 | 
				
			||||||
 | 
					                                    if let Err(e) = req {
 | 
				
			||||||
 | 
					                                        error!("BAD NATS REQUEST: {e}");
 | 
				
			||||||
 | 
					                                        continue;
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                    let req = req.unwrap();
 | 
				
			||||||
 | 
					                                    info!("RECV HIGH PRIORITY!");
 | 
				
			||||||
 | 
					                                    let now = chrono::Utc::now().timestamp();
 | 
				
			||||||
 | 
					                                    LAST_MESSAGE.store(now, Ordering::Relaxed);
 | 
				
			||||||
 | 
					                                    let nats = nats.clone();
 | 
				
			||||||
 | 
					                                    let mut bad = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    driver.in_new_tab(|| async {
 | 
				
			||||||
 | 
					                                        if web_parse(nats.clone(), db.clone(), &driver, &req.url, req.damping).await.is_err() {
 | 
				
			||||||
 | 
					                                            warn!("temporary failure detected in parsing, requeuing");
 | 
				
			||||||
 | 
					                                            nats.publish(VOREBOT_SERVICE.to_string(), rmp_serde::to_vec(&req).unwrap().into()).await.expect("FAILED TO REQUEUE");
 | 
				
			||||||
 | 
					                                            bad = true;
 | 
				
			||||||
 | 
					                                        }
 | 
				
			||||||
 | 
					                                        Ok(())
 | 
				
			||||||
 | 
					                                    }).await.unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    if bad {
 | 
				
			||||||
 | 
					                                        continue;
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    if DOCUMENTS_CRAWLED.load(Ordering::Relaxed) % 100 == 0 {
 | 
				
			||||||
 | 
					                                        DOCUMENTS_CRAWLED.fetch_add(1, Ordering::Relaxed);
 | 
				
			||||||
 | 
					                                        info!("crawled {} pages!", DOCUMENTS_CRAWLED.load(Ordering::Relaxed));
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            Some(normal) = StreamExt::next(&mut messages) => {
 | 
				
			||||||
 | 
					                                if let Err(e) = normal {
 | 
				
			||||||
 | 
					                                    warn!("error when recv js message! {e}");
 | 
				
			||||||
 | 
					                                } else {
 | 
				
			||||||
 | 
					                                    let message = normal.unwrap();
 | 
				
			||||||
 | 
					                                    if let Err(e) = message.ack().await {
 | 
				
			||||||
 | 
					                                        warn!("failed acking message {e}");
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                    let req = rmp_serde::from_slice::<CrawlRequest>(message.payload.as_ref());
 | 
				
			||||||
 | 
					                                    if let Err(e) = req {
 | 
				
			||||||
 | 
					                                        error!("BAD NATS REQUEST: {e}");
 | 
				
			||||||
 | 
					                                        continue;
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                    let req = req.unwrap();
 | 
				
			||||||
 | 
					                                    let now = chrono::Utc::now().timestamp();
 | 
				
			||||||
 | 
					                                    LAST_MESSAGE.store(now, Ordering::Relaxed);
 | 
				
			||||||
 | 
					                                    let nats = nats.clone();
 | 
				
			||||||
 | 
					                                    
 | 
				
			||||||
 | 
					                                    let mut hash = DefaultHasher::new();
 | 
				
			||||||
 | 
					                                    hash.write(req.url.as_bytes());
 | 
				
			||||||
 | 
					                                    let hash = hash.finish();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    let dehomo_bucket = nats.get_key_value("dehomo").await;
 | 
				
			||||||
 | 
					                                    let dehomo_bucket = if dehomo_bucket.is_err() {
 | 
				
			||||||
 | 
					                                        let dehomo_bucket = nats.create_key_value(kv::Config {
 | 
				
			||||||
 | 
					                                            bucket: "dehomo".to_string(),
 | 
				
			||||||
 | 
					                                            description: "prevent the same url from being scraped again too quickly".to_string(),
 | 
				
			||||||
 | 
					                                            max_age: Duration::from_secs(60*60),
 | 
				
			||||||
 | 
					                                            ..Default::default()
 | 
				
			||||||
 | 
					                                        }).await;
 | 
				
			||||||
 | 
					                                        if let Err(e) = dehomo_bucket {
 | 
				
			||||||
 | 
					                                            panic!("FAILED TO CREATE DEHOMO BUCKET!!! {e}");
 | 
				
			||||||
 | 
					                                        } else {
 | 
				
			||||||
 | 
					                                            dehomo_bucket.unwrap()
 | 
				
			||||||
 | 
					                                        }
 | 
				
			||||||
 | 
					                                    } else {
 | 
				
			||||||
 | 
					                                        dehomo_bucket.unwrap()
 | 
				
			||||||
 | 
					                                    };
 | 
				
			||||||
 | 
					                                    if dehomo_bucket.get(hash.to_string()).await.ok().flatten().map(|v| *v.first().unwrap_or(&0) == 1).unwrap_or(false) {
 | 
				
			||||||
 | 
					                                        info!("too soon to scrape {}", req.url);
 | 
				
			||||||
 | 
					                                        continue;
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                    
 | 
				
			||||||
 | 
					                                    let mut bad = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    driver.in_new_tab(|| async {
 | 
				
			||||||
 | 
					                                        if web_parse(nats.clone(), db.clone(), &driver, &req.url, req.damping).await.is_err() {
 | 
				
			||||||
 | 
					                                            warn!("temporary failure detected in parsing, requeuing");
 | 
				
			||||||
 | 
					                                            nats.publish(VOREBOT_SERVICE.to_string(), rmp_serde::to_vec(&req).unwrap().into()).await.expect("FAILED TO REQUEUE");
 | 
				
			||||||
 | 
					                                            bad = true;
 | 
				
			||||||
 | 
					                                        }
 | 
				
			||||||
 | 
					                                        Ok(())
 | 
				
			||||||
 | 
					                                    }).await.unwrap();
 | 
				
			||||||
 | 
					                                    
 | 
				
			||||||
 | 
					                                    if bad {
 | 
				
			||||||
 | 
					                                        continue;
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                    
 | 
				
			||||||
 | 
					                                    dehomo_bucket.put(hash.to_string(), vec![1u8].into()).await.expect("failed to store dehomo");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                                    if DOCUMENTS_CRAWLED.load(Ordering::Relaxed) % 100 == 0 {
 | 
				
			||||||
 | 
					                                        DOCUMENTS_CRAWLED.fetch_add(1, Ordering::Relaxed);
 | 
				
			||||||
 | 
					                                        info!("crawled {} pages!", DOCUMENTS_CRAWLED.load(Ordering::Relaxed));
 | 
				
			||||||
 | 
					                                    }
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }),
 | 
				
			||||||
 | 
					                            browser, last_parse.clone()
 | 
				
			||||||
 | 
					                ));
 | 
				
			||||||
 | 
					                warn!("spawning new injest thread");
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            let mut tasks_to_remove = vec![];
 | 
				
			||||||
 | 
					            for task in tasks.iter() {
 | 
				
			||||||
 | 
					                if task.0.is_finished() {
 | 
				
			||||||
 | 
					                    tasks_to_remove.push(task.1.clone());
 | 
				
			||||||
 | 
					                    available_browsers.push(task.1.clone());
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                let last_parse = task.2.load(Ordering::Relaxed);
 | 
				
			||||||
 | 
					                if SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs() > (*CRAWLER_TIMEOUT * 60) + last_parse {
 | 
				
			||||||
 | 
					                    // task has been taking too long
 | 
				
			||||||
 | 
					                    warn!("task taking too long! aborting!");
 | 
				
			||||||
 | 
					                    task.0.abort();
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            tasks.retain(|v| !tasks_to_remove.contains(&v.1));
 | 
				
			||||||
 | 
					            tokio::time::sleep(Duration::from_secs(3)).await;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//#[tokio::main]
 | 
				
			||||||
 | 
					//async fn main() {
 | 
				
			||||||
 | 
					//    mutex_timeouts::tokio::GLOBAL_TOKIO_TIMEOUT.store(72, Ordering::Relaxed);
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    env_logger::init();
 | 
				
			||||||
 | 
					//    info!("began at {}", chrono::Utc::now().to_string());
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    let nats = async_nats::connect(NATS_URL.as_str()).await;
 | 
				
			||||||
 | 
					//    if let Err(e) = nats {
 | 
				
			||||||
 | 
					//        error!("FATAL ERROR, COULDN'T CONNECT TO NATS: {}", e);
 | 
				
			||||||
 | 
					//        return;
 | 
				
			||||||
 | 
					//    }
 | 
				
			||||||
 | 
					//    let nats = nats.unwrap();
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    let dbconn = DBConn::new(nats.clone(), "lyphedb-test");
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    let nats = jetstream::new(nats);
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    let mut prefs = FirefoxPreferences::new();
 | 
				
			||||||
 | 
					//    prefs.set_user_agent(USER_AGENT.to_string()).unwrap();
 | 
				
			||||||
 | 
					//    let mut caps = DesiredCapabilities::firefox();
 | 
				
			||||||
 | 
					//    caps.set_preferences(prefs).unwrap();
 | 
				
			||||||
 | 
					//    let driver = WebDriver::new(&BROWSER_THREADS[0], caps).await.unwrap();
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    driver.in_new_tab(|| async {
 | 
				
			||||||
 | 
					//        web_parse(nats.clone(), dbconn.clone(), &driver, "https://asklyphe.com/", 0.85).await.expect("Failed to run web parse");
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//        Ok(())
 | 
				
			||||||
 | 
					//    }).await.unwrap();
 | 
				
			||||||
 | 
					//}
 | 
				
			||||||
							
								
								
									
										492
									
								
								vorebot/src/webparse/mod.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										492
									
								
								vorebot/src/webparse/mod.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,492 @@
 | 
				
			||||||
 | 
					use crate::USER_AGENT;
 | 
				
			||||||
 | 
					use asklyphe_common::ldb::{linkrelstore, linkstore, metastore, sitestore, titlestore, wordstore, DBConn};
 | 
				
			||||||
 | 
					use async_nats::jetstream;
 | 
				
			||||||
 | 
					use async_nats::jetstream::kv;
 | 
				
			||||||
 | 
					use futures::AsyncReadExt;
 | 
				
			||||||
 | 
					use image::EncodableLayout;
 | 
				
			||||||
 | 
					use isahc::config::RedirectPolicy;
 | 
				
			||||||
 | 
					use isahc::prelude::Configurable;
 | 
				
			||||||
 | 
					use isahc::HttpClient;
 | 
				
			||||||
 | 
					use log::{debug, error, warn};
 | 
				
			||||||
 | 
					use std::collections::{BTreeMap, BTreeSet};
 | 
				
			||||||
 | 
					use std::hash::{DefaultHasher, Hasher};
 | 
				
			||||||
 | 
					use std::sync::atomic::AtomicBool;
 | 
				
			||||||
 | 
					use std::sync::{mpsc, Arc};
 | 
				
			||||||
 | 
					use std::time::Duration;
 | 
				
			||||||
 | 
					use stopwords::{Language, Spark, Stopwords};
 | 
				
			||||||
 | 
					use texting_robots::{get_robots_url, Robot};
 | 
				
			||||||
 | 
					use thirtyfour::{By, WebDriver};
 | 
				
			||||||
 | 
					use asklyphe_common::nats::vorebot::CrawlRequest;
 | 
				
			||||||
 | 
					use asklyphe_common::nats::vorebot::VOREBOT_SERVICE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn allowed_to_crawl(robotstxt: &[u8], url: &str) -> Result<bool, ()> {
 | 
				
			||||||
 | 
					    let robot1 = Robot::new("Vorebot", robotstxt);
 | 
				
			||||||
 | 
					    if let Err(e) = robot1 {
 | 
				
			||||||
 | 
					        warn!(
 | 
				
			||||||
 | 
					            "potentially malformed robots.txt ({}), not crawling {}",
 | 
				
			||||||
 | 
					            e, url
 | 
				
			||||||
 | 
					        );
 | 
				
			||||||
 | 
					        return Err(());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let robot1 = robot1.unwrap();
 | 
				
			||||||
 | 
					    Ok(robot1.allowed(url))
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// returns Err if we cannot access a page, but the error associated with it seems temporary (i.e. it's worth trying again later)
 | 
				
			||||||
 | 
					// otherwise, returns Ok
 | 
				
			||||||
 | 
					pub async fn web_parse(
 | 
				
			||||||
 | 
					    nats: jetstream::Context,
 | 
				
			||||||
 | 
					    db: DBConn,
 | 
				
			||||||
 | 
					    driver: &WebDriver,
 | 
				
			||||||
 | 
					    url: &str,
 | 
				
			||||||
 | 
					    damping: f64,
 | 
				
			||||||
 | 
					) -> Result<(), ()> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    driver.delete_all_cookies().await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					    let robots_bucket = nats.get_key_value("robots").await;
 | 
				
			||||||
 | 
					    let robots_bucket = if robots_bucket.is_err() {
 | 
				
			||||||
 | 
					        let robots_bucket = nats
 | 
				
			||||||
 | 
					            .create_key_value(kv::Config {
 | 
				
			||||||
 | 
					                bucket: "robots".to_string(),
 | 
				
			||||||
 | 
					                description: "storage of robots.txt data for given hosts".to_string(),
 | 
				
			||||||
 | 
					                ..Default::default()
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					            .await;
 | 
				
			||||||
 | 
					        if let Err(e) = robots_bucket {
 | 
				
			||||||
 | 
					            error!("could not create robots.txt bucket: {}", e);
 | 
				
			||||||
 | 
					            None
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            Some(robots_bucket.unwrap())
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        robots_bucket.ok()
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    let hosts_bucket = nats.get_key_value("hosts").await;
 | 
				
			||||||
 | 
					    let hosts_bucket = if hosts_bucket.is_err() {
 | 
				
			||||||
 | 
					        let hosts_bucket = nats
 | 
				
			||||||
 | 
					            .create_key_value(kv::Config {
 | 
				
			||||||
 | 
					                bucket: "hosts".to_string(),
 | 
				
			||||||
 | 
					                description: "prevent the same host from being scraped too quickly".to_string(),
 | 
				
			||||||
 | 
					                max_age: Duration::from_secs(60 * 10),
 | 
				
			||||||
 | 
					                ..Default::default()
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					            .await;
 | 
				
			||||||
 | 
					        if let Err(e) = hosts_bucket {
 | 
				
			||||||
 | 
					            error!("could not create hosts bucket: {}", e);
 | 
				
			||||||
 | 
					            return Err(());
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            hosts_bucket.unwrap()
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        hosts_bucket.unwrap()
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let robots_url = get_robots_url(url);
 | 
				
			||||||
 | 
					    if robots_url.is_err() {
 | 
				
			||||||
 | 
					        error!("could not get a robots.txt url from {}, not crawling", url);
 | 
				
			||||||
 | 
					        return Ok(());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let robots_url = robots_url.unwrap();
 | 
				
			||||||
 | 
					    let mut hash = DefaultHasher::new();
 | 
				
			||||||
 | 
					    hash.write(robots_url.as_bytes());
 | 
				
			||||||
 | 
					    let hash = hash.finish();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if let Ok(Some(host)) = hosts_bucket.get(hash.to_string()).await {
 | 
				
			||||||
 | 
					        let count = *host.first().unwrap_or(&0);
 | 
				
			||||||
 | 
					        if count > 10 {
 | 
				
			||||||
 | 
					            warn!("scraping {} too quickly, avoiding for one minute", robots_url);
 | 
				
			||||||
 | 
					            return Err(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        hosts_bucket.put(hash.to_string(), vec![count + 1].into()).await.expect("COULDN'T INSERT INTO HOSTS BUCKET!");
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        hosts_bucket.put(hash.to_string(), vec![1].into()).await.expect("COULDN'T INSERT INTO HOSTS BUCKET!");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut skip_robots_check = false;
 | 
				
			||||||
 | 
					    if let Some(robots_bucket) = &robots_bucket {
 | 
				
			||||||
 | 
					        if let Ok(Some(entry)) = robots_bucket.get(hash.to_string()).await {
 | 
				
			||||||
 | 
					            if let Ok(res) = allowed_to_crawl(entry.as_bytes(), url) {
 | 
				
			||||||
 | 
					                if !res {
 | 
				
			||||||
 | 
					                    debug!("robots.txt does not allow us to crawl {}", url);
 | 
				
			||||||
 | 
					                    return Ok(());
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    skip_robots_check = true;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if !skip_robots_check {
 | 
				
			||||||
 | 
					        // check manually
 | 
				
			||||||
 | 
					        debug!("checking new robots.txt \"{}\"", robots_url);
 | 
				
			||||||
 | 
					        let client = HttpClient::builder()
 | 
				
			||||||
 | 
					            .redirect_policy(RedirectPolicy::Limit(10))
 | 
				
			||||||
 | 
					            .timeout(Duration::from_secs(60))
 | 
				
			||||||
 | 
					            .build();
 | 
				
			||||||
 | 
					        if let Err(e) = client {
 | 
				
			||||||
 | 
					            error!("could not create new robots.txt httpclient: {}", e);
 | 
				
			||||||
 | 
					            return Err(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let client = client.unwrap();
 | 
				
			||||||
 | 
					        let request = isahc::Request::get(&robots_url)
 | 
				
			||||||
 | 
					            .header("user-agent", USER_AGENT.as_str())
 | 
				
			||||||
 | 
					            .body(());
 | 
				
			||||||
 | 
					        if let Err(e) = request {
 | 
				
			||||||
 | 
					            error!("could not create robots.txt get request: {}", e);
 | 
				
			||||||
 | 
					            return Ok(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let request = request.unwrap();
 | 
				
			||||||
 | 
					        let response = client.send_async(request).await;
 | 
				
			||||||
 | 
					        if let Err(e) = response {
 | 
				
			||||||
 | 
					            warn!("could not get robots.txt page: {}", e);
 | 
				
			||||||
 | 
					            return Err(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let mut response = response.unwrap();
 | 
				
			||||||
 | 
					        if response.status() == 429 {
 | 
				
			||||||
 | 
					            // too many requests
 | 
				
			||||||
 | 
					            warn!("too many requests for {}", robots_url);
 | 
				
			||||||
 | 
					            return Err(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if response.status().is_server_error() {
 | 
				
			||||||
 | 
					            // don't crawl at the moment
 | 
				
			||||||
 | 
					            debug!("not crawling {} due to server error", robots_url);
 | 
				
			||||||
 | 
					            return Err(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut body = "".to_string();
 | 
				
			||||||
 | 
					        if let Err(e) = response.body_mut().read_to_string(&mut body).await {
 | 
				
			||||||
 | 
					            warn!("could not read from robots.txt response: {}", e);
 | 
				
			||||||
 | 
					            return Err(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if let Ok(res) = allowed_to_crawl(body.as_bytes(), url) {
 | 
				
			||||||
 | 
					            if let Some(robots_bucket) = &robots_bucket {
 | 
				
			||||||
 | 
					                if let Err(e) = robots_bucket
 | 
				
			||||||
 | 
					                    .put(hash.to_string(), body.as_bytes().to_vec().into())
 | 
				
			||||||
 | 
					                    .await
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    warn!("could not put robots.txt data: {}", e);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if !res {
 | 
				
			||||||
 | 
					                debug!("robots.txt does not allow us to crawl {}", url);
 | 
				
			||||||
 | 
					                return Ok(());
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                // we're allowed to crawl!
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let start = std::time::Instant::now();
 | 
				
			||||||
 | 
					    debug!("handling request for {}", url);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // check for bad status codes
 | 
				
			||||||
 | 
					    // fixme: i hate this solution, can we get something that actually checks the browser's request?
 | 
				
			||||||
 | 
					    let client = HttpClient::builder()
 | 
				
			||||||
 | 
					        .redirect_policy(RedirectPolicy::Limit(10))
 | 
				
			||||||
 | 
					        .timeout(Duration::from_secs(60))
 | 
				
			||||||
 | 
					        .build();
 | 
				
			||||||
 | 
					    if let Err(e) = client {
 | 
				
			||||||
 | 
					        error!("could not create new badstatuscode httpclient: {}", e);
 | 
				
			||||||
 | 
					        return Err(());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let client = client.unwrap();
 | 
				
			||||||
 | 
					    let request = isahc::Request::get(url)
 | 
				
			||||||
 | 
					        .header("user-agent", USER_AGENT.as_str())
 | 
				
			||||||
 | 
					        .body(());
 | 
				
			||||||
 | 
					    if let Err(e) = request {
 | 
				
			||||||
 | 
					        error!("could not create badstatuscode get request: {}", e);
 | 
				
			||||||
 | 
					        return Ok(());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let request = request.unwrap();
 | 
				
			||||||
 | 
					    let response = client.send_async(request).await;
 | 
				
			||||||
 | 
					    if let Err(e) = response {
 | 
				
			||||||
 | 
					        warn!("could not get badstatuscode page: {}", e);
 | 
				
			||||||
 | 
					        return Err(());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    let mut response = response.unwrap();
 | 
				
			||||||
 | 
					    if response.status() == 429 {
 | 
				
			||||||
 | 
					        // too many requests
 | 
				
			||||||
 | 
					        warn!("too many requests for {}", url);
 | 
				
			||||||
 | 
					        return Err(());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if response.status().is_server_error() || response.status().is_client_error() {
 | 
				
			||||||
 | 
					        // don't crawl at the moment
 | 
				
			||||||
 | 
					        debug!("not crawling {} due to bad status code {}", url, response.status());
 | 
				
			||||||
 | 
					        return Err(());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // i guess we're good
 | 
				
			||||||
 | 
					    driver.goto(url).await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let html_element = driver.find(By::Tag("html")).await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if let Some(lang) = html_element.attr("lang").await.ok().flatten() {
 | 
				
			||||||
 | 
					        if !lang.starts_with("en") && !lang.starts_with("unknown") {
 | 
				
			||||||
 | 
					            // i.e. non-english language
 | 
				
			||||||
 | 
					            // fixme: remove this once we start expanding to non-english-speaking markets?
 | 
				
			||||||
 | 
					            warn!("skipping {} due to {} language (currently prioritizing english", url, lang);
 | 
				
			||||||
 | 
					            return Err(());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let meta_elements = driver.find_all(By::Tag("meta")).await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let title = driver.title().await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					    let mut description = None;
 | 
				
			||||||
 | 
					    let mut keywords = vec![];
 | 
				
			||||||
 | 
					    for elem in meta_elements {
 | 
				
			||||||
 | 
					        if let Ok(Some(name)) = elem.attr("name").await {
 | 
				
			||||||
 | 
					            match name.as_str() {
 | 
				
			||||||
 | 
					                "description" => {
 | 
				
			||||||
 | 
					                    if let Ok(Some(content)) = elem.attr("content").await {
 | 
				
			||||||
 | 
					                        description = Some(content);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                "keywords" => {
 | 
				
			||||||
 | 
					                    if let Ok(Some(content)) = elem.attr("content").await {
 | 
				
			||||||
 | 
					                        keywords = content
 | 
				
			||||||
 | 
					                            .split(',')
 | 
				
			||||||
 | 
					                            .map(|v| v.to_lowercase())
 | 
				
			||||||
 | 
					                            .filter(|v| !v.is_empty())
 | 
				
			||||||
 | 
					                            .collect();
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                _ => {}
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let body = driver.find(By::Tag("body")).await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					    let raw_page_content = body.text().await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async fn gather_elements_with_multiplier(
 | 
				
			||||||
 | 
					        driver: &WebDriver,
 | 
				
			||||||
 | 
					        wordmap: &mut BTreeMap<String, f64>,
 | 
				
			||||||
 | 
					        stops: &BTreeSet<&&str>,
 | 
				
			||||||
 | 
					        elements: &[&str],
 | 
				
			||||||
 | 
					        multiplier: f64,
 | 
				
			||||||
 | 
					    ) {
 | 
				
			||||||
 | 
					        let mut elms = vec![];
 | 
				
			||||||
 | 
					        for tag in elements {
 | 
				
			||||||
 | 
					            elms.push(driver.find_all(By::Tag(*tag)).await);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let elms = elms.iter().flatten().flatten().collect::<Vec<_>>();
 | 
				
			||||||
 | 
					        let mut sentences = vec![];
 | 
				
			||||||
 | 
					        let mut sentence_set = BTreeSet::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        debug!("processing elements...");
 | 
				
			||||||
 | 
					        for node in elms {
 | 
				
			||||||
 | 
					            let _ = node.scroll_into_view().await;
 | 
				
			||||||
 | 
					            let boxmodel = node.rect().await;
 | 
				
			||||||
 | 
					            if boxmodel.is_err() {
 | 
				
			||||||
 | 
					                // not visible
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            let boxmodel = boxmodel.unwrap();
 | 
				
			||||||
 | 
					            let current_text = node.text().await;
 | 
				
			||||||
 | 
					            if current_text.is_err() {
 | 
				
			||||||
 | 
					                // no text on this node
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            let current_text = current_text.unwrap().trim().to_string();
 | 
				
			||||||
 | 
					            if current_text.is_empty() {
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            let sqs = (boxmodel.width * boxmodel.height).max(1.0); // no 0 divides pls (:
 | 
				
			||||||
 | 
					            let ccount = current_text.chars().count() as f64;
 | 
				
			||||||
 | 
					            let cssq = if ccount > 0.0 { sqs / ccount } else { 0.0 };
 | 
				
			||||||
 | 
					            if sentence_set.contains(¤t_text) {
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            sentence_set.insert(current_text.clone());
 | 
				
			||||||
 | 
					            sentences.push((current_text, cssq));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (sentence, cssq) in sentences {
 | 
				
			||||||
 | 
					            let mut cssq = (cssq / 500.0).powi(2) * multiplier;
 | 
				
			||||||
 | 
					            for word in sentence.split_whitespace() {
 | 
				
			||||||
 | 
					                let word = word
 | 
				
			||||||
 | 
					                    .to_lowercase()
 | 
				
			||||||
 | 
					                    .trim_end_matches(|v: char| v.is_ascii_punctuation())
 | 
				
			||||||
 | 
					                    .to_string();
 | 
				
			||||||
 | 
					                if stops.contains(&word.as_str()) {
 | 
				
			||||||
 | 
					                    // less valuable
 | 
				
			||||||
 | 
					                    cssq /= 100.0;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                if let Some(wentry) = wordmap.get_mut(&word) {
 | 
				
			||||||
 | 
					                    *wentry += cssq;
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    if word.is_empty() {
 | 
				
			||||||
 | 
					                        continue;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    wordmap.insert(word.to_string(), cssq);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut wordmap: BTreeMap<String, f64> = BTreeMap::new();
 | 
				
			||||||
 | 
					    let stops: BTreeSet<_> = Spark::stopwords(Language::English)
 | 
				
			||||||
 | 
					        .unwrap()
 | 
				
			||||||
 | 
					        .iter()
 | 
				
			||||||
 | 
					        .collect();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    debug!("headers...");
 | 
				
			||||||
 | 
					    gather_elements_with_multiplier(driver, &mut wordmap, &stops, &["h1","h2","h3","h4","h5","h6"], 3.0)
 | 
				
			||||||
 | 
					        .await;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    debug!("paragraphs...");
 | 
				
			||||||
 | 
					    gather_elements_with_multiplier(driver, &mut wordmap, &stops, &["p","div"], 1.0).await;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut wordmap = wordmap.into_iter().collect::<Vec<_>>();
 | 
				
			||||||
 | 
					    wordmap.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut db_error_so_requeue_anyways = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let words = wordmap
 | 
				
			||||||
 | 
					        .iter()
 | 
				
			||||||
 | 
					        .map(|(word, _)| word.as_str())
 | 
				
			||||||
 | 
					        .collect::<Vec<_>>();
 | 
				
			||||||
 | 
					    #[allow(clippy::collapsible_if)]
 | 
				
			||||||
 | 
					    if !words.is_empty() {
 | 
				
			||||||
 | 
					        if wordstore::add_url_to_keywords(&db, &words, url)
 | 
				
			||||||
 | 
					            .await
 | 
				
			||||||
 | 
					            .is_err()
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            warn!("couldn't add {} to keywords!", url);
 | 
				
			||||||
 | 
					            db_error_so_requeue_anyways = true;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut metawords = keywords.iter().map(|v| v.as_str()).collect::<Vec<_>>();
 | 
				
			||||||
 | 
					    let desc2 = description.clone();
 | 
				
			||||||
 | 
					    let desc2 = desc2.map(|v| {
 | 
				
			||||||
 | 
					        v.to_lowercase()
 | 
				
			||||||
 | 
					            .split_whitespace()
 | 
				
			||||||
 | 
					            .map(String::from)
 | 
				
			||||||
 | 
					            .collect::<Vec<_>>()
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					    if let Some(description) = &desc2 {
 | 
				
			||||||
 | 
					        for word in description {
 | 
				
			||||||
 | 
					            let word = word.trim_end_matches(|v: char| v.is_ascii_punctuation());
 | 
				
			||||||
 | 
					            if word.is_empty() {
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            metawords.push(word);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    #[allow(clippy::collapsible_if)]
 | 
				
			||||||
 | 
					    if !metawords.is_empty() {
 | 
				
			||||||
 | 
					        if metastore::add_url_to_metawords(&db, &metawords, url)
 | 
				
			||||||
 | 
					            .await
 | 
				
			||||||
 | 
					            .is_err()
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            warn!("couldn't add {} to metawords!", url);
 | 
				
			||||||
 | 
					            db_error_so_requeue_anyways = true;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut titlewords = vec![];
 | 
				
			||||||
 | 
					    let title2 = title.clone();
 | 
				
			||||||
 | 
					    let title2 = title2.to_lowercase();
 | 
				
			||||||
 | 
					    for word in title2.split_whitespace() {
 | 
				
			||||||
 | 
					        let word = word.trim_end_matches(|v: char| v.is_ascii_punctuation());
 | 
				
			||||||
 | 
					        if word.is_empty() {
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        titlewords.push(word);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    #[allow(clippy::collapsible_if)]
 | 
				
			||||||
 | 
					    if !titlewords.is_empty() {
 | 
				
			||||||
 | 
					        if titlestore::add_url_to_titlewords(&db, &titlewords, url)
 | 
				
			||||||
 | 
					            .await
 | 
				
			||||||
 | 
					            .is_err()
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            warn!("couldn't add {} to titlewords!", url);
 | 
				
			||||||
 | 
					            db_error_so_requeue_anyways = true;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if sitestore::add_website(
 | 
				
			||||||
 | 
					        &db,
 | 
				
			||||||
 | 
					        url,
 | 
				
			||||||
 | 
					        Some(title),
 | 
				
			||||||
 | 
					        description,
 | 
				
			||||||
 | 
					        if keywords.is_empty() {
 | 
				
			||||||
 | 
					            None
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            Some(keywords)
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        &wordmap,
 | 
				
			||||||
 | 
					        raw_page_content,
 | 
				
			||||||
 | 
					        damping
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    .await
 | 
				
			||||||
 | 
					    .is_err()
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        warn!("couldn't add {} to sitestore!", url);
 | 
				
			||||||
 | 
					        db_error_so_requeue_anyways = true;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    debug!("finished with main site stuff for {}", url);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let linkelms = driver.find_all(By::Tag("a")).await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for linkelm in linkelms {
 | 
				
			||||||
 | 
					        if linkelm.scroll_into_view().await.is_err() {
 | 
				
			||||||
 | 
					            debug!("couldn't scroll into view!");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let href = linkelm.prop("href").await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					        if href.is_none() {
 | 
				
			||||||
 | 
					            debug!("no href!");
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let href = href.unwrap();
 | 
				
			||||||
 | 
					        if href.contains('#') {
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let linktext = linkelm.text().await.map_err(|_| ())?.to_lowercase();
 | 
				
			||||||
 | 
					        let linkimgs = linkelm.find_all(By::Tag("img")).await.map_err(|_| ())?;
 | 
				
			||||||
 | 
					        let mut alts = "".to_string();
 | 
				
			||||||
 | 
					        for img in linkimgs {
 | 
				
			||||||
 | 
					            if let Ok(Some(alt)) = img.attr("alt").await {
 | 
				
			||||||
 | 
					                alts.push_str(&alt);
 | 
				
			||||||
 | 
					                alts.push(' ');
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let alts = alts.trim().to_lowercase();
 | 
				
			||||||
 | 
					        let mut linkwords = vec![];
 | 
				
			||||||
 | 
					        for word in linktext.split_whitespace() {
 | 
				
			||||||
 | 
					            let word = word.trim_end_matches(|v: char| v.is_ascii_punctuation());
 | 
				
			||||||
 | 
					            linkwords.push(word);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        for word in alts.split_whitespace() {
 | 
				
			||||||
 | 
					            let word = word.trim_end_matches(|v: char| v.is_ascii_punctuation());
 | 
				
			||||||
 | 
					            linkwords.push(word);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        #[allow(clippy::collapsible_if)]
 | 
				
			||||||
 | 
					        if !linkwords.is_empty() {
 | 
				
			||||||
 | 
					            if linkstore::add_url_to_linkwords(&db, &linkwords, &href).await.is_err() {
 | 
				
			||||||
 | 
					                warn!("couldn't add {} to linkwords!", url);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        if linkrelstore::a_linksto_b(&db, url, &href).await.is_err() {
 | 
				
			||||||
 | 
					            warn!("couldn't perform a_linksto_b (a {url} b {href})");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        nats.publish(VOREBOT_SERVICE.to_string(), rmp_serde::to_vec(&CrawlRequest {
 | 
				
			||||||
 | 
					            url: href,
 | 
				
			||||||
 | 
					            damping: 0.85,
 | 
				
			||||||
 | 
					        }).unwrap().into()).await.unwrap();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    let elapsed = start.elapsed().as_secs_f64();
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    debug!("crawled {} in {} seconds", url, elapsed);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Ok(())
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue