Browse Source

Add support for command line parameters

pull/6/head
Felix Ableitner 9 months ago
parent
commit
9621b91f7c
  1. 63
      Cargo.lock
  2. 1
      Cargo.toml
  3. 8
      README.md
  4. 10
      src/crawl.rs
  5. 4
      src/lib.rs
  6. 30
      src/main.rs

63
Cargo.lock

@ -2,12 +2,32 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "anyhow"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi 0.3.9",
]
[[package]]
name = "autocfg"
version = "1.0.1"
@ -68,6 +88,21 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "2.33.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]]
name = "encoding_rs"
version = "0.8.28"
@ -381,6 +416,7 @@ name = "lemmy-stats-crawler"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"futures",
"reqwest",
"serde",
@ -710,6 +746,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "syn"
version = "1.0.62"
@ -721,6 +763,15 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "tinyvec"
version = "1.1.1"
@ -862,6 +913,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-width"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.1"
@ -886,6 +943,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.2"

1
Cargo.toml

@ -11,3 +11,4 @@ anyhow = "1.0.38"
tokio = { version = "0.2.25", features = ["rt-threaded", "macros"] }
futures = "0.3.13"
serde_json = "1.0.64"
clap = "2.33.3"

8
README.md

@ -4,12 +4,6 @@ Crawls Lemmy instances using nodeinfo and API endpoints, to generate a list of i
## Usage
For testing:
```
cargo run
```
For production (hide debug logs):
```
cargo run 2>/dev/null
cargo run -- --start-instances baraza.africa,lemmy.ml
```

10
src/crawl.rs

@ -59,14 +59,14 @@ pub struct InstanceDetails {
}
struct CrawlInstance {
domain: String,
depth: i32,
domain: String,
depth: i32,
}
impl CrawlInstance {
pub fn new(domain: String, depth: i32) -> CrawlInstance {
CrawlInstance { domain, depth }
}
pub fn new(domain: String, depth: i32) -> CrawlInstance {
CrawlInstance { domain, depth }
}
}
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {

4
src/lib.rs

@ -5,5 +5,5 @@ pub mod federated_instances;
pub mod node_info;
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
pub const START_INSTANCES: [&'static str; 1] = ["lemmy.ml"];
pub const MAX_CRAWL_DEPTH: i32 = 2;
pub const DEFAULT_START_INSTANCES: &'static str = "lemmy.ml";
pub const DEFAULT_MAX_CRAWL_DEPTH: &'static str = "1";

30
src/main.rs

@ -1,14 +1,38 @@
use anyhow::Error;
use clap::{App, Arg};
use lemmy_stats_crawler::crawl::{crawl, InstanceDetails};
use lemmy_stats_crawler::{MAX_CRAWL_DEPTH, START_INSTANCES};
use lemmy_stats_crawler::{DEFAULT_MAX_CRAWL_DEPTH, DEFAULT_START_INSTANCES};
use serde::Serialize;
#[tokio::main]
pub async fn main() -> Result<(), Error> {
let start_instances = START_INSTANCES.iter().map(|s| s.to_string()).collect();
let matches = App::new("Lemmy Stats Crawler")
.arg(
Arg::with_name("start-instances")
.long("start-instances")
.takes_value(true),
)
.arg(
Arg::with_name("max-crawl-depth")
.long("max-crawl-depth")
.takes_value(true),
)
.get_matches();
let trusted_instances: Vec<String> = matches
.value_of("start-instances")
.unwrap_or(DEFAULT_START_INSTANCES)
.split(",")
.map(|s| s.to_string())
.collect();
let max_crawl_depth: i32 = matches
.value_of("max-crawl-depth")
.unwrap_or(DEFAULT_MAX_CRAWL_DEPTH)
.parse()?;
let start_instances = trusted_instances.iter().map(|s| s.to_string()).collect();
eprintln!("Crawling...");
let instance_details = crawl(start_instances, MAX_CRAWL_DEPTH).await?;
let instance_details = crawl(start_instances, max_crawl_depth).await?;
let total_stats = aggregate(instance_details);
println!("{}", serde_json::to_string(&total_stats)?);

Loading…
Cancel
Save