stem_rs/descriptor/tordnsel.rs
1//! TorDNSEL exit list parsing.
2//!
3//! This module parses exit list files from [TorDNSEL](https://www.torproject.org/projects/tordnsel.html.en)
4//! (Tor DNS-based Exit List). These files contain information about Tor exit
5//! nodes and the IP addresses they use when exiting to the internet.
6//!
7//! # Overview
8//!
9//! TorDNSEL is a service that tracks which IP addresses are used by Tor exit
10//! nodes. This information is useful for:
11//!
12//! - Identifying traffic originating from Tor exit nodes
13//! - Implementing access controls based on Tor usage
14//! - Research and analysis of the Tor network
15//!
16//! Exit lists are published periodically and contain entries for each known
17//! exit relay, including:
18//! - The relay's fingerprint (identity)
19//! - When the relay was last seen in the consensus
20//! - The IP addresses the relay uses for exiting
21//!
22//! # File Format
23//!
24//! Exit list files follow this format:
25//!
26//! ```text
27//! @type tordnsel 1.0
28//! Downloaded 2024-01-01 00:00:00
29//! ExitNode <40 hex fingerprint>
30//! Published <YYYY-MM-DD HH:MM:SS>
31//! LastStatus <YYYY-MM-DD HH:MM:SS>
32//! ExitAddress <IPv4 address> <YYYY-MM-DD HH:MM:SS>
33//! ExitAddress <IPv4 address> <YYYY-MM-DD HH:MM:SS>
34//! ExitNode <40 hex fingerprint>
35//! ...
36//! ```
37//!
38//! # Example
39//!
40//! ```rust
41//! use stem_rs::descriptor::tordnsel::{TorDNSEL, parse_exit_list};
42//!
43//! let exit_list = r#"ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
44//! Published 2024-01-01 12:00:00
45//! LastStatus 2024-01-01 13:00:00
46//! ExitAddress 192.168.1.1 2024-01-01 13:30:00
47//! ExitNode 00FF300624FECA7F40515C8D854EE925332580D6
48//! Published 2024-01-01 11:00:00
49//! LastStatus 2024-01-01 12:00:00
50//! ExitAddress 10.0.0.1 2024-01-01 12:30:00
51//! "#;
52//!
53//! let entries = parse_exit_list(exit_list)?;
54//! assert_eq!(entries.len(), 2);
55//!
56//! for entry in &entries {
57//! println!("Exit node: {}", entry.fingerprint);
58//! for (addr, date) in &entry.exit_addresses {
59//! println!(" Exit address: {} (seen {})", addr, date);
60//! }
61//! }
62//! # Ok::<(), stem_rs::Error>(())
63//! ```
64//!
65//! # Data Source
66//!
67//! Exit lists can be obtained from:
68//! - [Tor Metrics](https://metrics.torproject.org/collector.html) - Historical data
69//! - [CollecTor](https://collector.torproject.org/) - Archive of Tor network data
70//!
71//! # See Also
72//!
73//! - [`server`](super::server): Server descriptors with full relay information
74//! - [`consensus`](super::consensus): Network status documents
75//! - [`remote`](super::remote): Downloading descriptors from the network
76
77use crate::Error;
78use chrono::{DateTime, NaiveDateTime, Utc};
79use std::net::Ipv4Addr;
80
81/// A TorDNSEL exit list entry for a single relay.
82///
83/// Each entry represents one Tor exit relay and contains information about
84/// when it was last seen and what IP addresses it uses for exiting traffic.
85///
86/// # Structure
87///
88/// A TorDNSEL entry contains:
89/// - The relay's fingerprint (40-character hex string)
90/// - Publication and last-seen timestamps
91/// - One or more exit addresses with observation times
92///
93/// # Exit Addresses
94///
95/// A relay may have multiple exit addresses because:
96/// - It may use different addresses for different exit ports
97/// - It may have changed addresses over time
98/// - It may be multi-homed (multiple network interfaces)
99///
100/// Each exit address is paired with the time it was observed being used.
101///
102/// # Example
103///
104/// ```rust
105/// use stem_rs::descriptor::tordnsel::TorDNSEL;
106///
107/// let content = r#"ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
108/// Published 2024-01-01 12:00:00
109/// LastStatus 2024-01-01 13:00:00
110/// ExitAddress 192.168.1.1 2024-01-01 13:30:00
111/// "#;
112///
113/// let entry = TorDNSEL::parse(content)?;
114/// assert_eq!(entry.fingerprint, "003A71137D959748C8157C4A76ECA639CEF5E33E");
115/// assert_eq!(entry.exit_addresses.len(), 1);
116/// # Ok::<(), stem_rs::Error>(())
117/// ```
118#[derive(Debug, Clone)]
119pub struct TorDNSEL {
120 /// SHA-1 fingerprint of the relay's identity key.
121 ///
122 /// This is a 40-character hexadecimal string that uniquely identifies
123 /// the relay. It matches the fingerprint in server descriptors and
124 /// consensus documents.
125 pub fingerprint: String,
126
127 /// Time when the relay published its descriptor.
128 ///
129 /// This indicates when the relay last updated its server descriptor.
130 /// May be `None` if the field was missing or unparseable.
131 pub published: Option<DateTime<Utc>>,
132
133 /// Time when the relay was last seen in a network status.
134 ///
135 /// This indicates when the relay was last included in a consensus
136 /// document. A relay not seen recently may no longer be active.
137 /// May be `None` if the field was missing or unparseable.
138 pub last_status: Option<DateTime<Utc>>,
139
140 /// List of exit addresses observed for this relay.
141 ///
142 /// Each entry is a tuple of (IPv4 address, observation time).
143 /// The observation time indicates when TorDNSEL detected that
144 /// the relay was using this address for exit traffic.
145 ///
146 /// A relay may have multiple exit addresses if it uses different
147 /// addresses for different connections or has changed addresses.
148 pub exit_addresses: Vec<(Ipv4Addr, DateTime<Utc>)>,
149
150 /// Raw bytes of the original entry content.
151 raw_content: Vec<u8>,
152
153 /// Lines that were not recognized during parsing.
154 unrecognized_lines: Vec<String>,
155}
156
157impl TorDNSEL {
158 /// Parses a TorDNSEL entry from a string.
159 ///
160 /// This method parses a single exit list entry containing information
161 /// about one relay.
162 ///
163 /// # Arguments
164 ///
165 /// * `content` - The entry content as a string
166 ///
167 /// # Returns
168 ///
169 /// A parsed `TorDNSEL` entry on success.
170 ///
171 /// # Errors
172 ///
173 /// Returns [`Error::Parse`] if:
174 /// - The `ExitNode` line is missing
175 /// - The fingerprint is not a valid 40-character hex string
176 ///
177 /// Note: Invalid timestamps are silently ignored rather than causing errors.
178 ///
179 /// # Example
180 ///
181 /// ```rust
182 /// use stem_rs::descriptor::tordnsel::TorDNSEL;
183 ///
184 /// let content = r#"ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
185 /// Published 2024-01-01 12:00:00
186 /// ExitAddress 192.168.1.1 2024-01-01 13:30:00
187 /// "#;
188 ///
189 /// let entry = TorDNSEL::parse(content)?;
190 /// assert_eq!(entry.fingerprint, "003A71137D959748C8157C4A76ECA639CEF5E33E");
191 /// # Ok::<(), stem_rs::Error>(())
192 /// ```
193 pub fn parse(content: &str) -> Result<Self, Error> {
194 Self::parse_bytes(content.as_bytes())
195 }
196
197 /// Parses a TorDNSEL entry from raw bytes.
198 ///
199 /// This is the byte-oriented version of [`parse()`](Self::parse),
200 /// useful when reading directly from files or network streams.
201 ///
202 /// # Arguments
203 ///
204 /// * `content` - The entry content as bytes (UTF-8 encoded)
205 ///
206 /// # Returns
207 ///
208 /// A parsed `TorDNSEL` entry on success.
209 ///
210 /// # Errors
211 ///
212 /// Returns [`Error::Parse`] if:
213 /// - The `ExitNode` line is missing
214 /// - The fingerprint is not a valid 40-character hex string
215 ///
216 /// # Note
217 ///
218 /// Invalid UTF-8 sequences are replaced with the Unicode replacement
219 /// character (U+FFFD) rather than causing an error.
220 pub fn parse_bytes(content: &[u8]) -> Result<Self, Error> {
221 let content_str = String::from_utf8_lossy(content);
222 let mut fingerprint = None;
223 let mut published = None;
224 let mut last_status = None;
225 let mut exit_addresses = Vec::new();
226 let mut unrecognized_lines = Vec::new();
227
228 for line in content_str.lines() {
229 let line = line.trim();
230 if line.is_empty() || line.starts_with('@') {
231 continue;
232 }
233
234 if let Some(value) = line.strip_prefix("ExitNode ") {
235 let fp = value.trim();
236 if !is_valid_fingerprint(fp) {
237 return Err(Error::Parse {
238 location: "tordnsel".into(),
239 reason: format!(
240 "Tor relay fingerprints consist of forty hex digits: {}",
241 fp
242 ),
243 });
244 }
245 fingerprint = Some(fp.to_string());
246 } else if let Some(value) = line.strip_prefix("Published ") {
247 published = parse_timestamp(value.trim());
248 } else if let Some(value) = line.strip_prefix("LastStatus ") {
249 last_status = parse_timestamp(value.trim());
250 } else if let Some(value) = line.strip_prefix("ExitAddress ") {
251 if let Some((addr_str, date_str)) = value.split_once(' ') {
252 if let Ok(addr) = addr_str.trim().parse::<Ipv4Addr>() {
253 if let Some(date) = parse_timestamp(date_str.trim()) {
254 exit_addresses.push((addr, date));
255 }
256 }
257 }
258 } else if !line.starts_with("Downloaded ") {
259 unrecognized_lines.push(line.to_string());
260 }
261 }
262
263 let fingerprint = fingerprint.ok_or_else(|| Error::Parse {
264 location: "tordnsel".into(),
265 reason: "Missing ExitNode fingerprint".into(),
266 })?;
267
268 Ok(Self {
269 fingerprint,
270 published,
271 last_status,
272 exit_addresses,
273 raw_content: content.to_vec(),
274 unrecognized_lines,
275 })
276 }
277
278 /// Returns the raw bytes of the original entry content.
279 ///
280 /// This provides access to the exact bytes that were parsed,
281 /// useful for debugging or storing entries in their original format.
282 ///
283 /// # Returns
284 ///
285 /// A byte slice containing the original entry content.
286 pub fn raw_content(&self) -> &[u8] {
287 &self.raw_content
288 }
289
290 /// Returns lines that were not recognized during parsing.
291 ///
292 /// Unrecognized lines are preserved for forward compatibility
293 /// with future exit list format extensions.
294 ///
295 /// # Returns
296 ///
297 /// A slice of strings, each representing an unrecognized line.
298 /// Empty if all lines were recognized.
299 pub fn unrecognized_lines(&self) -> &[String] {
300 &self.unrecognized_lines
301 }
302
303 /// Converts the entry back to its string representation.
304 ///
305 /// This produces a string in the standard TorDNSEL format
306 /// that can be parsed again or written to a file.
307 ///
308 /// # Returns
309 ///
310 /// A string containing the entry in standard format.
311 ///
312 /// # Example
313 ///
314 /// ```rust
315 /// use stem_rs::descriptor::tordnsel::TorDNSEL;
316 ///
317 /// let content = r#"ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
318 /// Published 2024-01-01 12:00:00
319 /// ExitAddress 192.168.1.1 2024-01-01 13:30:00
320 /// "#;
321 ///
322 /// let entry = TorDNSEL::parse(content)?;
323 /// let output = entry.to_descriptor_string();
324 /// assert!(output.contains("ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E"));
325 /// # Ok::<(), stem_rs::Error>(())
326 /// ```
327 pub fn to_descriptor_string(&self) -> String {
328 let mut lines = Vec::new();
329 lines.push(format!("ExitNode {}", self.fingerprint));
330 if let Some(dt) = self.published {
331 lines.push(format!("Published {}", dt.format("%Y-%m-%d %H:%M:%S")));
332 }
333 if let Some(dt) = self.last_status {
334 lines.push(format!("LastStatus {}", dt.format("%Y-%m-%d %H:%M:%S")));
335 }
336 for (addr, date) in &self.exit_addresses {
337 lines.push(format!(
338 "ExitAddress {} {}",
339 addr,
340 date.format("%Y-%m-%d %H:%M:%S")
341 ));
342 }
343 lines.join("\n")
344 }
345}
346
347/// Parses a complete TorDNSEL exit list file.
348///
349/// This function parses a file containing multiple exit list entries,
350/// returning a vector of all entries found.
351///
352/// # Arguments
353///
354/// * `content` - The complete exit list file content as a string
355///
356/// # Returns
357///
358/// A vector of parsed [`TorDNSEL`] entries on success.
359///
360/// # Errors
361///
362/// Returns [`Error::Parse`] if any entry in the file is malformed.
363/// Parsing stops at the first error.
364///
365/// # Example
366///
367/// ```rust
368/// use stem_rs::descriptor::tordnsel::parse_exit_list;
369///
370/// let exit_list = r#"@type tordnsel 1.0
371/// Downloaded 2024-01-01 00:00:00
372/// ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
373/// Published 2024-01-01 12:00:00
374/// ExitAddress 192.168.1.1 2024-01-01 13:30:00
375/// ExitNode 00FF300624FECA7F40515C8D854EE925332580D6
376/// Published 2024-01-01 11:00:00
377/// ExitAddress 10.0.0.1 2024-01-01 12:30:00
378/// "#;
379///
380/// let entries = parse_exit_list(exit_list)?;
381/// assert_eq!(entries.len(), 2);
382/// # Ok::<(), stem_rs::Error>(())
383/// ```
384pub fn parse_exit_list(content: &str) -> Result<Vec<TorDNSEL>, Error> {
385 parse_exit_list_bytes(content.as_bytes())
386}
387
388/// Parses a complete TorDNSEL exit list file from raw bytes.
389///
390/// This is the byte-oriented version of [`parse_exit_list()`],
391/// useful when reading directly from files or network streams.
392///
393/// # Arguments
394///
395/// * `content` - The complete exit list file content as bytes (UTF-8 encoded)
396///
397/// # Returns
398///
399/// A vector of parsed [`TorDNSEL`] entries on success.
400///
401/// # Errors
402///
403/// Returns [`Error::Parse`] if any entry in the file is malformed.
404/// Parsing stops at the first error.
405///
406/// # Note
407///
408/// Invalid UTF-8 sequences are replaced with the Unicode replacement
409/// character (U+FFFD) rather than causing an error.
410pub fn parse_exit_list_bytes(content: &[u8]) -> Result<Vec<TorDNSEL>, Error> {
411 let content_str = String::from_utf8_lossy(content);
412 let mut entries = Vec::new();
413 let mut current_entry = Vec::new();
414 let mut in_entry = false;
415
416 for line in content_str.lines() {
417 let trimmed = line.trim();
418 if trimmed.starts_with("ExitNode ") {
419 if in_entry && !current_entry.is_empty() {
420 let entry_content = current_entry.join("\n");
421 entries.push(TorDNSEL::parse(&entry_content)?);
422 current_entry.clear();
423 }
424 in_entry = true;
425 }
426 if in_entry {
427 current_entry.push(line.to_string());
428 }
429 }
430
431 if !current_entry.is_empty() {
432 let entry_content = current_entry.join("\n");
433 entries.push(TorDNSEL::parse(&entry_content)?);
434 }
435
436 Ok(entries)
437}
438
/// Checks whether `fp` has the shape of a relay fingerprint.
///
/// Returns `true` only when `fp` is exactly 40 bytes long and every one of
/// them is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_valid_fingerprint(fp: &str) -> bool {
    const FINGERPRINT_LEN: usize = 40;

    if fp.len() != FINGERPRINT_LEN {
        return false;
    }
    // Bytes of multi-byte characters are never ASCII hex digits, so a
    // byte-wise check rejects non-ASCII input as well.
    fp.bytes().all(|b| b.is_ascii_hexdigit())
}
445
446/// Parses a timestamp string in Tor's standard format.
447///
448/// Expected format: "YYYY-MM-DD HH:MM:SS"
449fn parse_timestamp(s: &str) -> Option<DateTime<Utc>> {
450 NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S")
451 .ok()
452 .map(|dt| dt.and_utc())
453}
454
#[cfg(test)]
mod tests {
    //! Unit tests for single-entry parsing, whole-list parsing and
    //! serialization of TorDNSEL exit lists.

    use super::*;

    /// Timestamp layout used throughout exit lists.
    const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S";

    /// Three-entry exit list including the `@type` and `Downloaded` header.
    const TEST_DESC: &str = r#"@type tordnsel 1.0
Downloaded 2013-08-19 04:02:03
ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
Published 2013-08-19 02:13:53
LastStatus 2013-08-19 03:02:47
ExitAddress 66.223.170.168 2013-08-19 03:18:51
ExitNode 00FF300624FECA7F40515C8D854EE925332580D6
Published 2013-08-18 07:02:14
LastStatus 2013-08-18 09:02:58
ExitAddress 82.252.181.153 2013-08-18 08:03:01
ExitAddress 82.252.181.154 2013-08-18 08:03:02
ExitAddress 82.252.181.155 2013-08-18 08:03:03
ExitNode 030B22437D99B2DB2908B747B6962EAD13AB4039
Published 2013-08-18 12:44:20
LastStatus 2013-08-18 13:02:57
ExitAddress 46.10.211.205 2013-08-18 13:18:48
"#;

    #[test]
    fn test_parse_exit_list() {
        let entries = parse_exit_list(TEST_DESC).unwrap();
        assert_eq!(entries.len(), 3);

        let desc = &entries[1];
        assert_eq!(desc.fingerprint, "00FF300624FECA7F40515C8D854EE925332580D6");
        assert!(desc.published.is_some());
        assert!(desc.last_status.is_some());
        assert_eq!(desc.exit_addresses.len(), 3);

        let (addr, _date) = &desc.exit_addresses[0];
        assert_eq!(*addr, Ipv4Addr::new(82, 252, 181, 153));
    }

    #[test]
    fn test_parse_single_entry() {
        let content = r#"ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
Published 2013-08-19 02:13:53
LastStatus 2013-08-19 03:02:47
ExitAddress 66.223.170.168 2013-08-19 03:18:51"#;

        let entry = TorDNSEL::parse(content).unwrap();
        assert_eq!(
            entry.fingerprint,
            "003A71137D959748C8157C4A76ECA639CEF5E33E"
        );
        assert_eq!(entry.exit_addresses.len(), 1);
        // A directly parsed entry keeps exactly the bytes it was given.
        assert_eq!(entry.raw_content(), content.as_bytes());
        assert!(entry.unrecognized_lines().is_empty());
    }

    #[test]
    fn test_invalid_fingerprint() {
        let content = "ExitNode 030B22437D99B2DB2908B747B6";
        let result = TorDNSEL::parse(content);
        assert!(matches!(result, Err(Error::Parse { .. })));
    }

    #[test]
    fn test_missing_fingerprint() {
        let content = r#"Published 2013-08-19 02:13:53
ExitAddress 66.223.170.168 2013-08-19 03:18:51"#;
        let result = TorDNSEL::parse(content);
        assert!(matches!(result, Err(Error::Parse { .. })));
    }

    #[test]
    fn test_malformed_date_skipped() {
        let content = r#"ExitNode 030B22437D99B2DB2908B747B6962EAD13AB4038
Published Today!
LastStatus 2013-08-18 13:02:57
ExitAddress 46.10.211.205 Never"#;

        let entry = TorDNSEL::parse(content).unwrap();
        assert_eq!(
            entry.fingerprint,
            "030B22437D99B2DB2908B747B6962EAD13AB4038"
        );
        assert!(entry.published.is_none());
        // A bad timestamp elsewhere must not affect well-formed fields.
        assert_eq!(
            entry
                .last_status
                .map(|dt| dt.format(TIMESTAMP_FORMAT).to_string()),
            Some("2013-08-18 13:02:57".to_string())
        );
        assert_eq!(entry.exit_addresses.len(), 0);
        // Known keywords with bad values are dropped, not "unrecognized".
        assert!(entry.unrecognized_lines().is_empty());
    }

    #[test]
    fn test_to_descriptor_string() {
        let content = r#"ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
Published 2013-08-19 02:13:53
LastStatus 2013-08-19 03:02:47
ExitAddress 66.223.170.168 2013-08-19 03:18:51"#;

        let entry = TorDNSEL::parse(content).unwrap();
        let output = entry.to_descriptor_string();
        assert!(output.contains("ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E"));
        assert!(output.contains("Published 2013-08-19 02:13:53"));
        assert!(output.contains("ExitAddress 66.223.170.168"));
        // A fully well-formed entry round-trips byte for byte.
        assert_eq!(output, content);
    }

    #[test]
    fn test_parse_file_assertions() {
        let entries = parse_exit_list(TEST_DESC).unwrap();

        let fingerprints: Vec<&str> = entries.iter().map(|e| e.fingerprint.as_str()).collect();
        assert_eq!(
            fingerprints,
            [
                "003A71137D959748C8157C4A76ECA639CEF5E33E",
                "00FF300624FECA7F40515C8D854EE925332580D6",
                "030B22437D99B2DB2908B747B6962EAD13AB4039",
            ]
        );

        let address_counts: Vec<usize> = entries.iter().map(|e| e.exit_addresses.len()).collect();
        assert_eq!(address_counts, [1, 3, 1]);

        let desc = &entries[1];
        assert!(desc.published.is_some());
        assert!(desc.last_status.is_some());

        let (addr, date) = &desc.exit_addresses[0];
        assert_eq!(*addr, Ipv4Addr::new(82, 252, 181, 153));
        assert_eq!(
            date.format(TIMESTAMP_FORMAT).to_string(),
            "2013-08-18 08:03:01"
        );

        // Header lines precede the first ExitNode and belong to no entry.
        for entry in &entries {
            assert!(entry.unrecognized_lines().is_empty());
        }
    }

    #[test]
    fn test_multiple_exit_addresses() {
        let content = r#"ExitNode 00FF300624FECA7F40515C8D854EE925332580D6
Published 2013-08-18 07:02:14
LastStatus 2013-08-18 09:02:58
ExitAddress 82.252.181.153 2013-08-18 08:03:01
ExitAddress 82.252.181.154 2013-08-18 08:03:02
ExitAddress 82.252.181.155 2013-08-18 08:03:03"#;

        let entry = TorDNSEL::parse(content).unwrap();
        assert_eq!(entry.exit_addresses.len(), 3);
        assert_eq!(entry.exit_addresses[0].0.to_string(), "82.252.181.153");
        assert_eq!(entry.exit_addresses[1].0.to_string(), "82.252.181.154");
        assert_eq!(entry.exit_addresses[2].0.to_string(), "82.252.181.155");
    }

    #[test]
    fn test_unrecognized_lines() {
        let content = r#"ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
Published 2013-08-19 02:13:53
UnknownField some value
ExitAddress 66.223.170.168 2013-08-19 03:18:51"#;

        let entry = TorDNSEL::parse(content).unwrap();
        assert_eq!(entry.unrecognized_lines(), &["UnknownField some value"]);
    }

    #[test]
    fn test_type_annotation_ignored() {
        let content = r#"@type tordnsel 1.0
ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
Published 2013-08-19 02:13:53"#;

        let entry = TorDNSEL::parse(content).unwrap();
        assert_eq!(
            entry.fingerprint,
            "003A71137D959748C8157C4A76ECA639CEF5E33E"
        );
        assert!(entry.unrecognized_lines().is_empty());
    }

    #[test]
    fn test_downloaded_line_ignored() {
        let content = r#"Downloaded 2013-08-19 04:02:03
ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
Published 2013-08-19 02:13:53"#;

        let entry = TorDNSEL::parse(content).unwrap();
        assert_eq!(
            entry.fingerprint,
            "003A71137D959748C8157C4A76ECA639CEF5E33E"
        );
        assert!(entry.unrecognized_lines().is_empty());
    }

    #[test]
    fn test_empty_exit_list() {
        assert!(parse_exit_list("").unwrap().is_empty());
    }

    #[test]
    fn test_header_only_exit_list() {
        let content = "@type tordnsel 1.0\nDownloaded 2013-08-19 04:02:03\n";
        assert!(parse_exit_list(content).unwrap().is_empty());
    }

    #[test]
    fn test_invalid_fingerprint_in_list() {
        let content = r#"ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E
Published 2013-08-19 02:13:53
ExitNode NotAFingerprint
Published 2013-08-19 02:13:53"#;

        let result = parse_exit_list(content);
        assert!(matches!(result, Err(Error::Parse { .. })));
    }
}