stem_rs/descriptor/mod.rs
1//! Descriptor parsing for Tor network documents.
2//!
3//! This module provides types for parsing various Tor descriptor formats
4//! including server descriptors, microdescriptors, consensus documents,
5//! and hidden service descriptors.
6//!
7//! # Overview
8//!
9//! Tor relays and directory authorities publish various types of descriptors
10//! that describe the network topology, relay capabilities, and routing
11//! information. This module provides parsers for all major descriptor types:
12//!
13//! - [`ServerDescriptor`] - Full relay metadata including keys, policies, and capabilities
14//! - [`Microdescriptor`] - Compact client-side descriptors with essential routing info
15//! - [`NetworkStatusDocument`] - Consensus documents listing all relays and their status
16//! - [`ExtraInfoDescriptor`] - Bandwidth statistics and additional relay information
17//! - [`HiddenServiceDescriptorV2`] / [`HiddenServiceDescriptorV3`] - Onion service descriptors
18//! - [`Ed25519Certificate`] - Ed25519 certificates used by relays
19//! - [`KeyCertificate`] - Directory authority key certificates
20//! - [`BandwidthFile`] - Bandwidth authority measurement files
21//! - [`TorDNSEL`] - Exit list data from TorDNSEL
22//!
23//! # Descriptor Sources
24//!
25//! Descriptors can be obtained from several sources:
26//!
27//! - **Tor's data directory**: Cached files like `cached-descriptors`, `cached-consensus`
28//! - **Directory authorities**: Via the [`remote`] module's download functions
29//! - **CollecTor archives**: Historical descriptors with `@type` annotations
30//!
31//! # Type Annotations
32//!
33//! Descriptors from [CollecTor](https://metrics.torproject.org/collector.html) include
34//! a type annotation on the first line in the format `@type <name> <major>.<minor>`.
35//! The [`TypeAnnotation`] struct parses these annotations, and [`parse_file`] handles
36//! them automatically.
37//!
38//! # Compression
39//!
40//! Downloaded descriptors are often compressed. This module supports automatic
41//! decompression via [`auto_decompress`] for:
42//!
43//! - **Plaintext** - Uncompressed data
44//! - **Gzip** - Standard gzip compression (fully supported)
45//! - **Zstd** - Zstandard compression (detection only, requires external crate)
46//! - **LZMA** - LZMA/XZ compression (detection only, requires external crate)
47//!
48//! # Digests
49//!
50//! Descriptors have cryptographic digests used for identification and verification.
51//! The [`compute_digest`] function and [`Descriptor::digest`] method support:
52//!
53//! - [`DigestHash::Sha1`] - SHA-1 hash (legacy, used by older descriptors)
54//! - [`DigestHash::Sha256`] - SHA-256 hash (modern descriptors)
55//!
56//! With encodings:
57//!
58//! - [`DigestEncoding::Raw`] - Raw bytes as characters
59//! - [`DigestEncoding::Hex`] - Uppercase hexadecimal
60//! - [`DigestEncoding::Base64`] - Base64 without padding
61//!
62//! # Example
63//!
64//! ```rust,no_run
65//! use stem_rs::descriptor::{parse_file, ServerDescriptor, Descriptor};
66//! use stem_rs::descriptor::{DigestHash, DigestEncoding};
67//!
68//! // Parse a server descriptor from file contents
69//! let content = std::fs::read("cached-descriptors").unwrap();
70//! let descriptor: ServerDescriptor = parse_file(&content).unwrap();
71//!
72//! // Access descriptor fields
73//! println!("Nickname: {}", descriptor.nickname);
74//! println!("Address: {}", descriptor.address);
75//!
76//! // Compute the descriptor's digest
77//! let digest = descriptor.digest(DigestHash::Sha1, DigestEncoding::Hex).unwrap();
78//! println!("Digest: {}", digest);
79//! ```
80//!
81//! # See Also
82//!
83//! - [`remote`] - Download descriptors from directory authorities
84//! - [`server`] - Server descriptor parsing
85//! - [`micro`] - Microdescriptor parsing
86//! - [`consensus`] - Network status document parsing
87//! - [`hidden`] - Hidden service descriptor parsing
88//!
//! # External References
90//!
91//! - [Tor Directory Protocol Specification](https://spec.torproject.org/dir-spec)
92//! - [Python Stem descriptor module](https://stem.torproject.org/api/descriptor/descriptor.html)
93
94pub mod authority;
95pub mod bandwidth_file;
96pub mod cache;
97pub mod certificate;
98pub mod consensus;
99pub mod extra_info;
100pub mod hidden;
101pub mod key_cert;
102pub mod micro;
103pub mod remote;
104pub mod router_status;
105pub mod server;
106pub mod tordnsel;
107
108pub use authority::{DirectoryAuthority, SharedRandomnessCommitment};
109pub use bandwidth_file::{BandwidthFile, BandwidthMeasurement, RecentStats, RelayFailures};
110pub use cache::{CacheStats, DescriptorCache};
111pub use certificate::{
112 Ed25519Certificate, Ed25519Extension, ExtensionFlag, ExtensionType, ED25519_HEADER_LENGTH,
113 ED25519_KEY_LENGTH, ED25519_SIGNATURE_LENGTH,
114};
115pub use consensus::{
116 DocumentSignature, NetworkStatusDocument, NetworkStatusDocumentBuilder, SharedRandomness,
117};
118pub use extra_info::{
119 BandwidthHistory, DirResponse, DirStat, ExtraInfoDescriptor, ExtraInfoDescriptorBuilder,
120 PortKey, Transport,
121};
122pub use hidden::{
123 AuthorizedClient, HiddenServiceDescriptorV2, HiddenServiceDescriptorV3, InnerLayer,
124 IntroductionPointV2, IntroductionPointV3, LinkSpecifier, OuterLayer,
125};
126pub use key_cert::KeyCertificate;
127pub use micro::{Microdescriptor, MicrodescriptorBuilder};
128pub use remote::{
129 download_bandwidth_file, download_consensus, download_detached_signatures,
130 download_extrainfo_descriptors, download_from_dirport, download_key_certificates,
131 download_microdescriptors, download_server_descriptors, get_authorities, Compression, DirPort,
132 DownloadResult,
133};
134pub use router_status::{MicrodescriptorHash, RouterStatusEntry, RouterStatusEntryType};
135pub use server::{ServerDescriptor, ServerDescriptorBuilder};
136pub use tordnsel::{parse_exit_list, parse_exit_list_bytes, TorDNSEL};
137
138use crate::Error;
139#[cfg(feature = "compression")]
140use flate2::read::GzDecoder;
141use sha1::{Digest as Sha1Digest, Sha1};
142use sha2::Sha256;
143#[cfg(feature = "compression")]
144use std::io::Read;
145use std::path::Path;
146use thiserror::Error as ThisError;
147
148/// Errors that can occur when parsing network status consensus documents.
149///
150/// This error type provides specific information about what went wrong during
151/// consensus parsing, making it easier to diagnose and fix issues with malformed
152/// consensus documents.
153///
154/// # Example
155///
156/// ```rust
157/// use stem_rs::descriptor::ConsensusError;
158///
159/// fn handle_consensus_error(err: ConsensusError) {
160/// match err {
161/// ConsensusError::InvalidFingerprint(fp) => {
162/// eprintln!("Invalid relay fingerprint: {}", fp);
163/// }
164/// ConsensusError::TimestampOrderingViolation(msg) => {
165/// eprintln!("Timestamp ordering issue: {}", msg);
166/// }
167/// _ => eprintln!("Consensus parse error: {}", err),
168/// }
169/// }
170/// ```
171#[derive(Debug, ThisError)]
172pub enum ConsensusError {
173 /// IO error occurred while reading consensus data.
174 #[error("IO error: {0}")]
175 Io(#[from] std::io::Error),
176
177 /// Network status version is not supported.
178 #[error("Invalid network status version: expected 3, got {0}")]
179 InvalidNetworkStatusVersion(String),
180
181 /// Vote status field has invalid value.
182 #[error("Invalid vote status: expected 'vote' or 'consensus', got {0}")]
183 InvalidVoteStatus(String),
184
185 /// Timestamp format is invalid or unparseable.
186 #[error("Invalid timestamp format: {0}")]
187 InvalidTimestamp(String),
188
189 /// Voting delay line has wrong number of values.
190 #[error("Invalid voting delay: expected 2 values, got {0}")]
191 InvalidVotingDelay(String),
192
193 /// Relay fingerprint format is invalid.
194 #[error("Invalid fingerprint: {0}")]
195 InvalidFingerprint(String),
196
197 /// IP address format is invalid.
198 #[error("Invalid IP address: {0}")]
199 InvalidIpAddress(#[from] std::net::AddrParseError),
200
201 /// Port number is invalid or out of range.
202 #[error("Invalid port number: {0}")]
203 InvalidPort(#[from] std::num::ParseIntError),
204
205 /// Bandwidth value is invalid or unparseable.
206 #[error("Invalid bandwidth value: {0}")]
207 InvalidBandwidth(String),
208
209 /// Relay flag is not recognized.
210 #[error("Invalid flag: {0}")]
211 InvalidFlag(String),
212
213 /// Protocol version string is malformed.
214 #[error("Invalid protocol version: {0}")]
215 InvalidProtocolVersion(String),
216
217 /// Base64 encoding is invalid.
218 #[error("Invalid base64 encoding: {0}")]
219 InvalidBase64(String),
220
221 /// Cryptographic signature is invalid.
222 #[error("Invalid signature: {0}")]
223 InvalidSignature(String),
224
225 /// Required field is missing from consensus.
226 #[error("Missing required field: {0}")]
227 MissingRequiredField(String),
228
229 /// Timestamps are not in correct order (valid-after < fresh-until < valid-until).
230 #[error("Timestamp ordering violation: {0}")]
231 TimestampOrderingViolation(String),
232
233 /// Line format is invalid at specific location.
234 #[error("Invalid line format at line {line}: {reason}")]
235 InvalidLineFormat {
236 /// Line number where error occurred.
237 line: usize,
238 /// Description of the format error.
239 reason: String,
240 },
241}
242
243/// Errors that can occur when parsing server descriptors.
244///
245/// Server descriptors contain full relay metadata including identity keys,
246/// exit policies, bandwidth information, and platform details.
247///
248/// # Example
249///
250/// ```rust
251/// use stem_rs::descriptor::ServerDescriptorError;
252///
253/// fn handle_server_error(err: ServerDescriptorError) {
254/// match err {
255/// ServerDescriptorError::InvalidNickname(nick) => {
256/// eprintln!("Invalid relay nickname: {}", nick);
257/// }
258/// ServerDescriptorError::MissingRequiredField(field) => {
259/// eprintln!("Missing required field: {}", field);
260/// }
261/// _ => eprintln!("Server descriptor parse error: {}", err),
262/// }
263/// }
264/// ```
265#[derive(Debug, ThisError)]
266pub enum ServerDescriptorError {
267 /// IO error occurred while reading descriptor data.
268 #[error("IO error: {0}")]
269 Io(#[from] std::io::Error),
270
271 /// Router line has wrong number of components.
272 #[error("Invalid router line format: expected 5 parts, got {actual}")]
273 InvalidRouterFormat {
274 /// Actual number of parts found.
275 actual: usize,
276 },
277
278 /// Relay nickname is invalid (must be 1-19 alphanumeric characters).
279 #[error("Invalid nickname: {0}")]
280 InvalidNickname(String),
281
282 /// IP address format is invalid.
283 #[error("Invalid IP address: {0}")]
284 InvalidIpAddress(#[from] std::net::AddrParseError),
285
286 /// Port number is invalid or out of range.
287 #[error("Invalid port number: {0}")]
288 InvalidPort(#[from] std::num::ParseIntError),
289
290 /// Bandwidth line has wrong number of values.
291 #[error("Invalid bandwidth line format: expected 3 parts, got {actual}")]
292 InvalidBandwidthFormat {
293 /// Actual number of parts found.
294 actual: usize,
295 },
296
297 /// Bandwidth value is invalid or unparseable.
298 #[error("Invalid bandwidth value: {0}")]
299 InvalidBandwidth(String),
300
301 /// Published date format is invalid.
302 #[error("Invalid published date format: {0}")]
303 InvalidPublishedDate(String),
304
305 /// Fingerprint format is invalid (must be 40 hex characters).
306 #[error("Invalid fingerprint format: {0}")]
307 InvalidFingerprint(String),
308
309 /// RSA public key is malformed or invalid.
310 #[error("Invalid RSA public key: {0}")]
311 InvalidRsaKey(String),
312
313 /// Ed25519 identity key is invalid.
314 #[error("Invalid Ed25519 identity: {0}")]
315 InvalidEd25519Identity(String),
316
317 /// Exit policy format is invalid.
318 #[error("Invalid exit policy format: {0}")]
319 InvalidExitPolicy(String),
320
321 /// Protocol version string is malformed.
322 #[error("Invalid protocol version: {0}")]
323 InvalidProtocolVersion(String),
324
325 /// Required field is missing from descriptor.
326 #[error("Missing required field: {0}")]
327 MissingRequiredField(String),
328
329 /// Line format is invalid at specific location.
330 #[error("Invalid line format at line {line}: {reason}")]
331 InvalidLineFormat {
332 /// Line number where error occurred.
333 line: usize,
334 /// Description of the format error.
335 reason: String,
336 },
337}
338
339/// Errors that can occur when parsing microdescriptors.
340///
341/// Microdescriptors are compact descriptors used by clients for building
342/// circuits with minimal bandwidth overhead.
343///
344/// # Example
345///
346/// ```rust
347/// use stem_rs::descriptor::MicrodescriptorError;
348///
349/// fn handle_micro_error(err: MicrodescriptorError) {
350/// match err {
351/// MicrodescriptorError::InvalidOnionKey(msg) => {
352/// eprintln!("Invalid onion key: {}", msg);
353/// }
354/// MicrodescriptorError::MissingRequiredField(field) => {
355/// eprintln!("Missing required field: {}", field);
356/// }
357/// _ => eprintln!("Microdescriptor parse error: {}", err),
358/// }
359/// }
360/// ```
361#[derive(Debug, ThisError)]
362pub enum MicrodescriptorError {
363 /// IO error occurred while reading descriptor data.
364 #[error("IO error: {0}")]
365 Io(#[from] std::io::Error),
366
367 /// Onion key format is invalid.
368 #[error("Invalid onion key format: {0}")]
369 InvalidOnionKey(String),
370
371 /// Ntor onion key format is invalid.
372 #[error("Invalid ntor onion key format: {0}")]
373 InvalidNtorOnionKey(String),
374
375 /// Socket address format is invalid.
376 #[error("Invalid socket address: {0}")]
377 InvalidSocketAddress(#[from] std::net::AddrParseError),
378
379 /// Relay family specification is invalid.
380 #[error("Invalid relay family: {0}")]
381 InvalidRelayFamily(String),
382
383 /// Port policy format is invalid.
384 #[error("Invalid port policy: {0}")]
385 InvalidPortPolicy(String),
386
387 /// Base64 encoding is invalid.
388 #[error("Invalid base64 encoding: {0}")]
389 InvalidBase64(String),
390
391 /// Identity key has wrong length for algorithm.
392 #[error("Invalid identity length for {algorithm}: expected {expected}, got {actual}")]
393 InvalidIdentityLength {
394 /// Algorithm name (e.g., "ed25519").
395 algorithm: String,
396 /// Expected length in bytes.
397 expected: usize,
398 /// Actual length found.
399 actual: usize,
400 },
401
402 /// Identity algorithm is not recognized.
403 #[error("Unknown identity algorithm: {0}")]
404 UnknownIdentityAlgorithm(String),
405
406 /// Cryptographic block is incomplete.
407 #[error("Incomplete crypto block for key type: {0}")]
408 IncompleteCryptoBlock(String),
409
410 /// Required field is missing from descriptor.
411 #[error("Missing required field: {0}")]
412 MissingRequiredField(String),
413}
414
415/// Errors that can occur when parsing extra-info descriptors.
416///
417/// Extra-info descriptors contain bandwidth statistics and additional
418/// relay information not included in server descriptors.
419#[derive(Debug, ThisError)]
420pub enum ExtraInfoError {
421 /// IO error occurred while reading descriptor data.
422 #[error("IO error: {0}")]
423 Io(#[from] std::io::Error),
424
425 /// Extra-info line has wrong number of components.
426 #[error("Invalid extra-info line format: expected 3 parts, got {actual}")]
427 InvalidExtraInfoFormat {
428 /// Actual number of parts found.
429 actual: usize,
430 },
431
432 /// Relay nickname is invalid.
433 #[error("Invalid nickname: {0}")]
434 InvalidNickname(String),
435
436 /// Fingerprint format is invalid.
437 #[error("Invalid fingerprint: {0}")]
438 InvalidFingerprint(String),
439
440 /// Published date format is invalid.
441 #[error("Invalid published date format: {0}")]
442 InvalidPublishedDate(String),
443
444 /// Bandwidth history format is invalid.
445 #[error("Invalid bandwidth history format: {0}")]
446 InvalidBandwidthHistory(String),
447
448 /// Timestamp format is invalid.
449 #[error("Invalid timestamp: {0}")]
450 InvalidTimestamp(String),
451
452 /// Required field is missing from descriptor.
453 #[error("Missing required field: {0}")]
454 MissingRequiredField(String),
455}
456
457/// Errors that can occur when parsing hidden service descriptors.
458///
459/// Hidden service descriptors (v2 and v3) contain information needed
460/// to connect to onion services.
461#[derive(Debug, ThisError)]
462pub enum HiddenServiceDescriptorError {
463 /// IO error occurred while reading descriptor data.
464 #[error("IO error: {0}")]
465 Io(#[from] std::io::Error),
466
467 /// Descriptor version is not supported.
468 #[error("Invalid descriptor version: expected 2 or 3, got {0}")]
469 InvalidDescriptorVersion(u32),
470
471 /// Onion address format is invalid.
472 #[error("Invalid onion address: {0}")]
473 InvalidOnionAddress(String),
474
475 /// Introduction point specification is invalid.
476 #[error("Invalid introduction point: {0}")]
477 InvalidIntroductionPoint(String),
478
479 /// Encryption key is malformed.
480 #[error("Invalid encryption key: {0}")]
481 InvalidEncryptionKey(String),
482
483 /// Cryptographic signature is invalid.
484 #[error("Invalid signature: {0}")]
485 InvalidSignature(String),
486
487 /// Base64 encoding is invalid.
488 #[error("Invalid base64 encoding: {0}")]
489 InvalidBase64(String),
490
491 /// Required field is missing from descriptor.
492 #[error("Missing required field: {0}")]
493 MissingRequiredField(String),
494}
495
496/// Errors that can occur when parsing directory key certificates.
497///
498/// Key certificates bind directory authority signing keys to their
499/// long-term identity keys.
500#[derive(Debug, ThisError)]
501pub enum KeyCertificateError {
502 /// IO error occurred while reading certificate data.
503 #[error("IO error: {0}")]
504 Io(#[from] std::io::Error),
505
506 /// Certificate version is not supported.
507 #[error("Invalid certificate version: expected 3, got {0}")]
508 InvalidCertificateVersion(u32),
509
510 /// Fingerprint format is invalid.
511 #[error("Invalid fingerprint: {0}")]
512 InvalidFingerprint(String),
513
514 /// Timestamp format is invalid.
515 #[error("Invalid timestamp: {0}")]
516 InvalidTimestamp(String),
517
518 /// RSA key is malformed.
519 #[error("Invalid RSA key: {0}")]
520 InvalidRsaKey(String),
521
522 /// Cryptographic signature is invalid.
523 #[error("Invalid signature: {0}")]
524 InvalidSignature(String),
525
526 /// Required field is missing from certificate.
527 #[error("Missing required field: {0}")]
528 MissingRequiredField(String),
529}
530
531/// Errors that can occur when parsing bandwidth measurement files.
532///
533/// Bandwidth files contain relay capacity measurements from bandwidth
534/// authorities used to compute consensus weights.
535#[derive(Debug, ThisError)]
536pub enum BandwidthFileError {
537 /// IO error occurred while reading bandwidth file.
538 #[error("IO error: {0}")]
539 Io(#[from] std::io::Error),
540
541 /// Header format is invalid.
542 #[error("Invalid header format: {0}")]
543 InvalidHeaderFormat(String),
544
545 /// Timestamp format is invalid.
546 #[error("Invalid timestamp: {0}")]
547 InvalidTimestamp(String),
548
549 /// Bandwidth value is invalid or unparseable.
550 #[error("Invalid bandwidth value: {0}")]
551 InvalidBandwidth(String),
552
553 /// Fingerprint format is invalid.
554 #[error("Invalid fingerprint: {0}")]
555 InvalidFingerprint(String),
556
557 /// Required header field is missing.
558 #[error("Missing required header field: {0}")]
559 MissingRequiredHeaderField(String),
560}
561
562/// Errors that can occur when parsing TorDNSEL exit lists.
563///
564/// TorDNSEL exit lists contain IP addresses of Tor exit relays.
565#[derive(Debug, ThisError)]
566pub enum TorDNSELError {
567 /// IO error occurred while reading exit list.
568 #[error("IO error: {0}")]
569 Io(#[from] std::io::Error),
570
571 /// IP address format is invalid.
572 #[error("Invalid IP address: {0}")]
573 InvalidIpAddress(#[from] std::net::AddrParseError),
574
575 /// Timestamp format is invalid.
576 #[error("Invalid timestamp: {0}")]
577 InvalidTimestamp(String),
578
579 /// Exit address line format is invalid.
580 #[error("Invalid exit address format: {0}")]
581 InvalidExitAddressFormat(String),
582}
583
584/// Unified error type for all descriptor parsing operations.
585///
586/// This enum wraps all descriptor-specific error types, providing a single
587/// error type that can represent failures from any descriptor parser.
588///
589/// # Design
590///
591/// Following the library-rs reference implementation, this uses transparent
592/// error forwarding with `#[error(transparent)]` to preserve the underlying
593/// error's Display implementation and source chain.
594///
595/// # Example
596///
597/// ```rust
598/// use stem_rs::descriptor::DescriptorError;
599///
600/// fn handle_descriptor_error(err: DescriptorError) {
601/// match err {
602/// DescriptorError::Consensus(e) => {
603/// eprintln!("Consensus error: {}", e);
604/// }
605/// DescriptorError::ServerDescriptor(e) => {
606/// eprintln!("Server descriptor error: {}", e);
607/// }
608/// DescriptorError::UnsupportedCompression(format) => {
609/// eprintln!("Unsupported compression: {}", format);
610/// }
611/// _ => eprintln!("Descriptor error: {}", err),
612/// }
613/// }
614/// ```
615#[derive(Debug, ThisError)]
616pub enum DescriptorError {
617 /// Error parsing network status consensus document.
618 #[error(transparent)]
619 Consensus(#[from] ConsensusError),
620
621 /// Error parsing server descriptor.
622 #[error(transparent)]
623 ServerDescriptor(#[from] ServerDescriptorError),
624
625 /// Error parsing microdescriptor.
626 #[error(transparent)]
627 Microdescriptor(#[from] MicrodescriptorError),
628
629 /// Error parsing extra-info descriptor.
630 #[error(transparent)]
631 ExtraInfo(#[from] ExtraInfoError),
632
633 /// Error parsing hidden service descriptor.
634 #[error(transparent)]
635 HiddenService(#[from] HiddenServiceDescriptorError),
636
637 /// Error parsing directory key certificate.
638 #[error(transparent)]
639 KeyCertificate(#[from] KeyCertificateError),
640
641 /// Error parsing bandwidth measurement file.
642 #[error(transparent)]
643 BandwidthFile(#[from] BandwidthFileError),
644
645 /// Error parsing TorDNSEL exit list.
646 #[error(transparent)]
647 TorDNSEL(#[from] TorDNSELError),
648
649 /// Compression format is not supported.
650 #[error("Unsupported compression format: {0}")]
651 UnsupportedCompression(String),
652
653 /// Decompression failed.
654 #[error("Decompression failed: {0}")]
655 DecompressionFailed(String),
656
657 /// Descriptor contains invalid UTF-8.
658 #[error("Invalid UTF-8 in descriptor: {0}")]
659 InvalidUtf8(#[from] std::string::FromUtf8Error),
660}
661
/// A type annotation from CollecTor descriptor archives.
///
/// CollecTor archives put a type annotation on the first line of each
/// descriptor file, in the form `@type <name> <major>.<minor>`. This struct
/// is the parsed representation of such a line.
///
/// # Format
///
/// ```text
/// @type server-descriptor 1.0
/// @type network-status-consensus-3 1.0
/// @type microdescriptor 1.0
/// ```
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::TypeAnnotation;
///
/// let annotation = TypeAnnotation::parse("@type server-descriptor 1.0").unwrap();
/// assert_eq!(annotation.name, "server-descriptor");
/// assert_eq!(annotation.major_version, 1);
/// assert_eq!(annotation.minor_version, 0);
///
/// // Convert back to string
/// assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
/// ```
///
/// # See Also
///
/// - [`DescriptorType`] - Enum of known descriptor types
/// - [`strip_type_annotation`] - Extract annotation from content
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TypeAnnotation {
    /// The descriptor type name (e.g., "server-descriptor", "microdescriptor").
    pub name: String,
    /// The major version number.
    pub major_version: u32,
    /// The minor version number.
    pub minor_version: u32,
}

impl TypeAnnotation {
    /// Creates a new type annotation with the given name and version.
    ///
    /// # Arguments
    ///
    /// * `name` - The descriptor type name
    /// * `major_version` - The major version number
    /// * `minor_version` - The minor version number
    ///
    /// # Example
    ///
    /// ```rust
    /// use stem_rs::descriptor::TypeAnnotation;
    ///
    /// let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
    /// assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
    /// ```
    pub fn new(name: impl Into<String>, major_version: u32, minor_version: u32) -> Self {
        Self {
            name: name.into(),
            major_version,
            minor_version,
        }
    }

    /// Parses a type annotation from a line of text.
    ///
    /// Surrounding whitespace on the line is ignored. After the `@type `
    /// prefix the line must hold exactly two whitespace-separated fields: the
    /// name and a `<major>.<minor>` version whose components consist solely
    /// of ASCII digits and fit in a `u32`.
    ///
    /// Returns `None` if the line is not a valid type annotation.
    ///
    /// # Arguments
    ///
    /// * `line` - The line to parse
    ///
    /// # Example
    ///
    /// ```rust
    /// use stem_rs::descriptor::TypeAnnotation;
    ///
    /// // Valid annotation
    /// let annotation = TypeAnnotation::parse("@type extra-info 1.0").unwrap();
    /// assert_eq!(annotation.name, "extra-info");
    ///
    /// // Invalid - not an annotation
    /// assert!(TypeAnnotation::parse("router test 127.0.0.1").is_none());
    ///
    /// // Invalid - missing version
    /// assert!(TypeAnnotation::parse("@type server-descriptor").is_none());
    ///
    /// // Invalid - version components must be plain digits
    /// assert!(TypeAnnotation::parse("@type server-descriptor +1.0").is_none());
    /// ```
    pub fn parse(line: &str) -> Option<Self> {
        let rest = line.trim().strip_prefix("@type ")?;

        // Exactly two fields are allowed: the name and the version.
        let mut fields = rest.split_whitespace();
        let name = fields.next()?;
        let version = fields.next()?;
        if fields.next().is_some() {
            return None;
        }

        // Splitting at the first '.' leaves any further '.' in `minor`, where
        // the digit check rejects it, so "1.0.0" is refused.
        let (major, minor) = version.split_once('.')?;
        let major_version = Self::parse_version_component(major)?;
        let minor_version = Self::parse_version_component(minor)?;

        Some(Self {
            name: name.to_string(),
            major_version,
            minor_version,
        })
    }

    /// Parses one version component, accepting ASCII digits only.
    ///
    /// `u32::from_str` on its own also accepts a leading `+`, which is not
    /// part of the annotation grammar and would not survive a round trip
    /// through `Display`.
    fn parse_version_component(text: &str) -> Option<u32> {
        if !text.bytes().all(|byte| byte.is_ascii_digit()) {
            return None;
        }
        // Empty input and values that overflow `u32` are rejected here.
        text.parse().ok()
    }
}

impl std::fmt::Display for TypeAnnotation {
    /// Writes the annotation in its canonical `@type <name> <major>.<minor>` form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "@type {} {}.{}",
            self.name, self.major_version, self.minor_version
        )
    }
}
790
/// The descriptor kinds this module can identify.
///
/// A value of this enum can be derived from a CollecTor `@type` annotation
/// or from a conventional filename. Every variant documents the annotation
/// name it corresponds to.
///
/// # Stability
///
/// New variants may be added as Tor introduces further descriptor types.
/// The enum does not carry `#[non_exhaustive]`, so include a wildcard arm
/// when matching if you want to stay compatible with such additions.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{DescriptorType, TypeAnnotation};
///
/// // From type annotation
/// let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
/// let desc_type = DescriptorType::from_annotation(&annotation);
/// assert_eq!(desc_type, Some(DescriptorType::ServerDescriptor));
///
/// // From filename
/// let desc_type = DescriptorType::from_filename("cached-consensus");
/// assert_eq!(desc_type, Some(DescriptorType::NetworkStatusConsensusV3));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DescriptorType {
    /// Server descriptor: full relay metadata such as identity keys, exit
    /// policy, and bandwidth.
    ///
    /// Annotation name: `server-descriptor`
    ServerDescriptor,
    /// Extra-info descriptor: detailed statistics about relay operation.
    ///
    /// Annotation name: `extra-info`
    ExtraInfo,
    /// Microdescriptor: compact routing information used by clients to build
    /// circuits with minimal data.
    ///
    /// Annotation name: `microdescriptor`
    Microdescriptor,
    /// Network status consensus (v3): the view of the network agreed upon
    /// and signed by the directory authorities.
    ///
    /// Annotation name: `network-status-consensus-3`
    NetworkStatusConsensusV3,
    /// Network status vote (v3): a single directory authority's view before
    /// consensus.
    ///
    /// Annotation name: `network-status-vote-3`
    NetworkStatusVoteV3,
    /// Microdescriptor-flavored consensus (v3): a consensus that references
    /// microdescriptor hashes instead of full descriptors.
    ///
    /// Annotation name: `network-status-microdesc-consensus-3`
    NetworkStatusMicrodescConsensusV3,
    /// Bridge network status: network status for bridge relays, which are
    /// not publicly listed.
    ///
    /// Annotation name: `bridge-network-status`
    BridgeNetworkStatus,
    /// Bridge server descriptor: a server descriptor for a bridge relay,
    /// with some fields redacted.
    ///
    /// Annotation name: `bridge-server-descriptor`
    BridgeServerDescriptor,
    /// Bridge extra-info descriptor: extra-info for a bridge relay.
    ///
    /// Annotation name: `bridge-extra-info`
    BridgeExtraInfo,
    /// Directory key certificate (v3): binds a directory authority's signing
    /// key to its identity.
    ///
    /// Annotation name: `dir-key-certificate-3`
    DirKeyCertificateV3,
    /// TorDNSEL exit list: exit relay IP addresses from the TorDNSEL service.
    ///
    /// Annotation name: `tordnsel`
    TorDNSEL,
    /// Hidden service descriptor: descriptor for an onion service (v2 or v3).
    ///
    /// Annotation name: `hidden-service-descriptor`
    HiddenServiceDescriptor,
    /// Bandwidth file: measurements produced by bandwidth authorities.
    ///
    /// Annotation name: `bandwidth-file`
    BandwidthFile,
}
884
885impl DescriptorType {
886 /// Returns the annotation name for this descriptor type.
887 ///
888 /// This is the name used in `@type` annotations in CollecTor archives.
889 ///
890 /// # Example
891 ///
892 /// ```rust
893 /// use stem_rs::descriptor::DescriptorType;
894 ///
895 /// assert_eq!(DescriptorType::ServerDescriptor.annotation_name(), "server-descriptor");
896 /// assert_eq!(DescriptorType::Microdescriptor.annotation_name(), "microdescriptor");
897 /// ```
898 pub fn annotation_name(&self) -> &'static str {
899 match self {
900 Self::ServerDescriptor => "server-descriptor",
901 Self::ExtraInfo => "extra-info",
902 Self::Microdescriptor => "microdescriptor",
903 Self::NetworkStatusConsensusV3 => "network-status-consensus-3",
904 Self::NetworkStatusVoteV3 => "network-status-vote-3",
905 Self::NetworkStatusMicrodescConsensusV3 => "network-status-microdesc-consensus-3",
906 Self::BridgeNetworkStatus => "bridge-network-status",
907 Self::BridgeServerDescriptor => "bridge-server-descriptor",
908 Self::BridgeExtraInfo => "bridge-extra-info",
909 Self::DirKeyCertificateV3 => "dir-key-certificate-3",
910 Self::TorDNSEL => "tordnsel",
911 Self::HiddenServiceDescriptor => "hidden-service-descriptor",
912 Self::BandwidthFile => "bandwidth-file",
913 }
914 }
915
916 /// Determines the descriptor type from a type annotation.
917 ///
918 /// Returns `None` if the annotation name is not recognized.
919 ///
920 /// # Arguments
921 ///
922 /// * `annotation` - The type annotation to match
923 ///
924 /// # Example
925 ///
926 /// ```rust
927 /// use stem_rs::descriptor::{DescriptorType, TypeAnnotation};
928 ///
929 /// let annotation = TypeAnnotation::new("extra-info", 1, 0);
930 /// assert_eq!(
931 /// DescriptorType::from_annotation(&annotation),
932 /// Some(DescriptorType::ExtraInfo)
933 /// );
934 ///
935 /// let unknown = TypeAnnotation::new("unknown-type", 1, 0);
936 /// assert_eq!(DescriptorType::from_annotation(&unknown), None);
937 /// ```
938 pub fn from_annotation(annotation: &TypeAnnotation) -> Option<Self> {
939 match annotation.name.as_str() {
940 "server-descriptor" => Some(Self::ServerDescriptor),
941 "extra-info" => Some(Self::ExtraInfo),
942 "microdescriptor" => Some(Self::Microdescriptor),
943 "network-status-consensus-3" => Some(Self::NetworkStatusConsensusV3),
944 "network-status-vote-3" => Some(Self::NetworkStatusVoteV3),
945 "network-status-microdesc-consensus-3" => Some(Self::NetworkStatusMicrodescConsensusV3),
946 "bridge-network-status" => Some(Self::BridgeNetworkStatus),
947 "bridge-server-descriptor" => Some(Self::BridgeServerDescriptor),
948 "bridge-extra-info" => Some(Self::BridgeExtraInfo),
949 "dir-key-certificate-3" => Some(Self::DirKeyCertificateV3),
950 "tordnsel" => Some(Self::TorDNSEL),
951 "hidden-service-descriptor" => Some(Self::HiddenServiceDescriptor),
952 "bandwidth-file" => Some(Self::BandwidthFile),
953 _ => None,
954 }
955 }
956
957 /// Determines the descriptor type from a filename.
958 ///
959 /// This is useful for parsing descriptors from Tor's data directory
960 /// where files have conventional names like `cached-descriptors` or
961 /// `cached-consensus`.
962 ///
963 /// Returns `None` if the filename doesn't match a known pattern.
964 ///
965 /// # Arguments
966 ///
967 /// * `filename` - The filename to match (path components are stripped)
968 ///
969 /// # Example
970 ///
971 /// ```rust
972 /// use stem_rs::descriptor::DescriptorType;
973 ///
974 /// assert_eq!(
975 /// DescriptorType::from_filename("cached-descriptors"),
976 /// Some(DescriptorType::ServerDescriptor)
977 /// );
978 /// assert_eq!(
979 /// DescriptorType::from_filename("cached-extrainfo"),
980 /// Some(DescriptorType::ExtraInfo)
981 /// );
982 /// assert_eq!(
983 /// DescriptorType::from_filename("/var/lib/tor/cached-consensus"),
984 /// Some(DescriptorType::NetworkStatusConsensusV3)
985 /// );
986 /// assert_eq!(DescriptorType::from_filename("unknown-file"), None);
987 /// ```
988 pub fn from_filename(filename: &str) -> Option<Self> {
989 let filename = Path::new(filename)
990 .file_name()
991 .and_then(|s| s.to_str())
992 .unwrap_or(filename);
993
994 if filename.contains("cached-consensus") || filename.contains("consensus") {
995 Some(Self::NetworkStatusConsensusV3)
996 } else if filename.contains("cached-microdesc-consensus") {
997 Some(Self::NetworkStatusMicrodescConsensusV3)
998 } else if filename.contains("cached-microdescs") || filename.contains("microdescriptor") {
999 Some(Self::Microdescriptor)
1000 } else if filename.contains("cached-descriptors") || filename.contains("server-descriptor")
1001 {
1002 Some(Self::ServerDescriptor)
1003 } else if filename.contains("cached-extrainfo") || filename.contains("extra-info") {
1004 Some(Self::ExtraInfo)
1005 } else if filename.contains("exit-list") || filename.contains("tordnsel") {
1006 Some(Self::TorDNSEL)
1007 } else if filename.contains("bandwidth") {
1008 Some(Self::BandwidthFile)
1009 } else {
1010 None
1011 }
1012 }
1013}
1014
/// Selects the hash function used when computing a descriptor digest.
///
/// Descriptors are identified and verified through cryptographic hashes.
/// Legacy descriptor formats rely on SHA-1; more recent ones use SHA-256.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
///
/// let content = b"example content";
/// let sha1_digest = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
/// let sha256_digest = compute_digest(content, DigestHash::Sha256, DigestEncoding::Hex);
///
/// assert_eq!(sha1_digest.len(), 40); // 20 bytes, two hex characters each
/// assert_eq!(sha256_digest.len(), 64); // 32 bytes, two hex characters each
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DigestHash {
    /// SHA-1, producing a 160-bit (20-byte) digest.
    ///
    /// Employed by legacy descriptor types such as server descriptors and
    /// v2 hidden service descriptors. SHA-1 no longer offers strong
    /// collision resistance, but is retained for backward compatibility.
    Sha1,
    /// SHA-256, producing a 256-bit (32-byte) digest.
    ///
    /// Employed by modern descriptor types such as microdescriptors and
    /// v3 hidden service descriptors.
    Sha256,
}
1046
/// Selects how a computed digest is rendered as a `String`.
///
/// The same digest bytes can be presented in several textual forms; pick
/// the one that matches where the digest will be compared or displayed.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
///
/// let content = b"test";
///
/// // Hexadecimal encoding (uppercase)
/// let hex = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
/// assert!(hex.chars().all(|c| c.is_ascii_hexdigit()));
///
/// // Base64 encoding (without padding)
/// let b64 = compute_digest(content, DigestHash::Sha1, DigestEncoding::Base64);
/// assert!(b64.chars().all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/'));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DigestEncoding {
    /// The digest bytes carried one-to-one as characters.
    ///
    /// Every byte becomes the `char` with the same numeric value, so bytes
    /// of `0x80` and above occupy two bytes in the resulting UTF-8
    /// `String`. Intended for internal processing rather than display.
    Raw,
    /// Uppercase hexadecimal.
    ///
    /// Every byte is written as two characters from `0-9` and `A-F`.
    /// This is the usual presentation for fingerprints.
    Hex,
    /// Base64 with the trailing padding removed.
    ///
    /// Uses the standard alphabet (`A-Z`, `a-z`, `0-9`, `+`, `/`) and
    /// never emits `=` padding characters.
    Base64,
}
1084
1085/// Trait for parsing and serializing Tor descriptors.
1086///
1087/// This trait defines the common interface for all descriptor types in the
1088/// library. Implementors can parse descriptor content, serialize back to
1089/// the canonical string format, and compute cryptographic digests.
1090///
1091/// # Contract
1092///
1093/// Implementations must satisfy these invariants:
1094///
1095/// 1. **Round-trip consistency**: For any valid descriptor content,
1096/// `parse(content).to_descriptor_string()` should produce semantically
1097/// equivalent content (though whitespace may differ).
1098///
1099/// 2. **Digest stability**: The `digest()` method must return consistent
1100/// results for the same descriptor content.
1101///
1102/// 3. **Error handling**: `parse()` should return `Error::Parse` for
1103/// malformed content with a descriptive error message.
1104///
1105/// # Example
1106///
1107/// ```rust,no_run
1108/// use stem_rs::descriptor::{Descriptor, DigestHash, DigestEncoding};
1109/// use stem_rs::descriptor::ServerDescriptor;
1110///
1111/// let content = "router example 127.0.0.1 9001 0 0\n...";
1112/// let descriptor = ServerDescriptor::parse(content).unwrap();
1113///
1114/// // Serialize back to string
1115/// let serialized = descriptor.to_descriptor_string();
1116///
1117/// // Compute digest
1118/// let digest = descriptor.digest(DigestHash::Sha1, DigestEncoding::Hex).unwrap();
1119///
1120/// // Access raw content
1121/// let raw = descriptor.raw_content();
1122///
1123/// // Check for unrecognized lines
1124/// let unknown = descriptor.unrecognized_lines();
1125/// ```
1126///
1127/// # Implementors
1128///
1129/// - [`ServerDescriptor`] - Server descriptors
1130/// - [`Microdescriptor`] - Microdescriptors
1131/// - [`ExtraInfoDescriptor`] - Extra-info descriptors
1132/// - [`NetworkStatusDocument`] - Consensus documents
1133pub trait Descriptor: Sized {
1134 /// Parses a descriptor from its string content.
1135 ///
1136 /// # Arguments
1137 ///
1138 /// * `content` - The descriptor content as a string
1139 ///
1140 /// # Errors
1141 ///
1142 /// Returns [`Error::Parse`] if the content is malformed or missing
1143 /// required fields.
1144 fn parse(content: &str) -> Result<Self, Error>;
1145
1146 /// Serializes the descriptor to its canonical string format.
1147 ///
1148 /// The output should be valid descriptor content that can be parsed
1149 /// again with `parse()`.
1150 fn to_descriptor_string(&self) -> String;
1151
1152 /// Computes the cryptographic digest of the descriptor.
1153 ///
1154 /// The digest is computed over the appropriate portion of the descriptor
1155 /// content (which varies by descriptor type).
1156 ///
1157 /// # Arguments
1158 ///
1159 /// * `hash` - The hash algorithm to use
1160 /// * `encoding` - The output encoding format
1161 ///
1162 /// # Errors
1163 ///
1164 /// Returns an error if the digest cannot be computed (e.g., if the
1165 /// descriptor content is invalid).
1166 fn digest(&self, hash: DigestHash, encoding: DigestEncoding) -> Result<String, Error>;
1167
1168 /// Returns the raw bytes of the original descriptor content.
1169 ///
1170 /// This is the exact content that was parsed, preserving original
1171 /// formatting and whitespace.
1172 fn raw_content(&self) -> &[u8];
1173
1174 /// Returns lines from the descriptor that were not recognized.
1175 ///
1176 /// These are lines that don't match any known keyword for this
1177 /// descriptor type. This is useful for forward compatibility when
1178 /// new fields are added to the descriptor format.
1179 fn unrecognized_lines(&self) -> &[String];
1180}
1181
1182/// Detects the compression format of binary content.
1183///
1184/// Examines the magic bytes at the start of the content to determine
1185/// the compression format. This is useful for automatically decompressing
1186/// downloaded descriptors.
1187///
1188/// # Arguments
1189///
1190/// * `content` - The binary content to examine
1191///
1192/// # Returns
1193///
1194/// The detected [`Compression`] format, or [`Compression::Plaintext`] if
1195/// no compression is detected or the content is too short.
1196///
1197/// # Example
1198///
1199/// ```rust
1200/// use stem_rs::descriptor::{detect_compression, Compression};
1201///
1202/// // Gzip magic bytes
1203/// let gzip_content = &[0x1f, 0x8b, 0x08, 0x00];
1204/// assert_eq!(detect_compression(gzip_content), Compression::Gzip);
1205///
1206/// // Plain text
1207/// let plain = b"router example";
1208/// assert_eq!(detect_compression(plain), Compression::Plaintext);
1209/// ```
1210pub fn detect_compression(content: &[u8]) -> Compression {
1211 if content.len() < 2 {
1212 return Compression::Plaintext;
1213 }
1214
1215 if content[0] == 0x1f && content[1] == 0x8b {
1216 return Compression::Gzip;
1217 }
1218
1219 if content.len() >= 4
1220 && content[0] == 0x28
1221 && content[1] == 0xb5
1222 && content[2] == 0x2f
1223 && content[3] == 0xfd
1224 {
1225 return Compression::Zstd;
1226 }
1227
1228 if content.len() >= 6
1229 && content[0] == 0xfd
1230 && content[1] == 0x37
1231 && content[2] == 0x7a
1232 && content[3] == 0x58
1233 && content[4] == 0x5a
1234 && content[5] == 0x00
1235 {
1236 return Compression::Lzma;
1237 }
1238
1239 Compression::Plaintext
1240}
1241
1242/// Decompresses content using the specified compression format.
1243///
1244/// # Arguments
1245///
1246/// * `content` - The compressed content
1247/// * `compression` - The compression format to use
1248///
1249/// # Returns
1250///
1251/// The decompressed content as a byte vector.
1252///
1253/// # Errors
1254///
1255/// Returns [`Error::Parse`] if:
1256/// - Decompression fails (corrupted data)
1257/// - The compression format is not supported (Zstd, LZMA)
1258///
1259/// # Supported Formats
1260///
1261/// - [`Compression::Plaintext`] - Returns content unchanged
1262/// - [`Compression::Gzip`] - Full support via flate2
1263/// - [`Compression::Zstd`] - Detection only, returns error
1264/// - [`Compression::Lzma`] - Detection only, returns error
1265///
1266/// # Example
1267///
1268/// ```rust
1269/// use stem_rs::descriptor::{decompress, Compression};
1270///
1271/// // Plaintext passes through unchanged
1272/// let content = b"Hello, World!";
1273/// let result = decompress(content, Compression::Plaintext).unwrap();
1274/// assert_eq!(result, content);
1275/// ```
1276pub fn decompress(content: &[u8], compression: Compression) -> Result<Vec<u8>, Error> {
1277 match compression {
1278 Compression::Plaintext => Ok(content.to_vec()),
1279 Compression::Gzip => decompress_gzip(content),
1280 Compression::Zstd => Err(Error::Descriptor(DescriptorError::UnsupportedCompression(
1281 "Zstd decompression not supported (requires zstd crate)".into(),
1282 ))),
1283 Compression::Lzma => Err(Error::Descriptor(DescriptorError::UnsupportedCompression(
1284 "LZMA decompression not supported (requires lzma crate)".into(),
1285 ))),
1286 }
1287}
1288
1289fn decompress_gzip(_content: &[u8]) -> Result<Vec<u8>, Error> {
1290 #[cfg(feature = "compression")]
1291 {
1292 let mut decoder = GzDecoder::new(_content);
1293 let mut decompressed = Vec::new();
1294 decoder.read_to_end(&mut decompressed).map_err(|e| {
1295 Error::Descriptor(DescriptorError::DecompressionFailed(format!(
1296 "Failed to decompress gzip: {}",
1297 e
1298 )))
1299 })?;
1300 Ok(decompressed)
1301 }
1302 #[cfg(not(feature = "compression"))]
1303 {
1304 Err(Error::Descriptor(DescriptorError::UnsupportedCompression(
1305 "Gzip decompression not supported (enable 'compression' feature)".into(),
1306 )))
1307 }
1308}
1309
1310/// Automatically detects and decompresses content.
1311///
1312/// This is a convenience function that combines [`detect_compression`] and
1313/// [`decompress`]. It examines the content's magic bytes to determine the
1314/// compression format and decompresses accordingly.
1315///
1316/// # Arguments
1317///
1318/// * `content` - The potentially compressed content
1319///
1320/// # Returns
1321///
1322/// The decompressed content. If the content is not compressed, it is
1323/// returned unchanged.
1324///
1325/// # Errors
1326///
1327/// Returns [`Error::Parse`] if decompression fails or the detected
1328/// compression format is not supported.
1329///
1330/// # Example
1331///
1332/// ```rust
1333/// use stem_rs::descriptor::auto_decompress;
1334///
1335/// // Plain text passes through
1336/// let plain = b"router example 127.0.0.1";
1337/// let result = auto_decompress(plain).unwrap();
1338/// assert_eq!(result, plain);
1339/// ```
1340pub fn auto_decompress(content: &[u8]) -> Result<Vec<u8>, Error> {
1341 let compression = detect_compression(content);
1342 decompress(content, compression)
1343}
1344
1345/// Computes a cryptographic digest of content.
1346///
1347/// This is a low-level function for computing digests. For descriptor
1348/// digests, prefer using the [`Descriptor::digest`] method which knows
1349/// the correct content range to hash.
1350///
1351/// # Arguments
1352///
1353/// * `content` - The content to hash
1354/// * `hash` - The hash algorithm to use
1355/// * `encoding` - The output encoding format
1356///
1357/// # Returns
1358///
1359/// The digest as a string in the specified encoding.
1360///
1361/// # Example
1362///
1363/// ```rust
1364/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
1365///
1366/// let content = b"test content";
1367///
1368/// // SHA-1 in hex
1369/// let sha1_hex = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
1370/// assert_eq!(sha1_hex.len(), 40);
1371///
1372/// // SHA-256 in base64
1373/// let sha256_b64 = compute_digest(content, DigestHash::Sha256, DigestEncoding::Base64);
1374/// ```
1375pub fn compute_digest(content: &[u8], hash: DigestHash, encoding: DigestEncoding) -> String {
1376 match hash {
1377 DigestHash::Sha1 => {
1378 let mut hasher = Sha1::new();
1379 hasher.update(content);
1380 let result = hasher.finalize();
1381 encode_digest(&result, encoding)
1382 }
1383 DigestHash::Sha256 => {
1384 let mut hasher = Sha256::new();
1385 hasher.update(content);
1386 let result = hasher.finalize();
1387 encode_digest(&result, encoding)
1388 }
1389 }
1390}
1391
1392fn encode_digest(bytes: &[u8], encoding: DigestEncoding) -> String {
1393 match encoding {
1394 DigestEncoding::Raw => bytes.iter().map(|b| *b as char).collect(),
1395 DigestEncoding::Hex => bytes.iter().map(|b| format!("{:02X}", b)).collect(),
1396 DigestEncoding::Base64 => base64_encode(bytes),
1397 }
1398}
1399
/// Encodes `bytes` as standard-alphabet Base64 without `=` padding.
///
/// The input is consumed in groups of up to three bytes. A group of
/// `n` bytes yields `n + 1` output characters, so a trailing partial
/// group is simply shorter rather than padded.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    let mut encoded = String::with_capacity((bytes.len() * 4 + 2) / 3);
    for group in bytes.chunks(3) {
        // Pack the group into the top of a 24-bit word; absent bytes stay zero.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (pos, &byte)| acc | (u32::from(byte) << (16 - 8 * pos)));

        // Emit the leading sextets only: 2, 3 or 4 depending on group size.
        for sextet in 0..=group.len() {
            let shift = 18 - 6 * sextet;
            encoded.push(ALPHABET[((word >> shift) & 0x3F) as usize] as char);
        }
    }
    encoded
}
1421
1422/// Parses a descriptor from file content with automatic decompression.
1423///
1424/// This function handles the common case of reading a descriptor from a file:
1425/// 1. Automatically decompresses the content if compressed
1426/// 2. Strips any `@type` annotation from the beginning
1427/// 3. Parses the descriptor using the type's `parse` method
1428///
1429/// # Type Parameters
1430///
1431/// * `T` - The descriptor type to parse (must implement [`Descriptor`])
1432///
1433/// # Arguments
1434///
1435/// * `content` - The raw file content (possibly compressed)
1436///
1437/// # Returns
1438///
1439/// The parsed descriptor.
1440///
1441/// # Errors
1442///
1443/// Returns [`Error::Parse`] if:
1444/// - Decompression fails
1445/// - The content is not valid UTF-8
1446/// - The descriptor content is malformed
1447///
1448/// # Example
1449///
1450/// ```rust,no_run
1451/// use stem_rs::descriptor::{parse_file, ServerDescriptor};
1452///
1453/// let content = std::fs::read("cached-descriptors").unwrap();
1454/// let descriptor: ServerDescriptor = parse_file(&content).unwrap();
1455/// println!("Parsed descriptor for: {}", descriptor.nickname);
1456/// ```
1457///
1458/// # See Also
1459///
1460/// - [`parse_file_with_annotation`] - Also returns the type annotation if present
1461/// - [`Descriptor::parse`] - Parse from string without decompression
1462pub fn parse_file<T: Descriptor>(content: &[u8]) -> Result<T, Error> {
1463 let decompressed = auto_decompress(content)?;
1464 let content_str = String::from_utf8_lossy(&decompressed);
1465 let (_, stripped) = strip_type_annotation(&content_str);
1466 T::parse(stripped)
1467}
1468
1469/// Parses a descriptor from file content, returning the type annotation.
1470///
1471/// Like [`parse_file`], but also returns the `@type` annotation if one
1472/// was present at the beginning of the content.
1473///
1474/// # Type Parameters
1475///
1476/// * `T` - The descriptor type to parse (must implement [`Descriptor`])
1477///
1478/// # Arguments
1479///
1480/// * `content` - The raw file content (possibly compressed)
1481///
1482/// # Returns
1483///
1484/// A tuple of:
1485/// - `Option<TypeAnnotation>` - The type annotation if present
1486/// - `T` - The parsed descriptor
1487///
1488/// # Errors
1489///
1490/// Returns [`Error::Parse`] if decompression or parsing fails.
1491///
1492/// # Example
1493///
1494/// ```rust,no_run
1495/// use stem_rs::descriptor::{parse_file_with_annotation, ServerDescriptor};
1496///
1497/// let content = std::fs::read("server-descriptor").unwrap();
1498/// let (annotation, descriptor): (_, ServerDescriptor) =
1499/// parse_file_with_annotation(&content).unwrap();
1500///
1501/// if let Some(ann) = annotation {
1502/// println!("Type: {} v{}.{}", ann.name, ann.major_version, ann.minor_version);
1503/// }
1504/// ```
1505pub fn parse_file_with_annotation<T: Descriptor>(
1506 content: &[u8],
1507) -> Result<(Option<TypeAnnotation>, T), Error> {
1508 let decompressed = auto_decompress(content)?;
1509 let content_str = String::from_utf8_lossy(&decompressed);
1510 let (annotation, stripped) = strip_type_annotation(&content_str);
1511 let descriptor = T::parse(stripped)?;
1512 Ok((annotation, descriptor))
1513}
1514
1515/// Strips a type annotation from the beginning of descriptor content.
1516///
1517/// If the first line is a valid `@type` annotation, it is parsed and
1518/// removed from the content. Otherwise, the content is returned unchanged.
1519///
1520/// # Arguments
1521///
1522/// * `content` - The descriptor content
1523///
1524/// # Returns
1525///
1526/// A tuple of:
1527/// - `Option<TypeAnnotation>` - The parsed annotation if present
1528/// - `&str` - The remaining content after the annotation
1529///
1530/// # Example
1531///
1532/// ```rust
1533/// use stem_rs::descriptor::strip_type_annotation;
1534///
1535/// let content = "@type server-descriptor 1.0\nrouter example 127.0.0.1";
1536/// let (annotation, rest) = strip_type_annotation(content);
1537///
1538/// assert!(annotation.is_some());
1539/// assert_eq!(annotation.unwrap().name, "server-descriptor");
1540/// assert_eq!(rest, "router example 127.0.0.1");
1541///
1542/// // Without annotation
1543/// let content = "router example 127.0.0.1";
1544/// let (annotation, rest) = strip_type_annotation(content);
1545/// assert!(annotation.is_none());
1546/// assert_eq!(rest, content);
1547/// ```
1548pub fn strip_type_annotation(content: &str) -> (Option<TypeAnnotation>, &str) {
1549 let first_line_end = content.find('\n').unwrap_or(content.len());
1550 let first_line = &content[..first_line_end];
1551
1552 if let Some(annotation) = TypeAnnotation::parse(first_line) {
1553 let rest = if first_line_end < content.len() {
1554 &content[first_line_end + 1..]
1555 } else {
1556 ""
1557 };
1558 (Some(annotation), rest)
1559 } else {
1560 (None, content)
1561 }
1562}
1563
// Unit tests for compression detection, decompression, digest encoding,
// `@type` annotation handling and descriptor type lookup.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_detect_compression_plaintext() {
        let content = b"@type server-descriptor 1.0\nrouter test";
        assert_eq!(detect_compression(content), Compression::Plaintext);
    }

    #[test]
    fn test_detect_compression_gzip() {
        let content = &[0x1f, 0x8b, 0x08, 0x00];
        assert_eq!(detect_compression(content), Compression::Gzip);
    }

    #[test]
    fn test_detect_compression_zstd() {
        let content = &[0x28, 0xb5, 0x2f, 0xfd, 0x00];
        assert_eq!(detect_compression(content), Compression::Zstd);
    }

    #[test]
    fn test_detect_compression_lzma() {
        let content = &[0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00];
        assert_eq!(detect_compression(content), Compression::Lzma);
    }

    // Inputs shorter than any signature must never be indexed out of
    // bounds and must be reported as plaintext.
    #[test]
    fn test_detect_compression_short_input() {
        assert_eq!(detect_compression(&[]), Compression::Plaintext);
        assert_eq!(detect_compression(&[0x1f]), Compression::Plaintext);
        assert_eq!(
            detect_compression(&[0x28, 0xb5, 0x2f]),
            Compression::Plaintext
        );
    }

    #[test]
    fn test_decompress_plaintext() {
        let content = b"Hello, World!";
        let result = decompress(content, Compression::Plaintext).unwrap();
        assert_eq!(result, content);
    }

    #[test]
    fn test_auto_decompress_plaintext() {
        let content = b"Hello, World!";
        let result = auto_decompress(content).unwrap();
        assert_eq!(result, content);
    }

    // Gzip support only exists when the `compression` feature is enabled,
    // so this test must not run (and fail on `unwrap`) without it.
    #[cfg(feature = "compression")]
    #[test]
    fn test_decompress_gzip() {
        let compressed = &[
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xf3, 0x48, 0xcd, 0xc9,
            0xc9, 0x07, 0x00, 0x82, 0x89, 0xd1, 0xf7, 0x05, 0x00, 0x00, 0x00,
        ];
        let result = decompress(compressed, Compression::Gzip).unwrap();
        assert_eq!(result, b"Hello");
    }

    // Counterpart of the test above: without the feature, gzip input is
    // reported as an error instead of being decompressed.
    #[cfg(not(feature = "compression"))]
    #[test]
    fn test_decompress_gzip_without_feature() {
        let compressed = &[0x1f, 0x8b, 0x08, 0x00];
        assert!(decompress(compressed, Compression::Gzip).is_err());
    }

    #[test]
    fn test_decompress_unsupported_formats() {
        assert!(decompress(b"data", Compression::Zstd).is_err());
        assert!(decompress(b"data", Compression::Lzma).is_err());
    }

    #[test]
    fn test_base64_encode_omits_padding() {
        assert_eq!(base64_encode(b""), "");
        assert_eq!(base64_encode(b"f"), "Zg");
        assert_eq!(base64_encode(b"fo"), "Zm8");
        assert_eq!(base64_encode(b"foo"), "Zm9v");
        assert_eq!(base64_encode(b"foobar"), "Zm9vYmFy");
    }

    // Well-known digests of the empty input.
    #[test]
    fn test_compute_digest_known_vectors() {
        assert_eq!(
            compute_digest(b"", DigestHash::Sha1, DigestEncoding::Hex),
            "DA39A3EE5E6B4B0D3255BFEF95601890AFD80709"
        );
        assert_eq!(
            compute_digest(b"", DigestHash::Sha256, DigestEncoding::Hex),
            "E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855"
        );
        assert_eq!(
            compute_digest(b"", DigestHash::Sha1, DigestEncoding::Base64),
            "2jmj7l5rSw0yVb/vlWAYkK/YBwk"
        );
    }

    #[test]
    fn test_type_annotation_parse() {
        let annotation = TypeAnnotation::parse("@type server-descriptor 1.0").unwrap();
        assert_eq!(annotation.name, "server-descriptor");
        assert_eq!(annotation.major_version, 1);
        assert_eq!(annotation.minor_version, 0);
    }

    #[test]
    fn test_type_annotation_parse_extra_info() {
        let annotation = TypeAnnotation::parse("@type extra-info 1.0").unwrap();
        assert_eq!(annotation.name, "extra-info");
        assert_eq!(annotation.major_version, 1);
        assert_eq!(annotation.minor_version, 0);
    }

    #[test]
    fn test_type_annotation_parse_bridge_extra_info() {
        let annotation = TypeAnnotation::parse("@type bridge-extra-info 1.2").unwrap();
        assert_eq!(annotation.name, "bridge-extra-info");
        assert_eq!(annotation.major_version, 1);
        assert_eq!(annotation.minor_version, 2);
    }

    #[test]
    fn test_type_annotation_parse_invalid() {
        assert!(TypeAnnotation::parse("router test").is_none());
        assert!(TypeAnnotation::parse("@type").is_none());
        assert!(TypeAnnotation::parse("@type server-descriptor").is_none());
        assert!(TypeAnnotation::parse("@type server-descriptor 1").is_none());
    }

    #[test]
    fn test_type_annotation_display() {
        let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
        assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
    }

    #[test]
    fn test_strip_type_annotation() {
        let content = "@type server-descriptor 1.0\nrouter test 127.0.0.1";
        let (annotation, rest) = strip_type_annotation(content);
        assert!(annotation.is_some());
        assert_eq!(annotation.unwrap().name, "server-descriptor");
        assert_eq!(rest, "router test 127.0.0.1");
    }

    #[test]
    fn test_strip_type_annotation_no_annotation() {
        let content = "router test 127.0.0.1";
        let (annotation, rest) = strip_type_annotation(content);
        assert!(annotation.is_none());
        assert_eq!(rest, content);
    }

    // An annotation line with nothing after it leaves an empty remainder.
    #[test]
    fn test_strip_type_annotation_only_annotation() {
        let content = "@type server-descriptor 1.0";
        let (annotation, rest) = strip_type_annotation(content);
        assert!(annotation.is_some());
        assert_eq!(rest, "");
    }

    #[test]
    fn test_descriptor_type_from_annotation() {
        let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
        assert_eq!(
            DescriptorType::from_annotation(&annotation),
            Some(DescriptorType::ServerDescriptor)
        );

        let annotation = TypeAnnotation::new("extra-info", 1, 0);
        assert_eq!(
            DescriptorType::from_annotation(&annotation),
            Some(DescriptorType::ExtraInfo)
        );

        let annotation = TypeAnnotation::new("tordnsel", 1, 0);
        assert_eq!(
            DescriptorType::from_annotation(&annotation),
            Some(DescriptorType::TorDNSEL)
        );
    }

    #[test]
    fn test_descriptor_type_from_filename() {
        assert_eq!(
            DescriptorType::from_filename("cached-consensus"),
            Some(DescriptorType::NetworkStatusConsensusV3)
        );
        assert_eq!(
            DescriptorType::from_filename("cached-descriptors"),
            Some(DescriptorType::ServerDescriptor)
        );
        assert_eq!(
            DescriptorType::from_filename("cached-extrainfo"),
            Some(DescriptorType::ExtraInfo)
        );
        assert_eq!(
            DescriptorType::from_filename("exit-list"),
            Some(DescriptorType::TorDNSEL)
        );
    }
}