stem_rs/descriptor/
mod.rs

1//! Descriptor parsing for Tor network documents.
2//!
3//! This module provides types for parsing various Tor descriptor formats
4//! including server descriptors, microdescriptors, consensus documents,
5//! and hidden service descriptors.
6//!
7//! # Overview
8//!
9//! Tor relays and directory authorities publish various types of descriptors
10//! that describe the network topology, relay capabilities, and routing
11//! information. This module provides parsers for all major descriptor types:
12//!
13//! - [`ServerDescriptor`] - Full relay metadata including keys, policies, and capabilities
14//! - [`Microdescriptor`] - Compact client-side descriptors with essential routing info
15//! - [`NetworkStatusDocument`] - Consensus documents listing all relays and their status
16//! - [`ExtraInfoDescriptor`] - Bandwidth statistics and additional relay information
17//! - [`HiddenServiceDescriptorV2`] / [`HiddenServiceDescriptorV3`] - Onion service descriptors
18//! - [`Ed25519Certificate`] - Ed25519 certificates used by relays
19//! - [`KeyCertificate`] - Directory authority key certificates
20//! - [`BandwidthFile`] - Bandwidth authority measurement files
21//! - [`TorDNSEL`] - Exit list data from TorDNSEL
22//!
23//! # Descriptor Sources
24//!
25//! Descriptors can be obtained from several sources:
26//!
27//! - **Tor's data directory**: Cached files like `cached-descriptors`, `cached-consensus`
28//! - **Directory authorities**: Via the [`remote`] module's download functions
29//! - **CollecTor archives**: Historical descriptors with `@type` annotations
30//!
31//! # Type Annotations
32//!
33//! Descriptors from [CollecTor](https://metrics.torproject.org/collector.html) include
34//! a type annotation on the first line in the format `@type <name> <major>.<minor>`.
35//! The [`TypeAnnotation`] struct parses these annotations, and [`parse_file`] handles
36//! them automatically.
37//!
38//! # Compression
39//!
40//! Downloaded descriptors are often compressed. This module supports automatic
41//! decompression via [`auto_decompress`] for:
42//!
43//! - **Plaintext** - Uncompressed data
//! - **Gzip** - Standard gzip compression (fully supported when the `compression` feature is enabled)
45//! - **Zstd** - Zstandard compression (detection only, requires external crate)
46//! - **LZMA** - LZMA/XZ compression (detection only, requires external crate)
47//!
48//! # Digests
49//!
50//! Descriptors have cryptographic digests used for identification and verification.
51//! The [`compute_digest`] function and [`Descriptor::digest`] method support:
52//!
53//! - [`DigestHash::Sha1`] - SHA-1 hash (legacy, used by older descriptors)
54//! - [`DigestHash::Sha256`] - SHA-256 hash (modern descriptors)
55//!
56//! With encodings:
57//!
58//! - [`DigestEncoding::Raw`] - Raw bytes as characters
59//! - [`DigestEncoding::Hex`] - Uppercase hexadecimal
60//! - [`DigestEncoding::Base64`] - Base64 without padding
61//!
62//! # Example
63//!
64//! ```rust,no_run
65//! use stem_rs::descriptor::{parse_file, ServerDescriptor, Descriptor};
66//! use stem_rs::descriptor::{DigestHash, DigestEncoding};
67//!
68//! // Parse a server descriptor from file contents
69//! let content = std::fs::read("cached-descriptors").unwrap();
70//! let descriptor: ServerDescriptor = parse_file(&content).unwrap();
71//!
72//! // Access descriptor fields
73//! println!("Nickname: {}", descriptor.nickname);
74//! println!("Address: {}", descriptor.address);
75//!
76//! // Compute the descriptor's digest
77//! let digest = descriptor.digest(DigestHash::Sha1, DigestEncoding::Hex).unwrap();
78//! println!("Digest: {}", digest);
79//! ```
80//!
81//! # See Also
82//!
83//! - [`remote`] - Download descriptors from directory authorities
84//! - [`server`] - Server descriptor parsing
85//! - [`micro`] - Microdescriptor parsing
86//! - [`consensus`] - Network status document parsing
87//! - [`hidden`] - Hidden service descriptor parsing
88//!
//! # External References
90//!
91//! - [Tor Directory Protocol Specification](https://spec.torproject.org/dir-spec)
92//! - [Python Stem descriptor module](https://stem.torproject.org/api/descriptor/descriptor.html)
93
94pub mod authority;
95pub mod bandwidth_file;
96pub mod cache;
97pub mod certificate;
98pub mod consensus;
99pub mod extra_info;
100pub mod hidden;
101pub mod key_cert;
102pub mod micro;
103pub mod remote;
104pub mod router_status;
105pub mod server;
106pub mod tordnsel;
107
108pub use authority::{DirectoryAuthority, SharedRandomnessCommitment};
109pub use bandwidth_file::{BandwidthFile, BandwidthMeasurement, RecentStats, RelayFailures};
110pub use cache::{CacheStats, DescriptorCache};
111pub use certificate::{
112    Ed25519Certificate, Ed25519Extension, ExtensionFlag, ExtensionType, ED25519_HEADER_LENGTH,
113    ED25519_KEY_LENGTH, ED25519_SIGNATURE_LENGTH,
114};
115pub use consensus::{
116    DocumentSignature, NetworkStatusDocument, NetworkStatusDocumentBuilder, SharedRandomness,
117};
118pub use extra_info::{
119    BandwidthHistory, DirResponse, DirStat, ExtraInfoDescriptor, ExtraInfoDescriptorBuilder,
120    PortKey, Transport,
121};
122pub use hidden::{
123    AuthorizedClient, HiddenServiceDescriptorV2, HiddenServiceDescriptorV3, InnerLayer,
124    IntroductionPointV2, IntroductionPointV3, LinkSpecifier, OuterLayer,
125};
126pub use key_cert::KeyCertificate;
127pub use micro::{Microdescriptor, MicrodescriptorBuilder};
128pub use remote::{
129    download_bandwidth_file, download_consensus, download_detached_signatures,
130    download_extrainfo_descriptors, download_from_dirport, download_key_certificates,
131    download_microdescriptors, download_server_descriptors, get_authorities, Compression, DirPort,
132    DownloadResult,
133};
134pub use router_status::{MicrodescriptorHash, RouterStatusEntry, RouterStatusEntryType};
135pub use server::{ServerDescriptor, ServerDescriptorBuilder};
136pub use tordnsel::{parse_exit_list, parse_exit_list_bytes, TorDNSEL};
137
138use crate::Error;
139#[cfg(feature = "compression")]
140use flate2::read::GzDecoder;
141use sha1::{Digest as Sha1Digest, Sha1};
142use sha2::Sha256;
143#[cfg(feature = "compression")]
144use std::io::Read;
145use std::path::Path;
146use thiserror::Error as ThisError;
147
/// Errors that can occur when parsing network status consensus documents.
///
/// This error type provides specific information about what went wrong during
/// consensus parsing, making it easier to diagnose and fix issues with malformed
/// consensus documents.
///
/// The `Display` text of each variant is the format string given in its
/// `#[error("...")]` attribute. Variants whose payload is marked `#[from]`
/// ([`Io`](Self::Io), [`InvalidIpAddress`](Self::InvalidIpAddress) and
/// [`InvalidPort`](Self::InvalidPort)) additionally get a `From` conversion,
/// so the wrapped standard-library error can be propagated with `?`.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::ConsensusError;
///
/// fn handle_consensus_error(err: ConsensusError) {
///     match err {
///         ConsensusError::InvalidFingerprint(fp) => {
///             eprintln!("Invalid relay fingerprint: {}", fp);
///         }
///         ConsensusError::TimestampOrderingViolation(msg) => {
///             eprintln!("Timestamp ordering issue: {}", msg);
///         }
///         _ => eprintln!("Consensus parse error: {}", err),
///     }
/// }
/// ```
#[derive(Debug, ThisError)]
pub enum ConsensusError {
    /// IO error occurred while reading consensus data.
    ///
    /// Converted automatically from [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Network status version is not supported.
    ///
    /// The payload is the version text that was found; the message states
    /// that version `3` was expected.
    #[error("Invalid network status version: expected 3, got {0}")]
    InvalidNetworkStatusVersion(String),

    /// Vote status field has invalid value.
    ///
    /// The payload is the value that was found instead of `vote` or
    /// `consensus`.
    #[error("Invalid vote status: expected 'vote' or 'consensus', got {0}")]
    InvalidVoteStatus(String),

    /// Timestamp format is invalid or unparseable.
    #[error("Invalid timestamp format: {0}")]
    InvalidTimestamp(String),

    /// Voting delay line has wrong number of values.
    ///
    /// The payload is interpolated after "got" in the message.
    #[error("Invalid voting delay: expected 2 values, got {0}")]
    InvalidVotingDelay(String),

    /// Relay fingerprint format is invalid.
    #[error("Invalid fingerprint: {0}")]
    InvalidFingerprint(String),

    /// IP address format is invalid.
    ///
    /// Converted automatically from [`std::net::AddrParseError`].
    #[error("Invalid IP address: {0}")]
    InvalidIpAddress(#[from] std::net::AddrParseError),

    /// Port number is invalid or out of range.
    ///
    /// Converted automatically from [`std::num::ParseIntError`]. Note that
    /// this conversion applies to *every* `ParseIntError` propagated with `?`
    /// into this error type, so an integer parse failure that is unrelated to
    /// a port is also reported through this variant unless it is mapped
    /// explicitly.
    #[error("Invalid port number: {0}")]
    InvalidPort(#[from] std::num::ParseIntError),

    /// Bandwidth value is invalid or unparseable.
    #[error("Invalid bandwidth value: {0}")]
    InvalidBandwidth(String),

    /// Relay flag is not recognized.
    #[error("Invalid flag: {0}")]
    InvalidFlag(String),

    /// Protocol version string is malformed.
    #[error("Invalid protocol version: {0}")]
    InvalidProtocolVersion(String),

    /// Base64 encoding is invalid.
    #[error("Invalid base64 encoding: {0}")]
    InvalidBase64(String),

    /// Cryptographic signature is invalid.
    #[error("Invalid signature: {0}")]
    InvalidSignature(String),

    /// Required field is missing from consensus.
    ///
    /// The payload identifies the missing field.
    #[error("Missing required field: {0}")]
    MissingRequiredField(String),

    /// Timestamps are not in correct order (valid-after < fresh-until < valid-until).
    #[error("Timestamp ordering violation: {0}")]
    TimestampOrderingViolation(String),

    /// Line format is invalid at specific location.
    #[error("Invalid line format at line {line}: {reason}")]
    InvalidLineFormat {
        /// Line number where error occurred.
        line: usize,
        /// Description of the format error.
        reason: String,
    },
}
242
/// Errors that can occur when parsing server descriptors.
///
/// Server descriptors contain full relay metadata including identity keys,
/// exit policies, bandwidth information, and platform details.
///
/// The `Display` text of each variant is the format string given in its
/// `#[error("...")]` attribute. Variants whose payload is marked `#[from]`
/// ([`Io`](Self::Io), [`InvalidIpAddress`](Self::InvalidIpAddress) and
/// [`InvalidPort`](Self::InvalidPort)) additionally get a `From` conversion,
/// so the wrapped standard-library error can be propagated with `?`.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::ServerDescriptorError;
///
/// fn handle_server_error(err: ServerDescriptorError) {
///     match err {
///         ServerDescriptorError::InvalidNickname(nick) => {
///             eprintln!("Invalid relay nickname: {}", nick);
///         }
///         ServerDescriptorError::MissingRequiredField(field) => {
///             eprintln!("Missing required field: {}", field);
///         }
///         _ => eprintln!("Server descriptor parse error: {}", err),
///     }
/// }
/// ```
#[derive(Debug, ThisError)]
pub enum ServerDescriptorError {
    /// IO error occurred while reading descriptor data.
    ///
    /// Converted automatically from [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Router line has wrong number of components.
    ///
    /// The message states that 5 parts were expected.
    #[error("Invalid router line format: expected 5 parts, got {actual}")]
    InvalidRouterFormat {
        /// Actual number of parts found.
        actual: usize,
    },

    /// Relay nickname is invalid (must be 1-19 alphanumeric characters).
    #[error("Invalid nickname: {0}")]
    InvalidNickname(String),

    /// IP address format is invalid.
    ///
    /// Converted automatically from [`std::net::AddrParseError`].
    #[error("Invalid IP address: {0}")]
    InvalidIpAddress(#[from] std::net::AddrParseError),

    /// Port number is invalid or out of range.
    ///
    /// Converted automatically from [`std::num::ParseIntError`]. Note that
    /// this conversion applies to *every* `ParseIntError` propagated with `?`
    /// into this error type, so an integer parse failure that is unrelated to
    /// a port is also reported through this variant unless it is mapped
    /// explicitly.
    #[error("Invalid port number: {0}")]
    InvalidPort(#[from] std::num::ParseIntError),

    /// Bandwidth line has wrong number of values.
    ///
    /// The message states that 3 parts were expected.
    #[error("Invalid bandwidth line format: expected 3 parts, got {actual}")]
    InvalidBandwidthFormat {
        /// Actual number of parts found.
        actual: usize,
    },

    /// Bandwidth value is invalid or unparseable.
    #[error("Invalid bandwidth value: {0}")]
    InvalidBandwidth(String),

    /// Published date format is invalid.
    #[error("Invalid published date format: {0}")]
    InvalidPublishedDate(String),

    /// Fingerprint format is invalid (must be 40 hex characters).
    #[error("Invalid fingerprint format: {0}")]
    InvalidFingerprint(String),

    /// RSA public key is malformed or invalid.
    #[error("Invalid RSA public key: {0}")]
    InvalidRsaKey(String),

    /// Ed25519 identity key is invalid.
    #[error("Invalid Ed25519 identity: {0}")]
    InvalidEd25519Identity(String),

    /// Exit policy format is invalid.
    #[error("Invalid exit policy format: {0}")]
    InvalidExitPolicy(String),

    /// Protocol version string is malformed.
    #[error("Invalid protocol version: {0}")]
    InvalidProtocolVersion(String),

    /// Required field is missing from descriptor.
    ///
    /// The payload identifies the missing field.
    #[error("Missing required field: {0}")]
    MissingRequiredField(String),

    /// Line format is invalid at specific location.
    #[error("Invalid line format at line {line}: {reason}")]
    InvalidLineFormat {
        /// Line number where error occurred.
        line: usize,
        /// Description of the format error.
        reason: String,
    },
}
338
/// Errors that can occur when parsing microdescriptors.
///
/// Microdescriptors are compact descriptors used by clients for building
/// circuits with minimal bandwidth overhead.
///
/// The `Display` text of each variant is the format string given in its
/// `#[error("...")]` attribute. Variants whose payload is marked `#[from]`
/// ([`Io`](Self::Io) and [`InvalidSocketAddress`](Self::InvalidSocketAddress))
/// additionally get a `From` conversion, so the wrapped standard-library
/// error can be propagated with `?`.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::MicrodescriptorError;
///
/// fn handle_micro_error(err: MicrodescriptorError) {
///     match err {
///         MicrodescriptorError::InvalidOnionKey(msg) => {
///             eprintln!("Invalid onion key: {}", msg);
///         }
///         MicrodescriptorError::MissingRequiredField(field) => {
///             eprintln!("Missing required field: {}", field);
///         }
///         _ => eprintln!("Microdescriptor parse error: {}", err),
///     }
/// }
/// ```
#[derive(Debug, ThisError)]
pub enum MicrodescriptorError {
    /// IO error occurred while reading descriptor data.
    ///
    /// Converted automatically from [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Onion key format is invalid.
    #[error("Invalid onion key format: {0}")]
    InvalidOnionKey(String),

    /// Ntor onion key format is invalid.
    #[error("Invalid ntor onion key format: {0}")]
    InvalidNtorOnionKey(String),

    /// Socket address format is invalid.
    ///
    /// Converted automatically from [`std::net::AddrParseError`].
    #[error("Invalid socket address: {0}")]
    InvalidSocketAddress(#[from] std::net::AddrParseError),

    /// Relay family specification is invalid.
    #[error("Invalid relay family: {0}")]
    InvalidRelayFamily(String),

    /// Port policy format is invalid.
    #[error("Invalid port policy: {0}")]
    InvalidPortPolicy(String),

    /// Base64 encoding is invalid.
    #[error("Invalid base64 encoding: {0}")]
    InvalidBase64(String),

    /// Identity key has wrong length for algorithm.
    ///
    /// All three fields are interpolated into the error message.
    #[error("Invalid identity length for {algorithm}: expected {expected}, got {actual}")]
    InvalidIdentityLength {
        /// Algorithm name (e.g., "ed25519").
        algorithm: String,
        /// Expected length in bytes.
        expected: usize,
        /// Actual length found.
        actual: usize,
    },

    /// Identity algorithm is not recognized.
    ///
    /// The payload is the unrecognized algorithm name.
    #[error("Unknown identity algorithm: {0}")]
    UnknownIdentityAlgorithm(String),

    /// Cryptographic block is incomplete.
    ///
    /// The payload names the key type whose block was incomplete.
    #[error("Incomplete crypto block for key type: {0}")]
    IncompleteCryptoBlock(String),

    /// Required field is missing from descriptor.
    ///
    /// The payload identifies the missing field.
    #[error("Missing required field: {0}")]
    MissingRequiredField(String),
}
414
/// Errors that can occur when parsing extra-info descriptors.
///
/// Extra-info descriptors contain bandwidth statistics and additional
/// relay information not included in server descriptors.
///
/// The `Display` text of each variant is the format string given in its
/// `#[error("...")]` attribute. Only [`Io`](Self::Io) has an automatic
/// `From` conversion (from [`std::io::Error`]); every other variant must be
/// constructed explicitly.
#[derive(Debug, ThisError)]
pub enum ExtraInfoError {
    /// IO error occurred while reading descriptor data.
    ///
    /// Converted automatically from [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Extra-info line has wrong number of components.
    ///
    /// The message states that 3 parts were expected.
    #[error("Invalid extra-info line format: expected 3 parts, got {actual}")]
    InvalidExtraInfoFormat {
        /// Actual number of parts found.
        actual: usize,
    },

    /// Relay nickname is invalid.
    #[error("Invalid nickname: {0}")]
    InvalidNickname(String),

    /// Fingerprint format is invalid.
    #[error("Invalid fingerprint: {0}")]
    InvalidFingerprint(String),

    /// Published date format is invalid.
    #[error("Invalid published date format: {0}")]
    InvalidPublishedDate(String),

    /// Bandwidth history format is invalid.
    #[error("Invalid bandwidth history format: {0}")]
    InvalidBandwidthHistory(String),

    /// Timestamp format is invalid.
    #[error("Invalid timestamp: {0}")]
    InvalidTimestamp(String),

    /// Required field is missing from descriptor.
    ///
    /// The payload identifies the missing field.
    #[error("Missing required field: {0}")]
    MissingRequiredField(String),
}
456
/// Errors that can occur when parsing hidden service descriptors.
///
/// Hidden service descriptors (v2 and v3) contain information needed
/// to connect to onion services.
///
/// The `Display` text of each variant is the format string given in its
/// `#[error("...")]` attribute. Only [`Io`](Self::Io) has an automatic
/// `From` conversion (from [`std::io::Error`]); every other variant must be
/// constructed explicitly.
#[derive(Debug, ThisError)]
pub enum HiddenServiceDescriptorError {
    /// IO error occurred while reading descriptor data.
    ///
    /// Converted automatically from [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Descriptor version is not supported.
    ///
    /// The payload is the version number that was found; the message states
    /// that `2` or `3` was expected.
    #[error("Invalid descriptor version: expected 2 or 3, got {0}")]
    InvalidDescriptorVersion(u32),

    /// Onion address format is invalid.
    #[error("Invalid onion address: {0}")]
    InvalidOnionAddress(String),

    /// Introduction point specification is invalid.
    #[error("Invalid introduction point: {0}")]
    InvalidIntroductionPoint(String),

    /// Encryption key is malformed.
    #[error("Invalid encryption key: {0}")]
    InvalidEncryptionKey(String),

    /// Cryptographic signature is invalid.
    #[error("Invalid signature: {0}")]
    InvalidSignature(String),

    /// Base64 encoding is invalid.
    #[error("Invalid base64 encoding: {0}")]
    InvalidBase64(String),

    /// Required field is missing from descriptor.
    ///
    /// The payload identifies the missing field.
    #[error("Missing required field: {0}")]
    MissingRequiredField(String),
}
495
/// Errors that can occur when parsing directory key certificates.
///
/// Key certificates bind directory authority signing keys to their
/// long-term identity keys.
///
/// The `Display` text of each variant is the format string given in its
/// `#[error("...")]` attribute. Only [`Io`](Self::Io) has an automatic
/// `From` conversion (from [`std::io::Error`]); every other variant must be
/// constructed explicitly.
#[derive(Debug, ThisError)]
pub enum KeyCertificateError {
    /// IO error occurred while reading certificate data.
    ///
    /// Converted automatically from [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Certificate version is not supported.
    ///
    /// The payload is the version number that was found; the message states
    /// that version `3` was expected.
    #[error("Invalid certificate version: expected 3, got {0}")]
    InvalidCertificateVersion(u32),

    /// Fingerprint format is invalid.
    #[error("Invalid fingerprint: {0}")]
    InvalidFingerprint(String),

    /// Timestamp format is invalid.
    #[error("Invalid timestamp: {0}")]
    InvalidTimestamp(String),

    /// RSA key is malformed.
    #[error("Invalid RSA key: {0}")]
    InvalidRsaKey(String),

    /// Cryptographic signature is invalid.
    #[error("Invalid signature: {0}")]
    InvalidSignature(String),

    /// Required field is missing from certificate.
    ///
    /// The payload identifies the missing field.
    #[error("Missing required field: {0}")]
    MissingRequiredField(String),
}
530
/// Errors that can occur when parsing bandwidth measurement files.
///
/// Bandwidth files contain relay capacity measurements from bandwidth
/// authorities used to compute consensus weights.
///
/// The `Display` text of each variant is the format string given in its
/// `#[error("...")]` attribute. Only [`Io`](Self::Io) has an automatic
/// `From` conversion (from [`std::io::Error`]); every other variant must be
/// constructed explicitly.
#[derive(Debug, ThisError)]
pub enum BandwidthFileError {
    /// IO error occurred while reading bandwidth file.
    ///
    /// Converted automatically from [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Header format is invalid.
    #[error("Invalid header format: {0}")]
    InvalidHeaderFormat(String),

    /// Timestamp format is invalid.
    #[error("Invalid timestamp: {0}")]
    InvalidTimestamp(String),

    /// Bandwidth value is invalid or unparseable.
    #[error("Invalid bandwidth value: {0}")]
    InvalidBandwidth(String),

    /// Fingerprint format is invalid.
    #[error("Invalid fingerprint: {0}")]
    InvalidFingerprint(String),

    /// Required header field is missing.
    ///
    /// The payload identifies the missing header field.
    #[error("Missing required header field: {0}")]
    MissingRequiredHeaderField(String),
}
561
/// Errors that can occur when parsing TorDNSEL exit lists.
///
/// TorDNSEL exit lists contain IP addresses of Tor exit relays.
///
/// The `Display` text of each variant is the format string given in its
/// `#[error("...")]` attribute. [`Io`](Self::Io) and
/// [`InvalidIpAddress`](Self::InvalidIpAddress) are marked `#[from]`, so the
/// wrapped standard-library error can be propagated with `?`.
#[derive(Debug, ThisError)]
pub enum TorDNSELError {
    /// IO error occurred while reading exit list.
    ///
    /// Converted automatically from [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// IP address format is invalid.
    ///
    /// Converted automatically from [`std::net::AddrParseError`].
    #[error("Invalid IP address: {0}")]
    InvalidIpAddress(#[from] std::net::AddrParseError),

    /// Timestamp format is invalid.
    #[error("Invalid timestamp: {0}")]
    InvalidTimestamp(String),

    /// Exit address line format is invalid.
    #[error("Invalid exit address format: {0}")]
    InvalidExitAddressFormat(String),
}
583
/// Unified error type for all descriptor parsing operations.
///
/// This enum wraps all descriptor-specific error types, providing a single
/// error type that can represent failures from any descriptor parser.
///
/// # Design
///
/// Following the library-rs reference implementation, the variants that wrap
/// a descriptor-specific error use transparent error forwarding with
/// `#[error(transparent)]` to preserve the underlying error's Display
/// implementation and source chain. Each of those variants is also marked
/// `#[from]`, so a descriptor-specific error converts into `DescriptorError`
/// with `?`.
///
/// The remaining variants ([`UnsupportedCompression`](Self::UnsupportedCompression),
/// [`DecompressionFailed`](Self::DecompressionFailed) and
/// [`InvalidUtf8`](Self::InvalidUtf8)) are not transparent: they carry their
/// own message prefix.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::DescriptorError;
///
/// fn handle_descriptor_error(err: DescriptorError) {
///     match err {
///         DescriptorError::Consensus(e) => {
///             eprintln!("Consensus error: {}", e);
///         }
///         DescriptorError::ServerDescriptor(e) => {
///             eprintln!("Server descriptor error: {}", e);
///         }
///         DescriptorError::UnsupportedCompression(format) => {
///             eprintln!("Unsupported compression: {}", format);
///         }
///         _ => eprintln!("Descriptor error: {}", err),
///     }
/// }
/// ```
#[derive(Debug, ThisError)]
pub enum DescriptorError {
    /// Error parsing network status consensus document.
    ///
    /// Converted automatically from [`ConsensusError`].
    #[error(transparent)]
    Consensus(#[from] ConsensusError),

    /// Error parsing server descriptor.
    ///
    /// Converted automatically from [`ServerDescriptorError`].
    #[error(transparent)]
    ServerDescriptor(#[from] ServerDescriptorError),

    /// Error parsing microdescriptor.
    ///
    /// Converted automatically from [`MicrodescriptorError`].
    #[error(transparent)]
    Microdescriptor(#[from] MicrodescriptorError),

    /// Error parsing extra-info descriptor.
    ///
    /// Converted automatically from [`ExtraInfoError`].
    #[error(transparent)]
    ExtraInfo(#[from] ExtraInfoError),

    /// Error parsing hidden service descriptor.
    ///
    /// Converted automatically from [`HiddenServiceDescriptorError`].
    #[error(transparent)]
    HiddenService(#[from] HiddenServiceDescriptorError),

    /// Error parsing directory key certificate.
    ///
    /// Converted automatically from [`KeyCertificateError`].
    #[error(transparent)]
    KeyCertificate(#[from] KeyCertificateError),

    /// Error parsing bandwidth measurement file.
    ///
    /// Converted automatically from [`BandwidthFileError`].
    #[error(transparent)]
    BandwidthFile(#[from] BandwidthFileError),

    /// Error parsing TorDNSEL exit list.
    ///
    /// Converted automatically from [`TorDNSELError`].
    #[error(transparent)]
    TorDNSEL(#[from] TorDNSELError),

    /// Compression format is not supported.
    ///
    /// The payload is interpolated into the message as the format description.
    #[error("Unsupported compression format: {0}")]
    UnsupportedCompression(String),

    /// Decompression failed.
    ///
    /// The payload is a description of the failure.
    #[error("Decompression failed: {0}")]
    DecompressionFailed(String),

    /// Descriptor contains invalid UTF-8.
    ///
    /// Converted automatically from [`std::string::FromUtf8Error`].
    #[error("Invalid UTF-8 in descriptor: {0}")]
    InvalidUtf8(#[from] std::string::FromUtf8Error),
}
661
/// A type annotation from CollecTor descriptor archives.
///
/// CollecTor archives include a type annotation on the first line of each
/// descriptor file in the format `@type <name> <major>.<minor>`. This struct
/// represents that parsed annotation.
///
/// # Format
///
/// ```text
/// @type server-descriptor 1.0
/// @type network-status-consensus-3 1.0
/// @type microdescriptor 1.0
/// ```
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::TypeAnnotation;
///
/// let annotation = TypeAnnotation::parse("@type server-descriptor 1.0").unwrap();
/// assert_eq!(annotation.name, "server-descriptor");
/// assert_eq!(annotation.major_version, 1);
/// assert_eq!(annotation.minor_version, 0);
///
/// // Convert back to string
/// assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
/// ```
///
/// # See Also
///
/// - [`DescriptorType`] - Enum of known descriptor types
/// - [`strip_type_annotation`] - Extract annotation from content
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TypeAnnotation {
    /// The descriptor type name (e.g., "server-descriptor", "microdescriptor").
    pub name: String,
    /// The major version number.
    pub major_version: u32,
    /// The minor version number.
    pub minor_version: u32,
}

impl TypeAnnotation {
    /// Creates a new type annotation with the given name and version.
    ///
    /// No validation is performed on `name`.
    ///
    /// # Arguments
    ///
    /// * `name` - The descriptor type name
    /// * `major_version` - The major version number
    /// * `minor_version` - The minor version number
    ///
    /// # Example
    ///
    /// ```rust
    /// use stem_rs::descriptor::TypeAnnotation;
    ///
    /// let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
    /// assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
    /// ```
    pub fn new(name: impl Into<String>, major_version: u32, minor_version: u32) -> Self {
        Self {
            name: name.into(),
            major_version,
            minor_version,
        }
    }

    /// Parses a type annotation from a line of text.
    ///
    /// Surrounding whitespace is ignored. After the literal `@type ` prefix
    /// the line must contain exactly two whitespace-separated fields: the
    /// descriptor name and a `<major>.<minor>` version whose components are
    /// plain decimal digits that fit in a `u32`.
    ///
    /// Returns `None` if the line is not a valid type annotation.
    ///
    /// # Arguments
    ///
    /// * `line` - The line to parse
    ///
    /// # Example
    ///
    /// ```rust
    /// use stem_rs::descriptor::TypeAnnotation;
    ///
    /// // Valid annotation
    /// let annotation = TypeAnnotation::parse("@type extra-info 1.0").unwrap();
    /// assert_eq!(annotation.name, "extra-info");
    ///
    /// // Invalid - not an annotation
    /// assert!(TypeAnnotation::parse("router test 127.0.0.1").is_none());
    ///
    /// // Invalid - missing version
    /// assert!(TypeAnnotation::parse("@type server-descriptor").is_none());
    ///
    /// // Invalid - version components must be unsigned digits only
    /// assert!(TypeAnnotation::parse("@type server-descriptor +1.0").is_none());
    /// ```
    pub fn parse(line: &str) -> Option<Self> {
        let rest = line.trim().strip_prefix("@type ")?;

        // Exactly two fields (name and version) may follow the prefix.
        let mut fields = rest.split_whitespace();
        let name = fields.next()?;
        let version = fields.next()?;
        if fields.next().is_some() {
            return None;
        }

        // Splitting at the first '.' is enough: a version with further dots
        // leaves a '.' in the minor component, which the digit check rejects.
        let (major, minor) = version.split_once('.')?;
        let major_version = Self::parse_version_component(major)?;
        let minor_version = Self::parse_version_component(minor)?;

        Some(Self {
            name: name.to_string(),
            major_version,
            minor_version,
        })
    }

    /// Parses one version component, accepting ASCII decimal digits only.
    ///
    /// `str::parse::<u32>()` on its own also accepts a leading `+`, which is
    /// not part of the annotation format and would not survive a round trip
    /// through `Display`, so the digits are checked first. Values that
    /// overflow `u32` yield `None`.
    fn parse_version_component(text: &str) -> Option<u32> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        text.parse().ok()
    }
}

/// Formats the annotation as `@type <name> <major>.<minor>`, the same shape
/// that [`TypeAnnotation::parse`] accepts.
impl std::fmt::Display for TypeAnnotation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "@type {} {}.{}",
            self.name, self.major_version, self.minor_version
        )
    }
}
790
/// Known descriptor types in the Tor network.
///
/// This enum represents all descriptor types that can be identified from
/// type annotations or filenames. Each variant corresponds to a specific
/// descriptor format defined in the Tor directory protocol specification.
///
/// # Stability
///
/// New descriptor types may be added in future Tor versions, so this enum
/// should be treated as open-ended. Note that it does not carry the
/// `#[non_exhaustive]` attribute, so the compiler does not enforce this:
/// downstream `match` expressions should include a wildcard arm themselves.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{DescriptorType, TypeAnnotation};
///
/// // From type annotation
/// let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
/// let desc_type = DescriptorType::from_annotation(&annotation);
/// assert_eq!(desc_type, Some(DescriptorType::ServerDescriptor));
///
/// // From filename
/// let desc_type = DescriptorType::from_filename("cached-consensus");
/// assert_eq!(desc_type, Some(DescriptorType::NetworkStatusConsensusV3));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DescriptorType {
    /// Server descriptor containing full relay metadata.
    ///
    /// Includes identity keys, exit policy, bandwidth, and other relay information.
    /// Annotation name: `server-descriptor`
    ServerDescriptor,
    /// Extra-info descriptor with bandwidth statistics.
    ///
    /// Contains detailed statistics about relay operation.
    /// Annotation name: `extra-info`
    ExtraInfo,
    /// Microdescriptor with compact routing information.
    ///
    /// Used by clients for building circuits with minimal data.
    /// Annotation name: `microdescriptor`
    Microdescriptor,
    /// Network status consensus document (v3).
    ///
    /// The agreed-upon view of the network signed by directory authorities.
    /// Annotation name: `network-status-consensus-3`
    NetworkStatusConsensusV3,
    /// Network status vote document (v3).
    ///
    /// Individual directory authority's view before consensus.
    /// Annotation name: `network-status-vote-3`
    NetworkStatusVoteV3,
    /// Microdescriptor-flavored consensus document (v3).
    ///
    /// Consensus using microdescriptor hashes instead of full descriptors.
    /// Annotation name: `network-status-microdesc-consensus-3`
    NetworkStatusMicrodescConsensusV3,
    /// Bridge network status document.
    ///
    /// Network status for bridge relays (not publicly listed).
    /// Annotation name: `bridge-network-status`
    BridgeNetworkStatus,
    /// Bridge server descriptor.
    ///
    /// Server descriptor for bridge relays with some fields redacted.
    /// Annotation name: `bridge-server-descriptor`
    BridgeServerDescriptor,
    /// Bridge extra-info descriptor.
    ///
    /// Extra-info for bridge relays.
    /// Annotation name: `bridge-extra-info`
    BridgeExtraInfo,
    /// Directory key certificate (v3).
    ///
    /// Certificate binding a directory authority's signing key to its identity.
    /// Annotation name: `dir-key-certificate-3`
    DirKeyCertificateV3,
    /// TorDNSEL exit list.
    ///
    /// List of exit relay IP addresses from the TorDNSEL service.
    /// Annotation name: `tordnsel`
    TorDNSEL,
    /// Hidden service descriptor.
    ///
    /// Descriptor for onion services (v2 or v3).
    /// Annotation name: `hidden-service-descriptor`
    HiddenServiceDescriptor,
    /// Bandwidth authority measurement file.
    ///
    /// Bandwidth measurements from bandwidth authorities.
    /// Annotation name: `bandwidth-file`
    BandwidthFile,
}
884
885impl DescriptorType {
886    /// Returns the annotation name for this descriptor type.
887    ///
888    /// This is the name used in `@type` annotations in CollecTor archives.
889    ///
890    /// # Example
891    ///
892    /// ```rust
893    /// use stem_rs::descriptor::DescriptorType;
894    ///
895    /// assert_eq!(DescriptorType::ServerDescriptor.annotation_name(), "server-descriptor");
896    /// assert_eq!(DescriptorType::Microdescriptor.annotation_name(), "microdescriptor");
897    /// ```
898    pub fn annotation_name(&self) -> &'static str {
899        match self {
900            Self::ServerDescriptor => "server-descriptor",
901            Self::ExtraInfo => "extra-info",
902            Self::Microdescriptor => "microdescriptor",
903            Self::NetworkStatusConsensusV3 => "network-status-consensus-3",
904            Self::NetworkStatusVoteV3 => "network-status-vote-3",
905            Self::NetworkStatusMicrodescConsensusV3 => "network-status-microdesc-consensus-3",
906            Self::BridgeNetworkStatus => "bridge-network-status",
907            Self::BridgeServerDescriptor => "bridge-server-descriptor",
908            Self::BridgeExtraInfo => "bridge-extra-info",
909            Self::DirKeyCertificateV3 => "dir-key-certificate-3",
910            Self::TorDNSEL => "tordnsel",
911            Self::HiddenServiceDescriptor => "hidden-service-descriptor",
912            Self::BandwidthFile => "bandwidth-file",
913        }
914    }
915
916    /// Determines the descriptor type from a type annotation.
917    ///
918    /// Returns `None` if the annotation name is not recognized.
919    ///
920    /// # Arguments
921    ///
922    /// * `annotation` - The type annotation to match
923    ///
924    /// # Example
925    ///
926    /// ```rust
927    /// use stem_rs::descriptor::{DescriptorType, TypeAnnotation};
928    ///
929    /// let annotation = TypeAnnotation::new("extra-info", 1, 0);
930    /// assert_eq!(
931    ///     DescriptorType::from_annotation(&annotation),
932    ///     Some(DescriptorType::ExtraInfo)
933    /// );
934    ///
935    /// let unknown = TypeAnnotation::new("unknown-type", 1, 0);
936    /// assert_eq!(DescriptorType::from_annotation(&unknown), None);
937    /// ```
938    pub fn from_annotation(annotation: &TypeAnnotation) -> Option<Self> {
939        match annotation.name.as_str() {
940            "server-descriptor" => Some(Self::ServerDescriptor),
941            "extra-info" => Some(Self::ExtraInfo),
942            "microdescriptor" => Some(Self::Microdescriptor),
943            "network-status-consensus-3" => Some(Self::NetworkStatusConsensusV3),
944            "network-status-vote-3" => Some(Self::NetworkStatusVoteV3),
945            "network-status-microdesc-consensus-3" => Some(Self::NetworkStatusMicrodescConsensusV3),
946            "bridge-network-status" => Some(Self::BridgeNetworkStatus),
947            "bridge-server-descriptor" => Some(Self::BridgeServerDescriptor),
948            "bridge-extra-info" => Some(Self::BridgeExtraInfo),
949            "dir-key-certificate-3" => Some(Self::DirKeyCertificateV3),
950            "tordnsel" => Some(Self::TorDNSEL),
951            "hidden-service-descriptor" => Some(Self::HiddenServiceDescriptor),
952            "bandwidth-file" => Some(Self::BandwidthFile),
953            _ => None,
954        }
955    }
956
957    /// Determines the descriptor type from a filename.
958    ///
959    /// This is useful for parsing descriptors from Tor's data directory
960    /// where files have conventional names like `cached-descriptors` or
961    /// `cached-consensus`.
962    ///
963    /// Returns `None` if the filename doesn't match a known pattern.
964    ///
965    /// # Arguments
966    ///
967    /// * `filename` - The filename to match (path components are stripped)
968    ///
969    /// # Example
970    ///
971    /// ```rust
972    /// use stem_rs::descriptor::DescriptorType;
973    ///
974    /// assert_eq!(
975    ///     DescriptorType::from_filename("cached-descriptors"),
976    ///     Some(DescriptorType::ServerDescriptor)
977    /// );
978    /// assert_eq!(
979    ///     DescriptorType::from_filename("cached-extrainfo"),
980    ///     Some(DescriptorType::ExtraInfo)
981    /// );
982    /// assert_eq!(
983    ///     DescriptorType::from_filename("/var/lib/tor/cached-consensus"),
984    ///     Some(DescriptorType::NetworkStatusConsensusV3)
985    /// );
986    /// assert_eq!(DescriptorType::from_filename("unknown-file"), None);
987    /// ```
988    pub fn from_filename(filename: &str) -> Option<Self> {
989        let filename = Path::new(filename)
990            .file_name()
991            .and_then(|s| s.to_str())
992            .unwrap_or(filename);
993
994        if filename.contains("cached-consensus") || filename.contains("consensus") {
995            Some(Self::NetworkStatusConsensusV3)
996        } else if filename.contains("cached-microdesc-consensus") {
997            Some(Self::NetworkStatusMicrodescConsensusV3)
998        } else if filename.contains("cached-microdescs") || filename.contains("microdescriptor") {
999            Some(Self::Microdescriptor)
1000        } else if filename.contains("cached-descriptors") || filename.contains("server-descriptor")
1001        {
1002            Some(Self::ServerDescriptor)
1003        } else if filename.contains("cached-extrainfo") || filename.contains("extra-info") {
1004            Some(Self::ExtraInfo)
1005        } else if filename.contains("exit-list") || filename.contains("tordnsel") {
1006            Some(Self::TorDNSEL)
1007        } else if filename.contains("bandwidth") {
1008            Some(Self::BandwidthFile)
1009        } else {
1010            None
1011        }
1012    }
1013}
1014
/// Selects the hash function applied when a descriptor digest is computed.
///
/// Tor identifies and verifies descriptors by their cryptographic hash.
/// Legacy descriptor formats rely on SHA-1; newer formats use SHA-256.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
///
/// let content = b"example content";
/// let sha1_digest = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
/// let sha256_digest = compute_digest(content, DigestHash::Sha256, DigestEncoding::Hex);
///
/// assert_eq!(sha1_digest.len(), 40);  // SHA-1 produces 20 bytes = 40 hex chars
/// assert_eq!(sha256_digest.len(), 64); // SHA-256 produces 32 bytes = 64 hex chars
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DigestHash {
    /// SHA-1 (160-bit / 20-byte output).
    ///
    /// The choice for legacy descriptor types such as server descriptors
    /// and v2 hidden service descriptors. SHA-1 is weak against collision
    /// attacks but is retained for backward compatibility.
    Sha1,
    /// SHA-256 (256-bit / 32-byte output).
    ///
    /// The choice for modern descriptor types such as microdescriptors
    /// and v3 hidden service descriptors.
    Sha256,
}
1046
/// Selects the textual representation of a computed digest.
///
/// The same digest bytes can be rendered in several ways depending on
/// where the result is going to be used.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
///
/// let content = b"test";
///
/// // Hexadecimal encoding (uppercase)
/// let hex = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
/// assert!(hex.chars().all(|c| c.is_ascii_hexdigit()));
///
/// // Base64 encoding (without padding)
/// let b64 = compute_digest(content, DigestHash::Sha1, DigestEncoding::Base64);
/// assert!(b64.chars().all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/'));
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DigestEncoding {
    /// Digest bytes mapped one-to-one onto characters.
    ///
    /// Every byte becomes the `char` with the same numeric value, so the
    /// result is a `String` of code points U+0000..=U+00FF rather than a
    /// byte buffer. Intended for internal processing, not for display.
    Raw,
    /// Uppercase hexadecimal.
    ///
    /// Every byte is rendered as two characters from `0-9` and `A-F`.
    /// This is the usual way of displaying fingerprints.
    Hex,
    /// Base64 with the trailing padding removed.
    ///
    /// Uses the standard alphabet (`A-Z`, `a-z`, `0-9`, `+`, `/`) and
    /// never emits `=` padding characters.
    Base64,
}
1084
/// Trait for parsing and serializing Tor descriptors.
///
/// This trait defines the common interface for all descriptor types in the
/// library. Implementors can parse descriptor content, serialize back to
/// the canonical string format, and compute cryptographic digests.
///
/// The `Sized` supertrait is required because [`Descriptor::parse`] returns
/// `Self` by value.
///
/// # Contract
///
/// Implementations must satisfy these invariants:
///
/// 1. **Round-trip consistency**: For any valid descriptor content,
///    `parse(content).to_descriptor_string()` should produce semantically
///    equivalent content (though whitespace may differ).
///
/// 2. **Digest stability**: The `digest()` method must return consistent
///    results for the same descriptor content.
///
/// 3. **Error handling**: `parse()` should return `Error::Parse` for
///    malformed content with a descriptive error message.
///
/// # Example
///
/// ```rust,no_run
/// use stem_rs::descriptor::{Descriptor, DigestHash, DigestEncoding};
/// use stem_rs::descriptor::ServerDescriptor;
///
/// let content = "router example 127.0.0.1 9001 0 0\n...";
/// let descriptor = ServerDescriptor::parse(content).unwrap();
///
/// // Serialize back to string
/// let serialized = descriptor.to_descriptor_string();
///
/// // Compute digest
/// let digest = descriptor.digest(DigestHash::Sha1, DigestEncoding::Hex).unwrap();
///
/// // Access raw content
/// let raw = descriptor.raw_content();
///
/// // Check for unrecognized lines
/// let unknown = descriptor.unrecognized_lines();
/// ```
///
/// # Implementors
///
/// - [`ServerDescriptor`] - Server descriptors
/// - [`Microdescriptor`] - Microdescriptors
/// - [`ExtraInfoDescriptor`] - Extra-info descriptors
/// - [`NetworkStatusDocument`] - Consensus documents
pub trait Descriptor: Sized {
    /// Parses a descriptor from its string content.
    ///
    /// This is an associated function (no `self`): it builds a new value of
    /// the implementing type from text.
    ///
    /// # Arguments
    ///
    /// * `content` - The descriptor content as a string
    ///
    /// # Errors
    ///
    /// Returns [`Error::Parse`] if the content is malformed or missing
    /// required fields.
    fn parse(content: &str) -> Result<Self, Error>;

    /// Serializes the descriptor to its canonical string format.
    ///
    /// The output should be valid descriptor content that can be parsed
    /// again with `parse()`.
    fn to_descriptor_string(&self) -> String;

    /// Computes the cryptographic digest of the descriptor.
    ///
    /// The digest is computed over the appropriate portion of the descriptor
    /// content (which varies by descriptor type).
    ///
    /// # Arguments
    ///
    /// * `hash` - The hash algorithm to use
    /// * `encoding` - The output encoding format
    ///
    /// # Returns
    ///
    /// The digest rendered as a `String` in the requested encoding.
    ///
    /// # Errors
    ///
    /// Returns an error if the digest cannot be computed (e.g., if the
    /// descriptor content is invalid).
    fn digest(&self, hash: DigestHash, encoding: DigestEncoding) -> Result<String, Error>;

    /// Returns the raw bytes of the original descriptor content.
    ///
    /// This is the exact content that was parsed, preserving original
    /// formatting and whitespace. The slice borrows from `self`.
    fn raw_content(&self) -> &[u8];

    /// Returns lines from the descriptor that were not recognized.
    ///
    /// These are lines that don't match any known keyword for this
    /// descriptor type. This is useful for forward compatibility when
    /// new fields are added to the descriptor format. The slice borrows
    /// from `self`.
    fn unrecognized_lines(&self) -> &[String];
}
1181
1182/// Detects the compression format of binary content.
1183///
1184/// Examines the magic bytes at the start of the content to determine
1185/// the compression format. This is useful for automatically decompressing
1186/// downloaded descriptors.
1187///
1188/// # Arguments
1189///
1190/// * `content` - The binary content to examine
1191///
1192/// # Returns
1193///
1194/// The detected [`Compression`] format, or [`Compression::Plaintext`] if
1195/// no compression is detected or the content is too short.
1196///
1197/// # Example
1198///
1199/// ```rust
1200/// use stem_rs::descriptor::{detect_compression, Compression};
1201///
1202/// // Gzip magic bytes
1203/// let gzip_content = &[0x1f, 0x8b, 0x08, 0x00];
1204/// assert_eq!(detect_compression(gzip_content), Compression::Gzip);
1205///
1206/// // Plain text
1207/// let plain = b"router example";
1208/// assert_eq!(detect_compression(plain), Compression::Plaintext);
1209/// ```
1210pub fn detect_compression(content: &[u8]) -> Compression {
1211    if content.len() < 2 {
1212        return Compression::Plaintext;
1213    }
1214
1215    if content[0] == 0x1f && content[1] == 0x8b {
1216        return Compression::Gzip;
1217    }
1218
1219    if content.len() >= 4
1220        && content[0] == 0x28
1221        && content[1] == 0xb5
1222        && content[2] == 0x2f
1223        && content[3] == 0xfd
1224    {
1225        return Compression::Zstd;
1226    }
1227
1228    if content.len() >= 6
1229        && content[0] == 0xfd
1230        && content[1] == 0x37
1231        && content[2] == 0x7a
1232        && content[3] == 0x58
1233        && content[4] == 0x5a
1234        && content[5] == 0x00
1235    {
1236        return Compression::Lzma;
1237    }
1238
1239    Compression::Plaintext
1240}
1241
1242/// Decompresses content using the specified compression format.
1243///
1244/// # Arguments
1245///
1246/// * `content` - The compressed content
1247/// * `compression` - The compression format to use
1248///
1249/// # Returns
1250///
1251/// The decompressed content as a byte vector.
1252///
1253/// # Errors
1254///
1255/// Returns [`Error::Parse`] if:
1256/// - Decompression fails (corrupted data)
1257/// - The compression format is not supported (Zstd, LZMA)
1258///
1259/// # Supported Formats
1260///
1261/// - [`Compression::Plaintext`] - Returns content unchanged
1262/// - [`Compression::Gzip`] - Full support via flate2
1263/// - [`Compression::Zstd`] - Detection only, returns error
1264/// - [`Compression::Lzma`] - Detection only, returns error
1265///
1266/// # Example
1267///
1268/// ```rust
1269/// use stem_rs::descriptor::{decompress, Compression};
1270///
1271/// // Plaintext passes through unchanged
1272/// let content = b"Hello, World!";
1273/// let result = decompress(content, Compression::Plaintext).unwrap();
1274/// assert_eq!(result, content);
1275/// ```
1276pub fn decompress(content: &[u8], compression: Compression) -> Result<Vec<u8>, Error> {
1277    match compression {
1278        Compression::Plaintext => Ok(content.to_vec()),
1279        Compression::Gzip => decompress_gzip(content),
1280        Compression::Zstd => Err(Error::Descriptor(DescriptorError::UnsupportedCompression(
1281            "Zstd decompression not supported (requires zstd crate)".into(),
1282        ))),
1283        Compression::Lzma => Err(Error::Descriptor(DescriptorError::UnsupportedCompression(
1284            "LZMA decompression not supported (requires lzma crate)".into(),
1285        ))),
1286    }
1287}
1288
1289fn decompress_gzip(_content: &[u8]) -> Result<Vec<u8>, Error> {
1290    #[cfg(feature = "compression")]
1291    {
1292        let mut decoder = GzDecoder::new(_content);
1293        let mut decompressed = Vec::new();
1294        decoder.read_to_end(&mut decompressed).map_err(|e| {
1295            Error::Descriptor(DescriptorError::DecompressionFailed(format!(
1296                "Failed to decompress gzip: {}",
1297                e
1298            )))
1299        })?;
1300        Ok(decompressed)
1301    }
1302    #[cfg(not(feature = "compression"))]
1303    {
1304        Err(Error::Descriptor(DescriptorError::UnsupportedCompression(
1305            "Gzip decompression not supported (enable 'compression' feature)".into(),
1306        )))
1307    }
1308}
1309
1310/// Automatically detects and decompresses content.
1311///
1312/// This is a convenience function that combines [`detect_compression`] and
1313/// [`decompress`]. It examines the content's magic bytes to determine the
1314/// compression format and decompresses accordingly.
1315///
1316/// # Arguments
1317///
1318/// * `content` - The potentially compressed content
1319///
1320/// # Returns
1321///
1322/// The decompressed content. If the content is not compressed, it is
1323/// returned unchanged.
1324///
1325/// # Errors
1326///
1327/// Returns [`Error::Parse`] if decompression fails or the detected
1328/// compression format is not supported.
1329///
1330/// # Example
1331///
1332/// ```rust
1333/// use stem_rs::descriptor::auto_decompress;
1334///
1335/// // Plain text passes through
1336/// let plain = b"router example 127.0.0.1";
1337/// let result = auto_decompress(plain).unwrap();
1338/// assert_eq!(result, plain);
1339/// ```
1340pub fn auto_decompress(content: &[u8]) -> Result<Vec<u8>, Error> {
1341    let compression = detect_compression(content);
1342    decompress(content, compression)
1343}
1344
1345/// Computes a cryptographic digest of content.
1346///
1347/// This is a low-level function for computing digests. For descriptor
1348/// digests, prefer using the [`Descriptor::digest`] method which knows
1349/// the correct content range to hash.
1350///
1351/// # Arguments
1352///
1353/// * `content` - The content to hash
1354/// * `hash` - The hash algorithm to use
1355/// * `encoding` - The output encoding format
1356///
1357/// # Returns
1358///
1359/// The digest as a string in the specified encoding.
1360///
1361/// # Example
1362///
1363/// ```rust
1364/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
1365///
1366/// let content = b"test content";
1367///
1368/// // SHA-1 in hex
1369/// let sha1_hex = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
1370/// assert_eq!(sha1_hex.len(), 40);
1371///
1372/// // SHA-256 in base64
1373/// let sha256_b64 = compute_digest(content, DigestHash::Sha256, DigestEncoding::Base64);
1374/// ```
1375pub fn compute_digest(content: &[u8], hash: DigestHash, encoding: DigestEncoding) -> String {
1376    match hash {
1377        DigestHash::Sha1 => {
1378            let mut hasher = Sha1::new();
1379            hasher.update(content);
1380            let result = hasher.finalize();
1381            encode_digest(&result, encoding)
1382        }
1383        DigestHash::Sha256 => {
1384            let mut hasher = Sha256::new();
1385            hasher.update(content);
1386            let result = hasher.finalize();
1387            encode_digest(&result, encoding)
1388        }
1389    }
1390}
1391
1392fn encode_digest(bytes: &[u8], encoding: DigestEncoding) -> String {
1393    match encoding {
1394        DigestEncoding::Raw => bytes.iter().map(|b| *b as char).collect(),
1395        DigestEncoding::Hex => bytes.iter().map(|b| format!("{:02X}", b)).collect(),
1396        DigestEncoding::Base64 => base64_encode(bytes),
1397    }
1398}
1399
/// Encodes bytes as standard-alphabet Base64 without `=` padding.
///
/// The input is consumed in groups of up to three bytes. Each group is
/// packed big-endian into a 24-bit value and emitted as 6-bit symbols: a
/// full group yields four characters, a two-byte tail yields three, and a
/// one-byte tail yields two. Empty input produces an empty string.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Looks up the symbol for the 6-bit field of `group` starting at `shift`.
    let symbol = |group: u32, shift: u32| ALPHABET[((group >> shift) & 0x3F) as usize] as char;

    let mut encoded = String::new();
    for chunk in bytes.chunks(3) {
        // Missing trailing bytes of a short final chunk contribute zero bits.
        let group = chunk
            .iter()
            .enumerate()
            .fold(0u32, |acc, (pos, &byte)| acc | (u32::from(byte) << (16 - 8 * pos)));

        encoded.push(symbol(group, 18));
        encoded.push(symbol(group, 12));
        if chunk.len() > 1 {
            encoded.push(symbol(group, 6));
        }
        if chunk.len() > 2 {
            encoded.push(symbol(group, 0));
        }
    }
    encoded
}
1421
1422/// Parses a descriptor from file content with automatic decompression.
1423///
1424/// This function handles the common case of reading a descriptor from a file:
1425/// 1. Automatically decompresses the content if compressed
1426/// 2. Strips any `@type` annotation from the beginning
1427/// 3. Parses the descriptor using the type's `parse` method
1428///
1429/// # Type Parameters
1430///
1431/// * `T` - The descriptor type to parse (must implement [`Descriptor`])
1432///
1433/// # Arguments
1434///
1435/// * `content` - The raw file content (possibly compressed)
1436///
1437/// # Returns
1438///
1439/// The parsed descriptor.
1440///
1441/// # Errors
1442///
1443/// Returns [`Error::Parse`] if:
1444/// - Decompression fails
1445/// - The content is not valid UTF-8
1446/// - The descriptor content is malformed
1447///
1448/// # Example
1449///
1450/// ```rust,no_run
1451/// use stem_rs::descriptor::{parse_file, ServerDescriptor};
1452///
1453/// let content = std::fs::read("cached-descriptors").unwrap();
1454/// let descriptor: ServerDescriptor = parse_file(&content).unwrap();
1455/// println!("Parsed descriptor for: {}", descriptor.nickname);
1456/// ```
1457///
1458/// # See Also
1459///
1460/// - [`parse_file_with_annotation`] - Also returns the type annotation if present
1461/// - [`Descriptor::parse`] - Parse from string without decompression
1462pub fn parse_file<T: Descriptor>(content: &[u8]) -> Result<T, Error> {
1463    let decompressed = auto_decompress(content)?;
1464    let content_str = String::from_utf8_lossy(&decompressed);
1465    let (_, stripped) = strip_type_annotation(&content_str);
1466    T::parse(stripped)
1467}
1468
1469/// Parses a descriptor from file content, returning the type annotation.
1470///
1471/// Like [`parse_file`], but also returns the `@type` annotation if one
1472/// was present at the beginning of the content.
1473///
1474/// # Type Parameters
1475///
1476/// * `T` - The descriptor type to parse (must implement [`Descriptor`])
1477///
1478/// # Arguments
1479///
1480/// * `content` - The raw file content (possibly compressed)
1481///
1482/// # Returns
1483///
1484/// A tuple of:
1485/// - `Option<TypeAnnotation>` - The type annotation if present
1486/// - `T` - The parsed descriptor
1487///
1488/// # Errors
1489///
1490/// Returns [`Error::Parse`] if decompression or parsing fails.
1491///
1492/// # Example
1493///
1494/// ```rust,no_run
1495/// use stem_rs::descriptor::{parse_file_with_annotation, ServerDescriptor};
1496///
1497/// let content = std::fs::read("server-descriptor").unwrap();
1498/// let (annotation, descriptor): (_, ServerDescriptor) =
1499///     parse_file_with_annotation(&content).unwrap();
1500///
1501/// if let Some(ann) = annotation {
1502///     println!("Type: {} v{}.{}", ann.name, ann.major_version, ann.minor_version);
1503/// }
1504/// ```
1505pub fn parse_file_with_annotation<T: Descriptor>(
1506    content: &[u8],
1507) -> Result<(Option<TypeAnnotation>, T), Error> {
1508    let decompressed = auto_decompress(content)?;
1509    let content_str = String::from_utf8_lossy(&decompressed);
1510    let (annotation, stripped) = strip_type_annotation(&content_str);
1511    let descriptor = T::parse(stripped)?;
1512    Ok((annotation, descriptor))
1513}
1514
1515/// Strips a type annotation from the beginning of descriptor content.
1516///
1517/// If the first line is a valid `@type` annotation, it is parsed and
1518/// removed from the content. Otherwise, the content is returned unchanged.
1519///
1520/// # Arguments
1521///
1522/// * `content` - The descriptor content
1523///
1524/// # Returns
1525///
1526/// A tuple of:
1527/// - `Option<TypeAnnotation>` - The parsed annotation if present
1528/// - `&str` - The remaining content after the annotation
1529///
1530/// # Example
1531///
1532/// ```rust
1533/// use stem_rs::descriptor::strip_type_annotation;
1534///
1535/// let content = "@type server-descriptor 1.0\nrouter example 127.0.0.1";
1536/// let (annotation, rest) = strip_type_annotation(content);
1537///
1538/// assert!(annotation.is_some());
1539/// assert_eq!(annotation.unwrap().name, "server-descriptor");
1540/// assert_eq!(rest, "router example 127.0.0.1");
1541///
1542/// // Without annotation
1543/// let content = "router example 127.0.0.1";
1544/// let (annotation, rest) = strip_type_annotation(content);
1545/// assert!(annotation.is_none());
1546/// assert_eq!(rest, content);
1547/// ```
1548pub fn strip_type_annotation(content: &str) -> (Option<TypeAnnotation>, &str) {
1549    let first_line_end = content.find('\n').unwrap_or(content.len());
1550    let first_line = &content[..first_line_end];
1551
1552    if let Some(annotation) = TypeAnnotation::parse(first_line) {
1553        let rest = if first_line_end < content.len() {
1554            &content[first_line_end + 1..]
1555        } else {
1556            ""
1557        };
1558        (Some(annotation), rest)
1559    } else {
1560        (None, content)
1561    }
1562}
1563
#[cfg(test)]
mod tests {
    use super::*;

    // --- Compression detection -------------------------------------------

    #[test]
    fn test_detect_compression_plaintext() {
        let detected = detect_compression(b"@type server-descriptor 1.0\nrouter test");
        assert_eq!(detected, Compression::Plaintext);
    }

    #[test]
    fn test_detect_compression_gzip() {
        let detected = detect_compression(&[0x1f, 0x8b, 0x08, 0x00]);
        assert_eq!(detected, Compression::Gzip);
    }

    #[test]
    fn test_detect_compression_zstd() {
        let detected = detect_compression(&[0x28, 0xb5, 0x2f, 0xfd, 0x00]);
        assert_eq!(detected, Compression::Zstd);
    }

    #[test]
    fn test_detect_compression_lzma() {
        let detected = detect_compression(&[0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00]);
        assert_eq!(detected, Compression::Lzma);
    }

    // --- Decompression ---------------------------------------------------

    #[test]
    fn test_decompress_plaintext() {
        let input = b"Hello, World!";
        let output = decompress(input, Compression::Plaintext).unwrap();
        assert_eq!(output, input);
    }

    #[test]
    fn test_auto_decompress_plaintext() {
        let input = b"Hello, World!";
        let output = auto_decompress(input).unwrap();
        assert_eq!(output, input);
    }

    #[test]
    fn test_decompress_gzip() {
        // A complete gzip stream whose payload is the ASCII text "Hello".
        let hello_gz: &[u8] = &[
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xf3, 0x48, 0xcd, 0xc9,
            0xc9, 0x07, 0x00, 0x82, 0x89, 0xd1, 0xf7, 0x05, 0x00, 0x00, 0x00,
        ];
        let output = decompress(hello_gz, Compression::Gzip).unwrap();
        assert_eq!(output, b"Hello");
    }

    // --- Type annotations ------------------------------------------------

    #[test]
    fn test_type_annotation_parse() {
        let parsed = TypeAnnotation::parse("@type server-descriptor 1.0").unwrap();
        assert_eq!(parsed.name, "server-descriptor");
        assert_eq!(parsed.major_version, 1);
        assert_eq!(parsed.minor_version, 0);
    }

    #[test]
    fn test_type_annotation_parse_extra_info() {
        let parsed = TypeAnnotation::parse("@type extra-info 1.0").unwrap();
        assert_eq!(parsed.name, "extra-info");
        assert_eq!(parsed.major_version, 1);
        assert_eq!(parsed.minor_version, 0);
    }

    #[test]
    fn test_type_annotation_parse_bridge_extra_info() {
        let parsed = TypeAnnotation::parse("@type bridge-extra-info 1.2").unwrap();
        assert_eq!(parsed.name, "bridge-extra-info");
        assert_eq!(parsed.major_version, 1);
        assert_eq!(parsed.minor_version, 2);
    }

    #[test]
    fn test_type_annotation_parse_invalid() {
        // Not an annotation at all, then progressively less truncated ones.
        let not_annotation = TypeAnnotation::parse("router test");
        let keyword_only = TypeAnnotation::parse("@type");
        let missing_version = TypeAnnotation::parse("@type server-descriptor");
        let missing_minor = TypeAnnotation::parse("@type server-descriptor 1");

        assert!(not_annotation.is_none());
        assert!(keyword_only.is_none());
        assert!(missing_version.is_none());
        assert!(missing_minor.is_none());
    }

    #[test]
    fn test_type_annotation_display() {
        let rendered = TypeAnnotation::new("server-descriptor", 1, 0).to_string();
        assert_eq!(rendered, "@type server-descriptor 1.0");
    }

    #[test]
    fn test_strip_type_annotation() {
        let (annotation, rest) =
            strip_type_annotation("@type server-descriptor 1.0\nrouter test 127.0.0.1");
        let annotation = annotation.expect("first line should parse as a type annotation");
        assert_eq!(annotation.name, "server-descriptor");
        assert_eq!(rest, "router test 127.0.0.1");
    }

    #[test]
    fn test_strip_type_annotation_no_annotation() {
        let input = "router test 127.0.0.1";
        let (annotation, rest) = strip_type_annotation(input);
        assert!(annotation.is_none());
        assert_eq!(rest, input);
    }

    // --- Descriptor type lookup ------------------------------------------

    #[test]
    fn test_descriptor_type_from_annotation() {
        let expect_type = |name: &str, expected: DescriptorType| {
            let annotation = TypeAnnotation::new(name, 1, 0);
            assert_eq!(DescriptorType::from_annotation(&annotation), Some(expected));
        };

        expect_type("server-descriptor", DescriptorType::ServerDescriptor);
        expect_type("extra-info", DescriptorType::ExtraInfo);
        expect_type("tordnsel", DescriptorType::TorDNSEL);
    }

    #[test]
    fn test_descriptor_type_from_filename() {
        let expect_type = |filename: &str, expected: DescriptorType| {
            assert_eq!(DescriptorType::from_filename(filename), Some(expected));
        };

        expect_type("cached-consensus", DescriptorType::NetworkStatusConsensusV3);
        expect_type("cached-descriptors", DescriptorType::ServerDescriptor);
        expect_type("cached-extrainfo", DescriptorType::ExtraInfo);
        expect_type("exit-list", DescriptorType::TorDNSEL);
    }
}