stem_rs/descriptor/
mod.rs

1//! Descriptor parsing for Tor network documents.
2//!
3//! This module provides types for parsing various Tor descriptor formats
4//! including server descriptors, microdescriptors, consensus documents,
5//! and hidden service descriptors.
6//!
7//! # Overview
8//!
9//! Tor relays and directory authorities publish various types of descriptors
10//! that describe the network topology, relay capabilities, and routing
11//! information. This module provides parsers for all major descriptor types:
12//!
13//! - [`ServerDescriptor`] - Full relay metadata including keys, policies, and capabilities
14//! - [`Microdescriptor`] - Compact client-side descriptors with essential routing info
15//! - [`NetworkStatusDocument`] - Consensus documents listing all relays and their status
16//! - [`ExtraInfoDescriptor`] - Bandwidth statistics and additional relay information
17//! - [`HiddenServiceDescriptorV2`] / [`HiddenServiceDescriptorV3`] - Onion service descriptors
18//! - [`Ed25519Certificate`] - Ed25519 certificates used by relays
19//! - [`KeyCertificate`] - Directory authority key certificates
20//! - [`BandwidthFile`] - Bandwidth authority measurement files
21//! - [`TorDNSEL`] - Exit list data from TorDNSEL
22//!
23//! # Descriptor Sources
24//!
25//! Descriptors can be obtained from several sources:
26//!
27//! - **Tor's data directory**: Cached files like `cached-descriptors`, `cached-consensus`
28//! - **Directory authorities**: Via the [`remote`] module's download functions
29//! - **CollecTor archives**: Historical descriptors with `@type` annotations
30//!
31//! # Type Annotations
32//!
33//! Descriptors from [CollecTor](https://metrics.torproject.org/collector.html) include
34//! a type annotation on the first line in the format `@type <name> <major>.<minor>`.
35//! The [`TypeAnnotation`] struct parses these annotations, and [`parse_file`] handles
36//! them automatically.
37//!
38//! # Compression
39//!
40//! Downloaded descriptors are often compressed. This module supports automatic
41//! decompression via [`auto_decompress`] for:
42//!
43//! - **Plaintext** - Uncompressed data
44//! - **Gzip** - Standard gzip compression (fully supported)
45//! - **Zstd** - Zstandard compression (detection only, requires external crate)
46//! - **LZMA** - LZMA/XZ compression (detection only, requires external crate)
47//!
48//! # Digests
49//!
50//! Descriptors have cryptographic digests used for identification and verification.
51//! The [`compute_digest`] function and [`Descriptor::digest`] method support:
52//!
53//! - [`DigestHash::Sha1`] - SHA-1 hash (legacy, used by older descriptors)
54//! - [`DigestHash::Sha256`] - SHA-256 hash (modern descriptors)
55//!
56//! With encodings:
57//!
58//! - [`DigestEncoding::Raw`] - Raw bytes as characters
59//! - [`DigestEncoding::Hex`] - Uppercase hexadecimal
60//! - [`DigestEncoding::Base64`] - Base64 without padding
61//!
62//! # Example
63//!
64//! ```rust,no_run
65//! use stem_rs::descriptor::{parse_file, ServerDescriptor, Descriptor};
66//! use stem_rs::descriptor::{DigestHash, DigestEncoding};
67//!
68//! // Parse a server descriptor from file contents
69//! let content = std::fs::read("cached-descriptors").unwrap();
70//! let descriptor: ServerDescriptor = parse_file(&content).unwrap();
71//!
72//! // Access descriptor fields
73//! println!("Nickname: {}", descriptor.nickname);
74//! println!("Address: {}", descriptor.address);
75//!
76//! // Compute the descriptor's digest
77//! let digest = descriptor.digest(DigestHash::Sha1, DigestEncoding::Hex).unwrap();
78//! println!("Digest: {}", digest);
79//! ```
80//!
81//! # See Also
82//!
83//! - [`remote`] - Download descriptors from directory authorities
84//! - [`server`] - Server descriptor parsing
85//! - [`micro`] - Microdescriptor parsing
86//! - [`consensus`] - Network status document parsing
87//! - [`hidden`] - Hidden service descriptor parsing
88//!
//! # External References
90//!
91//! - [Tor Directory Protocol Specification](https://spec.torproject.org/dir-spec)
92//! - [Python Stem descriptor module](https://stem.torproject.org/api/descriptor/descriptor.html)
93
94pub mod authority;
95pub mod bandwidth_file;
96pub mod certificate;
97pub mod consensus;
98pub mod extra_info;
99pub mod hidden;
100pub mod key_cert;
101pub mod micro;
102pub mod remote;
103pub mod router_status;
104pub mod server;
105pub mod tordnsel;
106
107pub use authority::{DirectoryAuthority, SharedRandomnessCommitment};
108pub use bandwidth_file::{BandwidthFile, BandwidthMeasurement, RecentStats, RelayFailures};
109pub use certificate::{
110    Ed25519Certificate, Ed25519Extension, ExtensionFlag, ExtensionType, ED25519_HEADER_LENGTH,
111    ED25519_KEY_LENGTH, ED25519_SIGNATURE_LENGTH,
112};
113pub use consensus::{DocumentSignature, NetworkStatusDocument, SharedRandomness};
114pub use extra_info::{
115    BandwidthHistory, DirResponse, DirStat, ExtraInfoDescriptor, PortKey, Transport,
116};
117pub use hidden::{
118    AuthorizedClient, HiddenServiceDescriptorV2, HiddenServiceDescriptorV3, InnerLayer,
119    IntroductionPointV2, IntroductionPointV3, LinkSpecifier, OuterLayer,
120};
121pub use key_cert::KeyCertificate;
122pub use micro::Microdescriptor;
123pub use remote::{
124    download_bandwidth_file, download_consensus, download_detached_signatures,
125    download_extrainfo_descriptors, download_from_dirport, download_key_certificates,
126    download_microdescriptors, download_server_descriptors, get_authorities, Compression, DirPort,
127    DownloadResult,
128};
129pub use router_status::{MicrodescriptorHash, RouterStatusEntry, RouterStatusEntryType};
130pub use server::ServerDescriptor;
131pub use tordnsel::{parse_exit_list, parse_exit_list_bytes, TorDNSEL};
132
133use crate::Error;
134use flate2::read::GzDecoder;
135use sha1::{Digest as Sha1Digest, Sha1};
136use sha2::Sha256;
137use std::io::Read;
138use std::path::Path;
139
/// A `@type` annotation as found in CollecTor descriptor archives.
///
/// CollecTor prefixes each archived descriptor file with a single line of
/// the form `@type <name> <major>.<minor>`. This struct is the parsed
/// representation of that line.
///
/// # Format
///
/// ```text
/// @type server-descriptor 1.0
/// @type network-status-consensus-3 1.0
/// @type microdescriptor 1.0
/// ```
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::TypeAnnotation;
///
/// let annotation = TypeAnnotation::parse("@type server-descriptor 1.0").unwrap();
/// assert_eq!(annotation.name, "server-descriptor");
/// assert_eq!(annotation.major_version, 1);
/// assert_eq!(annotation.minor_version, 0);
///
/// // Convert back to string
/// assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
/// ```
///
/// # See Also
///
/// - [`DescriptorType`] - Enum of known descriptor types
/// - [`strip_type_annotation`] - Extract annotation from content
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TypeAnnotation {
    /// The descriptor type name (e.g., "server-descriptor", "microdescriptor").
    pub name: String,
    /// The major version number.
    pub major_version: u32,
    /// The minor version number.
    pub minor_version: u32,
}

impl TypeAnnotation {
    /// Builds an annotation from a type name and a `major.minor` version.
    ///
    /// # Arguments
    ///
    /// * `name` - The descriptor type name
    /// * `major_version` - The major version number
    /// * `minor_version` - The minor version number
    ///
    /// # Example
    ///
    /// ```rust
    /// use stem_rs::descriptor::TypeAnnotation;
    ///
    /// let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
    /// assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
    /// ```
    pub fn new(name: impl Into<String>, major_version: u32, minor_version: u32) -> Self {
        Self {
            name: name.into(),
            major_version,
            minor_version,
        }
    }

    /// Parses a type annotation from a single line of text.
    ///
    /// Leading and trailing whitespace is ignored, and the name and version
    /// may be separated by any amount of whitespace. Both version components
    /// must consist solely of ASCII digits and fit in a `u32`.
    ///
    /// Returns `None` if the line is not a valid type annotation.
    ///
    /// # Arguments
    ///
    /// * `line` - The line to parse
    ///
    /// # Example
    ///
    /// ```rust
    /// use stem_rs::descriptor::TypeAnnotation;
    ///
    /// // Valid annotation
    /// let annotation = TypeAnnotation::parse("@type extra-info 1.0").unwrap();
    /// assert_eq!(annotation.name, "extra-info");
    ///
    /// // Invalid - not an annotation
    /// assert!(TypeAnnotation::parse("router test 127.0.0.1").is_none());
    ///
    /// // Invalid - missing version
    /// assert!(TypeAnnotation::parse("@type server-descriptor").is_none());
    ///
    /// // Invalid - version components must be plain digits
    /// assert!(TypeAnnotation::parse("@type server-descriptor +1.0").is_none());
    /// ```
    pub fn parse(line: &str) -> Option<Self> {
        /// Parses one version component, accepting ASCII digits only.
        ///
        /// `u32::from_str` on its own tolerates a leading `+`, which is not
        /// part of the annotation grammar, so the digits are checked first.
        fn version_number(text: &str) -> Option<u32> {
            if text.bytes().all(|b| b.is_ascii_digit()) {
                // Empty input and overflow are both rejected by `parse`.
                text.parse().ok()
            } else {
                None
            }
        }

        let rest = line.trim().strip_prefix("@type ")?;

        // Exactly two whitespace-separated fields: the name and the version.
        let mut fields = rest.split_whitespace();
        let name = fields.next()?;
        let version = fields.next()?;
        if fields.next().is_some() {
            return None;
        }

        // Exactly two dot-separated components: major and minor.
        let mut numbers = version.split('.');
        let major_version = version_number(numbers.next()?)?;
        let minor_version = version_number(numbers.next()?)?;
        if numbers.next().is_some() {
            return None;
        }

        Some(Self {
            name: name.to_string(),
            major_version,
            minor_version,
        })
    }
}

/// Formats the annotation in its archive form, `@type <name> <major>.<minor>`.
impl std::fmt::Display for TypeAnnotation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "@type {} {}.{}",
            self.name, self.major_version, self.minor_version
        )
    }
}
268
269/// Known descriptor types in the Tor network.
270///
271/// This enum represents all descriptor types that can be identified from
272/// type annotations or filenames. Each variant corresponds to a specific
273/// descriptor format defined in the Tor directory protocol specification.
274///
275/// # Stability
276///
277/// This enum is non-exhaustive. New descriptor types may be added in future
278/// Tor versions.
279///
280/// # Example
281///
282/// ```rust
283/// use stem_rs::descriptor::{DescriptorType, TypeAnnotation};
284///
285/// // From type annotation
286/// let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
287/// let desc_type = DescriptorType::from_annotation(&annotation);
288/// assert_eq!(desc_type, Some(DescriptorType::ServerDescriptor));
289///
290/// // From filename
291/// let desc_type = DescriptorType::from_filename("cached-consensus");
292/// assert_eq!(desc_type, Some(DescriptorType::NetworkStatusConsensusV3));
293/// ```
294#[derive(Debug, Clone, Copy, PartialEq, Eq)]
295pub enum DescriptorType {
296    /// Server descriptor containing full relay metadata.
297    ///
298    /// Includes identity keys, exit policy, bandwidth, and other relay information.
299    /// Annotation name: `server-descriptor`
300    ServerDescriptor,
301    /// Extra-info descriptor with bandwidth statistics.
302    ///
303    /// Contains detailed statistics about relay operation.
304    /// Annotation name: `extra-info`
305    ExtraInfo,
306    /// Microdescriptor with compact routing information.
307    ///
308    /// Used by clients for building circuits with minimal data.
309    /// Annotation name: `microdescriptor`
310    Microdescriptor,
311    /// Network status consensus document (v3).
312    ///
313    /// The agreed-upon view of the network signed by directory authorities.
314    /// Annotation name: `network-status-consensus-3`
315    NetworkStatusConsensusV3,
316    /// Network status vote document (v3).
317    ///
318    /// Individual directory authority's view before consensus.
319    /// Annotation name: `network-status-vote-3`
320    NetworkStatusVoteV3,
321    /// Microdescriptor-flavored consensus document (v3).
322    ///
323    /// Consensus using microdescriptor hashes instead of full descriptors.
324    /// Annotation name: `network-status-microdesc-consensus-3`
325    NetworkStatusMicrodescConsensusV3,
326    /// Bridge network status document.
327    ///
328    /// Network status for bridge relays (not publicly listed).
329    /// Annotation name: `bridge-network-status`
330    BridgeNetworkStatus,
331    /// Bridge server descriptor.
332    ///
333    /// Server descriptor for bridge relays with some fields redacted.
334    /// Annotation name: `bridge-server-descriptor`
335    BridgeServerDescriptor,
336    /// Bridge extra-info descriptor.
337    ///
338    /// Extra-info for bridge relays.
339    /// Annotation name: `bridge-extra-info`
340    BridgeExtraInfo,
341    /// Directory key certificate (v3).
342    ///
343    /// Certificate binding a directory authority's signing key to its identity.
344    /// Annotation name: `dir-key-certificate-3`
345    DirKeyCertificateV3,
346    /// TorDNSEL exit list.
347    ///
348    /// List of exit relay IP addresses from the TorDNSEL service.
349    /// Annotation name: `tordnsel`
350    TorDNSEL,
351    /// Hidden service descriptor.
352    ///
353    /// Descriptor for onion services (v2 or v3).
354    /// Annotation name: `hidden-service-descriptor`
355    HiddenServiceDescriptor,
356    /// Bandwidth authority measurement file.
357    ///
358    /// Bandwidth measurements from bandwidth authorities.
359    /// Annotation name: `bandwidth-file`
360    BandwidthFile,
361}
362
363impl DescriptorType {
364    /// Returns the annotation name for this descriptor type.
365    ///
366    /// This is the name used in `@type` annotations in CollecTor archives.
367    ///
368    /// # Example
369    ///
370    /// ```rust
371    /// use stem_rs::descriptor::DescriptorType;
372    ///
373    /// assert_eq!(DescriptorType::ServerDescriptor.annotation_name(), "server-descriptor");
374    /// assert_eq!(DescriptorType::Microdescriptor.annotation_name(), "microdescriptor");
375    /// ```
376    pub fn annotation_name(&self) -> &'static str {
377        match self {
378            Self::ServerDescriptor => "server-descriptor",
379            Self::ExtraInfo => "extra-info",
380            Self::Microdescriptor => "microdescriptor",
381            Self::NetworkStatusConsensusV3 => "network-status-consensus-3",
382            Self::NetworkStatusVoteV3 => "network-status-vote-3",
383            Self::NetworkStatusMicrodescConsensusV3 => "network-status-microdesc-consensus-3",
384            Self::BridgeNetworkStatus => "bridge-network-status",
385            Self::BridgeServerDescriptor => "bridge-server-descriptor",
386            Self::BridgeExtraInfo => "bridge-extra-info",
387            Self::DirKeyCertificateV3 => "dir-key-certificate-3",
388            Self::TorDNSEL => "tordnsel",
389            Self::HiddenServiceDescriptor => "hidden-service-descriptor",
390            Self::BandwidthFile => "bandwidth-file",
391        }
392    }
393
394    /// Determines the descriptor type from a type annotation.
395    ///
396    /// Returns `None` if the annotation name is not recognized.
397    ///
398    /// # Arguments
399    ///
400    /// * `annotation` - The type annotation to match
401    ///
402    /// # Example
403    ///
404    /// ```rust
405    /// use stem_rs::descriptor::{DescriptorType, TypeAnnotation};
406    ///
407    /// let annotation = TypeAnnotation::new("extra-info", 1, 0);
408    /// assert_eq!(
409    ///     DescriptorType::from_annotation(&annotation),
410    ///     Some(DescriptorType::ExtraInfo)
411    /// );
412    ///
413    /// let unknown = TypeAnnotation::new("unknown-type", 1, 0);
414    /// assert_eq!(DescriptorType::from_annotation(&unknown), None);
415    /// ```
416    pub fn from_annotation(annotation: &TypeAnnotation) -> Option<Self> {
417        match annotation.name.as_str() {
418            "server-descriptor" => Some(Self::ServerDescriptor),
419            "extra-info" => Some(Self::ExtraInfo),
420            "microdescriptor" => Some(Self::Microdescriptor),
421            "network-status-consensus-3" => Some(Self::NetworkStatusConsensusV3),
422            "network-status-vote-3" => Some(Self::NetworkStatusVoteV3),
423            "network-status-microdesc-consensus-3" => Some(Self::NetworkStatusMicrodescConsensusV3),
424            "bridge-network-status" => Some(Self::BridgeNetworkStatus),
425            "bridge-server-descriptor" => Some(Self::BridgeServerDescriptor),
426            "bridge-extra-info" => Some(Self::BridgeExtraInfo),
427            "dir-key-certificate-3" => Some(Self::DirKeyCertificateV3),
428            "tordnsel" => Some(Self::TorDNSEL),
429            "hidden-service-descriptor" => Some(Self::HiddenServiceDescriptor),
430            "bandwidth-file" => Some(Self::BandwidthFile),
431            _ => None,
432        }
433    }
434
435    /// Determines the descriptor type from a filename.
436    ///
437    /// This is useful for parsing descriptors from Tor's data directory
438    /// where files have conventional names like `cached-descriptors` or
439    /// `cached-consensus`.
440    ///
441    /// Returns `None` if the filename doesn't match a known pattern.
442    ///
443    /// # Arguments
444    ///
445    /// * `filename` - The filename to match (path components are stripped)
446    ///
447    /// # Example
448    ///
449    /// ```rust
450    /// use stem_rs::descriptor::DescriptorType;
451    ///
452    /// assert_eq!(
453    ///     DescriptorType::from_filename("cached-descriptors"),
454    ///     Some(DescriptorType::ServerDescriptor)
455    /// );
456    /// assert_eq!(
457    ///     DescriptorType::from_filename("cached-extrainfo"),
458    ///     Some(DescriptorType::ExtraInfo)
459    /// );
460    /// assert_eq!(
461    ///     DescriptorType::from_filename("/var/lib/tor/cached-consensus"),
462    ///     Some(DescriptorType::NetworkStatusConsensusV3)
463    /// );
464    /// assert_eq!(DescriptorType::from_filename("unknown-file"), None);
465    /// ```
466    pub fn from_filename(filename: &str) -> Option<Self> {
467        let filename = Path::new(filename)
468            .file_name()
469            .and_then(|s| s.to_str())
470            .unwrap_or(filename);
471
472        if filename.contains("cached-consensus") || filename.contains("consensus") {
473            Some(Self::NetworkStatusConsensusV3)
474        } else if filename.contains("cached-microdesc-consensus") {
475            Some(Self::NetworkStatusMicrodescConsensusV3)
476        } else if filename.contains("cached-microdescs") || filename.contains("microdescriptor") {
477            Some(Self::Microdescriptor)
478        } else if filename.contains("cached-descriptors") || filename.contains("server-descriptor")
479        {
480            Some(Self::ServerDescriptor)
481        } else if filename.contains("cached-extrainfo") || filename.contains("extra-info") {
482            Some(Self::ExtraInfo)
483        } else if filename.contains("exit-list") || filename.contains("tordnsel") {
484            Some(Self::TorDNSEL)
485        } else if filename.contains("bandwidth") {
486            Some(Self::BandwidthFile)
487        } else {
488            None
489        }
490    }
491}
492
/// Selects the hash function used when computing a descriptor digest.
///
/// Descriptors are identified and verified by cryptographic hashes; which
/// algorithm applies depends on the descriptor type.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
///
/// let content = b"example content";
/// let sha1_digest = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
/// let sha256_digest = compute_digest(content, DigestHash::Sha256, DigestEncoding::Hex);
///
/// assert_eq!(sha1_digest.len(), 40);  // SHA-1 produces 20 bytes = 40 hex chars
/// assert_eq!(sha256_digest.len(), 64); // SHA-256 produces 32 bytes = 64 hex chars
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DigestHash {
    /// SHA-1 (160-bit / 20-byte output).
    ///
    /// The legacy choice, used by older formats such as server descriptors
    /// and v2 hidden service descriptors. SHA-1 is weak against collisions
    /// but is retained for backward compatibility.
    Sha1,
    /// SHA-256 (256-bit / 32-byte output).
    ///
    /// The modern choice, used by formats such as microdescriptors and
    /// v3 hidden service descriptors.
    Sha256,
}
524
/// Selects the textual representation of a computed digest.
///
/// The same digest bytes can be rendered in several ways depending on
/// where the result will be used.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
///
/// let content = b"test";
///
/// // Hexadecimal encoding (uppercase)
/// let hex = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
/// assert!(hex.chars().all(|c| c.is_ascii_hexdigit()));
///
/// // Base64 encoding (without padding)
/// let b64 = compute_digest(content, DigestHash::Sha1, DigestEncoding::Base64);
/// assert!(b64.chars().all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/'));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DigestEncoding {
    /// Each digest byte mapped directly to one `char`.
    ///
    /// Intended mainly for internal processing rather than display.
    Raw,
    /// Uppercase hexadecimal, two characters (0-9, A-F) per byte.
    ///
    /// This is the most common format for displaying fingerprints.
    Hex,
    /// Standard-alphabet Base64 (A-Z, a-z, 0-9, +, /) with the trailing
    /// `=` padding omitted.
    Base64,
}
562
563/// Trait for parsing and serializing Tor descriptors.
564///
565/// This trait defines the common interface for all descriptor types in the
566/// library. Implementors can parse descriptor content, serialize back to
567/// the canonical string format, and compute cryptographic digests.
568///
569/// # Contract
570///
571/// Implementations must satisfy these invariants:
572///
573/// 1. **Round-trip consistency**: For any valid descriptor content,
574///    `parse(content).to_descriptor_string()` should produce semantically
575///    equivalent content (though whitespace may differ).
576///
577/// 2. **Digest stability**: The `digest()` method must return consistent
578///    results for the same descriptor content.
579///
580/// 3. **Error handling**: `parse()` should return `Error::Parse` for
581///    malformed content with a descriptive error message.
582///
583/// # Example
584///
585/// ```rust,no_run
586/// use stem_rs::descriptor::{Descriptor, DigestHash, DigestEncoding};
587/// use stem_rs::descriptor::ServerDescriptor;
588///
589/// let content = "router example 127.0.0.1 9001 0 0\n...";
590/// let descriptor = ServerDescriptor::parse(content).unwrap();
591///
592/// // Serialize back to string
593/// let serialized = descriptor.to_descriptor_string();
594///
595/// // Compute digest
596/// let digest = descriptor.digest(DigestHash::Sha1, DigestEncoding::Hex).unwrap();
597///
598/// // Access raw content
599/// let raw = descriptor.raw_content();
600///
601/// // Check for unrecognized lines
602/// let unknown = descriptor.unrecognized_lines();
603/// ```
604///
605/// # Implementors
606///
607/// - [`ServerDescriptor`] - Server descriptors
608/// - [`Microdescriptor`] - Microdescriptors
609/// - [`ExtraInfoDescriptor`] - Extra-info descriptors
610/// - [`NetworkStatusDocument`] - Consensus documents
611pub trait Descriptor: Sized {
612    /// Parses a descriptor from its string content.
613    ///
614    /// # Arguments
615    ///
616    /// * `content` - The descriptor content as a string
617    ///
618    /// # Errors
619    ///
620    /// Returns [`Error::Parse`] if the content is malformed or missing
621    /// required fields.
622    fn parse(content: &str) -> Result<Self, Error>;
623
624    /// Serializes the descriptor to its canonical string format.
625    ///
626    /// The output should be valid descriptor content that can be parsed
627    /// again with `parse()`.
628    fn to_descriptor_string(&self) -> String;
629
630    /// Computes the cryptographic digest of the descriptor.
631    ///
632    /// The digest is computed over the appropriate portion of the descriptor
633    /// content (which varies by descriptor type).
634    ///
635    /// # Arguments
636    ///
637    /// * `hash` - The hash algorithm to use
638    /// * `encoding` - The output encoding format
639    ///
640    /// # Errors
641    ///
642    /// Returns an error if the digest cannot be computed (e.g., if the
643    /// descriptor content is invalid).
644    fn digest(&self, hash: DigestHash, encoding: DigestEncoding) -> Result<String, Error>;
645
646    /// Returns the raw bytes of the original descriptor content.
647    ///
648    /// This is the exact content that was parsed, preserving original
649    /// formatting and whitespace.
650    fn raw_content(&self) -> &[u8];
651
652    /// Returns lines from the descriptor that were not recognized.
653    ///
654    /// These are lines that don't match any known keyword for this
655    /// descriptor type. This is useful for forward compatibility when
656    /// new fields are added to the descriptor format.
657    fn unrecognized_lines(&self) -> &[String];
658}
659
660/// Detects the compression format of binary content.
661///
662/// Examines the magic bytes at the start of the content to determine
663/// the compression format. This is useful for automatically decompressing
664/// downloaded descriptors.
665///
666/// # Arguments
667///
668/// * `content` - The binary content to examine
669///
670/// # Returns
671///
672/// The detected [`Compression`] format, or [`Compression::Plaintext`] if
673/// no compression is detected or the content is too short.
674///
675/// # Example
676///
677/// ```rust
678/// use stem_rs::descriptor::{detect_compression, Compression};
679///
680/// // Gzip magic bytes
681/// let gzip_content = &[0x1f, 0x8b, 0x08, 0x00];
682/// assert_eq!(detect_compression(gzip_content), Compression::Gzip);
683///
684/// // Plain text
685/// let plain = b"router example";
686/// assert_eq!(detect_compression(plain), Compression::Plaintext);
687/// ```
688pub fn detect_compression(content: &[u8]) -> Compression {
689    if content.len() < 2 {
690        return Compression::Plaintext;
691    }
692
693    if content[0] == 0x1f && content[1] == 0x8b {
694        return Compression::Gzip;
695    }
696
697    if content.len() >= 4
698        && content[0] == 0x28
699        && content[1] == 0xb5
700        && content[2] == 0x2f
701        && content[3] == 0xfd
702    {
703        return Compression::Zstd;
704    }
705
706    if content.len() >= 6
707        && content[0] == 0xfd
708        && content[1] == 0x37
709        && content[2] == 0x7a
710        && content[3] == 0x58
711        && content[4] == 0x5a
712        && content[5] == 0x00
713    {
714        return Compression::Lzma;
715    }
716
717    Compression::Plaintext
718}
719
720/// Decompresses content using the specified compression format.
721///
722/// # Arguments
723///
724/// * `content` - The compressed content
725/// * `compression` - The compression format to use
726///
727/// # Returns
728///
729/// The decompressed content as a byte vector.
730///
731/// # Errors
732///
733/// Returns [`Error::Parse`] if:
734/// - Decompression fails (corrupted data)
735/// - The compression format is not supported (Zstd, LZMA)
736///
737/// # Supported Formats
738///
739/// - [`Compression::Plaintext`] - Returns content unchanged
740/// - [`Compression::Gzip`] - Full support via flate2
741/// - [`Compression::Zstd`] - Detection only, returns error
742/// - [`Compression::Lzma`] - Detection only, returns error
743///
744/// # Example
745///
746/// ```rust
747/// use stem_rs::descriptor::{decompress, Compression};
748///
749/// // Plaintext passes through unchanged
750/// let content = b"Hello, World!";
751/// let result = decompress(content, Compression::Plaintext).unwrap();
752/// assert_eq!(result, content);
753/// ```
754pub fn decompress(content: &[u8], compression: Compression) -> Result<Vec<u8>, Error> {
755    match compression {
756        Compression::Plaintext => Ok(content.to_vec()),
757        Compression::Gzip => decompress_gzip(content),
758        Compression::Zstd => Err(Error::Parse {
759            location: "decompress".into(),
760            reason: "Zstd decompression not supported (requires zstd crate)".into(),
761        }),
762        Compression::Lzma => Err(Error::Parse {
763            location: "decompress".into(),
764            reason: "LZMA decompression not supported (requires lzma crate)".into(),
765        }),
766    }
767}
768
769fn decompress_gzip(content: &[u8]) -> Result<Vec<u8>, Error> {
770    let mut decoder = GzDecoder::new(content);
771    let mut decompressed = Vec::new();
772    decoder
773        .read_to_end(&mut decompressed)
774        .map_err(|e| Error::Parse {
775            location: "decompress_gzip".into(),
776            reason: format!("Failed to decompress gzip: {}", e),
777        })?;
778    Ok(decompressed)
779}
780
781/// Automatically detects and decompresses content.
782///
783/// This is a convenience function that combines [`detect_compression`] and
784/// [`decompress`]. It examines the content's magic bytes to determine the
785/// compression format and decompresses accordingly.
786///
787/// # Arguments
788///
789/// * `content` - The potentially compressed content
790///
791/// # Returns
792///
793/// The decompressed content. If the content is not compressed, it is
794/// returned unchanged.
795///
796/// # Errors
797///
798/// Returns [`Error::Parse`] if decompression fails or the detected
799/// compression format is not supported.
800///
801/// # Example
802///
803/// ```rust
804/// use stem_rs::descriptor::auto_decompress;
805///
806/// // Plain text passes through
807/// let plain = b"router example 127.0.0.1";
808/// let result = auto_decompress(plain).unwrap();
809/// assert_eq!(result, plain);
810/// ```
811pub fn auto_decompress(content: &[u8]) -> Result<Vec<u8>, Error> {
812    let compression = detect_compression(content);
813    decompress(content, compression)
814}
815
816/// Computes a cryptographic digest of content.
817///
818/// This is a low-level function for computing digests. For descriptor
819/// digests, prefer using the [`Descriptor::digest`] method which knows
820/// the correct content range to hash.
821///
822/// # Arguments
823///
824/// * `content` - The content to hash
825/// * `hash` - The hash algorithm to use
826/// * `encoding` - The output encoding format
827///
828/// # Returns
829///
830/// The digest as a string in the specified encoding.
831///
832/// # Example
833///
834/// ```rust
835/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
836///
837/// let content = b"test content";
838///
839/// // SHA-1 in hex
840/// let sha1_hex = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
841/// assert_eq!(sha1_hex.len(), 40);
842///
843/// // SHA-256 in base64
844/// let sha256_b64 = compute_digest(content, DigestHash::Sha256, DigestEncoding::Base64);
845/// ```
846pub fn compute_digest(content: &[u8], hash: DigestHash, encoding: DigestEncoding) -> String {
847    match hash {
848        DigestHash::Sha1 => {
849            let mut hasher = Sha1::new();
850            hasher.update(content);
851            let result = hasher.finalize();
852            encode_digest(&result, encoding)
853        }
854        DigestHash::Sha256 => {
855            let mut hasher = Sha256::new();
856            hasher.update(content);
857            let result = hasher.finalize();
858            encode_digest(&result, encoding)
859        }
860    }
861}
862
863fn encode_digest(bytes: &[u8], encoding: DigestEncoding) -> String {
864    match encoding {
865        DigestEncoding::Raw => bytes.iter().map(|b| *b as char).collect(),
866        DigestEncoding::Hex => bytes.iter().map(|b| format!("{:02X}", b)).collect(),
867        DigestEncoding::Base64 => base64_encode(bytes),
868    }
869}
870
/// Encodes `bytes` with the standard base64 alphabet and no `=` padding.
///
/// A trailing group of one or two bytes produces two or three characters
/// respectively; empty input produces an empty string.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Upper bound: four characters per full group plus at most four for the tail.
    let mut encoded = String::with_capacity(bytes.len() / 3 * 4 + 4);

    for group in bytes.chunks(3) {
        // Pack the available bytes into the top of a 24-bit word; bytes
        // missing from a short final group stay zero.
        let packed = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (idx, &byte)| acc | (u32::from(byte) << (16 - 8 * idx)));

        // n input bytes carry n + 1 significant sextets.
        for sextet in 0..=group.len() {
            let index = (packed >> (18 - 6 * sextet)) & 0x3F;
            encoded.push(char::from(ALPHABET[index as usize]));
        }
    }

    encoded
}
892
893/// Parses a descriptor from file content with automatic decompression.
894///
895/// This function handles the common case of reading a descriptor from a file:
896/// 1. Automatically decompresses the content if compressed
897/// 2. Strips any `@type` annotation from the beginning
898/// 3. Parses the descriptor using the type's `parse` method
899///
900/// # Type Parameters
901///
902/// * `T` - The descriptor type to parse (must implement [`Descriptor`])
903///
904/// # Arguments
905///
906/// * `content` - The raw file content (possibly compressed)
907///
908/// # Returns
909///
910/// The parsed descriptor.
911///
912/// # Errors
913///
914/// Returns [`Error::Parse`] if:
915/// - Decompression fails
916/// - The content is not valid UTF-8
917/// - The descriptor content is malformed
918///
919/// # Example
920///
921/// ```rust,no_run
922/// use stem_rs::descriptor::{parse_file, ServerDescriptor};
923///
924/// let content = std::fs::read("cached-descriptors").unwrap();
925/// let descriptor: ServerDescriptor = parse_file(&content).unwrap();
926/// println!("Parsed descriptor for: {}", descriptor.nickname);
927/// ```
928///
929/// # See Also
930///
931/// - [`parse_file_with_annotation`] - Also returns the type annotation if present
932/// - [`Descriptor::parse`] - Parse from string without decompression
933pub fn parse_file<T: Descriptor>(content: &[u8]) -> Result<T, Error> {
934    let decompressed = auto_decompress(content)?;
935    let content_str = String::from_utf8_lossy(&decompressed);
936    let (_, stripped) = strip_type_annotation(&content_str);
937    T::parse(stripped)
938}
939
940/// Parses a descriptor from file content, returning the type annotation.
941///
942/// Like [`parse_file`], but also returns the `@type` annotation if one
943/// was present at the beginning of the content.
944///
945/// # Type Parameters
946///
947/// * `T` - The descriptor type to parse (must implement [`Descriptor`])
948///
949/// # Arguments
950///
951/// * `content` - The raw file content (possibly compressed)
952///
953/// # Returns
954///
955/// A tuple of:
956/// - `Option<TypeAnnotation>` - The type annotation if present
957/// - `T` - The parsed descriptor
958///
959/// # Errors
960///
961/// Returns [`Error::Parse`] if decompression or parsing fails.
962///
963/// # Example
964///
965/// ```rust,no_run
966/// use stem_rs::descriptor::{parse_file_with_annotation, ServerDescriptor};
967///
968/// let content = std::fs::read("server-descriptor").unwrap();
969/// let (annotation, descriptor): (_, ServerDescriptor) =
970///     parse_file_with_annotation(&content).unwrap();
971///
972/// if let Some(ann) = annotation {
973///     println!("Type: {} v{}.{}", ann.name, ann.major_version, ann.minor_version);
974/// }
975/// ```
976pub fn parse_file_with_annotation<T: Descriptor>(
977    content: &[u8],
978) -> Result<(Option<TypeAnnotation>, T), Error> {
979    let decompressed = auto_decompress(content)?;
980    let content_str = String::from_utf8_lossy(&decompressed);
981    let (annotation, stripped) = strip_type_annotation(&content_str);
982    let descriptor = T::parse(stripped)?;
983    Ok((annotation, descriptor))
984}
985
986/// Strips a type annotation from the beginning of descriptor content.
987///
988/// If the first line is a valid `@type` annotation, it is parsed and
989/// removed from the content. Otherwise, the content is returned unchanged.
990///
991/// # Arguments
992///
993/// * `content` - The descriptor content
994///
995/// # Returns
996///
997/// A tuple of:
998/// - `Option<TypeAnnotation>` - The parsed annotation if present
999/// - `&str` - The remaining content after the annotation
1000///
1001/// # Example
1002///
1003/// ```rust
1004/// use stem_rs::descriptor::strip_type_annotation;
1005///
1006/// let content = "@type server-descriptor 1.0\nrouter example 127.0.0.1";
1007/// let (annotation, rest) = strip_type_annotation(content);
1008///
1009/// assert!(annotation.is_some());
1010/// assert_eq!(annotation.unwrap().name, "server-descriptor");
1011/// assert_eq!(rest, "router example 127.0.0.1");
1012///
1013/// // Without annotation
1014/// let content = "router example 127.0.0.1";
1015/// let (annotation, rest) = strip_type_annotation(content);
1016/// assert!(annotation.is_none());
1017/// assert_eq!(rest, content);
1018/// ```
1019pub fn strip_type_annotation(content: &str) -> (Option<TypeAnnotation>, &str) {
1020    let first_line_end = content.find('\n').unwrap_or(content.len());
1021    let first_line = &content[..first_line_end];
1022
1023    if let Some(annotation) = TypeAnnotation::parse(first_line) {
1024        let rest = if first_line_end < content.len() {
1025            &content[first_line_end + 1..]
1026        } else {
1027            ""
1028        };
1029        (Some(annotation), rest)
1030    } else {
1031        (None, content)
1032    }
1033}
1034
#[cfg(test)]
mod tests {
    use super::*;

    // Checks the name and both version components of a parsed annotation.
    // A macro rather than a function so the literals adopt the field types.
    macro_rules! assert_annotation {
        ($parsed:expr, $name:expr, $major:expr, $minor:expr) => {{
            let parsed = $parsed;
            assert_eq!(parsed.name, $name);
            assert_eq!(parsed.major_version, $major);
            assert_eq!(parsed.minor_version, $minor);
        }};
    }

    /// Annotated plain text carries no compression magic bytes.
    #[test]
    fn test_detect_compression_plaintext() {
        let detected = detect_compression(b"@type server-descriptor 1.0\nrouter test");
        assert_eq!(detected, Compression::Plaintext);
    }

    /// The `1f 8b` prefix identifies gzip.
    #[test]
    fn test_detect_compression_gzip() {
        let detected = detect_compression(&[0x1f, 0x8b, 0x08, 0x00]);
        assert_eq!(detected, Compression::Gzip);
    }

    /// The `28 b5 2f fd` prefix identifies zstd.
    #[test]
    fn test_detect_compression_zstd() {
        let detected = detect_compression(&[0x28, 0xb5, 0x2f, 0xfd, 0x00]);
        assert_eq!(detected, Compression::Zstd);
    }

    /// The `fd 37 7a 58 5a 00` prefix is reported as LZMA.
    #[test]
    fn test_detect_compression_lzma() {
        let detected = detect_compression(&[0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00]);
        assert_eq!(detected, Compression::Lzma);
    }

    /// Plaintext "decompression" returns the input bytes unchanged.
    #[test]
    fn test_decompress_plaintext() {
        let plain = b"Hello, World!";
        let output = decompress(plain, Compression::Plaintext).unwrap();
        assert_eq!(output, plain);
    }

    /// Auto-detection lets uncompressed input pass straight through.
    #[test]
    fn test_auto_decompress_plaintext() {
        let plain = b"Hello, World!";
        let output = auto_decompress(plain).unwrap();
        assert_eq!(output, plain);
    }

    /// A complete gzip stream containing "Hello" round-trips.
    #[test]
    fn test_decompress_gzip() {
        const HELLO_GZ: &[u8] = &[
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xf3, 0x48, 0xcd, 0xc9,
            0xc9, 0x07, 0x00, 0x82, 0x89, 0xd1, 0xf7, 0x05, 0x00, 0x00, 0x00,
        ];
        let output = decompress(HELLO_GZ, Compression::Gzip).unwrap();
        assert_eq!(output, b"Hello");
    }

    #[test]
    fn test_type_annotation_parse() {
        assert_annotation!(
            TypeAnnotation::parse("@type server-descriptor 1.0").unwrap(),
            "server-descriptor",
            1,
            0
        );
    }

    #[test]
    fn test_type_annotation_parse_extra_info() {
        assert_annotation!(
            TypeAnnotation::parse("@type extra-info 1.0").unwrap(),
            "extra-info",
            1,
            0
        );
    }

    #[test]
    fn test_type_annotation_parse_bridge_extra_info() {
        assert_annotation!(
            TypeAnnotation::parse("@type bridge-extra-info 1.2").unwrap(),
            "bridge-extra-info",
            1,
            2
        );
    }

    /// Lines lacking the `@type` prefix, the name, or a `major.minor`
    /// version must all be rejected.
    #[test]
    fn test_type_annotation_parse_invalid() {
        let rejected = [
            "router test",
            "@type",
            "@type server-descriptor",
            "@type server-descriptor 1",
        ];
        for line in rejected.iter() {
            assert!(
                TypeAnnotation::parse(*line).is_none(),
                "{:?} should not parse as a type annotation",
                line
            );
        }
    }

    /// Formatting an annotation reproduces the on-disk `@type` line.
    #[test]
    fn test_type_annotation_display() {
        let rendered = TypeAnnotation::new("server-descriptor", 1, 0).to_string();
        assert_eq!(rendered, "@type server-descriptor 1.0");
    }

    /// A leading annotation line is parsed and removed from the content.
    #[test]
    fn test_strip_type_annotation() {
        let (annotation, remainder) =
            strip_type_annotation("@type server-descriptor 1.0\nrouter test 127.0.0.1");
        // `unwrap` doubles as the presence check.
        let annotation = annotation.unwrap();
        assert_eq!(annotation.name, "server-descriptor");
        assert_eq!(remainder, "router test 127.0.0.1");
    }

    /// Content without an annotation comes back untouched.
    #[test]
    fn test_strip_type_annotation_no_annotation() {
        let input = "router test 127.0.0.1";
        let (annotation, remainder) = strip_type_annotation(input);
        assert!(annotation.is_none());
        assert_eq!(remainder, input);
    }

    /// Annotation names map onto their descriptor types.
    #[test]
    fn test_descriptor_type_from_annotation() {
        let cases = [
            ("server-descriptor", DescriptorType::ServerDescriptor),
            ("extra-info", DescriptorType::ExtraInfo),
            ("tordnsel", DescriptorType::TorDNSEL),
        ];
        for (name, expected) in cases {
            let annotation = TypeAnnotation::new(name, 1, 0);
            assert_eq!(DescriptorType::from_annotation(&annotation), Some(expected));
        }
    }

    /// Well-known cache file names map onto their descriptor types.
    #[test]
    fn test_descriptor_type_from_filename() {
        let cases = [
            ("cached-consensus", DescriptorType::NetworkStatusConsensusV3),
            ("cached-descriptors", DescriptorType::ServerDescriptor),
            ("cached-extrainfo", DescriptorType::ExtraInfo),
            ("exit-list", DescriptorType::TorDNSEL),
        ];
        for (filename, expected) in cases {
            assert_eq!(DescriptorType::from_filename(filename), Some(expected));
        }
    }
}