stem_rs/descriptor/mod.rs
1//! Descriptor parsing for Tor network documents.
2//!
3//! This module provides types for parsing various Tor descriptor formats
4//! including server descriptors, microdescriptors, consensus documents,
5//! and hidden service descriptors.
6//!
7//! # Overview
8//!
9//! Tor relays and directory authorities publish various types of descriptors
10//! that describe the network topology, relay capabilities, and routing
11//! information. This module provides parsers for all major descriptor types:
12//!
13//! - [`ServerDescriptor`] - Full relay metadata including keys, policies, and capabilities
14//! - [`Microdescriptor`] - Compact client-side descriptors with essential routing info
15//! - [`NetworkStatusDocument`] - Consensus documents listing all relays and their status
16//! - [`ExtraInfoDescriptor`] - Bandwidth statistics and additional relay information
17//! - [`HiddenServiceDescriptorV2`] / [`HiddenServiceDescriptorV3`] - Onion service descriptors
18//! - [`Ed25519Certificate`] - Ed25519 certificates used by relays
19//! - [`KeyCertificate`] - Directory authority key certificates
20//! - [`BandwidthFile`] - Bandwidth authority measurement files
21//! - [`TorDNSEL`] - Exit list data from TorDNSEL
22//!
23//! # Descriptor Sources
24//!
25//! Descriptors can be obtained from several sources:
26//!
27//! - **Tor's data directory**: Cached files like `cached-descriptors`, `cached-consensus`
28//! - **Directory authorities**: Via the [`remote`] module's download functions
29//! - **CollecTor archives**: Historical descriptors with `@type` annotations
30//!
31//! # Type Annotations
32//!
33//! Descriptors from [CollecTor](https://metrics.torproject.org/collector.html) include
34//! a type annotation on the first line in the format `@type <name> <major>.<minor>`.
35//! The [`TypeAnnotation`] struct parses these annotations, and [`parse_file`] handles
36//! them automatically.
37//!
38//! # Compression
39//!
40//! Downloaded descriptors are often compressed. This module supports automatic
41//! decompression via [`auto_decompress`] for:
42//!
43//! - **Plaintext** - Uncompressed data
44//! - **Gzip** - Standard gzip compression (fully supported)
45//! - **Zstd** - Zstandard compression (detection only, requires external crate)
46//! - **LZMA** - LZMA/XZ compression (detection only, requires external crate)
47//!
48//! # Digests
49//!
50//! Descriptors have cryptographic digests used for identification and verification.
51//! The [`compute_digest`] function and [`Descriptor::digest`] method support:
52//!
53//! - [`DigestHash::Sha1`] - SHA-1 hash (legacy, used by older descriptors)
54//! - [`DigestHash::Sha256`] - SHA-256 hash (modern descriptors)
55//!
56//! With encodings:
57//!
58//! - [`DigestEncoding::Raw`] - Raw bytes as characters
59//! - [`DigestEncoding::Hex`] - Uppercase hexadecimal
60//! - [`DigestEncoding::Base64`] - Base64 without padding
61//!
62//! # Example
63//!
64//! ```rust,no_run
65//! use stem_rs::descriptor::{parse_file, ServerDescriptor, Descriptor};
66//! use stem_rs::descriptor::{DigestHash, DigestEncoding};
67//!
68//! // Parse a server descriptor from file contents
69//! let content = std::fs::read("cached-descriptors").unwrap();
70//! let descriptor: ServerDescriptor = parse_file(&content).unwrap();
71//!
72//! // Access descriptor fields
73//! println!("Nickname: {}", descriptor.nickname);
74//! println!("Address: {}", descriptor.address);
75//!
76//! // Compute the descriptor's digest
77//! let digest = descriptor.digest(DigestHash::Sha1, DigestEncoding::Hex).unwrap();
78//! println!("Digest: {}", digest);
79//! ```
80//!
81//! # See Also
82//!
83//! - [`remote`] - Download descriptors from directory authorities
84//! - [`server`] - Server descriptor parsing
85//! - [`micro`] - Microdescriptor parsing
86//! - [`consensus`] - Network status document parsing
87//! - [`hidden`] - Hidden service descriptor parsing
88//!
//! # External References
90//!
91//! - [Tor Directory Protocol Specification](https://spec.torproject.org/dir-spec)
92//! - [Python Stem descriptor module](https://stem.torproject.org/api/descriptor/descriptor.html)
93
94pub mod authority;
95pub mod bandwidth_file;
96pub mod certificate;
97pub mod consensus;
98pub mod extra_info;
99pub mod hidden;
100pub mod key_cert;
101pub mod micro;
102pub mod remote;
103pub mod router_status;
104pub mod server;
105pub mod tordnsel;
106
107pub use authority::{DirectoryAuthority, SharedRandomnessCommitment};
108pub use bandwidth_file::{BandwidthFile, BandwidthMeasurement, RecentStats, RelayFailures};
109pub use certificate::{
110 Ed25519Certificate, Ed25519Extension, ExtensionFlag, ExtensionType, ED25519_HEADER_LENGTH,
111 ED25519_KEY_LENGTH, ED25519_SIGNATURE_LENGTH,
112};
113pub use consensus::{DocumentSignature, NetworkStatusDocument, SharedRandomness};
114pub use extra_info::{
115 BandwidthHistory, DirResponse, DirStat, ExtraInfoDescriptor, PortKey, Transport,
116};
117pub use hidden::{
118 AuthorizedClient, HiddenServiceDescriptorV2, HiddenServiceDescriptorV3, InnerLayer,
119 IntroductionPointV2, IntroductionPointV3, LinkSpecifier, OuterLayer,
120};
121pub use key_cert::KeyCertificate;
122pub use micro::Microdescriptor;
123pub use remote::{
124 download_bandwidth_file, download_consensus, download_detached_signatures,
125 download_extrainfo_descriptors, download_from_dirport, download_key_certificates,
126 download_microdescriptors, download_server_descriptors, get_authorities, Compression, DirPort,
127 DownloadResult,
128};
129pub use router_status::{MicrodescriptorHash, RouterStatusEntry, RouterStatusEntryType};
130pub use server::ServerDescriptor;
131pub use tordnsel::{parse_exit_list, parse_exit_list_bytes, TorDNSEL};
132
133use crate::Error;
134use flate2::read::GzDecoder;
135use sha1::{Digest as Sha1Digest, Sha1};
136use sha2::Sha256;
137use std::io::Read;
138use std::path::Path;
139
/// The `@type` header line found at the top of CollecTor archive files.
///
/// Descriptor files published by CollecTor begin with a line of the form
/// `@type <name> <major>.<minor>`. This struct stores the three components
/// of such a line after it has been parsed.
///
/// # Format
///
/// ```text
/// @type server-descriptor 1.0
/// @type network-status-consensus-3 1.0
/// @type microdescriptor 1.0
/// ```
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::TypeAnnotation;
///
/// let annotation = TypeAnnotation::parse("@type server-descriptor 1.0").unwrap();
/// assert_eq!(annotation.name, "server-descriptor");
/// assert_eq!(annotation.major_version, 1);
/// assert_eq!(annotation.minor_version, 0);
///
/// // Formatting reproduces the annotation line
/// assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
/// ```
///
/// # See Also
///
/// - [`DescriptorType`] - Enum of the descriptor types known to this module
/// - [`strip_type_annotation`] - Splits an annotation off the start of content
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TypeAnnotation {
    /// Name of the descriptor type, e.g. `server-descriptor` or `microdescriptor`.
    pub name: String,
    /// Number before the dot in the version (`1` in `1.0`).
    pub major_version: u32,
    /// Number after the dot in the version (`0` in `1.0`).
    pub minor_version: u32,
}
181
182impl TypeAnnotation {
183 /// Creates a new type annotation with the given name and version.
184 ///
185 /// # Arguments
186 ///
187 /// * `name` - The descriptor type name
188 /// * `major_version` - The major version number
189 /// * `minor_version` - The minor version number
190 ///
191 /// # Example
192 ///
193 /// ```rust
194 /// use stem_rs::descriptor::TypeAnnotation;
195 ///
196 /// let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
197 /// assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
198 /// ```
199 pub fn new(name: impl Into<String>, major_version: u32, minor_version: u32) -> Self {
200 Self {
201 name: name.into(),
202 major_version,
203 minor_version,
204 }
205 }
206
207 /// Parses a type annotation from a line of text.
208 ///
209 /// Returns `None` if the line is not a valid type annotation.
210 ///
211 /// # Arguments
212 ///
213 /// * `line` - The line to parse
214 ///
215 /// # Example
216 ///
217 /// ```rust
218 /// use stem_rs::descriptor::TypeAnnotation;
219 ///
220 /// // Valid annotation
221 /// let annotation = TypeAnnotation::parse("@type extra-info 1.0").unwrap();
222 /// assert_eq!(annotation.name, "extra-info");
223 ///
224 /// // Invalid - not an annotation
225 /// assert!(TypeAnnotation::parse("router test 127.0.0.1").is_none());
226 ///
227 /// // Invalid - missing version
228 /// assert!(TypeAnnotation::parse("@type server-descriptor").is_none());
229 /// ```
230 pub fn parse(line: &str) -> Option<Self> {
231 let line = line.trim();
232 if !line.starts_with("@type ") {
233 return None;
234 }
235
236 let rest = &line[6..];
237 let parts: Vec<&str> = rest.split_whitespace().collect();
238 if parts.len() != 2 {
239 return None;
240 }
241
242 let name = parts[0];
243 let version_parts: Vec<&str> = parts[1].split('.').collect();
244 if version_parts.len() != 2 {
245 return None;
246 }
247
248 let major_version = version_parts[0].parse().ok()?;
249 let minor_version = version_parts[1].parse().ok()?;
250
251 Some(Self {
252 name: name.to_string(),
253 major_version,
254 minor_version,
255 })
256 }
257}
258
259impl std::fmt::Display for TypeAnnotation {
260 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261 write!(
262 f,
263 "@type {} {}.{}",
264 self.name, self.major_version, self.minor_version
265 )
266 }
267}
268
/// The kinds of Tor network document this module can identify.
///
/// A value of this enum is obtained either from a CollecTor `@type`
/// annotation (see [`DescriptorType::from_annotation`]) or from a
/// conventional file name (see [`DescriptorType::from_filename`]). Each
/// variant lists the annotation name it corresponds to.
///
/// # Stability
///
/// Further variants may be added as new descriptor types appear in Tor.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{DescriptorType, TypeAnnotation};
///
/// // From type annotation
/// let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
/// let desc_type = DescriptorType::from_annotation(&annotation);
/// assert_eq!(desc_type, Some(DescriptorType::ServerDescriptor));
///
/// // From filename
/// let desc_type = DescriptorType::from_filename("cached-consensus");
/// assert_eq!(desc_type, Some(DescriptorType::NetworkStatusConsensusV3));
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DescriptorType {
    /// Full relay descriptor: identity keys, exit policy, bandwidth and
    /// other relay metadata.
    ///
    /// Annotation name: `server-descriptor`
    ServerDescriptor,
    /// Extra-info descriptor carrying detailed statistics about a relay's
    /// operation.
    ///
    /// Annotation name: `extra-info`
    ExtraInfo,
    /// Compact descriptor that clients use to build circuits with minimal
    /// data.
    ///
    /// Annotation name: `microdescriptor`
    Microdescriptor,
    /// Version 3 network status consensus: the view of the network agreed
    /// upon and signed by the directory authorities.
    ///
    /// Annotation name: `network-status-consensus-3`
    NetworkStatusConsensusV3,
    /// Version 3 network status vote: a single directory authority's view
    /// prior to consensus.
    ///
    /// Annotation name: `network-status-vote-3`
    NetworkStatusVoteV3,
    /// Version 3 consensus in the microdescriptor flavor, referencing
    /// microdescriptor hashes rather than full descriptors.
    ///
    /// Annotation name: `network-status-microdesc-consensus-3`
    NetworkStatusMicrodescConsensusV3,
    /// Network status document for bridge relays, which are not publicly
    /// listed.
    ///
    /// Annotation name: `bridge-network-status`
    BridgeNetworkStatus,
    /// Server descriptor of a bridge relay, with some fields redacted.
    ///
    /// Annotation name: `bridge-server-descriptor`
    BridgeServerDescriptor,
    /// Extra-info descriptor of a bridge relay.
    ///
    /// Annotation name: `bridge-extra-info`
    BridgeExtraInfo,
    /// Version 3 directory key certificate, binding a directory authority's
    /// signing key to its identity.
    ///
    /// Annotation name: `dir-key-certificate-3`
    DirKeyCertificateV3,
    /// Exit list published by the TorDNSEL service, listing exit relay IP
    /// addresses.
    ///
    /// Annotation name: `tordnsel`
    TorDNSEL,
    /// Onion (hidden) service descriptor, version 2 or 3.
    ///
    /// Annotation name: `hidden-service-descriptor`
    HiddenServiceDescriptor,
    /// Measurement file produced by a bandwidth authority.
    ///
    /// Annotation name: `bandwidth-file`
    BandwidthFile,
}
362
363impl DescriptorType {
364 /// Returns the annotation name for this descriptor type.
365 ///
366 /// This is the name used in `@type` annotations in CollecTor archives.
367 ///
368 /// # Example
369 ///
370 /// ```rust
371 /// use stem_rs::descriptor::DescriptorType;
372 ///
373 /// assert_eq!(DescriptorType::ServerDescriptor.annotation_name(), "server-descriptor");
374 /// assert_eq!(DescriptorType::Microdescriptor.annotation_name(), "microdescriptor");
375 /// ```
376 pub fn annotation_name(&self) -> &'static str {
377 match self {
378 Self::ServerDescriptor => "server-descriptor",
379 Self::ExtraInfo => "extra-info",
380 Self::Microdescriptor => "microdescriptor",
381 Self::NetworkStatusConsensusV3 => "network-status-consensus-3",
382 Self::NetworkStatusVoteV3 => "network-status-vote-3",
383 Self::NetworkStatusMicrodescConsensusV3 => "network-status-microdesc-consensus-3",
384 Self::BridgeNetworkStatus => "bridge-network-status",
385 Self::BridgeServerDescriptor => "bridge-server-descriptor",
386 Self::BridgeExtraInfo => "bridge-extra-info",
387 Self::DirKeyCertificateV3 => "dir-key-certificate-3",
388 Self::TorDNSEL => "tordnsel",
389 Self::HiddenServiceDescriptor => "hidden-service-descriptor",
390 Self::BandwidthFile => "bandwidth-file",
391 }
392 }
393
394 /// Determines the descriptor type from a type annotation.
395 ///
396 /// Returns `None` if the annotation name is not recognized.
397 ///
398 /// # Arguments
399 ///
400 /// * `annotation` - The type annotation to match
401 ///
402 /// # Example
403 ///
404 /// ```rust
405 /// use stem_rs::descriptor::{DescriptorType, TypeAnnotation};
406 ///
407 /// let annotation = TypeAnnotation::new("extra-info", 1, 0);
408 /// assert_eq!(
409 /// DescriptorType::from_annotation(&annotation),
410 /// Some(DescriptorType::ExtraInfo)
411 /// );
412 ///
413 /// let unknown = TypeAnnotation::new("unknown-type", 1, 0);
414 /// assert_eq!(DescriptorType::from_annotation(&unknown), None);
415 /// ```
416 pub fn from_annotation(annotation: &TypeAnnotation) -> Option<Self> {
417 match annotation.name.as_str() {
418 "server-descriptor" => Some(Self::ServerDescriptor),
419 "extra-info" => Some(Self::ExtraInfo),
420 "microdescriptor" => Some(Self::Microdescriptor),
421 "network-status-consensus-3" => Some(Self::NetworkStatusConsensusV3),
422 "network-status-vote-3" => Some(Self::NetworkStatusVoteV3),
423 "network-status-microdesc-consensus-3" => Some(Self::NetworkStatusMicrodescConsensusV3),
424 "bridge-network-status" => Some(Self::BridgeNetworkStatus),
425 "bridge-server-descriptor" => Some(Self::BridgeServerDescriptor),
426 "bridge-extra-info" => Some(Self::BridgeExtraInfo),
427 "dir-key-certificate-3" => Some(Self::DirKeyCertificateV3),
428 "tordnsel" => Some(Self::TorDNSEL),
429 "hidden-service-descriptor" => Some(Self::HiddenServiceDescriptor),
430 "bandwidth-file" => Some(Self::BandwidthFile),
431 _ => None,
432 }
433 }
434
435 /// Determines the descriptor type from a filename.
436 ///
437 /// This is useful for parsing descriptors from Tor's data directory
438 /// where files have conventional names like `cached-descriptors` or
439 /// `cached-consensus`.
440 ///
441 /// Returns `None` if the filename doesn't match a known pattern.
442 ///
443 /// # Arguments
444 ///
445 /// * `filename` - The filename to match (path components are stripped)
446 ///
447 /// # Example
448 ///
449 /// ```rust
450 /// use stem_rs::descriptor::DescriptorType;
451 ///
452 /// assert_eq!(
453 /// DescriptorType::from_filename("cached-descriptors"),
454 /// Some(DescriptorType::ServerDescriptor)
455 /// );
456 /// assert_eq!(
457 /// DescriptorType::from_filename("cached-extrainfo"),
458 /// Some(DescriptorType::ExtraInfo)
459 /// );
460 /// assert_eq!(
461 /// DescriptorType::from_filename("/var/lib/tor/cached-consensus"),
462 /// Some(DescriptorType::NetworkStatusConsensusV3)
463 /// );
464 /// assert_eq!(DescriptorType::from_filename("unknown-file"), None);
465 /// ```
466 pub fn from_filename(filename: &str) -> Option<Self> {
467 let filename = Path::new(filename)
468 .file_name()
469 .and_then(|s| s.to_str())
470 .unwrap_or(filename);
471
472 if filename.contains("cached-consensus") || filename.contains("consensus") {
473 Some(Self::NetworkStatusConsensusV3)
474 } else if filename.contains("cached-microdesc-consensus") {
475 Some(Self::NetworkStatusMicrodescConsensusV3)
476 } else if filename.contains("cached-microdescs") || filename.contains("microdescriptor") {
477 Some(Self::Microdescriptor)
478 } else if filename.contains("cached-descriptors") || filename.contains("server-descriptor")
479 {
480 Some(Self::ServerDescriptor)
481 } else if filename.contains("cached-extrainfo") || filename.contains("extra-info") {
482 Some(Self::ExtraInfo)
483 } else if filename.contains("exit-list") || filename.contains("tordnsel") {
484 Some(Self::TorDNSEL)
485 } else if filename.contains("bandwidth") {
486 Some(Self::BandwidthFile)
487 } else {
488 None
489 }
490 }
491}
492
/// Selects the hash function used when computing a descriptor digest.
///
/// Descriptors are identified and verified through cryptographic hashes of
/// their content. Older descriptor formats rely on SHA-1; newer formats use
/// SHA-256.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
///
/// let content = b"example content";
/// let sha1_digest = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
/// let sha256_digest = compute_digest(content, DigestHash::Sha256, DigestEncoding::Hex);
///
/// assert_eq!(sha1_digest.len(), 40); // 20 bytes, two hex characters each
/// assert_eq!(sha256_digest.len(), 64); // 32 bytes, two hex characters each
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DigestHash {
    /// SHA-1, producing a 160-bit (20-byte) digest.
    ///
    /// Used by legacy descriptor types such as server descriptors and v2
    /// hidden service descriptors. SHA-1 is considered weak with respect to
    /// collision resistance but remains in use for backward compatibility.
    Sha1,
    /// SHA-256, producing a 256-bit (32-byte) digest.
    ///
    /// Used by modern descriptor types such as microdescriptors and v3
    /// hidden service descriptors.
    Sha256,
}
524
/// Selects how the bytes of a digest are rendered as a `String`.
///
/// Which representation is appropriate depends on where the digest is used.
///
/// # Example
///
/// ```rust
/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
///
/// let content = b"test";
///
/// // Hexadecimal encoding (uppercase)
/// let hex = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
/// assert!(hex.chars().all(|c| c.is_ascii_hexdigit()));
///
/// // Base64 encoding (without padding)
/// let b64 = compute_digest(content, DigestHash::Sha1, DigestEncoding::Base64);
/// assert!(b64.chars().all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/'));
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DigestEncoding {
    /// The digest bytes themselves, one `char` per byte.
    ///
    /// Every byte is converted directly to a character. Mainly useful for
    /// internal processing rather than for display.
    Raw,
    /// Hexadecimal with uppercase letters.
    ///
    /// Each byte is written as two characters from `0-9` and `A-F`. This is
    /// the most common format for displaying fingerprints.
    Hex,
    /// Base64 with the trailing padding removed.
    ///
    /// Uses the standard alphabet (`A-Z`, `a-z`, `0-9`, `+`, `/`); no `=`
    /// padding characters are emitted.
    Base64,
}
562
563/// Trait for parsing and serializing Tor descriptors.
564///
565/// This trait defines the common interface for all descriptor types in the
566/// library. Implementors can parse descriptor content, serialize back to
567/// the canonical string format, and compute cryptographic digests.
568///
569/// # Contract
570///
571/// Implementations must satisfy these invariants:
572///
573/// 1. **Round-trip consistency**: For any valid descriptor content,
574/// `parse(content).to_descriptor_string()` should produce semantically
575/// equivalent content (though whitespace may differ).
576///
577/// 2. **Digest stability**: The `digest()` method must return consistent
578/// results for the same descriptor content.
579///
580/// 3. **Error handling**: `parse()` should return `Error::Parse` for
581/// malformed content with a descriptive error message.
582///
583/// # Example
584///
585/// ```rust,no_run
586/// use stem_rs::descriptor::{Descriptor, DigestHash, DigestEncoding};
587/// use stem_rs::descriptor::ServerDescriptor;
588///
589/// let content = "router example 127.0.0.1 9001 0 0\n...";
590/// let descriptor = ServerDescriptor::parse(content).unwrap();
591///
592/// // Serialize back to string
593/// let serialized = descriptor.to_descriptor_string();
594///
595/// // Compute digest
596/// let digest = descriptor.digest(DigestHash::Sha1, DigestEncoding::Hex).unwrap();
597///
598/// // Access raw content
599/// let raw = descriptor.raw_content();
600///
601/// // Check for unrecognized lines
602/// let unknown = descriptor.unrecognized_lines();
603/// ```
604///
605/// # Implementors
606///
607/// - [`ServerDescriptor`] - Server descriptors
608/// - [`Microdescriptor`] - Microdescriptors
609/// - [`ExtraInfoDescriptor`] - Extra-info descriptors
610/// - [`NetworkStatusDocument`] - Consensus documents
611pub trait Descriptor: Sized {
612 /// Parses a descriptor from its string content.
613 ///
614 /// # Arguments
615 ///
616 /// * `content` - The descriptor content as a string
617 ///
618 /// # Errors
619 ///
620 /// Returns [`Error::Parse`] if the content is malformed or missing
621 /// required fields.
622 fn parse(content: &str) -> Result<Self, Error>;
623
624 /// Serializes the descriptor to its canonical string format.
625 ///
626 /// The output should be valid descriptor content that can be parsed
627 /// again with `parse()`.
628 fn to_descriptor_string(&self) -> String;
629
630 /// Computes the cryptographic digest of the descriptor.
631 ///
632 /// The digest is computed over the appropriate portion of the descriptor
633 /// content (which varies by descriptor type).
634 ///
635 /// # Arguments
636 ///
637 /// * `hash` - The hash algorithm to use
638 /// * `encoding` - The output encoding format
639 ///
640 /// # Errors
641 ///
642 /// Returns an error if the digest cannot be computed (e.g., if the
643 /// descriptor content is invalid).
644 fn digest(&self, hash: DigestHash, encoding: DigestEncoding) -> Result<String, Error>;
645
646 /// Returns the raw bytes of the original descriptor content.
647 ///
648 /// This is the exact content that was parsed, preserving original
649 /// formatting and whitespace.
650 fn raw_content(&self) -> &[u8];
651
652 /// Returns lines from the descriptor that were not recognized.
653 ///
654 /// These are lines that don't match any known keyword for this
655 /// descriptor type. This is useful for forward compatibility when
656 /// new fields are added to the descriptor format.
657 fn unrecognized_lines(&self) -> &[String];
658}
659
660/// Detects the compression format of binary content.
661///
662/// Examines the magic bytes at the start of the content to determine
663/// the compression format. This is useful for automatically decompressing
664/// downloaded descriptors.
665///
666/// # Arguments
667///
668/// * `content` - The binary content to examine
669///
670/// # Returns
671///
672/// The detected [`Compression`] format, or [`Compression::Plaintext`] if
673/// no compression is detected or the content is too short.
674///
675/// # Example
676///
677/// ```rust
678/// use stem_rs::descriptor::{detect_compression, Compression};
679///
680/// // Gzip magic bytes
681/// let gzip_content = &[0x1f, 0x8b, 0x08, 0x00];
682/// assert_eq!(detect_compression(gzip_content), Compression::Gzip);
683///
684/// // Plain text
685/// let plain = b"router example";
686/// assert_eq!(detect_compression(plain), Compression::Plaintext);
687/// ```
688pub fn detect_compression(content: &[u8]) -> Compression {
689 if content.len() < 2 {
690 return Compression::Plaintext;
691 }
692
693 if content[0] == 0x1f && content[1] == 0x8b {
694 return Compression::Gzip;
695 }
696
697 if content.len() >= 4
698 && content[0] == 0x28
699 && content[1] == 0xb5
700 && content[2] == 0x2f
701 && content[3] == 0xfd
702 {
703 return Compression::Zstd;
704 }
705
706 if content.len() >= 6
707 && content[0] == 0xfd
708 && content[1] == 0x37
709 && content[2] == 0x7a
710 && content[3] == 0x58
711 && content[4] == 0x5a
712 && content[5] == 0x00
713 {
714 return Compression::Lzma;
715 }
716
717 Compression::Plaintext
718}
719
720/// Decompresses content using the specified compression format.
721///
722/// # Arguments
723///
724/// * `content` - The compressed content
725/// * `compression` - The compression format to use
726///
727/// # Returns
728///
729/// The decompressed content as a byte vector.
730///
731/// # Errors
732///
733/// Returns [`Error::Parse`] if:
734/// - Decompression fails (corrupted data)
735/// - The compression format is not supported (Zstd, LZMA)
736///
737/// # Supported Formats
738///
739/// - [`Compression::Plaintext`] - Returns content unchanged
740/// - [`Compression::Gzip`] - Full support via flate2
741/// - [`Compression::Zstd`] - Detection only, returns error
742/// - [`Compression::Lzma`] - Detection only, returns error
743///
744/// # Example
745///
746/// ```rust
747/// use stem_rs::descriptor::{decompress, Compression};
748///
749/// // Plaintext passes through unchanged
750/// let content = b"Hello, World!";
751/// let result = decompress(content, Compression::Plaintext).unwrap();
752/// assert_eq!(result, content);
753/// ```
754pub fn decompress(content: &[u8], compression: Compression) -> Result<Vec<u8>, Error> {
755 match compression {
756 Compression::Plaintext => Ok(content.to_vec()),
757 Compression::Gzip => decompress_gzip(content),
758 Compression::Zstd => Err(Error::Parse {
759 location: "decompress".into(),
760 reason: "Zstd decompression not supported (requires zstd crate)".into(),
761 }),
762 Compression::Lzma => Err(Error::Parse {
763 location: "decompress".into(),
764 reason: "LZMA decompression not supported (requires lzma crate)".into(),
765 }),
766 }
767}
768
769fn decompress_gzip(content: &[u8]) -> Result<Vec<u8>, Error> {
770 let mut decoder = GzDecoder::new(content);
771 let mut decompressed = Vec::new();
772 decoder
773 .read_to_end(&mut decompressed)
774 .map_err(|e| Error::Parse {
775 location: "decompress_gzip".into(),
776 reason: format!("Failed to decompress gzip: {}", e),
777 })?;
778 Ok(decompressed)
779}
780
781/// Automatically detects and decompresses content.
782///
783/// This is a convenience function that combines [`detect_compression`] and
784/// [`decompress`]. It examines the content's magic bytes to determine the
785/// compression format and decompresses accordingly.
786///
787/// # Arguments
788///
789/// * `content` - The potentially compressed content
790///
791/// # Returns
792///
793/// The decompressed content. If the content is not compressed, it is
794/// returned unchanged.
795///
796/// # Errors
797///
798/// Returns [`Error::Parse`] if decompression fails or the detected
799/// compression format is not supported.
800///
801/// # Example
802///
803/// ```rust
804/// use stem_rs::descriptor::auto_decompress;
805///
806/// // Plain text passes through
807/// let plain = b"router example 127.0.0.1";
808/// let result = auto_decompress(plain).unwrap();
809/// assert_eq!(result, plain);
810/// ```
811pub fn auto_decompress(content: &[u8]) -> Result<Vec<u8>, Error> {
812 let compression = detect_compression(content);
813 decompress(content, compression)
814}
815
816/// Computes a cryptographic digest of content.
817///
818/// This is a low-level function for computing digests. For descriptor
819/// digests, prefer using the [`Descriptor::digest`] method which knows
820/// the correct content range to hash.
821///
822/// # Arguments
823///
824/// * `content` - The content to hash
825/// * `hash` - The hash algorithm to use
826/// * `encoding` - The output encoding format
827///
828/// # Returns
829///
830/// The digest as a string in the specified encoding.
831///
832/// # Example
833///
834/// ```rust
835/// use stem_rs::descriptor::{compute_digest, DigestHash, DigestEncoding};
836///
837/// let content = b"test content";
838///
839/// // SHA-1 in hex
840/// let sha1_hex = compute_digest(content, DigestHash::Sha1, DigestEncoding::Hex);
841/// assert_eq!(sha1_hex.len(), 40);
842///
843/// // SHA-256 in base64
844/// let sha256_b64 = compute_digest(content, DigestHash::Sha256, DigestEncoding::Base64);
845/// ```
846pub fn compute_digest(content: &[u8], hash: DigestHash, encoding: DigestEncoding) -> String {
847 match hash {
848 DigestHash::Sha1 => {
849 let mut hasher = Sha1::new();
850 hasher.update(content);
851 let result = hasher.finalize();
852 encode_digest(&result, encoding)
853 }
854 DigestHash::Sha256 => {
855 let mut hasher = Sha256::new();
856 hasher.update(content);
857 let result = hasher.finalize();
858 encode_digest(&result, encoding)
859 }
860 }
861}
862
863fn encode_digest(bytes: &[u8], encoding: DigestEncoding) -> String {
864 match encoding {
865 DigestEncoding::Raw => bytes.iter().map(|b| *b as char).collect(),
866 DigestEncoding::Hex => bytes.iter().map(|b| format!("{:02X}", b)).collect(),
867 DigestEncoding::Base64 => base64_encode(bytes),
868 }
869}
870
/// Encodes `bytes` as Base64 using the standard alphabet and no `=` padding.
///
/// The input is processed in groups of up to three bytes. A full group
/// yields four characters; a trailing group of one or two bytes yields two
/// or three characters respectively.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Picks the 6-bit value found `shift` bits up in the 24-bit group.
    let sextet = |group: u32, shift: u32| ALPHABET[((group >> shift) & 0x3F) as usize] as char;

    let mut encoded = String::new();
    for chunk in bytes.chunks(3) {
        // Pack the bytes big-endian into 24 bits; missing bytes stay zero.
        let group = chunk
            .iter()
            .enumerate()
            .fold(0u32, |acc, (idx, &byte)| acc | (u32::from(byte) << (16 - 8 * idx)));

        // n input bytes produce n + 1 output characters.
        for k in 0..=chunk.len() {
            encoded.push(sextet(group, 18 - 6 * k as u32));
        }
    }
    encoded
}
892
893/// Parses a descriptor from file content with automatic decompression.
894///
895/// This function handles the common case of reading a descriptor from a file:
896/// 1. Automatically decompresses the content if compressed
897/// 2. Strips any `@type` annotation from the beginning
898/// 3. Parses the descriptor using the type's `parse` method
899///
900/// # Type Parameters
901///
902/// * `T` - The descriptor type to parse (must implement [`Descriptor`])
903///
904/// # Arguments
905///
906/// * `content` - The raw file content (possibly compressed)
907///
908/// # Returns
909///
910/// The parsed descriptor.
911///
912/// # Errors
913///
914/// Returns [`Error::Parse`] if:
915/// - Decompression fails
916/// - The content is not valid UTF-8
917/// - The descriptor content is malformed
918///
919/// # Example
920///
921/// ```rust,no_run
922/// use stem_rs::descriptor::{parse_file, ServerDescriptor};
923///
924/// let content = std::fs::read("cached-descriptors").unwrap();
925/// let descriptor: ServerDescriptor = parse_file(&content).unwrap();
926/// println!("Parsed descriptor for: {}", descriptor.nickname);
927/// ```
928///
929/// # See Also
930///
931/// - [`parse_file_with_annotation`] - Also returns the type annotation if present
932/// - [`Descriptor::parse`] - Parse from string without decompression
933pub fn parse_file<T: Descriptor>(content: &[u8]) -> Result<T, Error> {
934 let decompressed = auto_decompress(content)?;
935 let content_str = String::from_utf8_lossy(&decompressed);
936 let (_, stripped) = strip_type_annotation(&content_str);
937 T::parse(stripped)
938}
939
940/// Parses a descriptor from file content, returning the type annotation.
941///
942/// Like [`parse_file`], but also returns the `@type` annotation if one
943/// was present at the beginning of the content.
944///
945/// # Type Parameters
946///
947/// * `T` - The descriptor type to parse (must implement [`Descriptor`])
948///
949/// # Arguments
950///
951/// * `content` - The raw file content (possibly compressed)
952///
953/// # Returns
954///
955/// A tuple of:
956/// - `Option<TypeAnnotation>` - The type annotation if present
957/// - `T` - The parsed descriptor
958///
959/// # Errors
960///
961/// Returns [`Error::Parse`] if decompression or parsing fails.
962///
963/// # Example
964///
965/// ```rust,no_run
966/// use stem_rs::descriptor::{parse_file_with_annotation, ServerDescriptor};
967///
968/// let content = std::fs::read("server-descriptor").unwrap();
969/// let (annotation, descriptor): (_, ServerDescriptor) =
970/// parse_file_with_annotation(&content).unwrap();
971///
972/// if let Some(ann) = annotation {
973/// println!("Type: {} v{}.{}", ann.name, ann.major_version, ann.minor_version);
974/// }
975/// ```
976pub fn parse_file_with_annotation<T: Descriptor>(
977 content: &[u8],
978) -> Result<(Option<TypeAnnotation>, T), Error> {
979 let decompressed = auto_decompress(content)?;
980 let content_str = String::from_utf8_lossy(&decompressed);
981 let (annotation, stripped) = strip_type_annotation(&content_str);
982 let descriptor = T::parse(stripped)?;
983 Ok((annotation, descriptor))
984}
985
986/// Strips a type annotation from the beginning of descriptor content.
987///
988/// If the first line is a valid `@type` annotation, it is parsed and
989/// removed from the content. Otherwise, the content is returned unchanged.
990///
991/// # Arguments
992///
993/// * `content` - The descriptor content
994///
995/// # Returns
996///
997/// A tuple of:
998/// - `Option<TypeAnnotation>` - The parsed annotation if present
999/// - `&str` - The remaining content after the annotation
1000///
1001/// # Example
1002///
1003/// ```rust
1004/// use stem_rs::descriptor::strip_type_annotation;
1005///
1006/// let content = "@type server-descriptor 1.0\nrouter example 127.0.0.1";
1007/// let (annotation, rest) = strip_type_annotation(content);
1008///
1009/// assert!(annotation.is_some());
1010/// assert_eq!(annotation.unwrap().name, "server-descriptor");
1011/// assert_eq!(rest, "router example 127.0.0.1");
1012///
1013/// // Without annotation
1014/// let content = "router example 127.0.0.1";
1015/// let (annotation, rest) = strip_type_annotation(content);
1016/// assert!(annotation.is_none());
1017/// assert_eq!(rest, content);
1018/// ```
1019pub fn strip_type_annotation(content: &str) -> (Option<TypeAnnotation>, &str) {
1020 let first_line_end = content.find('\n').unwrap_or(content.len());
1021 let first_line = &content[..first_line_end];
1022
1023 if let Some(annotation) = TypeAnnotation::parse(first_line) {
1024 let rest = if first_line_end < content.len() {
1025 &content[first_line_end + 1..]
1026 } else {
1027 ""
1028 };
1029 (Some(annotation), rest)
1030 } else {
1031 (None, content)
1032 }
1033}
1034
// Unit tests for compression detection, type annotations, descriptor type
// lookup, and the digest/base64 helpers defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_detect_compression_plaintext() {
        let content = b"@type server-descriptor 1.0\nrouter test";
        assert_eq!(detect_compression(content), Compression::Plaintext);
    }

    #[test]
    fn test_detect_compression_gzip() {
        let content = &[0x1f, 0x8b, 0x08, 0x00];
        assert_eq!(detect_compression(content), Compression::Gzip);
    }

    #[test]
    fn test_detect_compression_zstd() {
        let content = &[0x28, 0xb5, 0x2f, 0xfd, 0x00];
        assert_eq!(detect_compression(content), Compression::Zstd);
    }

    #[test]
    fn test_detect_compression_lzma() {
        let content = &[0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00];
        assert_eq!(detect_compression(content), Compression::Lzma);
    }

    #[test]
    fn test_decompress_plaintext() {
        let content = b"Hello, World!";
        let result = decompress(content, Compression::Plaintext).unwrap();
        assert_eq!(result, content);
    }

    #[test]
    fn test_auto_decompress_plaintext() {
        let content = b"Hello, World!";
        let result = auto_decompress(content).unwrap();
        assert_eq!(result, content);
    }

    #[test]
    fn test_decompress_gzip() {
        let compressed = &[
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xf3, 0x48, 0xcd, 0xc9,
            0xc9, 0x07, 0x00, 0x82, 0x89, 0xd1, 0xf7, 0x05, 0x00, 0x00, 0x00,
        ];
        let result = decompress(compressed, Compression::Gzip).unwrap();
        assert_eq!(result, b"Hello");
    }

    #[test]
    fn test_type_annotation_parse() {
        let annotation = TypeAnnotation::parse("@type server-descriptor 1.0").unwrap();
        assert_eq!(annotation.name, "server-descriptor");
        assert_eq!(annotation.major_version, 1);
        assert_eq!(annotation.minor_version, 0);
    }

    #[test]
    fn test_type_annotation_parse_extra_info() {
        let annotation = TypeAnnotation::parse("@type extra-info 1.0").unwrap();
        assert_eq!(annotation.name, "extra-info");
        assert_eq!(annotation.major_version, 1);
        assert_eq!(annotation.minor_version, 0);
    }

    #[test]
    fn test_type_annotation_parse_bridge_extra_info() {
        let annotation = TypeAnnotation::parse("@type bridge-extra-info 1.2").unwrap();
        assert_eq!(annotation.name, "bridge-extra-info");
        assert_eq!(annotation.major_version, 1);
        assert_eq!(annotation.minor_version, 2);
    }

    #[test]
    fn test_type_annotation_parse_invalid() {
        assert!(TypeAnnotation::parse("router test").is_none());
        assert!(TypeAnnotation::parse("@type").is_none());
        assert!(TypeAnnotation::parse("@type server-descriptor").is_none());
        assert!(TypeAnnotation::parse("@type server-descriptor 1").is_none());
    }

    #[test]
    fn test_type_annotation_display() {
        let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
        assert_eq!(annotation.to_string(), "@type server-descriptor 1.0");
    }

    #[test]
    fn test_strip_type_annotation() {
        let content = "@type server-descriptor 1.0\nrouter test 127.0.0.1";
        let (annotation, rest) = strip_type_annotation(content);
        assert!(annotation.is_some());
        assert_eq!(annotation.unwrap().name, "server-descriptor");
        assert_eq!(rest, "router test 127.0.0.1");
    }

    #[test]
    fn test_strip_type_annotation_no_annotation() {
        let content = "router test 127.0.0.1";
        let (annotation, rest) = strip_type_annotation(content);
        assert!(annotation.is_none());
        assert_eq!(rest, content);
    }

    // The annotation is the entire input: nothing follows it, so the
    // remaining content must be empty rather than the annotation itself.
    #[test]
    fn test_strip_type_annotation_without_trailing_newline() {
        let (annotation, rest) = strip_type_annotation("@type extra-info 1.0");
        assert!(annotation.is_some());
        assert_eq!(annotation.unwrap().name, "extra-info");
        assert_eq!(rest, "");
    }

    #[test]
    fn test_descriptor_type_from_annotation() {
        let annotation = TypeAnnotation::new("server-descriptor", 1, 0);
        assert_eq!(
            DescriptorType::from_annotation(&annotation),
            Some(DescriptorType::ServerDescriptor)
        );

        let annotation = TypeAnnotation::new("extra-info", 1, 0);
        assert_eq!(
            DescriptorType::from_annotation(&annotation),
            Some(DescriptorType::ExtraInfo)
        );

        let annotation = TypeAnnotation::new("tordnsel", 1, 0);
        assert_eq!(
            DescriptorType::from_annotation(&annotation),
            Some(DescriptorType::TorDNSEL)
        );
    }

    #[test]
    fn test_descriptor_type_from_filename() {
        assert_eq!(
            DescriptorType::from_filename("cached-consensus"),
            Some(DescriptorType::NetworkStatusConsensusV3)
        );
        assert_eq!(
            DescriptorType::from_filename("cached-descriptors"),
            Some(DescriptorType::ServerDescriptor)
        );
        assert_eq!(
            DescriptorType::from_filename("cached-extrainfo"),
            Some(DescriptorType::ExtraInfo)
        );
        assert_eq!(
            DescriptorType::from_filename("exit-list"),
            Some(DescriptorType::TorDNSEL)
        );
    }

    // RFC 4648 test vectors with the trailing `=` padding removed, since
    // `base64_encode` never emits padding.
    #[test]
    fn test_base64_encode_unpadded() {
        assert_eq!(base64_encode(b""), "");
        assert_eq!(base64_encode(b"f"), "Zg");
        assert_eq!(base64_encode(b"fo"), "Zm8");
        assert_eq!(base64_encode(b"foo"), "Zm9v");
        assert_eq!(base64_encode(b"foob"), "Zm9vYg");
        assert_eq!(base64_encode(b"fooba"), "Zm9vYmE");
        assert_eq!(base64_encode(b"foobar"), "Zm9vYmFy");
        assert_eq!(base64_encode(&[0xfb, 0xf0]), "+/A");
    }

    #[test]
    fn test_encode_digest_encodings() {
        assert_eq!(
            encode_digest(&[0x00, 0xab, 0xff], DigestEncoding::Hex),
            "00ABFF"
        );
        assert_eq!(encode_digest(b"AB", DigestEncoding::Raw), "AB");
        assert_eq!(encode_digest(b"foobar", DigestEncoding::Base64), "Zm9vYmFy");
        assert_eq!(encode_digest(b"", DigestEncoding::Hex), "");
    }

    // Well-known digests of the empty input.
    #[test]
    fn test_compute_digest_empty_input() {
        assert_eq!(
            compute_digest(b"", DigestHash::Sha1, DigestEncoding::Hex),
            "DA39A3EE5E6B4B0D3255BFEF95601890AFD80709"
        );
        assert_eq!(
            compute_digest(b"", DigestHash::Sha256, DigestEncoding::Hex),
            "E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855"
        );
    }
}