1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
//! Metadata of JPEG images.

use std::io::BufRead;
use std::fmt;

use byteorder::{ReadBytesExt, BigEndian};

use types::{Result, Dimensions};
use traits::LoadableMetadata;
use utils::BufReadExt;

/// The coding process declared by the start-of-frame marker of a JPEG image.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum CodingProcess {
    /// Sequential DCT (discrete cosine transform) coding.
    DctSequential,
    /// Progressive DCT coding.
    DctProgressive,
    /// Lossless coding.
    Lossless,
}

impl fmt::Display for CodingProcess {
    /// Writes a short human-readable name of the coding process.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match *self {
            CodingProcess::Lossless => "Lossless",
            CodingProcess::DctProgressive => "Progressive DCT",
            CodingProcess::DctSequential => "Sequential DCT",
        };
        f.write_str(label)
    }
}

impl CodingProcess {
    /// Maps the type byte of a start-of-frame marker to its coding process.
    ///
    /// Returns `None` for any byte which is not one of the thirteen SOF marker types
    /// (this includes `0xc4`, `0xc8` and `0xcc`).
    fn from_marker(marker: u8) -> Option<CodingProcess> {
        let process = match marker {
            0xc3 | 0xc7 | 0xcb | 0xcf => CodingProcess::Lossless,
            0xc2 | 0xc6 | 0xca | 0xce => CodingProcess::DctProgressive,
            0xc0 | 0xc1 | 0xc5 | 0xc9 | 0xcd => CodingProcess::DctSequential,
            _ => return None,
        };
        Some(process)
    }
}

/// The entropy coding method declared by the start-of-frame marker of a JPEG image.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum EntropyCoding {
    /// Huffman coding.
    Huffman,
    /// Arithmetic coding.
    Arithmetic,
}

impl fmt::Display for EntropyCoding {
    /// Writes a short human-readable name of the entropy coding method.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match *self {
            EntropyCoding::Arithmetic => "Arithmetic",
            EntropyCoding::Huffman => "Huffman",
        };
        f.write_str(label)
    }
}

impl EntropyCoding {
    /// Maps the type byte of a start-of-frame marker to its entropy coding method.
    ///
    /// Returns `None` for any byte which is not one of the thirteen SOF marker types
    /// (this includes `0xc4`, `0xc8` and `0xcc`).
    fn from_marker(marker: u8) -> Option<EntropyCoding> {
        let coding = match marker {
            0xc9 | 0xca | 0xcb | 0xcd | 0xce | 0xcf => EntropyCoding::Arithmetic,
            0xc0 | 0xc1 | 0xc2 | 0xc3 | 0xc5 | 0xc6 | 0xc7 => EntropyCoding::Huffman,
            _ => return None,
        };
        Some(coding)
    }
}

/// Represents metadata of a JPEG image.
///
/// It provides information contained in the JPEG frame header, including image dimensions,
/// sample precision, coding process type and entropy coding type. The loader in this module
/// fills it from the first SOF (start of frame) marker it finds after the SOI marker.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Metadata {
    /// Image size, converted from the `(width, height)` pair read from the frame header.
    ///
    /// The loader stores the height exactly as read; a height of zero is not resolved
    /// any further (the loader has a TODO about reading a DNL marker for that case).
    pub dimensions: Dimensions,
    /// Sample precision (in bits), as stored in the frame header.
    pub sample_precision: u8,
    /// Image coding process type, derived from the SOF marker type.
    pub coding_process: CodingProcess,
    /// Image entropy coding type, derived from the SOF marker type.
    pub entropy_coding: EntropyCoding,
    /// Whether this image uses a baseline DCT encoding.
    ///
    /// The loader sets this only when the SOF marker type is `0xc0`.
    pub baseline: bool,
    /// Whether this image uses a differential encoding.
    ///
    /// The loader sets this for SOF marker types `0xc5`-`0xc7` and `0xcd`-`0xcf`.
    pub differential: bool,
}

/// Scans `r` forward for the next JPEG marker accepted by `matcher`.
///
/// A marker is recognized as a `0xff` byte followed by a type byte which is neither
/// `0x00` (a "stuffed" byte, i.e. an escaped `0xff` in the data) nor `0xff` (a fill byte).
/// Every candidate type byte is passed to `matcher`; the first one for which it returns
/// `true` is returned, and the reader is left positioned right after that byte.
///
/// `name` is only used to build the error message.
///
/// # Errors
///
/// Returns an "unexpected EOF" error if `skip_until` reports that nothing was consumed
/// while looking for `0xff`, or if the input ends while the marker type is being read.
/// Other I/O errors are propagated.
fn find_marker<R: ?Sized, F>(r: &mut R, name: &str, mut matcher: F) -> Result<u8>
    where R: BufRead, F: FnMut(u8) -> bool
{
    loop {
        // a zero result means there was nothing left to skip, so no marker can follow
        if try!(r.skip_until(0xff)) == 0 {
            return Err(unexpected_eof!("when searching for {} marker", name));
        }

        // Any marker may be preceded by an arbitrary number of 0xff fill bytes. Treating
        // a fill byte as the marker type would make the next `skip_until` call run past
        // the real marker type, so swallow the fill bytes first.
        let mut marker_type = try_if_eof!(r.read_u8(), "when reading marker type");
        while marker_type == 0xff {
            marker_type = try_if_eof!(r.read_u8(), "when reading marker type");
        }

        if marker_type == 0 { continue; }  // skip "stuffed" byte

        if matcher(marker_type) {
            return Ok(marker_type);
        }
    }
}

impl LoadableMetadata for Metadata {
    /// Reads JPEG metadata from `r`.
    ///
    /// Looks for the SOI marker, then for the first SOF marker, and decodes the beginning
    /// of the frame header which follows it (length, sample precision, height, width).
    /// All remaining fields are derived from the SOF marker type itself.
    ///
    /// # Errors
    ///
    /// Fails if either marker cannot be found, if the input ends in the middle of the
    /// frame header, or if the declared frame header length is 8 or less.
    fn load<R: ?Sized + BufRead>(r: &mut R) -> Result<Metadata> {
        // every JPEG file must contain an SOI marker
        try!(find_marker(r, "SOI", |m| m == 0xd8));

        // XXX: should the APP0 JFIF marker be checked here? It does not look strictly
        // XXX: necessary, and other interchange formats seem to be possible as well.

        // every JPEG file must contain an SOF marker, too
        let sof = try!(find_marker(r, "SOF", is_sof_marker));

        // frame header length: 2 bytes for the length field itself plus at least
        // 6 bytes of fixed header fields, so anything up to 8 is rejected
        let header_len = try_if_eof!(r.read_u16::<BigEndian>(), "when reading SOF marker payload size");
        if header_len <= 8 {
            return Err(invalid_format!("invalid JPEG frame header size: {}", header_len));
        }

        // fixed fields come in this order: precision, height, width
        let precision = try_if_eof!(r.read_u8(), "when reading sample precision of the frame");
        let height = try_if_eof!(r.read_u16::<BigEndian>(), "when reading JPEG frame height");
        let width = try_if_eof!(r.read_u16::<BigEndian>(), "when reading JPEG frame width");
        // TODO: handle height == 0 (a DNL marker has to be read after the first scan)

        // `find_marker` only returns types accepted by `is_sof_marker`, therefore
        // the fallback arm and both `unwrap` calls below can never fire
        let is_differential = match sof {
            0xc5 | 0xc6 | 0xc7 | 0xcd | 0xce | 0xcf => true,
            0xc0 | 0xc1 | 0xc2 | 0xc3 | 0xc9 | 0xca | 0xcb => false,
            _ => unreachable!(),
        };
        let process = CodingProcess::from_marker(sof).unwrap();
        let entropy = EntropyCoding::from_marker(sof).unwrap();

        Ok(Metadata {
            dimensions: (width, height).into(),
            sample_precision: precision,
            coding_process: process,
            entropy_coding: entropy,
            // 0xc0 is the one and only baseline DCT marker
            baseline: sof == 0xc0,
            differential: is_differential,
        })
    }
}

/// Tells whether `value` is the type byte of a start-of-frame (SOF) marker.
///
/// SOF marker types lie between `0xc0` and `0xcf` inclusive, with the exception of
/// `0xc4`, `0xc8` and `0xcc`, which are not SOF markers.
fn is_sof_marker(value: u8) -> bool {
    let within_sof_range = 0xc0 <= value && value <= 0xcf;
    let excluded = value == 0xc4 || value == 0xc8 || value == 0xcc;
    within_sof_range && !excluded
}