// ExifReader.cs (21 KB)
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.IO;
  5. using System.Text;
  6. namespace MediaBrowser.Providers.Photos
  7. {
  /// <summary>
  /// Reads Exif metadata from a JPEG file. The file stays open for reading for as long as
  /// the instance exists; call <see cref="Dispose"/> to release the file handle.
  /// </summary>
  /// <remarks>
  /// The constructor indexes the tags of the first IFD, the Exif SubIFD (tag 0x8769) and,
  /// when present, the GPS IFD (tag 0x8825). Values are decoded lazily from the file by
  /// <see cref="GetTagValue{T}(ushort, out T)"/>.
  /// </remarks>
  /// <seealso href="http://gvsoft.homedns.org/exif/Exif-explanation.html"/>
  public class ExifReader : IDisposable
  {
      /// <summary>
      /// Date layouts accepted when a caller requests an ASCII tag as a <see cref="DateTime"/>:
      /// the full Exif timestamp and the date-only form.
      /// </summary>
      private static readonly string[] DateFormats = { "yyyy:MM:dd HH:mm:ss", "yyyy:MM:dd" };

      private readonly FileStream fileStream = null;
      private readonly BinaryReader reader = null;

      /// <summary>
      /// The catalogue of tag ids and the absolute file offsets of their IFD entries
      /// </summary>
      private Dictionary<ushort, long> catalogue;

      /// <summary>
      /// Indicates whether multi-byte values are currently read as little endian
      /// </summary>
      private bool isLittleEndian;

      /// <summary>
      /// The position in the filestream at which the TIFF header starts
      /// </summary>
      private long tiffHeaderStart;

      /// <summary>
      /// Opens <paramref name="fileName"/> and builds the index of its Exif tags.
      /// </summary>
      /// <param name="fileName">Path of the JPEG file to read.</param>
      /// <exception cref="InvalidDataException">
      /// The file is not a JPEG, or it contains no usable Exif block.
      /// </exception>
      /// <exception cref="EndOfStreamException">The file ends before the Exif data is complete.</exception>
      public ExifReader(string fileName)
      {
          // JPEG markers are big endian (Motorola). The TIFF header found later
          // specifies the byte order used for the rest of the Exif data.
          isLittleEndian = false;

          try
          {
              fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
              reader = new BinaryReader(fileStream);

              // Every JPEG starts with the SOI marker.
              if (ReadUShort() != 0xFFD8)
              {
                  throw new InvalidDataException("File is not a valid JPEG");
              }

              ReadToExifStart();
              CreateTagIndex();
          }
          catch (Exception)
          {
              // If instantiation fails, make sure the file handle is not left open.
              Dispose();
              throw;
          }
      }

      #region TIFF methods

      /// <summary>
      /// Returns the length (in bytes) of one component of the specified TIFF data type.
      /// </summary>
      /// <param name="tiffDataType">TIFF type code (1-12).</param>
      /// <returns>1, 2, 4 or 8.</returns>
      /// <exception cref="InvalidDataException">The type code is not one of 1-12.</exception>
      private static byte GetTIFFFieldLength(ushort tiffDataType)
      {
          switch (tiffDataType)
          {
              case 1:  // unsigned byte
              case 2:  // ascii character
              case 6:  // signed byte
              case 7:  // undefined: an opaque 8-bit byte, NOT a 4-byte integer
                  return 1;
              case 3:  // unsigned short
              case 8:  // signed short
                  return 2;
              case 4:  // unsigned long
              case 9:  // signed long
              case 11: // single float
                  return 4;
              case 5:  // unsigned rational
              case 10: // signed rational
              case 12: // double float
                  return 8;
              default:
                  throw new InvalidDataException(
                      string.Format(CultureInfo.InvariantCulture, "Unknown TIFF datatype: {0}", tiffDataType));
          }
      }

      #endregion

      #region Methods for reading data directly from the filestream

      /// <summary>
      /// Reads a 2 byte unsigned integer from the current file position.
      /// </summary>
      private ushort ReadUShort()
      {
          return ToUShort(ReadBytes(2));
      }

      /// <summary>
      /// Reads a 4 byte unsigned integer from the current file position.
      /// </summary>
      private uint ReadUint()
      {
          return ToUint(ReadBytes(4));
      }

      /// <summary>
      /// Reads <paramref name="chars"/> bytes from the current file position as ASCII text.
      /// </summary>
      private string ReadString(int chars)
      {
          return Encoding.ASCII.GetString(ReadBytes(chars));
      }

      /// <summary>
      /// Reads exactly <paramref name="byteCount"/> bytes from the current file position.
      /// </summary>
      /// <exception cref="EndOfStreamException">Fewer bytes than requested remain in the file.</exception>
      private byte[] ReadBytes(int byteCount)
      {
          byte[] data = reader.ReadBytes(byteCount);

          // BinaryReader.ReadBytes silently returns a short array at end of file;
          // fail here rather than in a later conversion.
          if (data.Length != byteCount)
          {
              throw new EndOfStreamException("Unexpected end of file while reading Exif data");
          }

          return data;
      }

      /// <summary>
      /// Reads some bytes from the specified TIFF offset and then restores the file position.
      /// </summary>
      /// <param name="tiffOffset">Offset relative to the first byte of the TIFF header.</param>
      /// <param name="byteCount">Number of bytes to read.</param>
      private byte[] ReadBytes(uint tiffOffset, int byteCount)
      {
          long originalOffset = fileStream.Position;
          try
          {
              fileStream.Seek(tiffHeaderStart + tiffOffset, SeekOrigin.Begin);
              return ReadBytes(byteCount);
          }
          finally
          {
              fileStream.Position = originalOffset;
          }
      }

      #endregion

      #region Data conversion methods for interpreting datatypes from a byte array

      /// <summary>
      /// Copies <paramref name="length"/> bytes starting at <paramref name="offset"/> and, when
      /// the file's byte order differs from the machine's, reverses the copy so that
      /// <see cref="BitConverter"/> can decode it. The caller's array is left untouched.
      /// </summary>
      private byte[] ToNativeOrder(byte[] data, int offset, int length)
      {
          var ordered = new byte[length];
          Array.Copy(data, offset, ordered, 0, length);

          if (isLittleEndian != BitConverter.IsLittleEndian)
          {
              Array.Reverse(ordered);
          }

          return ordered;
      }

      /// <summary>
      /// Converts the first 2 bytes to a ushort using the current byte order.
      /// </summary>
      private ushort ToUShort(byte[] data)
      {
          return BitConverter.ToUInt16(ToNativeOrder(data, 0, 2), 0);
      }

      /// <summary>
      /// Converts 8 bytes to an unsigned rational using the current byte order.
      /// </summary>
      /// <remarks>
      /// A TIFF rational contains two 4-byte integers: the numerator followed by the
      /// denominator. A zero denominator yields infinity or NaN (floating-point division).
      /// </remarks>
      /// <seealso cref="ToRational"/>
      private double ToURational(byte[] data)
      {
          uint numerator = BitConverter.ToUInt32(ToNativeOrder(data, 0, 4), 0);
          uint denominator = BitConverter.ToUInt32(ToNativeOrder(data, 4, 4), 0);
          return numerator / (double)denominator;
      }

      /// <summary>
      /// Converts 8 bytes to a signed rational using the current byte order.
      /// </summary>
      /// <remarks>
      /// A TIFF rational contains two 4-byte integers: the numerator followed by the
      /// denominator. A zero denominator yields infinity or NaN (floating-point division).
      /// </remarks>
      private double ToRational(byte[] data)
      {
          int numerator = BitConverter.ToInt32(ToNativeOrder(data, 0, 4), 0);
          int denominator = BitConverter.ToInt32(ToNativeOrder(data, 4, 4), 0);
          return numerator / (double)denominator;
      }

      /// <summary>
      /// Converts the first 4 bytes to a uint using the current byte order.
      /// </summary>
      private uint ToUint(byte[] data)
      {
          return BitConverter.ToUInt32(ToNativeOrder(data, 0, 4), 0);
      }

      /// <summary>
      /// Converts the first 4 bytes to an int using the current byte order.
      /// </summary>
      private int ToInt(byte[] data)
      {
          return BitConverter.ToInt32(ToNativeOrder(data, 0, 4), 0);
      }

      /// <summary>
      /// Converts the first 8 bytes to a double using the current byte order.
      /// </summary>
      private double ToDouble(byte[] data)
      {
          return BitConverter.ToDouble(ToNativeOrder(data, 0, 8), 0);
      }

      /// <summary>
      /// Converts the first 4 bytes to a float using the current byte order.
      /// </summary>
      private float ToSingle(byte[] data)
      {
          return BitConverter.ToSingle(ToNativeOrder(data, 0, 4), 0);
      }

      /// <summary>
      /// Converts the first 2 bytes to a short using the current byte order.
      /// </summary>
      private short ToShort(byte[] data)
      {
          return BitConverter.ToInt16(ToNativeOrder(data, 0, 2), 0);
      }

      /// <summary>
      /// Reinterprets the first byte as a two's-complement signed byte
      /// (0x00-0x7F map to 0..127, 0x80-0xFF map to -128..-1).
      /// </summary>
      private sbyte ToSByte(byte[] data)
      {
          return unchecked((sbyte)data[0]);
      }

      /// <summary>
      /// Splits <paramref name="data"/> into elements of <paramref name="elementLengthBytes"/>
      /// bytes and decodes each one with <paramref name="converter"/>.
      /// </summary>
      /// <param name="data">The raw bytes of all elements, back to back.</param>
      /// <param name="elementLengthBytes">Size of one element in bytes.</param>
      /// <param name="converter">Decoder applied to each element's bytes.</param>
      /// <returns>An array with <c>data.Length / elementLengthBytes</c> decoded elements.</returns>
      private static T[] GetArray<T>(byte[] data, int elementLengthBytes, ConverterMethod<T> converter)
      {
          int elementTotal = data.Length / elementLengthBytes;
          var convertedData = new T[elementTotal];
          var buffer = new byte[elementLengthBytes];

          for (int elementIndex = 0; elementIndex < elementTotal; elementIndex++)
          {
              // Place the data for the current element into the buffer, then decode it.
              Array.Copy(data, elementIndex * elementLengthBytes, buffer, 0, elementLengthBytes);
              convertedData[elementIndex] = converter(buffer);
          }

          return convertedData;
      }

      /// <summary>
      /// Decodes tag data either as a single <typeparamref name="TElement"/> or, for any other
      /// component count, as a <typeparamref name="TElement"/> array, and hands it back as
      /// <typeparamref name="T"/>.
      /// </summary>
      private static T ConvertComponents<T, TElement>(
          byte[] data, bool isSingle, int elementLengthBytes, ConverterMethod<TElement> converter)
      {
          if (isSingle)
          {
              return CastResult<T>(converter(data));
          }

          return CastResult<T>(GetArray(data, elementLengthBytes, converter));
      }

      /// <summary>
      /// Returns <paramref name="value"/> as <typeparamref name="T"/>.
      /// </summary>
      /// <remarks>
      /// When both the decoded value and <typeparamref name="T"/> are primitive types but differ
      /// (for example a stored ushort requested as int), the value is converted with
      /// <see cref="Convert.ChangeType(object, Type, IFormatProvider)"/>. Every other mismatch
      /// throws <see cref="InvalidCastException"/>, as a plain unboxing cast would.
      /// </remarks>
      private static T CastResult<T>(object value)
      {
          if (!(value is T) && value.GetType().IsPrimitive && typeof(T).IsPrimitive)
          {
              return (T)Convert.ChangeType(value, typeof(T), CultureInfo.InvariantCulture);
          }

          return (T)value;
      }

      /// <summary>
      /// A delegate used to invoke any of the data conversion methods
      /// </summary>
      /// <param name="data">The bytes of exactly one element.</param>
      /// <returns>The decoded element.</returns>
      private delegate T ConverterMethod<out T>(byte[] data);

      #endregion

      #region Stream seek methods - used to get to locations within the JPEG

      /// <summary>
      /// Advances the stream to just after the first APP1 (0xFFE1) marker.
      /// </summary>
      /// <exception cref="InvalidDataException">
      /// A segment does not start with 0xFF, or declares an impossible length, before an
      /// APP1 marker is found.
      /// </exception>
      private void ReadToExifStart()
      {
          // The file is a sequence of segments, each made of a 0xFF byte, a marker number
          // and a big endian length. Skip segments until the APP1 marker is reached.
          while (true)
          {
              if (reader.ReadByte() != 0xFF)
              {
                  throw new InvalidDataException("Could not find Exif data block");
              }

              if (reader.ReadByte() == 0xE1)
              {
                  return;
              }

              // The length field counts its own 2 bytes, so anything smaller is corrupt.
              ushort dataLength = ReadUShort();
              if (dataLength < 2)
              {
                  throw new InvalidDataException("Could not find Exif data block");
              }

              reader.BaseStream.Seek(dataLength - 2, SeekOrigin.Current);
          }
      }

      /// <summary>
      /// Reads through the Exif data and builds an index of all Exif tags in the document.
      /// Expects the stream to be positioned just after the APP1 marker.
      /// </summary>
      /// <exception cref="InvalidDataException">
      /// The Exif or TIFF header is malformed, or the Exif SubIFD pointer (0x8769) is missing.
      /// </exception>
      private void CreateTagIndex()
      {
          // The next 2 bytes are the size of the APP1 segment; the value is not needed.
          ReadUShort();

          // The payload starts with the ASCII "Exif" followed by 2 zero bytes.
          if (ReadString(4) != "Exif")
          {
              throw new InvalidDataException("Exif data not found");
          }

          if (ReadUShort() != 0)
          {
              throw new InvalidDataException("Malformed Exif data");
          }

          // We're now into the TIFF format; all TIFF offsets are relative to this position.
          tiffHeaderStart = reader.BaseStream.Position;

          // Byte order marker: "II" for Intel (little endian), otherwise Motorola (big endian).
          isLittleEndian = ReadString(2) == "II";

          // The next 2 bytes are always 0x002A.
          if (ReadUShort() != 0x002A)
          {
              throw new InvalidDataException("Error in TIFF data");
          }

          // Jump to the first IFD (image file directory) and catalogue it.
          uint ifdOffset = ReadUint();
          fileStream.Position = ifdOffset + tiffHeaderStart;
          CatalogueIFD();

          // More data is stored in the Exif SubIFD, whose offset is held in tag 0x8769.
          uint offset;
          if (!GetTagValue(0x8769, out offset))
          {
              throw new InvalidDataException("Unable to locate Exif data");
          }

          fileStream.Position = offset + tiffHeaderStart;
          CatalogueIFD();

          // The GPS IFD (pointer in tag 0x8825) is optional.
          if (GetTagValue(0x8825, out offset))
          {
              fileStream.Position = offset + tiffHeaderStart;
              CatalogueIFD();
          }
      }

      #endregion

      #region Exif data catalog and retrieval methods

      /// <summary>
      /// Retrieves an Exif value by its <see cref="ExifTags"/> member.
      /// </summary>
      /// <param name="tag">The tag to look up; its numeric value is used as the tag ID.</param>
      /// <param name="result">The decoded value, or <c>default(T)</c> when the method returns false.</param>
      /// <returns>True when the tag was found and decoded.</returns>
      public bool GetTagValue<T>(ExifTags tag, out T result)
      {
          return GetTagValue((ushort)tag, out result);
      }

      /// <summary>
      /// Retrieves an Exif value with the requested tag ID.
      /// </summary>
      /// <remarks>
      /// The value is decoded according to the TIFF data type stored with the tag. A tag with
      /// exactly one component yields a scalar; any other count yields an array of the same
      /// element type. ASCII tags yield a string cut at the first NUL, or a
      /// <see cref="DateTime"/> when <typeparamref name="T"/> is <see cref="DateTime"/>.
      /// Byte and "undefined" tags yield <c>byte</c> / <c>byte[]</c>.
      /// </remarks>
      /// <param name="tagID">Numeric Exif tag ID.</param>
      /// <param name="result">The decoded value, or <c>default(T)</c> when the method returns false.</param>
      /// <returns>
      /// True when the tag was found and decoded; false when the tag is not in the file or an
      /// ASCII value requested as <see cref="DateTime"/> is not a valid date.
      /// </returns>
      /// <exception cref="InvalidCastException">The stored value cannot be returned as <typeparamref name="T"/>.</exception>
      /// <exception cref="InvalidDataException">The tag entry is corrupt or uses an unknown data type.</exception>
      public bool GetTagValue<T>(ushort tagID, out T result)
      {
          ushort tiffDataType;
          uint numberOfComponents;
          byte[] tagData = GetTagBytes(tagID, out tiffDataType, out numberOfComponents);

          if (tagData == null)
          {
              result = default(T);
              return false;
          }

          byte fieldLength = GetTIFFFieldLength(tiffDataType);
          bool isSingle = numberOfComponents == 1;

          switch (tiffDataType)
          {
              case 1: // unsigned byte
              case 7: // undefined: opaque bytes, handed back undecoded
                  result = isSingle ? CastResult<T>(tagData[0]) : CastResult<T>(tagData);
                  return true;

              case 2: // ascii string
              {
                  string text = Encoding.ASCII.GetString(tagData);

                  // The string is NUL terminated; drop the terminator and anything after it.
                  int nullCharIndex = text.IndexOf('\0');
                  if (nullCharIndex != -1)
                  {
                      text = text.Substring(0, nullCharIndex);
                  }

                  // Special processing for dates: an unparseable date means "no value"
                  // rather than an exception.
                  if (typeof(T) == typeof(DateTime))
                  {
                      DateTime parsedDate;
                      bool parsed = DateTime.TryParseExact(
                          text, DateFormats, CultureInfo.InvariantCulture, DateTimeStyles.None, out parsedDate);
                      result = parsed ? (T)(object)parsedDate : default(T);
                      return parsed;
                  }

                  result = CastResult<T>(text);
                  return true;
              }

              case 3: // unsigned short
                  result = ConvertComponents<T, ushort>(tagData, isSingle, fieldLength, ToUShort);
                  return true;

              case 4: // unsigned long
                  result = ConvertComponents<T, uint>(tagData, isSingle, fieldLength, ToUint);
                  return true;

              case 5: // unsigned rational
                  result = ConvertComponents<T, double>(tagData, isSingle, fieldLength, ToURational);
                  return true;

              case 6: // signed byte
                  result = ConvertComponents<T, sbyte>(tagData, isSingle, fieldLength, ToSByte);
                  return true;

              case 8: // signed short
                  result = ConvertComponents<T, short>(tagData, isSingle, fieldLength, ToShort);
                  return true;

              case 9: // signed long
                  result = ConvertComponents<T, int>(tagData, isSingle, fieldLength, ToInt);
                  return true;

              case 10: // signed rational
                  result = ConvertComponents<T, double>(tagData, isSingle, fieldLength, ToRational);
                  return true;

              case 11: // single float
                  result = ConvertComponents<T, float>(tagData, isSingle, fieldLength, ToSingle);
                  return true;

              case 12: // double float
                  result = ConvertComponents<T, double>(tagData, isSingle, fieldLength, ToDouble);
                  return true;

              default:
                  throw new InvalidDataException(
                      string.Format(CultureInfo.InvariantCulture, "Unknown TIFF datatype: {0}", tiffDataType));
          }
      }

      /// <summary>
      /// Gets the raw value bytes of the specified tag ID.
      /// </summary>
      /// <param name="tagID">Numeric Exif tag ID.</param>
      /// <param name="tiffDataType">Receives the TIFF type code stored with the tag (0 when not found).</param>
      /// <param name="numberOfComponents">Receives the number of items which make up the value - i.e.
      /// for a string, the number of characters including the terminator (0 when not found).</param>
      /// <returns>The value bytes, or null when the tag is not in the catalogue.</returns>
      /// <exception cref="InvalidDataException">
      /// The entry at the catalogued offset has a different tag ID, or its value would extend
      /// beyond the end of the file.
      /// </exception>
      private byte[] GetTagBytes(ushort tagID, out ushort tiffDataType, out uint numberOfComponents)
      {
          tiffDataType = 0;
          numberOfComponents = 0;

          // Get the tag's offset from the catalogue and do some basic error checks.
          long tagOffset;
          if (fileStream == null || reader == null || catalogue == null
              || !catalogue.TryGetValue(tagID, out tagOffset))
          {
              return null;
          }

          // An IFD entry is: tag ID (2), data type (2), component count (4), value or offset (4).
          fileStream.Position = tagOffset;

          ushort currentTagID = ReadUShort();
          if (currentTagID != tagID)
          {
              throw new InvalidDataException("Tag number not at expected offset");
          }

          tiffDataType = ReadUShort();
          numberOfComponents = ReadUint();
          byte[] tagData = ReadBytes(4);

          // Computed as a long so a corrupt component count cannot overflow.
          long dataSize = (long)numberOfComponents * GetTIFFFieldLength(tiffDataType);

          if (dataSize <= 4)
          {
              // The value fits in the 4-byte field and is stored starting from the left.
              Array.Resize(ref tagData, (int)dataSize);
              return tagData;
          }

          // Otherwise the field holds a 4-byte offset (from the TIFF header) to the value.
          uint dataOffset = ToUint(tagData);
          if (dataSize > int.MaxValue || tiffHeaderStart + dataOffset + dataSize > fileStream.Length)
          {
              throw new InvalidDataException("Exif tag data extends beyond the end of the file");
          }

          return ReadBytes(dataOffset, (int)dataSize);
      }

      /// <summary>
      /// Records all Exif tags and their absolute offsets within the file from the IFD that
      /// starts at the current stream position. A tag ID seen more than once keeps the
      /// offset of the most recently catalogued entry.
      /// </summary>
      private void CatalogueIFD()
      {
          if (catalogue == null)
          {
              catalogue = new Dictionary<ushort, long>();
          }

          // The first 2 bytes of an IFD hold the number of 12-byte entries that follow.
          ushort entryCount = ReadUShort();

          for (ushort currentEntry = 0; currentEntry < entryCount; currentEntry++)
          {
              long entryStart = fileStream.Position;
              ushort currentTagNumber = ReadUShort();
              catalogue[currentTagNumber] = entryStart;

              // Skip the remaining 10 bytes of this entry.
              reader.BaseStream.Seek(10, SeekOrigin.Current);
          }
      }

      #endregion

      #region IDisposable Members

      /// <summary>
      /// Closes the reader and the underlying file stream, releasing the file handle.
      /// </summary>
      public void Dispose()
      {
          if (reader != null)
          {
              reader.Close();
          }

          if (fileStream != null)
          {
              fileStream.Close();
          }
      }

      #endregion
  }
  511. }