Index: trunk/src/com/drew/metadata/iptc/IptcReader.java
===================================================================
--- trunk/src/com/drew/metadata/iptc/IptcReader.java	(revision 6127)
+++ trunk/src/com/drew/metadata/iptc/IptcReader.java	(revision 8132)
@@ -1,4 +1,4 @@
 /*
- * Copyright 2002-2012 Drew Noakes
+ * Copyright 2002-2015 Drew Noakes
  *
  *    Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,24 +16,29 @@
  * More information about this project is available at:
  *
- *    http://drewnoakes.com/code/exif/
- *    http://code.google.com/p/metadata-extractor/
+ *    https://drewnoakes.com/code/exif/
+ *    https://github.com/drewnoakes/metadata-extractor
  */
 package com.drew.metadata.iptc;
 
-import com.drew.lang.BufferBoundsException;
-import com.drew.lang.BufferReader;
+import com.drew.imaging.jpeg.JpegSegmentMetadataReader;
+import com.drew.imaging.jpeg.JpegSegmentType;
+import com.drew.lang.SequentialByteArrayReader;
+import com.drew.lang.SequentialReader;
 import com.drew.lang.annotations.NotNull;
 import com.drew.metadata.Directory;
 import com.drew.metadata.Metadata;
-import com.drew.metadata.MetadataReader;
-
+
+import java.io.IOException;
+import java.util.Arrays;
 import java.util.Date;
 
 /**
- * Decodes IPTC binary data, populating a <code>Metadata</code> object with tag values in an <code>IptcDirectory</code>.
- *
- * @author Drew Noakes http://drewnoakes.com
+ * Decodes IPTC binary data, populating a {@link Metadata} object with tag values in an {@link IptcDirectory}.
+ * <p>
+ * http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
+ *
+ * @author Drew Noakes https://drewnoakes.com
  */
-public class IptcReader implements MetadataReader
+public class IptcReader implements JpegSegmentMetadataReader
 {
     // TODO consider breaking the IPTC section up into multiple directories and providing segregation of each IPTC directory
@@ -52,6 +57,25 @@
 */
 
-    /** Performs the IPTC data extraction, adding found values to the specified instance of <code>Metadata</code>. */
-    public void extract(@NotNull final BufferReader reader, @NotNull final Metadata metadata)
+    @NotNull
+    public Iterable<JpegSegmentType> getSegmentTypes()
+    {
+        return Arrays.asList(JpegSegmentType.APPD);
+    }
+
+    public boolean canProcess(@NotNull byte[] segmentBytes, @NotNull JpegSegmentType segmentType)
+    {
+        // Check whether the first byte resembles the IPTC tag marker (0x1c)
+        return segmentBytes.length != 0 && segmentBytes[0] == 0x1c;
+    }
+
+    public void extract(@NotNull byte[] segmentBytes, @NotNull Metadata metadata, @NotNull JpegSegmentType segmentType)
+    {
+        extract(new SequentialByteArrayReader(segmentBytes), metadata, segmentBytes.length);
+    }
+
+    /**
+     * Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
+     */
+    public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length)
     {
         IptcDirectory directory = metadata.getOrCreateDirectory(IptcDirectory.class);
@@ -59,39 +83,30 @@
         int offset = 0;
 
-/*
-        // find start-of-segment marker (potentially need to skip some ASCII photoshop header info)
-        try {
-            while (offset < data.length - 1 && reader.getUInt16(offset) != 0x1c01 && reader.getUInt16(offset) != 0x1c02)
-                offset++;
-        } catch (BufferBoundsException e) {
-            directory.addError("Couldn't find start of IPTC data (invalid segment)");
-            return;
-        }
-*/
-
         // for each tag
-        while (offset < reader.getLength()) {
+        while (offset < length) {
 
             // identifies start of a tag
             short startByte;
             try {
-                startByte = reader.getUInt8(offset);
-            } catch (BufferBoundsException e) {
+                startByte = reader.getUInt8();
+                offset++;
+            } catch (IOException e) {
                 directory.addError("Unable to read starting byte of IPTC tag");
-                break;
+                return;
             }
 
             if (startByte != 0x1c) {
-                directory.addError("Invalid start to IPTC tag");
-                break;
+                // NOTE have seen images where there was one extra byte at the end, giving
+                // offset==length at this point, which is not worth logging as an error.
+                if (offset != length)
+                    directory.addError("Invalid IPTC tag marker at offset " + (offset - 1) + ". Expected '0x1c' but got '0x" + Integer.toHexString(startByte) + "'.");
+                return;
             }
 
             // we need at least five bytes left to read a tag
-            if (offset + 5 >= reader.getLength()) {
+            if (offset + 5 >= length) {
                 directory.addError("Too few bytes remain for a valid IPTC tag");
-                break;
-            }
-
-            offset++;
+                return;
+            }
 
             int directoryType;
@@ -99,42 +114,73 @@
             int tagByteCount;
             try {
-                directoryType = reader.getUInt8(offset++);
-                tagType = reader.getUInt8(offset++);
-                tagByteCount = reader.getUInt16(offset);
-                offset += 2;
-            } catch (BufferBoundsException e) {
+                directoryType = reader.getUInt8();
+                tagType = reader.getUInt8();
+                // TODO support Extended DataSet Tag (see 1.5(c), p14, IPTC-IIMV4.2.pdf)
+                tagByteCount = reader.getUInt16();
+                offset += 4;
+            } catch (IOException e) {
                 directory.addError("IPTC data segment ended mid-way through tag descriptor");
                 return;
             }
 
-            if (offset + tagByteCount > reader.getLength()) {
+            if (offset + tagByteCount > length) {
                 directory.addError("Data for tag extends beyond end of IPTC segment");
+                return;
+            }
+
+            try {
+                processTag(reader, directory, directoryType, tagType, tagByteCount);
+            } catch (IOException e) {
+                directory.addError("Error processing IPTC tag");
+                return;
+            }
+
+            offset += tagByteCount;
+        }
+    }
+
+    private void processTag(@NotNull SequentialReader reader, @NotNull Directory directory, int directoryType, int tagType, int tagByteCount) throws IOException
+    {
+        int tagIdentifier = tagType | (directoryType << 8);
+
+        // Some images have been seen that specify a zero byte tag, which cannot be of much use.
+        // We elect here to store an empty string for such a tag. The IPTC specification doesn't mention
+        // anything about the interpretation of this situation.
+        // https://raw.githubusercontent.com/wiki/drewnoakes/metadata-extractor/docs/IPTC-IIMV4.2.pdf
+        if (tagByteCount == 0) {
+            directory.setString(tagIdentifier, "");
+            return;
+        }
+
+        String string = null;
+
+        switch (tagIdentifier) {
+            case IptcDirectory.TAG_CODED_CHARACTER_SET:
+                byte[] bytes = reader.getBytes(tagByteCount);
+                String charset = Iso2022Converter.convertISO2022CharsetToJavaCharset(bytes);
+                if (charset == null) {
+                    // Unable to determine the charset, so fall through and treat tag as a regular string
+                    string = new String(bytes);
+                    break;
+                }
+                directory.setString(tagIdentifier, charset);
+                return;
+            case IptcDirectory.TAG_ENVELOPE_RECORD_VERSION:
+            case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
+            case IptcDirectory.TAG_FILE_VERSION:
+            case IptcDirectory.TAG_ARM_VERSION:
+            case IptcDirectory.TAG_PROGRAM_VERSION:
+                // short
+                if (tagByteCount >= 2) {
+                    int shortValue = reader.getUInt16();
+                    reader.skip(tagByteCount - 2);
+                    directory.setInt(tagIdentifier, shortValue);
+                    return;
+                }
                 break;
-            }
-
-            try {
-                processTag(reader, directory, directoryType, tagType, offset, tagByteCount);
-            } catch (BufferBoundsException e) {
-                directory.addError("Error processing IPTC tag");
-                break;
-            }
-
-            offset += tagByteCount;
-        }
-    }
-
-    private void processTag(@NotNull BufferReader reader, @NotNull Directory directory, int directoryType, int tagType, int offset, int tagByteCount) throws BufferBoundsException
-    {
-        int tagIdentifier = tagType | (directoryType << 8);
-
-        switch (tagIdentifier) {
-            case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
-                // short
-                int shortValue = reader.getUInt16(offset);
-                directory.setInt(tagIdentifier, shortValue);
-                return;
             case IptcDirectory.TAG_URGENCY:
                 // byte
-                directory.setInt(tagIdentifier, reader.getUInt8(offset));
+                directory.setInt(tagIdentifier, reader.getUInt8());
+                reader.skip(tagByteCount - 1);
                 return;
             case IptcDirectory.TAG_RELEASE_DATE:
@@ -142,15 +188,17 @@
                 // Date object
                 if (tagByteCount >= 8) {
-                    String dateStr = reader.getString(offset, tagByteCount);
+                    string = reader.getString(tagByteCount);
                     try {
-                        int year = Integer.parseInt(dateStr.substring(0, 4));
-                        int month = Integer.parseInt(dateStr.substring(4, 6)) - 1;
-                        int day = Integer.parseInt(dateStr.substring(6, 8));
+                        int year = Integer.parseInt(string.substring(0, 4));
+                        int month = Integer.parseInt(string.substring(4, 6)) - 1;
+                        int day = Integer.parseInt(string.substring(6, 8));
                         Date date = new java.util.GregorianCalendar(year, month, day).getTime();
                         directory.setDate(tagIdentifier, date);
                         return;
                     } catch (NumberFormatException e) {
-                        // fall through and we'll store whatever was there as a String
+                        // fall through and we'll process the 'string' value below
                     }
+                } else {
+                    reader.skip(tagByteCount);
                 }
             case IptcDirectory.TAG_RELEASE_TIME:
@@ -162,9 +210,14 @@
 
         // If we haven't returned yet, treat it as a string
-        String str;
-        if (tagByteCount < 1) {
-            str = "";
-        } else {
-            str = reader.getString(offset, tagByteCount, System.getProperty("file.encoding")); // "ISO-8859-1"
+        // NOTE that there's a chance we've already loaded the value as a string above, but failed to parse the value
+        if (string == null) {
+            String encoding = directory.getString(IptcDirectory.TAG_CODED_CHARACTER_SET);
+            if (encoding != null) {
+                string = reader.getString(tagByteCount, encoding);
+            } else {
+                byte[] bytes = reader.getBytes(tagByteCount);
+                encoding = Iso2022Converter.guessEncoding(bytes);
+                string = encoding != null ? new String(bytes, encoding) : new String(bytes);
+            }
         }
 
@@ -179,8 +232,8 @@
                 System.arraycopy(oldStrings, 0, newStrings, 0, oldStrings.length);
             }
-            newStrings[newStrings.length - 1] = str;
+            newStrings[newStrings.length - 1] = string;
             directory.setStringArray(tagIdentifier, newStrings);
         } else {
-            directory.setString(tagIdentifier, str);
+            directory.setString(tagIdentifier, string);
         }
     }
