/* xmp-parse.c - simple parser for XMP metadata
 *
 * Copyright (C) 2004-2007, Raphaël Quinet <raphael@gimp.org>
 *
 * This library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

/* This code implements a simple parser for XMP metadata.  Its API is
 * based on the one provided by GMarkupParser (part of Glib).
 *
 * This is not a full RDF parser: it shares some of the limitations
 * inherited from glib (UTF-8 only, no special entities) and supports
 * RDF only to the extent needed for XMP.  XMP defines several
 * "schemas" containing a list of "properties".  Each property in a
 * schema has one value, which can be a simple type (e.g., integer or
 * text) or a structured type (rdf:Alt, rdf:Bag, rdf:Seq).  As there
 * is no need to support a much deeper nesting of elements, this
 * parser does not try to maintain an arbitrarily large stack of
 * elements.  Also, it does not support RDF features that are
 * forbidden by the XMP specs, such as rdf:parseType="Litteral".
 *
 * The design goals for this parser are: support all RDF features
 * needed for XMP (at least the features explicitely described in the
 * XMP spec), be tolerant in case unknown elements or attributes are
 * found, be as simple as possible, avoid building a DOM tree.
 *
 * TODO:
 * - support UCS-2 and UCS-4 besides UTF-8 (copy and convert the data)
 * - write a decent scanner for finding <?xpacket...?> as recommended
 *   in the XMP specification (including support for UCS-2 and UCS-4)
 * - provide an API for passing unknown elements or tags to the caller
 * - think about re-writing this using a better XML parser (expat?)
 *   instead of the GMarkup parser
 */

#ifndef WITHOUT_GIMP
#  include "config.h"
#  include <string.h>
#  include "libgimp/stdplugins-intl.h"
#else
#  include <string.h>
#  define _(String) (String)
#  define N_(String) (String)
#endif
#include "xmp-parse.h"

GQuark
xmp_parse_error_quark (void)
{
  static GQuark error_quark = 0;

  if (error_quark == 0)
    error_quark = g_quark_from_static_string ("xmp-parse-error-quark");

  return error_quark;
}

/* The current version of XMP (January 2004) is relatively simple in
 * that only a few elements (<rdf:Description>, <rdf:Alt>, <rdf:Bag>,
 * <rdf:Seq>) may include other elements and no deep nesting is
 * allowed.  As a result, it is possible to include all allowed
 * combinations directly into the state instead of having to maintain
 * a separate stack besides the simple state enum.  There is only a
 * 1-element stack (saved_state) used for some special cases such as
 * when skipping unknown tags or parsing a property with qualifiers.
 *
 * Here is a quick overview of the structure of an XMP document and
 * the corresponding state after reading each element.  Depending on
 * the contents of each property, we can have the following cases
 * that are summarized as STATE_INSIDE... below:
 * - structured types can use any valid combination of the states
 *   between STATE_INSIDE_QDESC and STATE_INSIDE_SEQ_LI_RSC;
 * - simple property types contain some text and no other element so
 *   the only state will be STATE_INSIDE_PROPERTY while reading that
 *   text;
 * - if the shorthand notation is used for some simple properties,
 *   then they will be written as attributes of a top level
 *   rdf:Description instead of being a separate element, so the
 *   state will not go deeper than STATE_INSIDE_TOPLEVEL_DESC.
 *
 * (init)                                 STATE_START
 * <?xpacket begin='' id='...'?>           STATE_INSIDE_XPACKET
 *  <x:xmpmeta xmlns:x='adobe:ns:meta/'>    STATE_INSIDE_XMPMETA
 *   <rdf:RDF xmlns:rdf='...'>               STATE_INSIDE_RDF
 *    <rdf:Description rdf:about='' ...>      STATE_INSIDE_TOPLEVEL_DESC
 *     <foo:bar>                               STATE_INSIDE_PROPERTY
 *       ... (simple or structured property)    STATE_INSIDE...
 *     </foo:bar>                             STATE_INSIDE_TOPLEVEL_DESC
 *     <foo:baz>                               STATE_INSIDE_PROPERTY
 *       ... (simple or structured property)    STATE_INSIDE...
 *     </foo:baz>                             STATE_INSIDE_TOPLEVEL_DESC
 *    </rdf:Description>                     STATE_INSIDE_RDF
 *    <rdf:Description ...>                   STATE_INSIDE_TOPLEVEL_DESC
 *     ... (some properties)                   STATE_INSIDE_PROPERTY
 *    </rdf:Description>                     STATE_INSIDE_RDF
 *    ...
 *   </rdf:RDF>                             STATE_AFTER_RDF
 *  </x:xmpmeta>                           STATE_AFTER_XMPMETA
 * <?xpacket end='r'?>                    STATE_AFTER_XPACKET
 *
 * Note: The abbreviation QDESC is used for the properties with
 * qualifiers (when <rdf:Description> is used deeper than at the top
 * level inside <rdf:RDF>).  In that case, QDESC_VALUE contains the
 * value of the property and QDESC_QUAL is used for each of the
 * optional qualifiers (which are currently ignored).
 */
typedef enum
{
  STATE_START,
  STATE_INSIDE_XPACKET,
  STATE_INSIDE_XMPMETA,
  STATE_INSIDE_RDF,
  STATE_INSIDE_TOPLEVEL_DESC,
  STATE_INSIDE_PROPERTY,
  STATE_INSIDE_QDESC,
  STATE_INSIDE_QDESC_VALUE,
  STATE_INSIDE_QDESC_QUAL,
  STATE_INSIDE_STRUCT_ADD_NS,
  STATE_INSIDE_STRUCT,
  STATE_INSIDE_STRUCT_ELEMENT,
  STATE_INSIDE_ALT,
  STATE_INSIDE_ALT_LI,
  STATE_INSIDE_ALT_LI_RSC,
  STATE_INSIDE_ALT_LI_RSC_IMG,
  STATE_INSIDE_BAG,
  STATE_INSIDE_BAG_LI,
  STATE_INSIDE_BAG_LI_RSC,
  STATE_INSIDE_SEQ,
  STATE_INSIDE_SEQ_LI,
  STATE_INSIDE_SEQ_LI_RSC,
  STATE_AFTER_RDF,
  STATE_AFTER_XMPMETA,
  STATE_AFTER_XPACKET,
  STATE_SKIPPING_UNKNOWN_ELEMENTS,
  STATE_SKIPPING_IGNORED_ELEMENTS,
  STATE_ERROR
} XMPParseState;

typedef struct
{
  gint         depth;
  gchar       *uri;
  gchar       *prefix;
  gint         prefix_len;
  gpointer     ns_user_data;
} XMLNameSpace;

struct _XMPParseContext
{
  const XMPParser *parser;
  XMPParseFlags    flags;

  gpointer         user_data;
  GDestroyNotify   user_data_dnotify;

  XMPParseState    state;
  gint             depth;

  GSList          *namespaces;
  gchar           *xmp_prefix;
  guint            xmp_prefix_len;
  gchar           *rdf_prefix;
  guint            rdf_prefix_len;

  gchar           *property;
  XMLNameSpace    *property_ns;
  XMPParseType     property_type;
  gchar          **prop_value;
  gint             prop_cur_value;
  gint             prop_max_value;
  gboolean         prop_missing_value;
  /* used when skipping tags, or inside a struct or property qualifier */
  XMPParseState    saved_state;
  gint             saved_depth;

  GMarkupParseContext *markup_context;
};

#ifdef DEBUG_XMP_PARSER
static const gchar * const state_names[] =
{
  "START",
  "INSIDE_XPACKET",
  "INSIDE_XMPMETA",
  "INSIDE_RDF",
  "INSIDE_TOPLEVEL_DESC",
  "INSIDE_PROPERTY",
  "INSIDE_QDESC",
  "INSIDE_QDESC_VALUE",
  "INSIDE_QDESC_QUAL",
  "INSIDE_STRUCT_ADD_NS",
  "INSIDE_STRUCT",
  "INSIDE_STRUCT_ELEMENT",
  "INSIDE_ALT",
  "INSIDE_ALT_LI",
  "INSIDE_ALT_LI_RSC",
  "INSIDE_ALT_LI_RSC_IMG",
  "INSIDE_BAG",
  "INSIDE_BAG_LI",
  "INSIDE_BAG_LI_RSC",
  "INSIDE_SEQ",
  "INSIDE_SEQ_LI",
  "INSIDE_SEQ_LI_RSC",
  "AFTER_RDF",
  "AFTER_XMPMETA",
  "AFTER_XPACKET",
  "SKIPPING_UNKNOWN_ELEMENTS",
  "SKIPPING_IGNORED_ELEMENTS",
  "ERROR",
};
#endif

/* report an error and propagate it */
static void
parse_error (XMPParseContext  *context,
             GError          **error,
             XMPParseError     code,
             const gchar      *format,
             ...)
{
  GError  *tmp_error;

  if (code == XMP_ERROR_NO_XPACKET)
    tmp_error = g_error_new (XMP_PARSE_ERROR, code,
                             _("Error: No XMP packet found"));
  else
    {
      gchar   *s;
      va_list  args;
      gint     line_number;
      gint     char_number;

      va_start (args, format);
      s = g_strdup_vprintf (format, args);
      va_end (args);

      g_markup_parse_context_get_position (context->markup_context,
                                           &line_number,
                                           &char_number);
      tmp_error = g_error_new (XMP_PARSE_ERROR, code,
                               _("Error on line %d char %d: %s"),
                               line_number, char_number, s);
      g_free (s);
    }

  context->state = STATE_ERROR;
  if (context->parser->error)
    (*context->parser->error) (context, tmp_error, context->user_data);

  g_propagate_error (error, tmp_error);
}

/* report an error if an unexpected element is found in the wrong context */
static void
parse_error_element (XMPParseContext  *context,
                     GError          **error,
                     const gchar      *expected_element,
                     gboolean          optional,
                     const gchar      *found_element)
{
  if (optional == TRUE)
    parse_error (context, error, XMP_ERROR_UNEXPECTED_ELEMENT,
                 _("Expected text or optional element <%s>, found <%s> instead"),
                 expected_element, found_element);
  else
    parse_error (context, error, XMP_ERROR_UNEXPECTED_ELEMENT,
                 _("Expected element <%s>, found <%s> instead"),
                 expected_element, found_element);
}

/* skip an unknown element (unknown property) and its contents */
static void
unknown_element (XMPParseContext  *context,
                 GError          **error,
                 const gchar      *element_name)
{
#ifdef DEBUG_XMP_PARSER
  g_print ("XMP: SKIPPING %s\n", element_name);
#endif
  if (context->flags & XMP_FLAG_NO_UNKNOWN_ELEMENTS)
    parse_error (context, error, XMP_ERROR_UNKNOWN_ELEMENT,
                 _("Unknown element <%s>"),
                 element_name);
  else
    {
      context->saved_depth = context->depth;
      context->saved_state = context->state;
      context->state = STATE_SKIPPING_UNKNOWN_ELEMENTS;
    }
}

/* skip and element and all other elements that it may contain */
static void
ignore_element (XMPParseContext *context)
{
  context->saved_depth = context->depth;
  context->saved_state = context->state;
  context->state = STATE_SKIPPING_IGNORED_ELEMENTS;
}

/* skip an unknown attribute (or abort if flags forbid unknown attributes) */
static void
unknown_attribute (XMPParseContext  *context,
                   GError          **error,
                   const gchar      *element_name,
                   const gchar      *attribute_name,
                   const gchar      *attribute_value)
{
  if (context->flags & XMP_FLAG_NO_UNKNOWN_ATTRIBUTES)
    parse_error (context, error, XMP_ERROR_UNKNOWN_ATTRIBUTE,
                 _("Unknown attribute \"%s\"=\"%s\" in element <%s>"),
                 attribute_name, attribute_value, element_name);
#ifdef DEBUG_XMP_PARSER
  g_print ("skipping unknown attribute \"%s\"=\"%s\" in element <%s>\n",
           attribute_name, attribute_value, element_name);
#endif
}

static gboolean
is_whitespace_string (const gchar *string)
{
  const gchar *p;

  if (string == NULL)
    return TRUE;

  /* XML accepts only 4 ASCII chars as whitespace and no other UNICODE chars */
  for (p = string; *p; ++p)
    if (*p != ' ' && *p != '\t' && *p != '\r' && *p != '\n')
      return FALSE;

  return TRUE;
}

/* new namespace/schema seen - add it to the list of namespaces */
static void
push_namespace (XMPParseContext  *context,
                const gchar      *uri,
                const gchar      *prefix,
                GError          **error)
{
  XMLNameSpace *ns;

  if (! strcmp (uri, "adobe:ns:meta/"))
    {
      g_free (context->xmp_prefix);
      context->xmp_prefix = g_strdup (prefix); /* XMP recommends "x:" */
      context->xmp_prefix_len = strlen (prefix);
      return;
    }

  if (! strcmp (uri, "http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
    {
      g_free (context->rdf_prefix);
      context->rdf_prefix = g_strdup (prefix); /* XMP recommends "rdf:" */
      context->rdf_prefix_len = strlen (prefix);
      return;
    }

  ns = g_slice_new0 (XMLNameSpace);
  ns->depth = context->depth;
  ns->uri = g_strdup (uri);
  ns->prefix = g_strdup (prefix);
  ns->prefix_len = strlen (prefix);
  context->namespaces = g_slist_prepend (context->namespaces, ns);

  if (context->parser->start_schema)
    ns->ns_user_data = (*context->parser->start_schema) (context,
                                                         ns->uri,
                                                         ns->prefix,
                                                         context->user_data,
                                                         error);
  else
    ns->ns_user_data = NULL;
}

/* free all namespaces that are deeper than the current element depth */
static void
pop_namespaces (XMPParseContext  *context,
                GError          **error)
{
  XMLNameSpace *ns;

  if (context->namespaces == NULL)
    return;

  ns = context->namespaces->data;

  while (ns->depth >= context->depth)
    {
      if (context->parser->end_schema)
        (*context->parser->end_schema) (context,
                                        ns->ns_user_data,
                                        context->user_data,
                                        error);
      g_free (ns->uri);
      g_free (ns->prefix);
      g_slice_free (XMLNameSpace, ns);

      context->namespaces = g_slist_delete_link (context->namespaces,
                                                 context->namespaces);
      if (context->namespaces == NULL)
        break;

      ns = context->namespaces->data;
    }
}

/* checks if an element name starts with the prefix of the given namespace */
static gboolean
has_ns_prefix (const gchar  *name,
               XMLNameSpace *ns)
{
  if (ns == NULL)
    return FALSE;

  return ((strncmp (name, ns->prefix, ns->prefix_len) == 0)
          && (name[ns->prefix_len] == ':'));
}

/* checks if an element or attribute matches a target with the given prefix */
static gboolean
matches_with_prefix (const gchar *name,
                     const gchar *prefix,
                     guint        prefix_len,
                     const gchar *target_name)
{
  if (prefix == NULL)
    return FALSE;

  return ((strncmp (name, prefix, prefix_len) == 0)
          && (strlen (name) > prefix_len + 1)
          && (name[prefix_len] == ':')
          && (strcmp (name + prefix_len + 1, target_name) == 0));
}

/* checks if an element or attribute matches a target in the RDF namespace */
static gboolean
matches_rdf (const gchar     *name,
             XMPParseContext *context,
             const gchar     *target_name)
{
  if (context->rdf_prefix != NULL)
    return matches_with_prefix (name,
                                context->rdf_prefix,
                                context->rdf_prefix_len,
                                target_name);
  else
    return matches_with_prefix (name, "rdf", 3, target_name);
}

/* add a new property to the schema referenced by its prefix */
/* the value(s) of the property will be added later by add_property_value() */
static XMLNameSpace *
new_property_in_ns (XMPParseContext  *context,
                    const gchar      *element_name)
{
  GSList       *list;
  XMLNameSpace *ns;

  g_return_val_if_fail (context->property == NULL, NULL);
  g_return_val_if_fail (context->prop_cur_value == -1, NULL);

  /* element_name is a new property if it starts with a known prefix */
  for (list = context->namespaces;
       list != NULL;
       list = g_slist_next (list))
    {
      ns = list->data;

      if (has_ns_prefix (element_name, ns))
        {
          context->property = g_strdup (element_name + ns->prefix_len + 1);
          context->property_type = XMP_PTYPE_UNKNOWN;
          context->property_ns = ns;
          context->prop_missing_value = FALSE;
          return ns;
        }
    }

  return NULL;
}

/* store a value for the current property - if the element containing the */
/* value is being parsed but the actual value has not been seen yet, then */
/* call this function with a NULL value so that its data structure is */
/* allocated now; it will be updated later with update_property_value() */
static void
add_property_value (XMPParseContext *context,
                    XMPParseType     type,
                    gchar           *name,
                    gchar           *value)
{
  g_return_if_fail (context->property != NULL);

  if (type == XMP_PTYPE_TEXT || type == XMP_PTYPE_RESOURCE)
    g_return_if_fail (context->prop_cur_value < 0);

  if (context->property_type != type)
    {
      /* make sure that we are not mixing different types in this property */
      g_return_if_fail (context->property_type == XMP_PTYPE_UNKNOWN);
      context->property_type = type;
    }
  if (context->prop_cur_value + 3 >= context->prop_max_value)
    {
      context->prop_max_value += 10;
      context->prop_value = g_realloc (context->prop_value,
                                       sizeof (gchar *)
                                       * context->prop_max_value);
    }
  /* some types store a name and a value; most others store only a value */
  if (type == XMP_PTYPE_ALT_LANG
      || type == XMP_PTYPE_STRUCTURE
      || type == XMP_PTYPE_ALT_THUMBS) /* for thumbnails, name is the size */
    {
      context->prop_cur_value++;
      context->prop_value[context->prop_cur_value] = name;
    }
  else
    g_assert (name == NULL);

  context->prop_cur_value++;
  context->prop_value[context->prop_cur_value] = value;
  context->prop_value[context->prop_cur_value + 1] = NULL;
  /* if value was NULL, then we must update it later */
  context->prop_missing_value = (value == NULL);
}

/* update a value that has been allocated but not stored yet */
static void
update_property_value (XMPParseContext *context,
                       gchar           *value)
{
  g_return_if_fail (context->property != NULL);
  g_return_if_fail (context->prop_cur_value >= 0);
  g_return_if_fail (context->prop_missing_value == TRUE);

  context->prop_value[context->prop_cur_value] = value;
  context->prop_missing_value = FALSE;
}

/* invoke the 'set_property' callback and free the temporary structures */
static void
propagate_property (XMPParseContext  *context,
                    GError          **error)
{
  g_return_if_fail (context->property != NULL);
  g_return_if_fail (context->prop_cur_value >= 0);

  if (context->parser->set_property)
    (*context->parser->set_property) (context,
                                      context->property,
                                      context->property_type,
                                      (const gchar **)(context->prop_value),
                                      context->property_ns->ns_user_data,
                                      context->user_data,
                                      error);

  if (! (context->flags & XMP_FLAG_DEFER_VALUE_FREE))
    {
      while (context->prop_cur_value >= 0)
        {
          g_free (context->prop_value[context->prop_cur_value]);
          context->prop_cur_value--;
        }

      g_free (context->prop_value);
    }

  context->prop_value = NULL;
  context->prop_cur_value = -1;
  context->prop_max_value = 0;
  g_free (context->property);
  context->property = NULL;
  context->property_ns = NULL;
}

/* called from the GMarkupParser */
static void
start_element_handler  (GMarkupParseContext  *markup_context,
                        const gchar          *element_name,
                        const gchar         **attribute_names,
                        const gchar         **attribute_values,
                        gpointer              user_data,
                        GError              **error)
{
  XMPParseContext *context = user_data;
  gint             attr;

#ifdef DEBUG_XMP_PARSER
  g_print ("[%25s/%17s] %d <%s>\n",
           state_names[context->state],
           (context->saved_state == STATE_ERROR
            ? "-"
            : state_names[context->saved_state]),
           context->depth, element_name);
#endif
  context->depth++;
  for (attr = 0; attribute_names[attr] != NULL; ++attr)
    if (g_str_has_prefix (attribute_names[attr], "xmlns:"))
      push_namespace (context, attribute_values[attr],
                      attribute_names[attr] + sizeof ("xmlns:") - 1, error);
  switch (context->state)
    {
    case STATE_INSIDE_XPACKET:
      if (! strcmp (element_name, "x:xmpmeta")
	  || ! strcmp (element_name, "x:xapmeta")
          || matches_with_prefix (element_name, context->xmp_prefix,
                                  context->xmp_prefix_len, "xmpmeta"))
	context->state = STATE_INSIDE_XMPMETA;
      else if (matches_rdf (element_name, context, "RDF"))
        {
          /* the x:xmpmeta element is missing, but this is allowed */
          context->depth++;
          context->state = STATE_INSIDE_RDF;
        }
      else
	parse_error_element (context, error, "x:xmpmeta",
                             FALSE, element_name);
      break;

    case STATE_INSIDE_XMPMETA:
      if (matches_rdf (element_name, context, "RDF"))
	context->state = STATE_INSIDE_RDF;
      else
	parse_error_element (context, error, "rdf:RDF",
                             FALSE, element_name);
      break;

    case STATE_INSIDE_RDF:
      if (matches_rdf (element_name, context, "Description"))
	{
          XMLNameSpace *ns;
          gboolean      about_seen = FALSE;

	  context->state = STATE_INSIDE_TOPLEVEL_DESC;
	  for (attr = 0; attribute_names[attr] != NULL; ++attr)
	    {
              if (matches_rdf (attribute_names[attr], context, "about")
                  || ! strcmp (attribute_names[attr], "about") /* old style */)
                about_seen = TRUE;
	      else if (g_str_has_prefix (attribute_names[attr], "xmlns"))
                ; /* the namespace has already been pushed on the stack */
	      else
                {
                  ns = new_property_in_ns (context, attribute_names[attr]);
                  if (ns != NULL)
                    {
                      /* RDF shorthand notation */
                      add_property_value (context, XMP_PTYPE_TEXT, NULL,
                                          g_strdup (attribute_values[attr]));
                      propagate_property (context, error);
                    }
                  else
                    unknown_attribute (context, error, element_name,
                                       attribute_names[attr],
                                       attribute_values[attr]);
                }
            }
          if ((about_seen == FALSE)
              && (context->flags & XMP_FLAG_NO_MISSING_ABOUT))
            parse_error (context, error, XMP_ERROR_MISSING_ABOUT,
                             _("Required attribute rdf:about missing in <%s>"),
                             element_name);
	}
      else
	parse_error_element (context, error, "rdf:Description",
                                 FALSE, element_name);
      break;

    case STATE_INSIDE_TOPLEVEL_DESC:
      {
        XMLNameSpace *ns;

        ns = new_property_in_ns (context, element_name);
        if (ns != NULL)
          {
            context->state = STATE_INSIDE_PROPERTY;
            for (attr = 0; attribute_names[attr] != NULL; ++attr)
              {
                if (matches_rdf (attribute_names[attr], context, "resource"))
                  add_property_value (context, XMP_PTYPE_RESOURCE, NULL,
                                      g_strdup (attribute_values[attr]));
                else if (matches_rdf (attribute_names[attr], context,
                                      "parseType")
                         && ! strcmp (attribute_values[attr], "Resource"))
                  {
                    context->saved_state = STATE_INSIDE_TOPLEVEL_DESC;
                    context->state = STATE_INSIDE_STRUCT_ADD_NS;
                  }
                else
                  unknown_attribute (context, error, element_name,
                                     attribute_names[attr],
                                     attribute_values[attr]);
              }
          }
        else
          unknown_element (context, error, element_name);
      }
      break;

    case STATE_INSIDE_PROPERTY:
      if (matches_rdf (element_name, context, "Description"))
	{
          context->saved_state = context->state;
	  context->state = STATE_INSIDE_QDESC;
	  for (attr = 0; attribute_names[attr] != NULL; ++attr)
	    {
	      if (g_str_has_prefix (attribute_names[attr], "xmlns"))
		{
		  /* this desc. is a structure, not a property qualifier */
		  context->state = STATE_INSIDE_STRUCT_ADD_NS;
		}
	      else
                unknown_attribute (context, error, element_name,
                                   attribute_names[attr],
                                   attribute_values[attr]);
	    }
	}
      else if (matches_rdf (element_name, context, "Alt"))
	context->state = STATE_INSIDE_ALT;
      else if (matches_rdf (element_name, context, "Bag"))
	context->state = STATE_INSIDE_BAG;
      else if (matches_rdf (element_name, context, "Seq"))
	context->state = STATE_INSIDE_SEQ;
      else
        unknown_element (context, error, element_name);
      break;

    case STATE_INSIDE_QDESC:
      if (matches_rdf (element_name, context, "value"))
	context->state = STATE_INSIDE_QDESC_VALUE;
      else
	context->state = STATE_INSIDE_QDESC_QUAL;
      break;

    case STATE_INSIDE_STRUCT_ADD_NS:
    case STATE_INSIDE_STRUCT:
      {
        GSList       *ns_list;
        XMLNameSpace *ns;
        gboolean      found;

        /* compare with namespaces in scope of current rdf:Description */
        found = FALSE;
        ns_list = context->namespaces;
        while (ns_list != NULL)
          {
            ns = ns_list->data;
            if (ns->depth < context->depth - 2)
              break;
            if (has_ns_prefix (element_name, ns))
              {
                if (context->state == STATE_INSIDE_STRUCT_ADD_NS)
                  {
                    /* first element - save the namespace prefix and uri */
                    add_property_value (context, XMP_PTYPE_STRUCTURE,
                                        g_strdup (ns->prefix),
                                        g_strdup (ns->uri));
                  }
                context->state = STATE_INSIDE_STRUCT_ELEMENT;
                add_property_value (context, XMP_PTYPE_STRUCTURE,
                                    g_strdup (element_name
                                              + ns->prefix_len + 1),
                                    NULL);
                found = TRUE;
                break;
              }
            ns_list = ns_list->next;
          }
        if (found == FALSE)
          unknown_element (context, error, element_name);
      }
      break;

    case STATE_INSIDE_ALT:
      if (matches_rdf (element_name, context, "li"))
	{
	  context->state = STATE_INSIDE_ALT_LI;
	  for (attr = 0; attribute_names[attr] != NULL; ++attr)
	    {
	      if (matches_rdf (attribute_names[attr], context, "parseType")
                  && ! strcmp (attribute_values[attr], "Resource"))
                context->state = STATE_INSIDE_ALT_LI_RSC;
	      else if (! strcmp (attribute_names[attr], "xml:lang"))
                add_property_value (context, XMP_PTYPE_ALT_LANG,
                                    g_strdup (attribute_values[attr]),
                                    NULL);
	      else
                unknown_attribute (context, error, element_name,
                                   attribute_names[attr],
                                   attribute_values[attr]);
	    }
          /* rdf:Alt is not an ordered list, but some broken XMP files use */
          /* it instead of rdf:Seq.  Workaround: if we did not find some */
          /* attributes for the valid cases ALT_LANG or ALT_LI_RSC, then */
          /* we pretend that we are parsing a real list (bug #343315). */
          if ((context->property_type != XMP_PTYPE_ALT_LANG)
              && (context->state != STATE_INSIDE_ALT_LI_RSC))
            add_property_value (context, XMP_PTYPE_ORDERED_LIST,
                                NULL, NULL);
	}
      else
	parse_error_element (context, error, "rdf:li",
                                 FALSE, element_name);
      break;

    case STATE_INSIDE_BAG:
      if (matches_rdf (element_name, context, "li"))
	{
          context->state = STATE_INSIDE_BAG_LI;
	  for (attr = 0; attribute_names[attr] != NULL; ++attr)
	    {
	      if (matches_rdf (attribute_names[attr], context, "parseType")
                  && ! strcmp (attribute_values[attr], "Resource"))
                context->state = STATE_INSIDE_BAG_LI_RSC;
	      else
                unknown_attribute (context, error, element_name,
                                   attribute_names[attr],
                                   attribute_values[attr]);
	    }
          if (context->state != STATE_INSIDE_BAG_LI_RSC)
            add_property_value (context, XMP_PTYPE_UNORDERED_LIST,
                                NULL, NULL);
	}
      else
	parse_error_element (context, error, "rdf:li",
                                 FALSE, element_name);
      break;

    case STATE_INSIDE_SEQ:
      if (matches_rdf (element_name, context, "li"))
	{
          context->state = STATE_INSIDE_SEQ_LI;
	  for (attr = 0; attribute_names[attr] != NULL; ++attr)
	    {
	      if (matches_rdf (attribute_names[attr], context, "parseType")
                  && ! strcmp (attribute_values[attr], "Resource"))
                context->state = STATE_INSIDE_SEQ_LI_RSC;
	      else
                unknown_attribute (context, error, element_name,
                                   attribute_names[attr],
                                   attribute_values[attr]);
	    }
          if (context->state != STATE_INSIDE_SEQ_LI_RSC)
            add_property_value (context, XMP_PTYPE_ORDERED_LIST,
                                NULL, NULL);
	}
      else
	parse_error_element (context, error, "rdf:li",
                                 FALSE, element_name);
      break;

    case STATE_INSIDE_BAG_LI:
    case STATE_INSIDE_SEQ_LI:
      if (matches_rdf (element_name, context, "Description"))
        {
          context->saved_state = context->state;
          context->state = STATE_INSIDE_QDESC;
        }
      else
	parse_error_element (context, error, "rdf:Description",
                             TRUE, element_name);
      break;

    case STATE_INSIDE_ALT_LI_RSC:
      /* store the thumbnail image and ignore the other elements */
      if (! strcmp (element_name, "xapGImg:image")) /* FIXME */
        context->state = STATE_INSIDE_ALT_LI_RSC_IMG;
      else if (! strcmp (element_name, "xapGImg:format")
               || ! strcmp (element_name, "xapGImg:width")
               || ! strcmp (element_name, "xapGImg:height"))
        ignore_element (context);
      else
        unknown_element (context, error, element_name);
      break;

    case STATE_INSIDE_BAG_LI_RSC:
    case STATE_INSIDE_SEQ_LI_RSC:
      unknown_element (context, error, element_name);
      break;

    case STATE_SKIPPING_UNKNOWN_ELEMENTS:
    case STATE_SKIPPING_IGNORED_ELEMENTS:
      break;

    default:
      parse_error (context, error, XMP_ERROR_PARSE,
                   _("Nested elements (<%s>) are not allowed in this context"),
                   element_name);
      break;
    }
}

/* called from the GMarkupParser */
static void
end_element_handler    (GMarkupParseContext  *markup_context,
                        const gchar          *element_name,
                        gpointer              user_data,
                        GError              **error)
{
  XMPParseContext *context = user_data;

#ifdef DEBUG_XMP_PARSER
  g_print ("[%25s/%17s] %d </%s>\n",
           state_names[context->state],
           (context->saved_state == STATE_ERROR
            ? "-"
            : state_names[context->saved_state]),
           context->depth, element_name);
#endif
  switch (context->state)
    {
    case STATE_INSIDE_PROPERTY:
      context->state = STATE_INSIDE_TOPLEVEL_DESC;
      if (context->property != NULL)
        {
          if (context->prop_cur_value < 0)
            {
              /* if not set yet, then property was empty */
              add_property_value (context, XMP_PTYPE_TEXT,
                                  NULL, g_strdup (""));
            }
          propagate_property (context, error);
        }
      break;

    case STATE_INSIDE_STRUCT:
      context->state = context->saved_state;
      if (context->property != NULL)
        propagate_property (context, error);
      break;

    case STATE_INSIDE_ALT:
    case STATE_INSIDE_BAG:
    case STATE_INSIDE_SEQ:
      if (context->property && context->prop_cur_value < 0)
        {
          g_free (context->property);
          context->property = NULL;
        }
      context->state = STATE_INSIDE_PROPERTY;
      break;

    case STATE_INSIDE_QDESC:
      context->state = context->saved_state;
      break;

    case STATE_INSIDE_QDESC_VALUE:
    case STATE_INSIDE_QDESC_QUAL:
      context->state = STATE_INSIDE_QDESC;
      break;

    case STATE_INSIDE_STRUCT_ELEMENT:
      context->state = STATE_INSIDE_STRUCT;
      if ((context->prop_cur_value >= 0)
          && (context->prop_value[context->prop_cur_value] == NULL))
        update_property_value (context, g_strdup (""));
      break;

    case STATE_INSIDE_ALT_LI:
      context->state = STATE_INSIDE_ALT;
      if ((context->prop_cur_value >= 0)
          && (context->prop_value[context->prop_cur_value] == NULL))
        update_property_value (context, g_strdup (""));
      break;

    case STATE_INSIDE_ALT_LI_RSC:
      context->state = STATE_INSIDE_ALT;
      break;

    case STATE_INSIDE_ALT_LI_RSC_IMG:
      context->state = STATE_INSIDE_ALT_LI_RSC;
      break;

    case STATE_INSIDE_BAG_LI:
    case STATE_INSIDE_BAG_LI_RSC:
      context->state = STATE_INSIDE_BAG;
      break;

    case STATE_INSIDE_SEQ_LI:
    case STATE_INSIDE_SEQ_LI_RSC:
      context->state = STATE_INSIDE_SEQ;
      break;

    case STATE_INSIDE_TOPLEVEL_DESC:
      g_return_if_fail (matches_rdf (element_name, context, "Description"));
      context->state = STATE_INSIDE_RDF;
      break;

    case STATE_INSIDE_RDF:
      g_return_if_fail (matches_rdf (element_name, context, "RDF"));
      context->state = STATE_AFTER_RDF;
      break;

    case STATE_AFTER_RDF:
      g_return_if_fail (! strcmp (element_name, "x:xmpmeta")
                        || ! strcmp (element_name, "x:xapmeta")
                        || matches_with_prefix (element_name,
                                                context->xmp_prefix,
                                                context->xmp_prefix_len,
                                                "xmpmeta"));
      context->state = STATE_AFTER_XMPMETA;
      break;

    case STATE_SKIPPING_UNKNOWN_ELEMENTS:
    case STATE_SKIPPING_IGNORED_ELEMENTS:
      if (context->depth == context->saved_depth)
        {
          /* resume normal processing */
          context->state = context->saved_state;
          context->saved_state = STATE_ERROR;
        }
      break;

    default:
      parse_error (context, error, XMP_ERROR_PARSE,
		       _("End of element <%s> not expected in this context"),
                         element_name);
      break;
    }
  pop_namespaces (context, error);
  context->depth--;
}

/* called from the GMarkupParser */
static void
text_handler           (GMarkupParseContext  *markup_context,
                        const gchar          *text,
                        gsize                 text_len,
                        gpointer              user_data,
                        GError              **error)
{
  XMPParseContext *context = user_data;

  switch (context->state)
    {
    case STATE_INSIDE_PROPERTY:
      if (! is_whitespace_string (text))
        add_property_value (context, XMP_PTYPE_TEXT, NULL,
                            g_strndup (text, text_len));
      break;

    case STATE_INSIDE_STRUCT_ELEMENT:
    case STATE_INSIDE_ALT_LI:
    case STATE_INSIDE_BAG_LI:
    case STATE_INSIDE_SEQ_LI:
      if (! is_whitespace_string (text))
        update_property_value (context, g_strndup (text, text_len));
      break;

    case STATE_INSIDE_ALT_LI_RSC_IMG:
      {
        size_t  max_size;
        guchar *decoded;
        gint    decoded_size;
        gint    state;
        guint   save;

#ifdef DEBUG_XMP_PARSER
        /* g_print ("XMP: Pushing text:\n%s\n", text); */
#endif
        max_size = (text_len / 4) * 3 + 3;
        decoded = g_malloc (max_size);

        state = 0;
        save = 0;
        decoded_size = g_base64_decode_step (text, text_len,
                                             decoded,
                                             &state, &save);
#ifdef DEBUG_XMP_PARSER
        if (decoded_size > 0)
          {
            /* FIXME: remove this debugging code */
            /*
            FILE *ttt;

            ttt = fopen ("/tmp/xmp-thumb.jpg", "wb");
            fwrite (decoded, decoded_size, 1, ttt);
            fclose (ttt);
            */
            g_print ("XMP: Thumb text len: %d (1/4 = %d)\nMax size: %d\nUsed size: %d\n", (int) text_len, (int) text_len / 4, max_size, decoded_size);
          }
#endif
        if (decoded_size > 0)
          {
            gint  *size_p;

            size_p = g_new (gint, 1);
            *size_p = decoded_size;
            add_property_value (context, XMP_PTYPE_ALT_THUMBS,
                                (gchar *) size_p, (gchar *) decoded);
          }
        else
          add_property_value (context, XMP_PTYPE_ALT_THUMBS, NULL, NULL);
      }
      break;

    case STATE_INSIDE_QDESC_VALUE:
      if (! is_whitespace_string (text))
        {
          if (context->saved_state == STATE_INSIDE_PROPERTY)
            add_property_value (context, XMP_PTYPE_TEXT, NULL,
                                g_strndup (text, text_len));
          else
            update_property_value (context, g_strndup (text, text_len));
        }
      break;

    case STATE_INSIDE_QDESC_QUAL:
#ifdef DEBUG_XMP_PARSER
      g_print ("ignoring qualifier for part of \"%s\"[]: \"%.*s\"\n",
	       context->property,
	       (int)text_len, text);
#endif
      /* FIXME: notify the user? add a way to collect qualifiers? */
      break;

    case STATE_SKIPPING_UNKNOWN_ELEMENTS:
    case STATE_SKIPPING_IGNORED_ELEMENTS:
      break;

    default:
      if (! is_whitespace_string (text))
	parse_error (context, error, XMP_ERROR_INVALID_CONTENT,
			 _("The current element (<%s>) cannot contain text"),
                         g_markup_parse_context_get_element (markup_context));
      break;
    }
}

/* called from the GMarkupParser */
static void
passthrough_handler    (GMarkupParseContext  *markup_context,
                        const gchar          *passthrough_text,
                        gsize                 text_len,
                        gpointer              user_data,
                        GError              **error)
{
  XMPParseContext *context = user_data;

  switch (context->state)
    {
    case STATE_START:
    case STATE_AFTER_XPACKET:
      if ((text_len >= 21)
          && (! strncmp (passthrough_text, "<?xpacket begin=", 16)))
	context->state = STATE_INSIDE_XPACKET;
      else
	parse_error (context, error, XMP_ERROR_PARSE,
                     _("XMP packets must start with <?xpacket begin=...?>"));
      break;

    case STATE_AFTER_RDF:
      /* the x:xmpmeta element is missing */
      context->depth--;
      pop_namespaces (context, error);
      /*fallthrough*/
    case STATE_AFTER_XMPMETA:
      if ((text_len >= 19)
          && (! strncmp (passthrough_text, "<?xpacket end=", 14)))
	context->state = STATE_AFTER_XPACKET;
      else
	parse_error (context, error, XMP_ERROR_PARSE,
                     _("XMP packets must end with <?xpacket end=...?>"));
      break;

    default:
      if ((text_len >= 18)
          && (! strncmp (passthrough_text, "<?adobe-", 8)))
        ; /* ignore things like <?adobe-xap-filters esc="CRLF"?> */
      else if (! (context->flags & XMP_FLAG_NO_COMMENTS)
               && (text_len > 7)
               && (! strncmp (passthrough_text, "<!--", 4)))
        ; /* comments are not allowed in XMP, but let's ignore them */
      else
        parse_error (context, error, XMP_ERROR_INVALID_COMMENT,
                     _("XMP cannot contain XML comments or processing instructions"));
      break;
    }
}

/* called from the GMarkupParser */
static void
error_handler          (GMarkupParseContext *markup_context,
                        GError              *error,
                        gpointer             user_data)
{
  XMPParseContext *context = user_data;

  g_return_if_fail (context != NULL);

  context->state = STATE_ERROR;
  if ((error->domain != XMP_PARSE_ERROR) && (context->parser->error))
    (*context->parser->error) (context, error, context->user_data);
}

static GMarkupParser markup_xmp_parser = {
  start_element_handler,
  end_element_handler,
  text_handler,
  passthrough_handler,
  error_handler
};

/**
 * xmp_parse_context_new:
 * @parser: a #XMPParser
 * @flags: one or more #XMPParseFlags
 * @user_data: user data to pass to #GMarkupParser functions
 * @user_data_dnotify: user data destroy notifier called when the
 * parse context is freed
 *
 * Creates a new XMP parse context.  A parse context is used to parse
 * documents.  You can feed any number of documents containing XMP
 * metadata into a context, as long as no errors occur; once an error
 * occurs, the parse context can't continue to parse text (you have to
 * free it and create a new parse context).
 *
 * Return value: a new #XMPParseContext
 **/
XMPParseContext *
xmp_parse_context_new (const XMPParser *parser,
                       XMPParseFlags    flags,
                       gpointer         user_data,
                       GDestroyNotify   user_data_dnotify)
{
  XMPParseContext *context;

  g_return_val_if_fail (parser != NULL, NULL);

  context = g_slice_new0 (XMPParseContext);

  context->parser = parser;
  context->flags = flags;
  context->user_data = user_data;
  context->user_data_dnotify = user_data_dnotify;

  context->markup_context = g_markup_parse_context_new (&markup_xmp_parser,
                                                        0, context, NULL);
  context->state = STATE_START;
  context->depth = 0;

  context->namespaces = NULL;
  context->xmp_prefix = NULL;
  context->xmp_prefix_len = 0;
  context->rdf_prefix = NULL;
  context->rdf_prefix_len = 0;

  context->property = NULL;
  context->property_ns = NULL;
  context->prop_value = NULL;
  context->prop_cur_value = -1;
  context->prop_max_value = 0;
  context->prop_missing_value = FALSE;

  context->saved_state = STATE_ERROR;
  context->saved_depth = 0;

  return context;
}

/**
 * xmp_parse_context_free:
 * @context: a #XMPParseContext
 *
 * Frees a #XMPParseContext.  Cannot be called from inside one of the
 * #XMPParser functions.
 *
 **/
void
xmp_parse_context_free (XMPParseContext *context)
{
  g_return_if_fail (context != NULL);

  if (context->user_data_dnotify)
    (* context->user_data_dnotify) (context->user_data);

  g_slist_free (context->namespaces);
  g_free (context->xmp_prefix);
  g_free (context->rdf_prefix);

  if (! (context->flags & XMP_FLAG_DEFER_VALUE_FREE))
    {
      while (context->prop_cur_value >= 0)
        {
          g_free (context->prop_value[context->prop_cur_value]);
          context->prop_cur_value--;
        }
      g_free (context->prop_value);
    }

  g_free (context->property);
  g_slice_free (XMPParseContext, context);
}

/**
 * xmp_parse_context_parse:
 * @context: a #XMPParseContext
 * @text: chunk of text to parse
 * @text_len: length of @text in bytes
 * @error: return location for a #GError
 *
 * Feed some data to the #XMPParseContext.  The data need not be valid
 * UTF-8; an error will be signaled if it's invalid.  The data need
 * not be an entire document; you can feed a document into the parser
 * incrementally, via multiple calls to this function.  Typically, as
 * you receive data from a network connection or file, you feed each
 * received chunk of data into this function, aborting the process if
 * an error occurs.  Once an error is reported, no further data may be
 * fed to the #XMPParseContext; all errors are fatal.
 *
 * Return value: %FALSE if an error occurred, %TRUE on success
 **/
gboolean
xmp_parse_context_parse (XMPParseContext  *context,
                         const gchar      *text,
                         gssize            text_len,
                         GError          **error)
{
  g_return_val_if_fail (context != NULL, FALSE);
  g_return_val_if_fail (text != NULL, FALSE);
  g_return_val_if_fail (context->state != STATE_ERROR, FALSE);

  if ((context->flags & XMP_FLAG_FIND_XPACKET)
      && (context->state == STATE_START
          || context->state == STATE_AFTER_XPACKET))
    {
      /* There may be some arbitrary data before the <?xpacket...?>
       * marker so we should first find it according to the
       * recommendations of the XMP specification.  Once the start of
       * the XMP packet has been found, the GMarkupParser can start
       * its work on the text (including the <?xpacket...?> marker).
       */
      /* FIXME: wrong, wrong, wrong!  but useful for simple tests... */
      gint i, e;

      for (i = 0; i < text_len - 20; i++)
        if (! strncmp (text + i, "<?xpacket begin=", 16))
          {
            for (e = i; e < text_len - 10; e++)
              if (! strncmp (text + e, "<?xpacket end=", 14))
                break;

            while ((e < text_len) && *(text + e) != '>')
              e++;

            return g_markup_parse_context_parse (context->markup_context,
                                                 text + i, e - i + 1, error);
          }

      parse_error (context, error, XMP_ERROR_NO_XPACKET, NULL);

      return FALSE;
    }

  return g_markup_parse_context_parse (context->markup_context,
                                       text, text_len, error);
}

/**
 * xmp_parse_context_end_parse:
 * @context: a #XMPParseContext
 * @error: return location for a #GError
 *
 * Signals to the #XMPParseContext that all data has been fed into the
 * parse context with xmp_parse_context_parse().  This function
 * reports an error if the document did not contain any XMP packet or
 * if the document isn't complete, for example if elements are still
 * open.
 *
 * Return value: %TRUE on success, %FALSE if an error was set
 **/
gboolean
xmp_parse_context_end_parse (XMPParseContext  *context,
                             GError          **error)
{
  g_return_val_if_fail (context != NULL, FALSE);
  g_return_val_if_fail (context->state != STATE_ERROR, FALSE);

  if (context->state == STATE_START)
    parse_error (context, error, XMP_ERROR_NO_XPACKET, NULL);

  return g_markup_parse_context_end_parse (context->markup_context, error);
}
