diff options
author | Peter Eisentraut <peter_e@gmx.net> | 2006-12-21 16:05:16 +0000 |
---|---|---|
committer | Peter Eisentraut <peter_e@gmx.net> | 2006-12-21 16:05:16 +0000 |
commit | 8c1de5fb0010ae712568f1706b737270c3609bd8 (patch) | |
tree | bc328a654c41ea3eb1a9a27b76fd5215fb698608 /src/backend/utils/adt/xml.c | |
parent | ed1e9cd501b4dc89a6a7e5cef702f2f6830ae829 (diff) | |
download | postgresql-8c1de5fb0010ae712568f1706b737270c3609bd8.tar.gz postgresql-8c1de5fb0010ae712568f1706b737270c3609bd8.zip |
Initial SQL/XML support: xml data type and initial set of functions.
Diffstat (limited to 'src/backend/utils/adt/xml.c')
-rw-r--r-- | src/backend/utils/adt/xml.c | 942 |
1 files changed, 942 insertions, 0 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c new file mode 100644 index 00000000000..8997730fc8d --- /dev/null +++ b/src/backend/utils/adt/xml.c @@ -0,0 +1,942 @@ +/*------------------------------------------------------------------------- + * + * xml.c + * XML data type support. + * + * + * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.1 2006/12/21 16:05:15 petere Exp $ + * + *------------------------------------------------------------------------- + */ + +/* + * Generally, XML type support is only available when libxml use was + * configured during the build. But even if that is not done, the + * type and all the functions are available, but most of them will + * fail. For one thing, this avoids having to manage variant catalog + * installations. But it also has nice effects such as that you can + * dump a database containing XML type data even if the server is not + * linked with libxml. + */ + +#include "postgres.h" + +#ifdef USE_LIBXML +#include <libxml/chvalid.h> +#include <libxml/parser.h> +#include <libxml/tree.h> +#include <libxml/uri.h> +#include <libxml/xmlerror.h> +#endif /* USE_LIBXML */ + +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "nodes/execnodes.h" +#include "utils/builtins.h" +#include "utils/xml.h" + + +#ifdef USE_LIBXML + +/* + * A couple of useful macros (similar to ones from libxml/parse.c) + */ +#define CMP4( s, c1, c2, c3, c4 ) \ + ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ + ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) +#define CMP5( s, c1, c2, c3, c4, c5 ) \ + ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) + +#define PG_XML_DEFAULT_URI "dummy.xml" +#define XML_ERRBUF_SIZE 200 + + +static void xml_init(void); +static void *xml_palloc(size_t size); +static void *xml_repalloc(void *ptr, size_t size); +static void xml_pfree(void *ptr); +static char *xml_pstrdup(const char *string); +static void xml_ereport(int level, char *msg, void *ctxt); +static void xml_errorHandler(void *ctxt, const char *msg, ...); +static void xml_ereport_by_code(int level, char *msg, int errcode); +static xmlChar *xml_text2xmlChar(text *in); +static xmlDocPtr xml_parse(text *data, int opts, bool is_document); + + +/* Global variables */ +/* taken from contrib/xml2 */ +/* FIXME: DO NOT USE global vars !!! */ +char *xml_errbuf; /* per line error buffer */ +char *xml_errmsg = NULL; /* overall error message */ + +#endif /* USE_LIBXML */ + + +#define NO_XML_SUPPORT() ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("no XML support in this installation"))) + + +Datum +xml_in(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + char *s = PG_GETARG_CSTRING(0); + size_t len; + xmltype *vardata; + + len = strlen(s); + vardata = palloc(len + VARHDRSZ); + VARATT_SIZEP(vardata) = len + VARHDRSZ; + memcpy(VARDATA(vardata), s, len); + + /* + * Parse the data to check if it is well-formed XML data. Assume + * that ERROR occurred if parsing failed. Do we need DTD + * validation (if DTD exists)? + */ + xml_parse(vardata, XML_PARSE_DTDATTR | XML_PARSE_DTDVALID, false); + + PG_RETURN_XML_P(vardata); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xml_out(PG_FUNCTION_ARGS) +{ + xmltype *s = PG_GETARG_XML_P(0); + char *result; + int32 len; + + len = VARSIZE(s) - VARHDRSZ; + result = palloc(len + 1); + memcpy(result, VARDATA(s), len); + result[len] = '\0'; + + PG_RETURN_CSTRING(result); +} + + +#ifdef USE_LIBXML +static void +appendStringInfoText(StringInfo str, const text *t) +{ + appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ); +} + + +static xmltype * +stringinfo_to_xmltype(StringInfo buf) +{ + int32 len; + xmltype *result; + + len = buf->len + VARHDRSZ; + result = palloc(len); + VARATT_SIZEP(result) = len; + memcpy(VARDATA(result), buf->data, buf->len); + + return result; +} +#endif + + +Datum +xmlcomment(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *arg = PG_GETARG_TEXT_P(0); + int len = VARATT_SIZEP(arg) - VARHDRSZ; + StringInfoData buf; + int i; + + /* check for "--" in string or "-" at the end */ + for (i = 1; i < len; i++) + if ((VARDATA(arg)[i] == '-' && VARDATA(arg)[i - 1] == '-') + || (VARDATA(arg)[i] == '-' && i == len - 1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_COMMENT), + errmsg("invalid XML comment"))); + + initStringInfo(&buf); + appendStringInfo(&buf, "<!--"); + appendStringInfoText(&buf, arg); + appendStringInfo(&buf, "-->"); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xmlparse(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data; + bool is_document; + bool preserve_whitespace; + + data = PG_GETARG_TEXT_P(0); + + if (PG_NARGS() >= 2) + is_document = PG_GETARG_BOOL(1); + else + is_document = false; + + if (PG_NARGS() >= 3) + preserve_whitespace = PG_GETARG_BOOL(2); + else + /* + * Since the XMLPARSE grammar makes STRIP WHITESPACE the + * default, this argument should really default to false. But + * until we have actually implemented whitespace stripping, + * this would be annoying. + */ + preserve_whitespace = true; + + if (!preserve_whitespace) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("XMLPARSE with STRIP WHITESPACE is not implemented"))); + + /* + * Note, that here we try to apply DTD defaults + * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d: 'Default + * valies defined by internal DTD are applied'. As for external + * DTDs, we try to support them too, (see SQL/XML:10.16.7.e) + */ + xml_parse(data, XML_PARSE_DTDATTR, is_document); /* assume that ERROR occurred if parsing failed */ + + PG_RETURN_XML_P(data); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xmlpi(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + char *target = NameStr(*PG_GETARG_NAME(0)); + StringInfoData buf; + + if (strlen(target) >= 3 + && (target[0] == 'x' || target[0] == 'X') + && (target[1] == 'm' || target[1] == 'M') + && (target[2] == 'l' || target[2] == 'L')) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid XML processing instruction"), + errdetail("XML processing instruction target name cannot start with \"xml\"."))); + } + + initStringInfo(&buf); + + appendStringInfo(&buf, "<?"); + appendStringInfoString(&buf, map_sql_identifier_to_xml_name(target, false)); + if (PG_NARGS() > 1) + { + text *arg = PG_GETARG_TEXT_P(1); + char *string; + + string = DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(arg))); + if (strstr(string, "?>")) + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION), + errmsg("invalid XML processing instruction"), + errdetail("XML processing instruction cannot contain \"?>\"."))); + + appendStringInfoString(&buf, " "); + appendStringInfoString(&buf, string); + } + appendStringInfoString(&buf, "?>"); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xmlroot(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + xmltype *data; + text *version; + int standalone; + StringInfoData buf; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + else + data = PG_GETARG_XML_P(0); + + if (PG_ARGISNULL(1)) + version = NULL; + else + version = PG_GETARG_TEXT_P(1); + + if (PG_ARGISNULL(2)) + standalone = 0; + else + { + bool tmp = PG_GETARG_BOOL(2); + standalone = (tmp ? 1 : -1); + } + + /* + * FIXME: This is probably supposed to be cleverer if there + * already is an XML preamble. + */ + initStringInfo(&buf); + + appendStringInfo(&buf,"<?xml"); + if (version) { + appendStringInfo(&buf, " version=\""); + appendStringInfoText(&buf, version); + appendStringInfo(&buf, "\""); + } + if (standalone) + appendStringInfo(&buf, " standalone=\"%s\"", (standalone == 1 ? "yes" : "no")); + appendStringInfo(&buf, "?>"); + appendStringInfoText(&buf, (text *) data); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +/* + * Validate document (given as string) against DTD (given as external link) + * TODO !!! use text instead of cstring for second arg + * TODO allow passing DTD as a string value (not only as an URI) + * TODO redesign (see comment with '!!!' below) + */ +Datum +xmlvalidate(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_P(0); + text *dtdOrUri = PG_GETARG_TEXT_P(1); + bool result = FALSE; + xmlParserCtxtPtr ctxt; /* the parser context */ + xmlDocPtr doc; /* the resulting document tree */ + xmlDtdPtr dtd; + + xml_init(); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) + xml_ereport(ERROR, "could not allocate parser context", ctxt); + doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data), + VARSIZE(data) - VARHDRSZ, PG_XML_DEFAULT_URI, NULL, 0); + if (doc == NULL) + xml_ereport(ERROR, "could not parse XML data", ctxt); + +#if 0 + uri = xmlCreateURI(); + ereport(NOTICE, (errcode(0),errmsg(" dtd - %s", dtdOrUri))); + dtd = palloc(sizeof(xmlDtdPtr)); + uri = xmlParseURI(dtdOrUri); + if (uri == NULL) + xml_ereport(ERROR, "not implemented yet... (TODO)", ctxt); + else +#endif + dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri)); + + if (dtd == NULL) + { +#if 0 + xmlFreeDoc(doc); + xmlFreeParserCtxt(ctxt); +#endif + xml_ereport(ERROR, "could not load DTD", ctxt); + } + + if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1) + result = TRUE; + +#if 0 + xmlFreeURI(uri); + xmlFreeDtd(dtd); + xmlFreeDoc(doc); + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); +#endif + + if (!result) + xml_ereport(NOTICE, "validation against DTD failed", ctxt); + + PG_RETURN_BOOL(result); +#else /* not USE_LIBXML */ + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + + +#ifdef USE_LIBXML + +/* + * Container for some init stuff (not good design!) + * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and check) + */ +static void +xml_init(void) +{ + /* + * Currently, we have no pure UTF-8 support for internals -- check + * if we can work. + */ + if (sizeof (char) != sizeof (xmlChar)) + ereport(ERROR, + (errmsg("cannot initialize XML library"), + errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.", + sizeof(char), sizeof(xmlChar)))); + + xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup); + xmlInitParser(); + LIBXML_TEST_VERSION; + /* do not flood PG's logfile with libxml error messages - reset error handler*/ + xmlSetGenericErrorFunc(NULL, xml_errorHandler); + xml_errmsg = NULL; + xml_errbuf = palloc(XML_ERRBUF_SIZE); + memset(xml_errbuf, 0, XML_ERRBUF_SIZE); +} + + +/* + * Convert a C string to XML internal representation + * (same things as for TEXT, but with checking the data for well-formedness + * and, moreover, validation against DTD, if needed). + * NOTICE: We use TEXT type as internal storage type. In the future, + * we plan to create own storage type (maybe several types/strategies) + * TODO predefined DTDs / XSDs and validation + * TODO validation against XML Schema + * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c) + * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below) + */ +static xmlDocPtr +xml_parse(text *data, int opts, bool is_document) +{ + bool validationFailed = FALSE; + xmlParserCtxtPtr ctxt; /* the parser context */ + xmlDocPtr doc; /* the resulting document tree */ + int res_code; + int32 len; + xmlChar *string; +#ifdef XML_DEBUG_DTD_CONST + xmlDtdPtr dtd; /* pointer to DTD */ +#endif + + xml_init(); + + len = VARSIZE(data) - VARHDRSZ; /* will be useful later */ + string = xml_text2xmlChar(data); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) + xml_ereport(ERROR, "could not allocate parser context", ctxt); + + /* first, we try to parse the string as it is XML doc, then, as XML chunk */ + ereport(DEBUG3, (errmsg("string to parse: %s", string))); + if (len > 4 && CMP5(string, '<', '?', 'x', 'm', 'l')) + { + /* consider it as DOCUMENT */ + doc = xmlCtxtReadMemory(ctxt, string, len, PG_XML_DEFAULT_URI, NULL, opts); + if (doc == NULL) + { + xml_ereport(ERROR, "could not parse XML data", ctxt); +#if 0 + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + ereport(ERROR, (errmsg("could not parse XML data"))); +#endif + } + } + else + { + /* attempt to parse the string as if it is an XML fragment */ + ereport(DEBUG3, (errmsg("the string is not an XML doc, trying to parse as a CHUNK"))); + doc = xmlNewDoc(NULL); + /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */ + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL); + if (res_code != 0) + { + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + xml_ereport_by_code(ERROR, "could not parse XML data", res_code); + } + } + +#ifdef XML_DEBUG_DTD_CONST + dtd = xmlParseDTD(NULL, (xmlChar *) XML_DEBUG_DTD_CONST); + xml_ereport(DEBUG3, "solid path to DTD was defined for debugging purposes", ctxt); + if (dtd == NULL) + { + xml_ereport(ERROR, "could not parse DTD data", ctxt); + } + else +#else + /* if dtd for our xml data is detected... */ + if ((doc->intSubset != NULL) || (doc->extSubset != NULL)) +#endif + { + /* assume that inline DTD exists - validation should be performed */ +#ifdef XML_DEBUG_DTD_CONST + if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) != 1) +#else + if (ctxt->valid == 0) +#endif + { + /* DTD exists, but validator reported 'validation failed' */ + validationFailed = TRUE; + } + } + + if (validationFailed) + xml_ereport(WARNING, "validation against DTD failed", ctxt); + + /* TODO encoding issues + * (thoughts: + * CASE: + * - XML data has explicit encoding attribute in its prolog + * - if not, assume that enc. of XML data is the same as client's one + * + * The common rule is to accept the XML data only if its encoding + * is the same as encoding of the storage (server's). The other possible + * option is to accept all the docs, but DO TRANSFORMATION and, if needed, + * change the prolog. + * + * I think I'd stick the first way (for the 1st version), + * it's much simplier (less errors...) + * ) */ + /* ... */ + + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + + ereport(DEBUG3, (errmsg("XML data successfully parsed, encoding: %s", + (char *) doc->encoding))); + + return doc; +} + + +/* + * xmlChar<->text convertions + */ +static xmlChar * +xml_text2xmlChar(text *in) +{ + int32 len = VARSIZE(in) - VARHDRSZ; + xmlChar *res; + + res = palloc(len + 1); + memcpy(res, VARDATA(in), len); + res[len] = '\0'; + + return(res); +} + + +/* + * Wrappers for memory management functions + */ +static void * +xml_palloc(size_t size) +{ + return palloc(size); +} + + +static void * +xml_repalloc(void *ptr, size_t size) +{ + return repalloc(ptr, size); +} + + +static void +xml_pfree(void *ptr) +{ + pfree(ptr); +} + + +static char * +xml_pstrdup(const char *string) +{ + return pstrdup(string); +} + + +/* + * Wrapper for "ereport" function. + * Adds detail - libxml's native error message, if any. + */ +static void +xml_ereport(int level, char *msg, void *ctxt) +{ + char *xmlErrDetail; + int xmlErrLen, i; + xmlErrorPtr libxmlErr = NULL; + + if (xml_errmsg != NULL) + { + ereport(DEBUG1, (errmsg("%s", xml_errmsg))); + pfree(xml_errmsg); + } + + if (ctxt != NULL) + libxmlErr = xmlCtxtGetLastError(ctxt); + + if (libxmlErr == NULL) + { + if (level == ERROR) + { + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + } + ereport(level, (errmsg(msg))); + } + else + { + /* as usual, libxml error message contains '\n'; get rid of it */ + xmlErrLen = strlen(libxmlErr->message); /* - 1; */ + xmlErrDetail = (char *) palloc(xmlErrLen); + for (i = 0; i < xmlErrLen; i++) + { + if (libxmlErr->message[i] == '\n') + xmlErrDetail[i] = '.'; + else + xmlErrDetail[i] = libxmlErr->message[i]; + } + if (level == ERROR) + { + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + } + ereport(level, (errmsg(msg), errdetail("%s", xmlErrDetail))); + } +} + + +/* + * Error handler for libxml error messages + */ +static void +xml_errorHandler(void *ctxt, const char *msg,...) +{ + va_list args; + + va_start(args, msg); + vsnprintf(xml_errbuf, XML_ERRBUF_SIZE, msg, args); + va_end(args); + /* Now copy the argument across */ + if (xml_errmsg == NULL) + xml_errmsg = pstrdup(xml_errbuf); + else + { + int32 xsize = strlen(xml_errmsg); + + xml_errmsg = repalloc(xml_errmsg, (size_t) (xsize + strlen(xml_errbuf) + 1)); + strncpy(&xml_errmsg[xsize - 1], xml_errbuf, strlen(xml_errbuf)); + xml_errmsg[xsize + strlen(xml_errbuf) - 1] = '\0'; + } + memset(xml_errbuf, 0, XML_ERRBUF_SIZE); +} + + +/* + * Return error message by libxml error code + * TODO make them closer to recommendations from Postgres manual + */ +static void +xml_ereport_by_code(int level, char *msg, int code) +{ + const char *det; + + if (code < 0) + { + ereport(level, (errmsg(msg))); + return; + } + + switch (code) { + case XML_ERR_INTERNAL_ERROR: + det = "libxml internal error"; + break; + case XML_ERR_ENTITY_LOOP: + det = "Detected an entity reference loop"; + break; + case XML_ERR_ENTITY_NOT_STARTED: + det = "EntityValue: \" or ' expected"; + break; + case XML_ERR_ENTITY_NOT_FINISHED: + det = "EntityValue: \" or ' expected"; + break; + case XML_ERR_ATTRIBUTE_NOT_STARTED: + det = "AttValue: \" or ' expected"; + break; + case XML_ERR_LT_IN_ATTRIBUTE: + det = "Unescaped '<' not allowed in attributes values"; + break; + case XML_ERR_LITERAL_NOT_STARTED: + det = "SystemLiteral \" or ' expected"; + break; + case XML_ERR_LITERAL_NOT_FINISHED: + det = "Unfinished System or Public ID \" or ' expected"; + break; + case XML_ERR_MISPLACED_CDATA_END: + det = "Sequence ']]>' not allowed in content"; + break; + case XML_ERR_URI_REQUIRED: + det = "SYSTEM or PUBLIC, the URI is missing"; + break; + case XML_ERR_PUBID_REQUIRED: + det = "PUBLIC, the Public Identifier is missing"; + break; + case XML_ERR_HYPHEN_IN_COMMENT: + det = "Comment must not contain '--' (double-hyphen)"; + break; + case XML_ERR_PI_NOT_STARTED: + det = "xmlParsePI : no target name"; + break; + case XML_ERR_RESERVED_XML_NAME: + det = "Invalid PI name"; + break; + case XML_ERR_NOTATION_NOT_STARTED: + det = "NOTATION: Name expected here"; + break; + case XML_ERR_NOTATION_NOT_FINISHED: + det = "'>' required to close NOTATION declaration"; + break; + case XML_ERR_VALUE_REQUIRED: + det = "Entity value required"; + break; + case XML_ERR_URI_FRAGMENT: + det = "Fragment not allowed"; + break; + case XML_ERR_ATTLIST_NOT_STARTED: + det = "'(' required to start ATTLIST enumeration"; + break; + case XML_ERR_NMTOKEN_REQUIRED: + det = "NmToken expected in ATTLIST enumeration"; + break; + case XML_ERR_ATTLIST_NOT_FINISHED: + det = "')' required to finish ATTLIST enumeration"; + break; + case XML_ERR_MIXED_NOT_STARTED: + det = "MixedContentDecl : '|' or ')*' expected"; + break; + case XML_ERR_PCDATA_REQUIRED: + det = "MixedContentDecl : '#PCDATA' expected"; + break; + case XML_ERR_ELEMCONTENT_NOT_STARTED: + det = "ContentDecl : Name or '(' expected"; + break; + case XML_ERR_ELEMCONTENT_NOT_FINISHED: + det = "ContentDecl : ',' '|' or ')' expected"; + break; + case XML_ERR_PEREF_IN_INT_SUBSET: + det = "PEReference: forbidden within markup decl in internal subset"; + break; + case XML_ERR_GT_REQUIRED: + det = "Expected '>'"; + break; + case XML_ERR_CONDSEC_INVALID: + det = "XML conditional section '[' expected"; + break; + case XML_ERR_EXT_SUBSET_NOT_FINISHED: + det = "Content error in the external subset"; + break; + case XML_ERR_CONDSEC_INVALID_KEYWORD: + det = "conditional section INCLUDE or IGNORE keyword expected"; + break; + case XML_ERR_CONDSEC_NOT_FINISHED: + det = "XML conditional section not closed"; + break; + case XML_ERR_XMLDECL_NOT_STARTED: + det = "Text declaration '<?xml' required"; + break; + case XML_ERR_XMLDECL_NOT_FINISHED: + det = "parsing XML declaration: '?>' expected"; + break; + case XML_ERR_EXT_ENTITY_STANDALONE: + det = "external parsed entities cannot be standalone"; + break; + case XML_ERR_ENTITYREF_SEMICOL_MISSING: + det = "EntityRef: expecting ';'"; + break; + case XML_ERR_DOCTYPE_NOT_FINISHED: + det = "DOCTYPE improperly terminated"; + break; + case XML_ERR_LTSLASH_REQUIRED: + det = "EndTag: '</' not found"; + break; + case XML_ERR_EQUAL_REQUIRED: + det = "Expected '='"; + break; + case XML_ERR_STRING_NOT_CLOSED: + det = "String not closed expecting \" or '"; + break; + case XML_ERR_STRING_NOT_STARTED: + det = "String not started expecting ' or \""; + break; + case XML_ERR_ENCODING_NAME: + det = "Invalid XML encoding name"; + break; + case XML_ERR_STANDALONE_VALUE: + det = "Standalone accepts only 'yes' or 'no'"; + break; + case XML_ERR_DOCUMENT_EMPTY: + det = "Document is empty"; + break; + case XML_ERR_DOCUMENT_END: + det = "Extra content at the end of the document"; + break; + case XML_ERR_NOT_WELL_BALANCED: + det = "Chunk is not well balanced"; + break; + case XML_ERR_EXTRA_CONTENT: + det = "Extra content at the end of well balanced chunk"; + break; + case XML_ERR_VERSION_MISSING: + det = "Malformed declaration expecting version"; + break; + /* more err codes... Please, keep the order! */ + case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */ + det ="Attribute without value"; + break; + case XML_ERR_ATTRIBUTE_REDEFINED: + det ="Attribute defined more than once in the same element"; + break; + case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */ + det = "Comment is not finished"; + break; + case XML_ERR_NAME_REQUIRED: /* 68 */ + det = "Element name not found"; + break; + case XML_ERR_TAG_NOT_FINISHED: /* 77 */ + det = "Closing tag not found"; + break; + default: + det = "Unregistered error (libxml error code: %d)"; + ereport(DEBUG1, (errmsg("Check out \"libxml/xmlerror.h\" and bring errcode \"%d\" processing to \"xml.c\".", code))); + } + + if (xml_errmsg != NULL) + { + ereport(DEBUG1, (errmsg("%s", xml_errmsg))); + pfree(xml_errmsg); + } + + ereport(level, (errmsg(msg), errdetail(det, code))); +} + + +/* + * Convert one char in the current server encoding to a Unicode + * codepoint. + */ +static pg_wchar +sqlchar_to_unicode(unsigned char *s) +{ + int save_enc; + pg_wchar ret; + char *utf8string = pg_do_encoding_conversion(s, pg_mblen(s), GetDatabaseEncoding(), PG_UTF8); + + save_enc = GetDatabaseEncoding(); + SetDatabaseEncoding(PG_UTF8); + pg_mb2wchar_with_len(utf8string, &ret, pg_mblen(s)); + SetDatabaseEncoding(save_enc); + + return ret; +} + + +static bool +is_valid_xml_namefirst(pg_wchar c) +{ + /* (Letter | '_' | ':') */ + return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) + || c == '_' || c == ':'); +} + + +static bool +is_valid_xml_namechar(pg_wchar c) +{ + /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */ + return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) + || xmlIsDigitQ(c) + || c == '.' || c == '-' || c == '_' || c == ':' + || xmlIsCombiningQ(c) + || xmlIsExtenderQ(c)); +} +#endif /* USE_LIBXML */ + + +/* + * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1. + */ +char * +map_sql_identifier_to_xml_name(unsigned char *ident, bool fully_escaped) +{ +#ifdef USE_LIBXML + StringInfoData buf; + unsigned char *p; + + initStringInfo(&buf); + + for (p = ident; *p; p += pg_mblen(p)) + { + if (*p == ':' && (p == ident || fully_escaped)) + appendStringInfo(&buf, "_x003A_"); + else if (*p == '_' && *(p+1) == 'x') + appendStringInfo(&buf, "_x005F_"); + else if (fully_escaped && p == ident + && ( *p == 'x' || *p == 'X') + && ( *(p+1) == 'm' || *(p+1) == 'M') + && ( *(p+2) == 'l' || *(p+2) == 'L')) + { + if (*p == 'x') + appendStringInfo(&buf, "_x0078_"); + else + appendStringInfo(&buf, "_x0058_"); + } + else + { + pg_wchar u = sqlchar_to_unicode(p); + + if (!is_valid_xml_namechar(u) + || (p == ident && !is_valid_xml_namefirst(u))) + appendStringInfo(&buf, "_x%04X_", (unsigned int) u); + else + appendBinaryStringInfo(&buf, p, pg_mblen(p)); + } + } + + return buf.data; +#else /* not USE_LIBXML */ + NO_XML_SUPPORT(); + return NULL; +#endif /* not USE_LIBXML */ +} |