1 files changed, 310 insertions, 0 deletions
diff --git a/contrib/xml/pgxml.c b/contrib/xml/pgxml.c
new file mode 100644
index 00000000000..47289031579
--- /dev/null
+++ b/contrib/xml/pgxml.c
@@ -0,0 +1,310 @@
+/********************************************************
+ * Interface code to parse an XML document using expat
+ ********************************************************/
+
+#include "postgres.h"
+#include "fmgr.h"
+
+#include "expat.h"
+#include "pgxml.h"
+
+/* Memory management - we make expat use standard pg MM */
+
+XML_Memory_Handling_Suite mhs;
+
+/* passthrough functions (palloc is a macro) */
+
+static void *pgxml_palloc(size_t size) 
+{
+  return palloc(size);
+}
+
+static void *pgxml_repalloc(void *ptr, size_t size)
+{
+  return repalloc(ptr,size);
+}
+
+static void pgxml_pfree(void *ptr)
+{
+  return pfree(ptr);
+}
+
+static void pgxml_mhs_init() 
+{
+  mhs.malloc_fcn = pgxml_palloc;
+  mhs.realloc_fcn = pgxml_repalloc;
+  mhs.free_fcn = pgxml_pfree;
+}
+
+static void pgxml_handler_init()
+{
+  /* This code should set up the relevant handlers from  user-supplied
+     settings. Quite how these settings are made is another matter :) */
+}
+
+/* Returns true if document is well-formed */
+
+PG_FUNCTION_INFO_V1(pgxml_parse);
+
+Datum
+pgxml_parse(PG_FUNCTION_ARGS)
+{
+  /* called as pgxml_parse(document) */
+  XML_Parser p;
+  text *t = PG_GETARG_TEXT_P(0); /*document buffer */
+  int32 docsize = VARSIZE(t) - VARHDRSZ;
+
+  pgxml_mhs_init();
+
+  pgxml_handler_init();
+
+  p = XML_ParserCreate_MM(NULL,&mhs,NULL);
+  if (! p) {
+    elog(ERROR, "pgxml: Could not create expat parser");
+    PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */
+  }
+
+  if (! XML_Parse(p, (char *)VARDATA(t) , docsize, 1)) {
+    /*    elog(NOTICE, "Parse error at line %d:%s",
+	    XML_GetCurrentLineNumber(p),
+	    XML_ErrorString(XML_GetErrorCode(p))); */
+    XML_ParserFree(p);
+    PG_RETURN_BOOL(false);
+  }
+
+  XML_ParserFree(p);
+  PG_RETURN_BOOL(true);
+}
+
+/* XPath handling functions */
+
+/* XPath support here is for a very skeletal kind of XPath!
+   It was easy to program though... */
+
+/* This first is the core function that builds a result set. The 
+   actual functions called by the user manipulate that result set
+   in various ways.
+*/
+
+static XPath_Results *build_xpath_results(text *doc, text *pathstr)
+{
+  XPath_Results *xpr;
+  char *res;
+  pgxml_udata *udata;
+  XML_Parser p;
+  int32 docsize;
+
+  xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
+  memset((void *)xpr, 0, sizeof(XPath_Results));
+  xpr->rescount=0;
+
+  docsize=VARSIZE(doc)-VARHDRSZ;
+
+  /* res isn't going to be the real return type, it is just a buffer */
+
+  res = (char *) palloc(docsize);
+  memset((void *)res, 0, docsize);
+
+  xpr->resbuf = res;
+
+  udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
+  memset((void *)udata,0,sizeof(pgxml_udata));
+
+  udata->currentpath[0]='\0';
+  udata->textgrab=0;
+
+  udata->path= (char *) palloc(VARSIZE(pathstr));
+  memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr)-VARHDRSZ);
+
+  udata->path[VARSIZE(pathstr)-VARHDRSZ]='\0';
+
+  udata->resptr = res;
+  udata->reslen = 0;
+
+  udata->xpres = xpr;
+
+  /* Now fire up the parser */
+  pgxml_mhs_init();
+
+  p = XML_ParserCreate_MM(NULL,&mhs,NULL);
+  if (! p) {
+    elog(ERROR, "pgxml: Could not create expat parser");
+    pfree(xpr);
+    pfree(udata->path);
+    pfree(udata);
+    pfree(res);
+    return NULL;
+  }
+  XML_SetUserData(p, (void *)udata);
+
+  /* Set the handlers */
+
+  XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
+  XML_SetCharacterDataHandler(p, pgxml_charhandler);
+
+  if (! XML_Parse(p, (char *)VARDATA(doc) , docsize, 1)) {
+    /*     elog(NOTICE, "Parse error at line %d:%s",
+	    XML_GetCurrentLineNumber(p),
+	    XML_ErrorString(XML_GetErrorCode(p))); */
+    XML_ParserFree(p);
+    pfree(xpr);
+    pfree(udata->path);
+    pfree(udata);
+
+    return NULL;
+  }
+
+  pfree(udata->path);
+  pfree(udata);
+  XML_ParserFree(p);
+  return xpr;
+}
+
+
+PG_FUNCTION_INFO_V1(pgxml_xpath);
+
+Datum
+pgxml_xpath(PG_FUNCTION_ARGS)
+{
+  /* called as pgxml_xpath(document,pathstr, index) for the moment*/
+
+  XPath_Results *xpresults;
+  text *restext;
+
+  text *t = PG_GETARG_TEXT_P(0); /*document buffer */
+  text *t2= PG_GETARG_TEXT_P(1);
+  int32 ind = PG_GETARG_INT32(2) - 1;
+
+  xpresults = build_xpath_results(t,t2);
+
+  /* This needs to be changed depending on the mechanism for returning
+     our set of results. */
+
+  if (xpresults==NULL)   /*parse error (not WF or parser failure) */
+    {
+      PG_RETURN_NULL();
+    }
+
+  if (ind >= (xpresults->rescount)) 
+    {
+      PG_RETURN_NULL();
+    }
+  
+  restext = (text *) palloc(xpresults->reslens[ind]+VARHDRSZ);
+  memcpy(VARDATA(restext),xpresults->results[ind],xpresults->reslens[ind]);
+
+  VARATT_SIZEP(restext) = xpresults->reslens[ind]+VARHDRSZ;
+
+  pfree(xpresults->resbuf);
+  pfree(xpresults);
+
+  PG_RETURN_TEXT_P(restext);
+}
+
+
+static void pgxml_pathcompare(void *userData)
+{
+  char  *matchpos;
+
+  matchpos=strstr(UD->currentpath, UD->path);
+
+  if (matchpos == NULL) { /* Should we have more logic here ? */
+    if (UD->textgrab) {
+      UD->textgrab=0;
+      pgxml_finalisegrabbedtext(userData);
+    }
+    return;
+  }
+  /* OK, we have a match of some sort. Now we need to check that
+     our match is anchored to the *end* of the string AND
+     that it is immediately preceded by a '/'*/
+  /* This test wouldn't work if strlen (UD->path) overran the length
+     of the currentpath, but that's not possible because we got a match! */
+
+  if ((matchpos + strlen(UD->path))[0]=='\0') 
+    {
+      if ((UD->path)[0]=='/') {
+	if (matchpos == UD->currentpath) {
+	  UD->textgrab=1;
+	}
+      } else {
+	if ((matchpos-1)[0]=='/') {
+	  UD->textgrab=1;
+	}
+      }
+    }
+}
+
+static void pgxml_starthandler(void *userData, const XML_Char *name,
+			const XML_Char **atts)
+{
+
+  char sepstr[]="/";
+
+  if ((strlen(name)+strlen(UD->currentpath))>MAXPATHLENGTH-2) {
+    elog(NOTICE,"Path too long");
+  } else {
+    strncat(UD->currentpath,sepstr,1);
+    strcat(UD->currentpath, name);
+  }
+  if (UD->textgrab) 
+    {
+      /* Depending on user preference, should we "reconstitute"
+	 the element into the result text?
+      */
+    } else {
+      pgxml_pathcompare(userData);
+    }
+}
+
+static void pgxml_endhandler(void *userData, const XML_Char *name)
+{
+  /* Start by removing the current element off the end of the
+     currentpath */
+
+  char *sepptr;
+
+  sepptr=strrchr(UD->currentpath,'/');
+  if (sepptr==NULL) {
+    elog(ERROR,"There's a problem...");
+    sepptr=UD->currentpath;
+  }
+  if (strcmp(name, sepptr+1) !=0) {
+    elog(NOTICE,"Wanted [%s], got [%s]",sepptr,name);
+    /* unmatched entry, so do nothing */
+  } else {
+    sepptr[0]='\0'; /* Chop that element off the end */
+  }
+
+  if (UD->textgrab) {  
+    pgxml_pathcompare(userData);
+  }
+
+}
+
+static void pgxml_charhandler(void *userData, const XML_Char *s, int len)
+{
+  if (UD->textgrab) {
+    if (len>0) {
+      memcpy(UD->resptr,s,len);
+      UD->resptr += len;
+      UD->reslen += len;
+    }
+  }
+}
+/* Should I be using PG list types here? */
+
+static void pgxml_finalisegrabbedtext(void *userData)
+{
+  /* In res/reslen, we have a single result. */
+  UD->xpres->results[UD->xpres->rescount]= UD->resptr - UD->reslen;
+  UD->xpres->reslens[UD->xpres->rescount]= UD->reslen;
+  UD->reslen=0;
+  UD->xpres->rescount++;
+
+  /* This effectively concatenates all the results together but we
+     do know where one ends and the next begins */
+}
+
+
+