diff options
Diffstat (limited to 'contrib/xml/pgxml.c')
-rw-r--r-- | contrib/xml/pgxml.c | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/contrib/xml/pgxml.c b/contrib/xml/pgxml.c new file mode 100644 index 00000000000..47289031579 --- /dev/null +++ b/contrib/xml/pgxml.c @@ -0,0 +1,310 @@ +/******************************************************** + * Interface code to parse an XML document using expat + ********************************************************/ + +#include "postgres.h" +#include "fmgr.h" + +#include "expat.h" +#include "pgxml.h" + +/* Memory management - we make expat use standard pg MM */ + +XML_Memory_Handling_Suite mhs; + +/* passthrough functions (palloc is a macro) */ + +static void *pgxml_palloc(size_t size) +{ + return palloc(size); +} + +static void *pgxml_repalloc(void *ptr, size_t size) +{ + return repalloc(ptr,size); +} + +static void pgxml_pfree(void *ptr) +{ + return pfree(ptr); +} + +static void pgxml_mhs_init() +{ + mhs.malloc_fcn = pgxml_palloc; + mhs.realloc_fcn = pgxml_repalloc; + mhs.free_fcn = pgxml_pfree; +} + +static void pgxml_handler_init() +{ + /* This code should set up the relevant handlers from user-supplied + settings. Quite how these settings are made is another matter :) */ +} + +/* Returns true if document is well-formed */ + +PG_FUNCTION_INFO_V1(pgxml_parse); + +Datum +pgxml_parse(PG_FUNCTION_ARGS) +{ + /* called as pgxml_parse(document) */ + XML_Parser p; + text *t = PG_GETARG_TEXT_P(0); /*document buffer */ + int32 docsize = VARSIZE(t) - VARHDRSZ; + + pgxml_mhs_init(); + + pgxml_handler_init(); + + p = XML_ParserCreate_MM(NULL,&mhs,NULL); + if (! p) { + elog(ERROR, "pgxml: Could not create expat parser"); + PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */ + } + + if (! XML_Parse(p, (char *)VARDATA(t) , docsize, 1)) { + /* elog(NOTICE, "Parse error at line %d:%s", + XML_GetCurrentLineNumber(p), + XML_ErrorString(XML_GetErrorCode(p))); */ + XML_ParserFree(p); + PG_RETURN_BOOL(false); + } + + XML_ParserFree(p); + PG_RETURN_BOOL(true); +} + +/* XPath handling functions */ + +/* XPath support here is for a very skeletal kind of XPath! + It was easy to program though... */ + +/* This first is the core function that builds a result set. The + actual functions called by the user manipulate that result set + in various ways. +*/ + +static XPath_Results *build_xpath_results(text *doc, text *pathstr) +{ + XPath_Results *xpr; + char *res; + pgxml_udata *udata; + XML_Parser p; + int32 docsize; + + xpr = (XPath_Results *) palloc((sizeof(XPath_Results))); + memset((void *)xpr, 0, sizeof(XPath_Results)); + xpr->rescount=0; + + docsize=VARSIZE(doc)-VARHDRSZ; + + /* res isn't going to be the real return type, it is just a buffer */ + + res = (char *) palloc(docsize); + memset((void *)res, 0, docsize); + + xpr->resbuf = res; + + udata = (pgxml_udata *) palloc((sizeof(pgxml_udata))); + memset((void *)udata,0,sizeof(pgxml_udata)); + + udata->currentpath[0]='\0'; + udata->textgrab=0; + + udata->path= (char *) palloc(VARSIZE(pathstr)); + memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr)-VARHDRSZ); + + udata->path[VARSIZE(pathstr)-VARHDRSZ]='\0'; + + udata->resptr = res; + udata->reslen = 0; + + udata->xpres = xpr; + + /* Now fire up the parser */ + pgxml_mhs_init(); + + p = XML_ParserCreate_MM(NULL,&mhs,NULL); + if (! p) { + elog(ERROR, "pgxml: Could not create expat parser"); + pfree(xpr); + pfree(udata->path); + pfree(udata); + pfree(res); + return NULL; + } + XML_SetUserData(p, (void *)udata); + + /* Set the handlers */ + + XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler); + XML_SetCharacterDataHandler(p, pgxml_charhandler); + + if (! XML_Parse(p, (char *)VARDATA(doc) , docsize, 1)) { + /* elog(NOTICE, "Parse error at line %d:%s", + XML_GetCurrentLineNumber(p), + XML_ErrorString(XML_GetErrorCode(p))); */ + XML_ParserFree(p); + pfree(xpr); + pfree(udata->path); + pfree(udata); + + return NULL; + } + + pfree(udata->path); + pfree(udata); + XML_ParserFree(p); + return xpr; +} + + +PG_FUNCTION_INFO_V1(pgxml_xpath); + +Datum +pgxml_xpath(PG_FUNCTION_ARGS) +{ + /* called as pgxml_xpath(document,pathstr, index) for the moment*/ + + XPath_Results *xpresults; + text *restext; + + text *t = PG_GETARG_TEXT_P(0); /*document buffer */ + text *t2= PG_GETARG_TEXT_P(1); + int32 ind = PG_GETARG_INT32(2) - 1; + + xpresults = build_xpath_results(t,t2); + + /* This needs to be changed depending on the mechanism for returning + our set of results. */ + + if (xpresults==NULL) /*parse error (not WF or parser failure) */ + { + PG_RETURN_NULL(); + } + + if (ind >= (xpresults->rescount)) + { + PG_RETURN_NULL(); + } + + restext = (text *) palloc(xpresults->reslens[ind]+VARHDRSZ); + memcpy(VARDATA(restext),xpresults->results[ind],xpresults->reslens[ind]); + + VARATT_SIZEP(restext) = xpresults->reslens[ind]+VARHDRSZ; + + pfree(xpresults->resbuf); + pfree(xpresults); + + PG_RETURN_TEXT_P(restext); +} + + +static void pgxml_pathcompare(void *userData) +{ + char *matchpos; + + matchpos=strstr(UD->currentpath, UD->path); + + if (matchpos == NULL) { /* Should we have more logic here ? */ + if (UD->textgrab) { + UD->textgrab=0; + pgxml_finalisegrabbedtext(userData); + } + return; + } + /* OK, we have a match of some sort. Now we need to check that + our match is anchored to the *end* of the string AND + that it is immediately preceded by a '/'*/ + /* This test wouldn't work if strlen (UD->path) overran the length + of the currentpath, but that's not possible because we got a match! */ + + if ((matchpos + strlen(UD->path))[0]=='\0') + { + if ((UD->path)[0]=='/') { + if (matchpos == UD->currentpath) { + UD->textgrab=1; + } + } else { + if ((matchpos-1)[0]=='/') { + UD->textgrab=1; + } + } + } +} + +static void pgxml_starthandler(void *userData, const XML_Char *name, + const XML_Char **atts) +{ + + char sepstr[]="/"; + + if ((strlen(name)+strlen(UD->currentpath))>MAXPATHLENGTH-2) { + elog(NOTICE,"Path too long"); + } else { + strncat(UD->currentpath,sepstr,1); + strcat(UD->currentpath, name); + } + if (UD->textgrab) + { + /* Depending on user preference, should we "reconstitute" + the element into the result text? + */ + } else { + pgxml_pathcompare(userData); + } +} + +static void pgxml_endhandler(void *userData, const XML_Char *name) +{ + /* Start by removing the current element off the end of the + currentpath */ + + char *sepptr; + + sepptr=strrchr(UD->currentpath,'/'); + if (sepptr==NULL) { + elog(ERROR,"There's a problem..."); + sepptr=UD->currentpath; + } + if (strcmp(name, sepptr+1) !=0) { + elog(NOTICE,"Wanted [%s], got [%s]",sepptr,name); + /* unmatched entry, so do nothing */ + } else { + sepptr[0]='\0'; /* Chop that element off the end */ + } + + if (UD->textgrab) { + pgxml_pathcompare(userData); + } + +} + +static void pgxml_charhandler(void *userData, const XML_Char *s, int len) +{ + if (UD->textgrab) { + if (len>0) { + memcpy(UD->resptr,s,len); + UD->resptr += len; + UD->reslen += len; + } + } +} +/* Should I be using PG list types here? */ + +static void pgxml_finalisegrabbedtext(void *userData) +{ + /* In res/reslen, we have a single result. */ + UD->xpres->results[UD->xpres->rescount]= UD->resptr - UD->reslen; + UD->xpres->reslens[UD->xpres->rescount]= UD->reslen; + UD->reslen=0; + UD->xpres->rescount++; + + /* This effectively concatenates all the results together but we + do know where one ends and the next begins */ +} + + + |