Support TZ and OF format codes in to_timestamp().

Formerly, these were only supported in to_char(), but there seems little reason for that restriction. We should at least have enough support to permit round-tripping the output of to_char().

In that spirit, TZ accepts either zone abbreviations or numeric (HH or HH:MM) offsets, which are the cases that to_char() can output. In an ideal world we'd make it take full zone names too, but that seems like it'd introduce an unreasonable amount of ambiguity, since the rules for POSIX-spec zone names are so lax.

OF is a subset of this, accepting only HH or HH:MM.

One small benefit of this improvement is that we can simplify jsonpath's executeDateTimeMethod function, which no longer needs to consider the HH and HH:MM cases separately. Moreover, letting it accept zone abbreviations means it will accept "Z" to mean UTC, which is emitted by JSON.stringify() for example.

Patch by me, reviewed by Aleksander Alekseev and Daniel Gustafsson

Discussion: https://postgr.es/m/1681086.1686673242@sss.pgh.pa.us
author: Tom Lane <tgl@sss.pgh.pa.us> 2024-01-25 17:47:08 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2024-01-25 17:47:08 -0500
commit: 8ba6fdf905d0f5aef70ced4504c6ad297bfe08ea (patch)
tree: 58e63620c2de356734338912a082e930fd1d6e3e /src/backend/utils/adt/formatting.c
parent: 06a66d87dbc7e06581af6765131ea250063fb4ac (diff)
download: postgresql-8ba6fdf905d0f5aef70ced4504c6ad297bfe08ea.tar.gz
postgresql-8ba6fdf905d0f5aef70ced4504c6ad297bfe08ea.zip
1 files changed, 114 insertions, 53 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 83e1f1265ce..829aaa8d0e7 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -418,14 +418,24 @@ typedef struct
 				us,
 				yysz,			/* is it YY or YYYY ? */
 				clock,			/* 12 or 24 hour clock? */
-				tzsign,			/* +1, -1 or 0 if timezone info is absent */
+				tzsign,			/* +1, -1, or 0 if no TZH/TZM fields */
 				tzh,
 				tzm,
 				ff;				/* fractional precision */
+	bool		has_tz;			/* was there a TZ field? */
+	int			gmtoffset;		/* GMT offset of fixed-offset zone abbrev */
+	pg_tz	   *tzp;			/* pg_tz for dynamic abbrev */
+	char	   *abbrev;			/* dynamic abbrev */
 } TmFromChar;
 
 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
 
+struct fmt_tz					/* do_to_timestamp's timezone info output */
+{
+	bool		has_tz;			/* was there any TZ/TZH/TZM field? */
+	int			gmtoffset;		/* GMT offset in seconds */
+};
+
 /* ----------
  * Debug
  * ----------
@@ -1058,8 +1068,8 @@ static bool from_char_seq_search(int *dest, const char **src,
 								 char **localized_array, Oid collid,
 								 FormatNode *node, Node *escontext);
 static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
-							struct pg_tm *tm, fsec_t *fsec, int *fprec,
-							uint32 *flags, Node *escontext);
+							struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
+							int *fprec, uint32 *flags, Node *escontext);
 static char *fill_str(char *str, int c, int max);
 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
 static char *int_to_roman(int number);
@@ -3444,7 +3454,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
 			case DCH_FF5:
 			case DCH_FF6:
 				out->ff = n->key->id - DCH_FF1 + 1;
-				/* fall through */
+				/* FALLTHROUGH */
 			case DCH_US:		/* microsecond */
 				len = from_char_parse_int_len(&out->us, &s,
 											  n->key->id == DCH_US ? 6 :
@@ -3467,11 +3477,63 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
 				break;
 			case DCH_tz:
 			case DCH_TZ:
+				{
+					int			tzlen;
+
+					tzlen = DecodeTimezoneAbbrevPrefix(s,
+													   &out->gmtoffset,
+													   &out->tzp);
+					if (tzlen > 0)
+					{
+						out->has_tz = true;
+						/* we only need the zone abbrev for DYNTZ case */
+						if (out->tzp)
+							out->abbrev = pnstrdup(s, tzlen);
+						out->tzsign = 0;	/* drop any earlier TZH/TZM info */
+						s += tzlen;
+						break;
+					}
+					else if (isalpha((unsigned char) *s))
+					{
+						/*
+						 * It doesn't match any abbreviation, but it starts
+						 * with a letter.  OF format certainly won't succeed;
+						 * assume it's a misspelled abbreviation and complain
+						 * accordingly.
+						 */
+						ereturn(escontext,,
+								(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+								 errmsg("invalid value \"%s\" for \"%s\"",
+										s, n->key->name),
+								 errdetail("Time zone abbreviation is not recognized.")));
+					}
+					/* otherwise parse it like OF */
+				}
+				/* FALLTHROUGH */
 			case DCH_OF:
-				ereturn(escontext,,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("formatting field \"%s\" is only supported in to_char",
-								n->key->name)));
+				/* OF is equivalent to TZH or TZH:TZM */
+				/* see TZH comments below */
+				if (*s == '+' || *s == '-' || *s == ' ')
+				{
+					out->tzsign = *s == '-' ? -1 : +1;
+					s++;
+				}
+				else
+				{
+					if (extra_skip > 0 && *(s - 1) == '-')
+						out->tzsign = -1;
+					else
+						out->tzsign = +1;
+				}
+				if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
+					return;
+				if (*s == ':')
+				{
+					s++;
+					if (from_char_parse_int_len(&out->tzm, &s, 2, n,
+												escontext) < 0)
+						return;
+				}
 				break;
 			case DCH_TZH:
 
@@ -4167,22 +4229,16 @@ to_timestamp(PG_FUNCTION_ARGS)
 	Timestamp	result;
 	int			tz;
 	struct pg_tm tm;
+	struct fmt_tz ftz;
 	fsec_t		fsec;
 	int			fprec;
 
 	do_to_timestamp(date_txt, fmt, collid, false,
-					&tm, &fsec, &fprec, NULL, NULL);
+					&tm, &fsec, &ftz, &fprec, NULL, NULL);
 
 	/* Use the specified time zone, if any. */
-	if (tm.tm_zone)
-	{
-		DateTimeErrorExtra extra;
-		int			dterr = DecodeTimezone(tm.tm_zone, &tz);
-
-		if (dterr)
-			DateTimeParseError(dterr, &extra, text_to_cstring(date_txt),
-							   "timestamptz", NULL);
-	}
+	if (ftz.has_tz)
+		tz = ftz.gmtoffset;
 	else
 		tz = DetermineTimeZoneOffset(&tm, session_timezone);
 
@@ -4211,10 +4267,11 @@ to_date(PG_FUNCTION_ARGS)
 	Oid			collid = PG_GET_COLLATION();
 	DateADT		result;
 	struct pg_tm tm;
+	struct fmt_tz ftz;
 	fsec_t		fsec;
 
 	do_to_timestamp(date_txt, fmt, collid, false,
-					&tm, &fsec, NULL, NULL, NULL);
+					&tm, &fsec, &ftz, NULL, NULL, NULL);
 
 	/* Prevent overflow in Julian-day routines */
 	if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
@@ -4256,12 +4313,13 @@ parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
 			   Node *escontext)
 {
 	struct pg_tm tm;
+	struct fmt_tz ftz;
 	fsec_t		fsec;
 	int			fprec;
 	uint32		flags;
 
 	if (!do_to_timestamp(date_txt, fmt, collid, strict,
-						 &tm, &fsec, &fprec, &flags, escontext))
+						 &tm, &fsec, &ftz, &fprec, &flags, escontext))
 		return (Datum) 0;
 
 	*typmod = fprec ? fprec : -1;	/* fractional part precision */
@@ -4274,18 +4332,9 @@ parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
 			{
 				TimestampTz result;
 
-				if (tm.tm_zone)
+				if (ftz.has_tz)
 				{
-					DateTimeErrorExtra extra;
-					int			dterr = DecodeTimezone(tm.tm_zone, tz);
-
-					if (dterr)
-					{
-						DateTimeParseError(dterr, &extra,
-										   text_to_cstring(date_txt),
-										   "timestamptz", escontext);
-						return (Datum) 0;
-					}
+					*tz = ftz.gmtoffset;
 				}
 				else
 				{
@@ -4366,18 +4415,9 @@ parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
 		{
 			TimeTzADT  *result = palloc(sizeof(TimeTzADT));
 
-			if (tm.tm_zone)
+			if (ftz.has_tz)
 			{
-				DateTimeErrorExtra extra;
-				int			dterr = DecodeTimezone(tm.tm_zone, tz);
-
-				if (dterr)
-				{
-					DateTimeParseError(dterr, &extra,
-									   text_to_cstring(date_txt),
-									   "timetz", escontext);
-					return (Datum) 0;
-				}
+				*tz = ftz.gmtoffset;
 			}
 			else
 			{
@@ -4430,7 +4470,7 @@ parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
  * do_to_timestamp: shared code for to_timestamp and to_date
  *
  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
- * fractional seconds, and fractional precision.
+ * fractional seconds, struct fmt_tz, and fractional precision.
  *
  * 'collid' identifies the collation to use, if needed.
  * 'std' specifies standard parsing mode.
@@ -4447,12 +4487,12 @@ parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
  * 'date_txt'.
  *
  * The TmFromChar is then analysed and converted into the final results in
- * struct 'tm', 'fsec', and 'fprec'.
+ * struct 'tm', 'fsec', struct 'tz', and 'fprec'.
  */
 static bool
 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
-				struct pg_tm *tm, fsec_t *fsec, int *fprec,
-				uint32 *flags, Node *escontext)
+				struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
+				int *fprec, uint32 *flags, Node *escontext)
 {
 	FormatNode *format = NULL;
 	TmFromChar	tmfc;
@@ -4469,6 +4509,7 @@ do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
 	ZERO_tmfc(&tmfc);
 	ZERO_tm(tm);
 	*fsec = 0;
+	tz->has_tz = false;
 	if (fprec)
 		*fprec = 0;
 	if (flags)
@@ -4744,11 +4785,14 @@ do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
 		goto fail;
 	}
 
-	/* Save parsed time-zone into tm->tm_zone if it was specified */
+	/*
+	 * If timezone info was present, reduce it to a GMT offset.  (We cannot do
+	 * this until we've filled all of the tm struct, since the zone's offset
+	 * might be time-varying.)
+	 */
 	if (tmfc.tzsign)
 	{
-		char	   *tz;
-
+		/* TZH and/or TZM fields */
 		if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
 			tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
 		{
@@ -4757,10 +4801,27 @@ do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
 			goto fail;
 		}
 
-		tz = psprintf("%c%02d:%02d",
-					  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
-
-		tm->tm_zone = tz;
+		tz->has_tz = true;
+		tz->gmtoffset = (tmfc.tzh * MINS_PER_HOUR + tmfc.tzm) * SECS_PER_MINUTE;
+		/* note we are flipping the sign convention here */
+		if (tmfc.tzsign > 0)
+			tz->gmtoffset = -tz->gmtoffset;
+	}
+	else if (tmfc.has_tz)
+	{
+		/* TZ field */
+		tz->has_tz = true;
+		if (tmfc.tzp == NULL)
+		{
+			/* fixed-offset abbreviation; flip the sign convention */
+			tz->gmtoffset = -tmfc.gmtoffset;
+		}
+		else
+		{
+			/* dynamic-offset abbreviation, resolve using specified time */
+			tz->gmtoffset = DetermineTimeZoneAbbrevOffset(tm, tmfc.abbrev,
+														  tmfc.tzp);
+		}
 	}
 
 	DEBUG_TM(tm);
author	Tom Lane <tgl@sss.pgh.pa.us>	2024-01-25 17:47:08 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2024-01-25 17:47:08 -0500
commit	8ba6fdf905d0f5aef70ced4504c6ad297bfe08ea (patch)
tree	58e63620c2de356734338912a082e930fd1d6e3e /src/backend/utils/adt/formatting.c
parent	06a66d87dbc7e06581af6765131ea250063fb4ac (diff)
download	postgresql-8ba6fdf905d0f5aef70ced4504c6ad297bfe08ea.tar.gz postgresql-8ba6fdf905d0f5aef70ced4504c6ad297bfe08ea.zip