aboutsummaryrefslogtreecommitdiff
path: root/src/test/modules/test_regex/expected/test_regex_utf8.out
blob: 112698ac618bc9f51bd3774858808bc956f843dc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/*
 * This test must be run in a database with UTF-8 encoding,
 * because other encodings don't support all the characters used.
 */
-- Capture "is the server encoding something other than UTF8?" into the
-- psql variable skip_test, and leave the script early when it is true.
SELECT getdatabaseencoding() <> 'UTF8'
       AS skip_test \gset
\if :skip_test
\quit
\endif
set client_encoding = utf8;
-- With standard_conforming_strings on, a backslash inside a plain '...'
-- literal is an ordinary character, so escapes such as \u1234 in the
-- patterns below are passed through to the regex code as written.  The subject strings
-- use E'...' so that the SQL lexer decodes their \uXXXX escapes instead.
set standard_conforming_strings = on;
-- Run the Tcl test cases that require Unicode
-- Each "expectMatch" comment appears to quote the originating Tcl test:
-- test id, flag letters, pattern, subject string, expected match.
-- In the results, the first row lists the REG_U* info flags reported for
-- the pattern (the leading number is presumably the subexpression count
-- -- TODO confirm) and the second row holds the matched text.
-- expectMatch	9.44 EMP*	{a[\u00fe-\u0507][\u00ff-\u0300]b} \
-- 	"a\u0102\u02ffb"	"a\u0102\u02ffb"
select * from test_regex('a[\u00fe-\u0507][\u00ff-\u0300]b', E'a\u0102\u02ffb', 'EMP*');
               test_regex               
----------------------------------------
 {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT}
 {aĂ˿b}
(2 rows)

-- Tests 13.27 and 13.28 differ only in how the Tcl source quotes the
-- pattern ("..." with a doubled backslash versus {...}); both translate to
-- the same SQL statement, which is why it appears twice.
-- \U followed by eight hex digits names a single character in the pattern.
-- expectMatch	13.27 P		"a\\U00001234x"	"a\u1234x"	"a\u1234x"
select * from test_regex('a\U00001234x', E'a\u1234x', 'P');
    test_regex     
-------------------
 {0,REG_UNONPOSIX}
 {aሴx}
(2 rows)

-- expectMatch	13.28 P		{a\U00001234x}	"a\u1234x"	"a\u1234x"
select * from test_regex('a\U00001234x', E'a\u1234x', 'P');
    test_regex     
-------------------
 {0,REG_UNONPOSIX}
 {aሴx}
(2 rows)

-- Only seven hex digits follow \U in this pattern (the next character is
-- 'x'), so the expected result here is an error rather than the match
-- that the Tcl test case describes.
-- expectMatch	13.29 P		"a\\U0001234x"	"a\u1234x"	"a\u1234x"
-- Tcl has relaxed their code to allow 1-8 hex digits, but Postgres hasn't
select * from test_regex('a\U0001234x', E'a\u1234x', 'P');
ERROR:  invalid regular expression: invalid escape \ sequence
-- expectMatch	13.30 P		{a\U0001234x}	"a\u1234x"	"a\u1234x"
-- Tcl has relaxed their code to allow 1-8 hex digits, but Postgres hasn't
select * from test_regex('a\U0001234x', E'a\u1234x', 'P');
ERROR:  invalid regular expression: invalid escape \ sequence
-- Nine hex digits follow \U here.  The expected output shows that the
-- escape takes the first eight (00001234) and the trailing '5' is matched
-- as an ordinary character.
-- expectMatch	13.31 P		"a\\U000012345x"	"a\u12345x"	"a\u12345x"
select * from test_regex('a\U000012345x', E'a\u12345x', 'P');
    test_regex     
-------------------
 {0,REG_UNONPOSIX}
 {aሴ5x}
(2 rows)

-- expectMatch	13.32 P		{a\U000012345x}	"a\u12345x"	"a\u12345x"
select * from test_regex('a\U000012345x', E'a\u12345x', 'P');
    test_regex     
-------------------
 {0,REG_UNONPOSIX}
 {aሴ5x}
(2 rows)

-- As with 13.29/13.30, \U is followed by seven hex digits and then 'x';
-- the expected result is an "invalid escape" error instead of a match.
-- expectMatch	13.33 P		"a\\U1000000x"	"a\ufffd0x"	"a\ufffd0x"
-- Tcl allows this as a standalone character, but Postgres doesn't
select * from test_regex('a\U1000000x', E'a\ufffd0x', 'P');
ERROR:  invalid regular expression: invalid escape \ sequence
-- expectMatch	13.34 P		{a\U1000000x}	"a\ufffd0x"	"a\ufffd0x"
-- Tcl allows this as a standalone character, but Postgres doesn't
select * from test_regex('a\U1000000x', E'a\ufffd0x', 'P');
ERROR:  invalid regular expression: invalid escape \ sequence
-- Additional tests, not derived from Tcl
-- Exercise logic around high character ranges a bit more
-- The pattern is spread over several indented lines; the 'x' flag letter
-- presumably selects expanded syntax so that this whitespace is ignored
-- (the subject contains none, yet the match succeeds) -- TODO confirm.
-- For the subject a \u1234 \u1236 \u1236 x, only the '+' bracket needs to
-- consume a character (\u1234); the '*' brackets can all match empty, and
-- the two literal \u1236 escapes plus 'x' cover the rest.
select * from test_regex('a
  [\u1000-\u1100]*
  [\u3000-\u3100]*
  [\u1234-\u25ff]+
  [\u2000-\u35ff]*
  [\u2600-\u2f00]*
  \u1236\u1236x',
  E'a\u1234\u1236\u1236x', 'xEMP');
               test_regex               
----------------------------------------
 {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT}
 {aሴሶሶx}
(2 rows)

-- Mix named character classes with an explicit high range.  The 'L' flag
-- letter lines up with the extra REG_ULOCALE entry in the first result row.
-- Here the whole two-character subject is expected to match.
select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237',
  E'\u1500\u1237', 'ELMP');
                     test_regex                     
----------------------------------------------------
 {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE}
 {ᔀሷ}
(2 rows)

-- Same pattern, but the subject has no \u1237 for the mandatory trailing
-- literal, so no match row is expected: only the flags row comes back.
select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237',
  E'A\u1239', 'ELMP');
                     test_regex                     
----------------------------------------------------
 {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE}
(1 row)