about | summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author: Ryan M. Moore <rmm1047@gmail.com> 2024-12-30 22:00:57 -0500
committer: Louis Pilfold <louis@lpil.uk> 2025-01-03 21:02:38 +0000
commit: 6f44f8382a7dad77c42d53193701ae8e49beb214 (patch)
tree: 013ef4d1e253a88ecfad21eaac82a22a77b871b9 /test
parent: c5d0edeaf6edd3280883497d931bdae8aa88afa5 (diff)
download: gleam_stdlib-6f44f8382a7dad77c42d53193701ae8e49beb214.tar.gz
download: gleam_stdlib-6f44f8382a7dad77c42d53193701ae8e49beb214.zip
Fix non-character handling in `string.utf_codepoint`
Treats `U+FFFE` and `U+FFFF` as valid Unicode codepoints rather than errors. See #778.
Diffstat (limited to 'test')
-rw-r--r-- test/gleam/string_test.gleam | 32
1 files changed, 29 insertions, 3 deletions
diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index 4eddb9c..14e6476 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -702,17 +702,43 @@ pub fn from_utf_codepoints_test() {
}
pub fn utf_codepoint_test() {
- string.utf_codepoint(1_114_444)
+ // Less than the lower bound on valid codepoints
+ string.utf_codepoint(-1)
|> should.be_error
- string.utf_codepoint(65_534)
+ // The lower bound on valid codepoints
+ string.utf_codepoint(0)
+ |> should.be_ok
+
+ // The upper bound for valid code points
+ string.utf_codepoint(1_114_111)
+ |> should.be_ok
+
+ // Greater than the upper bound on valid codepoints
+ string.utf_codepoint(1_114_112)
|> should.be_error
+ // Non-characters U+FFFE and U+FFFF are valid codepoints. See (#778).
+ string.utf_codepoint(65_534)
+ |> should.be_ok
+ string.utf_codepoint(65_535)
+ |> should.be_ok
+
+ // One less than the lowest "High-surrogate code point"
+ string.utf_codepoint(55_295)
+ |> should.be_ok
+
+ // Lowest value of the "High-surrogate code point" (U+D800 to U+DBFF)
string.utf_codepoint(55_296)
|> should.be_error
- string.utf_codepoint(-1)
+ // Highest value of the "Low-surrogate code point" (U+DC00 to U+DFFF)
+ string.utf_codepoint(57_343)
|> should.be_error
+
+ // One greater than the highest "Low-surrogate code point"
+ string.utf_codepoint(57_344)
+ |> should.be_ok
}
pub fn bit_array_utf_codepoint_test() {