From 6f44f8382a7dad77c42d53193701ae8e49beb214 Mon Sep 17 00:00:00 2001
From: "Ryan M. Moore" <rmm1047@gmail.com>
Date: Mon, 30 Dec 2024 22:00:57 -0500
Subject: Fix non-character handling in `string.utf_codepoint`

Treats `U+FFFE` and `U+FFFF` as valid Unicode codepoints rather than errors.  See #778.
---
 test/gleam/string_test.gleam | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

(limited to 'test')

diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index 4eddb9c..14e6476 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -702,17 +702,43 @@ pub fn from_utf_codepoints_test() {
 }
 
 pub fn utf_codepoint_test() {
-  string.utf_codepoint(1_114_444)
+  // Less than the lower bound on valid codepoints
+  string.utf_codepoint(-1)
   |> should.be_error
 
-  string.utf_codepoint(65_534)
+  // The lower bound on valid codepoints
+  string.utf_codepoint(0)
+  |> should.be_ok
+
+  // The upper bound on valid codepoints
+  string.utf_codepoint(1_114_111)
+  |> should.be_ok
+
+  // Greater than the upper bound on valid codepoints
+  string.utf_codepoint(1_114_112)
   |> should.be_error
 
+  // Non-characters U+FFFE and U+FFFF are valid codepoints. See #778.
+  string.utf_codepoint(65_534)
+  |> should.be_ok
+  string.utf_codepoint(65_535)
+  |> should.be_ok
+
+  // One less than the lowest "High-surrogate code point"
+  string.utf_codepoint(55_295)
+  |> should.be_ok
+
+  // Lowest value of the "High-surrogate code point" (U+D800 to U+DBFF)
   string.utf_codepoint(55_296)
   |> should.be_error
 
-  string.utf_codepoint(-1)
+  // Highest value of the "Low-surrogate code point" (U+DC00 to U+DFFF)
+  string.utf_codepoint(57_343)
   |> should.be_error
+
+  // One greater than the highest "Low-surrogate code point"
+  string.utf_codepoint(57_344)
+  |> should.be_ok
 }
 
 pub fn bit_array_utf_codepoint_test() {
-- 
cgit v1.2.3