diff options
author | Louis Pilfold <louis@lpil.uk> | 2021-09-04 18:11:04 +0100 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2021-09-04 18:11:04 +0100 |
commit | a54b1986daf44ebfd941849584175215306ad6d9 (patch) | |
tree | 586b723fe3bdebb0e4dfca7cc45778351a822f3a /src | |
parent | 3477ae008a46ffca6c79ee4f9ad8ec876d25ce31 (diff) | |
download | gleam_stdlib-a54b1986daf44ebfd941849584175215306ad6d9.tar.gz gleam_stdlib-a54b1986daf44ebfd941849584175215306ad6d9.zip |
Gleam URI parsing
Diffstat (limited to 'src')
-rw-r--r-- | src/gleam/uri.gleam | 224 |
1 files changed, 169 insertions, 55 deletions
diff --git a/src/gleam/uri.gleam b/src/gleam/uri.gleam index a757b8b..e67837e 100644 --- a/src/gleam/uri.gleam +++ b/src/gleam/uri.gleam @@ -7,19 +7,20 @@ //// Query encoding (Form encoding) is defined in the w3c specification. //// https://www.w3.org/TR/html52/sec-forms.html#urlencoded-form-data -import gleam/option.{None, Option, Some} -import gleam/string -import gleam/int -import gleam/list - if erlang { - import gleam/result import gleam/dynamic.{Dynamic} - import gleam/map - import gleam/function - import gleam/pair } +import gleam/function +import gleam/int +import gleam/list +import gleam/map +import gleam/option.{None, Option, Some} +import gleam/pair +import gleam/regex +import gleam/result +import gleam/string + /// Type representing holding the parsed components of an URI. /// All components of a URI are optional, except the path. /// @@ -35,56 +36,161 @@ pub type Uri { ) } -if erlang { - pub external fn erl_parse(String) -> Dynamic = - "uri_string" "parse" +// Special thanks to Elixir for this algorithm +// +/// Parses a compliant URI string into the `Uri` Type. +/// If the string is not a valid URI string then an error is returned. +/// +/// The opposite operation is `uri.to_string` +/// +/// ## Examples +/// +/// ``` +/// > parse("https://example.com:1234/a/b?query=true#fragment") +/// +/// Ok(Uri(scheme: Some("https"), ...)) +/// ``` +/// +pub fn parse(uri_string: String) -> Uri { + // From https://tools.ietf.org/html/rfc3986#appendix-B + let pattern = + // 12 3 4 5 6 7 8 + "^(([a-z][a-z0-9\\+\\-\\.]*):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#.*)?" + let matches = + pattern + |> regex_submatches(uri_string) + |> pad_list(8) - type UriKey { - Scheme - Userinfo - Host - Port - Path - Query - Fragment + let #(scheme, authority, path, query, fragment) = case matches { + [ + _scheme_with_colon, + scheme, + authority_with_slashes, + _authority, + path, + query_with_question_mark, + _query, + fragment, + ] -> #( + scheme, + authority_with_slashes, + path, + query_with_question_mark, + fragment, + ) + _ -> #(None, None, None, None, None) } - /// Parses a compliant URI string into the `Uri` Type. - /// If the string is not a valid URI string then an error is returned. - /// - /// The opposite operation is `uri.to_string` - /// - /// ## Examples - /// - /// ``` - /// > parse("https://example.com:1234/a/b?query=true#fragment") - /// - /// Ok(Uri(scheme: Some("https"), ...)) - /// ``` - /// - pub fn parse(string: String) -> Result(Uri, Nil) { - try uri_map = - dynamic.map(erl_parse(string)) - |> result.nil_error - let get = fn(k: UriKey, decode_type: dynamic.Decoder(t)) -> Option(t) { - uri_map - |> map.get(dynamic.from(k)) - |> result.then(function.compose(decode_type, result.nil_error)) - |> option.from_result + let scheme = noneify_empty_string(scheme) + let path = option.unwrap(path, "") + let query = noneify_query(query) + let #(userinfo, host, port) = split_authority(authority) + let fragment = + fragment + |> option.to_result(Nil) + |> result.then(string.pop_grapheme) + |> result.map(pair.second) + |> option.from_result + let port = case port { + None -> + case scheme { + Some("ftp") -> Some(21) + Some("sftp") -> Some(22) + Some("tftp") -> Some(69) + Some("http") -> Some(80) + Some("https") -> Some(443) + Some("ldap") -> Some(389) + _ -> None + } + _ -> port + } + let scheme = + scheme + |> noneify_empty_string + |> option.map(string.lowercase) + Uri( + scheme: scheme, + userinfo: userinfo, + host: host, + port: port, + path: path, + query: query, + fragment: fragment, + ) +} + +fn regex_submatches(pattern: String, string: String) -> List(Option(String)) { + pattern + |> regex.compile(regex.Options(case_insensitive: True, multi_line: False)) + |> result.nil_error + |> result.map(regex.scan(_, string)) + |> result.then(list.head) + |> result.map(fn(m: regex.Match) { m.submatches }) + |> result.unwrap([]) +} + +fn noneify_query(x: Option(String)) -> Option(String) { + case x { + None -> None + Some(x) -> + case string.pop_grapheme(x) { + Ok(#("?", query)) -> Some(query) + _ -> None + } + } +} + +fn noneify_empty_string(x: Option(String)) -> Option(String) { + case x { + Some("") | None -> None + Some(_) -> x + } +} + +// Split an authority into its userinfo, host and port parts. +fn split_authority( + authority: Option(String), +) -> #(Option(String), Option(String), Option(Int)) { + case option.unwrap(authority, "") { + "" -> #(None, None, None) + "//" -> #(None, Some(""), None) + authority -> { + let matches = + "^(//)?((.*)@)?(\\[[a-zA-Z0-9:.]*\\]|[^:]*)(:(\\d*))?" + |> regex_submatches(authority) + |> pad_list(6) + case matches { + [_, _, userinfo, host, _, port] -> { + let userinfo = noneify_empty_string(userinfo) + let host = noneify_empty_string(host) + let port = + port + |> option.unwrap("") + |> int.parse + |> option.from_result + #(userinfo, host, port) + } + _ -> #(None, None, None) + } } + } +} + +fn pad_list(list: List(Option(a)), size: Int) -> List(Option(a)) { + list + |> list.append(list.repeat(None, extra_required(list, size))) +} - let uri = - Uri( - scheme: get(Scheme, dynamic.string), - userinfo: get(Userinfo, dynamic.string), - host: get(Host, dynamic.string), - port: get(Port, dynamic.int), - path: option.unwrap(get(Path, dynamic.string), ""), - query: get(Query, dynamic.string), - fragment: get(Fragment, dynamic.string), - ) - Ok(uri) +fn extra_required(list: List(a), remaining: Int) -> Int { + case list { + _ if remaining == 0 -> 0 + [] -> remaining + [_, ..xs] -> extra_required(xs, remaining - 1) } +} + +if erlang { + import gleam/io external fn erl_parse_query(String) -> Dynamic = "uri_string" "dissect_query" @@ -284,7 +390,15 @@ pub fn to_string(uri: Uri) -> String { pub fn origin(uri: Uri) -> Result(String, Nil) { let Uri(scheme: scheme, host: host, port: port, ..) = uri case scheme { - Some("https") | Some("http") -> { + Some("https") if port == Some(443) -> { + let origin = Uri(scheme, None, host, None, "", None, None) + Ok(to_string(origin)) + } + Some("http") if port == Some(80) -> { + let origin = Uri(scheme, None, host, None, "", None, None) + Ok(to_string(origin)) + } + Some(s) if s == "http" || s == "https" -> { let origin = Uri(scheme, None, host, port, "", None, None) Ok(to_string(origin)) } @@ -318,7 +432,7 @@ pub fn merge(base: Uri, relative: Uri) -> Result(Uri, Nil) { option.or(relative.scheme, base.scheme), None, relative.host, - relative.port, + option.or(relative.port, base.port), path, relative.query, relative.fragment, |