Let's try uriparser

darbyjohnston · darbyjohnston · commit 7bb887b258a2 · 2025-12-18T17:40:12.000-08:00
Signed-off-by: Darby Johnston <darbyjohnston@yahoo.com>
diff --git a/src/opentimelineio/bundleUtils.cpp b/src/opentimelineio/bundleUtils.cpp
@@ -118,7 +118,7 @@ SerializableObject::Retainer<Timeline> timeline_for_bundle_and_manifest(
             std::string const url    = er ? er->target_url()
                                           : isr->target_url_base();
             std::string const scheme = scheme_from_url(url);
-            if (!(scheme == "file://" || scheme.empty()))
+            if (!(scheme == "file" || scheme.empty()))
             {
                 if (MediaReferencePolicy::ErrorIfNotFile == media_policy)
                 {
diff --git a/src/opentimelineio/urlUtils.cpp b/src/opentimelineio/urlUtils.cpp
@@ -5,161 +5,64 @@
 
 #include "opentimelineio/fileUtils.h"
 
-#include <algorithm>
+#include <uriparser/Uri.h>
+
 #include <filesystem>
-#include <iomanip>
-#include <regex>
-#include <sstream>
+#include <iostream>
 
 namespace opentimelineio { namespace OPENTIMELINEIO_VERSION {
 
 std::string
 scheme_from_url(std::string const& url)
 {
-    std::regex const rx("^([A-Za-z0-9+-\\.]+://)");
-    auto const       rxi = std::sregex_iterator(url.begin(), url.end(), rx);
-    return rxi != std::sregex_iterator() ? rxi->str() : std::string();
-}
-
-std::string
-url_encode(std::string const& url)
-{
-    // Don't encode these characters.
-    std::vector<char> const chars = { '-', '.', '_', '~', ':', '/', '?',  '#',
-                                      '[', ']', '@', '!', '$', '&', '\'', '(',
-                                      ')', '*', '+', ',', ';', '=', '\\' };
-
-    // Copy characters to the result, encoding if necessary.
-    std::stringstream ss;
-    ss.fill('0');
-    ss << std::hex;
-    for (auto i = url.begin(), end = url.end(); i != end; ++i)
+    std::string out;
+    UriUriA uri;
+    const char* uri_error_pos = nullptr;
+    if (uriParseSingleUriA(&uri, url.c_str(), &uri_error_pos) == URI_SUCCESS)
     {
-        auto const j = std::find(chars.begin(), chars.end(), *i);
-        if (std::isalnum(*i) || j != chars.end())
+        if (uri.scheme.first)
         {
-            ss << *i;
-        }
-        else
-        {
-            ss << '%' << std::setw(2) << int(*i);
+            out = std::string(uri.scheme.first, uri.scheme.afterLast - uri.scheme.first);
         }
+        uriFreeUriMembersA(&uri);
     }
-    return ss.str();
+    return out;
 }
 
 std::string
-url_decode(std::string const& url)
+url_from_filepath(std::string const& filepath)
 {
-    std::string result;
-
-    // Find all percent encodings.
-    size_t           url_pos = 0;
-    std::regex const rx("(%[0-9A-Fa-f][0-9A-Fa-f])");
-    for (auto i = std::sregex_iterator(url.begin(), url.end(), rx);
-         i != std::sregex_iterator();
-         ++i)
+    std::cout << "url_from_filepath()" << std::endl;
+    std::cout << "  filepath: " << filepath << std::endl;
+    std::string out;
+    //std::string tmp(8 + 3 * filepath.size() + 1, 0);
+    //if (uriWindowsFilenameToUriStringA(filepath.c_str(), tmp.data()) == URI_SUCCESS)
+    std::vector<char> tmp(7 + 3 * filepath.size() + 1, 0);
+    if (uriUnixFilenameToUriStringA(filepath.c_str(), tmp.data()) == URI_SUCCESS &&
+        !tmp.empty())
     {
-        // Copy parts without any encodings.
-        if (url_pos != static_cast<size_t>(i->position()))
-        {
-            result.append(url.substr(url_pos, i->position() - url_pos));
-            url_pos = i->position();
-        }
-
-        // Convert the encoding and append it.
-        std::stringstream ss;
-        ss << std::hex << i->str().substr(1);
-        unsigned int j = 0;
-        ss >> j;
-        result.push_back(char(j));
-        url_pos += i->str().size();
-    }
-
-    // Copy the remainder without any encodings.
-    if (!url.empty() && url_pos != url.size() - 1)
-    {
-        result.append(url.substr(url_pos, url.size() - url_pos));
+        out = std::string(tmp.data());
     }
-
-    return result;
-}
-
-std::string
-url_from_filepath(std::string const& filepath)
-{
-    std::string const encoded = url_encode(to_unix_separators(filepath));
-    std::string const url = std::filesystem::u8path(filepath).is_relative()
-                                ? encoded
-                                : ("file://" + encoded);
-    return url;
+    std::cout << "  out: " << out.c_str() << std::endl;
+    return out;
 }
 
 std::string
 filepath_from_url(std::string const& url)
 {
-    // Skip over the URL scheme.
-    bool              has_scheme = false;
-    size_t            pos        = 0;
-    std::string const scheme     = scheme_from_url(url);
-    if (!scheme.empty())
-    {
-        has_scheme = true;
-        pos += scheme.size();
-    }
-
-    // Remove the URL query and fragment.
-    size_t size = std::string::npos;
-    size_t i    = url.find('?', pos);
-    size_t j    = url.find('#', pos);
-    if (i != std::string::npos || j != std::string::npos)
-    {
-        size = std::min(i, j) + 1;
-    }
-    std::string const path = url.substr(pos, size);
-
-    // Decode the path.
-    std::string decoded = url_decode(path);
-
-    // Use UNIX separators.
-    decoded = to_unix_separators(decoded);
-
-    // Check for Windows drive letters.
-    bool        has_windows_drive = false;
-    std::regex  rx                = std::regex("^([A-Za-z]:)");
-    std::smatch matches;
-    if (std::regex_search(decoded, matches, rx))
-    {
-        has_windows_drive = true;
-    }
-    else
-    {
-        rx = std::regex("^(.*/)([A-Za-z]:)");
-        if (std::regex_search(decoded, matches, rx))
-        {
-            has_windows_drive = true;
-            decoded = decoded.substr(matches.position(1) + matches.length(1));
-        }
-    }
-
-    // Add the "//" for UNC paths.
-    bool has_unc = false;
-    size         = decoded.size();
-    if (has_scheme && !has_windows_drive && pos < size - 1 && decoded[0] != '/')
+    std::cout << "filepath_from_url()" << std::endl;
+    std::cout << "  url: " << url << std::endl;
+    std::string out;
+    //std::string tmp(url.size() + 1, 0);
+    //if (uriUriStringToWindowsFilenameA(url.c_str(), tmp.data()) == URI_SUCCESS)
+    std::vector<char> tmp(url.size() + 1, 0);
+    if (uriUriStringToUnixFilenameA(url.c_str(), tmp.data()) == URI_SUCCESS &&
+        !tmp.empty())
     {
-        has_unc = true;
-        decoded.insert(0, "//");
+        out = std::string(tmp.data());
     }
-
-    // Remove the current directory.
-    rx = std::regex("^(./)");
-    if (!has_windows_drive && !has_unc
-        && std::regex_search(decoded, matches, rx))
-    {
-        decoded = decoded.substr(matches.position() + matches.length());
-    }
-
-    return decoded;
+    std::cout << "  out: " << out.c_str() << std::endl;
+    return out;
 }
 
 }} // namespace opentimelineio::OPENTIMELINEIO_VERSION
diff --git a/src/opentimelineio/urlUtils.h b/src/opentimelineio/urlUtils.h
@@ -9,38 +9,15 @@
 namespace opentimelineio { namespace OPENTIMELINEIO_VERSION {
 
 /// @name URL Utilities
-/// @todo Should we use a third party library for handling URLs?
 ///@{
 
 /// @brief Get the scheme from a URL.
 OTIO_API std::string scheme_from_url(std::string const&);
 
-/// @brief Encode a URL (i.e., replace " " characters with "%20").
-OTIO_API std::string url_encode(std::string const& url);
-
-/// @brief Decode a URL (i.e., replace "%20" strings with " ").
-std::string url_decode(std::string const& url);
-
 /// @brief Convert a filesystem path to a file URL.
-///
-/// For example:
-/// * "/var/tmp/thing.otio" -> "file:///var/tmp/thing.otio"
-/// * "subdir/thing.otio" -> "tmp/thing.otio"
-///
-/// @todo Hopefully this can be replaced by functionality from the C++
-/// standard library at some point.
 OTIO_API std::string url_from_filepath(std::string const&);
 
 /// @brief Convert a file URL to a filesystem path.
-///
-/// URLs can either be encoded according to the `RFC 3986` standard or not.
-/// Additionally, Windows mapped drive letter and UNC paths need to be
-/// accounted for when processing URLs.
-///
-/// RFC 3986: https://tools.ietf.org/html/rfc3986
-///
-/// @todo Hopefully this can be replaced by functionality from the C++
-/// standard library at some point.
 OTIO_API std::string filepath_from_url(std::string const&);
 
 ///@}

Original file line number	Diff line number	Diff line change
`@@ -118,7 +118,7 @@ SerializableObject::Retainer<Timeline> timeline_for_bundle_and_manifest(`
`118`	`118`	`std::string const url = er ? er->target_url()`
`119`	`119`	`: isr->target_url_base();`
`120`	`120`	`std::string const scheme = scheme_from_url(url);`
`121`		`- if (!(scheme == "file://" \|\| scheme.empty()))`
	`121`	`+ if (!(scheme == "file" \|\| scheme.empty()))`
`122`	`122`	`{`
`123`	`123`	`if (MediaReferencePolicy::ErrorIfNotFile == media_policy)`
`124`	`124`	`{`