-
-
Notifications
You must be signed in to change notification settings - Fork 111
implement data url parsing #756
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
KhafraDev
wants to merge
3
commits into
ada-url:main
Choose a base branch
from
KhafraDev:data-url
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from 1 commit
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#ifndef ADA_DATA_URL_H | ||
#define ADA_DATA_URL_H | ||
|
||
#include <cstddef> | |
#include <string> | |
#include <string_view> | |
|
||
namespace ada::data_url { | ||
// https://fetch.spec.whatwg.org/#data-url-struct | ||
/**
 * Result record produced by the data: URL processor.
 *
 * The struct owns no raw resources (a flag and two strings), so it follows
 * the Rule of Zero: the compiler-generated copy/move operations and
 * destructor are exactly what is needed and are not spelled out.
 */
struct data_url {
  /// False when processing failed; the other fields are then unspecified.
  bool is_valid = true;
  /// Payload of the data: URL as produced by the processor.
  std::string body{};
  /// MIME type information extracted from the part before the ','.
  std::string essence{};
};
|
||
// Processes `data_url` following the numbered steps of the Fetch data: URL | |
// processor quoted in the implementation. The returned record has | |
// `is_valid == false` when the input does not parse as a URL, its protocol is | |
// not "data:", or no ',' separates the MIME type from the body. | |
ada::data_url::data_url parse_data_url(std::string_view data_url); | |
|
||
// Collects characters of `input` from `position` up to the first occurrence | |
// of `c`. On return `position` is the index of that occurrence, or | |
// `input.length()` when `c` does not occur. | |
std::string collect_sequence_of_code_points(char c, const std::string& input, size_t& position); | |
|
||
// Returns true when `c` is one of the characters the implementation | |
// classifies as ASCII whitespace. | |
bool isASCIIWhiteSpace(char c); | |
|
||
// Returns a copy of `input` with whitespace (as classified by | |
// isASCIIWhiteSpace) removed from the front when `leading` is true and from | |
// the back when `trailing` is true. | |
std::string removeASCIIWhiteSpace(const std::string& input, bool leading, bool trailing); | |
|
||
// Reports whether `input` ends with a ';'-introduced "base64" marker, compared | |
// ASCII case-insensitively. | |
// NOTE(review): `static` gives this declaration internal linkage in every | |
// translation unit that includes the header, while the definition lives in a | |
// single .cpp file - confirm this is intended. | |
static constexpr bool is_base64(std::string_view input); | |
|
||
} | ||
|
||
#endif // ADA_DATA_URL_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
#include <string_view> | ||
#include <cctype> | ||
|
||
#include "ada.h" | ||
|
||
namespace ada::data_url { | ||
|
||
/**
 * Runs the WHATWG Fetch "data: URL processor" on `data_url`.
 *
 * @see https://fetch.spec.whatwg.org/#data-url-processor
 *
 * @param data_url Textual URL to process.
 * @return A record whose `is_valid` is false when the input does not parse as
 *         a URL, is not a "data:" URL, or has no ',' separating the MIME type
 *         from the body. On success `body` holds the percent-decoded payload
 *         and `essence` holds the MIME type with its parameters removed.
 *
 * Not implemented yet (kept as TODOs below): forgiving-base64 decoding of the
 * body (steps 11.2/11.3) and full MIME type parsing (steps 13/14).
 */
ada::data_url::data_url parse_data_url(std::string_view data_url) {
  auto out = ada::data_url::data_url();

  auto url = ada::parse<ada::url>(data_url, nullptr);

  // 1. Assert: dataURL's scheme is "data".
  if (!url || url->get_protocol() != "data:") {
    out.is_valid = false;
    return out;
  }

  // 2. Let input be the result of running the URL serializer on dataURL with
  //    exclude fragment set to true.
  url->set_hash({});
  auto input = url->get_href();

  // 3. Remove the leading "data:" from input.
  input.erase(0, 5);

  // 4. Let position point at the start of input.
  size_t position = 0;

  // 5. Let mimeType be the result of collecting a sequence of code points that
  //    are not equal to U+002C (,), given position.
  auto mimetype = collect_sequence_of_code_points(',', input, position);

  // 6. Strip leading and trailing ASCII whitespace from mimeType.
  mimetype = removeASCIIWhiteSpace(mimetype, true, true);

  // 7. If position is past the end of input, then return failure.
  if (position >= input.length()) {
    out.is_valid = false;
    return out;
  }

  // 8. Advance position by 1.
  position++;

  // 9. Let encodedBody be the remainder of input. `position` already points
  //    just past the ',', so no offset has to be recomputed from the MIME type.
  const std::string encoded_body = input.substr(position);

  // 10. Let body be the percent-decoding of encodedBody.
  std::string body =
      ada::unicode::percent_decode(encoded_body, encoded_body.find('%'));

  // 11. If mimeType ends with U+003B (;), followed by zero or more U+0020
  //     SPACE, followed by an ASCII case-insensitive match for "base64", then:
  if (is_base64(mimetype)) {
    // 11.1. Let stringBody be the isomorphic decode of body.
    // 11.2. Set body to the forgiving-base64 decode of stringBody.
    // 11.3. If body is failure, then return failure.
    // TODO: forgiving-base64 decoding is not implemented; until then the
    // percent-decoded bytes are returned unchanged.

    // 11.4. Remove the last 6 code points from mimeType.
    // 11.5. Remove trailing U+0020 SPACE code points from mimeType, if any.
    // 11.6. Remove the last U+003B (;) from mimeType.
    // The marker starts at the last ';' of mimeType, so truncating there
    // performs all three steps. The index must come from mimeType itself: a
    // ';' inside the body lies beyond mimeType's length and would make
    // erase() throw.
    const size_t last_semi_colon = mimetype.find_last_of(';');
    if (last_semi_colon != std::string::npos) {
      mimetype.erase(last_semi_colon);
    }
  }

  // 12. If mimeType starts with ";", then prepend "text/plain" to mimeType.
  if (!mimetype.empty() && mimetype.front() == ';') {
    mimetype = "text/plain" + mimetype;
  }

  // 13./14. TODO: parse mimeType as a MIME type and fall back to
  // "text/plain;charset=US-ASCII" on failure. Until a MIME parser is
  // available, the essence is approximated by the text before the first ';',
  // and an empty MIME type takes the "text/plain" fallback.
  out.essence = mimetype.substr(0, mimetype.find(';'));
  if (out.essence.empty()) {
    out.essence = "text/plain";
  }

  // The body is part of the result on every successful path, not only for
  // base64 payloads.
  out.body = body;

  return out;
}
|
||
/**
 * Collects the characters of `input` starting at `position` up to, but not
 * including, the first occurrence of `c`.
 *
 * @param c        Delimiter that ends the collected sequence.
 * @param input    Text to scan.
 * @param position In: index to start from. Out: index of the delimiter, or
 *                 `input.size()` when the delimiter does not occur.
 * @return The collected characters (empty when `position` is at or past the
 *         end of `input`).
 */
std::string collect_sequence_of_code_points(char c, const std::string& input,
                                            size_t& position) {
  const size_t start = position;

  // Nothing can be collected at or beyond the end; clamp instead of letting
  // substr() throw std::out_of_range for an out-of-range start.
  if (start >= input.size()) {
    position = input.size();
    return {};
  }

  const size_t delimiter = input.find(c, start);
  position = (delimiter == std::string::npos) ? input.size() : delimiter;

  // substr() takes (offset, count), so the count is the distance travelled,
  // not the end index.
  return input.substr(start, position - start);
}
|
||
/**
 * Tells whether `c` is ASCII whitespace as defined by the WHATWG Infra
 * standard: U+0009 TAB, U+000A LF, U+000C FF, U+000D CR or U+0020 SPACE.
 *
 * @param c Character to classify.
 * @return true when `c` is one of the five ASCII whitespace characters.
 */
bool isASCIIWhiteSpace(char c) {
  return c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == ' ';
}

/**
 * Returns a copy of `input` with ASCII whitespace stripped from its ends.
 *
 * @param input    Text to strip.
 * @param leading  Strip whitespace at the front when true.
 * @param trailing Strip whitespace at the back when true.
 * @return The stripped copy; empty when `input` consists only of stripped
 *         whitespace.
 */
std::string removeASCIIWhiteSpace(const std::string& input, bool leading,
                                  bool trailing) {
  size_t first = 0;            // index of the first kept character
  size_t last = input.size();  // one past the last kept character

  if (leading) {
    while (first < last && isASCIIWhiteSpace(input[first])) {
      ++first;
    }
  }

  if (trailing) {
    // Inspect the character before `last`; input[last] is the terminator on
    // the first iteration and never whitespace.
    while (last > first && isASCIIWhiteSpace(input[last - 1])) {
      --last;
    }
  }

  // substr() takes (offset, count), hence the length rather than the end.
  return input.substr(first, last - first);
}
|
||
/**
 * Reports whether `input` ends with U+003B (;), followed by zero or more
 * U+0020 SPACE, followed by an ASCII case-insensitive match for "base64"
 * (step 11 of the Fetch data: URL processor).
 *
 * @param input MIME type text, expected to be already stripped of surrounding
 *              ASCII whitespace.
 * @return true when the base64 marker is present at the end of `input`.
 */
static constexpr bool is_base64(std::string_view input) {
  const auto semicolon = input.find_last_of(';');
  if (semicolon == std::string_view::npos) {
    return false;
  }

  // Skip the optional run of spaces between ';' and the marker.
  std::string_view tail = input.substr(semicolon + 1);
  while (!tail.empty() && tail.front() == ' ') {
    tail.remove_prefix(1);
  }

  constexpr std::string_view marker = "base64";
  if (tail.size() != marker.size()) {
    return false;
  }

  for (size_t i = 0; i < marker.size(); ++i) {
    char ch = tail[i];
    // Fold upper-case ASCII letters only; digits must match exactly.
    if (ch >= 'A' && ch <= 'Z') {
      ch = static_cast<char>(ch | 0x20);
    }
    if (ch != marker[i]) {
      return false;
    }
  }
  return true;
}
|
||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.