Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"files": "package-lock.json|^.secrets.baseline$",
"lines": null
},
"generated_at": "2023-11-10T17:28:14Z",
"generated_at": "2023-11-22T18:03:02Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down Expand Up @@ -416,15 +416,15 @@
"hashed_secret": "2863fa4b5510c46afc2bd2998dfbc0cf3d6df032",
"is_secret": false,
"is_verified": false,
"line_number": 528,
"line_number": 527,
"type": "Secret Keyword",
"verified_result": null
},
{
"hashed_secret": "b9cad336062c0dc3bb30145b1a6697fccfe755a6",
"is_secret": false,
"is_verified": false,
"line_number": 589,
"line_number": 588,
"type": "Secret Keyword",
"verified_result": null
}
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ matrix:
- python: 3.10
- python: 3.11

cache: pip3
cache: pip

before_install:
- npm install npm@latest -g
Expand Down
49 changes: 45 additions & 4 deletions ibm_cloud_sdk_core/base_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@
from http.cookiejar import CookieJar
from os.path import basename
from typing import Dict, List, Optional, Tuple, Union
from urllib3.util.retry import Retry
from urllib.parse import urlparse

import requests
from requests.structures import CaseInsensitiveDict
from requests.exceptions import JSONDecodeError
from urllib3.exceptions import MaxRetryError
from urllib3.util.retry import Retry

from ibm_cloud_sdk_core.authenticators import Authenticator
from .api_exception import ApiException
Expand All @@ -52,6 +54,10 @@
logger = logging.getLogger(__name__)


MAX_REDIRECTS = 10
SAFE_HEADERS = ['authorization', 'www-authenticate', 'cookie', 'cookie2']


# pylint: disable=too-many-instance-attributes
# pylint: disable=too-many-locals
class BaseService:
Expand Down Expand Up @@ -96,7 +102,7 @@ def __init__(
service_url: str = None,
authenticator: Authenticator = None,
disable_ssl_verification: bool = False,
enable_gzip_compression: bool = False
enable_gzip_compression: bool = False,
) -> None:
self.set_service_url(service_url)
self.http_client = requests.Session()
Expand Down Expand Up @@ -280,6 +286,7 @@ def set_default_headers(self, headers: Dict[str, str]) -> None:
else:
raise TypeError("headers parameter must be a dictionary")

# pylint: disable=too-many-branches
def send(self, request: requests.Request, **kwargs) -> DetailedResponse:
"""Send a request and wrap the response in a DetailedResponse or APIException.

Expand All @@ -294,7 +301,9 @@ def send(self, request: requests.Request, **kwargs) -> DetailedResponse:
"""
# Use a one minute timeout when our caller doesn't give a timeout.
# http://docs.python-requests.org/en/master/user/quickstart/#timeouts
kwargs = dict({"timeout": 60}, **kwargs)
# We also disable the default redirection, to have more granular control
# over the headers sent in each request.
kwargs = dict({'timeout': 60, 'allow_redirects': False}, **kwargs)
kwargs = dict(kwargs, **self.http_config)

if self.disable_ssl_verification:
Expand All @@ -314,6 +323,38 @@ def send(self, request: requests.Request, **kwargs) -> DetailedResponse:
try:
response = self.http_client.request(**request, cookies=self.jar, **kwargs)

# Handle HTTP redirects.
redirects_count = 0
# Check if the response is a redirect to another host.
while response.is_redirect and response.next is not None and redirects_count < MAX_REDIRECTS:
redirects_count += 1

# urllib3 has already prepared a request that can almost be used as-is.
next_request = response.next

# If both the original and the redirected URL are under the `.cloud.ibm.com` domain,
# copy the safe headers that are used for authentication purposes,
if self.service_url.endswith('.cloud.ibm.com') and urlparse(next_request.url).netloc.endswith(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.service_url.endswith('.cloud.ibm.com') will most definitely not work in the general case. To see why, consider a service url like https://myhost.ibm.com/api or https://myhost.ibm.com:8080. I think you'll need to parse self.service_url and extract the hostname from it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's a good point and I already parse the url in the second part of the expression... :)

'.cloud.ibm.com'
):
original_headers = request.get('headers')
for header, value in original_headers.items():
if header.lower() in SAFE_HEADERS:
next_request.headers[header] = value
# otherwise remove them manually, because `urllib3` doesn't strip all of them.
else:
for header in SAFE_HEADERS:
next_request.headers.pop(header, None)

response = self.http_client.send(next_request, **kwargs)

# If we reached the max number of redirects and the last response is still a redirect
# stop processing the response and return an error to the user.
if redirects_count == MAX_REDIRECTS and response.is_redirect:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not do the limit check immediately after incrementing redirect_count above? I think it could be cleaner as you would just need to check:
if redirects_count >= MAX_REDIRECTS:

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense! I need to excuse myself so: initially I implemented the error handling in a different way then I forgot to refactor that check.. 😄

raise MaxRetryError(
None, response.url, reason=f'reached the maximum number of redirects: {MAX_REDIRECTS}'
)

# Process a "success" response.
if 200 <= response.status_code <= 299:
if response.status_code == 204 or request['method'] == 'HEAD':
Expand Down Expand Up @@ -362,7 +403,7 @@ def prepare_request(
params: Optional[dict] = None,
data: Optional[Union[str, dict]] = None,
files: Optional[Union[Dict[str, Tuple[str]], List[Tuple[str, Tuple[str, ...]]]]] = None,
**kwargs
**kwargs,
) -> dict:
"""Build a dict that represents an HTTP service request.

Expand Down
162 changes: 161 additions & 1 deletion test/test_base_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# coding=utf-8
# pylint: disable=missing-docstring,protected-access,too-few-public-methods
# pylint: disable=missing-docstring,protected-access,too-few-public-methods,too-many-lines
import gzip
import json
import os
Expand Down Expand Up @@ -788,6 +788,166 @@ def test_retry_config_external():
assert retry_err.value.reason == error


@responses.activate
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to add some additional tests:

  • redirect to same host (both inside cloud.ibm.com)
  • redirect to same host (both outside cloud.ibm.com)

Also, because you are completely overriding the default "follow redirects" behavior, I think you'll need to add some tests for redirect scenarios involving POST and a representative sample of redirect status codes. There are some subtle gotcha's when implementing redirects and we need to make sure that we are providing equivalent function as compared to the default "follow redirects" behavior.
I had to do this in the Java implementation as well. For example, for 301/302 status codes, a POST should be re-written as a GET with no body (it may fail in the redirected server, but that's what the HTTP semantics should be). On the other hand, 307/308 status codes should NOT cause a POST to be re-written as a GET.
You should study the code that you are overriding and try to match it as closely as possible other than adding support for the *.cloud.ibm.com stuff.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests are already there those scenarios. Copied from the tests

def test_redirect_ibm_to_ibm_success():
    url_from = 'http://region1.cloud.ibm.com/'
    url_to = 'http://region2.cloud.ibm.com/'
    ...

and

def test_redirect_not_ibm_to_not_ibm_fail():
    url_from = 'http://region1.notcloud.ibm.com/'
    url_to = 'http://region2.notcloud.ibm.com/'
    ...

Are you talking about something else?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, because you are completely overriding the default "follow redirects" behavior...

That's not the case. Still the requests package handles the redirections, the "allow_redirect": False" option only prevents to fire the next request automatically, but it's still being processed and resolved appropriately. From the session.py in the requests package:

# If redirects aren't being followed, store the response on the Request for Response.next().
        if not allow_redirects:
            try:
                r._next = next(
                    self.resolve_redirects(r, request, yield_requests=True, **kwargs)
                )
            except StopIteration:
                pass

Let's touch base later, to make sure we are on the same page! :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you talking about something else?

  1. redirect to same host (both inside cloud.ibm.com):
    from: region1.cloud.ibm.com
    to: region1.cloud.ibm.com

  2. redirect to same host (both outside cloud.ibm.com):
    from: region1.notcloud.ibm.com
    to: region1.notcloud.ibm.com

Redirecting to the same host SHOULD allow the safe headers to be included in the redirected request, regardless of whether the host is inside or outside the cloud.ibm.com domain.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's touch base later, to make sure we are on the same page!

Yes, let's discuss tomorrow.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redirecting to the same host SHOULD allow the safe headers to be included in the redirected request, regardless of whether the host is inside or outside the cloud.ibm.com domain.

Ok it's my bad, I used the domain and host words interchangeably(which is incorrect, I know :))...

def test_redirect_ibm_to_ibm_success():
url_from = 'http://region1.cloud.ibm.com/'
url_to = 'http://region2.cloud.ibm.com/'

safe_headers = {
'Authorization': 'foo',
'WWW-Authenticate': 'bar',
'Cookie': 'baz',
'Cookie2': 'baz2',
}

responses.add(
responses.GET, url_from, status=302, adding_headers={'Location': url_to}, body='just about to redirect'
)
responses.add(responses.GET, url_to, status=200, body='successfully redirected')

service = BaseService(service_url=url_from, authenticator=NoAuthAuthenticator())

prepped = service.prepare_request('GET', '', headers=safe_headers)
response = service.send(prepped)
result = response.get_result()

assert result.status_code == 200
assert result.url == url_to
assert result.text == 'successfully redirected'

# Make sure the headers are included in the 2nd, redirected request.
redirected_headers = responses.calls[1].request.headers
for key in safe_headers:
assert key in redirected_headers


@responses.activate
def test_redirect_not_ibm_to_ibm_fail():
url_from = 'http://region1.notcloud.ibm.com/'
url_to = 'http://region2.cloud.ibm.com/'

safe_headers = {
'Authorization': 'foo',
'WWW-Authenticate': 'bar',
'Cookie': 'baz',
'Cookie2': 'baz2',
}

responses.add(
responses.GET, url_from, status=302, adding_headers={'Location': url_to}, body='just about to redirect'
)
responses.add(responses.GET, url_to, status=200, body='successfully redirected')

service = BaseService(service_url=url_from, authenticator=NoAuthAuthenticator())

prepped = service.prepare_request('GET', '', headers=safe_headers)
response = service.send(prepped)
result = response.get_result()

assert result.status_code == 200
assert result.url == url_to
assert result.text == 'successfully redirected'

# Make sure the headers have been excluded from the 2nd, redirected request.
redirected_headers = responses.calls[1].request.headers
for key in safe_headers:
assert key not in redirected_headers


@responses.activate
def test_redirect_ibm_to_not_ibm_fail():
url_from = 'http://region1.cloud.ibm.com/'
url_to = 'http://region2.notcloud.ibm.com/'

safe_headers = {
'Authorization': 'foo',
'WWW-Authenticate': 'bar',
'Cookie': 'baz',
'Cookie2': 'baz2',
}

responses.add(
responses.GET, url_from, status=302, adding_headers={'Location': url_to}, body='just about to redirect'
)
responses.add(responses.GET, url_to, status=200, body='successfully redirected')

service = BaseService(service_url=url_from, authenticator=NoAuthAuthenticator())

prepped = service.prepare_request('GET', '', headers=safe_headers)
response = service.send(prepped)
result = response.get_result()

assert result.status_code == 200
assert result.url == url_to
assert result.text == 'successfully redirected'

# Make sure the headers have been excluded from the 2nd, redirected request.
redirected_headers = responses.calls[1].request.headers
for key in safe_headers:
assert key not in redirected_headers


@responses.activate
def test_redirect_not_ibm_to_not_ibm_fail():
url_from = 'http://region1.notcloud.ibm.com/'
url_to = 'http://region2.notcloud.ibm.com/'

safe_headers = {
'Authorization': 'foo',
'WWW-Authenticate': 'bar',
'Cookie': 'baz',
'Cookie2': 'baz2',
}

responses.add(
responses.GET, url_from, status=302, adding_headers={'Location': url_to}, body='just about to redirect'
)
responses.add(responses.GET, url_to, status=200, body='successfully redirected')

service = BaseService(service_url=url_from, authenticator=NoAuthAuthenticator())

prepped = service.prepare_request('GET', '', headers=safe_headers)
response = service.send(prepped)
result = response.get_result()

assert result.status_code == 200
assert result.url == url_to
assert result.text == 'successfully redirected'

# Make sure the headers have been excluded from the 2nd, redirected request.
redirected_headers = responses.calls[1].request.headers
for key in safe_headers:
assert key not in redirected_headers


@responses.activate
def test_redirect_ibm_to_ibm_exhausted_fail():
redirects = 11
safe_headers = {
'Authorization': 'foo',
'WWW-Authenticate': 'bar',
'Cookie': 'baz',
'Cookie2': 'baz2',
}

for i in range(redirects):
responses.add(
responses.GET,
f'http://region{i+1}.cloud.ibm.com/',
status=302,
adding_headers={'Location': f'http://region{i+2}.cloud.ibm.com/'},
body='just about to redirect',
)

service = BaseService(service_url='http://region1.cloud.ibm.com/', authenticator=NoAuthAuthenticator())

with pytest.raises(MaxRetryError) as ex:
prepped = service.prepare_request('GET', '', headers=safe_headers)
service.send(prepped)

assert ex.value.reason == 'reached the maximum number of redirects: 10'


@responses.activate
def test_user_agent_header():
service = AnyServiceV1('2018-11-20', authenticator=NoAuthAuthenticator())
Expand Down