From 0125954b83b45835b624011aa3e491f30081e14a Mon Sep 17 00:00:00 2001 From: Jonathan Demirgian Date: Wed, 28 Apr 2021 16:51:52 -0400 Subject: [PATCH 1/2] Write tests confirming Binary Support response behavior No functional change to the Zappa codebase is introduced by this commit. This area of the application was untested. Tests introduced to ensure new behavior discussed in https://github.com/zappa/Zappa/issues/908 does not cause a regression. --- tests/test_binary_support_settings.py | 12 ++++++ tests/test_handler.py | 61 +++++++++++++++++++++++++++ tests/test_wsgi_binary_support_app.py | 31 ++++++++++++++ tests/utils.py | 9 ++++ 4 files changed, 113 insertions(+) create mode 100644 tests/test_binary_support_settings.py create mode 100644 tests/test_wsgi_binary_support_app.py diff --git a/tests/test_binary_support_settings.py b/tests/test_binary_support_settings.py new file mode 100644 index 000000000..adb257d55 --- /dev/null +++ b/tests/test_binary_support_settings.py @@ -0,0 +1,12 @@ +API_STAGE = "dev" +APP_FUNCTION = "app" +APP_MODULE = "tests.test_wsgi_binary_support_app" +BINARY_SUPPORT = True +CONTEXT_HEADER_MAPPINGS = {} +DEBUG = "True" +DJANGO_SETTINGS = None +DOMAIN = "api.example.com" +ENVIRONMENT_VARIABLES = {} +LOG_LEVEL = "DEBUG" +PROJECT_NAME = "binary_support_settings" +COGNITO_TRIGGER_MAPPING = {} diff --git a/tests/test_handler.py b/tests/test_handler.py index eecb3dcab..ce38278b3 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -3,6 +3,7 @@ from mock import Mock +from tests.utils import is_base64 from zappa.handler import LambdaHandler from zappa.utilities import merge_headers @@ -225,6 +226,66 @@ def test_exception_handler_on_web_request(self): self.assertEqual(response["statusCode"], 500) mocked_exception_handler.assert_called() + def test_wsgi_script_binary_support_base64_behavior(self): + """ + With Binary Support enabled, response mimetypes that are not text/* or application/json will be base64 encoded + """ + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", + } + + response = lh.handler(text_plain_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertNotIn("isBase64Encoded", response) + self.assertFalse(is_base64(response["body"])) + + text_arbitrary_event = { + **text_plain_event, + **{"path": "/textarbitrary_mimetype_response1"}, + } + + response = lh.handler(text_arbitrary_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertNotIn("isBase64Encoded", response) + self.assertFalse(is_base64(response["body"])) + + application_json_event = { + **text_plain_event, + **{"path": "/json_mimetype_response1"}, + } + + response = lh.handler(application_json_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertNotIn("isBase64Encoded", response) + self.assertFalse(is_base64(response["body"])) + + arbitrary_binary_event = { + **text_plain_event, + **{"path": "/arbitrarybinary_mimetype_response1"}, + } + + response = lh.handler(arbitrary_binary_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(response["isBase64Encoded"]) + self.assertTrue(is_base64(response["body"])) + def test_wsgi_script_on_cognito_event_request(self): """ Ensure that requests sent by cognito behave sensibly diff --git a/tests/test_wsgi_binary_support_app.py b/tests/test_wsgi_binary_support_app.py new file mode 100644 index 000000000..ef0f62287 --- /dev/null +++ b/tests/test_wsgi_binary_support_app.py @@ -0,0 +1,31 @@ +### +# This test application exists to confirm how Zappa handles WSGI application +# _responses_ when Binary Support is enabled. +### + +import io +import json + +from flask import Flask, Response, send_file + +app = Flask(__name__) + + +@app.route("/textplain_mimetype_response1", methods=["GET"]) +def text_mimetype_response_1(): + return Response(response="OK", mimetype="text/plain") + + +@app.route("/textarbitrary_mimetype_response1", methods=["GET"]) +def text_mimetype_response_2(): + return Response(response="OK", mimetype="text/arbitary") + + +@app.route("/json_mimetype_response1", methods=["GET"]) +def json_mimetype_response_1(): + return Response(response=json.dumps({"some": "data"}), mimetype="application/json") + + +@app.route("/arbitrarybinary_mimetype_response1", methods=["GET"]) +def arbitrary_mimetype_response_1(): + return Response(response=b"some binary data", mimetype="arbitrary/binary_mimetype") diff --git a/tests/utils.py b/tests/utils.py index 31453e0ac..6433db5bc 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,3 +1,4 @@ +import base64 import functools import os from contextlib import contextmanager @@ -74,3 +75,11 @@ def stub_open(*args, **kwargs): with patch("__builtin__.open", stub_open): yield mock_open, mock_file + + +def is_base64(test_string: str) -> bool: + # Taken from https://stackoverflow.com/a/45928164/3200002 + try: + return base64.b64encode(base64.b64decode(test_string)).decode() == test_string + except Exception: + return False From eb2b50f65acb1b04d2a3f3d74c5de6ae919c7fe1 Mon Sep 17 00:00:00 2001 From: Jonathan Demirgian Date: Wed, 28 Apr 2021 11:15:12 -0400 Subject: [PATCH 2/2] Use Content-Encoding to identify if data is binary When using _whitenoise_ for caching, which provides compression, binary types may include mimetypes, "text/", "application/json": - response.mimetype.startswith("text/") - response.mimetype == "application/json" Assuming that Content-Encoding will be set (as whitenoise apparently does) this allows compression to be applied by the application for "text/" and "application/json". About Content-Encoding: developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding --- The above commit message and functional code change to zappa/handler.py was written by GH user @monkut and a PR was provided with https://github.com/Miserlou/Zappa/pull/2170. That PR was not merged in before the fork from Miserlou/zappa to Zappa/zappa. This commit copies the code from that PR, adds a comment line referencing the new migrated issue, and additionally adds tests to confirm behavior. Co-authored-by: monkut --- tests/test_handler.py | 104 +++++++++++++++++++++----- tests/test_wsgi_binary_support_app.py | 34 ++++++++- zappa/handler.py | 18 ++++- 3 files changed, 133 insertions(+), 23 deletions(-) diff --git a/tests/test_handler.py b/tests/test_handler.py index ce38278b3..4d184b153 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -226,10 +226,10 @@ def test_exception_handler_on_web_request(self): self.assertEqual(response["statusCode"], 500) mocked_exception_handler.assert_called() - def test_wsgi_script_binary_support_base64_behavior(self): - """ - With Binary Support enabled, response mimetypes that are not text/* or application/json will be base64 encoded + def test_wsgi_script_binary_support_with_content_encoding(self): """ + Ensure that response body is base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding header is present. + """ # don't linebreak so that whole line is shown during nosetest readout lh = LambdaHandler("tests.test_binary_support_settings") text_plain_event = { @@ -243,48 +243,112 @@ def test_wsgi_script_binary_support_base64_behavior(self): "pathParameters": {"proxy": "return/request/url"}, "httpMethod": "GET", "stageVariables": {}, - "path": "/textplain_mimetype_response1", + "path": "/content_encoding_header_json1", } + # A likely scenario is that the application would be gzip compressing some json response. That's checked first. response = lh.handler(text_plain_event, None) self.assertEqual(response["statusCode"], 200) - self.assertNotIn("isBase64Encoded", response) - self.assertFalse(is_base64(response["body"])) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + # We also verify that some unknown mimetype with a Content-Encoding also encodes to b64. This route serves + # bytes in the response. text_arbitrary_event = { **text_plain_event, - **{"path": "/textarbitrary_mimetype_response1"}, + **{"path": "/content_encoding_header_textarbitrary1"}, } response = lh.handler(text_arbitrary_event, None) self.assertEqual(response["statusCode"], 200) - self.assertNotIn("isBase64Encoded", response) - self.assertFalse(is_base64(response["body"])) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + # This route is similar to the above, but it serves its response as text and not bytes. That the response + # isn't bytes shouldn't matter because it still has a Content-Encoding header. application_json_event = { **text_plain_event, - **{"path": "/json_mimetype_response1"}, + **{"path": "/content_encoding_header_textarbitrary2"}, } response = lh.handler(application_json_event, None) self.assertEqual(response["statusCode"], 200) - self.assertNotIn("isBase64Encoded", response) - self.assertFalse(is_base64(response["body"])) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) - arbitrary_binary_event = { - **text_plain_event, - **{"path": "/arbitrarybinary_mimetype_response1"}, + def test_wsgi_script_binary_support_without_content_encoding_edgecases( + self, + ): + """ + Ensure zappa response bodies are NOT base64 encoded when BINARY_SUPPORT is enabled and the mimetype is "application/json" or starts with "text/". + """ # don't linebreak so that whole line is shown during nosetest readout + + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", } - response = lh.handler(arbitrary_binary_event, None) + for path in [ + "/textplain_mimetype_response1", # text/plain mimetype should not be turned to base64 + "/textarbitrary_mimetype_response1", # text/arbitrary mimetype should not be turned to base64 + "/json_mimetype_response1", # application/json mimetype should not be turned to base64 + ]: + event = {**text_plain_event, "path": path} + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertNotIn("isBase64Encoded", response) + self.assertFalse(is_base64(response["body"])) + + def test_wsgi_script_binary_support_without_content_encoding( + self, + ): + """ + Ensure zappa response bodies are base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding is absent. + """ # don't linebreak so that whole line is shown during nosetest readout - self.assertEqual(response["statusCode"], 200) - self.assertIn("isBase64Encoded", response) - self.assertTrue(response["isBase64Encoded"]) - self.assertTrue(is_base64(response["body"])) + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", + } + + for path in [ + "/arbitrarybinary_mimetype_response1", + "/arbitrarybinary_mimetype_response2", + ]: + event = {**text_plain_event, "path": path} + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) def test_wsgi_script_on_cognito_event_request(self): """ diff --git a/tests/test_wsgi_binary_support_app.py b/tests/test_wsgi_binary_support_app.py index ef0f62287..d1d2e6638 100644 --- a/tests/test_wsgi_binary_support_app.py +++ b/tests/test_wsgi_binary_support_app.py @@ -3,7 +3,7 @@ # _responses_ when Binary Support is enabled. ### -import io +import gzip import json from flask import Flask, Response, send_file @@ -29,3 +29,35 @@ def json_mimetype_response_1(): @app.route("/arbitrarybinary_mimetype_response1", methods=["GET"]) def arbitrary_mimetype_response_1(): return Response(response=b"some binary data", mimetype="arbitrary/binary_mimetype") + + +@app.route("/arbitrarybinary_mimetype_response2", methods=["GET"]) +def arbitrary_mimetype_response_3(): + return Response(response="doesnt_matter", mimetype="definitely_not_text") + + +@app.route("/content_encoding_header_json1", methods=["GET"]) +def response_with_content_encoding_1(): + return Response( + response=gzip.compress(json.dumps({"some": "data"}).encode()), + mimetype="application/json", + headers={"Content-Encoding": "gzip"}, + ) + + +@app.route("/content_encoding_header_textarbitrary1", methods=["GET"]) +def response_with_content_encoding_2(): + return Response( + response=b"OK", + mimetype="text/arbitrary", + headers={"Content-Encoding": "something_arbitrarily_binary"}, + ) + + +@app.route("/content_encoding_header_textarbitrary2", methods=["GET"]) +def response_with_content_encoding_3(): + return Response( + response="OK", + mimetype="text/arbitrary", + headers={"Content-Encoding": "with_content_type_but_not_bytes_response"}, + ) diff --git a/zappa/handler.py b/zappa/handler.py index 41336dc15..50163c212 100644 --- a/zappa/handler.py +++ b/zappa/handler.py @@ -583,14 +583,28 @@ def handler(self, event, context): ) if response.data: - if ( + # We base64 encode for two reasons when BINARY_SUPPORT is enabled: + # - Content-Encoding is present, which is commonly used by compression mechanisms to indicate + # that the content is in br/gzip/deflate/etc encoding + # (Related: https://github.com/zappa/Zappa/issues/908). Content like this must be + # transmitted as b64. + # - The response is assumed to be some binary format (since BINARY_SUPPORT is enabled and it + # isn't application/json or text/) + if settings.BINARY_SUPPORT and response.headers.get( + "Content-Encoding" + ): + zappa_returndict["body"] = base64.b64encode( + response.data + ).decode() + zappa_returndict["isBase64Encoded"] = True + elif ( settings.BINARY_SUPPORT and not response.mimetype.startswith("text/") and response.mimetype != "application/json" ): zappa_returndict["body"] = base64.b64encode( response.data - ).decode("utf-8") + ).decode() zappa_returndict["isBase64Encoded"] = True else: zappa_returndict["body"] = response.get_data(as_text=True)