Bail early in ipro-recording if the content-type is known in the
kPreliminaryHeaders to be something we cannot optimize.
This commit is contained in:
@@ -740,10 +740,28 @@ apr_status_t instaweb_in_place_filter(ap_filter_t* filter,
|
||||
first = false;
|
||||
ResponseHeaders response_headers(recorder->http_options());
|
||||
ApacheRequestToResponseHeaders(*request, &response_headers, NULL);
|
||||
|
||||
// The content-type is likely to be missing from the Apache response
|
||||
// headers until AP_FTYPE_PROTOCOL, and this filter is run earlier, at
|
||||
// AP_FTYPE_CONTENT_SET + 1. However, the content-type may be in
|
||||
// the request object, and if we populate it into the response headers
|
||||
// early, we can check for uninteresting content-types in
|
||||
// ConsiderResponseHeaders and avoid the overhead of collecting the
|
||||
// content into memory.
|
||||
if ((request->content_type != nullptr) &&
|
||||
(response_headers.DetermineContentType() == nullptr)) {
|
||||
response_headers.Replace(
|
||||
HttpAttributes::kContentType, request->content_type);
|
||||
}
|
||||
|
||||
recorder->ConsiderResponseHeaders(
|
||||
InPlaceResourceRecorder::kPreliminaryHeaders, &response_headers);
|
||||
}
|
||||
|
||||
if (recorder->failed()) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Content bucket.
|
||||
const char* buf = NULL;
|
||||
size_t bytes = 0;
|
||||
@@ -843,7 +861,7 @@ apr_status_t instaweb_in_place_check_headers_filter(ap_filter_t* filter,
|
||||
return ap_pass_brigade(filter->next, bb);
|
||||
}
|
||||
|
||||
// This should always be set by handle_as_in_place() in instaweb_handler.cc.
|
||||
// This should always be set by Instaweb::HandleAsInPlace().
|
||||
InPlaceResourceRecorder* recorder =
|
||||
static_cast<InPlaceResourceRecorder*>(filter->ctx);
|
||||
|
||||
@@ -1147,13 +1165,19 @@ void mod_pagespeed_register_hooks(apr_pool_t* pool) {
|
||||
static_cast<ap_filter_type>(AP_FTYPE_CONTENT_SET + 1));
|
||||
|
||||
// Run after contents are set, but before mod_deflate, which runs at
|
||||
// AP_FTYPE_CONTENT_SET.
|
||||
// AP_FTYPE_CONTENT_SET. We use a separate filter rather
|
||||
// than just adding logic to instaweb_fix_headers_filter because the
|
||||
// recorder gets passed in as the filter->ctx when it is registered
|
||||
// in InstawebHandler::HandleAsInPlace.
|
||||
ap_register_output_filter(
|
||||
kModPagespeedInPlaceFilterName, instaweb_in_place_filter, NULL,
|
||||
static_cast<ap_filter_type>(AP_FTYPE_CONTENT_SET - 1));
|
||||
// Run after headers are set by mod_headers, mod_expires, etc. and
|
||||
// after Content-Type has been set (which appears to be at
|
||||
// AP_FTYPE_PROTOCOL).
|
||||
// AP_FTYPE_PROTOCOL). We cannot simply collect the bytes at
|
||||
// AP_FTYPE_PROTOCOL+1 because, it appears, at that time the headers
|
||||
// have been serialized into the content, and it's rather embarassing
|
||||
// to have to rescan for the end of the headers.
|
||||
ap_register_output_filter(
|
||||
kModPagespeedInPlaceCheckHeadersName,
|
||||
instaweb_in_place_check_headers_filter, NULL,
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "net/instaweb/http/public/http_cache.h"
|
||||
#include "net/instaweb/http/public/http_cache_failure.h"
|
||||
#include "net/instaweb/http/public/http_value.h"
|
||||
#include "net/instaweb/http/public/inflating_fetch.h"
|
||||
#include "pagespeed/kernel/base/statistics.h"
|
||||
#include "pagespeed/kernel/base/string_util.h"
|
||||
#include "pagespeed/kernel/http/content_type.h"
|
||||
@@ -128,7 +129,7 @@ bool InPlaceResourceRecorder::Write(const StringPiece& contents,
|
||||
void InPlaceResourceRecorder::ConsiderResponseHeaders(
|
||||
HeadersKind headers_kind,
|
||||
ResponseHeaders* response_headers) {
|
||||
CHECK(response_headers != NULL) << "Response headers cannot be NULL";
|
||||
CHECK(response_headers != nullptr);
|
||||
DCHECK(!full_response_headers_considered_);
|
||||
|
||||
if (!consider_response_headers_called_) {
|
||||
@@ -141,6 +142,8 @@ void InPlaceResourceRecorder::ConsiderResponseHeaders(
|
||||
HttpStatus::kOK);
|
||||
}
|
||||
|
||||
status_code_ = response_headers->status_code();
|
||||
|
||||
// Shortcut for bailing out early when the response will be too large.
|
||||
int64 content_length;
|
||||
if (max_response_bytes_ <= 0 &&
|
||||
@@ -153,13 +156,46 @@ void InPlaceResourceRecorder::ConsiderResponseHeaders(
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if IPRO applies considering the content type, if we have one at
|
||||
// this point. Depending on the server, we may only know the content type
|
||||
// after the we are called with kFullHeaders.
|
||||
//
|
||||
// Note: in a proxy setup it might be desirable to cache HTML and
|
||||
// non-rewritable Content-Types to avoid re-fetching from the origin server.
|
||||
//
|
||||
// If we have the full-headers, then we demand to have a good content type
|
||||
// now.
|
||||
if (response_headers->Has(HttpAttributes::kContentType) ||
|
||||
(headers_kind == kFullHeaders)) {
|
||||
const ContentType* content_type = response_headers->DetermineContentType();
|
||||
|
||||
// Bail if not an image, css, or JS.
|
||||
if ((content_type == nullptr) ||
|
||||
!(content_type->IsImage() ||
|
||||
content_type->IsCss() ||
|
||||
content_type->IsJsLike())) {
|
||||
// TODO(jmarantz): I feel that it's better not to write a note
|
||||
// about uncachable-content-types to the cache when we are failing
|
||||
// early. However, I cannot prove that in a siege test. It is
|
||||
// clear that calling DroppedAsUncacheable() here fills the HTTP
|
||||
// cache with a new entry for every HTML URL, including all
|
||||
// query-params, and that seems bad, but
|
||||
// siege_ipro_high_entropy.sh does not benefit from just simply
|
||||
// setting failure_=true here and skipping the call to
|
||||
// DroppedAsUncacheable(). If at some point we decide to go this
|
||||
// way, we must also change the expected cache_inserts count in
|
||||
// "Blocking rewrite enabled." in apache/system_test.sh from 3 to
|
||||
// 2.
|
||||
DroppedAsUncacheable();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (headers_kind != kFullHeaders) {
|
||||
return;
|
||||
}
|
||||
full_response_headers_considered_ = true;
|
||||
|
||||
status_code_ = response_headers->status_code();
|
||||
|
||||
// For 4xx and 5xx we can't IPRO, but we can also cache the failure so we
|
||||
// don't retry recording for a bit.
|
||||
if (response_headers->IsErrorStatus()) {
|
||||
@@ -181,20 +217,6 @@ void InPlaceResourceRecorder::ConsiderResponseHeaders(
|
||||
return;
|
||||
}
|
||||
|
||||
// First, check if IPRO applies considering the content type.
|
||||
// Note: in a proxy setup it might be desirable to cache HTML and
|
||||
// non-rewritable Content-Types to avoid re-fetching from the origin server.
|
||||
const ContentType* content_type =
|
||||
response_headers->DetermineContentType();
|
||||
if (content_type == NULL ||
|
||||
!(content_type->IsImage() ||
|
||||
content_type->IsCss() ||
|
||||
content_type->IsJsLike())) {
|
||||
// We remember wrong mimetypes as uncacheable. This is slightly goofy,
|
||||
// and is inconsistent with how they are treated on normal rewrite path...
|
||||
DroppedAsUncacheable();
|
||||
return;
|
||||
}
|
||||
bool is_cacheable = response_headers->IsProxyCacheable(
|
||||
request_properties_,
|
||||
ResponseHeaders::GetVaryOption(http_options_.respect_vary),
|
||||
@@ -213,12 +235,14 @@ void InPlaceResourceRecorder::DroppedDueToSize() {
|
||||
}
|
||||
|
||||
void InPlaceResourceRecorder::DroppedAsUncacheable() {
|
||||
cache_->RememberFailure(
|
||||
url_, fragment_,
|
||||
status_code_ == 200 ? kFetchStatusUncacheable200
|
||||
: kFetchStatusUncacheableError,
|
||||
handler_);
|
||||
failure_ = true;
|
||||
if (!failure_) {
|
||||
cache_->RememberFailure(
|
||||
url_, fragment_,
|
||||
status_code_ == 200 ? kFetchStatusUncacheable200
|
||||
: kFetchStatusUncacheableError,
|
||||
handler_);
|
||||
failure_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void InPlaceResourceRecorder::SaveCacheControl(const char* cache_control) {
|
||||
@@ -243,7 +267,9 @@ void InPlaceResourceRecorder::DoneAndSetHeaders(
|
||||
if (status_code_ == HttpStatus::kOK && resource_value_.contents_size() == 0) {
|
||||
// Ignore Empty 200 responses.
|
||||
// https://github.com/pagespeed/mod_pagespeed/issues/1050
|
||||
cache_->RememberFailure(url_, fragment_, kFetchStatusEmpty, handler_);
|
||||
if (!failure_) {
|
||||
cache_->RememberFailure(url_, fragment_, kFetchStatusEmpty, handler_);
|
||||
}
|
||||
failure_ = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -84,7 +84,8 @@ class InPlaceResourceRecorderTest : public RewriteTestBase {
|
||||
|
||||
ResponseHeaders final_headers;
|
||||
SetDefaultLongCacheHeaders(&kContentTypeCss, &final_headers);
|
||||
if (header_time == kLateGzipHeader) {
|
||||
if ((header_time == kLateGzipHeader) ||
|
||||
(header_time == kPrelimGzipHeader)) {
|
||||
final_headers.Add(HttpAttributes::kContentEncoding,
|
||||
HttpAttributes::kGzip);
|
||||
}
|
||||
@@ -144,7 +145,33 @@ class InPlaceResourceRecorderTest : public RewriteTestBase {
|
||||
EXPECT_EQ(
|
||||
HTTPCache::FindResult(HTTPCache::kRecentFailure,
|
||||
kFetchStatusUncacheable200),
|
||||
HttpBlockingFind(kTestUrl, http_cache(), &value_out, &headers_out));
|
||||
HttpBlockingFind(kTestUrl, http_cache(), &value_out, &headers_out))
|
||||
<< content_type->mime_type();
|
||||
}
|
||||
|
||||
const char* BailsForContentType(const char* mime_type) {
|
||||
ResponseHeaders headers;
|
||||
headers.set_status_code(HttpStatus::kOK);
|
||||
SetDefaultLongCacheHeaders(&kContentTypeCss, &headers);
|
||||
if (mime_type == nullptr) {
|
||||
headers.RemoveAll(HttpAttributes::kContentType);
|
||||
} else {
|
||||
headers.Replace(HttpAttributes::kContentType, mime_type);
|
||||
}
|
||||
headers.ComputeCaching();
|
||||
|
||||
scoped_ptr<InPlaceResourceRecorder> recorder(MakeRecorder(kTestUrl));
|
||||
recorder->ConsiderResponseHeaders(
|
||||
InPlaceResourceRecorder::kPreliminaryHeaders, &headers);
|
||||
if (recorder->failed()) {
|
||||
return "prelim";
|
||||
}
|
||||
recorder->ConsiderResponseHeaders(
|
||||
InPlaceResourceRecorder::kFullHeaders, &headers);
|
||||
if (recorder->failed()) {
|
||||
return "full";
|
||||
}
|
||||
return "none";
|
||||
}
|
||||
};
|
||||
|
||||
@@ -314,6 +341,25 @@ TEST_F(InPlaceResourceRecorderTest, SplitOperationWithGzipWithCompressedCache) {
|
||||
TestWithGzip(kLateGzipHeader);
|
||||
}
|
||||
|
||||
TEST_F(InPlaceResourceRecorderTest, BailEarlyOnUnexpectedContentType) {
|
||||
EXPECT_STREQ("prelim", BailsForContentType(kContentTypeHtml.mime_type()));
|
||||
EXPECT_STREQ("prelim", BailsForContentType(kContentTypePdf.mime_type()));
|
||||
EXPECT_STREQ("prelim", BailsForContentType(kContentTypeText.mime_type()));
|
||||
EXPECT_STREQ("prelim", BailsForContentType("bogus"));
|
||||
EXPECT_STREQ("none", BailsForContentType(kContentTypeCss.mime_type()));
|
||||
EXPECT_STREQ("none", BailsForContentType(kContentTypeJavascript.mime_type()));
|
||||
EXPECT_STREQ("none", BailsForContentType(kContentTypeGif.mime_type()));
|
||||
EXPECT_STREQ("none", BailsForContentType(kContentTypePng.mime_type()));
|
||||
EXPECT_STREQ("none", BailsForContentType(kContentTypeJpeg.mime_type()));
|
||||
EXPECT_STREQ("none", BailsForContentType(kContentTypeWebp.mime_type()));
|
||||
|
||||
// Note that if the content-type is missing in the first round, we
|
||||
// don't bail early, but we will bail late. This is because in
|
||||
// the preliminary round, we may not know the correct content type
|
||||
// yet, so we need to be conservative and let the processing continue.
|
||||
EXPECT_STREQ("full", BailsForContentType(nullptr));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
Reference in New Issue
Block a user