ipro: support in-place resource optimization

Squash-merge of chaizhenhua's work over many pull requests, especially #450.

The copy of in_place_resource_recorder.cc is temporary and can be removed after
the next PSOL release.
This commit is contained in:
Jeff Kaufman
2013-07-24 10:28:35 -04:00
parent 59cdafaf70
commit ad6429d26f
10 changed files with 946 additions and 487 deletions
+7
View File
@@ -175,8 +175,15 @@ if [ $ngx_found = yes ]; then
$mod_pagespeed_dir/out/$buildtype/obj/gen/data2c_out/instaweb/net/instaweb/system/console_out.cc \
$mod_pagespeed_dir/out/$buildtype/obj/gen/data2c_out/instaweb/net/instaweb/system/console_css_out.cc \
$mod_pagespeed_dir/net/instaweb/system/add_headers_fetcher.cc \
$ps_src/in_place_resource_recorder.cc \
$mod_pagespeed_dir/net/instaweb/system/loopback_route_fetcher.cc \
$mod_pagespeed_dir/net/instaweb/system/serf_url_async_fetcher.cc"
# TODO(jefftk): when the next release of PSOL after 1.6.29.3 comes out, change
# $ps_src/in_place_resource_recorder.cc
# above to
# $mod_pagespeed_dir/net/instaweb/apache/in_place_resource_recorder.cc
# Make pagespeed run immediately before gzip.
HTTP_FILTER_MODULES=$(echo $HTTP_FILTER_MODULES |\
sed "s/$HTTP_GZIP_FILTER_MODULE/$HTTP_GZIP_FILTER_MODULE $ngx_addon_name/")
+1
View File
@@ -70,6 +70,7 @@ rsync -arvz "$MOD_PAGESPEED_SRC/" "psol/include/" --prune-empty-dirs \
--include="sparse_hash_map" \
--include="sparse_hash_set" \
--include="sparsetable" \
--include="in_place_resource_recorder.cc" \
--exclude='*'
mkdir -p psol/lib/Debug/linux/ia32
mkdir -p psol/lib/Debug/linux/x64
+114
View File
@@ -0,0 +1,114 @@
// Copyright 2013 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: sligocki@google.com (Shawn Ligocki)
// Copied from net/instaweb/apache/in_place_resource_recorder.cc
// The next release of PSOL will include that file, at which point this file can
// be removed.
// TODO(jefftk): delete this when the next PSOL release after 1.6.29.3 is ready.
#include "net/instaweb/apache/in_place_resource_recorder.h"
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/http/public/http_value.h"
#include "net/instaweb/http/public/meta_data.h"
#include "net/instaweb/http/public/request_headers.h"
#include "net/instaweb/http/public/response_headers.h"
#include "net/instaweb/util/public/statistics.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class MessageHandler;
namespace {
const char kNumResources[] = "ipro_recorder_resources";
const char kNumInsertedIntoCache[] = "ipro_recorder_inserted_into_cache";
const char kNumNotCacheable[] = "ipro_recorder_not_cacheable";
const char kNumFailed[] = "ipro_recorder_failed";
}
InPlaceResourceRecorder::InPlaceResourceRecorder(
StringPiece url, RequestHeaders* request_headers, bool respect_vary,
HTTPCache* cache, Statistics* stats, MessageHandler* handler)
: url_(url.data(), url.size()), request_headers_(request_headers),
respect_vary_(respect_vary), success_(true), cache_(cache),
handler_(handler),
num_resources_(stats->GetVariable(kNumResources)),
num_inserted_into_cache_(stats->GetVariable(kNumInsertedIntoCache)),
num_not_cacheable_(stats->GetVariable(kNumNotCacheable)),
num_failed_(stats->GetVariable(kNumFailed)) {
num_resources_->Add(1);
}
InPlaceResourceRecorder::~InPlaceResourceRecorder() {
}
void InPlaceResourceRecorder::InitStats(Statistics* statistics) {
statistics->AddVariable(kNumResources);
statistics->AddVariable(kNumInsertedIntoCache);
statistics->AddVariable(kNumNotCacheable);
statistics->AddVariable(kNumFailed);
}
bool InPlaceResourceRecorder::Write(const StringPiece& contents,
MessageHandler* handler) {
return resource_value_.Write(contents, handler);
}
bool InPlaceResourceRecorder::Flush(MessageHandler* handler) {
return true;
}
void InPlaceResourceRecorder::DoneAndSetHeaders(
ResponseHeaders* response_headers) {
if (success_) {
// TODO(sligocki): Currently this will cache HTML and other Content-Types
// that cannot be rewritten in-place. It seems like we should only cache
// resources that have a hope of being rewritten in-place, and store a
// failure in the IPRO meta-data lookup for all others.
bool is_cacheable =
response_headers->IsProxyCacheableGivenRequest(*request_headers_);
if (is_cacheable && respect_vary_) {
is_cacheable = response_headers->VaryCacheable(
request_headers_->Has(HttpAttributes::kCookie));
}
// Put resource_value_ in cache.
if (is_cacheable) {
resource_value_.SetHeaders(response_headers);
cache_->Put(url_, &resource_value_, handler_);
// TODO(sligocki): Start IPRO rewrite.
num_inserted_into_cache_->Add(1);
} else {
cache_->RememberNotCacheable(url_, response_headers->status_code() == 200,
handler_);
num_not_cacheable_->Add(1);
}
} else {
// TODO(sligocki): Should we RememberFetchFailed() if success_ == false?
// We don't expect this to happen much, it seems to only happen with Apache
// Bucket Brigade issues.
num_failed_->Add(1);
}
delete this;
}
} // namespace net_instaweb
+31 -47
View File
@@ -37,7 +37,8 @@ NgxBaseFetch::NgxBaseFetch(ngx_http_request_t* r, int pipe_fd,
done_called_(false),
last_buf_sent_(false),
pipe_fd_(pipe_fd),
references_(2) {
references_(2),
handle_error_(true) {
if (pthread_mutex_init(&mutex_, NULL)) CHECK(0);
PopulateRequestHeaders();
}
@@ -55,43 +56,13 @@ void NgxBaseFetch::Unlock() {
}
void NgxBaseFetch::PopulateRequestHeaders() {
CopyHeadersFromTable<RequestHeaders>(&request_->headers_in.headers,
request_headers());
ngx_psol::copy_request_headers_from_ngx(request_, request_headers());
}
void NgxBaseFetch::PopulateResponseHeaders() {
CopyHeadersFromTable<ResponseHeaders>(&request_->headers_out.headers,
response_headers());
response_headers()->set_status_code(request_->headers_out.status);
// Manually copy over the content type because it's not included in
// request_->headers_out.headers.
response_headers()->Add(
HttpAttributes::kContentType,
ngx_psol::str_to_string_piece(request_->headers_out.content_type));
// TODO(oschaaf): ComputeCaching should be called in setupforhtml()?
response_headers()->ComputeCaching();
ngx_psol::copy_response_headers_from_ngx(request_, response_headers());
}
template<class HeadersT>
void NgxBaseFetch::CopyHeadersFromTable(ngx_list_t* headers_from,
HeadersT* headers_to) {
// http_version is the version number of protocol; 1.1 = 1001. See
// NGX_HTTP_VERSION_* in ngx_http_request.h
headers_to->set_major_version(request_->http_version / 1000);
headers_to->set_minor_version(request_->http_version % 1000);
ngx_table_elt_t* header;
NgxListIterator it(&headers_from->part);
while ((header = it.Next()) != NULL) {
StringPiece key = ngx_psol::str_to_string_piece(header->key);
StringPiece value = ngx_psol::str_to_string_piece(header->value);
headers_to->Add(key, value);
}
}
bool NgxBaseFetch::HandleWrite(const StringPiece& sp,
MessageHandler* handler) {
@@ -101,9 +72,15 @@ bool NgxBaseFetch::HandleWrite(const StringPiece& sp,
return true;
}
// should only be called in nginx thread
ngx_int_t NgxBaseFetch::CopyBufferToNginx(ngx_chain_t** link_ptr) {
if (done_called_ && last_buf_sent_) {
return NGX_DECLINED;
CHECK(!(done_called_ && last_buf_sent_))
<< "CopyBufferToNginx() was called after the last buffer was sent";
// there is no buffer to send
if (!done_called_ && buffer_.empty()) {
*link_ptr = NULL;
return NGX_AGAIN;
}
int rc = ngx_psol::string_piece_to_buffer_chain(
@@ -117,30 +94,32 @@ ngx_int_t NgxBaseFetch::CopyBufferToNginx(ngx_chain_t** link_ptr) {
if (done_called_) {
last_buf_sent_ = true;
return NGX_OK;
}
return NGX_OK;
return NGX_AGAIN;
}
// There may also be a race condition if this is called between the last Write()
// and Done() such that we're sending an empty buffer with last_buf set, which I
// think nginx will reject.
ngx_int_t NgxBaseFetch::CollectAccumulatedWrites(ngx_chain_t** link_ptr) {
ngx_int_t rc;
Lock();
ngx_int_t rc = CopyBufferToNginx(link_ptr);
rc = CopyBufferToNginx(link_ptr);
Unlock();
if (rc == NGX_DECLINED) {
*link_ptr = NULL;
return NGX_OK;
}
return rc;
}
ngx_int_t NgxBaseFetch::CollectHeaders(ngx_http_headers_out_t* headers_out) {
Lock();
const ResponseHeaders* pagespeed_headers = response_headers();
Unlock();
// TODO(chaizhenhua): Add and check.
// if (content_length_known()) {
// headers_out->content_length = NULL;
// headers_out->content_length_n = content_length();
// }
return ngx_psol::copy_response_headers_to_ngx(request_, *pagespeed_headers);
}
@@ -163,9 +142,14 @@ void NgxBaseFetch::RequestCollection() {
}
void NgxBaseFetch::HandleHeadersComplete() {
// If this is a 404 response we need to count it in the stats.
if (response_headers()->status_code() == HttpStatus::kNotFound) {
server_context_->rewrite_stats()->resource_404_count()->Add(1);
int status_code = response_headers()->status_code();
bool status_ok = (status_code != 0) && (status_code < 400);
if (status_ok || handle_error_) {
// If this is a 404 response we need to count it in the stats.
if (response_headers()->status_code() == HttpStatus::kNotFound) {
server_context_->rewrite_stats()->resource_404_count()->Add(1);
}
}
RequestCollection(); // Headers available.
+5 -6
View File
@@ -84,6 +84,7 @@ class NgxBaseFetch : public AsyncFetch {
// Called by nginx when it's done with us.
void Release();
void set_handle_error(bool x) { handle_error_ = x; }
private:
virtual bool HandleWrite(const StringPiece& sp, MessageHandler* handler);
@@ -91,18 +92,15 @@ class NgxBaseFetch : public AsyncFetch {
virtual void HandleHeadersComplete();
virtual void HandleDone(bool success);
// Helper method for PopulateRequestHeaders and PopulateResponseHeaders.
template<class HeadersT>
void CopyHeadersFromTable(ngx_list_t* headers_from, HeadersT* headers_to);
// Indicate to nginx that we would like it to call
// CollectAccumulatedWrites().
void RequestCollection();
// Lock must be acquired first.
// Returns:
// NGX_DECLINED: nothing to send, short circuit. Buffer not allocated.
// NGX_OK, NGX_ERROR: success, failure
// NGX_ERROR: failure
// NGX_AGAIN: still has buffer to send, need to checkout link_ptr
// NGX_OK: done, HandleDone has been called
// Allocates an nginx buffer, copies our buffer_ contents into it, clears
// buffer_.
ngx_int_t CopyBufferToNginx(ngx_chain_t** link_ptr);
@@ -124,6 +122,7 @@ class NgxBaseFetch : public AsyncFetch {
// decremented once when Done() is called and once when Release() is called.
int references_;
pthread_mutex_t mutex_;
bool handle_error_;
DISALLOW_COPY_AND_ASSIGN(NgxBaseFetch);
};
+742 -405
View File
File diff suppressed because it is too large Load Diff
+33 -16
View File
@@ -43,7 +43,9 @@ class GzipInflater;
class NgxBaseFetch;
class ProxyFetch;
class RewriteDriver;
class RequestHeaders;
class ResponseHeaders;
class InPlaceResourceRecorder;
} // namespace net_instaweb
namespace ngx_psol {
@@ -75,24 +77,39 @@ StringPiece str_to_string_piece(ngx_str_t s);
// Allocate memory out of the pool for the string piece, and copy the contents
// over. Returns NULL if we can't get memory.
char* string_piece_to_pool_string(ngx_pool_t* pool, StringPiece sp);
typedef struct {
net_instaweb::NgxBaseFetch* base_fetch;
ngx_connection_t* pagespeed_connection;
ngx_http_request_t* r;
bool html_rewrite;
bool in_place;
bool write_pending;
bool fetch_done;
bool modify_headers;
// for html rewrite
net_instaweb::ProxyFetch* proxy_fetch;
net_instaweb::GzipInflater* inflater_;
// for in place resource
net_instaweb::RewriteDriver* driver;
net_instaweb::InPlaceResourceRecorder *recorder;
} ps_request_ctx_t;
void copy_request_headers_from_ngx(const ngx_http_request_t *r,
net_instaweb::RequestHeaders *headers);
void copy_response_headers_from_ngx(const ngx_http_request_t *r,
net_instaweb::ResponseHeaders *headers);
ngx_int_t copy_response_headers_to_ngx(
ngx_http_request_t* r,
const net_instaweb::ResponseHeaders& pagespeed_headers);
typedef struct {
net_instaweb::ProxyFetch* proxy_fetch;
net_instaweb::NgxBaseFetch* base_fetch;
net_instaweb::RewriteDriver* driver;
bool data_received;
int pipe_fd;
ngx_connection_t* pagespeed_connection;
ngx_http_request_t* r;
bool is_resource_fetch;
bool sent_headers;
bool write_pending;
net_instaweb::GzipInflater* inflater_;
} ps_request_ctx_t;
} // namespace ngx_psol
#endif // NGX_PAGESPEED_H_
+2
View File
@@ -36,6 +36,7 @@
#include "net/instaweb/rewriter/public/rewrite_driver_factory.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/static_asset_manager.h"
#include "net/instaweb/apache/in_place_resource_recorder.h"
#include "net/instaweb/system/public/serf_url_async_fetcher.h"
#include "net/instaweb/system/public/system_caches.h"
#include "net/instaweb/system/public/system_rewrite_options.h"
@@ -430,6 +431,7 @@ void NgxRewriteDriverFactory::InitStats(Statistics* statistics) {
// Init Ngx-specific stats.
NgxServerContext::InitStats(statistics);
InPlaceResourceRecorder::InitStats(statistics);
statistics->AddVariable(kShutdownCount);
}
+5 -12
View File
@@ -214,11 +214,8 @@ fi
PSA_JS_LIBRARY_URL_PREFIX="ngx_pagespeed_static"
PAGESPEED_EXPECTED_FAILURES="
~convert_meta_tags~
~In-place resource optimization~
~keepalive with html rewriting~
"
# An expected failure can be indicated like: "~In-place resource optimization~"
PAGESPEED_EXPECTED_FAILURES=""
# The existing system test takes its arguments as positional parameters, and
# wants different ones than we want, so we need to reset our positional args.
@@ -362,6 +359,7 @@ check_from "$OUT" grep "$EXPECTED_EXAMPLES_TEXT"
# And also with bad request headers.
OUT=$(wget -O - --header=PageSpeedFilters:bogus $EXAMPLE_ROOT)
echo $OUT
check_from "$OUT" grep "$EXPECTED_EXAMPLES_TEXT"
# Test that loopback route fetcher works with vhosts not listening on
@@ -1252,13 +1250,8 @@ check $WGET_DUMP --header 'X-PSA-Blocking-Rewrite: psatest'\
$WGET_DUMP $STATISTICS_URL > $NEWSTATS
check_stat $OLDSTATS $NEWSTATS image_rewrites 1
check_stat $OLDSTATS $NEWSTATS cache_hits 0
# Something about IPRO means that in mod_pagespeed this comes in as 2. Before
# IPRO this said 1, and we're getting 1 in ngx_pagespeed, so I think this is
# probably correct.
check_stat $OLDSTATS $NEWSTATS cache_misses 1
# In mod_pagespeed this is 2 cache inserts for image + 1 for HTML in IPRO flow.
# We don't have IPRO, so this is just 2 cache inserts for the image.
check_stat $OLDSTATS $NEWSTATS cache_inserts 2
check_stat $OLDSTATS $NEWSTATS cache_misses 2
check_stat $OLDSTATS $NEWSTATS cache_inserts 3
# TODO(sligocki): There is no stat num_rewrites_executed. Fix.
#check_stat $OLDSTATS $NEWSTATS num_rewrites_executed 1
+6 -1
View File
@@ -29,7 +29,7 @@ http {
root "@@SERVER_ROOT@@";
pagespeed UsePerVHostStatistics on;
pagespeed InPlaceResourceOptimization on;
pagespeed CreateSharedMemoryMetadataCache "@@SHM_CACHE@@" 8192;
# CriticalImagesBeaconEnabled is now on by default, but we disable in testing.
@@ -650,6 +650,11 @@ http {
pagespeed DisableFilters convert_jpeg_to_progressive;
}
location /mod_pagespeed_test/ipro/test_image_dont_reuse.png {
expires 5m;
}
pagespeed EnableFilters remove_comments;
# Test LoadFromFile mapping by mapping one dir to another.