diff --git a/config b/config index 589ca7a3d..33eb3266e 100644 --- a/config +++ b/config @@ -57,6 +57,7 @@ if [ $ngx_found = yes ]; then ngx_addon_name=ngx_pagespeed NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ps_src/ngx_pagespeed.cc" NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ps_src/ngx_rewrite_driver_factory.cc" + NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ps_src/ngx_base_fetch.cc" HTTP_AUX_FILTER_MODULES="$HTTP_AUX_FILTER_MODULES $ngx_addon_name" CORE_LIBS="$CORE_LIBS $pagespeed_libs" CORE_INCS="$CORE_INCS $pagespeed_include" diff --git a/src/ngx_base_fetch.cc b/src/ngx_base_fetch.cc new file mode 100644 index 000000000..4d4ca2920 --- /dev/null +++ b/src/ngx_base_fetch.cc @@ -0,0 +1,165 @@ +/* + * Copyright 2012 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Author: jefftk@google.com (Jeff Kaufman) + +#include "ngx_base_fetch.h" +#include "ngx_pagespeed.h" +#include "net/instaweb/util/public/google_message_handler.h" +#include "net/instaweb/util/public/message_handler.h" + +namespace net_instaweb { + +NgxBaseFetch::NgxBaseFetch(ngx_http_request_t* r, int pipe_fd) + : request_(r), done_called_(false), pipe_fd_(pipe_fd) { +} + +NgxBaseFetch::~NgxBaseFetch() { } + +void NgxBaseFetch::PopulateHeaders() { + // http_version is the version number of protocol; 1.1 = 1001. See + // NGX_HTTP_VERSION_* in ngx_http_request.h + response_headers()->set_major_version(request_->http_version / 1000); + response_headers()->set_minor_version(request_->http_version % 1000); + + // Standard nginx idiom for iterating over a list. See ngx_list.h + ngx_uint_t i; + ngx_list_part_t* part = &request_->headers_out.headers.part; + ngx_table_elt_t* header = static_cast(part->elts); + + for (i = 0 ; /* void */; i++) { + if (i >= part->nelts) { + if (part->next == NULL) { + break; + } + + part = part->next; + header = static_cast(part->elts); + i = 0; + } + + StringPiece key = ngx_http_pagespeed_str_to_string_piece(&header[i].key); + StringPiece value = ngx_http_pagespeed_str_to_string_piece( + &header[i].value); + + response_headers()->Add(key, value); + } + + // For some reason content_type is not included in + // request_->headers_out.headers, which means I don't fully understand how + // headers_out works, but manually copying over content type works. + StringPiece content_type = ngx_http_pagespeed_str_to_string_piece( + &request_->headers_out.content_type); + response_headers()->Add(HttpAttributes::kContentType, content_type); +} + +bool NgxBaseFetch::HandleWrite(const StringPiece& sp, + MessageHandler* handler) { + // TODO(jefftk): acquire lock on buffer_ here. + buffer_.append(sp.data(), sp.size()); + // TODO(jefftk): release lock here. + return true; +} + +// TODO(jefftk): this is vulnerable to race conditions. Memory inconsistencies +// between threads can mean that chain links get dropped, which is of course +// very bad. To fix this we should protect buffer_ with a lock that gets +// acquired both here and in HandleWrite so that we make sure they both have a +// consistent view of memory. +// +// There may also be a race condition if this is called between the last Write() +// and Done() such that we're sending an empty buffer with last_buf set, which I +// think nginx will reject. +ngx_int_t NgxBaseFetch::CollectAccumulatedWrites(ngx_chain_t** link_ptr) { + // TODO(jefftk): acquire lock on buffer_ here. + + if (!done_called_ && buffer_.length() == 0) { + // Nothing to send. But if done_called_ then we can't short circuit because + // we need to set last_buf. + *link_ptr = NULL; + return NGX_OK; + } + + // Prepare a new nginx buffer to put our buffered writes into. + ngx_buf_t* b = static_cast(ngx_calloc_buf(request_->pool)); + if (b == NULL) { + return NGX_ERROR; + } + b->start = b->pos = static_cast( + ngx_palloc(request_->pool, buffer_.length())); + if (b->pos == NULL) { + return NGX_ERROR; + } + + // Copy our writes over. + buffer_.copy(reinterpret_cast(b->pos), buffer_.length()); + b->last = b->end = b->pos + buffer_.length(); + + if (buffer_.length()) { + b->temporary = 1; + } else { + b->sync = 1; + } + + // Done with buffer contents now. + buffer_.clear(); + + // TODO(jefftk): release lock here. + + b->last_buf = b->last_in_chain = done_called_; + + // Prepare a chain link for our new buffer. + *link_ptr = static_cast( + ngx_alloc_chain_link(request_->pool)); + if (*link_ptr == NULL) { + return NGX_ERROR; + } + + (*link_ptr)->buf = b; + (*link_ptr)->next = NULL; + return NGX_OK; +} + +void NgxBaseFetch::RequestCollection() { + int rc; + char c = 'A'; // What byte we write is arbitrary. + while (true) { + rc = write(pipe_fd_, &c, 1); + if (rc == 1) { + break; + } else if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) { + // TODO(jefftk): is this rare enough that spinning isn't a problem? Could + // we get into a case where the pipe fills up and we spin forever? + + } else { + perror("NgxBaseFetch::RequestCollection"); + break; + } + } +} + +bool NgxBaseFetch::HandleFlush(MessageHandler* handler) { + RequestCollection(); // data available. + return true; +} + +void NgxBaseFetch::HandleDone(bool success) { + done_called_ = true; + close(pipe_fd_); // Indicates to nginx that we're done with the rewrite. + pipe_fd_ = -1; +} + +} // namespace net_instaweb diff --git a/src/ngx_base_fetch.h b/src/ngx_base_fetch.h new file mode 100644 index 000000000..2f4e68c76 --- /dev/null +++ b/src/ngx_base_fetch.h @@ -0,0 +1,83 @@ +/* + * Copyright 2012 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Author: jefftk@google.com (Jeff Kaufman) +// +// Collects output from pagespeed and buffers it until nginx asks for it. +// Notifies nginx via pipe to call CollectAccumulatedWrites() on flush. +// +// - nginx creates a base fetch and passes it to a new proxy fetch. +// - The proxy fetch manages rewriting and thread complexity, and through +// several chained steps passes rewritten html to HandleWrite(). +// - Written data is buffered. +// - When Flush() is called the base fetch writes a byte to a pipe nginx is +// watching so nginx knows to call CollectAccumulatedWrites() to pick up the +// rewritten html. +// - When Done() is called the base fetch closes the pipe, which tells nginx to +// make a final call to CollectAccumulatedWrites(). + +#ifndef NGX_BASE_FETCH_H_ +#define NGX_BASE_FETCH_H_ + +extern "C" { +#include +} + +#include "net/instaweb/http/public/async_fetch.h" +#include "net/instaweb/util/public/string.h" + +namespace net_instaweb { + +class NgxBaseFetch : public AsyncFetch { + public: + NgxBaseFetch(ngx_http_request_t* r, int pipe_fd); + virtual ~NgxBaseFetch(); + + // Copies the response headers out of request_->headers_out->headers. + void PopulateHeaders(); + + // Puts a chain link in link_ptr if we have any output data buffered. Returns + // NGX_OK on success, NGX_ERROR on errors. If there's no data to send, sends + // data only if Done() has been called. Indicates the end of output by + // setting last_buf on the chain link + // + // Always sets link_ptr to a single chain link, never a full chain. + // + // Called by nginx. + ngx_int_t CollectAccumulatedWrites(ngx_chain_t** link_ptr); + + private: + ResponseHeaders response_headers_; + virtual bool HandleWrite(const StringPiece& sp, MessageHandler* handler); + virtual bool HandleFlush(MessageHandler* handler); + virtual void HandleHeadersComplete() {} + virtual void HandleDone(bool success); + + // Indicate to nginx that we would like it to call + // CollectAccumulatedWrites(). + void RequestCollection(); + + ngx_http_request_t* request_; + GoogleString buffer_; + bool done_called_; + int pipe_fd_; + + DISALLOW_COPY_AND_ASSIGN(NgxBaseFetch); +}; + +} // namespace net_instaweb + +#endif // NGX_BASE_FETCH_H_ diff --git a/src/ngx_pagespeed.cc b/src/ngx_pagespeed.cc index e74f9ed00..51effd2ee 100644 --- a/src/ngx_pagespeed.cc +++ b/src/ngx_pagespeed.cc @@ -14,6 +14,8 @@ * limitations under the License. */ +// Author: jefftk@google.com (Jeff Kaufman) + /* * Usage: * server { @@ -21,43 +23,112 @@ * } */ +#include "ngx_pagespeed.h" + extern "C" { #include #include #include } -#include "ngx_rewrite_driver_factory.h" +#include +#include "ngx_rewrite_driver_factory.h" +#include "ngx_base_fetch.h" + +#include "net/instaweb/automatic/public/proxy_fetch.h" +#include "net/instaweb/rewriter/public/furious_matcher.h" #include "net/instaweb/rewriter/public/process_context.h" #include "net/instaweb/rewriter/public/rewrite_driver.h" #include "net/instaweb/public/version.h" +#include "net/instaweb/util/public/google_url.h" #include "net/instaweb/util/public/string.h" -#include "net/instaweb/util/public/string_util.h" -#include "net/instaweb/util/public/string_writer.h" -#include "net/instaweb/util/public/null_message_handler.h" +#include "net/instaweb/util/public/google_message_handler.h" extern ngx_module_t ngx_pagespeed; // Hacks for debugging. #define DBG(r, args...) \ - ngx_log_error(NGX_LOG_ALERT, (r)->connection->log, 0, args) + ngx_log_error(NGX_LOG_DEBUG, (r)->connection->log, 0, args) +#define PDBG(ctx, args...) \ + ngx_log_error(NGX_LOG_DEBUG, (ctx)->pagespeed_connection->log, 0, args) #define CDBG(cf, args...) \ - ngx_conf_log_error(NGX_LOG_ALERT, cf, 0, args) + ngx_conf_log_error(NGX_LOG_DEBUG, cf, 0, args) typedef struct { ngx_flag_t active; ngx_str_t cache_dir; net_instaweb::NgxRewriteDriverFactory* driver_factory; net_instaweb::ServerContext* server_context; + net_instaweb::ProxyFetchFactory* proxy_fetch_factory; + net_instaweb::MessageHandler* handler; } ngx_http_pagespeed_srv_conf_t; typedef struct { + ngx_http_pagespeed_srv_conf_t* cfg; net_instaweb::RewriteDriver* driver; - scoped_ptr output; - scoped_ptr writer; + net_instaweb::ProxyFetch* proxy_fetch; + net_instaweb::NgxBaseFetch* base_fetch; + bool data_received; + int pipe_fd; + ngx_connection_t* pagespeed_connection; + ngx_http_request_t* r; + const net_instaweb::ContentType* content_type; } ngx_http_pagespeed_request_ctx_t; +static ngx_int_t +ngx_http_pagespeed_body_filter(ngx_http_request_t* r, ngx_chain_t* in); + +static void* +ngx_http_pagespeed_create_srv_conf(ngx_conf_t* cf); + +static char* +ngx_http_pagespeed_merge_srv_conf(ngx_conf_t* cf, void* parent, void* child); + +static void +ngx_http_pagespeed_release_request_context(void* data); + +static void +ngx_http_pagespeed_set_buffered(ngx_http_request_t* r, bool on); + +static GoogleString +ngx_http_pagespeed_determine_url(ngx_http_request_t* r); + +static ngx_http_pagespeed_request_ctx_t* +ngx_http_pagespeed_get_request_context(ngx_http_request_t* r); + +static void +ngx_http_pagespeed_initialize_server_context( + ngx_http_pagespeed_srv_conf_t* cfg); + +static ngx_int_t +ngx_http_pagespeed_update(ngx_http_pagespeed_request_ctx_t* ctx, + ngx_event_t* ev); + +static void +ngx_http_pagespeed_connection_read_handler(ngx_event_t* ev); + +static ngx_int_t +ngx_http_pagespeed_create_connection(ngx_http_pagespeed_request_ctx_t* ctx); + +static ngx_int_t +ngx_http_pagespeed_create_request_context(ngx_http_request_t* r); + +static void +ngx_http_pagespeed_send_to_pagespeed( + ngx_http_request_t* r, + ngx_http_pagespeed_request_ctx_t* ctx, + ngx_chain_t* in); + +static ngx_int_t +ngx_http_pagespeed_body_filter(ngx_http_request_t* r, ngx_chain_t* in); + +static ngx_int_t +ngx_http_pagespeed_header_filter(ngx_http_request_t* r); + +static ngx_int_t +ngx_http_pagespeed_init(ngx_conf_t* cf); + static ngx_command_t ngx_http_pagespeed_commands[] = { { ngx_string("pagespeed"), NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_CONF_TAKE1, @@ -65,7 +136,6 @@ static ngx_command_t ngx_http_pagespeed_commands[] = { NGX_HTTP_SRV_CONF_OFFSET, offsetof(ngx_http_pagespeed_srv_conf_t, active), NULL }, - { ngx_string("pagespeed_cache"), NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_CONF_TAKE1, ngx_conf_set_str_slot, @@ -76,14 +146,13 @@ static ngx_command_t ngx_http_pagespeed_commands[] = { ngx_null_command }; -static StringPiece +StringPiece ngx_http_pagespeed_str_to_string_piece(ngx_str_t* s) { return StringPiece(reinterpret_cast(s->data), s->len); } static void* -ngx_http_pagespeed_create_srv_conf(ngx_conf_t* cf) -{ +ngx_http_pagespeed_create_srv_conf(ngx_conf_t* cf) { ngx_http_pagespeed_srv_conf_t* conf; conf = static_cast( @@ -102,8 +171,7 @@ ngx_http_pagespeed_create_srv_conf(ngx_conf_t* cf) } static char* -ngx_http_pagespeed_merge_srv_conf(ngx_conf_t* cf, void* parent, void* child) -{ +ngx_http_pagespeed_merge_srv_conf(ngx_conf_t* cf, void* parent, void* child) { ngx_http_pagespeed_srv_conf_t* prev = static_cast(parent); ngx_http_pagespeed_srv_conf_t* conf = @@ -118,78 +186,42 @@ ngx_http_pagespeed_merge_srv_conf(ngx_conf_t* cf, void* parent, void* child) static ngx_http_output_header_filter_pt ngx_http_next_header_filter; static ngx_http_output_body_filter_pt ngx_http_next_body_filter; -// Add a buffer to the end of the buffer chain indicating that we were processed -// through ngx_pagespeed. -static ngx_int_t -ngx_http_pagespeed_note_processed(ngx_http_request_t* r, ngx_chain_t* in) { - // Find the end of the buffer chain. - ngx_chain_t* chain_link; - int chain_contains_last_buffer = 0; - for (chain_link = in; chain_link != NULL; chain_link = chain_link->next) { - if (chain_link->buf->last_buf) { - chain_contains_last_buffer = 1; - if (chain_link->next != NULL) { - ngx_log_error(NGX_LOG_ERR, (r)->connection->log, 0, - "Chain link thinks its last but has a child."); - return NGX_ERROR; - } - break; // Chain link now is the last link in the chain. - } - } - - if (!chain_contains_last_buffer) { - // None of the buffers had last_buf set, meaning we have an incomplete chain - // and are still waiting to get the final buffer. Wait until we're called - // again with the last buffer. - return NGX_OK; - } - - // Prepare a new buffer to put the note into. - ngx_buf_t* b = static_cast(ngx_calloc_buf(r->pool)); - if (b == NULL) { - return NGX_ERROR; - } - - // Write to the new buffer. - const char note[] = "\n"; - int note_len = strlen(note); - b->start = b->pos = static_cast(ngx_pnalloc(r->pool, note_len)); - strncpy((char*)b->pos, note, note_len); - b->end = b->last = b->pos + note_len; - b->temporary = 1; - - // Link the new buffer into the buffer chain. - ngx_chain_t* added_link = static_cast( - ngx_alloc_chain_link(r->pool)); - if (added_link == NULL) { - return NGX_ERROR; - } - - added_link->buf = b; - - // Add our new link to the buffer chain. - added_link->next = NULL; - chain_link->next = added_link; - - // Mark our new link as the end of the chain. - chain_link->buf->last_buf = 0; - added_link->buf->last_buf = 1; - chain_link->buf->last_in_chain = 0; - added_link->buf->last_in_chain = 1; - - return NGX_OK; -} - static void -ngx_http_pagespeed_release_request_context( - ngx_http_request_t* r, ngx_http_pagespeed_request_ctx_t* ctx) { +ngx_http_pagespeed_release_request_context(void* data) { + ngx_http_pagespeed_request_ctx_t* ctx = + static_cast(data); - // release request context - ngx_http_set_ctx(r, NULL, ngx_pagespeed); - delete ctx; + // The proxy fetch deleted itself if we called Done(), but if an error + // happened before then we need to tell it to delete itself. + if (ctx->proxy_fetch != NULL) { + ctx->proxy_fetch->Done(false /* failure */); + } + + // BaseFetch doesn't delete itself in HandleDone() because we still need to + // recieve notification via pipe and call CollectAccumulatedWrites. + // TODO(jefftk): If we close the proxy_fetch above and not in the normal flow, + // can this delete base_fetch while proxy_fetch still needs it? Is there a + // race condition here? + delete ctx->base_fetch; + + // Don't close the pipe if it was never opened or already closed. + if (ctx->pipe_fd != -1) { + close(ctx->pipe_fd); + } + + delete ctx; } +// Tell nginx whether we have network activity we're waiting for so that it sets +// a write handler. See src/http/ngx_http_request.c:2083. +static void +ngx_http_pagespeed_set_buffered(ngx_http_request_t* r, bool on) { + if (on) { + r->buffered |= NGX_HTTP_SSI_BUFFERED; + } else { + r->buffered &= ~NGX_HTTP_SSI_BUFFERED; + } +} static GoogleString ngx_http_pagespeed_determine_url(ngx_http_request_t* r) { @@ -263,179 +295,316 @@ ngx_http_pagespeed_initialize_server_context( // TODO(jefftk): We should call NgxRewriteDriverFactory::Terminate() when // we're done with it. + cfg->handler = new net_instaweb::GoogleMessageHandler(); + cfg->driver_factory = new net_instaweb::NgxRewriteDriverFactory(); cfg->driver_factory->set_filename_prefix( ngx_http_pagespeed_str_to_string_piece(&cfg->cache_dir)); cfg->server_context = cfg->driver_factory->CreateServerContext(); - - // In real use, with filters coming from the user, this would be some other - // kind of message handler that actually sent errors back to the user. - net_instaweb::NullMessageHandler handler; + cfg->proxy_fetch_factory = + new net_instaweb::ProxyFetchFactory(cfg->server_context); // Turn on some filters so we can see if this is working. These filters are - // specifically chosen as ones that don't make requests for subresources or do - // work that needs to complete asynchronously. They should be fast enough to - // run in the line of the request. + // chosen to make sub-resource fetches but not create any *.pagespeed.* + // resource urls because we don't yet have a handler in place for them. net_instaweb::RewriteOptions* global_options = cfg->server_context->global_options(); global_options->SetRewriteLevel(net_instaweb::RewriteOptions::kPassThrough); global_options->EnableFiltersByCommaSeparatedList( - "collapse_whitespace,remove_comments,remove_quotes", &handler); + "collapse_whitespace,remove_comments,remove_quotes," + "inline_css,inline_javascript,rewrite_css", cfg->handler); } - -// Set us up for processing a request. When the request finishes, call -// ngx_http_pagespeed_release_request_context. +// Returns: +// NGX_OK: pagespeed is done, request complete +// NGX_AGAIN: pagespeed still working, needs to be called again later +// NGX_ERROR: error static ngx_int_t -ngx_http_pagespeed_create_request_context(ngx_http_request_t* r) { - ngx_http_pagespeed_srv_conf_t* cfg = - static_cast( - ngx_http_get_module_srv_conf(r, ngx_pagespeed)); +ngx_http_pagespeed_update(ngx_http_pagespeed_request_ctx_t* ctx, + ngx_event_t* ev) { + bool done; + int rc; + char chr; + do { + rc = read(ctx->pipe_fd, &chr, 1); + } while (rc == -1 && errno == EINTR); // Retry on EINTR. - if (cfg->driver_factory == NULL) { - // This is the first request handled by this server block. - ngx_http_pagespeed_initialize_server_context(cfg); + // read() should only ever return 0 (closed), 1 (data), or -1 (error). + CHECK(rc == -1 || rc == 0 || rc == 1); + + if (rc == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + PDBG(ctx, "no data to read from pagespeed yet"); + return NGX_AGAIN; + } else { + perror("ngx_http_pagespeed_connection_read_handler"); + return NGX_ERROR; + } + } else { + // We're done iff we read 0 bytes because that means the pipe was closed. + done = (rc == 0); } - // Pull the server context out of the per-server config. - net_instaweb::ServerContext* server_context = - static_cast( - ngx_http_get_module_srv_conf(r, ngx_pagespeed))->server_context; - - if (server_context == NULL) { - ngx_log_error(NGX_LOG_ERR, (r)->connection->log, 0, - "ServerContext should have been initialized."); - return NGX_ERROR; + // Get output from pagespeed. + ngx_chain_t* cl; + rc = ctx->base_fetch->CollectAccumulatedWrites(&cl); + if (rc != NGX_OK) { + PDBG(ctx, "problem with CollectAccumulatedWrites"); + return rc; } - GoogleString url = ngx_http_pagespeed_determine_url(r); + rc = ngx_http_next_body_filter(ctx->r, cl); + if (rc != NGX_OK) { + return rc; + } + + return done ? NGX_OK : NGX_AGAIN; +} + +static void +ngx_http_pagespeed_connection_read_handler(ngx_event_t* ev) { + CHECK(ev != NULL); + + ngx_connection_t* c = static_cast(ev->data); + CHECK(c != NULL); ngx_http_pagespeed_request_ctx_t* ctx = - new ngx_http_pagespeed_request_ctx_t(); + static_cast(c->data); + CHECK(ctx != NULL); - ctx->driver = server_context->NewRewriteDriver(); + int rc = ngx_http_pagespeed_update(ctx, ev); + CHECK(rc == NGX_OK || rc == NGX_ERROR || rc == NGX_AGAIN); + if (rc == NGX_AGAIN) { + // request needs more work by pagespeed + rc = ngx_handle_read_event(ev, 0); + CHECK(rc == NGX_OK); + } else { + ngx_del_event(ev, NGX_READ_EVENT, 0); + ngx_http_pagespeed_set_buffered(ctx->r, false); + ngx_http_finalize_request(ctx->r, rc == NGX_OK ? NGX_DONE : NGX_ERROR); + } +} - // TODO(jefftk): replace this with a writer that generates proper nginx - // buffers and puts them in the chain. Or avoids the double - // copy some other way. - ctx->output.reset(new GoogleString()); - ctx->writer.reset(new net_instaweb::StringWriter(ctx->output.get())); - ctx->driver->SetWriter(ctx->writer.get()); - - // For testing we always want to perform any optimizations we can, so we - // wait until everything is done rather than using a deadline, the way we - // want to eventually. - ctx->driver->set_fully_rewrite_on_flush(true); - - ngx_http_set_ctx(r, ctx, ngx_pagespeed); - bool ok = ctx->driver->StartParse(url); - if (!ok) { - ngx_log_error(NGX_LOG_ERR, (r)->connection->log, 0, - "Failed to StartParse on url %*s", - url.size(), url.data()); +static ngx_int_t +ngx_http_pagespeed_create_connection(ngx_http_pagespeed_request_ctx_t* ctx) { + ngx_connection_t* c = ngx_get_connection( + ctx->pipe_fd, ctx->r->connection->log); + if (c == NULL) { return NGX_ERROR; } + c->recv = ngx_recv; + c->send = ngx_send; + c->recv_chain = ngx_recv_chain; + c->send_chain = ngx_send_chain; + + c->log_error = ctx->r->connection->log_error; + + c->read->log = c->log; + c->write->log = c->log; + + ctx->pagespeed_connection = c; + + // Tell nginx to monitor this pipe and call us back when there's data. + c->data = ctx; + c->read->handler = ngx_http_pagespeed_connection_read_handler; + ngx_add_event(c->read, NGX_READ_EVENT, 0); + return NGX_OK; } -// Replace each buffer chain with a new one that's been optimized. +// Set us up for processing a request. static ngx_int_t -ngx_http_pagespeed_optimize_and_replace_buffer(ngx_http_request_t* r, - ngx_chain_t* in) { +ngx_http_pagespeed_create_request_context(ngx_http_request_t* r) { ngx_http_pagespeed_request_ctx_t* ctx = - ngx_http_pagespeed_get_request_context(r); - if (ctx == NULL) { + new ngx_http_pagespeed_request_ctx_t(); + ctx->pipe_fd = -1; + ctx->r = r; + ctx->cfg = static_cast( + ngx_http_get_module_srv_conf(r, ngx_pagespeed)); + + int file_descriptors[2]; + int rc = pipe(file_descriptors); + if (rc != 0) { + ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, "pipe() failed"); + ngx_http_pagespeed_release_request_context(ctx); return NGX_ERROR; } + if (ngx_nonblocking(file_descriptors[0]) == -1) { + ngx_log_error(NGX_LOG_EMERG, r->connection->log, ngx_socket_errno, + ngx_nonblocking_n " pipe[0] failed"); + ngx_http_pagespeed_release_request_context(ctx); + return NGX_ERROR; + } + + if (ngx_nonblocking(file_descriptors[1]) == -1) { + ngx_log_error(NGX_LOG_EMERG, r->connection->log, ngx_socket_errno, + ngx_nonblocking_n " pipe[1] failed"); + ngx_http_pagespeed_release_request_context(ctx); + return NGX_ERROR; + } + + ctx->pipe_fd = file_descriptors[0]; + rc = ngx_http_pagespeed_create_connection(ctx); + if (rc != NGX_OK) { + close(file_descriptors[0]); + close(file_descriptors[1]); + ctx->pipe_fd = -1; + + ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, + "ngx_http_pagespeed_create_request_context: " + "no pagespeed connection."); + ngx_http_pagespeed_release_request_context(ctx); + return NGX_ERROR; + } + + // Deletes itself when HandleDone is called, which happens when we call Done() + // on the proxy fetch below. + ctx->base_fetch = new net_instaweb::NgxBaseFetch(r, file_descriptors[1]); + + if (ctx->cfg->driver_factory == NULL) { + // This is the first request handled by this server block. + ngx_http_pagespeed_initialize_server_context(ctx->cfg); + } + + GoogleString url_string = ngx_http_pagespeed_determine_url(r); + net_instaweb::GoogleUrl request_url(url_string); + + // This code is based off of ProxyInterface::ProxyRequestCallback and + // ProxyFetchFactory::StartNewProxyFetch + + // If the global options say we're running furious (the experiment framework) + // then clone them into custom_options so we can manipulate custom options + // without affecting the global options. + net_instaweb::RewriteOptions* custom_options = NULL; + net_instaweb::RewriteOptions* global_options = + ctx->cfg->server_context->global_options(); + if (global_options->running_furious()) { + custom_options = global_options->Clone(); + custom_options->set_need_to_store_experiment_data( + ctx->cfg->server_context->furious_matcher()->ClassifyIntoExperiment( + *ctx->base_fetch->request_headers(), custom_options)); + } + + // TODO(jefftk): port ProxyInterface::InitiatePropertyCacheLookup so that we + // have the propery cache in nginx. + + // If we don't have custom options we can use NewRewriteDriver which reuses + // rewrite drivers and so is faster because there's no wait to construct + // them. Otherwise we have to build a new one every time. + if (custom_options == NULL) { + ctx->driver = ctx->cfg->server_context->NewRewriteDriver(); + } else { + // NewCustomRewriteDriver takes ownership of custom_options. + ctx->driver = ctx->cfg->server_context->NewCustomRewriteDriver( + custom_options); + } + ctx->driver->set_log_record(ctx->base_fetch->log_record()); + + // TODO(jefftk): FlushEarlyFlow would go here. + + ngx_http_set_ctx(r, ctx, ngx_pagespeed); + + // Set up a cleanup handler on the request. + ngx_http_cleanup_t* cleanup = ngx_http_cleanup_add(r, 0); + if (cleanup == NULL) { + ngx_http_pagespeed_release_request_context(ctx); + return NGX_ERROR; + } + cleanup->handler = ngx_http_pagespeed_release_request_context; + cleanup->data = ctx; + + // Will call StartParse etc. + ctx->proxy_fetch = ctx->cfg->proxy_fetch_factory->CreateNewProxyFetch( + url_string, ctx->base_fetch, ctx->driver, + NULL /* property_callback */, + NULL /* original_content_fetch */); + + return NGX_OK; +} + +// Send each buffer in the chain to the proxy_fetch for optimization. +// Eventually it will make it's way, optimized, to base_fetch. +static void +ngx_http_pagespeed_send_to_pagespeed( + ngx_http_request_t* r, + ngx_http_pagespeed_request_ctx_t* ctx, + ngx_chain_t* in) { + ngx_chain_t* cur; int last_buf = 0; - int last_in_chain = 0; - for (cur = in; cur != NULL;) { + for (cur = in; cur != NULL; cur = cur->next) { last_buf = cur->buf->last_buf; - last_in_chain = cur->buf->last_in_chain; + + // Buffers are not really the last buffer until they've been through + // pagespeed. + cur->buf->last_buf = 0; - ctx->driver->ParseText(StringPiece(reinterpret_cast(cur->buf->pos), - cur->buf->last - cur->buf->pos)); + ctx->proxy_fetch->Write(StringPiece(reinterpret_cast(cur->buf->pos), + cur->buf->last - cur->buf->pos), + ctx->cfg->handler); - // We're done with buffers as we pass them to the rewrite driver, so free - // them and their chain links as we go. Don't free the first buffer (in) - // which we need below. - ngx_chain_t* next_link = cur->next; - if (cur != in) { - ngx_pfree(r->pool, cur->buf); - ngx_pfree(r->pool, cur); - } - cur = next_link; + // We're done with buffers as we pass them through, so mark them as sent as + // we go. + cur->buf->pos = cur->buf->last; } - in->next = NULL; // We freed all the later buffers. - - // Prepare the new buffer. - ngx_buf_t* b = static_cast(ngx_calloc_buf(r->pool)); - if (b == NULL) { - ngx_log_error(NGX_LOG_ERR, (r)->connection->log, 0, - "failed to allocate buffer"); - - return NGX_ERROR; - } - - b->temporary = 1; - b->last_buf = last_buf; - b->last_in_chain = last_in_chain; - in->next = NULL; - - // replace the first link's buffer with our new one. - ngx_pfree(r->pool, in->buf); - in->buf = b; if (last_buf) { - ctx->driver->FinishParse(); + ctx->proxy_fetch->Done(true /* success */); + ctx->proxy_fetch = NULL; // ProxyFetch deletes itself on Done(). } else { - ctx->driver->Flush(); + // TODO(jefftk): Decide whether Flush() is warranted here. + ctx->proxy_fetch->Flush(ctx->cfg->handler); } - - // TODO(jefftk): need to store how much went out on previous flushes and only - // copy here the new stuff. Keep the count in the request context. - b->pos = b->start = static_cast( - ngx_pnalloc(r->pool, ctx->output->length())); - if (b->pos == NULL) { - ngx_log_error(NGX_LOG_ERR, (r)->connection->log, 0, - "failed to allocate %d bytes", ctx->output->length()); - return NGX_ERROR; - } - ctx->output->copy(reinterpret_cast(b->pos), ctx->output->length()); - b->last = b->end = b->pos + ctx->output->length(); - - if (last_buf) { - ngx_http_pagespeed_release_request_context(r, ctx); - ctx = NULL; - } - - return NGX_OK; } static ngx_int_t -ngx_http_pagespeed_body_filter(ngx_http_request_t* r, ngx_chain_t* in) -{ - ngx_int_t rc; +ngx_http_pagespeed_body_filter(ngx_http_request_t* r, ngx_chain_t* in) { + ngx_http_pagespeed_request_ctx_t* ctx = + ngx_http_pagespeed_get_request_context(r); - rc = ngx_http_pagespeed_optimize_and_replace_buffer(r, in); - if (rc != NGX_OK) { - return rc; + if (ctx == NULL) { + // ctx is null iff we've decided to pass through this request unchanged. + return ngx_http_next_body_filter(r, in); } - rc = ngx_http_pagespeed_note_processed(r, in); - if (rc != NGX_OK) { - return rc; + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "http pagespeed filter \"%V\"", &r->uri); + + if (ctx->content_type == NULL) { + // We don't know what type of resource this is, but we only want to send + // html through to pagespeed. Check the content type header and find out. + ctx->content_type = net_instaweb::MimeTypeToContentType( + ngx_http_pagespeed_str_to_string_piece( + &r->headers_out.content_type)); + if (ctx->content_type == NULL || !ctx->content_type->IsHtmlLike()) { + // Unknown or otherwise non-html content type: skip it. + ngx_http_set_ctx(r, NULL, ngx_pagespeed); + return ngx_http_next_body_filter(r, in); + } } - return ngx_http_next_body_filter(r, in); + if (!ctx->data_received) { + // This is the first set of buffers we've got for this request. + ctx->data_received = true; + // Call this here and not in the header filter because we want to see the + // headers after any other filters are finished modifying them. At this + // point they are final. + ctx->base_fetch->PopulateHeaders(); // TODO(jefftk): is this thread safe? + } + + if (in != NULL) { + // Send all input data to the proxy fetch. + ngx_http_pagespeed_send_to_pagespeed(r, ctx, in); + } + + ngx_http_pagespeed_set_buffered(r, true); + return NGX_AGAIN; } static ngx_int_t -ngx_http_pagespeed_header_filter(ngx_http_request_t* r) -{ +ngx_http_pagespeed_header_filter(ngx_http_request_t* r) { // We're modifying content below, so switch to 'Transfer-Encoding: chunked' // and calculate on the fly. ngx_http_clear_content_length(r); @@ -444,6 +613,7 @@ ngx_http_pagespeed_header_filter(ngx_http_request_t* r) int rc = ngx_http_pagespeed_create_request_context(r); if (rc != NGX_OK) { + ngx_http_finalize_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR); return rc; } @@ -451,8 +621,7 @@ ngx_http_pagespeed_header_filter(ngx_http_request_t* r) } static ngx_int_t -ngx_http_pagespeed_init(ngx_conf_t* cf) -{ +ngx_http_pagespeed_init(ngx_conf_t* cf) { ngx_http_pagespeed_srv_conf_t* pagespeed_config; pagespeed_config = static_cast( ngx_http_conf_get_module_srv_conf(cf, ngx_pagespeed)); diff --git a/src/ngx_pagespeed.h b/src/ngx_pagespeed.h new file mode 100644 index 000000000..76fa9c3db --- /dev/null +++ b/src/ngx_pagespeed.h @@ -0,0 +1,30 @@ +/* + * Copyright 2012 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Author: jefftk@google.com (Jeff Kaufman) + +#ifndef NGX_PAGESPEED_H_ +#define NGX_PAGESPEED_H_ + +extern "C" { + #include +} + +#include "net/instaweb/util/public/string_util.h" + +StringPiece ngx_http_pagespeed_str_to_string_piece(ngx_str_t* s); + +#endif // NGX_PAGESPEED_H_ diff --git a/src/ngx_rewrite_driver_factory.cc b/src/ngx_rewrite_driver_factory.cc index 2f787fb32..e8c1442ec 100644 --- a/src/ngx_rewrite_driver_factory.cc +++ b/src/ngx_rewrite_driver_factory.cc @@ -27,7 +27,6 @@ #include "net/instaweb/rewriter/public/server_context.h" #include "net/instaweb/rewriter/public/rewrite_driver.h" #include "net/instaweb/rewriter/public/rewrite_driver_factory.h" -#include "net/instaweb/rewriter/public/rewrite_gflags.h" #include "net/instaweb/util/public/google_message_handler.h" #include "net/instaweb/util/public/google_timer.h" #include "net/instaweb/util/public/lru_cache.h" @@ -90,7 +89,8 @@ Timer* NgxRewriteDriverFactory::DefaultTimer() { } void NgxRewriteDriverFactory::SetupCaches(ServerContext* resource_manager) { - LRUCache* lru_cache = new LRUCache(gflags_.lru_cache_size_bytes()); + // TODO(jefftk): make LRUCache size configurable. + LRUCache* lru_cache = new LRUCache(10 * 1000 * 1000); CacheInterface* cache = new ThreadsafeCache( lru_cache, thread_system()->NewMutex()); HTTPCache* http_cache = new HTTPCache(cache, timer(), hasher(), statistics()); diff --git a/src/ngx_rewrite_driver_factory.h b/src/ngx_rewrite_driver_factory.h index 9de9ca85f..d8c79e915 100644 --- a/src/ngx_rewrite_driver_factory.h +++ b/src/ngx_rewrite_driver_factory.h @@ -20,7 +20,6 @@ #define NGX_REWRITE_DRIVER_FACTORY_H_ #include "net/instaweb/rewriter/public/rewrite_driver_factory.h" -#include "net/instaweb/rewriter/public/rewrite_gflags.h" #include "net/instaweb/util/public/simple_stats.h" namespace net_instaweb { @@ -41,7 +40,6 @@ class NgxRewriteDriverFactory : public RewriteDriverFactory { private: SimpleStats simple_stats_; - const RewriteGflags gflags_; Timer* timer_; DISALLOW_COPY_AND_ASSIGN(NgxRewriteDriverFactory); diff --git a/test/www/Puzzle.jpg b/test/www/Puzzle.jpg new file mode 100644 index 000000000..a11910952 Binary files /dev/null and b/test/www/Puzzle.jpg differ diff --git a/test/www/script.js b/test/www/script.js new file mode 100644 index 000000000..34b2e3d4e --- /dev/null +++ b/test/www/script.js @@ -0,0 +1 @@ +alert('run script'); diff --git a/test/www/style.css b/test/www/style.css new file mode 100644 index 000000000..de20f4201 --- /dev/null +++ b/test/www/style.css @@ -0,0 +1 @@ +h2 { color: green } diff --git a/test/www/test.html b/test/www/test.html index b1094cdba..544aad4e3 100644 --- a/test/www/test.html +++ b/test/www/test.html @@ -16,6 +16,8 @@ bold text + +