psol: updating from r2577

This commit is contained in:
Jeff Kaufman
2013-03-18 12:36:07 -04:00
parent 05765a33f7
commit a996856b7b
2960 changed files with 47330 additions and 545853 deletions
+20 -3
View File
@@ -69,6 +69,8 @@ class GURL {
GURL_API GURL(const char* canonical_spec, size_t canonical_spec_len,
const url_parse::Parsed& parsed, bool is_valid);
GURL_API ~GURL();
GURL_API GURL& operator=(const GURL& other);
// Returns true when this object represents a valid parsed URL. When not
@@ -210,8 +212,8 @@ class GURL {
// Returns true if the scheme for the current URL is a known "standard"
// scheme. Standard schemes have an authority and a path section. This
// includes file:, which some callers may want to filter out explicitly by
// calling SchemeIsFile.
// includes file: and filesystem:, which some callers may want to filter out
// explicitly by calling SchemeIsFile[System].
GURL_API bool IsStandard() const;
// Returns true if the given parameter (should be lower-case ASCII to match
@@ -226,9 +228,15 @@ class GURL {
return SchemeIs("file");
}
// FileSystem URLs need to be treated differently in some cases.
// Returns true if this URL's scheme is "filesystem" (as tested by SchemeIs).
bool SchemeIsFileSystem() const {
return SchemeIs("filesystem");
}
// If the scheme indicates a secure connection
bool SchemeIsSecure() const {
return SchemeIs("https");
return SchemeIs("https") ||
(SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure());
}
// Returns true if the hostname is an IP address. Note: this function isn't
@@ -347,6 +355,12 @@ class GURL {
// This function may be called from any thread.
GURL_API static const GURL& EmptyGURL();
// Returns the inner URL of a nested URL [currently only non-null for
// filesystem: URLs].
// This hands back the stored inner_url_ pointer as-is, so callers must check
// for NULL before dereferencing it.
// NOTE(review): the pointee presumably stays owned by this GURL -- confirm
// against the implementation before holding on to the pointer.
const GURL* inner_url() const {
return inner_url_;
}
private:
// Returns the substring of the input identified by the given component.
std::string ComponentString(const url_parse::Component& comp) const {
@@ -366,6 +380,9 @@ class GURL {
// Identified components of the canonical spec.
url_parse::Parsed parsed_;
// Used for nested schemes [currently only filesystem:].
GURL* inner_url_;
// TODO bug 684583: Add encoding for query params.
};
+43 -3
View File
@@ -29,7 +29,7 @@
#ifndef GOOGLEURL_SRC_URL_CANON_H__
#define GOOGLEURL_SRC_URL_CANON_H__
#include <memory.h>
#include <string.h>
#include <stdlib.h>
#include "base/string16.h"
@@ -360,6 +360,17 @@ struct CanonHostInfo {
// CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6.
// CanonicalizeHostVerbose() always sets it.
url_parse::Component out_host;
// |address| contains the parsed IP Address (if any) in its first
// AddressLength() bytes, in network order. If IsIPAddress() is false
// AddressLength() will return zero and the content of |address| is undefined.
unsigned char address[16];
// Convenience function to calculate the length of an IP address corresponding
// to the current IP version in |family|, if any. For use with |address|.
int AddressLength() const {
  // An IPv4 address occupies 4 bytes of |address| and an IPv6 address 16.
  // For any other family no IP address was parsed, so the length is zero.
  if (family == IPV4)
    return 4;
  if (family == IPV6)
    return 16;
  return 0;
}
};
@@ -538,6 +549,20 @@ GURL_API bool CanonicalizeFileURL(const char16* spec,
CanonOutput* output,
url_parse::Parsed* new_parsed);
// Use for filesystem URLs.
GURL_API bool CanonicalizeFileSystemURL(const char* spec,
int spec_len,
const url_parse::Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput* output,
url_parse::Parsed* new_parsed);
GURL_API bool CanonicalizeFileSystemURL(const char16* spec,
int spec_len,
const url_parse::Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput* output,
url_parse::Parsed* new_parsed);
// Use for path URLs such as javascript. This does not modify the path in any
// way, for example, by escaping it.
GURL_API bool CanonicalizePathURL(const char* spec,
@@ -571,7 +596,7 @@ GURL_API bool CanonicalizeMailtoURL(const char16* spec,
// Internal structure used for storing separate strings for each component.
// The basic canonicalization functions use this structure internally so that
// component remplacement (different strings for different components) can be
// component replacement (different strings for different components) can be
// treated on the same code path as regular canonicalization (the same string
// for each component).
//
@@ -763,6 +788,21 @@ GURL_API bool ReplaceStandardURL(const char* base,
CanonOutput* output,
url_parse::Parsed* new_parsed);
// Filesystem URLs can only have the path, query, or ref replaced.
// All other components will be ignored.
GURL_API bool ReplaceFileSystemURL(const char* base,
const url_parse::Parsed& base_parsed,
const Replacements<char>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
url_parse::Parsed* new_parsed);
GURL_API bool ReplaceFileSystemURL(const char* base,
const url_parse::Parsed& base_parsed,
const Replacements<char16>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
url_parse::Parsed* new_parsed);
// Replacing some parts of a file URL is not permitted. Everything except
// the host, path, query, and ref will be ignored.
GURL_API bool ReplaceFileURL(const char* base,
@@ -811,7 +851,7 @@ GURL_API bool ReplaceMailtoURL(const char* base,
// relative, the relevant portion of the URL will be placed into
// |*relative_component| (there may have been trimmed whitespace, for example).
// This value is passed to ResolveRelativeURL. If the input is not relative,
// this value is UNDEFINED (it may be changed by the functin).
// this value is UNDEFINED (it may be changed by the function).
//
// Returns true on success (we successfully determined the URL is relative or
// not). Failure means that the combination of URLs doesn't make any sense.
+2 -2
View File
@@ -1,4 +1,4 @@
// Copyright 2007, Google Inc.
// Copyright 2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -47,7 +47,7 @@ class ICUCharsetConverter : public CharsetConverter {
// be managed by the creator such that it is alive as long as this is.
GURL_API ICUCharsetConverter(UConverter* converter);
GURL_API virtual ~ICUCharsetConverter() {}
GURL_API virtual ~ICUCharsetConverter();
GURL_API virtual void ConvertFromUTF16(const char16* input,
int input_len,
+13 -12
View File
@@ -1,4 +1,4 @@
// Copyright 2007, Google Inc.
// Copyright 2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -108,7 +108,7 @@ void AppendStringOfType(const char16* source, int length,
// Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit
// that will be used to represent it.
extern const char kHexCharLookup[0x10];
GURL_API extern const char kHexCharLookup[0x10];
// This lookup table allows fast conversion between ASCII hex letters and their
// corresponding numerical value. The 8-bit range is divided up into 8
@@ -175,8 +175,8 @@ extern const char16 kUnicodeReplacementCharacter;
// (for a single-byte ASCII character, it will not be changed).
//
// Implementation is in url_canon_icu.cc.
bool ReadUTFChar(const char* str, int* begin, int length,
unsigned* code_point_out);
GURL_API bool ReadUTFChar(const char* str, int* begin, int length,
unsigned* code_point_out);
// Generic To-UTF-8 converter. This will call the given append method for each
// character that should be appended, with the given output method. Wrappers
@@ -252,8 +252,8 @@ inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
// (for a single-16-bit-word character, it will not be changed).
//
// Implementation is in url_canon_icu.cc.
bool ReadUTFChar(const char16* str, int* begin, int length,
unsigned* code_point);
GURL_API bool ReadUTFChar(const char16* str, int* begin, int length,
unsigned* code_point);
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
inline void AppendUTF16Value(unsigned code_point,
@@ -371,10 +371,10 @@ void AppendInvalidNarrowString(const char16* spec, int begin, int end,
// replacing the invalid characters with the "invalid character". It will
// return false in the failure case, and the caller should not continue as
// normal.
bool ConvertUTF16ToUTF8(const char16* input, int input_len,
CanonOutput* output);
bool ConvertUTF8ToUTF16(const char* input, int input_len,
CanonOutputT<char16>* output);
GURL_API bool ConvertUTF16ToUTF8(const char16* input, int input_len,
CanonOutput* output);
GURL_API bool ConvertUTF8ToUTF16(const char* input, int input_len,
CanonOutputT<char16>* output);
// Converts from UTF-16 to 8-bit using the character set converter. If the
// converter is NULL, this will use UTF-8.
@@ -433,8 +433,9 @@ bool CanonicalizePartialPath(const char16* spec,
#ifndef WIN32
// Implementations of Windows' int-to-string conversions
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix);
GURL_API int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
GURL_API int _itow_s(int value, char16* buffer, size_t size_in_chars,
int radix);
// Secure template overloads for these functions
template<size_t N>
@@ -37,6 +37,14 @@
namespace url_canon {
// Writes the given IPv4 address to |output|.
GURL_API void AppendIPv4Address(const unsigned char address[4],
CanonOutput* output);
// Writes the given IPv6 address to |output|.
GURL_API void AppendIPv6Address(const unsigned char address[16],
CanonOutput* output);
// Searches the host name for the portions of the IPv4 address. On success,
// each component will be placed into |components| and it will return true.
// It will return false if the host can not be separated as an IPv4 address
@@ -62,7 +62,7 @@ class StdStringCanonOutput : public CanonOutput {
str_(str) {
cur_len_ = static_cast<int>(str_->size()); // Append to existing data.
str_->resize(str_->capacity());
buffer_ = &(*str_)[0];
buffer_ = str_->empty() ? NULL : &(*str_)[0];
buffer_len_ = static_cast<int>(str_->size());
}
virtual ~StdStringCanonOutput() {
@@ -77,7 +77,7 @@ class StdStringCanonOutput : public CanonOutput {
virtual void Resize(int sz) {
str_->resize(sz);
buffer_ = &(*str_)[0];
buffer_ = str_->empty() ? NULL : &(*str_)[0];
buffer_len_ = sz;
}
+38 -1
View File
@@ -119,8 +119,12 @@ struct Parsed {
REF,
};
// The default constructor is sufficient for the components.
// The default constructor is sufficient for the components, but inner_parsed_
// requires special handling.
GURL_API Parsed();
GURL_API Parsed(const Parsed&);
GURL_API Parsed& operator=(const Parsed&);
GURL_API ~Parsed();
// Returns the length of the URL (the end of the last component).
//
@@ -198,6 +202,31 @@ struct Parsed {
// Length will be -1 if there is no hash sign, or 0 if there is one but
// nothing follows it.
Component ref;
// This is used for nested URL types, currently only filesystem. If you
// parse a filesystem URL, the resulting Parsed will have a nested
// inner_parsed_ to hold the parsed inner URL's component information.
// For all other url types [including the inner URL], it will be NULL.
// Note that although this accessor is const, it returns a non-const pointer,
// so the nested Parsed can be modified through a const outer Parsed.
Parsed* inner_parsed() const {
return inner_parsed_;
}
void set_inner_parsed(const Parsed& inner_parsed) {
  // Stores a copy of |inner_parsed| as the nested Parsed. An existing nested
  // object is overwritten in place; otherwise a new copy is allocated.
  if (inner_parsed_) {
    *inner_parsed_ = inner_parsed;
    return;
  }
  inner_parsed_ = new Parsed(inner_parsed);
}
void clear_inner_parsed() {
  // Releases the nested Parsed (if any) and resets the pointer. Deleting a
  // null pointer is a no-op, so no explicit check is required.
  delete inner_parsed_;
  inner_parsed_ = NULL;
}
private:
Parsed* inner_parsed_; // This object is owned and managed by this struct.
};
// Initialization functions ---------------------------------------------------
@@ -232,6 +261,14 @@ GURL_API void ParsePathURL(const char16* url, int url_len, Parsed* parsed);
GURL_API void ParseFileURL(const char* url, int url_len, Parsed* parsed);
GURL_API void ParseFileURL(const char16* url, int url_len, Parsed* parsed);
// Filesystem URLs are structured differently than other URLs.
GURL_API void ParseFileSystemURL(const char* url,
int url_len,
Parsed* parsed);
GURL_API void ParseFileSystemURL(const char16* url,
int url_len,
Parsed* parsed);
// MailtoURL is for mailto: urls. They are made up of scheme, path, and query.
GURL_API void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
GURL_API void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed);
@@ -1,4 +1,4 @@
// Copyright 2006, Google Inc.
// Copyright 2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -26,30 +26,31 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: wan@google.com (Zhanyong Wan)
//
// This is part of the unit test for include/gtest/gtest_prod.h.
#ifndef GTEST_TEST_PRODUCTION_H_
#define GTEST_TEST_PRODUCTION_H_
#ifndef GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__
#define GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__
#include "gtest/gtest_prod.h"
#include <string>
class PrivateCode {
public:
// Declares a friend test that does not use a fixture.
FRIEND_TEST(PrivateCodeTest, CanAccessPrivateMembers);
#include "base/string16.h"
#include "googleurl/src/url_common.h"
#include "googleurl/src/url_parse.h"
// Declares a friend test that uses a fixture.
FRIEND_TEST(PrivateCodeFixtureTest, CanAccessPrivateMembers);
namespace url_util {
PrivateCode();
extern const char kFileScheme[];
extern const char kFileSystemScheme[];
extern const char kMailtoScheme[];
int x() const { return x_; }
private:
void set_x(int an_x) { x_ = an_x; }
int x_;
};
// Given a string and a range inside the string, compares it to the given
// lower-case |compare_to| buffer.
bool CompareSchemeComponent(const char* spec,
const url_parse::Component& component,
const char* compare_to);
bool CompareSchemeComponent(const char16* spec,
const url_parse::Component& component,
const char* compare_to);
#endif // GTEST_TEST_PRODUCTION_H_
} // namespace url_util
#endif // GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__
+10 -156
View File
@@ -18,6 +18,7 @@
#define NET_INSTAWEB_APACHE_APACHE_CONFIG_H_
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/system/public/system_rewrite_options.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
@@ -28,7 +29,7 @@ class Hasher;
// Establishes a context for VirtualHosts and directory-scoped
// options, either via .htaccess or <Directory>...</Directory>.
class ApacheConfig : public RewriteOptions {
class ApacheConfig : public SystemRewriteOptions {
public:
enum RefererStatisticsOutputLevel {
kFast,
@@ -36,8 +37,6 @@ class ApacheConfig : public RewriteOptions {
kOrganized,
};
static const char kClassName[];
static bool ParseRefererStatisticsOutputLevel(
const StringPiece& in, RefererStatisticsOutputLevel* out);
@@ -54,48 +53,12 @@ class ApacheConfig : public RewriteOptions {
StringPiece description() const { return description_; }
void set_description(const StringPiece& x) { x.CopyToString(&description_); }
int64 file_cache_clean_interval_ms() const {
return file_cache_clean_interval_ms_.value();
}
void set_file_cache_clean_interval_ms(int64 x) {
set_option(x, &file_cache_clean_interval_ms_);
}
int64 file_cache_clean_size_kb() const {
return file_cache_clean_size_kb_.value();
}
void set_file_cache_clean_size_kb(int64 x) {
set_option(x, &file_cache_clean_size_kb_);
}
int64 file_cache_clean_inode_limit() const {
return file_cache_clean_inode_limit_.value();
}
void set_file_cache_clean_inode_limit(int64 x) {
set_option(x, &file_cache_clean_inode_limit_);
}
int64 lru_cache_byte_limit() const {
return lru_cache_byte_limit_.value();
}
void set_lru_cache_byte_limit(int64 x) {
set_option(x, &lru_cache_byte_limit_);
}
int64 lru_cache_kb_per_process() const {
return lru_cache_kb_per_process_.value();
}
void set_lru_cache_kb_per_process(int64 x) {
set_option(x, &lru_cache_kb_per_process_);
}
int64 slurp_flush_limit() const {
return slurp_flush_limit_.value();
}
void set_slurp_flush_limit(int64 x) {
set_option(x, &slurp_flush_limit_);
}
bool use_shared_mem_locking() const {
return use_shared_mem_locking_.value();
}
void set_use_shared_mem_locking(bool x) {
set_option(x, &use_shared_mem_locking_);
}
bool collect_referer_statistics() const {
return collect_referer_statistics_.value();
}
@@ -108,36 +71,6 @@ class ApacheConfig : public RewriteOptions {
void set_hash_referer_statistics(bool x) {
set_option(x, &hash_referer_statistics_);
}
bool statistics_enabled() const {
return statistics_enabled_.value();
}
void set_statistics_enabled(bool x) {
set_option(x, &statistics_enabled_);
}
bool statistics_logging_enabled() const {
return statistics_logging_enabled_.value();
}
void set_statistics_logging_enabled(bool x) {
set_option(x, &statistics_logging_enabled_);
}
const GoogleString& statistics_logging_file() const {
return statistics_logging_file_.value();
}
const GoogleString& statistics_logging_charts_css() const {
return statistics_logging_charts_css_.value();
}
const GoogleString& statistics_logging_charts_js() const {
return statistics_logging_charts_js_.value();
}
void set_statistics_logging_file(GoogleString x) {
set_option(x, &statistics_logging_file_);
}
int64 statistics_logging_interval_ms() const {
return statistics_logging_interval_ms_.value();
}
void set_statistics_logging_interval_ms(int64 x) {
set_option(x, &statistics_logging_interval_ms_);
}
bool slurp_read_only() const {
return slurp_read_only_.value();
}
@@ -153,59 +86,12 @@ class ApacheConfig : public RewriteOptions {
void set_referer_statistics_output_level(RefererStatisticsOutputLevel x) {
set_option(x, &referer_statistics_output_level_);
}
const GoogleString& file_cache_path() const {
return file_cache_path_.value();
}
void set_file_cache_path(GoogleString x) {
set_option(x, &file_cache_path_);
}
const GoogleString& memcached_servers() const {
return memcached_servers_.value();
}
void set_memcached_servers(GoogleString x) {
set_option(x, &memcached_servers_);
}
int memcached_threads() const {
return memcached_threads_.value();
}
void set_memcached_threads(int x) {
set_option(x, &memcached_threads_);
}
int memcached_timeout_us() const {
return memcached_timeout_us_.value();
}
bool has_memcached_timeout_us() const {
return memcached_timeout_us_.was_set();
}
void set_memcached_timeout_us(int x) {
set_option(x, &memcached_timeout_us_);
}
const GoogleString& slurp_directory() const {
return slurp_directory_.value();
}
void set_slurp_directory(GoogleString x) {
set_option(x, &slurp_directory_);
}
const GoogleString& fetcher_proxy() const {
return fetcher_proxy_.value();
}
void set_fetcher_proxy(GoogleString x) {
set_option(x, &fetcher_proxy_);
}
// Cache flushing configuration.
void set_cache_flush_poll_interval_sec(int64 num_seconds) {
set_option(num_seconds, &cache_flush_poll_interval_sec_);
}
int64 cache_flush_poll_interval_sec() const {
return cache_flush_poll_interval_sec_.value();
}
void set_cache_flush_filename(const StringPiece& sp) {
set_option(sp.as_string(), &cache_flush_filename_);
}
const GoogleString& cache_flush_filename() const {
return cache_flush_filename_.value();
}
// If this is set to true, we'll turn on our fallback proxy-like behavior
// on non-.pagespeed. URLs without changing the main fetcher from Serf
@@ -249,9 +135,6 @@ class ApacheConfig : public RewriteOptions {
static const ApacheConfig* DynamicCast(const RewriteOptions* instance);
static ApacheConfig* DynamicCast(RewriteOptions* instance);
// Name of the actual type of this instance as a poor man's RTTI.
virtual const char* class_name() const;
protected:
template<class T> class ApacheOption : public OptionTemplateBase<T> {
public:
@@ -285,17 +168,15 @@ class ApacheConfig : public RewriteOptions {
static Properties* apache_properties_;
// Adds an option to apache_properties_.
//
// TODO(jmarantz): rename this to avoid coinciding with private
// method RewriteOptions::add_option. This is done for now so
// review-diffs are readable, at the cost of a small non-functional
// follow-up refactor.
template<class RewriteOptionsSubclass, class OptionClass>
static void add_option(typename OptionClass::ValueType default_value,
OptionClass RewriteOptionsSubclass::*offset,
const char* id,
OptionEnum option_enum) {
AddProperty(default_value, offset, id, option_enum, apache_properties_);
static void AddApacheProperty(typename OptionClass::ValueType default_value,
OptionClass RewriteOptionsSubclass::*offset,
const char* id,
OptionEnum option_enum,
const char* help) {
AddProperty(default_value, offset, id, option_enum,
RewriteOptions::kServerScope, help,
apache_properties_);
}
void InitializeSignaturesAndDefaults();
@@ -321,20 +202,8 @@ class ApacheConfig : public RewriteOptions {
}
GoogleString description_;
RewriteOptions options_;
Option<GoogleString> fetcher_proxy_;
Option<GoogleString> file_cache_path_;
// comma-separated list of host[:port]. See AprMemCache::AprMemCache
// for code that parses it.
Option<GoogleString> fetch_https_;
Option<GoogleString> memcached_servers_;
Option<GoogleString> slurp_directory_;
Option<GoogleString> statistics_logging_file_;
Option<GoogleString> statistics_logging_charts_css_;
Option<GoogleString> statistics_logging_charts_js_;
Option<GoogleString> cache_flush_filename_;
Option<GoogleString> test_proxy_slurp_;
ApacheOption<RefererStatisticsOutputLevel> referer_statistics_output_level_;
@@ -342,26 +211,11 @@ class ApacheConfig : public RewriteOptions {
Option<bool> collect_referer_statistics_;
Option<bool> hash_referer_statistics_;
Option<bool> slurp_read_only_;
Option<bool> statistics_enabled_;
Option<bool> statistics_logging_enabled_;
Option<bool> test_proxy_;
Option<bool> use_shared_mem_locking_;
Option<bool> rate_limit_background_fetches_;
Option<bool> experimental_fetch_from_mod_spdy_;
Option<int> memcached_threads_;
Option<int> memcached_timeout_us_;
Option<int64> file_cache_clean_inode_limit_;
Option<int64> file_cache_clean_interval_ms_;
Option<int64> file_cache_clean_size_kb_;
Option<int64> lru_cache_byte_limit_;
Option<int64> lru_cache_kb_per_process_;
Option<int64> slurp_flush_limit_;
Option<int64> statistics_logging_interval_ms_;
// If cache_flush_poll_interval_sec_<=0 then we turn off polling for
// cache-flushes.
Option<int64> cache_flush_poll_interval_sec_;
DISALLOW_COPY_AND_ASSIGN(ApacheConfig);
};
@@ -0,0 +1,71 @@
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
//
// Captures the Apache request details in our request context, including
// the port (used for loopback fetches) and (if enabled & serving spdy)
// a factory for generating SPDY fetches.
#ifndef NET_INSTAWEB_APACHE_APACHE_REQUEST_CONTEXT_H_
#define NET_INSTAWEB_APACHE_APACHE_REQUEST_CONTEXT_H_
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
struct request_rec;
struct spdy_slave_connection_factory;
namespace net_instaweb {
class AbstractMutex;
// RequestContext subclass that carries Apache-specific details of a request:
// the local port, the original request URL, whether a SPDY fetcher should be
// used, and the factory for creating SPDY slave connections.
class ApacheRequestContext : public RequestContext {
public:
// Builds a context for the Apache request |req|.
// NOTE(review): |logging_mutex| is presumably forwarded to the RequestContext
// base class, and the private fields below are presumably filled in from
// |req| -- confirm against the .cc file.
ApacheRequestContext(AbstractMutex* logging_mutex, request_rec* req);
// Captures the original URL of the request, which is used to help
// authorize domains for fetches we do on behalf of that request.
// The URL is copied into this object, so |url| need not outlive the call.
void set_url(StringPiece url) { url.CopyToString(&url_); }
// Returns rc as an ApacheRequestContext* if it is one and CHECK
// fails if it is not. Returns NULL if rc is NULL.
static ApacheRequestContext* DynamicCast(RequestContext* rc);
// Plain accessors for the values stored in this context.
bool use_spdy_fetcher() const { return use_spdy_fetcher_; }
int local_port() const { return local_port_; }
// The returned StringPiece refers to the url_ member, so it is only usable
// while this object is alive and set_url() has not been called again.
StringPiece url() const { return url_; }
// Returns the stored factory pointer as-is.
// NOTE(review): ownership of the factory is not visible here -- confirm
// before deleting or caching it.
spdy_slave_connection_factory* spdy_connection_factory() {
return spdy_connection_factory_;
}
protected:
// The destructor is protected, so outside code cannot delete instances
// directly.
// NOTE(review): presumably RequestContext manages lifetime (e.g. via
// reference counting) -- confirm in request_context.h.
virtual ~ApacheRequestContext();
private:
bool use_spdy_fetcher_;
int local_port_;
GoogleString url_;
spdy_slave_connection_factory* spdy_connection_factory_;
DISALLOW_COPY_AND_ASSIGN(ApacheRequestContext);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_APACHE_APACHE_REQUEST_CONTEXT_H_
@@ -20,36 +20,30 @@
#include <map>
#include <set>
#include <vector>
#include "net/instaweb/rewriter/public/rewrite_driver_factory.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/md5_hasher.h"
#include "net/instaweb/util/public/cache_interface.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/shared_mem_cache.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
struct apr_pool_t;
struct request_rec;
struct server_rec;
namespace net_instaweb {
class AbstractSharedMem;
class ApacheCache;
class ApacheConfig;
class ApacheMessageHandler;
class ApacheServerContext;
class AprMemCache;
class AsyncCache;
class CacheInterface;
class FileSystem;
class Hasher;
class MessageHandler;
class ModSpdyFetchController;
class NamedLockManager;
class QueuedWorkerPool;
class RewriteDriver;
class RewriteOptions;
class SerfUrlAsyncFetcher;
class ServerContext;
@@ -57,8 +51,9 @@ class SharedCircularBuffer;
class SharedMemRefererStatistics;
class SharedMemStatistics;
class SlowWorker;
class StaticJavascriptManager;
class StaticAssetManager;
class Statistics;
class SystemCaches;
class Timer;
class UrlAsyncFetcher;
class UrlFetcher;
@@ -68,8 +63,9 @@ class Writer;
// Creates an Apache RewriteDriver.
class ApacheRewriteDriverFactory : public RewriteDriverFactory {
public:
static const char kMemcached[];
static const char kStaticJavaScriptPrefix[];
// Path prefix where we serve static assets (primarily images and js
// resources) needed by some filters.
static const char kStaticAssetPrefix[];
ApacheRewriteDriverFactory(server_rec* server, const StringPiece& version);
virtual ~ApacheRewriteDriverFactory();
@@ -125,8 +121,6 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
void DumpRefererStatistics(Writer* writer);
SlowWorker* slow_worker() { return slow_worker_.get(); }
// Build global shared-memory statistics. This is invoked if at least
// one server context (global or VirtualHost) enables statistics.
Statistics* MakeGlobalSharedMemStatistics(bool logging,
@@ -138,7 +132,9 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
const StringPiece& name, const bool logging,
const int64 logging_interval_ms, const GoogleString& logging_file);
ApacheServerContext* MakeApacheServerContext(server_rec* server);
virtual ApacheServerContext* MakeApacheServerContext(server_rec* server);
ServerContext* NewServerContext();
// Makes fetches from PSA to origin-server request
// accept-encoding:gzip, even when used in a context when we want
@@ -217,31 +213,7 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
install_crash_handler_ = x;
}
// Finds a Cache for the file_cache_path in the config. If none exists,
// creates one, using all the other parameters in the ApacheConfig.
// Currently, no checking is done that the other parameters (e.g. cache
// size, cleanup interval, etc.) are consistent.
ApacheCache* GetCache(ApacheConfig* config);
// Create a new AprMemCache from the given hostname[:port] specification.
AprMemCache* NewAprMemCache(const GoogleString& spec);
// Makes a memcached-based cache if the configuration contains a
// memcached server specification. The l2_cache passed in is used
// to handle puts/gets for huge (>1M) values. NULL is returned if
// memcached is not specified for this server.
//
// If a non-null CacheInterface* is returned, its ownership is transferred
// to the caller and must be freed on destruction.
CacheInterface* GetMemcached(ApacheConfig* config, CacheInterface* l2_cache);
// Returns the filesystem metadata cache for the given config's specification
// (if it has one). NULL is returned if no cache is specified.
CacheInterface* GetFilesystemMetadataCache(ApacheConfig* config);
// Stops any further Gets from occuring in the Async cache. This is used to
// help wind down activity during a shutdown.
void StopAsyncGets();
SystemCaches* caches() { return caches_.get(); }
// Finds a fetcher for the settings in this config, sharing with
// existing fetchers if possible, otherwise making a new one (and
@@ -272,23 +244,6 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
static void Initialize();
static void Terminate();
// Print out details of all the connections to memcached servers.
void PrintMemCacheStats(GoogleString* out);
// If needed, sets session fetchers on the driver to do the following:
// a) Adds custom headers when configured in RewriteOptions.
// b) Route requests directly to this very server when they are not
// configured to be external.
// c) Route requests to mod_spdy's slave connection code if configured to.
void ApplySessionFetchers(ApacheServerContext* manager,
RewriteDriver* driver, request_rec* req);
// Returns true if we should handle request as SPDY.
// This happens in two cases:
// 1) It's actually a SPDY request using mod_spdy
// 2) The header X-PSA-Optimize-For-SPDY is present, with any value.
static bool TreatRequestAsSpdy(request_rec* req);
// Parses a comma-separated list of HTTPS options. If successful, applies
// the options to the fetcher and returns true. If the options were invalid,
// *error_message is populated and false is returned.
@@ -298,6 +253,10 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
// in the server log, and the option-setting will have no effect.
bool SetHttpsOptions(StringPiece directive, GoogleString* error_message);
ModSpdyFetchController* mod_spdy_fetch_controller() {
return mod_spdy_fetch_controller_.get();
}
protected:
virtual UrlFetcher* DefaultUrlFetcher();
virtual UrlAsyncFetcher* DefaultAsyncUrlFetcher();
@@ -310,7 +269,8 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
virtual Timer* DefaultTimer();
virtual void SetupCaches(ServerContext* resource_manager);
virtual NamedLockManager* DefaultLockManager();
virtual QueuedWorkerPool* CreateWorkerPool(WorkerPoolName name);
virtual QueuedWorkerPool* CreateWorkerPool(WorkerPoolCategory pool,
StringPiece name);
// Disable the Resource Manager's filesystem since we have a
// write-through http_cache.
@@ -331,11 +291,19 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
// the base class resources.
virtual void ShutDown();
// Initializes the StaticJavascriptManager.
virtual void InitStaticJavascriptManager(
StaticJavascriptManager* static_js_manager);
// Initializes the StaticAssetManager.
virtual void InitStaticAssetManager(StaticAssetManager* static_asset_manager);
private:
typedef SharedMemCache<64> MetadataShmCache;
// Bookkeeping record for one metadata shared-memory cache: the cache object
// callers should use, plus the underlying MetadataShmCache.
// NOTE(review): cache_to_use presumably wraps cache_backend -- confirm where
// these records are populated.
struct MetadataShmCacheInfo {
// Starts with no backend; cache_to_use is left to scoped_ptr's default
// construction.
MetadataShmCacheInfo() : cache_backend(NULL) {}
// Note that the fields may be NULL if e.g. initialization failed.
// Held in a scoped_ptr, so this struct owns whatever cache it holds.
scoped_ptr<CacheInterface> cache_to_use; // may be CacheStats or such.
// Raw pointer; ownership is not established by this declaration.
MetadataShmCache* cache_backend;
};
// Updates num_rewrite_threads_ and num_expensive_rewrite_threads_
// with sensible values if they are not explicitly set.
void AutoDetectThreadCounts();
@@ -418,39 +386,6 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
// /mod_pagespeed_messages.
int message_buffer_size_;
// File-Caches are expensive. Just allocate one per distinct file-cache path.
// At the moment there is no consistency checking for other parameters. Note
// that the LRUCache is instantiated inside the ApacheCache, so we get a new
// LRUCache for each distinct file-cache path. Also note that only the
// file-cache path is used as the key in this map. Other parameters changed,
// such as lru cache size or file cache clean interval, are taken from the
// first file-cache found configured to one address.
//
// TODO(jmarantz): Consider instantiating one LRUCache per process.
typedef std::map<GoogleString, ApacheCache*> PathCacheMap;
PathCacheMap path_cache_map_;
// memcache connections are expensive. Just allocate one per
// distinct server-list. At the moment there is no consistency
// checking for other parameters. Note that each memcached
// interface shares the thread allocation, based on the
// ModPagespeedMemcachedThreads settings first encountered for
// a particular server-set.
//
// The QueuedWorkerPool for async cache-gets is shared among all
// memcached connections.
//
// The CacheInterface* value in the MemcacheMap now includes,
// depending on options, instances of CacheBatcher, AsyncCache,
// and CacheStats. Explicit lists of AprMemCache instances and
// AsyncCache objects are also included, as they require extra
// treatment during startup and shutdown.
typedef std::map<GoogleString, CacheInterface*> MemcachedMap;
MemcachedMap memcached_map_;
scoped_ptr<QueuedWorkerPool> memcached_pool_;
std::vector<AprMemCache*> memcache_servers_;
std::vector<AsyncCache*> async_caches_;
// Serf fetchers are expensive -- they each cost a thread. Allocate
// one for each proxy/slurp-setting. Currently there is no
// consistency checking for fetcher timeout.
@@ -458,13 +393,15 @@ class ApacheRewriteDriverFactory : public RewriteDriverFactory {
FetcherMap fetcher_map_;
typedef std::map<GoogleString, SerfUrlAsyncFetcher*> SerfFetcherMap;
SerfFetcherMap serf_fetcher_map_;
MD5Hasher cache_hasher_;
// Helps coordinate direct-to-mod_spdy fetches.
scoped_ptr<ModSpdyFetchController> mod_spdy_fetch_controller_;
GoogleString https_options_;
// Manages all our caches & lock managers.
scoped_ptr<SystemCaches> caches_;
DISALLOW_COPY_AND_ASSIGN(ApacheRewriteDriverFactory);
};
@@ -18,6 +18,7 @@
#define NET_INSTAWEB_APACHE_APACHE_SERVER_CONTEXT_H_
#include "net/instaweb/apache/apache_config.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/scoped_ptr.h"
@@ -31,7 +32,9 @@ namespace net_instaweb {
class AbstractMutex;
class ApacheRewriteDriverFactory;
class Histogram;
class ProxyFetchFactory;
class RewriteDriverPool;
class RewriteDriver;
class RewriteStats;
class SharedMemStatistics;
class Statistics;
@@ -136,6 +139,22 @@ class ApacheServerContext : public ServerContext {
// Returns the Apache server_rec stored in this context (read-only, not owned
// by the caller).
const server_rec* server() const { return server_rec_; }
virtual RewriteDriverPool* SelectDriverPool(bool using_spdy);
virtual void ApplySessionFetchers(const RequestContextPtr& req,
RewriteDriver* driver);
// Returns the ProxyFetchFactory held by this server context. Ownership stays
// with the scoped_ptr member, so callers must not delete the result; it is
// NULL while that member is empty.
// NOTE(review): presumably populated by InitProxyFetchFactory() -- confirm.
ProxyFetchFactory* proxy_fetch_factory() {
return proxy_fetch_factory_.get();
}
void InitProxyFetchFactory();
// Always returns false: in Apache, mod_pagespeed does not proxy external HTML
// through the ProxyFetch flow. Currently we must rely on a separate module to
// let mod_pagespeed behave as an origin fetcher.
virtual bool ProxiesHtml() const { return false; }
private:
bool UpdateCacheFlushTimestampMs(int64 timestamp_ms);
@@ -183,6 +202,8 @@ class ApacheServerContext : public ServerContext {
Variable* cache_flush_count_;
Variable* cache_flush_timestamp_ms_;
scoped_ptr<ProxyFetchFactory> proxy_fetch_factory_;
DISALLOW_COPY_AND_ASSIGN(ApacheServerContext);
};
@@ -0,0 +1,70 @@
// Copyright 2013 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: jmarantz@google.com (Joshua Marantz)
#ifndef NET_INSTAWEB_APACHE_APACHE_WRITER_H_
#define NET_INSTAWEB_APACHE_APACHE_WRITER_H_
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/writer.h"
#include "httpd.h" // NOLINT
struct request_rec;
namespace net_instaweb {
class MessageHandler;
class ResponseHeaders;
// Writer object that writes to an Apache Request stream.
class ApacheWriter : public Writer {
public:
// Binds the writer to the Apache request 'r'. The request is held as a raw
// pointer; nothing in this header indicates that the writer takes ownership.
explicit ApacheWriter(request_rec* r);
virtual ~ApacheWriter();
// Writer interface: Write() sends 'str' to the request stream and Flush()
// flushes it. Both report problems through 'handler'.
// NOTE(review): the meaning of the bool results follows the Writer base
// class contract, which is not visible here -- confirm.
virtual bool Write(const StringPiece& str, MessageHandler* handler);
virtual bool Flush(MessageHandler* handler);
// Copies the contents of the specified response_headers to the Apache
// headers_out structure. This must be done before any bytes are flushed.
//
// Note: if strip_cookies is set, the cookies will be stripped here.
void OutputHeaders(ResponseHeaders* response_headers);
// Disables mod_expires and mod_headers to allow the headers to
// be under control of mod_pagespeed. Default is false.
void set_disable_downstream_header_filters(bool x) {
disable_downstream_header_filters_ = x;
}
// Removes 'Set-Cookie' and 'Set-Cookie2' from the response headers
// once they are complete. Default is false.
void set_strip_cookies(bool x) {
strip_cookies_ = x;
}
private:
// Apache request this writer emits to.
request_rec* request_;
// NOTE(review): presumably records whether OutputHeaders() has already run
// -- confirm in the .cc file.
bool headers_out_;
// Set via set_disable_downstream_header_filters().
bool disable_downstream_header_filters_;
// Set via set_strip_cookies().
bool strip_cookies_;
DISALLOW_COPY_AND_ASSIGN(ApacheWriter);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_APACHE_APACHE_WRITER_H_
@@ -1,171 +0,0 @@
/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#ifndef NET_INSTAWEB_APACHE_APR_MEM_CACHE_H_
#define NET_INSTAWEB_APACHE_APR_MEM_CACHE_H_
#include <cstddef>
#include <vector>
#include "net/instaweb/util/public/atomic_bool.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/cache_interface.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/timer.h"
struct apr_memcache2_t;
struct apr_memcache2_server_t;
struct apr_pool_t;
namespace net_instaweb {
class Hasher;
class MessageHandler;
class SharedString;
class Statistics;
class Variable;
// Interface to memcached via the apr_memcache2*, as documented in
// http://apr.apache.org/docs/apr-util/1.4/group___a_p_r___util___m_c.html.
//
// While this class derives from CacheInterface, it is a blocking
// implementation, suitable for instantiating underneath an AsyncCache.
class AprMemCache : public CacheInterface {
public:
// Experimentally it seems that values larger than 1M bytes result in
// a failure, e.g. from load-tests:
// [Fri Jul 20 10:29:34 2012] [error] [mod_pagespeed 0.10.0.0-1699 @1522]
// AprMemCache::Put error: Internal error on key
// http://example.com/image.jpg, value-size 1393146
// External to this class, we use a fallback cache (in Apache a FileCache) to
// handle too-large requests. This is managed by class FallbackCache in
// ../util.
static const size_t kValueSizeThreshold = 1 * 1000 * 1000;
// Amount of time after a burst of errors to retry memcached operations.
static const int64 kHealthCheckpointIntervalMs = 30 * Timer::kSecondMs;
// Maximum number of errors tolerated within kHealthCheckpointIntervalMs,
// after which AprMemCache will declare itself unhealthy for
// kHealthCheckpointIntervalMs.
static const int64 kMaxErrorBurst = 4;
// servers is a comma-separated list of host[:port] where port defaults
// to 11211, the memcached default.
//
// thread_limit is used to provide apr_memcache2_server_create with
// a hard maximum number of client connections to open.
//
// hasher, statistics, timer and handler are passed as raw pointers; this
// header does not show the class taking ownership of any of them.
AprMemCache(const StringPiece& servers, int thread_limit, Hasher* hasher,
Statistics* statistics, Timer* timer, MessageHandler* handler);
~AprMemCache();
// NOTE(review): presumably registers the statistics variables this class
// reads (timeouts, error bookkeeping) -- confirm in the .cc file.
static void InitStats(Statistics* statistics);
// Returns the stored server specification string.
const GoogleString& server_spec() const { return server_spec_; }
// As mentioned above, Get and MultiGet are blocking in this implementation.
virtual void Get(const GoogleString& key, Callback* callback);
virtual void Put(const GoogleString& key, SharedString* value);
virtual void Delete(const GoogleString& key);
virtual void MultiGet(MultiGetRequest* request);
// Connects to the server, returning whether the connection was
// successful or not.
bool Connect();
// Returns the stored valid_server_spec_ flag.
// NOTE(review): presumably set while parsing 'servers' in the constructor
// -- confirm.
bool valid_server_spec() const { return valid_server_spec_; }
// Get detailed status in a string, returning false if the server
// failed to return status.
bool GetStatus(GoogleString* status_string);
virtual const char* Name() const { return "AprMemCache"; }
// This cache performs its operations synchronously, so it reports itself as
// blocking.
virtual bool IsBlocking() const { return true; }
// Records in statistics that a system error occurred, helping it detect
// when it's unhealthy if they are too frequent.
void RecordError();
// Determines whether memcached is healthy enough to attempt another
// operation. Note that even though there may be multiple shards,
// some of which are healthy and some not, we don't currently track
// errors on a per-shard basis, so we effectively declare all the
// memcached instances unhealthy if any of them are.
virtual bool IsHealthy() const;
// Close down the connection to the memcached servers.
virtual void ShutDown();
// Tells callers that values stored through this cache must carry the key
// encoded inside them (see PutWithKeyInValue below).
virtual bool MustEncodeKeyInValueOnPut() const { return true; }
virtual void PutWithKeyInValue(const GoogleString& key,
SharedString* key_and_value);
// Sets the I/O timeout in microseconds. This should be called at
// setup time and not while there are operations in flight.
void set_timeout_us(int timeout_us);
private:
// NOTE(review): judging by the name, decodes a stored value that embeds its
// key, checks it against 'key', and reports the outcome to 'callback';
// 'calling_method' looks like a label for diagnostics -- confirm in the .cc.
void DecodeValueMatchingKeyAndCallCallback(
const GoogleString& key, const char* data, size_t data_len,
const char* calling_method, Callback* callback);
// Puts a value that's already encoded with the key into the cache, without
// checking health first. This is meant to be called from Put and
// PutWithKeyInValue, which will do the health check.
void PutHelper(const GoogleString& key, SharedString* key_and_value);
// Host names and ports of the configured servers.
// NOTE(review): presumably parallel vectors parsed from the 'servers'
// constructor argument -- confirm.
StringVector hosts_;
std::vector<int> ports_;
GoogleString server_spec_;
bool valid_server_spec_;
int thread_limit_;
int timeout_us_;
// APR handles used to talk to memcached.
apr_pool_t* pool_;
apr_memcache2_t* memcached_;
std::vector<apr_memcache2_server_t*> servers_;
Hasher* hasher_;
Timer* timer_;
AtomicBool shutdown_;
// Statistics variables used for timeout and error-burst bookkeeping (see
// kHealthCheckpointIntervalMs and kMaxErrorBurst above).
Variable* timeouts_;
Variable* last_error_checkpoint_ms_;
Variable* error_burst_size_;
bool is_machine_local_;
MessageHandler* message_handler_;
// When memcached is killed, we will generate errors for every cache
// operation. To bound the amount of logging we do, we keep track
// of the last time when we issued a log message for an APR failure.
// We use a Statistic here for this so that it's shared across
// Apache processes.
//
// Note that we have some messages indicating a potential functional issue
// (e.g. key collision) and a variety of places where we print messages
// because the Apr routine failed. We are grouping together Apr failures
// for Get, Put, Delete, and MultiGet. We might at some point wish to
// track the last time we sent a message for each of those.
Variable* last_apr_error_;
DISALLOW_COPY_AND_ASSIGN(AprMemCache);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_APACHE_APR_MEM_CACHE_H_
@@ -1,54 +0,0 @@
// Copyright 2012 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: morlovich@google.com (Maksim Orlovich)
// jmarantz@google.com (Joshua Marantz) (refactoring only)
#include "net/instaweb/apache/apr_thread_compatible_pool.h"
#include <cstddef>
#include "apr_pools.h"
#include "base/logging.h"
#include "net/instaweb/util/stack_buffer.h"
namespace net_instaweb {
apr_pool_t* AprCreateThreadCompatiblePool(apr_pool_t* parent_pool) {
  // Returns a child of parent_pool that is usable from any thread, including
  // under the Apache prefork MPM. Any failure is fatal (CHECK).
  //
  // Background, for the release build of the APR pool code (the pool-checking
  // build performs additional locking of its own):
  //   * Allocating concurrently from a single pool is not thread-safe.
  //   * Allocating concurrently from different pools that share an allocator
  //     is only safe when that allocator has a mutex set.
  //   * The prefork pchild pool, which is an ancestor of the pool made here,
  //     has an allocator with no mutex set.
  // The new pool is therefore given a freshly created allocator of its own.
  apr_allocator_t* allocator = NULL;
  CHECK(apr_allocator_create(&allocator) == APR_SUCCESS);

  apr_pool_t* pool = NULL;
  const apr_status_t status =
      apr_pool_create_ex(&pool, parent_pool, NULL /*abortfn*/, allocator);

  const bool pool_created = (status == APR_SUCCESS) && (pool != NULL);
  if (!pool_created) {
    // Render the APR status as text so the fatal log message is actionable.
    char error_text[kStackBufferSize];
    apr_strerror(status, error_text, sizeof(error_text));
    CHECK_EQ(APR_SUCCESS, status)
        << "apr_pool_create_ex failed: " << error_text;
    CHECK(pool != NULL) << "apr_pool_create_ex failed: " << error_text;
  }

  // Register the pool as the owner of its private allocator.
  apr_allocator_owner_set(allocator, pool);
  return pool;
}
} // namespace net_instaweb
@@ -0,0 +1,89 @@
// Copyright 2013 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: sligocki@google.com (Shawn Ligocki)
#ifndef NET_INSTAWEB_APACHE_IN_PLACE_RESOURCE_RECORDER_H_
#define NET_INSTAWEB_APACHE_IN_PLACE_RESOURCE_RECORDER_H_
#include "net/instaweb/http/public/http_value.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/writer.h"
namespace net_instaweb {
class HTTPCache;
class MessageHandler;
class RequestHeaders;
class ResponseHeaders;
class Statistics;
class Variable;
// Records a copy of a resource streamed through it and saves the result to
// the cache if it's cacheable. Used in the In-Place Resource Optimization
// (IPRO) flow to get resources into the cache.
class InPlaceResourceRecorder : public Writer {
public:
// Takes ownership of request_headers, but not cache nor handler.
// Like other callbacks, InPlaceResourceRecorder is self-owned and will
// delete itself when DoneAndSetHeaders() is called.
InPlaceResourceRecorder(StringPiece url, RequestHeaders* request_headers,
bool respect_vary, HTTPCache* cache,
Statistics* statistics, MessageHandler* handler);
virtual ~InPlaceResourceRecorder();
// NOTE(review): presumably registers the statistics variables behind the
// num_* counters declared below -- confirm in the .cc file.
static void InitStats(Statistics* statistics);
// Writer interface, through which the resource contents are streamed.
// NOTE(review): presumably Write() accumulates 'contents' into
// resource_value_ -- confirm in the .cc file.
virtual bool Write(const StringPiece& contents, MessageHandler* handler);
virtual bool Flush(MessageHandler* handler);
// Call if something went wrong. The results will not be added to cache.
void Fail() { success_ = false; }
// Call when finished and the final response headers are known.
// Because of Apache's quirky filter order, we cannot get both the
// uncompressed final contents and the complete headers at the same time.
// Does not take ownership of response_headers.
//
// Deletes itself. Do not use object after calling DoneAndSetHeaders().
void DoneAndSetHeaders(ResponseHeaders* response_headers);
// URL of the resource being recorded.
const GoogleString& url() const { return url_; }
// Message handler supplied at construction (not owned).
MessageHandler* handler() { return handler_; }
private:
const GoogleString url_;
// Owned: the constructor takes ownership of the request headers.
const scoped_ptr<RequestHeaders> request_headers_;
const bool respect_vary_;
// Holds the recorded resource.
HTTPValue resource_value_;
// Cleared by Fail(); a failed recording is not added to the cache.
bool success_;
// Not owned.
HTTPCache* cache_;
// Not owned.
MessageHandler* handler_;
// Statistics counters.
// NOTE(review): exact increment points live in the .cc file -- confirm
// before relying on their meaning.
Variable* num_resources_;
Variable* num_inserted_into_cache_;
Variable* num_not_cacheable_;
Variable* num_failed_;
DISALLOW_COPY_AND_ASSIGN(InPlaceResourceRecorder);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_APACHE_IN_PLACE_RESOURCE_RECORDER_H_
@@ -20,6 +20,7 @@
#include "net/instaweb/automatic/public/html_detector.h"
#include "net/instaweb/http/public/content_type.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/http/public/response_headers.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/property_cache.h"
@@ -49,6 +50,14 @@ class RewriteOptions;
const char kPagespeedOriginalUrl[] = "mod_pagespeed_original_url";
// Generic cleanup callback for use with apr_pool_cleanup_register(): treats
// 'object' as a T* allocated with new, deletes it, and reports APR_SUCCESS.
template <class T>
apr_status_t apache_cleanup(void* object) {
  delete static_cast<T*>(object);
  return APR_SUCCESS;
}
// Tracks a single property-cache lookup.
class PropertyCallback : public PropertyPage {
public:
@@ -90,7 +99,7 @@ class InstawebContext {
const ContentType& content_type,
ApacheServerContext* server_context,
const GoogleString& base_url,
bool using_spdy,
const RequestContextPtr& request_context,
bool use_custom_options,
const RewriteOptions& options);
~InstawebContext();
@@ -141,8 +150,6 @@ class InstawebContext {
// If there was one, make sure to set the options state appropriately.
void SetFuriousStateAndCookie(request_rec* request, RewriteOptions* options);
static apr_status_t Cleanup(void* object);
GoogleString output_; // content after instaweb rewritten.
apr_bucket_brigade* bucket_brigade_;
ContentEncoding content_encoding_;
@@ -28,6 +28,8 @@
namespace net_instaweb {
class ApacheServerContext;
// Was this request made by mod_pagespeed itself? If so, we should not try to
// handle it, just let Apache deal with it like normal.
bool is_pagespeed_subrequest(request_rec* request);
@@ -42,6 +44,11 @@ apr_status_t instaweb_handler(request_rec* request);
// prevent instaweb_handler from being able to decode the resource.
apr_status_t save_url_hook(request_rec *request);
// Implementation of the Apache 'translate_name' hook. Used by the actual hook
// 'save_url_hook' and directly when we already have the server context.
apr_status_t save_url_in_note(request_rec *request,
ApacheServerContext* server_context);
// By default, apache imposes limitations on URL segments of around
// 256 characters that appear to correspond to filename limitations.
// To prevent that, we hook map_to_storage for our own purposes.
@@ -20,6 +20,21 @@
#include "http_config.h"
#include "httpd.h"
namespace net_instaweb {
// Filter used for HTML rewriting.
const char kModPagespeedFilterName[] = "MOD_PAGESPEED_OUTPUT_FILTER";
// Filter used to fix headers after mod_headers runs.
const char kModPagespeedFixHeadersName[] = "MOD_PAGESPEED_FIX_HEADERS_FILTER";
// Filters used for In-Place Resource Optimization.
// First filter stores un-gzipped contents.
const char kModPagespeedInPlaceFilterName[] = "MOD_PAGESPEED_IN_PLACE_FILTER";
// Second filter checks headers for cacheability.
const char kModPagespeedInPlaceCheckHeadersName[] =
"MOD_PAGESPEED_IN_PLACE_CHECK_HEADERS_FILTER";
} // namespace net_instaweb
extern "C" {
extern module AP_MODULE_DECLARE_DATA pagespeed_module;
}
@@ -30,6 +30,7 @@
#include "net/instaweb/apache/interface_mod_spdy.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
struct request_rec;
struct spdy_slave_connection_factory;
@@ -48,7 +49,8 @@ class ModSpdyFetcher : public UrlAsyncFetcher {
static void Initialize();
ModSpdyFetcher(ModSpdyFetchController* controller,
request_rec* req, RewriteDriver* driver);
StringPiece url, RewriteDriver* driver,
spdy_slave_connection_factory* connection_factory);
virtual ~ModSpdyFetcher();
virtual void Fetch(const GoogleString& url,
@@ -72,9 +74,9 @@ class ModSpdyFetcher : public UrlAsyncFetcher {
AsyncFetch* fetch);
ModSpdyFetchController* controller_;
spdy_slave_connection_factory* connection_factory_;
UrlAsyncFetcher* fallback_fetcher_;
GoogleString own_origin_; // empty if we couldn't figure it out.
spdy_slave_connection_factory* connection_factory_;
DISALLOW_COPY_AND_ASSIGN(ModSpdyFetcher);
};
@@ -30,7 +30,6 @@
#include "apr_pools.h"
#include "apr_thread_proc.h"
#include "base/logging.h"
#include "net/instaweb/apache/apr_thread_compatible_pool.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/meta_data.h"
#include "net/instaweb/http/public/request_headers.h"
@@ -38,6 +37,7 @@
#include "net/instaweb/http/public/response_headers_parser.h"
#include "net/instaweb/public/global_constants.h"
#include "net/instaweb/public/version.h"
#include "net/instaweb/system/public/apr_thread_compatible_pool.h"
#include "net/instaweb/util/public/abstract_mutex.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/condvar.h"
@@ -1140,7 +1140,8 @@ bool SerfUrlAsyncFetcher::StartFetch(SerfFetch* fetch) {
void SerfUrlAsyncFetcher::Fetch(const GoogleString& url,
MessageHandler* message_handler,
AsyncFetch* async_fetch) {
async_fetch = EnableInflation(async_fetch, NULL /* blacklist */);
async_fetch = EnableInflation(
async_fetch, &inflation_content_type_blacklist_);
SerfFetch* fetch = new SerfFetch(url, async_fetch, message_handler, timer_);
request_count_->Add(1);
@@ -18,6 +18,7 @@
#ifndef NET_INSTAWEB_APACHE_SERF_URL_ASYNC_FETCHER_H_
#define NET_INSTAWEB_APACHE_SERF_URL_ASYNC_FETCHER_H_
#include <set>
#include <vector>
#include "net/instaweb/http/public/url_pollable_async_fetcher.h"
@@ -141,6 +142,11 @@ class SerfUrlAsyncFetcher : public UrlPollableAsyncFetcher {
}
void set_track_original_content_length(bool x);
// Replaces the set of content types that will not be inflated when passing
// through the inflating fetch. The set is copied into this fetcher; the
// ContentType pointers inside it are stored as-is and are not owned here.
void set_inflation_content_type_blacklist(
const std::set<const ContentType*>& bypass_set) {
inflation_content_type_blacklist_ = bypass_set;
}
// Indicates that direct HTTPS fetching should be allowed, and how picky
// to be about certificates. The directive is a comma separated list of
// these keywords:
@@ -243,6 +249,10 @@ class SerfUrlAsyncFetcher : public UrlPollableAsyncFetcher {
uint32 https_options_; // Composed of HttpsOptions ORed together.
MessageHandler* message_handler_;
// Set of content types that will not be inflated, when passing through
// inflating fetch.
std::set<const ContentType*> inflation_content_type_blacklist_;
DISALLOW_COPY_AND_ASSIGN(SerfUrlAsyncFetcher);
};
@@ -44,6 +44,8 @@ class TimedVariable;
// lookups to compute the critical line and insert it into cache.
class BlinkFlowCriticalLine {
public:
class LogHelper;
// These strings identify sync-points for reproducing races between foreground
// serving request and background blink computation requests in tests.
static const char kBackgroundComputationDone[];
@@ -146,14 +148,13 @@ class BlinkFlowCriticalLine {
// Returns true if property cache has last response code as non 200.
bool IsLastResponseCodeInvalid(PropertyPage* page);
// Convenience method to access the log record from base_fetch_'s request
// context.
LogRecord* log_record();
GoogleString url_;
GoogleUrl google_url_;
GoogleString critical_html_;
AsyncFetch* base_fetch_;
// Blink needs its own log record since it needs to log even after the main
// log record is written out when the request processing is finished.
scoped_ptr<LogRecord> blink_log_record_;
RewriteOptions* options_;
ProxyFetchFactory* factory_;
ServerContext* manager_;
@@ -163,6 +164,7 @@ class BlinkFlowCriticalLine {
int64 request_start_time_ms_;
int64 time_to_start_blink_flow_critical_line_ms_;
int64 time_to_critical_line_data_look_up_done_ms_;
scoped_ptr<LogHelper> blink_log_helper_;
TimedVariable* num_blink_html_cache_hits_;
TimedVariable* num_blink_shared_fetches_started_;
@@ -0,0 +1,123 @@
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Authors: mmohabey@google.com (Megha Mohabey)
// pulkitg@google.com (Pulkit Goyal)
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_CACHE_HTML_FLOW_H_
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_CACHE_HTML_FLOW_H_
#include "net/instaweb/rewriter/cache_html_info.pb.h"
#include "net/instaweb/util/public/google_url.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string.h"
namespace net_instaweb {
class AsyncFetch;
class MessageHandler;
class PropertyPage;
class ProxyFetchPropertyCallbackCollector;
class ProxyFetchFactory;
class ServerContext;
class RewriteOptions;
class RewriteDriver;
class Statistics;
class TimedVariable;
// CacheHtmlFlow manages the flow for an html request where we can flush a
// cached html to the client before receiving a response from the origin server.
// In order to flush the html early before we start getting bytes back from the
// fetcher, we lookup property cache for CacheHtmlInfo. If found, we flush
// cached html out (with the non cacheable parts removed) and then trigger the
// normal ProxyFetch flow which extracts cookies and non cacheable parts from
// the page and sends it out. If CacheHtmlInfo is not found in cache, we pass
// this request through normal ProxyFetch flow buffering the html. In the
// background we create a driver to parse it, remove the non-cacheable parts,
// compute CacheHtmlInfo and store it into the property cache.
class CacheHtmlFlow {
public:
// Identifies the sync-point for reproducing races between foreground
// serving request and background cache html computation requests in tests.
static const char kBackgroundComputationDone[];
// Public entry point for the flow. The constructor is private and takes the
// same arguments.
// NOTE(review): presumably this builds a CacheHtmlFlow that manages its own
// lifetime -- confirm in the .cc file. Ownership of the pointer arguments is
// not stated in this header.
static void Start(const GoogleString& url,
AsyncFetch* base_fetch,
RewriteDriver* driver,
ProxyFetchFactory* factory,
ProxyFetchPropertyCallbackCollector* property_callback);
virtual ~CacheHtmlFlow();
// NOTE(review): presumably registers the statistics named by the kNum*
// constants below -- confirm in the .cc file.
static void InitStats(Statistics* statistics);
// Names of the statistics associated with this flow.
static const char kNumCacheHtmlHits[];
static const char kNumCacheHtmlMisses[];
static const char kNumCacheHtmlMatches[];
static const char kNumCacheHtmlMismatches[];
static const char kNumCacheHtmlMismatchesCacheDeletes[];
static const char kNumCacheHtmlSmartdiffMatches[];
static const char kNumCacheHtmlSmartdiffMismatches[];
private:
// Private: instances are created through Start().
CacheHtmlFlow(const GoogleString& url,
AsyncFetch* base_fetch,
RewriteDriver* driver,
ProxyFetchFactory* factory,
ProxyFetchPropertyCallbackCollector* property_callback);
// NOTE(review): presumably invoked once the property-cache lookup for
// CacheHtmlInfo completes, and dispatches to CacheHtmlHit()/CacheHtmlMiss()
// -- confirm in the .cc file.
void CacheHtmlLookupDone();
// NOTE(review): cancellation path; what is released or notified is not
// visible in this header.
void Cancel();
// Callback that is invoked after we rewrite the cached html.
void CacheHtmlRewriteDone();
// Serves the cached html content to the client and triggers the proxy fetch
// for non cacheable content.
void CacheHtmlHit(PropertyPage* page);
// Serves the request in passthru mode and triggers a background request to
// compute CacheHtmlInfo.
void CacheHtmlMiss();
// Triggers proxy fetch.
void TriggerProxyFetch();
// Populates the cache html info from the property cache to cache_html_info_.
// It also determines whether this info is stale or not.
void PopulateCacheHtmlInfo(PropertyPage* page);
// Request URL, kept both as a string and in parsed form.
GoogleString url_;
GoogleUrl google_url_;
// Raw pointers to collaborators; ownership is not expressed in this header.
AsyncFetch* base_fetch_;
RewriteDriver* rewrite_driver_;
const RewriteOptions* options_;
ProxyFetchFactory* factory_;
ServerContext* server_context_;
ProxyFetchPropertyCallbackCollector* property_cache_callback_;
MessageHandler* handler_;
// Filled in by PopulateCacheHtmlInfo().
CacheHtmlInfo cache_html_info_;
// Timed statistics for cache-html misses and hits.
TimedVariable* num_cache_html_misses_;
TimedVariable* num_cache_html_hits_;
DISALLOW_COPY_AND_ASSIGN(CacheHtmlFlow);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_CACHE_HTML_FLOW_H_
@@ -48,6 +48,8 @@ class FlushEarlyFlow {
static const char kNumResourcesFlushedEarly[];
static const char kFlushEarlyRewriteLatencyMs[];
static const char kNumFlushEarlyHttpStatusCodeDeemedUnstable[];
static const char kNumFlushEarlyRequestsRedirected[];
static const char kRedirectPageJs[];
static void Start(
const GoogleString& url,
@@ -76,18 +78,9 @@ class FlushEarlyFlow {
ProxyFetchFactory* factory,
ProxyFetchPropertyCallbackCollector* property_cache_callback);
// Generates a dummy head with subresources and counts the number of resources
// which can be flushed early.
void GenerateDummyHeadAndCountResources(
const FlushEarlyInfo& flush_early_info);
// Generates response headers from previous values stored in property cache.
void GenerateResponseHeaders(const FlushEarlyInfo& flush_early_info);
GoogleString GetHeadString(const FlushEarlyInfo& flush_early_info,
const char* css_format,
const char* js_format);
// Callback that is invoked after we rewrite the early head.
// start_time_ms indicates the time we started rewriting the flush early
// head. This is set to -1 if is_experimental_hit is false.
@@ -96,12 +89,6 @@ class FlushEarlyFlow {
void Write(const StringPiece& val);
// Writes the script content to base_fetch.
void WriteScript(const GoogleString& script_content);
// Write the external script to base fetch.
void WriteExternalScript(const GoogleString& script_url);
GoogleString url_;
GoogleString dummy_head_;
StringWriter dummy_head_writer_;
@@ -116,7 +103,6 @@ class FlushEarlyFlow {
ServerContext* manager_;
ProxyFetchPropertyCallbackCollector* property_cache_callback_;
bool should_flush_early_lazyload_script_;
bool should_flush_early_js_defer_script_;
MessageHandler* handler_;
TimedVariable* num_requests_flushed_early_;
@@ -30,8 +30,11 @@
#include "net/instaweb/automatic/public/html_detector.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/meta_data.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/http/public/user_agent_matcher.h"
#include "net/instaweb/util/public/queued_worker_pool.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/gtest_prod.h"
#include "net/instaweb/util/public/property_cache.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string.h"
@@ -43,6 +46,7 @@ class AbstractClientState;
class AbstractMutex;
class CacheUrlAsyncFetcher;
class Function;
class LogRecord;
class MessageHandler;
class ProxyFetch;
class ProxyFetchPropertyCallbackCollector;
@@ -120,23 +124,32 @@ class ProxyFetchPropertyCallback : public PropertyPage {
// The cache type associated with this callback.
enum CacheType {
kPagePropertyCache,
kClientPropertyCache
kClientPropertyCache,
kDevicePropertyCache
};
ProxyFetchPropertyCallback(CacheType cache_type,
const PropertyCache& property_cache,
const StringPiece& key,
UserAgentMatcher::DeviceType device_type,
ProxyFetchPropertyCallbackCollector* collector,
AbstractMutex* mutex);
// Returns the cache type associated with this callback.
CacheType cache_type() const { return cache_type_; }
// Returns the device type stored for this callback.
// NOTE(review): presumably the device_type passed to the constructor --
// confirm in the .cc file.
UserAgentMatcher::DeviceType device_type() const { return device_type_; }
// Delegates to collector_'s IsCacheValid.
virtual bool IsCacheValid(int64 write_timestamp_ms) const;
virtual void Done(bool success);
// Adds logs for the given PropertyPage to the specified cohort info index.
virtual void LogPageCohortInfo(LogRecord* log_record, int cohort_index);
private:
CacheType cache_type_;
UserAgentMatcher::DeviceType device_type_;
ProxyFetchPropertyCallbackCollector* collector_;
GoogleString url_;
DISALLOW_COPY_AND_ASSIGN(ProxyFetchPropertyCallback);
@@ -147,7 +160,9 @@ class ProxyFetchPropertyCallbackCollector {
public:
ProxyFetchPropertyCallbackCollector(ServerContext* manager,
const StringPiece& url,
const RewriteOptions* options);
const RequestContextPtr& req_ctx,
const RewriteOptions* options,
const StringPiece& user_agent);
virtual ~ProxyFetchPropertyCallbackCollector();
// Add a callback to be handled by this collector.
@@ -202,13 +217,26 @@ class ProxyFetchPropertyCallbackCollector {
// Updates the status code of response in property cache.
void UpdateStatusCodeInPropertyCache();
const RequestContextPtr& request_context() { return request_context_; }
// Returns DeviceType from device property page.
UserAgentMatcher::DeviceType GetDeviceTypeFromDeviceCacheMutexHeld();
private:
// Set the property page corresponding to device_type for kPagePropertyCache.
void SetPropertyPageForDeviceTypeMutexHeld(
UserAgentMatcher::DeviceType device_type);
std::set<ProxyFetchPropertyCallback*> pending_callbacks_;
std::map<ProxyFetchPropertyCallback::CacheType, PropertyPage*>
property_pages_;
std::map<UserAgentMatcher::DeviceType, PropertyPage*>
property_pages_for_device_types_;
scoped_ptr<AbstractMutex> mutex_;
ServerContext* server_context_;
GoogleString url_;
RequestContextPtr request_context_;
GoogleString user_agent_;
bool detached_; // protected by mutex_.
bool done_; // protected by mutex_.
bool success_; // protected by mutex_; accessed after quiescence.
@@ -280,6 +308,7 @@ class ProxyFetch : public SharedAsyncFetch {
friend class ProxyFetchFactory;
friend class ProxyFetchPropertyCallbackCollector;
friend class MockProxyFetch;
FRIEND_TEST(ProxyFetchTest, TestInhibitParsing);
// Called by ProxyFetchPropertyCallbackCollector when all property-cache
// fetches are complete. This function takes ownership of collector.
@@ -84,7 +84,8 @@ class ProxyInterface : public UrlAsyncFetcher {
bool is_resource_fetch,
const GoogleUrl& request_url,
RewriteOptions* options,
AsyncFetch* async_fetch);
AsyncFetch* async_fetch,
bool* added_page_property_callback = NULL);
private:
friend class ProxyInterfaceTest;
@@ -0,0 +1,253 @@
/*
* Copyright 2011 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: mmohabey@google.com (Megha Mohabey)
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_TEST_BASE_H_
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_TEST_BASE_H_
#include "net/instaweb/automatic/public/proxy_interface.h"
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/response_headers.h"
#include "net/instaweb/http/public/url_async_fetcher.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/rewrite_test_base.h"
#include "net/instaweb/rewriter/public/test_rewrite_driver_factory.h"
#include "net/instaweb/rewriter/public/url_namer.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/worker_test_base.h"
namespace net_instaweb {
class AbstractClientState;
class CriticalImagesFinder;
class GoogleUrl;
class HtmlElement;
class HtmlFilter;
class MessageHandler;
class PropertyValue;
class RequestHeaders;
class RewriteDriver;
const char kPageUrl[] = "page.html";
const char kBackgroundFetchHeader[] = "X-Background-Fetch";
// Creates a proxy URL naming rule that encodes an "owner" domain and an
// "origin" domain, all inside a fixed proxy-domain.
class ProxyUrlNamer : public UrlNamer {
public:
static const char kProxyHost[];
ProxyUrlNamer() : authorized_(true), options_(NULL) {}
// Given the request_url, generate the original url.
virtual bool Decode(const GoogleUrl& gurl,
GoogleUrl* domain,
GoogleString* decoded) const;
virtual bool IsAuthorized(const GoogleUrl& gurl,
const RewriteOptions& options) const {
return authorized_;
}
// Given the request url and request headers, generate the rewrite options.
virtual void DecodeOptions(const GoogleUrl& request_url,
const RequestHeaders& request_headers,
Callback* callback,
MessageHandler* handler) const {
callback->Done((options_ == NULL) ? NULL : options_->Clone());
}
void set_authorized(bool authorized) { authorized_ = authorized; }
void set_options(RewriteOptions* options) { options_ = options; }
private:
bool authorized_;
RewriteOptions* options_;
DISALLOW_COPY_AND_ASSIGN(ProxyUrlNamer);
};
// Mock filter which gets passed to the new rewrite driver created in
// proxy_fetch.
//
// This is used to check the flow for injecting data into filters via the
// ProxyInterface, including:
// property_cache.
class MockFilter : public EmptyHtmlFilter {
public:
// Binds the filter to |driver| and zeroes its bookkeeping: no elements
// counted yet, no property value and no client state looked up yet.
explicit MockFilter(RewriteDriver* driver)
: driver_(driver),
num_elements_(0),
num_elements_property_(NULL),
client_state_(NULL) {
}
// Parse-event hooks.  Their definitions are not in this header.
// NOTE(review): presumably StartElement() bumps num_elements_ and the
// document hooks read/write num_elements_property_ -- confirm in the .cc.
virtual void StartDocument();
virtual void StartElement(HtmlElement* element);
virtual void EndDocument();
// Fixed, human-readable filter name.
virtual const char* Name() const { return "MockFilter"; }
private:
// Driver this filter was created for (see the constructor).
RewriteDriver* driver_;
// Element counter; starts at 0.
int num_elements_;
// Starts NULL.  NOTE(review): looks like the property-cache value backing
// the element count -- confirm against the implementation.
PropertyValue* num_elements_property_;
// Default-constructed (empty) until the implementation assigns it.
GoogleString client_id_;
// Starts NULL; populated by the implementation, if at all.
AbstractClientState* client_state_;
DISALLOW_COPY_AND_ASSIGN(MockFilter);
};
// Hook provided to TestRewriteDriverFactory to add a new filter when
// a rewrite_driver is created.
class CreateFilterCallback
    : public TestRewriteDriverFactory::CreateFilterCallback {
 public:
  CreateFilterCallback() {}
  virtual ~CreateFilterCallback() {}

  // Allocates a new MockFilter bound to |driver| and returns it.
  virtual HtmlFilter* Done(RewriteDriver* driver) {
    MockFilter* mock_filter = new MockFilter(driver);
    return mock_filter;
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(CreateFilterCallback);
};
// Subclass of AsyncFetch that adds a response header indicating whether the
// fetch is for a user-facing request, or a background rewrite.
class BackgroundFetchCheckingAsyncFetch : public SharedAsyncFetch {
 public:
  // Wraps |base_fetch|; every event below is forwarded to it.
  explicit BackgroundFetchCheckingAsyncFetch(AsyncFetch* base_fetch)
      : SharedAsyncFetch(base_fetch) {}
  virtual ~BackgroundFetchCheckingAsyncFetch() {}

  // Forwards header completion to the wrapped fetch first, then tags the
  // response with kBackgroundFetchHeader: "1" when the wrapped fetch is a
  // background fetch, "0" otherwise.
  virtual void HandleHeadersComplete() {
    base_fetch()->HeadersComplete();
    const char* const background_flag =
        base_fetch()->IsBackgroundFetch() ? "1" : "0";
    response_headers()->Add(kBackgroundFetchHeader, background_flag);
    // Add() leaves cache_fields_dirty_ set to true, so the caching fields
    // have to be recomputed.
    response_headers()->ComputeCaching();
  }

  // Reports completion to the wrapped fetch and then destroys this wrapper.
  // The object must not be touched after this call returns.
  virtual void HandleDone(bool success) {
    base_fetch()->Done(success);
    delete this;
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(BackgroundFetchCheckingAsyncFetch);
};
// Subclass of UrlAsyncFetcher that wraps the AsyncFetch with a
// BackgroundFetchCheckingAsyncFetch.
class BackgroundFetchCheckingUrlAsyncFetcher : public UrlAsyncFetcher {
 public:
  // |fetcher| is the fetcher that actually performs the work.  It is stored
  // as a raw pointer and never deleted by this class.
  explicit BackgroundFetchCheckingUrlAsyncFetcher(UrlAsyncFetcher* fetcher)
      : base_fetcher_(fetcher),
        num_background_fetches_(0) {}
  virtual ~BackgroundFetchCheckingUrlAsyncFetcher() {}

  // Counts |fetch| if it is a background fetch, then wraps it in a
  // BackgroundFetchCheckingAsyncFetch and hands the wrapper to the base
  // fetcher.
  virtual void Fetch(const GoogleString& url,
                     MessageHandler* message_handler,
                     AsyncFetch* fetch) {
    if (fetch->IsBackgroundFetch()) {
      ++num_background_fetches_;
    }
    // The wrapper deletes itself in its HandleDone(), so it is deliberately
    // not owned here.
    BackgroundFetchCheckingAsyncFetch* new_fetch =
        new BackgroundFetchCheckingAsyncFetch(fetch);
    base_fetcher_->Fetch(url, message_handler, new_fetch);
  }

  // Number of background fetches seen since construction or since the last
  // clear_num_background_fetches().  Const so it can be queried through a
  // const reference or pointer.
  int num_background_fetches() const { return num_background_fetches_; }

  // Resets the background-fetch counter to zero.
  void clear_num_background_fetches() { num_background_fetches_ = 0; }

 private:
  UrlAsyncFetcher* base_fetcher_;  // Not deleted by this class.
  int num_background_fetches_;

  DISALLOW_COPY_AND_ASSIGN(BackgroundFetchCheckingUrlAsyncFetcher);
};
// Shared fixture for tests that drive requests through a ProxyInterface.
// Only declarations appear in this header; the definitions live elsewhere.
//
// TODO(morlovich): This currently relies on ResourceManagerTestBase to help
// setup fetchers; and also indirectly to prevent any rewrites from timing out
// (as it runs the tests with real scheduler but mock timer). It would probably
// be better to port this away to use TestRewriteDriverFactory directly.
// NOTE(review): the base class is RewriteTestBase; "ResourceManagerTestBase"
// above presumably refers to its former name -- confirm and update the TODO.
class ProxyInterfaceTestBase : public RewriteTestBase {
public:
// NOTE(review): public (unlike the other helpers), presumably so that code
// outside the fixture hierarchy can invoke it -- confirm against callers.
void TestHeadersSetupRace();
protected:
// NOTE(review): by its name, a cache lifetime for HTML in seconds -- confirm
// how the .cc applies it.
static const int kHtmlCacheTimeSec = 5000;
ProxyInterfaceTestBase();
virtual void SetUp();
virtual void TearDown();
// Fetches |url| through the proxy with the given request headers.
// NOTE(review): presumably blocks until the fetch finishes, checks the
// outcome against |expect_success| and fills |string_out| / |headers_out|
// with the response body and headers -- confirm in the .cc.
void FetchFromProxy(const StringPiece& url,
const RequestHeaders& request_headers,
bool expect_success,
GoogleString* string_out,
ResponseHeaders* headers_out);
// Overload that takes no explicit request headers.
void FetchFromProxy(const StringPiece& url,
bool expect_success,
GoogleString* string_out,
ResponseHeaders* headers_out);
// Variant that returns only the body.  NOTE(review): by its name it also
// records flushes in the output -- confirm.
void FetchFromProxyLoggingFlushes(const StringPiece& url,
bool expect_success,
GoogleString* string_out);
// NOTE(review): by its name, starts the fetch without waiting for it; pair
// with WaitForFetch() below -- confirm.
void FetchFromProxyNoWait(const StringPiece& url,
const RequestHeaders& request_headers,
bool expect_success,
bool log_flush,
ResponseHeaders* headers_out);
// NOTE(review): presumably blocks on sync_ until the pending fetch is done.
void WaitForFetch();
// Property-cache test helpers; the flags select the scenario under test.
void TestPropertyCache(const StringPiece& url,
bool delay_pcache, bool thread_pcache,
bool expect_success);
void TestPropertyCacheWithHeadersAndOutput(
const StringPiece& url, bool delay_pcache, bool thread_pcache,
bool expect_success, bool check_stats, bool add_create_filter_callback,
bool expect_detach_before_pcache, const RequestHeaders& request_headers,
ResponseHeaders* response_headers, GoogleString* output);
// NOTE(review): presumably forward to fake_critical_images_finder_; whether
// ownership of the StringSet is transferred is not visible here -- confirm.
void SetCriticalImagesInFinder(StringSet* critical_images);
void SetCssCriticalImagesInFinder(StringSet* css_critical_images);
// Proxy under test; owned by the fixture.
scoped_ptr<ProxyInterface> proxy_interface_;
// Sync point owned by the fixture.
scoped_ptr<WorkerTestBase::SyncPoint> sync_;
// NOTE(review): by their names, these hold the headers, body and done-value
// reported by the fetch callback -- confirm in the .cc.
ResponseHeaders callback_response_headers_;
GoogleString callback_buffer_;
bool callback_done_value_;
private:
friend class FilterCallback;
// Raw pointer; ownership is not visible in this header.
CriticalImagesFinder* fake_critical_images_finder_;
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_TEST_BASE_H_
@@ -57,6 +57,7 @@ class FileRewriter : public RewriteDriverFactory {
virtual Timer* DefaultTimer();
virtual void SetupCaches(ServerContext* resource_manager);
virtual Statistics* statistics();
virtual ServerContext* NewServerContext();
private:
const RewriteGflags* gflags_;
@@ -72,6 +72,8 @@ class HtmlName {
kContent,
kControls,
kData,
kDataPagespeedHref,
kDataPagespeedPrioritize,
kDataSrc,
kDd,
kDeclare,
@@ -159,6 +161,7 @@ class HtmlName {
kPagespeedBlankSrc,
kPagespeedHighResSrc,
kPagespeedIframe,
kPagespeedInlineSrc,
kPagespeedLazySrc,
kPagespeedLowResSrc,
kPagespeedLscExpiry,
@@ -169,6 +172,7 @@ class HtmlName {
kPagespeedOrigSrc,
kPagespeedOrigType,
kPagespeedSize,
kPagespeedUrlHash,
kParam,
kPre,
kProfile,
@@ -208,6 +212,7 @@ class HtmlName {
kTh,
kThead,
kTime,
kTitle,
kTr,
kTrack,
kType,
@@ -344,6 +344,7 @@ class HtmlParse {
// Provide timer to help report timing of each filter. You must also
// set_log_rewrite_timing(true) to turn on this reporting.
void set_timer(Timer* timer) { timer_ = timer; }
Timer* timer() const { return timer_; }
void set_log_rewrite_timing(bool x) { log_rewrite_timing_ = x; }
// Adds a filter to be called during parsing as new events are added.
@@ -373,18 +374,6 @@ class HtmlParse {
// Returns the number of events on the event queue.
size_t GetEventQueueSize();
// Move the entire contents of extra_events onto the end of the event queue.
void AppendEventsToQueue(HtmlEventList* extra_events);
// Move the entire event queue after the first event in event_set to the end
// of tail. Return that event, or NULL if there was none.
HtmlEvent* SplitQueueOnFirstEventInSet(const ConstHtmlEventSet& event_set,
HtmlEventList* tail);
// Return the EndElementEvent for this element, or NULL if it doesn't exist
// yet.
HtmlEvent* GetEndElementEvent(const HtmlElement* element);
virtual void ParseTextInternal(const char* content, int size);
// Allow filters to determine whether they are enabled for this request.
@@ -167,18 +167,11 @@ class AsyncFetch : public Writer {
// TODO(jmarantz): move StringAsyncFetch into its own file.
class StringAsyncFetch : public AsyncFetch {
public:
// TODO(marq): Remove constructors lacking a request context.
StringAsyncFetch() : buffer_pointer_(&buffer_) { Init(); }
explicit StringAsyncFetch(const RequestContextPtr& request_ctx)
: AsyncFetch(request_ctx), buffer_pointer_(&buffer_) {
Init();
}
explicit StringAsyncFetch(GoogleString* buffer) : buffer_pointer_(buffer) {
Init();
}
StringAsyncFetch(const RequestContextPtr& request_ctx, GoogleString* buffer)
: AsyncFetch(request_ctx), buffer_pointer_(buffer) {
Init();
@@ -67,6 +67,10 @@ class CacheUrlAsyncFetcher : public UrlAsyncFetcher {
MessageHandler* message_handler,
AsyncFetch* base_fetch);
// HTTP status code used to indicate that we failed the Fetch because
// result was not found in cache. (Only happens if fetcher_ == NULL).
static const int kNotInCacheStatus;
HTTPCache* http_cache() const { return http_cache_; }
UrlAsyncFetcher* fetcher() const { return fetcher_; }
@@ -21,6 +21,8 @@
#ifndef NET_INSTAWEB_HTTP_PUBLIC_CONTENT_TYPE_H_
#define NET_INSTAWEB_HTTP_PUBLIC_CONTENT_TYPE_H_
#include <set>
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
@@ -42,6 +44,7 @@ struct ContentType {
kJpeg,
kSwf,
kWebp,
kIco,
kJson,
kPdf,
kVideo,
@@ -59,6 +62,9 @@ struct ContentType {
const char* file_extension() const { return file_extension_; }
Type type() const { return type_; }
// Return true iff this content type is CSS.
bool IsCss() const;
// Return true iff this content type is HTML, or XHTML, or some other such
// thing (e.g. CE-HTML) that we can rewrite.
bool IsHtmlLike() const;
@@ -100,11 +106,12 @@ extern const ContentType& kContentTypeGif;
extern const ContentType& kContentTypeJpeg;
extern const ContentType& kContentTypeSwf;
extern const ContentType& kContentTypeWebp;
// Pdf:
extern const ContentType& kContentTypeIco;
// PDF:
extern const ContentType& kContentTypePdf;
// Binary/octet-stream.
extern const ContentType& kBinaryOctetStream;
extern const ContentType& kContentTypeBinaryOctetStream;
// Given a name (file or url), see if it has the canonical extension
// corresponding to a particular content type.
@@ -125,7 +132,7 @@ bool ParseContentType(const StringPiece& content_type_str,
// present.
void MimeTypeListToContentTypeSet(
const GoogleString& in,
std::set<ContentType::Type>* out);
std::set<const ContentType*>* out);
} // namespace net_instaweb
@@ -0,0 +1,69 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NET_INSTAWEB_HTTP_PUBLIC_DEVICE_PROPERTIES_H_
#define NET_INSTAWEB_HTTP_PUBLIC_DEVICE_PROPERTIES_H_
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/gtest_prod.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class UserAgentMatcher;
class RequestHeaders;
// This class keeps track of the device properties of the client, which are
// for the most part learned from the UserAgent string.
class DeviceProperties {
public:
// |matcher| is stored as a raw pointer (ua_matcher_).
// NOTE(review): presumably not owned and must outlive this object -- confirm.
explicit DeviceProperties(UserAgentMatcher* matcher);
virtual ~DeviceProperties();
// Sets the user agent string the queries below refer to.
// NOTE(review): presumably also resets the lazily cached answers -- confirm.
void set_user_agent(const StringPiece& user_agent_string);
// Capability queries for the current user agent.  All are const; the mutable
// LazyBool members below suggest results are computed on first use and
// cached -- NOTE(review): confirm in the .cc.
bool SupportsImageInlining() const;
bool SupportsCriticalImagesBeacon() const;
bool SupportsJsDefer(bool enable_mobile) const;
bool SupportsWebp() const;
bool SupportsWebpLosslessAlpha() const;
bool IsMobileUserAgent() const;
bool SupportsSplitHtml(bool enable_mobile) const;
bool CanPreloadResources(const RequestHeaders* request_headers) const;
// NOTE(review): presumably returns true and fills |width| / |height| when the
// resolution is known for the current user agent -- confirm.
bool GetScreenResolution(int* width, int* height) const;
private:
friend class ImageRewriteTest;
FRIEND_TEST(ImageRewriteTest, SquashImagesForMobileScreen);
// Const but writes the mutable screen_* members; reachable from tests via the
// friend declarations above.
void SetScreenResolution(int width, int height) const;
GoogleString user_agent_;
UserAgentMatcher* ua_matcher_;
// Mutable so the const query methods can update them.
mutable LazyBool supports_image_inlining_;
mutable LazyBool supports_js_defer_;
mutable LazyBool supports_webp_;
mutable LazyBool supports_webp_lossless_alpha_;
mutable LazyBool is_mobile_user_agent_;
mutable LazyBool supports_split_html_;
// NOTE(review): no public accessor in this header uses this field by name;
// presumably it backs CanPreloadResources() -- confirm.
mutable LazyBool supports_flush_early_;
mutable LazyBool screen_dimensions_set_;
mutable int screen_width_;
mutable int screen_height_;
DISALLOW_COPY_AND_ASSIGN(DeviceProperties);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_HTTP_PUBLIC_DEVICE_PROPERTIES_H_
@@ -86,7 +86,8 @@ class HTTPCache {
explicit Callback(const RequestContextPtr& request_ctx)
: response_headers_(NULL),
owns_response_headers_(false),
request_ctx_(request_ctx) {
request_ctx_(request_ctx),
log_timing_(true) {
}
virtual ~Callback();
virtual void Done(FindResult find_result) = 0;
@@ -141,6 +142,8 @@ class HTTPCache {
LogRecord* log_record();
const RequestContextPtr& request_context() { return request_ctx_; }
void set_log_timing(bool t) { log_timing_ = t; }
bool log_timing() const { return log_timing_; }
virtual void SetTimingMs(int64 timing_value_ms);
@@ -152,6 +155,7 @@ class HTTPCache {
ResponseHeaders* response_headers_;
bool owns_response_headers_;
RequestContextPtr request_ctx_;
bool log_timing_;
DISALLOW_COPY_AND_ASSIGN(Callback);
};
@@ -49,6 +49,7 @@ class HTTPValue : public Writer {
//
// If Clear() is called, then SetHeaders() can be called once again.
//
// Does NOT take ownership of headers.
// A non-const pointer is required for the response headers so that
// the cache fields can be updated if necessary.
void SetHeaders(ResponseHeaders* headers);
@@ -45,9 +45,11 @@ class InflatingFetch : public SharedAsyncFetch {
virtual ~InflatingFetch();
// Use this one cautiously, since it may cause resources to be corrupted
// if you use it with anything other than the IPRO path.
// if you use it with anything other than the IPRO path. Note, that if NULL
// is contained in the bypass_set then resources with unknown content type
// will not be inflated.
void set_inflation_content_type_blacklist(
const std::set<ContentType::Type>& bypass_set) {
const std::set<const ContentType*>& bypass_set) {
inflation_content_type_blacklist_ = bypass_set;
}
@@ -87,7 +89,7 @@ class InflatingFetch : public SharedAsyncFetch {
bool inflate_failure_;
// Set of content types that will not be inflated.
std::set<ContentType::Type> inflation_content_type_blacklist_;
std::set<const ContentType*> inflation_content_type_blacklist_;
DISALLOW_COPY_AND_ASSIGN(InflatingFetch);
};
@@ -20,11 +20,11 @@
#define NET_INSTAWEB_HTTP_PUBLIC_LOG_RECORD_H_
#include "net/instaweb/http/public/logging_proto.h"
#include "net/instaweb/http/public/logging_proto_impl.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/gtest_prod.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
// If your .cc file needs to use the types declared in logging_proto.h,
// you must also include net/instaweb/http/public/logging_proto_impl.h
@@ -58,45 +58,97 @@ class LogRecord {
explicit LogRecord(AbstractMutex* mutex);
virtual ~LogRecord();
// Log a rewriter (identified by an id string) as having been applied to
// the request being logged. These ids will be aggregated and written to the
// protobuf when Finalize() is called.
void LogAppliedRewriter(const char* rewriter_id);
// For compatibility with older logging methods, returns a comma-joined string
// concatenating the sorted coalesced rewriter ids of APPLIED_OK entries in
// the rewriter_info array. Each id will appear once in the string if any
// number of successful rewrites for that id have been logged.
GoogleString AppliedRewritersString();
// This should be called when all logging activity on the log record is
// complete. If a subclass of this class uses other aggregate data structures
// or other intermediates before writing to the wrapped data structure,
// it should do those writes in FinalizeImpl. mutex_ guards this.
void Finalize();
// Create a new rewriter logging submessage for |rewriter_id|, returning a
// pointer to it for later access. Note that this can return NULL if the
// size of rewriter_info has grown too large. It is the caller's
// responsibility to handle this safely.
RewriterInfo* NewRewriterInfo(const char* rewriter_id);
// Creates a new rewriter logging submessage for |rewriter_id|,
// and sets its status.
void SetRewriterLoggingStatus(
const char* rewriter_id, RewriterInfo::RewriterApplicationStatus status);
// Return the LoggingInfo proto wrapped by this class. Calling code must
// guard any reads and writes to this using mutex().
virtual LoggingInfo* logging_info();
// Atomically sets is_html_response in the logging proto.
void SetIsHtml(bool is_html);
// Adds a new cohort info with the given cohort name and returns its index.
int AddPropertyCohortInfo(const GoogleString& cohort);
// Updates the cohort info at the specified index, to include the given
// property in the list of properties found in the cache.
void AddFoundPropertyToCohortInfo(int index, const GoogleString& property);
// Updates the cohort info at the specified index, to indicate whether it was
// a cache hit.
void SetCacheStatusForCohortInfo(int index, bool found, int key_state);
// Updates the cohort info at the specified index with the device and cache
// type.
void SetDeviceAndCacheTypeForCohortInfo(
int index, int device_type, int cache_type);
// Mutex-guarded log mutation convenience methods. The rule of thumb is that
// if a single-field update to a logging proto occurs multiple times, it
// should be factored out into a method on this class.
void SetBlinkRequestFlow(int flow);
void SetIsOriginalResourceCacheable(bool cacheable);
void SetTimingRequestStartMs(int64 ms);
void SetTimingHeaderFetchMs(int64 ms);
void SetTimingFetchMs(int64 ms);
int64 GetTimingFetchMs();
void SetTimingProcessingTimeMs(int64 ms);
// Sets time_to_start_fetch_ms in the TimingInfo submessage as an offset from
// timing_info.request_start_ms (|start_time_ms| is an absolute time value
// and is converted into the offset). If request_start_ms is unset, this is a
// silent no-op. This may be called several times in succession, for example
// in the case of retried fetches. In that case, if time_to_start_fetch_ms has
// already been set in the log record, this is again a silent no-op.
void UpdateTimingInfoWithFetchStartTime(int64 start_time_ms);
// Override SetBlinkInfoImpl if necessary.
void SetBlinkInfo(const GoogleString& user_agent);
// Log a RewriterInfo for the image rewrite filter.
void LogImageRewriteActivity(
const char* id,
RewriterInfo::RewriterApplicationStatus status,
bool is_image_inlined,
bool is_critical_image,
bool try_low_res_src_insertion,
bool low_res_src_inserted,
int low_res_data_size);
void LogJsDisableFilter(const char* id,
RewriterInfo::RewriterApplicationStatus status,
bool has_pagespeed_no_defer);
void LogLazyloadFilter(const char* id,
RewriterInfo::RewriterApplicationStatus status,
bool is_blacklisted, bool is_critical);
// Mutex-guarded log-writing operations. Derived classes should override
// *Impl methods. Returns false if the log write attempt failed.
bool WriteLog();
// Update the log record with Blink-specific information, then write the
// log as if WriteLog() was called.
bool WriteLogForBlink(const GoogleString& user_agent);
// If log-writing needs to occur in the context of an existing lock,
// these methods may be used. Returns false if write attempt failed.
bool WriteLogWhileLocked();
bool WriteLogForBlinkWhileLocked(const GoogleString& user_agent);
// Return the mutex associated with this instance. Calling code should
// guard reads and writes of LogRecords
AbstractMutex* mutex() { return mutex_.get(); }
// Sets the maximum number of RewriterInfo submessages that can accumulate in
// the LoggingInfo proto wrapped by this class.
void SetRewriterInfoMaxSize(int x);
protected:
// Non-initializing default constructor for subclasses. Subclasses that invoke
// this constructor should implement and call their own initializer that
@@ -106,40 +158,25 @@ class LogRecord {
void set_mutex(AbstractMutex* m);
// Returns a comma-joined string concatenating the contents of
// applied_rewriters_
GoogleString ConcatenatedRewriterString();
// Implementation methods for subclasses to override.
// Implements logging an applied rewriter.
virtual void LogAppliedRewriterImpl(const char* rewriter_id);
// Implements finalization.
virtual void FinalizeImpl();
// Implements setting Blink-specific log information; base impl is a no-op.
virtual void SetBlinkInfoImpl(const GoogleString& user_agent) {}
// Implements writing a log, base implementation is a no-op. Returns false if
// writing failed.
virtual bool WriteLogImpl() { return true; }
// Implements writing the Blink log, base implementation is a no-op. Returns
// false if writing failed.
virtual bool WriteLogForBlinkImpl(const GoogleString& user_agent) {
return true;
}
// True if Finalize() has been called. mutex_ guards this.
bool finalized() { return finalized_; }
FRIEND_TEST(LogRecordTest, NoAppliedRewriters);
private:
// Called on construction.
void InitLogging();
StringSet applied_rewriters_;
scoped_ptr<LoggingInfo> logging_info_;
bool finalized_;
// This must be set. Implementation constructors must minimally default this
// to a NullMutex.
scoped_ptr<AbstractMutex> mutex_;
// The maximum number of rewrite info logs stored for a single request.
int rewriter_info_max_size_;
DISALLOW_COPY_AND_ASSIGN(LogRecord);
};
@@ -21,11 +21,18 @@
namespace net_instaweb {
class BlinkInfo;
class FlushEarlyFilterInfo;
class FlushEarlyResourceInfo;
class ImageRewriteResourceInfo;
class LoggingInfo;
class MetadataCacheInfo;
class PropertyCohortInfo;
class PropertyPageInfo;
class RewriterInfo;
class RewriteResourceInfo;
class TimingInfo;
}
} // namespace net_instaweb
#endif // NET_INSTAWEB_HTTP_PUBLIC_LOGGING_PROTO_H_
@@ -72,13 +72,22 @@ struct HttpAttributes {
// all rewrites are completed before the response is sent to the client.
static const char kXPsaBlockingRewrite[];
// This header is set on optional fetches that got dropped due to load.
static const char kXPsaLoadShed[];
// If this header is present on an incoming request it will be treated as if
// it came over a SPDY connection for purposes of applying special
// configuration or optimizations.
static const char kXPsaOptimizeForSpdy[];
// This header is set on optional fetches that got dropped due to load.
static const char kXPsaLoadShed[];
// This header is set in a distributed rewrite task to ask for metadata
// in the response.
static const char kXPsaRequestMetadata[];
// This header is set in a distributed rewrite response and the value
// is the serialized metadata.
static const char kXPsaResponseMetadata[];
static const char kXRequestedWith[];
// This header is set on optimized responses to indicate the original
@@ -143,6 +152,7 @@ enum Code {
kProxyFailure = 521,
kProxyConfigurationFailure = 522,
kProxyDeclinedRequest = 523,
kProxyDnsLookupFailure = 524,
// Instaweb-specific response codes: these are intentionally chosen to be
// outside the normal HTTP range, but we consider these response codes
@@ -33,8 +33,6 @@ namespace net_instaweb {
// Can be used multiple times by calling Reset in between.
class ExpectStringAsyncFetch : public StringAsyncFetch {
public:
explicit ExpectStringAsyncFetch(bool expect_success)
: expect_success_(expect_success) {}
ExpectStringAsyncFetch(bool expect_success,
const RequestContextPtr& request_context)
: StringAsyncFetch(request_context), expect_success_(expect_success) {}
@@ -22,6 +22,7 @@
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/ref_counted_ptr.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
@@ -51,13 +52,53 @@ class RequestContext : public RefCounted<RequestContext> {
// Makes a request context for running tests.
static RequestContextPtr NewTestRequestContext(ThreadSystem* thread_system);
RequestTrace* trace_context() { return trace_context_.get(); }
// Creates a new, unowned LogRecord, for use by some subordinate action.
// Also useful in case of background activity where logging is required after
// the response is written out, e.g., blink flow.
virtual LogRecord* NewSubordinateLogRecord(AbstractMutex* logging_mutex);
// The root trace context is associated with the user request which we
// are attempting to serve. If this is a request with constituent resources
// that we rewrite, there may be several dependent fetches synthesized
// by PSOL during rewrites. Those are traced separately.
RequestTrace* root_trace_context() { return root_trace_context_.get(); }
// Takes ownership of the given context.
void set_trace_context(RequestTrace* x);
void set_root_trace_context(RequestTrace* x);
// Creates a new RequestTrace associated with a request depending on the
// root user request; e.g., a subresource fetch for an HTML page.
//
// This implementation is a no-op. Subclasses should customize this based
// on their underlying tracing system. A few interface notes:
// - The caller is not responsible for releasing memory or managing the
// lifecycle of the RequestTrace.
// - A call to CreateDependentTraceContext() need not be matched by a call
// to ReleaseDependentTraceContext(). Cleanup should be automatic and
// managed by RequestContext subclass implementations.
virtual RequestTrace* CreateDependentTraceContext(const StringPiece& label) {
return NULL;
}
// Releases this object's reference to the given context and frees memory.
// Calls to CreateDependentTraceContext need not be matched by
// calls to this function. If a dependent trace span is not released when
// the request context reference count drops to zero, this object will clean
// all dependent traces.
//
// Note that automatic cleanup of dependent traces is provided for safety.
// To provide meaningful performance statistics, cleanup should be
// coupled with the completion of the event being traced.
//
// Subclasses should customize this based on their underlying tracing system.
virtual void ReleaseDependentTraceContext(RequestTrace* t);
// The log record for this request, created when the request context is.
LogRecord* log_record();
// Determines whether this request is using the SPDY protocol.
bool using_spdy() const { return using_spdy_; }
void set_using_spdy(bool x) { using_spdy_ = x; }
protected:
// The default constructor will not create a LogRecord. Subclass constructors
// must do this explicitly.
@@ -75,8 +116,10 @@ class RequestContext : public RefCounted<RequestContext> {
// Always non-NULL.
scoped_ptr<LogRecord> log_record_;
// Logs tracing events.
scoped_ptr<RequestTrace> trace_context_;
// Logs tracing events associated with the root request.
scoped_ptr<RequestTrace> root_trace_context_;
bool using_spdy_;
DISALLOW_COPY_AND_ASSIGN(RequestContext);
};
@@ -57,6 +57,9 @@ class RequestHeaders : public Headers<HttpRequestHeaders> {
// Determines whether a request header accepts gzipped content.
bool AcceptsGzip() const;
// Determines whether metadata was requested in the response.
bool MetadataRequested() const;
// Returns true if these request headers are for an XmlHttp request (i.e. ajax
// request). This mechanism is not reliable because sometimes this header is
// not set even for XmlHttp requests.
@@ -80,7 +80,7 @@ class UrlAsyncFetcher {
// be corrupted if you use it with anything other than the IPRO path.
AsyncFetch* EnableInflation(
AsyncFetch* fetch,
const std::set<ContentType::Type>* inflation_content_type_blacklist)
const std::set<const ContentType*>* inflation_content_type_blacklist)
const;
protected:
@@ -20,8 +20,14 @@
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/fast_wildcard_group.h"
using std::pair;
using std::make_pair;
using std::map;
namespace net_instaweb {
class PropertyCache;
class PropertyPage;
class RequestHeaders;
// This class contains various user agent based checks. Currently all of these
@@ -42,6 +48,15 @@ class UserAgentMatcher {
kDoesNotSupportBlink,
};
// Broad device classes that a user agent string can be mapped to; this is the
// type returned by GetDeviceTypeForUA() and accepted by DeviceTypeSuffix().
enum DeviceType {
kDesktop,
kTablet,
kMobile,
// This should always be the last type. This is used to mark the size of an
// array containing various DeviceTypes.
kEndOfDeviceType
};
enum PrefetchMechanism {
kPrefetchNotSupported,
kPrefetchLinkRelSubresource,
@@ -50,6 +65,11 @@ class UserAgentMatcher {
kPrefetchLinkScriptTag,
};
// Cohort descriptors for PropertyCache lookups of device objects.
static const char kDevicePropertiesCohort[];
static const char kScreenWidth[];
static const char kScreenHeight[];
UserAgentMatcher();
virtual ~UserAgentMatcher();
@@ -73,6 +93,12 @@ class UserAgentMatcher {
const StringPiece& user_agent,
const RequestHeaders* request_headers) const;
// Returns the DeviceType for the given user agent string.
DeviceType GetDeviceTypeForUA(const StringPiece& user_agent) const;
// Returns the suffix for the given device_type.
static StringPiece DeviceTypeSuffix(DeviceType device_type);
bool SupportsJsDefer(const StringPiece& user_agent, bool allow_mobile) const;
bool SupportsWebp(const StringPiece& user_agent) const;
bool SupportsWebpLosslessAlpha(const StringPiece& user_agent) const;
@@ -99,6 +125,10 @@ class UserAgentMatcher {
virtual bool SupportsSplitHtml(const StringPiece& user_agent,
bool allow_mobile) const;
// Returns true and sets width and height if we know them for the UA.
virtual bool GetScreenResolution(
const StringPiece& user_agent, int* width, int* height);
private:
FastWildcardGroup supports_image_inlining_;
FastWildcardGroup blink_desktop_whitelist_;
@@ -113,6 +143,8 @@ class UserAgentMatcher {
FastWildcardGroup supports_dns_prefetch_;
const RE2 chrome_version_pattern_;
mutable map <GoogleString, pair<int, int> > screen_dimensions_map_;
GoogleString known_devices_pattern_;
DISALLOW_COPY_AND_ASSIGN(UserAgentMatcher);
};
@@ -112,6 +112,9 @@ const char kIPadUserAgent[] =
const char kNexus7ChromeUserAgent[] =
"Mozilla/5.0 (Linux; Android 4.2; Nexus 7 Build/JOP32C) AppleWebKit/535.19"
"(KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19";
const char XT907UserAgent[] =
"Mozilla/5.0 (Linux; Android 4.1.1; XT907 Build/9.8.1Q_27-2) AppleWebKit"
"/537.25 (KHTML, like Gecko) Chrome/26.0.1376.1 Mobile Safari/537.25";
const char kAcceptHeaderValueNonMobile[] = "text/html";
const char kAcceptHeaderValueMobile[] =
"text/html,application/vnd.wap.xhtml+xml";
@@ -98,6 +98,7 @@ class WriteThroughHTTPCache : public HTTPCache {
// size limit. Note that both the key and value will count
// toward the size.
void set_cache1_limit(size_t limit) { cache1_size_limit_ = limit; }
size_t cache1_limit() const { return cache1_size_limit_; }
virtual const char* Name() const { return name_.c_str(); }
@@ -60,8 +60,13 @@ class AddInstrumentationFilter : public EmptyHtmlFilter {
// tag name.
void AddScriptNode(HtmlElement* element, const GoogleString& tag_name);
// Adds the kHeadScript just before the current event only if the element is
// not a <title> or <meta>.
void AddHeadScript(HtmlElement* element);
RewriteDriver* driver_;
bool found_head_;
bool added_head_script_;
bool added_tail_script_;
bool added_unload_script_;
@@ -19,11 +19,12 @@
#define NET_INSTAWEB_REWRITER_PUBLIC_BEACON_CRITICAL_IMAGES_FINDER_H_
#include "net/instaweb/rewriter/public/critical_images_finder.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class RewriteDriver;
class Statistics;
// Support critical (above the fold) image detection through a javascript beacon
@@ -36,12 +37,17 @@ class BeaconCriticalImagesFinder : public CriticalImagesFinder {
explicit BeaconCriticalImagesFinder(Statistics* stats);
virtual ~BeaconCriticalImagesFinder();
virtual bool IsMeaningful() const {
// TODO(jud): This class is not currently implemented yet, change this when
// it is functional.
return false;
virtual bool IsMeaningful(const RewriteDriver* driver) const {
return driver->options()->critical_images_beacon_enabled();
}
// Checks whether the requested image is present in the critical set or not.
// The critical image beacon sends back hashes of the URLs to save space, so
// this computes the same hash on image_url and checks if it is stored in the
// critical image set.
virtual bool IsCriticalImage(const GoogleString& image_url,
const RewriteDriver* driver) const;
virtual void ComputeCriticalImages(StringPiece url,
RewriteDriver* driver);
@@ -19,6 +19,8 @@
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_BLINK_CRITICAL_LINE_DATA_FINDER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_BLINK_CRITICAL_LINE_DATA_FINDER_H_
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/http/public/user_agent_matcher.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
@@ -29,6 +31,7 @@ class BlinkCriticalLineData;
class PropertyPage;
class ResponseHeaders;
class RewriteDriver;
class RewriteDriverFactory;
// Finds BlinkCriticalLineData from the given html content. This information
// will be used by BlinkFlowCriticalLine.
@@ -41,8 +44,7 @@ class BlinkCriticalLineDataFinder {
// Gets BlinkCriticalLineData from the given PropertyPage.
virtual BlinkCriticalLineData* ExtractBlinkCriticalLineData(
int64 cache_time_ms, PropertyPage* page, int64 now_ms, bool diff_enabled,
bool propagate_cache_deletes);
int64 cache_time_ms, PropertyPage* page, int64 now_ms, bool diff_enabled);
// Computes BlinkCriticalLineData for the given html content.
virtual void ComputeBlinkCriticalLineData(
@@ -52,7 +54,12 @@ class BlinkCriticalLineDataFinder {
const ResponseHeaders* response_headers,
RewriteDriver* driver);
virtual void PropagateCacheDeletes(const GoogleString& key);
virtual void PropagateCacheDeletes(const GoogleString& url, int furious_id,
UserAgentMatcher::DeviceType device_type);
virtual bool UpdateDiffInfo(
bool is_diff, int64 now_ms, RewriteDriver* rewrite_driver,
RewriteDriverFactory* factory);
private:
DISALLOW_COPY_AND_ASSIGN(BlinkCriticalLineDataFinder);
@@ -23,6 +23,7 @@
#include <utility>
#include <vector>
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/util/public/json.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
@@ -35,7 +36,6 @@ class HtmlElement;
class Panel;
class PanelSet;
class ServerContext;
class RewriteOptions;
class UserAgentMatcher;
typedef std::map<GoogleString, const Panel*> PanelIdToSpecMap;
@@ -59,6 +59,7 @@ const char kXpath[] = "xpath";
// TODO(rahulbansal): Use these constants everywhere in the code from here.
const char kBlinkCohort[] = "blink";
const char kBlinkCriticalLineDataPropertyName[] = "blink_critical_line_data";
const char kCacheHtmlRewriterInfo[] = "cache_html";
const char kComputeVisibleTextFilterOutputEndMarker[] =
"<!--GooglePanel **** Output end ****-->";
@@ -66,14 +67,15 @@ const char kComputeVisibleTextFilterOutputEndMarker[] =
bool IsUserAgentAllowedForBlink(AsyncFetch* async_fetch,
const RewriteOptions* options,
const char* user_agent,
const UserAgentMatcher& user_agent_matcher);
UserAgentMatcher* user_agent_matcher);
// Checks whether the request for 'url' is a valid blink request.
bool IsBlinkRequest(const GoogleUrl& url,
AsyncFetch* async_fetch,
const RewriteOptions* options,
const char* user_agent,
const UserAgentMatcher& user_agent_matcher_);
UserAgentMatcher* user_agent_matcher_,
RewriteOptions::Filter filter);
// Checks if blink critical line flow can be applied.
bool ShouldApplyBlinkFlowCriticalLine(
@@ -1,5 +1,5 @@
/*
* Copyright 2012 Google Inc.
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,15 +14,16 @@
* limitations under the License.
*/
// Author: rahulbansal@google.com (Rahul Bansal)
// Authors: mmohabey@google.com (Megha Mohabey)
// rahulbansal@google.com (Rahul Bansal)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_BLINK_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_BLINK_FILTER_H_
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CACHE_HTML_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_CACHE_HTML_FILTER_H_
#include <vector>
#include "net/instaweb/htmlparse/public/html_writer_filter.h"
#include "net/instaweb/rewriter/blink_critical_line_data.pb.h"
#include "net/instaweb/rewriter/cache_html_info.pb.h"
#include "net/instaweb/rewriter/public/blink_util.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/property_cache.h"
@@ -37,33 +38,24 @@ class HtmlElement;
class RewriteDriver;
class RewriteOptions;
// This class extracts the non cacheable panels, looks up the non critical
// content in property cache and sends it to the client,
class BlinkFilter : public HtmlWriterFilter {
// This class extracts the non cacheable panels and sends it to the client.
// TODO(mmohabey): Integrate with SplitFilter and send non critical JSON too.
class CacheHtmlFilter : public HtmlWriterFilter {
public:
// TODO(rahulbansal): Move these consts to appropriate file.
static const char kBlinkCriticalLineDataPropertyName[];
static const char kBlinkCohort[];
static const char kRefreshPageJs[];
explicit CacheHtmlFilter(RewriteDriver* rewrite_driver);
virtual ~CacheHtmlFilter();
explicit BlinkFilter(RewriteDriver* rewrite_driver);
virtual ~BlinkFilter();
void StartDocument();
void StartElement(HtmlElement* element);
void EndElement(HtmlElement* element);
void EndDocument();
virtual void StartDocument();
virtual void StartElement(HtmlElement* element);
virtual void EndElement(HtmlElement* element);
virtual void EndDocument();
void WriteString(StringPiece str);
void Flush();
virtual const char* Name() const { return "BlinkFilter"; }
virtual void Flush();
virtual const char* Name() const { return "CacheHtmlFilter"; }
private:
void SendCookies();
void SendNonCriticalJson(GoogleString* str);
void ServeNonCriticalPanelContents();
void SendNonCacheableObject(const Json::Value& json);
void ObtainBlinkCriticalLineData();
void HandleLastModifiedChange();
// Produces a custom xpath relative to the body or relative to the nearest
// ancestor with an id (if there is one). Xpath comprises of the tag name
// and the id (if it exists) or the position of the elements.
@@ -78,13 +70,13 @@ class BlinkFilter : public HtmlWriterFilter {
const HtmlElement* current_non_cacheable_element_; // We do not own this.
GoogleString current_panel_id_;
const PropertyCache::Cohort* cohort_; // We do not own this.
BlinkCriticalLineData blink_critical_line_data_;
CacheHtmlInfo cache_html_info_;
bool abort_filter_;
std::vector<int> num_children_stack_;
DISALLOW_COPY_AND_ASSIGN(BlinkFilter);
DISALLOW_COPY_AND_ASSIGN(CacheHtmlFilter);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_BLINK_FILTER_H_
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CACHE_HTML_FILTER_H_
@@ -1,5 +1,5 @@
/*
* Copyright 2010 Google Inc.
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,33 +14,31 @@
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
// Author: richardho@google.com (Richard Ho)
#ifndef NET_INSTAWEB_UTIL_PUBLIC_STRING_WRITER_H_
#define NET_INSTAWEB_UTIL_PUBLIC_STRING_WRITER_H_
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CACHE_HTML_INFO_FINDER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_CACHE_HTML_INFO_FINDER_H_
#include "net/instaweb/http/public/user_agent_matcher.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/writer.h"
namespace net_instaweb {
class MessageHandler;
// Writer implementation for directing HTML output to a string.
class StringWriter : public Writer {
// Manages the cache lifetimes of CacheHtmlInfo.
class CacheHtmlInfoFinder {
public:
explicit StringWriter(GoogleString* str) : string_(str) { }
virtual ~StringWriter();
virtual bool Write(const StringPiece& str, MessageHandler* message_handler);
virtual bool Flush(MessageHandler* message_handler);
private:
GoogleString* string_;
CacheHtmlInfoFinder() { }
virtual ~CacheHtmlInfoFinder() { }
DISALLOW_COPY_AND_ASSIGN(StringWriter);
virtual void PropagateCacheDeletes(const GoogleString& url, int furious_id,
UserAgentMatcher::DeviceType device_type);
private:
DISALLOW_COPY_AND_ASSIGN(CacheHtmlInfoFinder);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_UTIL_PUBLIC_STRING_WRITER_H_
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CACHE_HTML_INFO_FINDER_H_
@@ -1,85 +0,0 @@
/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: mmohabey@google.com (Megha Mohabey)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_COLLECT_SUBRESOURCES_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_COLLECT_SUBRESOURCES_FILTER_H_
#include <map>
#include "net/instaweb/htmlparse/public/html_element.h"
#include "net/instaweb/rewriter/public/rewrite_filter.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/gtest_prod.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class AbstractMutex;
class FlushEarlyInfo;
class FlushEarlyResource;
class PropertyCache;
class RewriteDriver;
// CollectSubresourcesFilter gets all the rewritten subresources in the head
// section of the document and stores them in property cache. The resources
// are then flushed early in FlushEarlyFlow in the form of a dummy HEAD which
// induces early downloading of the sub resources by the browser.
class CollectSubresourcesFilter : public RewriteFilter {
public:
explicit CollectSubresourcesFilter(RewriteDriver* rewrite_driver);
virtual ~CollectSubresourcesFilter();
// HTML event hooks (the *Impl variants of the document/element callbacks).
virtual void StartDocumentImpl();
virtual void StartElementImpl(HtmlElement* element);
virtual void EndElementImpl(HtmlElement* element);
// Human-readable filter name.
virtual const char* Name() const { return "CollectSubresourcesFilter"; }
// Short filter id.
virtual const char* id() const { return "fs"; }
// NOTE(review): presumably copies the collected subresources into |info|;
// the implementation is not visible in this header -- confirm.
void AddSubresourcesToFlushEarlyInfo(FlushEarlyInfo* info);
private:
// Collected resources, indexed by an integer key.
// NOTE(review): the key is presumably the order in which the resource was
// seen in the document -- confirm against the implementation.
typedef std::map<int, FlushEarlyResource*> ResourceMap;
// Creates a rewrite context for the subresource.
void CreateSubresourceContext(StringPiece url,
HtmlElement* elt,
HtmlElement::Attribute* attr);
// Enable writing of the property cache DOM cohort in the RewriteDriver.
virtual bool UsesPropertyCacheDomCohort() const { return true; }
// Rewrite context type used by this filter; defined in the implementation.
class Context;
// NOTE(review): judging by the names, these track whether parsing is
// currently inside the first <head> and whether one has been seen -- confirm.
bool in_first_head_;
bool seen_first_head_;
// NOTE(review): presumably the number of subresources seen so far -- confirm.
int num_resources_;
// NOTE(review): presumably guards subresources_ -- confirm in the .cc file.
scoped_ptr<AbstractMutex> mutex_;
// The subresources seen in the head of the page added by
// CollectSubresourcesFilter Filter.
ResourceMap subresources_;
PropertyCache* property_cache_;
// Give the unit tests access to private members.
FRIEND_TEST(CollectSubresourcesFilterTest, CollectSubresourcesFilter);
FRIEND_TEST(CollectSubresourcesFilterTest, HtmlHasRewrittenUrl);
DISALLOW_COPY_AND_ASSIGN(CollectSubresourcesFilter);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_COLLECT_SUBRESOURCES_FILTER_H_
@@ -116,7 +116,10 @@ class CommonFilter : public EmptyHtmlFilter {
virtual void StartElementImpl(HtmlElement* element) = 0;
virtual void EndElementImpl(HtmlElement* element) = 0;
// Protected pointers for inheriter's to use
// ID string used in logging. Inheritors should supply whatever short ID
// string they use.
virtual const char* LoggingId() { return Name(); }
// Protected pointers for inheritors to use.
RewriteDriver* driver_;
ServerContext* server_context_;
const RewriteOptions* rewrite_options_;
@@ -0,0 +1,90 @@
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: slamm@google.com (Stephen Lamm)
//
// Replace link tags with the inline CSS that is resolved on initial load.
// Move the link tags to the bottom (usually CSS is placed in HEAD). Also,
// copy existing inline style blocks to the bottom to maintain the original
// rule order.
//
// TODO(slamm): Consider prioritizing the rules in inline style blocks too.
//
// This lessens the external resources in the HEAD and allows the page to load
// sooner.
//
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_CSS_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_CSS_FILTER_H_
#include <vector>
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
#include "net/instaweb/rewriter/public/css_tag_scanner.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class CriticalCssFinder;
class HtmlCharactersNode;
class HtmlElement;
class RewriteDriver;
// HTML filter that replaces CSS <link> tags with the critical CSS rules as
// inline CSS and moves the original links (and copies of existing inline
// style blocks) to the bottom of the document to keep the rule order.
class CriticalCssFilter : public EmptyHtmlFilter {
public:
// |finder| supplies the critical CSS data.
// NOTE(review): neither pointer appears to be owned by this class (both are
// stored as raw pointers) -- confirm lifetime requirements with the caller.
explicit CriticalCssFilter(RewriteDriver* rewrite_driver,
CriticalCssFinder* finder);
virtual ~CriticalCssFilter();
// NOTE(review): presumably the JavaScript snippet that adds the deferred
// styles back to the page -- confirm against the definition in the .cc file.
static const char kAddStylesScript[];
// Overridden from EmptyHtmlFilter:
virtual void StartDocument();
virtual void EndDocument();
virtual void StartElement(HtmlElement* element);
virtual void Characters(HtmlCharactersNode* characters);
virtual void EndElement(HtmlElement* element);
// Human-readable filter name.
virtual const char* Name() const { return "CriticalCss"; }
private:
// Returns the critical CSS rules for the |decoded_url| of a <link> tag.
// If data is unavailable (e.g., not yet determined, or flushed from
// page property cache), the returned StringPiece .data() is NULL.
// If no CSS is critical for |decoded_url|, the returned StringPiece is empty.
StringPiece GetRules(StringPiece decoded_url) const;
RewriteDriver* driver_;
CssTagScanner css_tag_scanner_;
CriticalCssFinder* finder_;
// Stores a map of CSS link URLs to critical CSS.
scoped_ptr<StringStringMap> critical_css_map_;
// Helper types defined in the implementation file.
class CssElement;
class CssStyleElement;
typedef std::vector<CssElement*> CssElementVector;
// NOTE(review): presumably the CSS elements collected while parsing the
// current document; ownership of the pointers is not visible here -- confirm.
CssElementVector css_elements_;
// NOTE(review): presumably the <style> element currently being parsed, or
// NULL when outside one -- confirm.
CssStyleElement* current_style_element_;
// NOTE(review): presumably set once any link matched an entry in
// critical_css_map_ -- confirm.
bool has_critical_css_match_;
DISALLOW_COPY_AND_ASSIGN(CriticalCssFilter);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_CSS_FILTER_H_
@@ -0,0 +1,75 @@
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: slamm@google.com (Stephen Lamm)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_CSS_FINDER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_CSS_FINDER_H_
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class PropertyValue;
class RewriteDriver;
class Statistics;
class TimedVariable;
// Finds critical CSS rules (i.e. CSS needed for the initial page load).
class CriticalCssFinder {
public:
// Names of the statistics variables kept by this class.
// NOTE(review): judging by the names, they count lookups that found valid,
// expired, or no critical CSS data -- confirm in the implementation.
static const char kCriticalCssValidCount[];
static const char kCriticalCssExpiredCount[];
static const char kCriticalCssNotFoundCount[];
explicit CriticalCssFinder(Statistics* stats);
virtual ~CriticalCssFinder();
// NOTE(review): presumably registers the variables named above with
// |statistics| -- confirm.
static void InitStats(Statistics* statistics);
// Gets critical css from property cache.
// NOTE(review): returns a raw pointer; callers in this codebase wrap it in a
// scoped_ptr, so the caller presumably takes ownership -- confirm.
virtual StringStringMap* CriticalCssMap(RewriteDriver* driver);
// Compute the critical css for |url|.
// Pure virtual: each subclass supplies its own computation.
virtual void ComputeCriticalCss(StringPiece url, RewriteDriver* driver) = 0;
// Copy |critical_css_map| into property cache. Returns true on success.
virtual bool UpdateCache(RewriteDriver* driver,
const StringStringMap& critical_css_map);
// Pure virtual: each subclass supplies its cohort name.
// NOTE(review): presumably the property cache cohort the data is stored
// under -- confirm.
virtual const char* GetCohort() const = 0;
protected:
// NOTE(review): presumably looks up this finder's property in the driver's
// property page; may return NULL -- confirm.
PropertyValue* GetPropertyValue(RewriteDriver* driver);
private:
static const char kCriticalCssPropertyName[];
// Returns the critical css from |property_value|.
StringStringMap* DeserializeCacheData(RewriteDriver* driver,
const PropertyValue* property_value);
// Statistics variables. NOTE(review): presumably looked up from the
// Statistics object passed to the constructor, which would own them -- confirm.
TimedVariable* critical_css_valid_count_;
TimedVariable* critical_css_expired_count_;
TimedVariable* critical_css_not_found_count_;
DISALLOW_COPY_AND_ASSIGN(CriticalCssFinder);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_CSS_FINDER_H_
@@ -0,0 +1,67 @@
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jud@google.com (Jud Porter)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_IMAGES_BEACON_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_IMAGES_BEACON_FILTER_H_
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
#include "net/instaweb/util/public/basictypes.h"
namespace net_instaweb {
class HtmlElement;
class RewriteDriver;
class Statistics;
class Variable;
// Inject javascript for detecting above the fold images after the page has
// loaded. Also adds pagespeed_url_hash attributes that the beacon sends
// back to the server. This allows the beacon to work despite image URL
// rewriting or inlining.
class CriticalImagesBeaconFilter : public EmptyHtmlFilter {
public:
// Counters.
static const char kCriticalImagesBeaconAddedCount[];
explicit CriticalImagesBeaconFilter(RewriteDriver* driver);
virtual ~CriticalImagesBeaconFilter();
// NOTE(review): presumably decides per request whether this filter should
// run -- the criteria live in the implementation; confirm.
virtual void DetermineEnabled();
// NOTE(review): presumably registers kCriticalImagesBeaconAddedCount with
// |statistics| -- confirm.
static void InitStats(Statistics* statistics);
// Intentionally a no-op: the empty body shows there is nothing to tear down.
static void Terminate() {}
// HTML event hooks.
virtual void StartDocument();
virtual void EndElement(HtmlElement* element);
// Human-readable filter name.
virtual const char* Name() const { return "CriticalImagesBeacon"; }
private:
// Clear all state associated with filter.
void Clear();
RewriteDriver* driver_;
// NOTE(review): presumably true once the beacon script has been inserted
// into the current document -- confirm.
bool added_script_;
// The total number of times the beacon is added.
Variable* critical_images_beacon_added_count_;
DISALLOW_COPY_AND_ASSIGN(CriticalImagesBeaconFilter);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CRITICAL_IMAGES_BEACON_FILTER_H_
@@ -48,7 +48,7 @@ class CriticalImagesFinder {
// Checks whether IsCriticalImage will return meaningful results about
// critical images. Users of IsCriticalImage should check this function and
// supply a default behavior if IsMeaningful returns false.
virtual bool IsMeaningful() const = 0;
virtual bool IsMeaningful(const RewriteDriver* driver) const = 0;
// Checks whether the requested image is present in the critical set or not.
// Users of this function should also check IsMeaningful() to see if the
@@ -21,14 +21,12 @@
#include "net/instaweb/rewriter/public/critical_images_finder_test_base.h"
#include "net/instaweb/rewriter/public/critical_images_finder.h"
#include "net/instaweb/rewriter/public/rewrite_test_base.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/null_statistics.h"
#include "net/instaweb/util/public/property_cache.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class AbstractMutex;
class PropertyValue;
class RewriteDriver;
class CriticalImagesFinderTestBase : public RewriteTestBase {
@@ -46,8 +44,6 @@ class CriticalImagesFinderTestBase : public RewriteTestBase {
protected:
NullStatistics stats_;
virtual void SetUp();
// Resets the state of the driver.
void ResetDriver();
@@ -56,17 +52,6 @@ class CriticalImagesFinderTestBase : public RewriteTestBase {
private:
static const char kRequestUrl[];
class MockPage : public PropertyPage {
public:
MockPage(AbstractMutex* mutex, const StringPiece& key)
: PropertyPage(mutex, key) {}
virtual ~MockPage();
virtual void Done(bool valid) {}
private:
DISALLOW_COPY_AND_ASSIGN(MockPage);
};
};
} // namespace net_instaweb
@@ -51,6 +51,7 @@ class CssCombineFilter : public RewriteFilter {
virtual void StartElementImpl(HtmlElement* element);
virtual void EndElementImpl(HtmlElement* element) {}
virtual void Flush();
virtual void DetermineEnabled();
virtual void IEDirective(HtmlIEDirectiveNode* directive);
virtual const char* Name() const { return "CssCombine"; }
virtual const UrlSegmentEncoder* encoder() const {
@@ -104,6 +104,8 @@ class CssFilter : public RewriteFilter {
virtual const char* Name() const { return "CssFilter"; }
virtual const char* id() const { return RewriteOptions::kCssFilterId; }
virtual int FilterCacheFormatVersion() const;
virtual void EncodeUserAgentIntoResourceContext(
ResourceContext* context) const;
static const char kBlocksRewritten[];
static const char kParseFailures[];
@@ -125,6 +127,10 @@ class CssFilter : public RewriteFilter {
CssFilter::Context* rewriter, RewriteContext* parent,
CssHierarchy* hierarchy);
virtual const RewriteOptions::Filter* RelatedFilters(int* num_filters) const;
virtual const RewriteOptions::OptionEnum* RelatedOptions(
int* num_options) const;
protected:
virtual RewriteContext* MakeRewriteContext();
virtual const UrlSegmentEncoder* encoder() const;
@@ -275,6 +281,10 @@ class CssFilter::Context : public SingleRewriteContext {
virtual GoogleString CacheKeySuffix() const;
virtual const UrlSegmentEncoder* encoder() const;
// Implements UserAgentCacheKey method of RewriteContext.
virtual GoogleString UserAgentCacheKey(
const ResourceContext* resource_context) const;
private:
bool RewriteCssText(const GoogleUrl& css_base_gurl,
const GoogleUrl& css_trim_gurl,
@@ -115,12 +115,11 @@ class CssFlattenImportsContext : public SingleRewriteContext {
ServerContext* manager = FindServerContext();
manager->MergeNonCachingResponseHeaders(input_resource_, output_resource_);
if (manager->Write(ResourceVector(1, input_resource_),
hierarchy_->minified_contents(),
&kContentTypeCss,
input_resource_->charset(),
output_resource_.get(),
Driver()->message_handler())) {
if (Driver()->Write(ResourceVector(1, input_resource_),
hierarchy_->minified_contents(),
&kContentTypeCss,
input_resource_->charset(),
output_resource_.get())) {
RewriteDone(kRewriteOk, 0);
} else {
RewriteDone(kRewriteFailed, 0);
@@ -118,7 +118,7 @@ class CssHierarchy {
bool unparseable_detected() const { return unparseable_detected_; }
void set_unparseable_detected(bool ok) { unparseable_detected_ = ok; }
bool flattened_result_limit() const { return flattened_result_limit_; }
int64 flattened_result_limit() const { return flattened_result_limit_; }
void set_flattened_result_limit(int64 x) { flattened_result_limit_ = x; }
// If we haven't already, determine the charset of this CSS, then check if
@@ -22,6 +22,7 @@
#include <cstddef>
#include "net/instaweb/rewriter/public/common_filter.h"
#include "net/instaweb/rewriter/public/css_tag_scanner.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string.h"
@@ -59,6 +60,7 @@ class CssInlineFilter : public CommonFilter {
const size_t size_threshold_bytes_;
GoogleString domain_;
CssTagScanner css_tag_scanner_;
DISALLOW_COPY_AND_ASSIGN(CssInlineFilter);
};
@@ -44,6 +44,7 @@ class CssMoveToHeadFilter : public CommonFilter {
virtual void EndElementImpl(HtmlElement* element);
virtual const char* Name() const { return "CssMoveToHead"; }
virtual void DetermineEnabled();
private:
CssTagScanner css_tag_scanner_;
@@ -161,6 +161,15 @@ class CssRewriteTestBase : public RewriteTestBase {
const GoogleString& expected_css_output,
int flags);
// Makes an HTML document with an external CSS link.
GoogleString MakeHtmlWithExternalCssLink(const StringPiece& css_url,
int flags);
// Makes a CSS body with an external image link, with nice indentation.
GoogleString MakeIndentedCssWithImage(StringPiece image_url);
// Makes a minified CSS body with an external image link.
GoogleString MakeMinifiedCssWithImage(StringPiece image_url);
void ValidateRewrite(const StringPiece& id,
const GoogleString& css_input,
@@ -53,10 +53,23 @@ class CssTagScanner {
explicit CssTagScanner(HtmlParse* html_parse);
// Examines an HTML element to determine if it's a CSS link,
// extracting out the HREF and the media-type.
bool ParseCssElement(
HtmlElement* element, HtmlElement::Attribute** href, const char** media);
// Examines an HTML element to determine if it's a CSS link, extracting out
// the href, the media type (if any) and the number of nonstandard attributes
// found. If it's not CSS, href is set to NULL, media is set to "", and
// num_nonstandard_attributes is set to 0.
bool ParseCssElement(HtmlElement* element,
HtmlElement::Attribute** href,
const char** media,
int* num_nonstandard_attributes);
// Convenience overload for the many callers that don't care about
// num_nonstandard_attributes: forwards to the four-argument version and
// discards that value. Returns whatever the four-argument version returns.
bool ParseCssElement(HtmlElement* element,
HtmlElement::Attribute** href,
const char** media) {
// Zero-initialized so the discarded out-parameter never holds an
// indeterminate value, regardless of which path the callee takes.
int num_nonstandard_attributes = 0;
return ParseCssElement(element, href, media, &num_nonstandard_attributes);
}
// Scans the contents of a CSS file, looking for the pattern url(xxx).
// Performs an arbitrary mutation on all such URLs.
@@ -77,6 +90,9 @@ class CssTagScanner {
// Should be called with element->AttributeValue(HtmlName::kRel) as the arg.
static bool IsStylesheetOrAlternate(const StringPiece& attribute_value);
// Can this media attribute include some kind of screen?
static bool CanMediaAffectScreen(const StringPiece& media);
private:
DISALLOW_COPY_AND_ASSIGN(CssTagScanner);
};
@@ -25,14 +25,15 @@
namespace net_instaweb {
class DeviceProperties;
class MessageHandler;
class ResourceContext;
// This class implements the encoding of css urls with optional additional
// dimension metadata. It prepends characters indicating whether the
// user-agent allows for inlining or webp. We may need to employ distinct
// CSS files to these types of browsers. This information is conveyed in
// the ResourceContext.
// dimension metadata. For the legacy encoding, it used to prepend characters
// indicating whether the user-agent allows for inlining or webp. We may need
// to employ distinct CSS files for these types of browsers. This information
// is conveyed in the ResourceContext.
// http://..path../W.cssfile... CSS file optimized for webp-capable browsers.
// http://..path../I.cssfile... CSS file optimized for non-webp browsers
// that inline.
@@ -41,6 +42,10 @@ class ResourceContext;
// Note that a legacy CSS URL beginning with W., I., or A. will be
// misinterpreted and will not be fetchable since the Decode function
// will strip off the leading 2 characters.
//
// Note that a lot of this is legacy encoding now, and that we just
// unconditionally use the "A." encoding and rely on content hash and
// metadata cache + user-agent sniffing to keep things consistent.
class CssUrlEncoder : public UrlSegmentEncoder {
public:
CssUrlEncoder() {}
@@ -55,6 +60,10 @@ class CssUrlEncoder : public UrlSegmentEncoder {
ResourceContext* dim,
MessageHandler* handler) const;
// Sets Inlining of image according to the user agent.
static void SetInliningImages(const DeviceProperties& device_properties,
ResourceContext* resource_context);
private:
DISALLOW_COPY_AND_ASSIGN(CssUrlEncoder);
};
@@ -0,0 +1,79 @@
/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
// Helper class to make RewriteTestBase tests that use a custom options
// subclass.
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CUSTOM_REWRITE_TEST_BASE_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_CUSTOM_REWRITE_TEST_BASE_H_
#include <utility>
#include "net/instaweb/http/public/mock_url_fetcher.h"
#include "net/instaweb/rewriter/public/rewrite_test_base.h"
#include "net/instaweb/rewriter/public/test_rewrite_driver_factory.h"
#include "net/instaweb/util/public/gtest.h"
namespace net_instaweb {
// Test fixture base for RewriteTestBase tests that need a custom options
// subclass.  OptionsClass must be default-constructible and must provide
// static Initialize() and Terminate() methods.
template<class OptionsClass>
class CustomRewriteTestBase : public RewriteTestBase {
 public:
  // Driver factory whose NewRewriteOptions() hands out OptionsClass instances
  // instead of the default options type.
  class CustomTestRewriteDriverFactory : public TestRewriteDriverFactory {
   public:
    explicit CustomTestRewriteDriverFactory(MockUrlFetcher* url_fetcher)
        : TestRewriteDriverFactory(GTestTempDir(), url_fetcher) {
      InitializeDefaultOptions();
    }

    // Returns a newly allocated OptionsClass.
    virtual OptionsClass* NewRewriteOptions() {
      return new OptionsClass;
    }
  };

  // Builds both factories via MakeFactories() so that
  // OptionsClass::Initialize() runs before any OptionsClass is allocated.
  // NOTE(review): only the address of mock_url_fetcher_ is taken here, before
  // the base class is constructed -- confirm the factory constructors do not
  // dereference it.
  CustomRewriteTestBase() : RewriteTestBase(MakeFactories(&mock_url_fetcher_)) {
  }

  // Balances the OptionsClass::Initialize() call made in MakeFactories().
  virtual ~CustomRewriteTestBase() {
    OptionsClass::Terminate();
  }

  // Creates an additional factory of the custom type sharing this fixture's
  // mock fetcher.
  virtual TestRewriteDriverFactory* MakeTestFactory() {
    return new CustomTestRewriteDriverFactory(&mock_url_fetcher_);
  }

  // Intentionally empty.
  // NOTE(review): presumably these hide the RewriteTestBase versions because
  // Initialize()/Terminate() are handled per-fixture above -- confirm.
  static void SetUpTestCase() {
  }

  static void TearDownTestCase() {
  }

 private:
  // We must call the static Initialize method on the options class before
  // we construct a factory, which will 'new' the OptionsClass.
  static std::pair<TestRewriteDriverFactory*, TestRewriteDriverFactory*>
  MakeFactories(MockUrlFetcher* mock_fetcher) {
    OptionsClass::Initialize();
    // Qualified explicitly: the arguments are pointers to net_instaweb types,
    // so an unqualified make_pair would only resolve if another header
    // happened to supply a using-declaration for it.
    return std::make_pair(new CustomTestRewriteDriverFactory(mock_fetcher),
                          new CustomTestRewriteDriverFactory(mock_fetcher));
  }
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CUSTOM_REWRITE_TEST_BASE_H_
@@ -22,7 +22,6 @@
#include "base/logging.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/data_url.h"
#include "net/instaweb/util/public/scoped_ptr.h"
@@ -38,6 +37,8 @@ class ContentType;
class InputInfo;
class MessageHandler;
class RewriteOptions;
class ServerContext;
enum Encoding;
class DataUrlInputResource : public Resource {
@@ -76,23 +77,19 @@ class DataUrlInputResource : public Resource {
return NULL;
}
virtual bool UseHttpCache() const { return false; }
protected:
virtual bool Load(MessageHandler* message_handler);
virtual bool IsCacheable() const;
virtual void LoadAndCallback(NotCacheablePolicy not_cacheable_policy,
AsyncCallback* callback,
MessageHandler* message_handler);
private:
DataUrlInputResource(const GoogleString* url,
Encoding encoding,
const ContentType* type,
const StringPiece& encoded_contents,
ServerContext* server_context)
: Resource(server_context, type),
url_(url),
encoding_(encoding),
encoded_contents_(encoded_contents) {
// Make sure we auto-load.
Load(server_context->message_handler());
}
ServerContext* server_context);
scoped_ptr<const GoogleString> url_;
const Encoding encoding_;
@@ -1,5 +1,5 @@
/*
* Copyright 2012 Google Inc.
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,11 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_BASE_TAG_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_BASE_TAG_FILTER_H_
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_DECODE_REWRITTEN_URLS_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_DECODE_REWRITTEN_URLS_FILTER_H_
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
#include "net/instaweb/util/public/basictypes.h"
@@ -27,29 +24,22 @@ namespace net_instaweb {
class HtmlElement;
class RewriteDriver;
// Add this filter into the HtmlParse chain to add a base
// tag into the head section of an HTML document.
class BaseTagFilter : public EmptyHtmlFilter {
// Filter that decodes rewritten (.pagespeed.) URLs in HTML to origin URLs.
// TODO(sriharis): Also do this for URLs in CSS.
class DecodeRewrittenUrlsFilter : public EmptyHtmlFilter {
public:
explicit BaseTagFilter(RewriteDriver* driver)
: added_base_tag_(false),
driver_(driver) {}
explicit DecodeRewrittenUrlsFilter(RewriteDriver* driver) : driver_(driver) {}
virtual ~DecodeRewrittenUrlsFilter();
virtual ~BaseTagFilter();
virtual void StartDocument() {
added_base_tag_ = false;
}
virtual void StartElement(HtmlElement* element);
virtual const char* Name() const { return "BaseTag"; }
virtual const char* Name() const { return "DecodeRewrittenUrlsFilter"; }
private:
bool added_base_tag_;
RewriteDriver* driver_;
DISALLOW_COPY_AND_ASSIGN(BaseTagFilter);
DISALLOW_COPY_AND_ASSIGN(DecodeRewrittenUrlsFilter);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_BASE_TAG_FILTER_H_
#endif // NET_INSTAWEB_REWRITER_PUBLIC_DECODE_REWRITTEN_URLS_FILTER_H_
@@ -60,7 +60,7 @@ namespace net_instaweb {
class HtmlElement;
class RewriteDriver;
class StaticJavascriptManager;
class StaticAssetManager;
class DeferIframeFilter : public EmptyHtmlFilter {
public:
@@ -78,7 +78,7 @@ class DeferIframeFilter : public EmptyHtmlFilter {
private:
RewriteDriver* driver_;
StaticJavascriptManager* static_js_manager_;
StaticAssetManager* static_asset_manager_;
bool script_inserted_;
DISALLOW_COPY_AND_ASSIGN(DeferIframeFilter);
@@ -79,7 +79,7 @@ namespace net_instaweb {
class HtmlElement;
class RewriteDriver;
class StaticJavascriptManager;
class StaticAssetManager;
class Statistics;
class DelayImagesFilter : public EmptyHtmlFilter {
@@ -109,8 +109,12 @@ class DelayImagesFilter : public EmptyHtmlFilter {
// this node just after element.
void InsertDelayImagesInlineJS(HtmlElement* element);
// Returns a boolean for whether mobile aggressive rewriters are enabled and
// the current request is from a mobile user agent.
bool DisableInplaceLowResForMobile() const;
RewriteDriver* driver_;
StaticJavascriptManager* static_js_manager_;
StaticAssetManager* static_asset_manager_;
// pagespeed_low_res_src will be added to the low_res_data_map_ until
// low_res_inserted is false. As soon as low_res_map_inserted_ is true, there
@@ -124,9 +128,15 @@ class DelayImagesFilter : public EmptyHtmlFilter {
// end of body tag.
bool insert_low_res_images_inplace_;
// is_experimental_enabled_ is set to true if
// is_experimental_inline_preview_enabled_ is set to true if
// enable_inline_preview_images_experimental is true.
bool is_experimental_enabled_;
bool is_experimental_inline_preview_enabled_;
// lazyload_highres_images_ is set to true if lazyload_highres flag is true.
// It enables the feature that lazily loads the high res images after their
// low res versions are rendered. This flag is used especially in the case
// of mobile.
bool lazyload_highres_images_;
DISALLOW_COPY_AND_ASSIGN(DelayImagesFilter);
};
@@ -169,9 +169,9 @@ class DomainLawyer {
//
// Wildcards may not be used in the proxy_domain or origin_domain.
//
// Subdirectories should normally be used in both the proxy_domain and
// origin_domain. This is a not a strict requirement. If you fully
// control the entire origin domain and are dedicating a proxy domain
// Subdirectories should normally be used in the proxy_domain, the
// origin_domain, and to_domain. This is not a strict requirement. If you
// fully control the entire origin domain and are dedicating a proxy domain
// for the sole use of that origin domain then subdirectories are not needed.
//
// The proxy_domain must be running mod_pagespeed and configured
@@ -181,11 +181,15 @@ class DomainLawyer {
// The origin_domain does not need to run mod_pagespeed; it is used
// to fetch the resources.
//
// If to_domain is provided then resources are rewritten to to_domain instead
// of proxy_domain. This is useful for rewriting to a CDN.
//
// It is invalid to use the same origin_domain in AddProxyDomainMapping
// and to_domain of AddOriginDomainMapping. The latter requires
// overriding the Host: request-header on fetches.
bool AddProxyDomainMapping(const StringPiece& proxy_domain,
const StringPiece& origin_domain,
const StringPiece& to_domain_name,
MessageHandler* handler);
// Adds domain mappings that handle fetches on both http and https for the
@@ -41,13 +41,7 @@ class FileInputResource : public Resource {
const RewriteOptions* options,
const ContentType* type,
const StringPiece& url,
const StringPiece& filename)
: Resource(server_context, type),
url_(url.data(), url.size()),
filename_(filename.data(), filename.size()),
rewrite_options_(options) {
}
const StringPiece& filename);
virtual ~FileInputResource();
// Uses default no-op Freshen implementation because file-based resources
@@ -64,12 +58,15 @@ class FileInputResource : public Resource {
return rewrite_options_;
}
virtual bool UseHttpCache() const { return false; }
protected:
void SetDefaultHeaders(const ContentType* content_type,
ResponseHeaders* header, MessageHandler* handler);
virtual bool Load(MessageHandler* message_handler);
// Uses default, blocking LoadAndCallback implementation.
virtual void LoadAndCallback(NotCacheablePolicy not_cacheable_policy,
AsyncCallback* callback,
MessageHandler* message_handler);
private:
GoogleString url_;
@@ -21,6 +21,8 @@
#include <list>
#include "net/instaweb/htmlparse/public/html_writer_filter.h"
#include "net/instaweb/http/public/logging_proto.h"
#include "net/instaweb/http/public/logging_proto_impl.h"
#include "net/instaweb/http/public/semantic_type.h"
#include "net/instaweb/http/public/user_agent_matcher.h"
#include "net/instaweb/util/public/basictypes.h"
@@ -77,6 +79,12 @@ class FlushEarlyContentWriterFilter : public HtmlWriterFilter {
bool is_pagespeed_resource,
semantic_type::Category category);
void TryFlushingDeferJavascriptEarly();
// Returns the type of resource based on the url.
FlushEarlyResourceInfo::ResourceType GetResourceType(
const GoogleUrl& gurl, bool is_pagespeed_resource);
RewriteDriver* driver_;
TimedVariable* num_resources_flushed_early_;
// Whether we need to insert a close script tag at EndDocument.
@@ -40,7 +40,7 @@ class FlushEarlyInfoFinder {
// implementation does not, but classes inheriting likely do. Users of
// GetCharset should check this function and supply a default behavior if
// IsMeaningful returns false.
virtual bool IsMeaningful() const {
virtual bool IsMeaningful(const RewriteDriver* driver) const {
return false;
}
@@ -33,7 +33,7 @@ class MeaningfulFlushEarlyInfoFinder : public FlushEarlyInfoFinder {
public:
MeaningfulFlushEarlyInfoFinder() : num_compute_calls_(0) {}
virtual ~MeaningfulFlushEarlyInfoFinder() {}
virtual bool IsMeaningful() const {
virtual bool IsMeaningful(const RewriteDriver* driver) const {
return true;
}
virtual const char* GetCohort() const {
@@ -29,6 +29,7 @@
namespace net_instaweb {
class ImageDim;
class MessageHandler;
class Timer;
struct ContentType;
class Image {
@@ -51,7 +52,9 @@ class Image {
IMAGE_JPEG,
IMAGE_PNG,
IMAGE_GIF,
IMAGE_WEBP, // Update kImageTypeEnd if you add something after this.
IMAGE_WEBP,
IMAGE_WEBP_LOSSLESS_OR_ALPHA, // webps that are lossless or have transparency
// Update kImageTypeEnd if you add something after this.
};
enum PreferredWebp {
@@ -63,6 +66,7 @@ class Image {
struct CompressionOptions {
CompressionOptions()
: preferred_webp(WEBP_NONE),
allow_webp_alpha(false),
webp_quality(RewriteOptions::kDefaultImagesRecompressQuality),
jpeg_quality(RewriteOptions::kDefaultImagesRecompressQuality),
progressive_jpeg_min_bytes(
@@ -78,8 +82,14 @@ class Image {
retain_color_sampling(false),
retain_exif_data(false),
jpeg_num_progressive_scans(
RewriteOptions::kDefaultImageJpegNumProgressiveScans) {}
RewriteOptions::kDefaultImageJpegNumProgressiveScans),
webp_conversion_timeout_ms(-1),
conversions_attempted(0),
preserve_lossless(false) {}
// These options are set by the client to specify what type of
// conversion to perform:
PreferredWebp preferred_webp;
bool allow_webp_alpha;
int64 webp_quality;
int64 jpeg_quality;
int64 progressive_jpeg_min_bytes;
@@ -94,6 +104,12 @@ class Image {
bool retain_color_sampling;
bool retain_exif_data;
int jpeg_num_progressive_scans;
int64 webp_conversion_timeout_ms;
// These fields are set by the conversion routines to report
// characteristics of the conversion process.
int conversions_attempted;
bool preserve_lossless;
};
virtual ~Image();
@@ -103,7 +119,7 @@ class Image {
// Used for checking valid ImageType enum integer.
static const Type kImageTypeStart = IMAGE_UNKNOWN;
static const Type kImageTypeEnd = IMAGE_WEBP;
static const Type kImageTypeEnd = IMAGE_WEBP_LOSSLESS_OR_ALPHA;
// Stores the image dimensions in natural_dim (on success, sets
// natural_dim->{width, height} and
@@ -206,15 +222,6 @@ class Image {
// intent is that an Image is created in a scoped fashion from an existing known
// resource.
//
// The webp_preferred flag indicates that webp output should be produced rather
// than jpg, unless webp creation fails for any reason (in which case jpg is
// used as a fallback). It has no effect if original_contents are a non-jpg or
// non-webp image format.
//
// The jpeg_quality flag indicates what quality to use while recompressing jpeg
// images. Quality value of 75 is used as default for web images by most of the
// image libraries. Recommended setting for this is 85.
//
// The options should be set via Image::SetOptions after construction, before
// the image is used for anything but determining its natural dimension size.
//
@@ -224,12 +231,14 @@ Image* NewImage(const StringPiece& original_contents,
const GoogleString& url,
const StringPiece& file_prefix,
Image::CompressionOptions* options,
Timer* timer,
MessageHandler* handler);
// Creates a blank image of the given dimensions and type.
// For now, this is assumed to be an 8-bit 3-channel image.
Image* BlankImageWithOptions(int width, int height, Image::Type type,
const StringPiece& tmp_dir,
Timer* timer,
MessageHandler* handler,
Image::CompressionOptions* options);
@@ -38,6 +38,7 @@ namespace net_instaweb {
class CachedResult;
class ContentType;
class ImageDim;
class Histogram;
class ResourceContext;
class RewriteContext;
class RewriteDriver;
@@ -52,6 +53,50 @@ class WorkBound;
// rewritten urls, when in general those urls will be in a different domain.
class ImageRewriteFilter : public RewriteFilter {
public:
// Name for statistic used to bound rewriting work.
static const char kImageOngoingRewrites[];
// # of images that we decided not to rewrite because of size constraint.
static const char kImageNoRewritesHighResolution[];
// TimedVariable denoting image rewrites we dropped due to
// load (too many concurrent rewrites)
static const char kImageRewritesDroppedDueToLoad[];
// # of images not rewritten because the image MIME type is unknown.
static const char kImageRewritesDroppedMIMETypeUnknown[];
// # of images not rewritten because the server fails to write the merged
// html files.
static const char kImageRewritesDroppedServerWriteFail[];
// # of images not rewritten because the rewriting does not reduce the
// data size by a certain threshold. The image is resized in this case.
static const char kImageRewritesDroppedNoSavingResize[];
// # of images not rewritten because the rewriting does not reduce the
// data size by a certain threshold. The image is not resized in this case.
static const char kImageRewritesDroppedNoSavingNoResize[];
// TimedVariable denoting image squashing for mobile screen.
static const char kImageRewritesSquashingForMobileScreen[];
// Histogram for delays of successful image rewrites.
static const char kImageRewriteLatencyOkMs[];
// Histogram for delays of failed image rewrites.
static const char kImageRewriteLatencyFailedMs[];
// The property cache property name used to store URLs discovered when
// image_inlining_identify_and_cache_without_rewriting() is set in the
// RewriteOptions.
static const char kInlinableImageUrlsPropertyName[];
static const RewriteOptions::Filter kRelatedFilters[];
static const int kRelatedFiltersSize;
static const RewriteOptions::OptionEnum kRelatedOptions[];
static const int kRelatedOptionsSize;
explicit ImageRewriteFilter(RewriteDriver* driver);
virtual ~ImageRewriteFilter();
static void InitStats(Statistics* statistics);
@@ -60,6 +105,8 @@ class ImageRewriteFilter : public RewriteFilter {
virtual void EndElementImpl(HtmlElement* element);
virtual const char* Name() const { return "ImageRewrite"; }
virtual const char* id() const { return RewriteOptions::kImageCompressionId; }
virtual void EncodeUserAgentIntoResourceContext(
ResourceContext* context) const;
// Can we inline resource? If so, encode its contents into the data_url,
// otherwise leave data_url alone.
@@ -103,8 +150,9 @@ class ImageRewriteFilter : public RewriteFilter {
// Update desired image dimensions if necessary. Returns true if it is
// updated.
bool UpdateDesiredImageDimsIfNecessary(const ImageDim& image_dim,
ImageDim* desired_dim);
bool UpdateDesiredImageDimsIfNecessary(
const ImageDim& image_dim, const ResourceContext& resource_context,
ImageDim* desired_dim);
// Determines whether an image should be resized based on the current options.
//
@@ -125,35 +173,9 @@ class ImageRewriteFilter : public RewriteFilter {
const ResourceContext& context, const ResourcePtr& input_resource,
bool is_css);
// name for statistic used to bound rewriting work.
static const char kImageOngoingRewrites[];
// # of images that we decided not to rewrite because of size constraint.
static const char kImageNoRewritesHighResolution[];
// TimedVariable denoting image rewrites we dropped due to
// load (too many concurrent rewrites)
static const char kImageRewritesDroppedDueToLoad[];
// # of images not rewritten because the image MIME type is unknown.
static const char kImageRewritesDroppedMIMETypeUnknown[];
// # of images not rewritten because the server fails to write the merged
// html files.
static const char kImageRewritesDroppedServerWriteFail[];
// # of images not rewritten because the rewriting does not reduce the
// data size by a certain threshold. The image is resized in this case.
static const char kImageRewritesDroppedNoSavingResize[];
// # of images not rewritten because the rewriting does not reduce the
// data size by a certain threshold. The image is not resized in this case.
static const char kImageRewritesDroppedNoSavingNoResize[];
// The property cache property name used to store URLs discovered when
// image_inlining_identify_and_cache_without_rewriting() is set in the
// RewriteOptions.
static const char kInlinableImageUrlsPropertyName[];
virtual const RewriteOptions::Filter* RelatedFilters(int* num_filters) const;
virtual const RewriteOptions::OptionEnum* RelatedOptions(
int* num_options) const;
protected:
virtual const UrlSegmentEncoder* encoder() const;
@@ -214,10 +236,7 @@ class ImageRewriteFilter : public RewriteFilter {
// true if a PropertyValue was written.
bool StoreUrlInPropertyCache(const StringPiece& url);
// Sets resource_context.libwebp_level to indicate the level of webp
// support in the user agent..
void SetAttemptWebp(const StringPiece& url,
ResourceContext* resource_context);
bool SquashImagesForMobileScreenEnabled() const;
scoped_ptr<WorkBound> work_bound_;
@@ -244,6 +263,10 @@ class ImageRewriteFilter : public RewriteFilter {
Variable* image_rewrites_dropped_nosaving_noresize_;
// # of images not rewritten because of load.
TimedVariable* image_rewrites_dropped_due_to_load_;
// # of image squashings initiated for mobile screens. This may not be the
// actual # of images squashed, as squashing may fail or the rewritten image
// may be larger.
TimedVariable* image_rewrites_squashing_for_mobile_screen_;
// # of bytes saved from image rewriting (Note: This is computed at
// rewrite time not at serve time, so the number of bytes saved in
// transmission should be larger than this).
@@ -259,6 +282,10 @@ class ImageRewriteFilter : public RewriteFilter {
Variable* image_inline_count_;
// # of images rewritten into WebP format.
Variable* image_webp_rewrites_;
// Delay in milliseconds of successful image rewrites.
Histogram* image_rewrite_latency_ok_ms_;
// Delay in milliseconds of failed image rewrites.
Histogram* image_rewrite_latency_failed_ms_;
ImageUrlEncoder encoder_;
@@ -1,3 +1,4 @@
/*
* Copyright 2010 Google Inc.
*
@@ -36,11 +37,13 @@ class ImageTestBase : public testing::Test {
protected:
static const char kTestData[];
static const char kCuppa[];
static const char kCuppaTransparent[];
static const char kBikeCrash[];
static const char kIronChef[];
static const char kCradle[];
static const char kPuzzle[];
static const char kLarge[];
static const char kRedbrush[];
static const char kScenery[];
static const char kAppSegments[];
@@ -20,11 +20,14 @@
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/gtest_prod.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/url_segment_encoder.h"
namespace net_instaweb {
class DeviceProperties;
class RewriteDriver;
class MessageHandler;
// This class implements the encoding of image urls with optional additional
@@ -67,6 +70,29 @@ class ImageUrlEncoder : public UrlSegmentEncoder {
ResourceContext* dim,
MessageHandler* handler) const;
// Set LibWebp level according to the user agent.
// TODO(poojatandon): Pass a user agent object with its webp-capable bits
// pre-analyzed (not just the string from the request headers), since
// checking webp level related code doesn't belong here.
static void SetLibWebpLevel(const DeviceProperties& device_properties,
ResourceContext* resource_context);
// Sets webp and mobile capability in resource context.
//
// The parameters to this method are the rewrite driver and the resource
// context. The driver is not changed, so it is passed by const reference;
// the resource context is modified and can be NULL, hence it is passed as a
// pointer.
static void SetWebpAndMobileUserAgent(const RewriteDriver& driver,
ResourceContext* context);
// Set context for screen resolution.
static void SetUserAgentScreenResolution(
RewriteDriver* driver, ResourceContext* context);
// Helper function to generate Metadata cache key from ResourceContext.
static GoogleString CacheKeyFromResourceContext(
const ResourceContext& resource_context);
static bool HasDimensions(const ResourceContext& data) {
return (data.has_desired_image_dims() &&
HasValidDimensions(data.desired_image_dims()));
@@ -86,6 +112,16 @@ class ImageUrlEncoder : public UrlSegmentEncoder {
}
private:
FRIEND_TEST(ImageRewriteTest, SquashImagesForMobileScreen);
FRIEND_TEST(ImageUrlEncoderTest, UserAgentScreenResolution);
// Returns true if screen width and height are normalized according to a
// predefined list of screen resolutions (see implementation header document
// for more details).
static bool GetNormalizedScreenResolution(
int screen_width, int screen_height, int* normalized_width,
int* normalized_height);
DISALLOW_COPY_AND_ASSIGN(ImageUrlEncoder);
};
@@ -16,8 +16,8 @@
// Author: nikhilmadan@google.com (Nikhil Madan)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_AJAX_REWRITE_CONTEXT_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_AJAX_REWRITE_CONTEXT_H_
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_IN_PLACE_REWRITE_CONTEXT_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_IN_PLACE_REWRITE_CONTEXT_H_
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/content_type.h"
@@ -39,50 +39,52 @@
namespace net_instaweb {
class CacheUrlAsyncFetcher;
class InputInfo;
class MessageHandler;
class ResourceContext;
class RewriteDriver;
class RewriteFilter;
class Statistics;
class UrlAsyncFetcher;
class Variable;
// A resource-slot created for an ajax rewrite. This has an empty render method.
// Note that this class is usually used as a RefCountedPtr and gets deleted when
// there are no references remaining.
class AjaxRewriteResourceSlot : public ResourceSlot {
// A resource-slot created for an in-place rewrite. This has an empty render
// method. Note that this class is usually used as a RefCountedPtr and gets
// deleted when there are no references remaining.
class InPlaceRewriteResourceSlot : public ResourceSlot {
public:
explicit AjaxRewriteResourceSlot(const ResourcePtr& resource);
static const char kIproSlotLocation[];
explicit InPlaceRewriteResourceSlot(const ResourcePtr& resource);
// Implements ResourceSlot::Render().
virtual void Render();
// Implements ResourceSlot::LocationString().
virtual GoogleString LocationString() { return "ajax"; }
virtual GoogleString LocationString();
protected:
virtual ~AjaxRewriteResourceSlot();
virtual ~InPlaceRewriteResourceSlot();
private:
DISALLOW_COPY_AND_ASSIGN(AjaxRewriteResourceSlot);
DISALLOW_COPY_AND_ASSIGN(InPlaceRewriteResourceSlot);
};
// Context that is used for an ajax rewrite.
class AjaxRewriteContext : public SingleRewriteContext {
// Context that is used for an in-place rewrite.
class InPlaceRewriteContext : public SingleRewriteContext {
public:
// Stats variable name to keep track of how often in-place falls back to
// stream (due to a large resource) when Options->in_place_wait_for_optimized
// is true.
static const char kInPlaceOversizedOptStream[];
AjaxRewriteContext(RewriteDriver* driver, const StringPiece& url);
virtual ~AjaxRewriteContext();
InPlaceRewriteContext(RewriteDriver* driver, const StringPiece& url);
virtual ~InPlaceRewriteContext();
// Implements SingleRewriteContext::RewriteSingle().
virtual void RewriteSingle(const ResourcePtr& input,
const OutputResourcePtr& output);
// Implements RewriteContext::id().
virtual const char* id() const { return RewriteOptions::kAjaxRewriteId; }
virtual const char* id() const { return RewriteOptions::kInPlaceRewriteId; }
// Implements RewriteContext::kind().
virtual OutputResourceKind kind() const { return kRewrittenResource; }
// Implements RewriteContext::DecodeFetchUrls().
@@ -94,8 +96,14 @@ class AjaxRewriteContext : public SingleRewriteContext {
static void InitStats(Statistics* statistics);
bool perform_http_fetch() const { return perform_http_fetch_; }
void set_perform_http_fetch(bool x) { perform_http_fetch_ = x; }
bool proxy_mode() const { return proxy_mode_; }
void set_proxy_mode(bool x) { proxy_mode_ = x; }
virtual int64 GetRewriteDeadlineAlarmMs() const;
virtual GoogleString UserAgentCacheKey(
const ResourceContext* resource_context) const;
virtual void EncodeUserAgentIntoResourceContext(ResourceContext* context);
private:
friend class RecordingFetch;
@@ -115,6 +123,16 @@ class AjaxRewriteContext : public SingleRewriteContext {
// Update the date and expiry time based on the InputInfo's.
void UpdateDateAndExpiry(const protobuf::RepeatedPtrField<InputInfo>& inputs,
int64* date_ms, int64* expiry_ms);
// Returns true if kInPlaceOptimizeForBrowser is enabled and we
// actually need to do browser specific rewriting based on options.
bool InPlaceOptimizeForBrowserEnabled() const;
// Returns true if the "Vary: User-Agent" header should be added for the
// rewritten resource.
bool ShouldAddVaryUserAgent() const;
// No stale rewrites at all in the in place flow, since we will be actually
// serving out the stale value.
virtual bool do_stale_rewrite() const { return false; }
RewriteDriver* driver_;
GoogleString url_;
@@ -128,12 +146,20 @@ class AjaxRewriteContext : public SingleRewriteContext {
ResourcePtr input_resource_;
OutputResourcePtr output_resource_;
scoped_ptr<UrlAsyncFetcher> cache_fetcher_;
scoped_ptr<CacheUrlAsyncFetcher> cache_fetcher_;
// Should we fetch the contents if cache lookup fails?
bool perform_http_fetch_;
// Are we in proxy mode?
//
// True means that we are acting as a proxy and the user is depending on us
// to serve them the resource, thus we will fetch the contents over HTTP if
// not found in cache and ignore kRecentFetchNotCacheable and
// kRecentFetchFailed since we'll have to fetch the resource for users anyway.
//
// False means we are running on the origin, so we respect kRecent* messages
// and let the origin itself serve the resource.
bool proxy_mode_;
DISALLOW_COPY_AND_ASSIGN(AjaxRewriteContext);
DISALLOW_COPY_AND_ASSIGN(InPlaceRewriteContext);
};
// Records the fetch into the provided resource and passes through events to the
@@ -142,7 +168,7 @@ class RecordingFetch : public SharedAsyncFetch {
public:
RecordingFetch(AsyncFetch* async_fetch,
const ResourcePtr& resource,
AjaxRewriteContext* context,
InPlaceRewriteContext* context,
MessageHandler* handler);
virtual ~RecordingFetch();
@@ -159,7 +185,7 @@ class RecordingFetch : public SharedAsyncFetch {
private:
void FreeDriver();
bool CanAjaxRewrite();
bool CanInPlaceRewrite();
// By default RecordingFetch streams back the original content to the browser.
// If this returns false then the RecordingFetch should cache the original
@@ -168,8 +194,8 @@ class RecordingFetch : public SharedAsyncFetch {
MessageHandler* handler_;
ResourcePtr resource_;
AjaxRewriteContext* context_;
bool can_ajax_rewrite_;
InPlaceRewriteContext* context_;
bool can_in_place_rewrite_;
bool streaming_;
HTTPValue cache_value_;
HTTPValueWriter cache_value_writer_;
@@ -180,4 +206,4 @@ class RecordingFetch : public SharedAsyncFetch {
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_AJAX_REWRITE_CONTEXT_H_
#endif // NET_INSTAWEB_REWRITER_PUBLIC_IN_PLACE_REWRITE_CONTEXT_H_
@@ -67,7 +67,6 @@ class JavascriptFilter : public RewriteFilter {
virtual void StartElementImpl(HtmlElement* element);
virtual void Characters(HtmlCharactersNode* characters);
virtual void EndElementImpl(HtmlElement* element);
virtual void Flush();
virtual void IEDirective(HtmlIEDirectiveNode* directive);
virtual const char* Name() const { return "Javascript"; }
@@ -81,9 +80,15 @@ class JavascriptFilter : public RewriteFilter {
private:
class Context;
inline void CompleteScriptInProgress();
inline void RewriteInlineScript();
inline void RewriteExternalScript();
typedef enum {
kNoScript,
kExternalScript,
kInlineScript
} ScriptType;
inline void RewriteInlineScript(HtmlCharactersNode* body_node);
inline void RewriteExternalScript(
HtmlElement* script_in_progress, HtmlElement::Attribute* script_src);
// Lazily initialize config_ if it wasn't already.
void InitializeConfigIfNecessary() {
if (config_.get() != NULL) {
@@ -93,9 +98,7 @@ class JavascriptFilter : public RewriteFilter {
}
void InitializeConfig();
HtmlCharactersNode* body_node_;
HtmlElement* script_in_progress_;
HtmlElement::Attribute* script_src_;
ScriptType script_type_;
// some_missing_scripts indicates that we stopped processing a script and
// therefore can't assume we know all of the Javascript on a page.
bool some_missing_scripts_;
@@ -83,6 +83,7 @@ class JsCombineFilter : public RewriteFilter {
virtual void EndElementImpl(HtmlElement* element);
virtual void Characters(HtmlCharactersNode* characters);
virtual void Flush();
virtual void DetermineEnabled();
virtual void IEDirective(HtmlIEDirectiveNode* directive);
virtual const char* Name() const { return "JsCombine"; }
virtual RewriteContext* MakeRewriteContext();
@@ -27,7 +27,7 @@ namespace net_instaweb {
class HtmlElement;
class RewriteDriver;
class RewriteOptions;
class StaticJavascriptManager;
class StaticAssetManager;
class Statistics;
// Filter to lazyload images by replacing the src with a pagespeed_lazy_src
@@ -74,7 +74,6 @@ class Statistics;
class LazyloadImagesFilter : public CommonFilter {
public:
static const char* kImageLazyloadCode;
static const char* kBlankImageSrc;
static const char* kImageOnloadCode;
static const char* kLoadAllImages;
static const char* kOverrideAttributeFunctions;
@@ -92,7 +91,7 @@ class LazyloadImagesFilter : public CommonFilter {
static bool ShouldApply(RewriteDriver* driver);
static GoogleString GetLazyloadJsSnippet(
const RewriteOptions* options,
StaticJavascriptManager* static_js_manager);
StaticAssetManager* static_asset_manager);
private:
virtual void StartDocumentImpl();
@@ -104,7 +103,9 @@ class LazyloadImagesFilter : public CommonFilter {
// Clears all state associated with the filter.
void Clear();
static GoogleString GetBlankImageSrc(const RewriteOptions* options);
static GoogleString GetBlankImageSrc(
const RewriteOptions* options,
const StaticAssetManager* static_asset_manager);
// Inserts the lazyload JS code before the given element.
void InsertLazyloadJsCode(HtmlElement* element);
@@ -32,6 +32,8 @@ namespace net_instaweb {
class CachedResult;
class HtmlElement;
class RewriteDriver;
class Statistics;
class Variable;
/*
* The Local Storage Cache rewriter reduces HTTP requests by inlining resources
@@ -48,6 +50,14 @@ class LocalStorageCacheFilter : public RewriteFilter {
static const char kLscCookieName[];
static const char kLscInitializer[]; // public for the test harness only.
// Statistics' names.
static const char kCandidatesFound[];
static const char kStoredTotal[];
static const char kStoredImages[];
static const char kStoredCss[];
static const char kCandidatesAdded[];
static const char kCandidatesRemoved[];
// State information for an inline filter using LSC.
class InlineState {
public:
@@ -64,6 +74,9 @@ class LocalStorageCacheFilter : public RewriteFilter {
explicit LocalStorageCacheFilter(RewriteDriver* rewrite_driver);
virtual ~LocalStorageCacheFilter();
// May be called multiple times, if there are multiple statistics objects.
static void InitStats(Statistics* statistics);
virtual void StartDocumentImpl();
virtual void EndDocument();
virtual void StartElementImpl(HtmlElement* element);
@@ -116,7 +129,8 @@ class LocalStorageCacheFilter : public RewriteFilter {
HtmlElement* element);
// Remove the LSC attributes from the given element.
static void RemoveLscAttributes(HtmlElement* element);
static void RemoveLscAttributes(HtmlElement* element,
RewriteDriver* driver);
private:
void InsertOurScriptElement(HtmlElement* before);
@@ -134,6 +148,19 @@ class LocalStorageCacheFilter : public RewriteFilter {
// into the rewrite driver's cookies() - that must not change underneath us.
std::set<StringPiece> cookie_hashes_;
// # of times an img/link was found with a pagespeed_lsc_url attribute.
Variable* num_local_storage_cache_candidates_found_;
// # of times the hash of an img/link was found in the hash cookie.
Variable* num_local_storage_cache_stored_total_;
// # of times an img's hash was found in the hash cookie.
Variable* num_local_storage_cache_stored_images_;
// # of times a link's hash was found in the hash cookie.
Variable* num_local_storage_cache_stored_css_;
// # of times we added the hash and expiry attributes to a candidate img/link.
Variable* num_local_storage_cache_candidates_added_;
// # of times we removed the lsc attributes from a candidate img/link.
Variable* num_local_storage_cache_candidates_removed_;
DISALLOW_COPY_AND_ASSIGN(LocalStorageCacheFilter);
};
@@ -62,10 +62,15 @@ class OutputResource : public Resource {
const RewriteOptions* options,
OutputResourceKind kind);
virtual bool Load(MessageHandler* message_handler);
virtual void LoadAndCallback(NotCacheablePolicy not_cacheable_policy,
AsyncCallback* callback,
MessageHandler* handler);
// NOTE: url() will crash if the resource does not have a hash set yet.
// Specifically, this will occur if the resource has not been completely
// written yet. Before that point, the final URL cannot be known.
//
// Note: the OutputResource will never have a query string, even when
// ModPagespeedAddOptionsToUrls is on.
virtual GoogleString url() const;
// Returns the same as url(), but with a spoofed hash in case no hash
// was set yet. Use this for error reporting, etc. where you do not
@@ -131,6 +136,7 @@ class OutputResource : public Resource {
const GoogleString& unmapped_base() const { return unmapped_base_; }
const GoogleString& original_base() const { return original_base_; }
const ResourceNamer& full_name() const { return full_name_; }
ResourceNamer* mutable_full_name() { return &full_name_; }
StringPiece name() const { return full_name_.name(); }
StringPiece experiment() const { return full_name_.experiment(); }
StringPiece suffix() const;
@@ -156,7 +162,7 @@ class OutputResource : public Resource {
// to refactor this to check to see whether the desired resource is
// already known. For now we'll assume we can commit to serving the
// resource during the HTML rewriter.
bool IsWritten() const;
bool IsWritten() const { return writing_complete_; }
// Sets the type of the output resource, and thus also its suffix.
virtual void SetType(const ContentType* type);
@@ -207,7 +213,7 @@ class OutputResource : public Resource {
bool has_lock() const;
// This is called by CacheCallback::Done in rewrite_driver.cc.
void set_written(bool written) { writing_complete_ = true; }
// Sets the flag reported by IsWritten() to 'written'. The argument is
// honored, so passing false marks the resource as not (yet) written.
void SetWritten(bool written) { writing_complete_ = written; }
virtual const RewriteOptions* rewrite_options() const {
return rewrite_options_;
@@ -219,6 +225,8 @@ class OutputResource : public Resource {
Writer* BeginWrite(MessageHandler* message_handler);
void EndWrite(MessageHandler* message_handler);
virtual bool UseHttpCache() const { return true; }
protected:
virtual ~OutputResource();
REFCOUNT_FRIEND_DECLARATION(OutputResource);
@@ -142,8 +142,6 @@ class Resource : public RefCounted<Resource> {
StringPiece charset() const { return charset_; }
void set_charset(StringPiece c) { c.CopyToString(&charset_); }
virtual bool IsCacheableTypeOfResource() const { return true; }
// Gets the absolute URL of the resource
virtual GoogleString url() const = 0;
@@ -152,7 +150,7 @@ class Resource : public RefCounted<Resource> {
void DetermineContentType();
// Obtain rewrite options for this. Any resources which return true
// for IsCacheable() but don't unconditionally return true for loaded()
// for UseHttpCache() but don't unconditionally return true for loaded()
// must override this in a useful way. Used in cache invalidation.
virtual const RewriteOptions* rewrite_options() const = 0;
@@ -225,6 +223,11 @@ class Resource : public RefCounted<Resource> {
fetch_response_status_ = x;
}
// Returns whether this type of resource should use the HTTP Cache. This
// method is based on properties of the class, not the resource itself, and
// helps short-circuit pointless cache lookups for file-based and data URLs.
virtual bool UseHttpCache() const = 0;
protected:
virtual ~Resource();
REFCOUNT_FRIEND_DECLARATION(Resource);
@@ -234,17 +237,17 @@ class Resource : public RefCounted<Resource> {
friend class ResourceManagerHttpCallback;
// Load the resource asynchronously, storing ResponseHeaders and
// contents in cache. Returns true, if the resource is already
// loaded or loaded synchronously. Never reports uncacheable resources.
virtual bool Load(MessageHandler* message_handler) = 0;
// Same as Load, but calls a callback when finished. The ResourcePtr
// used to construct 'callback' must be the same as the resource used
// to invoke this method. If the resource is uncacheable, will only
// return true if not_cacheable_policy == kLoadEvenIfNotCacheable.
// contents in object. Calls 'callback' when finished. The
// ResourcePtr used to construct 'callback' must be the same as the
// resource used to invoke this method.
//
// Setting not_cacheable_policy to kLoadEvenIfNotCacheable will permit it
// to consider loading to be successful on Cache-Control:private and
// Cache-Control:no-cache resources. It should not affect /whether/ the
// callback gets invoked, only whether it gets true or false.
virtual void LoadAndCallback(NotCacheablePolicy not_cacheable_policy,
AsyncCallback* callback,
MessageHandler* message_handler);
MessageHandler* message_handler) = 0;
ServerContext* server_context_;
@@ -20,8 +20,8 @@
// changed extensively. Contact us at mod-pagespeed-discuss@googlegroups.com
// if you are interested in using it.
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_RESOURCE_FETCH_H_
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_RESOURCE_FETCH_H_
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_FETCH_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_FETCH_H_
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/request_context.h"
@@ -33,7 +33,6 @@ namespace net_instaweb {
class MessageHandler;
class ServerContext;
class RewriteDriver;
class RewriteDriverPool;
class RewriteOptions;
class SyncFetcherAdapterCallback;
class Timer;
@@ -81,8 +80,6 @@ class ResourceFetch : public SharedAsyncFetch {
// Exactly one of custom_options and driver_pool must be non-NULL.
static RewriteDriver* GetDriver(const GoogleUrl& url,
RewriteOptions* custom_options,
RewriteDriverPool* driver_pool,
bool using_spdy,
ServerContext* server_context,
const RequestContextPtr& request_ctx);
@@ -116,9 +113,9 @@ class ResourceFetch : public SharedAsyncFetch {
// If we're running an experiment and the url specifies an experiment spec,
// set custom_options to use that experiment spec. If custom_options is NULL
// one will be allocated and the caller takes ownership of it.
static void ApplyFuriousOptions(const ServerContext* server_context,
const GoogleUrl& url,
RewriteDriverPool* driver_pool,
static void ApplyFuriousOptions(const GoogleUrl& url,
const RequestContextPtr& request_ctx,
ServerContext* server_context,
RewriteOptions** custom_options);
GoogleUrl resource_url_;
@@ -135,4 +132,4 @@ class ResourceFetch : public SharedAsyncFetch {
} // namespace net_instaweb
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_RESOURCE_FETCH_H_
#endif // NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_FETCH_H_
@@ -46,7 +46,7 @@ class ResourceNamer {
bool Decode(const StringPiece& encoded_string);
// Encodes the fields in this encoder into an absolute url, with the
// trailing portion "NAME.pagespeed[.EXPT].ID.HASH.EXT".
// trailing portion "NAME.pagespeed[.(EXPT|PsolOpts)].ID.HASH.EXT".
GoogleString Encode() const;
// Encode a key that can be used to do a lookup based on an id
@@ -65,15 +65,18 @@ class ResourceNamer {
// Simple getters
StringPiece id() const { return id_; }
StringPiece options() const { return options_; }
StringPiece name() const { return name_; }
StringPiece hash() const { return hash_; }
StringPiece ext() const { return ext_; }
StringPiece experiment() const { return experiment_; }
bool has_experiment() const { return !experiment_.empty(); }
bool has_options() const { return !options_.empty(); }
// Simple setters
void set_id(const StringPiece& p) { p.CopyToString(&id_); }
void set_options(const StringPiece& opts) { opts.CopyToString(&options_); }
void set_name(const StringPiece& n) { n.CopyToString(&name_); }
void set_hash(const StringPiece& h) { h.CopyToString(&hash_); }
void set_ext(const StringPiece& e) {
@@ -101,6 +104,7 @@ class ResourceNamer {
bool LegacyDecode(const StringPiece& encoded_string);
GoogleString id_;
GoogleString options_;
GoogleString name_;
GoogleString hash_;
GoogleString ext_;
@@ -23,6 +23,7 @@
#include <vector>
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/server_context.h"
@@ -38,16 +39,14 @@
namespace net_instaweb {
class AsyncFetch;
class CachedResult;
class GoogleUrl;
class InputInfo;
class MessageHandler;
class NamedLock;
class OutputPartitions;
class ResourceContext;
class RequestTrace;
class ResponseHeaders;
class RewriteDriver;
class RewriteOptions;
class Statistics;
class Writer;
// A RewriteContext is all the contextual information required to
@@ -92,6 +91,33 @@ class Writer;
// RewriteDriver.
class RewriteContext {
public:
typedef std::vector<InputInfo*> InputInfoStarVector;
static const char kNumDeadlineAlarmInvocations[];
// Used to pass the result of the metadata cache lookups. Recipient must
// take ownership.
// Plain aggregate carrying the outcome of a metadata cache lookup.
// A default-constructed instance has both flags cleared, an empty
// 'revalidate' list, and a freshly allocated, empty OutputPartitions.
struct CacheLookupResult {
CacheLookupResult()
: cache_ok(false),
can_revalidate(false),
partitions(new OutputPartitions) {}
// Starts out false.
// NOTE(review): presumably set when the cached metadata is usable as-is --
// confirm against the code that fills this struct in.
bool cache_ok;
// Starts out false.
// NOTE(review): presumably set when the inputs listed in 'revalidate' may be
// re-checked instead of redoing the rewrite -- confirm.
bool can_revalidate;
// Raw InputInfo pointers; ownership is not expressed by the type.
// NOTE(review): these likely point into 'partitions' -- confirm.
InputInfoStarVector revalidate;
// Owned by this struct; never NULL immediately after construction.
scoped_ptr<OutputPartitions> partitions;
};
// Used for LookupMetadataForOutputResource.
// Abstract callback interface: subclasses must implement Done().
// Instances cannot be copied or assigned.
class CacheLookupResultCallback {
public:
CacheLookupResultCallback() {}
virtual ~CacheLookupResultCallback();
// Invoked with the metadata cache key and the result of the lookup.
// As noted on CacheLookupResult, the recipient must take ownership of
// 'result'.
virtual void Done(const GoogleString& cache_key,
CacheLookupResult* result) = 0;
private:
DISALLOW_COPY_AND_ASSIGN(CacheLookupResultCallback);
};
// Takes ownership of resource_context, which must be NULL or
// allocated with 'new'.
RewriteContext(RewriteDriver* driver, // exactly one of driver & parent
@@ -153,6 +179,25 @@ class RewriteContext {
AsyncFetch* fetch,
MessageHandler* message_handler);
// Attempts to lookup the metadata cache info that would be used for the
// output resource at url with the RewriteOptions set on driver.
//
// If there is a problem with the URL, returns false, and *error_out
// will contain an error message.
//
// If it can determine the metadata cache key successfully, returns true,
// and eventually callback will be invoked with the metadata cache key
// and the decoding results.
//
// Do not use the driver passed to this method for anything else.
//
// Note: this method is meant for debugging use only.
static bool LookupMetadataForOutputResource(
const GoogleString& url,
RewriteDriver* driver,
GoogleString* error_out,
CacheLookupResultCallback* callback);
// Runs after all Rewrites have been completed, and all nested
// RewriteContexts have completed and harvested.
//
@@ -196,7 +241,11 @@ class RewriteContext {
// If called with true, forces a rewrite and re-generates the output.
void set_force_rewrite(bool x) { force_rewrite_ = x; }
const ResourceContext* resource_context() { return resource_context_.get(); }
const ResourceContext* resource_context() const {
return resource_context_.get();
}
bool is_metadata_cache_miss() const { return is_metadata_cache_miss_; }
// Removes this RewriteContext from all slots. This is done normally when
// a RewriteContext is completed and we are ready to run the successors.
@@ -204,10 +253,28 @@ class RewriteContext {
// unhealthy.
void DetachSlots();
// Returns debug information about this RewriteContext.
GoogleString ToString(StringPiece prefix) const;
// Initializes statistics.
static void InitStats(Statistics* stats);
protected:
typedef std::vector<InputInfo*> InputInfoStarVector;
typedef std::vector<GoogleUrl*> GoogleUrlStarVector;
// Creates a new request trace associated with this context with a given
// |label|.
void AttachDependentRequestTrace(const StringPiece& label);
// Provides the dependent request trace associated with this context, if any.
// Note that this is distinct from the root user request trace, available
// in Driver().
RequestTrace* dependent_request_trace() { return dependent_request_trace_; }
// A convenience wrapper to log a trace annotation in both the request
// trace (if present) as well as the root user request trace (if present).
void TracePrintf(const char* fmt, ...);
// The following methods are provided for the benefit of subclasses.
// Finds the ServerContext associated with this context. Note that
@@ -244,8 +311,12 @@ class RewriteContext {
void StartNestedTasks();
// Deconstructs a URL by name and creates an output resource that
// corresponds to it.
// corresponds to it. If force_hash_to_zero is true, we are
// processing a stale rewrite, and we should use "0" as the hash
// so that the client browser sends a request for the freshened
// rewritten resource.
bool CreateOutputResourceForCachedOutput(const CachedResult* cached_result,
bool force_hash_to_zero,
OutputResourcePtr* output_resource);
// If this returns true, running the rewriter isn't required for
@@ -428,9 +499,30 @@ class RewriteContext {
// Indicates whether we are serving a stale rewrite.
bool stale_rewrite() const { return stale_rewrite_; }
// Returns an interval in milliseconds to wait when configuring the deadline
// alarm in FetchContext::SetupDeadlineAlarm(). Subclasses may configure the
// deadline based on rewrite type, e.g., IPRO vs. HTML-path.
virtual int64 GetRewriteDeadlineAlarmMs() const;
// Indicates user agent capabilities that must be stored in the cache key.
//
// Note that the context may be NULL as it may not be set before this. Since
// it isn't going to be modified in the method, ResourceContext is passed
// as a const pointer.
virtual GoogleString UserAgentCacheKey(
const ResourceContext* context) const {
return "";
}
// Encodes User Agent into the ResourceContext.
// A subclass ResourceContext should normally call
// RewriteFilter::EncodeUserAgentIntoResourceContext if it has access to
// a RewriteFilter.
virtual void EncodeUserAgentIntoResourceContext(ResourceContext* context) {}
private:
struct CacheLookupResult;
class OutputCacheCallback;
class LookupMetadataForOutputResourceCallback;
friend class OutputCacheCallback;
class HTTPCacheCallback;
friend class HTTPCacheCallback;
@@ -569,6 +661,13 @@ class RewriteContext {
// high-priority rewrite thread.
void StartNestedTasksImpl();
// Sets up all the state needed for Fetch, but doesn't register this context
// or actually start the rewrite process.
bool PrepareFetch(
const OutputResourcePtr& output_resource,
AsyncFetch* fetch,
MessageHandler* message_handler);
// Callback for metadata lookup on fetch path.
void FetchCacheDone(CacheLookupResult* cache_result);
@@ -583,6 +682,10 @@ class RewriteContext {
// spike or overload (kFallbackDiscretional).
bool CanFetchFallbackToOriginal(FallbackCondition circumstance) const;
// Whether stale rewrites should be done (only if within
// metadata_cache_staleness_threshold_ms). Default is true.
virtual bool do_stale_rewrite() const { return true; }
// To perform a rewrite, we need to have data for all of its input slots.
ResourceSlotVector slots_;
@@ -709,6 +812,14 @@ class RewriteContext {
// is stale.
bool stale_rewrite_;
// Indicates whether we have a metadata miss (or an unsuccessful revalidation
// attempt) on the html path.
bool is_metadata_cache_miss_;
// An optional request trace associated with this context. May be NULL.
// Always owned externally.
RequestTrace* dependent_request_trace_;
DISALLOW_COPY_AND_ASSIGN(RewriteContext);
};
@@ -428,6 +428,9 @@ class RewriteContextTestBase : public RewriteTestBase {
// Use a TTL value other than the implicit value, so we are sure we are using
// the original TTL value.
static const int64 kOriginTtlMs = 12 * Timer::kMinuteMs;
// A TTL value that is lower than the default implicit TTL value (300
// seconds).
static const int64 kLowOriginTtlMs = 5 * Timer::kSecondMs;
// Use a TTL value other than the implicit value, so we are sure we are using
// the original TTL value.
@@ -25,11 +25,11 @@
#include "base/logging.h"
#include "net/instaweb/htmlparse/public/html_element.h"
#include "net/instaweb/htmlparse/public/html_node.h"
#include "net/instaweb/htmlparse/public/html_parse.h"
#include "net/instaweb/http/public/content_type.h"
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/http/public/user_agent_matcher.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/server_context.h"
@@ -54,20 +54,20 @@ class AbstractMutex;
class AddInstrumentationFilter;
class AsyncFetch;
class CacheUrlAsyncFetcher;
class CollectSubresourcesFilter;
class CommonFilter;
class CriticalLineInfo;
class DebugFilter;
class DeviceProperties;
class DomainRewriteFilter;
class FileSystem;
class FlushEarlyInfo;
class FlushEarlyRenderInfo;
class Function;
class HtmlEvent;
class HtmlFilter;
class HtmlWriterFilter;
class LogRecord;
class MessageHandler;
class OutputResource;
class PropertyPage;
class RequestHeaders;
class RequestTrace;
@@ -81,7 +81,6 @@ class ScopedMutex;
class Statistics;
class UrlAsyncFetcher;
class UrlLeftTrimFilter;
class UserAgentMatcher;
class Writer;
// This extends class HtmlParse (which should be renamed HtmlContext) by providing
@@ -105,13 +104,6 @@ class RewriteDriver : public HtmlParse {
// being done in background, finishes.
};
// Lazily-initialized boolean value
enum LazyBool {
kNotSet = -1,
kFalse = 0,
kTrue = 1
};
// Indicates document's mimetype as XHTML, HTML, or is not
// known/something else. Note that in Apache we might not know the
// correct mimetype because a downstream module might change it.
@@ -142,20 +134,14 @@ class RewriteDriver : public HtmlParse {
// on every HTML request.
static const char kDomCohort[];
// The name of the property in the DomCohort that tracks the timestamp when
// we last received a request for this url.
// Property Names in DomCohort.
// Tracks the timestamp when we last received a request for this url.
static const char kLastRequestTimestamp[];
// The name of the property in the DomCohort that tracks whether we exceeded
// the maximum size limit of html which we should parse.
// Tracks if we exceeded the maximum size limit of html which we should parse.
static const char kParseSizeLimitExceeded[];
// This property is used to store information regarding the subresources
// associated with the HTML page.
// Flush Subresources Info associated with the HTML page.
static const char kSubresourcesPropertyName[];
// Key for storage of information regarding the status codes of previous
// responses.
// Status codes of previous responses.
static const char kStatusCodePropertyName[];
RewriteDriver(MessageHandler* message_handler,
@@ -196,24 +182,19 @@ class RewriteDriver : public HtmlParse {
bool MayCacheExtendScripts() const;
void RememberResource(const StringPiece& url, const ResourcePtr& resource);
const GoogleString& user_agent() const {
return user_agent_;
}
void set_user_agent(const StringPiece& user_agent_string) {
user_agent_string.CopyToString(&user_agent_);
user_agent_is_bot_ = kNotSet;
user_agent_supports_image_inlining_ = kNotSet;
user_agent_supports_js_defer_ = kNotSet;
user_agent_supports_webp_ = kNotSet;
user_agent_supports_webp_lossless_alpha_ = kNotSet;
is_mobile_user_agent_ = kNotSet;
user_agent_supports_split_html_ = kNotSet;
is_screen_resolution_set_ = kNotSet;
const GoogleString& user_agent() const { return user_agent_; }
void SetUserAgent(const StringPiece& user_agent_string);
const DeviceProperties* device_properties() const {
return device_properties_.get();
}
// Reinitializes device_properties_, clearing any cached values.
void ClearDeviceProperties();
// Returns true if the request we're rewriting was made using SPDY.
bool using_spdy() const { return using_spdy_; }
void set_using_spdy(bool x) { using_spdy_ = x; }
bool using_spdy() const { return request_context_->using_spdy(); }
bool write_property_cache_dom_cohort() const {
return write_property_cache_dom_cohort_;
@@ -261,24 +242,15 @@ class RewriteDriver : public HtmlParse {
}
const RequestHeaders* request_headers() const {
DCHECK(request_headers_ != NULL);
return request_headers_;
}
const UserAgentMatcher& user_agent_matcher() const {
UserAgentMatcher* user_agent_matcher() const {
DCHECK(server_context() != NULL);
return server_context()->user_agent_matcher();
}
bool UserAgentSupportsImageInlining() const;
bool UserAgentSupportsJsDefer() const;
bool UserAgentSupportsWebp() const;
bool UserAgentSupportsWebpLosslessAlpha() const;
bool IsMobileUserAgent() const;
bool GetScreenResolution(int* width, int* height);
void SetScreenResolution(int width, int height);
// Whether flush early flow is supported for this request.
bool SupportsFlushEarly() const;
bool UserAgentSupportsSplitHtml() const;
// Adds the filters from the options, specified by name in enabled_filters.
// This must be called explicitly after object construction to provide an
@@ -357,15 +329,18 @@ class RewriteDriver : public HtmlParse {
// Initiates an In-Place Resource Optimization (IPRO) fetch (A resource which
// is served under the original URL, but is still able to be rewritten).
//
// perform_http_fetch indicates whether or not an HTTP fetch should be done
// to get the resource if a cache lookup fails. Proxy implementations will
// want to set this to true because there is no other way to get the content.
// However, origin implementations will want to set this to false so that
// they can fall back to locally serving the contents.
// proxy_mode indicates whether we are running as a proxy where users
// depend on us to send contents. When set true, we will perform HTTP fetches
// to get contents if not in cache and will ignore kRecentFetchNotCacheable
// and kRecentFetchFailed since we'll have to fetch the resource for users
// anyway. Origin implementations (like mod_pagespeed) should set this to
// false and let the server serve the resource if it's not in cache.
//
// async_fetch->Done(false) will be called if perform_http_fetch is false
// and the resource could not be found in HTTP cache.
void FetchInPlaceResource(const GoogleUrl& gurl, bool perform_http_fetch,
// If proxy_mode is false and the resource could not be found in HTTP cache,
// async_fetch->Done(false) will be called and async_fetch->status_code()
// will be CacheUrlAsyncFetcher::kNotInCacheStatus (to distinguish this
// from a different reason for failure, like kRecentFetchNotCacheable).
void FetchInPlaceResource(const GoogleUrl& gurl, bool proxy_mode,
AsyncFetch* async_fetch);
// See FetchResource. There are two differences:
@@ -455,28 +430,6 @@ class RewriteDriver : public HtmlParse {
// deleted at the point the callback is invoked.
void FinishParseAsync(Function* callback);
// Prevent the EndElementEvent for element from flushing. If it has already
// flushed, this has no effect. Should only be called from an event listener.
// Useful for giving an active filter time to complete an RPC that provides
// data to append to element.
void InhibitEndElement(const HtmlElement* element);
// Permits the EndElementEvent for element to flush. If it was not previously
// prevented from doing so by InhibitEndElement, this has no effect. Should
// only be called from an active filter, in coordination with an event
// listener that called InhibitEndElement. If we are currently flushing,
// another flush will be scheduled as soon as this one finishes. If we are
// not, another flush will be scheduled immediately.
void UninhibitEndElement(const HtmlElement* element);
// Returns true if the EndElementEvent for element is inhibited from flushing.
bool EndElementIsInhibited(const HtmlElement* element);
// Will return true if the EndElementEvent of element is inhibited from
// flushing, and that event determined the size of the current flush. Will
// return false if a flush is not currently in progress.
bool EndElementIsStoppingFlush(const HtmlElement* element);
// Report error message with description of context's location
// (such as filenames and line numbers). context may be NULL, in which case
// the current parse position will be used.
@@ -522,6 +475,13 @@ class RewriteDriver : public HtmlParse {
const StringPiece& base_url, const StringPiece& filter_id,
const StringPiece& name, OutputResourceKind kind);
// Fills in the resource namer based on the given filter_id, name and options
// stored in the driver.
void PopulateResourceNamer(
const StringPiece& filter_id,
const StringPiece& name,
ResourceNamer* full_name);
// Version of CreateOutputResourceWithPath which first takes only the
// unmapped path and finds the mapped path using the DomainLawyer
// and the base_url is this driver's base_url.
@@ -651,6 +611,11 @@ class RewriteDriver : public HtmlParse {
// are no longer interested in its results.
void Cleanup();
// Debugging routines to print out data about the driver.
GoogleString ToString(bool show_detached_contexts);
void PrintState(bool show_detached_contexts); // For debugging.
void PrintStateToErrorLog(bool show_detached_contexts); // For logs.
// Wait for outstanding Rewrite to complete. Once the rewrites are
// complete they can be rendered.
void WaitForCompletion();
@@ -729,6 +694,10 @@ class RewriteDriver : public HtmlParse {
}
int max_page_processing_delay_ms() { return max_page_processing_delay_ms_; }
// Sets the device type chosen for the current property_page.
void set_device_type(UserAgentMatcher::DeviceType x) { device_type_ = x; }
UserAgentMatcher::DeviceType device_type() { return device_type_; }
// Tries to register the given rewrite context as working on
// its partition key. If this context is the first one to try to handle it,
// returns NULL. Otherwise returns the previous such context.
@@ -778,10 +747,8 @@ class RewriteDriver : public HtmlParse {
virtual void Flush();
// Initiates an asynchronous Flush. done->Run() will be called when
// the flush is complete. The inhibits_mutex_ will be held while the callback
// is running, so the callback should not attempt to inhibit or uninhibit
// an element. Further calls to ParseText should be deferred until the
// callback is called.
// the flush is complete. Further calls to ParseText should be deferred until
// the callback is called.
void FlushAsync(Function* done);
// Queues up a task to run on the (high-priority) rewrite thread.
@@ -920,6 +887,12 @@ class RewriteDriver : public HtmlParse {
// that browsers should parse it as XHTML.
XhtmlStatus MimeTypeXhtmlStatus();
void set_flushed_cached_html(bool x) { flushed_cached_html_ = x; }
bool flushed_cached_html() { return flushed_cached_html_; }
void set_flushing_cached_html(bool x) { flushing_cached_html_ = x; }
bool flushing_cached_html() { return flushing_cached_html_; }
void set_flushed_early(bool x) { flushed_early_ = x; }
bool flushed_early() { return flushed_early_; }
@@ -931,13 +904,6 @@ class RewriteDriver : public HtmlParse {
}
bool is_lazyload_script_flushed() { return is_lazyload_script_flushed_; }
void set_is_defer_javascript_script_flushed(bool x) {
is_defer_javascript_script_flushed_ = x;
}
bool is_defer_javascript_script_flushed() {
return is_defer_javascript_script_flushed_;
}
// This method is not thread-safe. Call it only from the html parser thread.
FlushEarlyInfo* flush_early_info();
@@ -972,19 +938,28 @@ class RewriteDriver : public HtmlParse {
// Sets the is_nested property on the driver.
void set_is_nested(bool n) { is_nested_ = n; }
bool is_nested() const { return is_nested_; }
// Sets must compute finder properties to true. Note that this value is
// sticky. Once it is set to true for a given request, it remains true till
// the driver is reset.
void enable_must_compute_finder_properties() {
must_compute_finder_properties_ = true;
}
bool must_compute_finder_properties() {
return must_compute_finder_properties_;
}
// Writes the specified contents into the output resource, and marks it
// as optimized. 'inputs' described the input resources that were used
// to construct the output, and is used to determine whether the
// result can be safely cache extended and be marked publicly cacheable.
// 'content_type' and 'charset' specify the mimetype and encoding of
// the contents, and will help form the Content-Type header.
// 'charset' may be empty when not specified.
//
// Note that this does not escape charset.
//
// Callers should take care that dangerous types like 'text/html' do not
// sneak into content_type.
bool Write(const ResourceVector& inputs,
const StringPiece& contents,
const ContentType* type,
StringPiece charset,
OutputResource* output);
private:
friend class RewriteContext;
friend class RewriteDriverTest;
friend class RewriteTestBase;
friend class ServerContextTest;
@@ -1088,12 +1063,6 @@ class RewriteDriver : public HtmlParse {
void AddPreRenderFilters();
void AddPostRenderFilters();
// After removing an inhibition, finish the parse if necessary.
void UninhibitFlushDone(Function* user_callback);
// Move anything on queue_ after the first inhibited event to deferred_queue_.
void SplitQueueIfNecessary();
// Helper function to decode the pagespeed url.
bool DecodeOutputResourceNameHelper(const GoogleUrl& url,
ResourceNamer* name_out,
@@ -1189,6 +1158,12 @@ class RewriteDriver : public HtmlParse {
bool flush_requested_;
bool flush_occurred_;
// If it is true, then cached html is flushed.
bool flushed_cached_html_;
// If it is true, then we are using this RewriteDriver to flush cached html.
bool flushing_cached_html_;
// If it is true, then the bytes were flushed before receiving bytes from the
// origin server.
bool flushed_early_;
@@ -1200,9 +1175,6 @@ class RewriteDriver : public HtmlParse {
// If it is set to true, then lazyload script is flushed with flush early
// flow.
bool is_lazyload_script_flushed_;
// If it is set to true, then defer_javascript script is flushed with flush
// early flow.
bool is_defer_javascript_script_flushed_;
// Set to true if RewriteDriver can be released.
bool release_driver_;
@@ -1212,15 +1184,6 @@ class RewriteDriver : public HtmlParse {
// this.
bool write_property_cache_dom_cohort_;
scoped_ptr<AbstractMutex> inhibits_mutex_;
typedef std::set <const HtmlElement*> ConstHtmlElementSet;
ConstHtmlElementSet end_elements_inhibited_; // protected by inhibits_mutex_
HtmlEventList deferred_queue_; // protected by inhibits_mutex_
Function* finish_parse_on_hold_; // protected by inhibits_mutex_
HtmlEvent* inhibiting_event_; // protected by inhibits_mutex_
bool flush_in_progress_; // protected by inhibits_mutex_
bool uninhibit_reflush_requested_; // protected by inhibits_mutex_
// Tracks the number of RewriteContexts that have been completed,
// but not yet deleted. Once RewriteComplete has been called,
// rewrite_context->Propagate() is called to render slots (if not
@@ -1240,24 +1203,9 @@ class RewriteDriver : public HtmlParse {
GoogleUrl decoded_base_url_;
GoogleString user_agent_;
// Properties of the user_agent_ that are computed once and cached.
mutable LazyBool user_agent_is_bot_;
mutable LazyBool user_agent_supports_image_inlining_;
mutable LazyBool user_agent_supports_js_defer_;
mutable LazyBool user_agent_supports_webp_;
mutable LazyBool user_agent_supports_webp_lossless_alpha_;
mutable LazyBool is_mobile_user_agent_;
mutable LazyBool supports_flush_early_;
mutable LazyBool user_agent_supports_split_html_;
LazyBool should_skip_parsing_;
LazyBool is_screen_resolution_set_;
int user_agent_screen_resolution_width_;
int user_agent_screen_resolution_height_;
// If true, request is known to have been made using SPDY.
bool using_spdy_;
mutable LazyBool supports_flush_early_;
StringFilterMap resource_filter_map_;
@@ -1374,6 +1322,9 @@ class RewriteDriver : public HtmlParse {
// Boolean value which tells whether property page is owned by driver or not.
bool owns_property_page_;
// Device type for the current property page.
UserAgentMatcher::DeviceType device_type_;
scoped_ptr<CriticalLineInfo> critical_line_info_;
// Stores all the critical images for the current URL.
@@ -1399,7 +1350,6 @@ class RewriteDriver : public HtmlParse {
// The total number of bytes for which ParseText is called.
int num_bytes_in_;
CollectSubresourcesFilter* collect_subresources_filter_;
DebugFilter* debug_filter_;
scoped_ptr<FlushEarlyInfo> flush_early_info_;
@@ -1413,9 +1363,6 @@ class RewriteDriver : public HtmlParse {
bool is_blink_request_;
bool can_rewrite_resources_;
// Indicates whether we must compute properties of any of the finders.
bool must_compute_finder_properties_;
// Additional request context that may outlive this RewriteDriver. (Thus,
// the context is reference counted.)
RequestContextPtr request_context_;
@@ -1425,9 +1372,17 @@ class RewriteDriver : public HtmlParse {
// True if this driver has been cloned from another to execute subordinate
// rewrites. Some logging operations aren't executed on nested rewrite
// drivers. Note that this is totally distinct from nested rewrite contexts.
// drivers, and timeout policies are changed. Note that this is totally
// distinct from nested rewrite contexts.
bool is_nested_;
scoped_ptr<DeviceProperties> device_properties_;
// Helps make sure RewriteDriver and its children are initialized exactly
// once, allowing for multiple calls to RewriteDriver::Initialize as long
// as they are matched to RewriteDriver::Terminate.
static int initialized_count_;
DISALLOW_COPY_AND_ASSIGN(RewriteDriver);
};
@@ -34,6 +34,8 @@ namespace net_instaweb {
class AbstractClientState;
class AbstractMutex;
class BlinkCriticalLineDataFinder;
class CacheHtmlInfoFinder;
class CriticalCssFinder;
class CriticalImagesFinder;
class FileSystem;
class FilenameEncoder;
@@ -49,7 +51,7 @@ class RewriteDriver;
class RewriteOptions;
class RewriteStats;
class Scheduler;
class StaticJavascriptManager;
class StaticAssetManager;
class Statistics;
class ThreadSystem;
class Timer;
@@ -68,7 +70,7 @@ class RewriteDriverFactory {
// Helper for users of defer_cleanup; see below.
template<class T> class Deleter;
enum WorkerPoolName {
enum WorkerPoolCategory {
kHtmlWorkers,
kRewriteWorkers,
kLowPriorityRewriteWorkers,
@@ -149,14 +151,14 @@ class RewriteDriverFactory {
FilenameEncoder* filename_encoder() { return filename_encoder_.get(); }
UrlNamer* url_namer();
UserAgentMatcher* user_agent_matcher();
StaticJavascriptManager* static_javascript_manager();
StaticAssetManager* static_asset_manager();
RewriteOptions* default_options() { return default_options_.get(); }
// These accessors are *not* thread-safe. They must be called once prior
// to forking threads, e.g. via ComputeUrlFetcher().
Timer* timer();
NamedLockManager* lock_manager();
QueuedWorkerPool* WorkerPool(WorkerPoolName pool);
QueuedWorkerPool* WorkerPool(WorkerPoolCategory pool);
Scheduler* scheduler();
UsageDataReporter* usage_data_reporter();
@@ -171,6 +173,8 @@ class RewriteDriverFactory {
// ServerContext is owned by the factory, and should not be
// deleted directly. Currently it is not possible to delete a
// server context except by deleting the entire factory.
//
// Implemented in terms of NewServerContext().
ServerContext* CreateServerContext();
// Initializes a ServerContext that has been new'd directly. This
@@ -299,12 +303,21 @@ class RewriteDriverFactory {
virtual Hasher* NewHasher() = 0;
// Creates a new ServerContext* object. ServerContext itself must be
// overridden per Factory as it has at least one pure virtual method.
virtual ServerContext* NewServerContext() = 0;
virtual CriticalCssFinder* DefaultCriticalCssFinder();
virtual CriticalImagesFinder* DefaultCriticalImagesFinder();
// Default implementation returns NULL.
virtual BlinkCriticalLineDataFinder* DefaultBlinkCriticalLineDataFinder(
PropertyCache* cache);
// Default implementation returns NULL.
virtual CacheHtmlInfoFinder* DefaultCacheHtmlInfoFinder(
PropertyCache* cache);
// Default implementation returns NULL.
virtual FlushEarlyInfoFinder* DefaultFlushEarlyInfoFinder();
@@ -322,7 +335,8 @@ class RewriteDriverFactory {
// Subclasses can override this to create an appropriately-sized thread
// pool for their environment. The default implementation will always
// make one with a single thread.
virtual QueuedWorkerPool* CreateWorkerPool(WorkerPoolName name);
virtual QueuedWorkerPool* CreateWorkerPool(WorkerPoolCategory pool,
StringPiece name);
// Subclasses can override this method to request load-shedding to happen
// if the low-priority work pool has too many inactive sequences queued up
@@ -344,15 +358,15 @@ class RewriteDriverFactory {
// filename_prefix()
virtual StringPiece LockFilePrefix();
// Initializes the StaticJavascriptManager.
virtual void InitStaticJavascriptManager(
StaticJavascriptManager* static_js_manager) {}
// Initializes the StaticAssetManager.
virtual void InitStaticAssetManager(
StaticAssetManager* static_asset_manager) {}
private:
// Creates a StaticJavascriptManager instance. Default implementation creates
// an instance that disables serving of filter javascript via gstatic
// Creates a StaticAssetManager instance. Default implementation creates an
// instance that disables serving of filter javascript via gstatic
// (gstatic.com is the domain google uses for serving static content).
StaticJavascriptManager* DefaultStaticJavascriptManager();
StaticAssetManager* DefaultStaticAssetManager();
void SetupSlurpDirectories();
void Init(); // helper-method for constructors.
@@ -368,7 +382,7 @@ class RewriteDriverFactory {
scoped_ptr<FilenameEncoder> filename_encoder_;
scoped_ptr<UrlNamer> url_namer_;
scoped_ptr<UserAgentMatcher> user_agent_matcher_;
scoped_ptr<StaticJavascriptManager> static_javascript_manager_;
scoped_ptr<StaticAssetManager> static_asset_manager_;
scoped_ptr<Timer> timer_;
scoped_ptr<Scheduler> scheduler_;
scoped_ptr<UsageDataReporter> usage_data_reporter_;
@@ -20,15 +20,16 @@
#define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_FILTER_H_
#include "net/instaweb/rewriter/public/common_filter.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class OutputResource;
class Resource;
class ResourceContext;
class RewriteContext;
class RewriteDriver;
class UrlSegmentEncoder;
@@ -48,11 +49,6 @@ class RewriteFilter : public CommonFilter {
// UsePropertyCacheDomCohort to return true.
virtual void DetermineEnabled();
// Create an input resource by decoding output_resource using the
// filter's. Assures legality by explicitly permission-checking the result.
ResourcePtr CreateInputResourceFromOutputResource(
OutputResource* output_resource);
// All RewriteFilters define how they encode URLs and other
// associated information needed for a rewrite into a URL.
// The default implementation handles a single URL with
@@ -78,6 +74,12 @@ class RewriteFilter : public CommonFilter {
virtual RewriteContext* MakeNestedRewriteContext(
RewriteContext* parent, const ResourceSlotPtr& slot);
// Encodes user agent information needed by the filter into ResourceContext.
// See additional header document for
// RewriteContext::EncodeUserAgentIntoResourceContext.
virtual void EncodeUserAgentIntoResourceContext(
ResourceContext* context) const {}
// Determine the charset of a script. Logic taken from:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/
// scripting-1.html#establish-script-block-source
@@ -111,9 +113,38 @@ class RewriteFilter : public CommonFilter {
const StringPiece attribute_charset,
const StringPiece enclosing_charset);
// Add this filter to the logged list of applied rewriters.
// Determines which filters are related to this RewriteFilter. Note,
// for example, that the ImageRewriteFilter class implements lots of
// different RewriteOptions::Filters.
//
// This is used for embedding the relevant enabled filter IDs. See
// the doc for RewriteOptions::add_options_to_urls_. We want to support
// that without bloating URLs excessively by adding unrelated filter settings.
//
// The vector is returned in numerically increasing order so binary_search
// is possible.
//
// *num_filters is set to the size of this array.
//
// Ownership of the filter-vector is not transferred to the caller; it
// is expected to return a pointer to a static vector.
virtual const RewriteOptions::Filter* RelatedFilters(int* num_filters) const;
// Determines which options are related to this RewriteFilter.
//
// The vector is returned in numerically increasing order so binary_search
// is possible.
//
// *num_options is set to the size of this array.
//
// Ownership of the option-vector is not transferred to the caller; it
// is expected to return a pointer to a static vector.
virtual const RewriteOptions::OptionEnum* RelatedOptions(
int* num_options) const;
protected:
// This class logs using id().
virtual void LogFilterModifiedContent();
virtual const char* LoggingId() { return id(); }
private:
// Filters should override this and return true if they write to the property
File diff suppressed because it is too large Load Diff
@@ -26,8 +26,11 @@ class MessageHandler;
class QueryParams;
class RequestHeaders;
class ResponseHeaders;
class RewriteDriver;
class RewriteDriverFactory;
class RewriteFilter;
class RewriteOptions;
class ServerContext;
class RewriteQuery {
public:
@@ -61,10 +64,18 @@ class RewriteQuery {
// If NULL is passed for request_headers or response_headers those particular
// headers will be skipped in the scan.
//
// 'allow_related_options' applies only to .pagespeed. resources.
// It enables the parsing of filters & options by ID, that have been
// declared in the RelatedOptions() and RelatedFilters() methods of
// the filter identified in the .pagespeed. URL. See GenerateResourceOption
// for how they get into URLs in the first place.
//
// TODO(jmarantz): consider allowing an alternative prefix to "ModPagespeed"
// to accommodate other Page Speed Automatic applications that might want to
// brand differently.
static Status Scan(RewriteDriverFactory* factory,
static Status Scan(bool allow_related_options,
RewriteDriverFactory* factory,
ServerContext* server_context,
GoogleUrl* request_url,
RequestHeaders* request_headers,
ResponseHeaders* response_headers,
@@ -84,20 +95,36 @@ class RewriteQuery {
RewriteOptions* options,
MessageHandler* handler);
// Given a two-letter filter ID string, generates a query-param for
// any settings in the driver's options that are related to the filter, and
// differ from the default. If no settings have been altered the
// empty string is returned.
static GoogleString GenerateResourceOption(StringPiece filter_id,
RewriteDriver* driver);
private:
// Returns true if the params/headers look like they might have some
// options. This is used as a cheap pre-scan before doing the more
// expensive query processing.
static bool MayHaveCustomOptions(const QueryParams& params,
const RequestHeaders* req_headers,
const ResponseHeaders* resp_headers);
// As above, but only for headers.
template <class HeaderT>
static bool HeadersMayHaveCustomOptions(const QueryParams& params,
const HeaderT* headers);
// Examines a name/value pair for options.
static Status ScanNameValue(const StringPiece& name,
const GoogleString& value,
RewriteOptions* options,
MessageHandler* handler);
// Parses a resource option based on the specified filter's related options.
static Status ParseResourceOption(StringPiece value, RewriteOptions* options,
const RewriteFilter* rewrite_filter);
};
} // namespace net_instaweb

Some files were not shown because too many files have changed in this diff Show More