incubator-pagespeed-ngx/net/instaweb/rewriter/public/rewrite_driver.h

/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: jmarantz@google.com (Joshua Marantz)

#ifndef NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_

#include <map>
#include <set>
#include <vector>

#include "base/logging.h"
#include "net/instaweb/http/public/cache_url_async_fetcher.h"
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/http/public/url_async_fetcher.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/critical_images_finder.h"
#include "net/instaweb/rewriter/public/critical_selector_finder.h"
#include "net/instaweb/rewriter/public/downstream_cache_purger.h"
#include "net/instaweb/rewriter/public/inline_attribute_slot.h"
#include "net/instaweb/rewriter/public/inline_resource_slot.h"
#include "net/instaweb/rewriter/public/output_resource.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_namer.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/rewrite_context.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/scan_filter.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/srcset_slot.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/atomic_bool.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/function.h"
#include "pagespeed/kernel/base/printf_format.h"
#include "pagespeed/kernel/base/proto_util.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/thread_annotations.h"
#include "pagespeed/kernel/base/thread_system.h"
#include "pagespeed/kernel/base/writer.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_filter.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/html/html_parse.h"
#include "pagespeed/kernel/http/content_type.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/request_headers.h"
#include "pagespeed/kernel/http/response_headers.h"
#include "pagespeed/kernel/http/user_agent_matcher.h"
#include "pagespeed/kernel/thread/queued_worker_pool.h"
#include "pagespeed/kernel/thread/scheduler.h"
#include "pagespeed/kernel/thread/sequence.h"
#include "pagespeed/kernel/util/categorized_refcount.h"
#include "pagespeed/kernel/util/url_segment_encoder.h"
#include "pagespeed/opt/http/property_cache.h"

namespace net_instaweb {

class AbstractLogRecord;
class AsyncFetch;
class CommonFilter;
class DebugFilter;
class DependencyTracker;
class DomStatsFilter;
class DomainRewriteFilter;
class FallbackPropertyPage;
class FileSystem;
class FlushEarlyInfo;
class HtmlWriterFilter;
class MessageHandler;
class RequestProperties;
class RequestTrace;
class RewriteDriverPool;
class RewriteFilter;
class Statistics;
class UrlLeftTrimFilter;
class UrlNamer;

// This extends class HtmlParse (which should be renamed HtmlContext) by
// providing context for rewriting resources (css, js, images).
class RewriteDriver : public HtmlParse {
 public:
  // Result codes reported by ResolveCssUrls.
  enum CssResolutionStatus {
    kWriteFailed = 0,
    kNoResolutionNeeded = 1,
    kSuccess = 2
  };

  // Wait mode passed to BoundedWaitForCompletion.
  enum WaitMode {
    // Used internally. Do not pass in.
    kNoWait = 0,
    // Wait for everything to complete (up to deadline).
    kWaitForCompletion = 1,
    // Wait for at least cached rewrites to complete, and anything else that
    // finishes within deadline.
    kWaitForCachedRender = 2,
    // Makes sure that all work, including any that's being done in
    // background, finishes.
    kWaitForShutDown = 3
  };

  // Records whether the document's mimetype is XHTML, is HTML, or is not
  // known/something else.  In Apache the correct mimetype might not be
  // known, because a downstream module might change it.  It's not clear how
  // likely this is, since mod_rewrite and mod_mime run upstream of
  // mod_pagespeed.  However, if anyone sets the mimetype via "Header Add",
  // that would affect the browser's view of the document's mimetype (which
  // is what determines the parsing) but mod_pagespeed would not know.
  //
  // Note that doctype().IsXhtml() also exists, but that indicates quirks-mode
  // for CSS, and does not control how the parser parses the document.
  enum XhtmlStatus {
    kXhtmlUnknown = 0,
    kIsXhtml = 1,
    kIsNotXhtml = 2
  };

  // Whether resources from unauthorized domains may be inlined.
  // See CreateInputResource.
  enum InlineAuthorizationPolicy {
    kInlineUnauthorizedResources = 0,
    kInlineOnlyAuthorizedResources = 1
  };

  // Whether a resource will be inlined after fetching or used generally.
  // See CreateInputResource.
  enum IntendedFor {
    kIntendedForInlining = 0,
    kIntendedForGeneral = 1
  };

  // This string identifies, for the PropertyCache, a group of properties
  // that are computed from the DOM, and thus can, if desired, be rewritten
  // on every HTML request.
  static const char kDomCohort[];
  // The cohort for properties that are written by the beacon handler.
  static const char kBeaconCohort[];
  // Cohort for dependency information. This is written at a different time
  // than kDomCohort, and might not be in use for some requests, depending on
  // settings.
  static const char kDependenciesCohort[];

  // Property Names in DomCohort.
  // Tracks the timestamp when we last received a request for this url.
  static const char kLastRequestTimestamp[];
  // Tracks if we exceeded the maximum size limit of html which we should parse.
  static const char kParseSizeLimitExceeded[];
  // Flush Subresources Info associated with the HTML page.
  static const char kSubresourcesPropertyName[];
  // Status codes of previous responses.
  static const char kStatusCodePropertyName[];

  RewriteDriver(MessageHandler* message_handler,
                FileSystem* file_system,
                UrlAsyncFetcher* url_async_fetcher);

  // Need explicit destructors to allow destruction of scoped_ptr-controlled
  // instances without propagating the include files.
  virtual ~RewriteDriver();

  // Returns a fresh instance using the same options we do, using the same log
  // record. Drivers should only be cloned within the same request.
  //
  // Clones share the same request_context, which contains bits derived from the
  // request headers, so request_headers_ is also cloned (or shared if we make
  // them shareable).
  //
  // You must call SetRequestHeaders before calling Clone.
  RewriteDriver* Clone();

  // Clears the current request cache of resources and base URL.  The
  // filter-chain is left intact so that a new request can be issued.
  // Deletes all RewriteContexts.
  //
  // WaitForCompletion must be called prior to Clear().
  void Clear();

  // Initialize statistics for all filters that need it.
  static void InitStats(Statistics* statistics);

  // Initialize statics.  Initialize/Terminate calls must be paired.
  static void Initialize();
  static void Terminate();

  // Formats a "deadline exceeded" message for a given filter.
  static GoogleString DeadlineExceededMessage(StringPiece filter_name);

  // Sets a server context enabling the rewriting of
  // resources. This will replace any previous server context.
  void SetServerContext(ServerContext* server_context);

  // Returns true if we may cache extend Css, Images, PDFs, or Scripts
  // respectively.
  bool MayCacheExtendCss() const;
  bool MayCacheExtendImages() const;
  bool MayCacheExtendPdfs() const;
  bool MayCacheExtendScripts() const;

  // Returns the user-agent string held by this driver.
  const GoogleString& user_agent() const {
    return user_agent_;
  }

  // Returns a read-only pointer to the RequestProperties held in
  // request_properties_.
  const RequestProperties* request_properties() const {
    return request_properties_.get();
  }

  // Reinitializes request_properties_, clearing any cached values.
  void ClearRequestProperties();

  // Returns the current value of the write_property_cache_dom_cohort_ flag.
  bool write_property_cache_dom_cohort() const {
    return write_property_cache_dom_cohort_;
  }
  // Sets the write_property_cache_dom_cohort_ flag to x.
  void set_write_property_cache_dom_cohort(bool x) {
    write_property_cache_dom_cohort_ = x;
  }

  // Returns the list of cohorts that should be read in based on
  // our options.
  static PropertyCache::CohortVector GetCohortList(
      const PropertyCache* pcache, const RewriteOptions* options,
      const ServerContext* server_context);

  // Should be called once everything in the property cache has been read,
  // and the pages set on the object.
  void PropertyCacheSetupDone();

  // Returns the request context held by this driver.
  RequestContextPtr request_context() {
    return request_context_;
  }
  void set_request_context(const RequestContextPtr& x);

  // Convenience method to return the trace context from the request_context()
  // if both are configured and NULL otherwise.
  RequestTrace* trace_context();

  // Convenience methods to issue a trace annotation if tracing is enabled.
  // If tracing is disabled, these methods are no-ops.
  void TracePrintf(const char* fmt, ...);
  void TraceLiteral(const char* literal);
  void TraceString(const GoogleString& s);

  // Gives filters a mutable pointer to the response headers so they can
  // update them before the first flush.  Once Flush has occurred this
  // returns NULL.
  ResponseHeaders* mutable_response_headers() {
    if (flush_occurred_) {
      return NULL;
    }
    return response_headers_;
  }

  // Returns a const view of the ResponseHeaders*, independent of whether
  // Flush has occurred.  Note that the returned pointer may still be NULL if
  // no one has called set_response_headers_ptr.
  //
  // TODO(jmarantz): Change API to require response_headers in StartParse so
  // we can guarantee this is non-null.
  const ResponseHeaders* response_headers() { return response_headers_; }

  // Set the pointer to the response headers that filters can update
  // before the first flush.  RewriteDriver does NOT take ownership
  // of this memory; the pointer is simply stored in response_headers_.
  void set_response_headers_ptr(ResponseHeaders* headers) {
    response_headers_ = headers;
  }

  // Reinitializes request_headers_ (a scoped ptr) with a copy of the original
  // request headers. Note that the fetches associated with the driver could
  // be using a modified version of the original request headers.
  // There MUST be exactly 1 call to this method after a rewrite driver object
  // has been constructed or recycled, before the RewriteDriver is used for
  // request processing.
  //
  // This method also sets up the user-agent and device properties.
  void SetRequestHeaders(const RequestHeaders& headers);

  // Returns a read-only pointer to the driver's copy of the request headers
  // (see SetRequestHeaders).
  const RequestHeaders* request_headers() const {
    return request_headers_.get();
  }

  // Returns the UserAgentMatcher supplied by the server context.  A server
  // context must already be set on this driver; that is DCHECKed.
  UserAgentMatcher* user_agent_matcher() const {
    DCHECK(server_context() != NULL);
    UserAgentMatcher* matcher = server_context()->user_agent_matcher();
    return matcher;
  }

  // Adds the filters from the options, specified by name in enabled_filters.
  // This must be called explicitly after object construction to provide an
  // opportunity to programmatically add custom filters beyond those defined
  // in RewriteOptions, via AddFilter(HtmlFilter* filter) (below).
  void AddFilters();

  // Adds a filter to the very beginning of the pre-render chain, taking
  // ownership.  This should only be used for filters that must run before any
  // filter added via PrependOwnedPreRenderFilter.
  void AddOwnedEarlyPreRenderFilter(HtmlFilter* filter);

  // Adds a filter to the beginning of the pre-render chain, taking ownership.
  void PrependOwnedPreRenderFilter(HtmlFilter* filter);
  // Adds a filter to the end of the pre-render chain, taking ownership.
  void AppendOwnedPreRenderFilter(HtmlFilter* filter);
  // Same, without taking ownership.
  void AppendUnownedPreRenderFilter(HtmlFilter* filter);

  // Adds a filter to the end of the post-render chain, taking ownership.
  void AddOwnedPostRenderFilter(HtmlFilter* filter);
  // Same, without taking ownership.
  void AddUnownedPostRenderFilter(HtmlFilter* filter);

  // Add a RewriteFilter to the end of the pre-render chain and take ownership
  // of the filter.  This differs from AppendOwnedPreRenderFilter in that
  // it adds the filter's ID into a dispatch table for serving
  // rewritten resources.  E.g. if your filter->id == "xy" and
  // FetchResource("NAME.pagespeed.xy.HASH.EXT"...)  is called, then
  // RewriteDriver will dispatch to filter->Fetch().
  //
  // This is used when the filter being added is not part of the
  // core set built into RewriteDriver and RewriteOptions, such
  // as platform-specific or server-specific filters, or filters
  // invented for unit-testing the framework.
  void AppendRewriteFilter(RewriteFilter* filter);

  // Like AppendRewriteFilter, but adds the filter to the beginning of the
  // pre-render chain.
  void PrependRewriteFilter(RewriteFilter* filter);

  // Tells RewriteDriver that a certain portion of URL namespace should not
  // be handled via usual (HTTP proxy semantics) means. It's up to
  // the filters to actually arrange for that to do something.
  // Takes ownership of the claimant object. Note that it's important for the
  // claims to be disjoint, since the RewriteContext framework needs to
  // be able to assign compatible Resource objects for same URLs/slots among
  // all filters that deal with them.
  void AddResourceUrlClaimant(ResourceUrlClaimant* claimant);

  // Controls how HTML output is written.  Be sure to call this last, after
  // all other filters have been established.
  //
  // TODO(jmarantz): fix this in the implementation so that the caller can
  // install filters in any order and the writer will always be last.
  void SetWriter(Writer* writer);

  // Returns the Writer pointer held in writer_.
  Writer* writer() const {
    return writer_;
  }

  // Initiates an async fetch for a rewritten resource with the specified name.
  // If url matches the pattern of what the driver is authorized to serve,
  // then true is returned and the caller must listen on the callback for
  // the completion of the request.
  //
  // If the driver is not authorized to serve the resource for any of the
  // following reasons, false is returned and the callback will -not- be
  // called - the request should be passed to another handler.
  // * The URL is invalid or it does not match the general pagespeed pattern.
  // * The filter id in the URL does not map to a known filter.
  // * The filter for the id in the URL doesn't recognize the format of the URL.
  // * The filter for the id in the URL is forbidden.
  //
  // In other words there are three outcomes for this routine:
  //   1. the request was handled immediately and the callback called
  //      before the method returns.  true is returned.
  //   2. the request looks good but was queued because some other resource
  //      fetch is needed to satisfy it.  true is returned.
  //   3. the request does not look like it belongs to Instaweb.  The callback
  //      will not be called, and false will be returned.
  //
  // In even other words, if this routine returns 'false' then the callback
  // will not be called.  If the callback -is- called, then this should be the
  // 'final word' on this request, whether it was called with success=true or
  // success=false.
  //
  // Note that if the request headers have not yet been set on the driver then
  // they'll be taken from the fetch.
  bool FetchResource(const StringPiece& url, AsyncFetch* fetch);

  // Initiates an In-Place Resource Optimization (IPRO) fetch (A resource which
  // is served under the original URL, but is still able to be rewritten).
  //
  // proxy_mode indicates whether we are running as a proxy where users
  // depend on us to send contents. When set true, we will perform HTTP fetches
  // to get contents if not in cache and will ignore kRecentFetchNotCacheable
  // and kRecentFetchFailed since we'll have to fetch the resource for users
  // anyway. Origin implementations (like mod_pagespeed) should set this to
  // false and let the server serve the resource if it's not in cache.
  //
  // If proxy_mode is false and the resource could not be found in HTTP cache,
  // async_fetch->Done(false) will be called and async_fetch->status_code()
  // will be CacheUrlAsyncFetcher::kNotInCacheStatus (to distinguish this
  // from a different reason for failure, like kRecentFetchNotCacheable).
  //
  // Note that if the request headers have not yet been set on the driver then
  // they'll be taken from the fetch.
  void FetchInPlaceResource(const GoogleUrl& gurl, bool proxy_mode,
                            AsyncFetch* async_fetch);

  // See FetchResource.  There are three differences:
  //   1. It takes an OutputResource instead of a URL.
  //   2. It returns whether a fetch was queued or not.  This is safe
  //      to ignore because in either case the callback will be called.
  //   3. If 'filter' is NULL then the request only checks cache and
  //      (if enabled) the file system.
  bool FetchOutputResource(const OutputResourcePtr& output_resource,
                           RewriteFilter* filter,
                           AsyncFetch* async_fetch);

  // Attempts to decode an output resource based on the URL pattern
  // without actually rewriting it. No permission checks are performed on the
  // url, though it is parsed to see if it looks like the url of a generated
  // resource (which should mean checking the hash to ensure we generated it
  // ourselves).
  // TODO(jmaessen): add url hash & check thereof.
  OutputResourcePtr DecodeOutputResource(const GoogleUrl& url,
                                         RewriteFilter** filter) const;

  // As above, but does not actually create a resource object,
  // and instead outputs the decoded information into the various out
  // parameters. Returns whether decoding was successful or not.
  // Uses options_to_use rather than this->options() to determine which
  // drivers are forbidden from applying, etc.
  bool DecodeOutputResourceName(const GoogleUrl& url,
                                const RewriteOptions* options_to_use,
                                const UrlNamer* url_namer,
                                ResourceNamer* name_out,
                                OutputResourceKind* kind_out,
                                RewriteFilter** filter_out) const;

  // Attempts to lookup the metadata cache info that would be used for the
  // output resource at url with the RewriteOptions set on this driver.
  //
  // If there is a problem with the URL, returns false, and *error_out
  // will contain an error message.
  //
  // If it can determine the metadata cache key successfully, returns true,
  // and eventually callback will be invoked with the metadata cache key
  // and the decoding results.
  //
  // After calling this method, the driver should not be used for anything else.
  bool LookupMetadataForOutputResource(
      StringPiece url,
      GoogleString* error_out,
      RewriteContext::CacheLookupResultCallback* callback);

  // Decodes the incoming pagespeed url to original url(s).
  bool DecodeUrl(const GoogleUrl& url,
                 StringVector* decoded_urls) const;

  // As above, but lets one specify the options and URL namer to use.
  // Meant for use with the decoding_driver.
  bool DecodeUrlGivenOptions(const GoogleUrl& url,
                             const RewriteOptions* options,
                             const UrlNamer* url_namer,
                             StringVector* decoded_urls) const;

  // Returns the FileSystem pointer held by this driver.
  FileSystem* file_system() {
    return file_system_;
  }
  // Returns the UrlAsyncFetcher pointer held in url_async_fetcher_.
  UrlAsyncFetcher* async_fetcher() {
    return url_async_fetcher_;
  }

  // Set a fetcher that will be used by RewriteDriver for current request
  // only (that is, until Clear()). RewriteDriver will take ownership of this
  // fetcher, and will keep it around until Clear(), even if further calls
  // to this method are made.
  void SetSessionFetcher(UrlAsyncFetcher* f);

  // Returns the fetcher pointer last stored via set_distributed_fetcher.
  UrlAsyncFetcher* distributed_fetcher() {
    return distributed_async_fetcher_;
  }
  // Stores fetcher in distributed_async_fetcher_.  Does not take ownership.
  void set_distributed_fetcher(UrlAsyncFetcher* fetcher) {
    distributed_async_fetcher_ = fetcher;
  }

  // Creates a cache fetcher that uses the driver's fetcher and its options.
  // Note: this means the driver's fetcher must survive as long as this does.
  CacheUrlAsyncFetcher* CreateCacheFetcher();
  // Returns a cache fetcher that does not fall back to an actual fetcher.
  CacheUrlAsyncFetcher* CreateCacheOnlyFetcher();

  // Returns the ServerContext pointer held by this driver.
  ServerContext* server_context() const {
    return server_context_;
  }
  Statistics* statistics() const;

  // Takes ownership of 'options'.  Installs them with a NULL controlling pool
  // by delegating to set_options_for_pool.
  void set_custom_options(RewriteOptions* options) {
    set_options_for_pool(NULL, options);
  }

  // Takes ownership of 'options'. pool denotes the pool of rewrite drivers that
  // use these options. May be NULL if using custom options.
  //
  // Records 'pool' as the controlling pool, then resets options_ to hold
  // 'options'.
  void set_options_for_pool(RewriteDriverPool* pool, RewriteOptions* options) {
    controlling_pool_ = pool;
    options_.reset(options);
  }

  // Returns the pool in which this driver can be recycled. May be NULL.
  RewriteDriverPool* controlling_pool() {
    return controlling_pool_;
  }

  // Returns a read-only pointer to the options used for this RewriteDriver.
  const RewriteOptions* options() const {
    return options_.get();
  }

  // Override HtmlParse's StartParseId to propagate any required options.
  // Note that if this (or other variants) returns true you should use
  // FinishParse(), otherwise Cleanup().
  virtual bool StartParseId(const StringPiece& url, const StringPiece& id,
                            const ContentType& content_type);

  // Override HtmlParse's FinishParse to ensure that the
  // request-scoped cache is cleared immediately.
  //
  // Note that the RewriteDriver can delete itself in this method, if
  // it's not externally managed, and if all RewriteContexts have been
  // completed.
  virtual void FinishParse();

  // As above, but asynchronous. Note that the RewriteDriver may already be
  // deleted at the point the callback is invoked. The scheduler lock will
  // not be held when the callback is run.
  void FinishParseAsync(Function* callback);

  // Report error message with description of context's location
  // (such as filenames and line numbers). context may be NULL, in which case
  // the current parse position will be used.
  void InfoAt(const RewriteContext* context,
              const char* msg, ...) INSTAWEB_PRINTF_FORMAT(3, 4);

  // Constructs name and URL for the specified input resource and encoder.
  bool GenerateOutputResourceNameAndUrl(
    const UrlSegmentEncoder* encoder,
    const ResourceContext* data,
    const ResourcePtr& input_resource,
    GoogleString* name,
    GoogleUrl* mapped_gurl,
    GoogleString* failure_reason);

  // Creates a reference-counted pointer to a new OutputResource object.
  //
  // The content type is taken from the input_resource, but can be modified
  // with SetType later if that is not correct (e.g. due to image transcoding).

  // Constructs an output resource corresponding to the specified input resource
  // and encoded using the provided encoder.  Assumes permissions checking
  // occurred when the input resource was constructed, and does not do it again.
  // To avoid if-chains, tolerates a NULL input_resource (by returning NULL).
  // TODO(jmaessen, jmarantz): Do we want to permit NULL input_resources here?
  // jmarantz has evinced a distaste.
  OutputResourcePtr CreateOutputResourceFromResource(
      const char* filter_id,
      const UrlSegmentEncoder* encoder,
      const ResourceContext* data,
      const ResourcePtr& input_resource,
      OutputResourceKind kind,
      GoogleString* failure_reason);

  // Creates an output resource where the name is provided.  The intent is to
  // be able to derive the content from the name, for example, by encoding
  // URLs and metadata.
  //
  // This method succeeds unless the filename is too long.
  //
  // This name is prepended with path for writing hrefs, and the resulting url
  // is encoded and stored at file_prefix when working with the file system.
  // So hrefs are:
  //    $(PATH)/$(NAME).pagespeed[.$EXPERIMENT].$(FILTER_PREFIX).
  //        $(HASH).$(CONTENT_TYPE_EXT)
  //
  // EXPERIMENT is set only when there is an active experiment_spec.
  //
  // Could be private since you should use one of the versions below but put
  // here with the rest like it and for documentation clarity.
  OutputResourcePtr CreateOutputResourceWithPath(
      const StringPiece& mapped_path, const StringPiece& unmapped_path,
      const StringPiece& base_url, const StringPiece& filter_id,
      const StringPiece& name, OutputResourceKind kind,
      GoogleString* failure_reason);

  // Fills in the resource namer based on the given filter_id, name and options
  // stored in the driver.
  void PopulateResourceNamer(
    const StringPiece& filter_id,
    const StringPiece& name,
    ResourceNamer* full_name);

  // Version of CreateOutputResourceWithPath which first takes only the
  // unmapped path and finds the mapped path using the DomainLawyer
  // and the base_url is this driver's base_url.
  OutputResourcePtr CreateOutputResourceWithUnmappedUrl(
      const GoogleUrl& unmapped_gurl, const StringPiece& filter_id,
      const StringPiece& name, OutputResourceKind kind,
      GoogleString* failure_reason);

  // Version of CreateOutputResourceWithPath where the unmapped and mapped
  // paths are different and the base_url is this driver's base_url.
  // Specifically, the base passed along is decoded_base_url_.AllExceptLeaf();
  // every other argument is forwarded unchanged.
  OutputResourcePtr CreateOutputResourceWithMappedPath(
      const StringPiece& mapped_path, const StringPiece& unmapped_path,
      const StringPiece& filter_id, const StringPiece& name,
      OutputResourceKind kind, GoogleString* failure_reason) {
    return CreateOutputResourceWithPath(mapped_path, unmapped_path,
                                        decoded_base_url_.AllExceptLeaf(),
                                        filter_id, name, kind, failure_reason);
  }

  // Convenience overload of CreateOutputResourceWithPath in which the single
  // 'path' serves as the mapped path, the unmapped path, and the base url.
  // FOR TESTS ONLY.
  OutputResourcePtr CreateOutputResourceWithPath(
      const StringPiece& path, const StringPiece& filter_id,
      const StringPiece& name, OutputResourceKind kind,
      GoogleString* failure_reason) {
    // All three path-like arguments of the full overload alias 'path'.
    const StringPiece& mapped = path;
    const StringPiece& unmapped = path;
    const StringPiece& base = path;
    return CreateOutputResourceWithPath(mapped, unmapped, base, filter_id,
                                        name, kind, failure_reason);
  }

  // Creates an input resource based on input_url.  Returns NULL if the input
  // resource url isn't valid or is a data url, or can't legally be rewritten
  // in the context of this page, in which case *is_authorized will be false.
  // Assumes that resources from unauthorized domains may not be rewritten and
  // that the resource is not intended exclusively for inlining.
  ResourcePtr CreateInputResource(const GoogleUrl& input_url,
                                  bool* is_authorized);

  // Creates an input resource.  Returns NULL if the input resource url isn't
  // valid or is a data url, or can't legally be rewritten in the context of
  // this page (which could mean that it was a resource from an unauthorized
  // domain being processed by a filter that does not allow unauthorized
  // resources, in which case *is_authorized will be false).
  //
  // There are two "special" options, and if you don't care about them you
  // should just call CreateInputResource(input_url, is_authorized) to use
  // their defaults:
  // * If resources from unauthorized domains may be inlined, set
  //   inline_authorization_policy to kInlineUnauthorizedResources, otherwise
  //   set it to kInlineOnlyAuthorizedResources.
  // * If this resource will be inlined after fetching, then set intended_for to
  //   kIntendedForInlining, otherwise use kIntendedForGeneral.  This is to
  //   support AllowWhenInlining.
  ResourcePtr CreateInputResource(
      const GoogleUrl& input_url,
      InlineAuthorizationPolicy inline_authorization_policy,
      IntendedFor intended_for,
      bool* is_authorized);

  // Creates an input resource from the given absolute url.  Requires that the
  // provided url has been checked, and can legally be rewritten in the current
  // page context. Only for use by unit tests.
  ResourcePtr CreateInputResourceAbsoluteUncheckedForTestsOnly(
      const StringPiece& absolute_url);

  // Returns true if some ResourceUrlClaimant has staked a claim on given URL.
  // If this returns true, CreateInputResource will fail, but it's probably
  // not worth logging any debug filter hints about that.
  bool IsResourceUrlClaimed(const GoogleUrl& url) const;

  // Checks to see if the input_url has the same origin as the base url, to
  // make sure we're not fetching from another server. Does not consult the
  // domain lawyer, and is not affected by AddDomain().
  // Precondition: input_url.IsWebValid()
  bool MatchesBaseUrl(const GoogleUrl& input_url) const;

  // Checks to see if we can write the input_url resource in the domain_url
  // taking into account domain authorization, wildcard allow/disallow from
  // RewriteOptions, and the intended use of the url's resource.  After the
  // function is executed, is_authorized_domain will indicate whether input_url
  // was found to belong to an authorized domain or not.
  bool MayRewriteUrl(const GoogleUrl& domain_url,
                     const GoogleUrl& input_url,
                     InlineAuthorizationPolicy inline_authorization_policy,
                     IntendedFor intended_for,
                     bool* is_authorized_domain) const;

  // Returns the base URL that should be used for resolving hrefs in the
  // document.  This is distinct from HtmlParse::google_url(), which is the
  // URL of the HTML file itself and is used for printing html syntax errors.
  const GoogleUrl& base_url() const {
    return base_url_;
  }

  // Returns the URL that was requested if FetchResource was called.
  StringPiece fetch_url() const {
    return fetch_url_;
  }

  // Returns the decoded version of base_url() in case it was encoded by a
  // non-default UrlNamer (for the default UrlNamer this returns the same value
  // as base_url()).  Required when fetching a resource by its encoded name.
  const GoogleUrl& decoded_base_url() const {
    return decoded_base_url_;
  }
  // Returns the Spec() of the decoded base URL as a StringPiece.
  StringPiece decoded_base() const {
    return decoded_base_url_.Spec();
  }

  // Quick check for whether the document url is https (i.e. the document was
  // fetched via https).
  bool IsHttps() const {
    return google_url().SchemeIs("https");
  }

  // Returns a pointer to the driver's default_encoder_ member.
  const UrlSegmentEncoder* default_encoder() const {
    return &default_encoder_;
  }

  // Finds a filter with the given ID, or returns NULL if none found.
  RewriteFilter* FindFilter(const StringPiece& id) const;

  // Returns the refs_before_base_ flag, which is raised by
  // set_refs_before_base().
  bool refs_before_base() {
    return refs_before_base_;
  }

  // Records that there were references to urls before the base tag (if there
  // is a base tag).  The flag has document-level scope: it is reset at the
  // beginning of every document by ScanFilter.
  void set_refs_before_base() {
    refs_before_base_ = true;
  }

  // Get/set the charset of the containing HTML page.  scan_filter.cc explains
  // how this is determined, but NOTE that the determined charset can change
  // as more of the HTML is seen, in particular after a meta tag.
  StringPiece containing_charset() {
    return containing_charset_;
  }
  // Copies the contents of charset into containing_charset_.
  void set_containing_charset(const StringPiece charset) {
    charset.CopyToString(&containing_charset_);
  }

  // Creates and registers a HtmlElement slot for rewriting.
  // If this is the first time called for this position, a new slot will be
  // returned. On subsequent calls, the original slot will be returned so
  // that rewrites are propagated between filters.
  // NOTE(review): "position" presumably means the (elt, attr) pair -- confirm
  // against the implementation.
  HtmlResourceSlotPtr GetSlot(const ResourcePtr& resource,
                              HtmlElement* elt,
                              HtmlElement::Attribute* attr);

  // Creates and registers an inline resource slot for rewriting.
  // If this is the first time called for this position, a new slot will be
  // returned. On subsequent calls, the original slot will be returned so
  // that rewrites are propagated between filters.
  // NOTE(review): "position" presumably means char_node -- confirm against
  // the implementation.
  InlineResourceSlotPtr GetInlineSlot(const ResourcePtr& resource,
                                      HtmlCharactersNode* char_node);

  // Creates and registers an inline attribute resource slot for rewriting.
  // If this is the first time called for this position, a new slot will be
  // returned. On subsequent calls, the original slot will be returned so
  // that rewrites are propagated between filters.
  // NOTE(review): "position" presumably means the (element, attribute) pair
  // -- confirm against the implementation.
  InlineAttributeSlotPtr GetInlineAttributeSlot(
      const ResourcePtr& resource, HtmlElement* element,
      HtmlElement::Attribute* attribute);

  // Creates and registers a source set slot collection for rewriting
  // all the images in the srcset attribute of an <img>. Also creates the
  // necessary resources using the provided filter's policy.
  //
  // If this is the first time called for this element + attr, a new
  // collection will be returned. On subsequent calls, the original collection
  // will be returned so that rewrites are propagated between filters. All
  // filters using this call are expected to have the same values for
  // AllowUnauthorizedDomain() and IntendedForInlining().
  SrcSetSlotCollectionPtr GetSrcSetSlotCollection(
      CommonFilter* filter, HtmlElement* element, HtmlElement::Attribute* attr);

  // Method to start a resource rewrite.  This is called by a filter during
  // parsing, although the Rewrite might continue after deadlines expire
  // and the rewritten HTML must be flushed.  InitiateRewrite returns
  // false if the system is not healthy enough to support resource rewrites.
  // Annotated LOCKS_EXCLUDED: the caller must not hold rewrite_mutex().
  bool InitiateRewrite(RewriteContext* rewrite_context)
      LOCKS_EXCLUDED(rewrite_mutex());
  // NOTE(review): InitiateFetch is undocumented here; presumably the
  // fetch-path counterpart of InitiateRewrite -- confirm in the .cc file.
  void InitiateFetch(RewriteContext* rewrite_context);

  // Provides a mechanism for a RewriteContext to notify a
  // RewriteDriver that it is complete, to allow the RewriteDriver
  // to delete itself or return it back to a free pool in the ServerContext.
  //
  // This will also call back into RewriteContext::Propagate, letting it
  // know whether the context is still attached to the HTML DOM
  // (and hence safe to render), and to do other bookkeeping.
  //
  // rewrite_context: the context reporting completion.
  // permit_render:   if false, no rendering will be asked for even if
  //                  the context is still attached.
  void RewriteComplete(RewriteContext* rewrite_context, bool permit_render);

  // Provides a mechanism for a RewriteContext to notify a
  // RewriteDriver that a certain number of rewrites have been discovered
  // to need to take the slow path.
  //
  // num: the number of such rewrites being reported.
  void ReportSlowRewrites(int num);

  // If there are no outstanding references to this RewriteDriver,
  // deletes it or recycles it to a free pool in the ServerContext.
  // If this is a fetch, calling this also signals to the system that you
  // are no longer interested in its results.
  void Cleanup();

  // Adds an extra external reference to the object. You should not
  // normally need to call it (NewRewriteDriver does it initially), unless for
  // some reason you want to pin the object (e.g. in tests). Each call matches
  // up with a call to Cleanup().
  void AddUserReference();

  // Debugging routines to print out data about the driver.
  //
  // ToString is annotated LOCKS_EXCLUDED, so it must be called without
  // rewrite_mutex() held; ToStringLockHeld is annotated
  // EXCLUSIVE_LOCKS_REQUIRED, so it is for callers that already hold it.
  GoogleString ToString(bool show_detached_contexts) const
      LOCKS_EXCLUDED(rewrite_mutex());
  GoogleString ToStringLockHeld(bool show_detached_contexts) const
      EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
  void PrintState(bool show_detached_contexts);            // For debugging.
  void PrintStateToErrorLog(bool show_detached_contexts);  // For logs.

  // Waits for outstanding Rewrites to complete.  Once the rewrites are
  // complete they can be rendered.
  void WaitForCompletion();

  // Waits for outstanding rewrites to complete, including any background
  // work that may be ongoing even after results were reported.
  //
  // Note: while this guarantees that the result of the computation is
  // known, the thread that performed it may still be running for a
  // little bit and accessing the driver.
  void WaitForShutDown();

  // Like WaitForCompletion / WaitForShutDown, but with a time bound, and
  // taking a mode parameter to decide between the two behaviors.
  // If timeout_ms <= 0, no time bound will be used.
  // Annotated LOCKS_EXCLUDED: the caller must not hold rewrite_mutex().
  void BoundedWaitFor(WaitMode mode, int64 timeout_ms)
      LOCKS_EXCLUDED(rewrite_mutex());

  // When set to true, a Flush of HTML waits for the results of all rewrites
  // rather than just waiting for cache lookups and a small deadline.  Note,
  // however, that in very rare circumstances some rewrites may still be
  // dropped due to excessive load.
  //
  // Note: reset every time the driver is recycled.
  void set_fully_rewrite_on_flush(bool x) { fully_rewrite_on_flush_ = x; }

  // Returns whether this response has a blocking rewrite or not.
  bool fully_rewrite_on_flush() const { return fully_rewrite_on_flush_; }

  // Only relevant when fully_rewrite_on_flush is true.  With this set to
  // true, a Flush of HTML will not wait for async events; with it set to
  // false, the Flush does wait for them.
  void set_fast_blocking_rewrite(bool x) { fast_blocking_rewrite_ = x; }

  // Current value of the flag set by set_fast_blocking_rewrite().
  bool fast_blocking_rewrite() const { return fast_blocking_rewrite_; }

  // If the value of the X-PSA-Blocking-Rewrite request header matches the
  // blocking rewrite key, sets the fully_rewrite_on_flush flag.
  // NOTE(review): the pointer is non-const, so the headers may be modified
  // (e.g. the header removed) -- confirm in the implementation.
  void EnableBlockingRewrite(RequestHeaders* request_headers);

  // Declares that this RewriteDriver will be explicitly deleted, and so
  // should not be auto-deleted at the end of the parse.  This is primarily
  // for tests.
  //
  // TODO(jmarantz): Consider phasing this out to make tests behave
  // more like servers.
  void set_externally_managed(bool x) {
    externally_managed_ = x;
  }

  // Called by RewriteContext to let RewriteDriver know it will be continuing
  // on the fetch in the background, and so it should defer doing full cleanup
  // sequences until DetachedFetchComplete() is called.
  void DetachFetch();

  // Called by RewriteContext when a detached async fetch is complete, allowing
  // the RewriteDriver to be recycled if FetchComplete() got invoked as well.
  // Counterpart of DetachFetch().
  void DetachedFetchComplete();

  // Cleans up the driver and any fetch rewrite contexts, unless the fetch
  // rewrite got detached by a call to DetachFetch(), in which case a call to
  // DetachedFetchComplete() must also be performed before cleanup happens.
  void FetchComplete();

  // Deletes the specified RewriteContext.  If this is the last RewriteContext
  // active on this Driver, and there is no other outstanding activity, then
  // the RewriteDriver itself can be recycled, and WaitForCompletion can return.
  //
  // We expect this method to be called on the Rewrite thread.
  void DeleteRewriteContext(RewriteContext* rewrite_context);

  int rewrite_deadline_ms() { return options()->rewrite_deadline_ms(); }

  // Sets a maximum amount of time to process a page across all flush
  // windows; i.e., the entire lifecycle of this driver during a given pageload.
  // A negative value indicates no limit.
  // Setting fully_rewrite_on_flush() overrides this.
  void set_max_page_processing_delay_ms(int x) {
    max_page_processing_delay_ms_ = x;
  }
  int max_page_processing_delay_ms() { return max_page_processing_delay_ms_; }

  // Records the device type chosen for the current property_page.
  void set_device_type(UserAgentMatcher::DeviceType x) {
    device_type_ = x;
  }
  // Device type last recorded via set_device_type().
  UserAgentMatcher::DeviceType device_type() const {
    return device_type_;
  }

  // Tries to register the given rewrite context ('candidate') as working on
  // its partition key. If this context is the first one to try to handle it,
  // returns NULL. Otherwise returns the previous such context.
  //
  // Must only be called from the rewrite thread.
  RewriteContext* RegisterForPartitionKey(const GoogleString& partition_key,
                                          RewriteContext* candidate);

  // Counterpart of RegisterForPartitionKey().
  // Must be called after all other rewrites that are currently relying on this
  // one have had their RepeatedSuccess or RepeatedFailure methods called.
  //
  // Must only be called from the rewrite thread.
  void DeregisterForPartitionKey(
      const GoogleString& partition_key, RewriteContext* candidate);

  // Asks for a Flush through the HTML parser chain to happen soon, e.g. once
  // the network pauses its incoming byte stream.  Only sets a flag here.
  void RequestFlush() {
    flush_requested_ = true;
  }
  // Whether the flush-requested flag (see RequestFlush()) is currently set.
  bool flush_requested() const {
    return flush_requested_;
  }

  // Executes a Flush() if RequestFlush() was called, e.g. from the
  // Listener Filter (see set_event_listener below).  Consider an HTML
  // parse driven by a UrlAsyncFetcher.  When the UrlAsyncFetcher
  // temporarily runs out of bytes to read, it calls
  // response_writer->Flush().  When that happens, we may want to
  // consider flushing the outstanding HTML events through the system
  // so that the browser can start fetching subresources and
  // rendering.  The event_listener (see set_event_listener below)
  // helps determine whether enough "interesting" events have passed
  // in the current flush window so that we should take this incoming
  // network pause as an opportunity.
  void ExecuteFlushIfRequested();

  // Asynchronous version of ExecuteFlushIfRequested(). Note that you should
  // not attempt to write out any data until the callback is invoked.
  // (If a flush is not needed, the callback will be invoked immediately).
  void ExecuteFlushIfRequestedAsync(Function* callback);

  // Overrides HtmlParse::Flush so that it can happen in two phases:
  //    1. Pre-render chain runs, resulting in async rewrite activity
  //    2. async rewrite activity ends, calling callback, and post-render
  //       filters run.
  // This API is used for unit-tests & Apache (which lacks a useful event
  // model) and results in blocking behavior.
  //
  // FlushAsync is preferred for event-driven servers.
  virtual void Flush();

  // Initiates an asynchronous Flush.  done->Run() will be called when
  // the flush is complete.  Further calls to ParseText should be deferred until
  // the callback is called. The scheduler mutex is not held while done is
  // called.
  void FlushAsync(Function* done);

  // Queues up a task to run on the (high-priority) rewrite thread.
  // See AddLowPriorityRewriteTask() for the low-priority variant.
  void AddRewriteTask(Function* task);

  // Queues up a task to run on the low-priority rewrite thread.
  // Such tasks are expected to be safely cancelable.
  // See AddRewriteTask() for the high-priority variant.
  void AddLowPriorityRewriteTask(Function* task);

  // Accessor for the html_worker_ sequence pointer.
  QueuedWorkerPool::Sequence* html_worker() {
    return html_worker_;
  }
  // NOTE(review): undocumented here; presumably the sequence on which rewrite
  // tasks run (see RunTasksOnRequestThread / SwitchToQueuedWorkerPool, which
  // change where rewrite_worker tasks execute) -- confirm in the .cc file.
  Sequence* rewrite_worker();
  // Returns the raw pointer held by scheduler_sequence_; ownership stays with
  // that member.
  Scheduler::Sequence* scheduler_sequence() {
    return scheduler_sequence_.get();
  }

  // Accessor for the low_priority_rewrite_worker_ sequence pointer.
  QueuedWorkerPool::Sequence* low_priority_rewrite_worker() {
    return low_priority_rewrite_worker_;
  }

  // Makes the rewrite_worker tasks run on the request thread.  This
  // must be called immediately after initializing the driver, before
  // it starts processing the request.
  // SwitchToQueuedWorkerPool() switches back.
  void RunTasksOnRequestThread();

  // Switches the driver back to running rewrite_worker tasks using
  // the QueuedWorkerPool.  This can be called when we are retiring
  // a server-request on behalf of the client (e.g. after a deadline was
  // exceeded), but want background optimization to continue.  It can
  // no longer continue on the request thread.
  // Annotated EXCLUSIVE_LOCKS_REQUIRED: the caller must hold rewrite_mutex().
  void SwitchToQueuedWorkerPool() EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());

  // Accessor for the scheduler_ pointer.
  Scheduler* scheduler() {
    return scheduler_;
  }

  // Used by CacheExtender, CssCombineFilter, etc. for rewriting domains
  // of sub-resources in CSS.  Returns the raw pointer held by
  // domain_rewriter_.
  DomainRewriteFilter* domain_rewriter() {
    return domain_rewriter_.get();
  }
  // Returns the raw pointer held by url_trim_filter_.
  UrlLeftTrimFilter* url_trim_filter() {
    return url_trim_filter_.get();
  }

  // Rewrites CSS content to absolutify any relative embedded URLs, streaming
  // the results to the writer.  Returns 'false' if the writer returns false
  // or if the content was not rewritten because the domains of the gurl
  // and resolved_base match.
  // NOTE(review): the text above speaks of a boolean result, but the declared
  // return type is CssResolutionStatus -- confirm how the enum values map to
  // these outcomes.
  //
  // input_css_base contains the path where the CSS text came from.
  // output_css_base contains the path where the CSS will be written.
  CssResolutionStatus ResolveCssUrls(const GoogleUrl& input_css_base,
                                     const StringPiece& output_css_base,
                                     const StringPiece& contents,
                                     Writer* writer,
                                     MessageHandler* handler);

  // Determines if a URL relative to the given input_base needs to be
  // absolutified given that it will end up under output_base:
  // - If we are proxying and input_base isn't proxy encoded, then yes.
  // - If we aren't proxying and input_base != output_base, then yes.
  // - If we aren't proxying and the domain lawyer will shard or rewrite
  //   input_base, then yes.
  // If proxy_mode is not NULL, also sets *proxy_mode to whether proxy mode is
  // active or not.
  bool ShouldAbsolutifyUrl(const GoogleUrl& input_base,
                           const GoogleUrl& output_base,
                           bool* proxy_mode) const;

  // Updates the PropertyValue named 'property_name' in the dom cohort with
  // the value 'property_value'.  It is the responsibility of the client to
  // ensure that property cache and dom cohort are enabled when this function is
  // called.  It is a programming error to call this function when property
  // cache or dom cohort is not available, more so since the value payload has
  // to be serialised before calling this function.  Hence this function will
  // DFATAL if property cache or dom cohort is not available.
  void UpdatePropertyValueInDomCohort(
      AbstractPropertyPage* page,
      StringPiece property_name,
      StringPiece property_value);

  // Returns the property page which contains the cached properties associated
  // with the current URL.
  // NOTE(review): may presumably be NULL when no page has been set -- confirm.
  PropertyPage* property_page() const;

  // Returns the property page which contains the cached properties associated
  // with the current URL and fallback URL (i.e. without query params). This
  // should be used where a property is interested in fallback values if
  // actual values are not present.
  // Returns the fallback_property_page_ pointer as-is.
  FallbackPropertyPage* fallback_property_page() const {
    return fallback_property_page_;
  }

  // Returns the property page which contains cached properties associated
  // with the current origin (host/port/protocol). May be NULL.
  PropertyPage* origin_property_page() const;

  // Setters for the property pages.  They differ in whether the driver takes
  // ownership of the page that is passed in.
  //
  // Takes ownership of page.
  void set_property_page(PropertyPage* page);
  // Takes ownership of page.
  void set_fallback_property_page(FallbackPropertyPage* page);
  // Does not take ownership of the page.
  void set_unowned_fallback_property_page(FallbackPropertyPage* page);
  // Takes ownership of page.
  void set_origin_property_page(PropertyPage* page);

  // Reports whether the critical images beacon should be enabled.
  // The JS to detect above-the-fold images should only be enabled if one of the
  // filters that uses critical image information is enabled, the property cache
  // is enabled (since the critical image information is stored in the property
  // cache), and it is not explicitly disabled through options.
  bool is_critical_images_beacon_enabled();

  // Used by ImageRewriteFilter for identifying critical images.
  // Returns the raw pointer held by critical_images_info_.
  CriticalImagesInfo* critical_images_info() const {
    return critical_images_info_.get();
  }

  // This should only be called by the CriticalSelectorFinder. Normal users
  // should call CriticalSelectorFinder::IsCriticalImage.
  // NOTE(review): "IsCriticalImage" looks odd on a selector finder -- confirm
  // the intended method name.
  // TODO(jud): Remove when the finders reside in RewriteDriver and manage their
  // own state.
  // Returns the raw pointer held by critical_selector_info_.
  CriticalSelectorInfo* critical_selector_info() {
    return critical_selector_info_.get();
  }

  // This should only be called by the CriticalSelectorFinder.
  // TODO(jud): Remove when the finders reside in RewriteDriver and manage their
  // own state.
  // Hands 'info' to critical_selector_info_ via reset(), replacing whatever it
  // previously held.
  void set_critical_selector_info(CriticalSelectorInfo* info) {
    critical_selector_info_.reset(info);
  }

  // Inserts the critical images present on the requested html page. It takes
  // ownership of critical_images_info. This should only be called by the
  // CriticalImagesFinder; normal users should just be using the automatic
  // management of critical_images_info that CriticalImagesFinder provides.
  // The pointer is handed to critical_images_info_ via reset(), replacing
  // whatever it previously held.
  void set_critical_images_info(CriticalImagesInfo* critical_images_info) {
    critical_images_info_.reset(critical_images_info);
  }

  // Reports whether css imports must be flattened.  That is the case when the
  // kFlattenCssImports filter is enabled explicitly, or when it is not
  // forbidden and either kPrioritizeCriticalCss or kComputeCriticalCss is
  // enabled.
  bool FlattenCssImportsEnabled() const {
    if (options()->Enabled(RewriteOptions::kFlattenCssImports)) {
      return true;
    }
    if (options()->Forbidden(RewriteOptions::kFlattenCssImports)) {
      return false;
    }
    return options()->Enabled(RewriteOptions::kPrioritizeCriticalCss) ||
           options()->Enabled(RewriteOptions::kComputeCriticalCss);
  }

  // Number of images whose low quality images are inlined in the html page.
  // We expect this method to be called on the HTML parser thread.
  int num_inline_preview_images() const {
    return num_inline_preview_images_;
  }

  // Increments the count reported by num_inline_preview_images().
  // We expect this method to be called on the HTML parser thread.
  void increment_num_inline_preview_images();

  // Increments the reference count for misc. async ops that need the
  // RewriteDriver kept alive.  Pair each call with
  // DecrementAsyncEventsCount().
  void IncrementAsyncEventsCount();

  // Decrements a reference count bumped up by IncrementAsyncEventsCount().
  void DecrementAsyncEventsCount();

  // Increments the reference count for misc async ops that should be waited
  // for before doing rendering for the current flush window.  Pair each call
  // with DecrementRenderBlockingAsyncEventsCount().
  void IncrementRenderBlockingAsyncEventsCount();

  // Decrements a reference count bumped up by
  // IncrementRenderBlockingAsyncEventsCount().
  void DecrementRenderBlockingAsyncEventsCount();

  // Determines whether the document's Content-Type has a mimetype indicating
  // that browsers should parse it as XHTML.  The answer is reported as an
  // XhtmlStatus value.
  XhtmlStatus MimeTypeXhtmlStatus();

  // Sets the is_lazyload_script_flushed_ flag.
  void set_is_lazyload_script_flushed(bool x) {
    is_lazyload_script_flushed_ = x;
  }
  // Current value of the is_lazyload_script_flushed_ flag.
  bool is_lazyload_script_flushed() const {
    return is_lazyload_script_flushed_;
  }

  // Returns the driver's FlushEarlyInfo.
  // This method is not thread-safe. Call it only from the html parser thread.
  FlushEarlyInfo* flush_early_info();

  // dependency_tracker()->RegisterDependencyCandidate and
  // ReportDependencyCandidate can be called from any thread.
  // Returns the raw pointer held by dependency_tracker_.
  DependencyTracker* dependency_tracker() const {
    return dependency_tracker_.get();
  }

  // True when we are currently in Debug mode, meaning that the site owner or
  // user has enabled filter kDebug.
  bool DebugMode() const {
    return options()->Enabled(RewriteOptions::kDebug);
  }

  // Logs the given debug message(s) as HTML comments after the given element,
  // provided that it is not NULL, it has not been flushed, and debug is
  // enabled. The form that takes a repeated field is intended for use by
  // CachedResult, e.g:
  //   InsertDebugComments(cached_result.debug_message(), element);
  // Messages are HTML-escaped before being written out to the DOM.
  void InsertDebugComment(StringPiece unescaped_message, HtmlNode* node);
  void InsertDebugComments(
      const protobuf::RepeatedPtrField<GoogleString>& unescaped_messages,
      HtmlElement* element);
  // NOTE(review): presumably inserts the comment produced by
  // GenerateUnauthorizedDomainDebugComment() for 'url' -- confirm.
  void InsertUnauthorizedDomainDebugComment(StringPiece url,
                                            HtmlElement* element);

  // Generates an unauthorized domain debug comment for the given URL and
  // returns it as a string. Public for unit tests.
  static GoogleString GenerateUnauthorizedDomainDebugComment(
      const GoogleUrl& gurl);

  // log_record() always returns a pointer to a valid AbstractLogRecord, owned
  // by the rewrite_driver's request context.  Callers must not delete it.
  AbstractLogRecord* log_record();

  // Accessor for the dom_stats_filter_ pointer.
  DomStatsFilter* dom_stats_filter() const { return dom_stats_filter_; }

  // Whether the system is healthy enough to rewrite resources.  Currently,
  // systems get sick based on the health of the metadata cache.
  bool can_rewrite_resources() const {
    return can_rewrite_resources_;
  }

  // Whether this driver is nested inside another.
  bool is_nested() const {
    return is_nested_;
  }

  // Determines whether metadata was requested in the response headers and
  // verifies that the key in the header is the same as the expected key. An
  // empty expected key returns false.
  // NOTE(review): the parameter is the *request* headers; "in the response
  // headers" presumably means the metadata is to be returned there -- confirm
  // and reword.
  bool MetadataRequested(const RequestHeaders& request_headers) const;

  // Whether the driver attempted to distribute the fetch.
  bool tried_to_distribute_fetch() const {
    return tried_to_distribute_fetch_;
  }

  // Writes the specified contents into the output resource, and marks it
  // as optimized. 'inputs' describes the input resources that were used
  // to construct the output, and is used to determine whether the
  // result can be safely cache extended and be marked publicly cacheable.
  // 'type' and 'charset' specify the mimetype and encoding of
  // the contents, and will help form the Content-Type header.
  // 'charset' may be empty when not specified.
  //
  // Note that this does not escape charset.
  //
  // Callers should take care that dangerous types like 'text/html' do not
  // sneak into 'type'.
  bool Write(const ResourceVector& inputs,
             const StringPiece& contents,
             const ContentType* type,
             StringPiece charset,
             OutputResource* output);

  // Setter and getter for the defer_instrumentation_script_ flag.
  void set_defer_instrumentation_script(bool x) {
    defer_instrumentation_script_ = x;
  }
  bool defer_instrumentation_script() const {
    return defer_instrumentation_script_;
  }

  // Overwrites num_initiated_rewrites_ while holding rewrite_mutex().  This
  // should only be called from test code.
  void set_num_initiated_rewrites(int64 x) {
    ScopedMutex hold(rewrite_mutex());
    num_initiated_rewrites_ = x;
  }
  // Reads num_initiated_rewrites_ while holding rewrite_mutex().
  int64 num_initiated_rewrites() const {
    ScopedMutex hold(rewrite_mutex());
    return num_initiated_rewrites_;
  }
  // Overwrites num_detached_rewrites_ while holding rewrite_mutex().  This
  // should only be called from test code.
  void set_num_detached_rewrites(int64 x) {
    ScopedMutex hold(rewrite_mutex());
    num_detached_rewrites_ = x;
  }
  // Reads num_detached_rewrites_ while holding rewrite_mutex().
  int64 num_detached_rewrites() const {
    ScopedMutex hold(rewrite_mutex());
    return num_detached_rewrites_;
  }

  // Stores a copy of 'x' in pagespeed_query_params_.
  void set_pagespeed_query_params(StringPiece x) {
    x.CopyToString(&pagespeed_query_params_);
  }
  // Returns a view of the stored pagespeed_query_params_ string.
  StringPiece pagespeed_query_params() const {
    return pagespeed_query_params_;
  }

  // Stores a copy of 'x' in pagespeed_option_cookies_.
  void set_pagespeed_option_cookies(StringPiece x) {
    x.CopyToString(&pagespeed_option_cookies_);
  }
  // Returns a view of the stored pagespeed_option_cookies_ string.
  StringPiece pagespeed_option_cookies() const {
    return pagespeed_option_cookies_;
  }

  // Returns the fragment used to partition the cache.
  // We fragment the cache based on the hostname we got from the request, unless
  // that was overridden in the options with a cache_fragment.
  const GoogleString& CacheFragment() const;

  // Utility function to set/clear cookies for PageSpeed options. gurl is the
  // URL of the request, from which the host is extracted for a cookie
  // attribute.
  // TODO(matterbury): Get the URL from 'this' which we can't do now because it
  // isn't set until we've decided that the content of requested URL is HTML.
  // Returns true if any Set-Cookie headers are added, in which case
  // ComputeCaching has been called on response_headers.
  bool SetOrClearPageSpeedOptionCookies(const GoogleUrl& gurl,
                                        ResponseHeaders* response_headers);

  // Calls the provided ResourceNamer's Decode() function on 'leaf', passing
  // the hash and signature lengths from this RewriteDriver.
  // NOTE(review): presumably returns the result of that Decode() call --
  // confirm.
  bool Decode(StringPiece leaf, ResourceNamer* resource_namer) const;

  // Current value of the filters_added_ flag.
  bool filters_added() const {
    return filters_added_;
  }
  // True when html_writer_filter_ currently holds a non-null filter.
  bool has_html_writer_filter() const {
    const bool writer_present = (html_writer_filter_.get() != nullptr);
    return writer_present;
  }

  // Declares whether the current document is AMP or not.  Prior to calling
  // this, all HTML events are buffered, to avoid waking up filters that
  // inject scripts.
  // NOTE(review): presumably the buffered events are released once this is
  // called -- confirm in the implementation.
  void SetIsAmpDocument(bool is_amp);
  // Current value of the is_amp_ flag.
  bool is_amp_document() const {
    return is_amp_;
  }

 protected:
  // NOTE(review): undocumented here; presumably overrides a hook declared by
  // the base class -- confirm against the base class header.
  virtual void DetermineFiltersBehaviorImpl();

 private:
  // Classes granted access to RewriteDriver's private members.  (Judging by
  // their names, all but RewriteContext are presumably test fixtures.)
  friend class DistributedRewriteContextTest;
  friend class RewriteContext;
  friend class RewriteDriverTest;
  friend class RewriteTestBase;
  friend class ServerContextTest;

  // Map from a string key to a (non-owning pointer to a) RewriteFilter.
  using StringFilterMap = std::map<GoogleString, RewriteFilter*>;

  // Returns true if the given fetch request, for the filter identified by
  // filter_id, should be distributed.
  bool ShouldDistributeFetch(const StringPiece& filter_id);

  // Distributes the fetch to another task if ShouldDistributeFetch allows it
  // for the provided filter_id and streams the result to the provided fetch
  // object.
  //
  // Returns true if an attempt to distribute was made. If the attempt fails
  // before async_fetch was written to (before ResponseHeaders) it will call
  // RewriteDriver::FetchResource() and skip distribution. If the attempt fails
  // after writing to the ResponseHeaders then the fetch will ultimately fail
  // and the client will get a broken resource.
  //
  // Returns false if ShouldDistributeFetch disallows the distribution.
  bool DistributeFetch(const StringPiece& url, const StringPiece& filter_id,
                       AsyncFetch* async_fetch);

  // Checks whether outstanding rewrites are completed in a satisfactory fashion
  // with respect to the given wait_mode and timeout, and invokes done->Run()
  // (with rewrite_mutex released) when either finished or timed out. May
  // relinquish rewrite_mutex() temporarily to invoke done.
  // The caller must hold rewrite_mutex() on entry.
  void CheckForCompletionAsync(WaitMode wait_mode, int64 timeout_ms,
                               Function* done)
      EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());

  // A single check attempt for CheckForCompletionAsync. Will either invoke the
  // callback (with rewrite_mutex released) or ask the scheduler to check
  // again. May relinquish rewrite_mutex() temporarily to invoke done.
  // Note that this takes an end time (end_time_ms) rather than a timeout.
  // The caller must hold rewrite_mutex() on entry.
  void TryCheckForCompletion(WaitMode wait_mode, int64 end_time_ms,
                             Function* done)
      EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());

  // Termination predicate for TryCheckForCompletion.
  // The caller must hold rewrite_mutex().
  bool IsDone(WaitMode wait_mode, bool deadline_reached)
      EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());

  // Decides whether pending async events must be waited for.  They always are
  // during shutdown; otherwise only while waiting for the completion of all
  // rewriting (fully_rewrite_on_flush_), except in fast_blocking_rewrite mode.
  bool WaitForPendingAsyncEvents(WaitMode wait_mode) {
    if (wait_mode == kWaitForShutDown) {
      return true;
    }
    return fully_rewrite_on_flush_ && !fast_blocking_rewrite_;
  }

  // Portion of flush that happens asynchronously off the scheduler
  // once the rendering is complete. Calls back to 'callback' after its
  // processing, but with the lock released.
  void FlushAsyncDone(int num_rewrites, Function* callback);

  // Returns the amount of time, in milliseconds, to wait for rewrites to
  // complete for the current flush window. This combines the per-flush window
  // deadline (configured via rewrite_deadline_ms()) and the per-page deadline
  // (configured via max_page_processing_delay_ms()).
  int64 ComputeCurrentFlushWindowRewriteDelayMs();

  // Queues up invocation of FlushAsyncDone(num_rewrites, callback) in our
  // html_workers sequence.
  void QueueFlushAsyncDone(int num_rewrites, Function* callback);

  // Called as part of the implementation of FinishParseAsync, after the
  // flush is complete.
  // NOTE(review): by analogy with QueueFlushAsyncDone, the Queue* variant
  // presumably schedules FinishParseAfterFlush rather than running it inline
  // -- confirm.
  void QueueFinishParseAfterFlush(Function* user_callback);
  void FinishParseAfterFlush(Function* user_callback);

  // NOTE(review): undocumented here; presumably reports whether all
  // outstanding rewrites have completed -- confirm.  The caller must hold
  // rewrite_mutex().
  bool RewritesComplete() const EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());

  // Sets the base URL in response to a base-tag being parsed.  This
  // should only be called by ScanFilter.
  // NOTE(review): per the name, presumably a no-op when the base was already
  // set for this document -- confirm.
  void SetBaseUrlIfUnset(const StringPiece& new_base);

  // Sets the base URL for a resource fetch.  This should only be called from
  // test code and from FetchResource.
  void SetBaseUrlForFetch(const StringPiece& url);

  // Saves a decoding of the Base URL in decoded_base_url_.  Call this
  // whenever updating base_url_, so that the two stay in sync.
  void SetDecodedUrlFromBase();

  // The rewrite_mutex is owned by the scheduler: this simply returns
  // scheduler_->mutex().  The LOCK_RETURNED annotation tells the thread-safety
  // analysis that the two expressions name the same lock.
  AbstractMutex* rewrite_mutex() const LOCK_RETURNED(scheduler_->mutex()) {
    return scheduler_->mutex();
  }

  // Parses an arbitrary block of an html file: 'size' bytes starting at
  // 'content'.
  virtual void ParseTextInternal(const char* content, int size);

  // Indicates whether we should skip parsing for the given request; true
  // means skip.
  bool ShouldSkipParsing();

  // Returns the length of the signature on a signed resource URL.
  // NOTE(review): units (characters vs. bytes) and the value used for unsigned
  // URLs are not visible here -- confirm.
  int SignatureLength() const;

  // ScanFilter is granted access to private members; SetBaseUrlIfUnset is
  // documented as being for ScanFilter only.
  friend class ScanFilter;

  // Registers the RewriteFilter in the map, but does not put it in the
  // html parse filter chain.  This allows it to serve resource
  // requests.
  void RegisterRewriteFilter(RewriteFilter* filter);

  // Adds an already-owned rewrite filter, identified by 'id', to the
  // pre-render chain.  This is used for filters that are unconditionally
  // created for handling of resources, but whose presence in the html-rewrite
  // chain is conditional on options.
  void EnableRewriteFilter(const char* id);

  // Internal low-level helper for resource creation; performs no permission
  // checking itself.
  // Use only when permission checking has been done explicitly on the
  // caller side. is_authorized_domain is passed along to Resource object
  // creation, in order to decide whether to keep the resource in the usual
  // key space or a separate one meant for unauthorized resources only.
  ResourcePtr CreateInputResourceUnchecked(const GoogleUrl& gurl,
                                           bool is_authorized_domain);

  // NOTE(review): undocumented here; presumably these populate the pre-render
  // and post-render filter chains referred to in the Flush() comments --
  // confirm in the .cc file.
  void AddPreRenderFilters();
  void AddPostRenderFilters();

  // Helper function to decode the pagespeed url.
  // NOTE(review): judging by their names, name_out, kind_out, filter_out,
  // url_base and urls are presumably output parameters filled in on success,
  // with the bool result reporting whether decoding succeeded -- confirm.
  bool DecodeOutputResourceNameHelper(const GoogleUrl& url,
                                      const RewriteOptions* options_to_use,
                                      const UrlNamer* url_namer,
                                      ResourceNamer* name_out,
                                      OutputResourceKind* kind_out,
                                      RewriteFilter** filter_out,
                                      GoogleString* url_base,
                                      StringVector* urls) const;

  // When HTML parsing is complete, we have learned all we can about the DOM, so
  // immediately write anything required for the DOM Cohort into the page
  // property cache. Writes to this cohort are predicated so that they only
  // occur if a filter that actually makes use of it is enabled. This prevents
  // filling the cache with unnecessary entries. To enable writing, a filter
  // should override DetermineEnabled to call
  // RewriteDriver::set_write_property_cache_dom_cohort(true), or in the case of
  // a RewriteFilter, should override
  // RewriteFilter::UsesPropertyCacheDomCohort() to return true.
  void WriteDomCohortIntoPropertyCache();

  // Shared helper used by CreateCacheFetcher() and CreateCacheOnlyFetcher().
  // NOTE(review): ownership of the returned fetcher is not visible here --
  // presumably the caller owns it; confirm.
  CacheUrlAsyncFetcher* CreateCustomCacheFetcher(UrlAsyncFetcher* base_fetcher);

  // Just before releasing the rewrite driver, check if the feature for storing
  // rewritten responses (e.g. html) in cache is enabled. If yes, purge the
  // old response if a significant amount of rewriting happened after this
  // response was stored in the cache. If not, release the rewrite driver. If a
  // purge fetch request is issued, the rewrite driver will be released after
  // this async fetch request is completed.
  void PossiblyPurgeCachedResponseAndReleaseDriver();

  // Logs statistics to the AbstractLogRecord (see log_record()).
  void LogStats();

  // This pair of calls helps determine if code that changes event state
  // should wake up anyone waiting for rewrite driver's completion.
  //
  // The usage pattern is something like this:
  //   ScopedMutex lock(rewrite_mutex());
  //   bool should_signal_cookie = PrepareShouldSignal();
  //
  //   // Change state
  //   ...
  //
  //   SignalIfRequired(should_signal_cookie);
  //
  // WARNING: SignalIfRequired() drops the lock on rewrite_mutex() temporarily,
  // so 'this' could get deleted after it returns, so it should not be accessed
  // afterwards.
  //
  // Both calls require rewrite_mutex() to be held on entry.  The value
  // returned by PrepareShouldSignal() is meant to be passed unchanged to
  // SignalIfRequired().
  bool PrepareShouldSignal() EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
  void SignalIfRequired(bool result_of_prepare_should_signal)
      EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());

  // Reverts the driver back to its default state of using a shared scheduler
  // and running on the shared scheduler.
  void CleanupRequestThread();

  // Only the first base-tag is significant for a document -- any subsequent
  // ones are ignored.  There should be no URLs referenced prior to the base
  // tag, if one exists.  See
  //
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/
  //    semantics.html#the-base-element
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/
  //    urls.html#document-base-url
  //
  // Thus we keep the base-tag in the RewriteDriver, and also keep track of
  // whether it's been reset already within the document.
  bool base_was_set_;

  // Stores whether or not there were references to urls before the
  // base tag (if there is a base tag) in this document.  If there is
  // no base tag, this should be false.  If the base tag is before all
  // other url references, this should also be false.
  bool refs_before_base_;

  // The charset of the containing HTML page.
  GoogleString containing_charset_;

  // Copies properties from the request headers to the request context,
  // if both are non-null.
  void PopulateRequestContext();

  bool filters_added_;
  bool externally_managed_;

  // Memory management stuff. Some of the reference counts we keep track of
  // also are used as a count of events, to help determine when we are done.
  //
  // WARNING: every time you decrement reference counts, you should
  // check release_driver_ within the critical section, and call
  // PossiblyPurgeCachedResponseAndReleaseDriver() if it is true
  // after releasing the lock. The easiest way to get it right is to just call
  // DropReference().
  enum RefCategory {
    // Each enumerator below names one kind of reference counted in
    // ref_counts_. Enumerators use the default consecutive values starting
    // at 0, so their order is significant.

    kRefUser,  // External refcount from users.
    kRefParsing,  // Parser active.

    // The number of rewrites (RewriteContext) that have been requested,
    // and not yet completed, and for which we still hope to render
    // them within the flush window. This is waited for.
    kRefPendingRewrites,

    // The number of rewrites (RewriteContext) that have missed the rendering
    // deadline. We don't wait for them, but they still need to keep
    // the RewriteDriver alive.
    kRefDetachedRewrites,

    // Tracks the number of RewriteContexts that have been completed,
    // but not yet deleted.  Once RewriteComplete has been called,
    // rewrite_context->Propagate() is called to render slots (if not
    // detached) and to queue up activity that must occur prior to the
    // context being deleted: specifically running any successors.
    // After all that occurs, DeleteRewriteContext must be called and
    // that will decrement this counter.
    kRefDeletingRewrites,

    // Keeps track of fetch-responding work that's user-facing.
    kRefFetchUserFacing,

    // Keeps track of any background continuation of a fetch.
    kRefFetchBackground,

    // Miscellaneous async references from outside.
    //
    // TODO(morlovich): In a follow-up, split this between events people might
    // want to wait for and events they don't.
    kRefAsyncEvents,

    // Async events we always wait for, even if fully_rewrite_on_flush isn't
    // turned on.
    kRefRenderBlockingAsyncEvents,

    // Not a real category: as the final enumerator, its value equals the
    // number of categories declared above, so it must remain last.
    kNumRefCategories
  };

  friend class CategorizedRefcount<RewriteDriver, RefCategory>;

  // Protected by rewrite_mutex().
  CategorizedRefcount<RewriteDriver, RefCategory> ref_counts_;

  // Interface to CategorizedRefcount
  void LastRefRemoved();
  StringPiece RefCategoryName(RefCategory cat);

  // Drops a reference of given kind, signaling any waiters
  // and potentially even releasing the rewrite driver.
  void DropReference(RefCategory cat);

  // Set to true when the refcount reaches 0. See comment
  // above RefCategory for how this should be used.
  bool release_driver_;

  // If not kNoWait, indicates that WaitForCompletion or a similar method
  // has been called, and another thread is waiting for us to notify it of
  // everything having been finished in a given mode.
  WaitMode waiting_ GUARDED_BY(rewrite_mutex());

  // This is set to true if the current wait's deadline has expired.
  bool waiting_deadline_reached_ GUARDED_BY(rewrite_mutex());

  // If this is true, the usual HTML streaming interface will let rendering
  // of every flush window fully complete before proceeding rather than
  // use a deadline. This means rewriting of HTML may be slow, and hence
  // should not be used for online traffic.
  bool fully_rewrite_on_flush_;

  // If this is true, we don't wait for async events before flushing bytes to
  // the client during a blocking rewrite; else we do wait for async events.
  bool fast_blocking_rewrite_;

  bool flush_requested_;
  bool flush_occurred_;

  // If it is set to true, then lazyload script is flushed with flush early
  // flow.
  bool is_lazyload_script_flushed_;

  // Tracks whether any filter that uses the dom cohort of the property cache is
  // enabled. Writes to the property cache for this cohort are predicated on
  // this.
  bool write_property_cache_dom_cohort_;

  // URL of the HTML page being rewritten in the HTML flow, or of the
  // resource being rewritten in the resource flow.
  GoogleUrl base_url_;

  // In the resource flow, the URL requested may not have the same
  // base as the original resource. decoded_base_url_ stores the base
  // of the original (un-rewritten) resource.
  GoogleUrl decoded_base_url_;

  // This is the URL that is being fetched in a fetch path (not valid in HTML
  // path).
  GoogleString fetch_url_;

  GoogleString user_agent_;

  LazyBool should_skip_parsing_;

  StringFilterMap resource_filter_map_;

  ResponseHeaders* response_headers_;

  // request_headers_ is a copy of the Fetch's request headers, and it
  // stays alive until the rewrite driver is recycled or deleted.
  scoped_ptr<const RequestHeaders> request_headers_;

  int status_code_;  // Status code of response for this request.

  // This group of rewrite-context-related variables is accessed
  // only in the main thread of RewriteDriver (aka the HTML thread).
  typedef std::vector<RewriteContext*> RewriteContextVector;
  RewriteContextVector rewrites_;  // ordered list of rewrites to initiate

  // The maximum amount of time to wait for page processing across all flush
  // windows. A negative value implies no limit.
  int max_page_processing_delay_ms_;

  typedef std::set<RewriteContext*> RewriteContextSet;

  // Contains the RewriteContext* that have been queued into the
  // RewriteThread, but have not gotten to the point where
  // RewriteComplete() has been called.  This set is cleared
  // once the rewrite_deadline_ms has passed.
  RewriteContextSet initiated_rewrites_ GUARDED_BY(rewrite_mutex());

  // Number of total initiated rewrites for the request.
  int64 num_initiated_rewrites_ GUARDED_BY(rewrite_mutex());

  // Number of total detached rewrites for the request, i.e. rewrites whose
  // results did not make it to the response. This is different from
  // kRefDetachedRewrites (and detached_rewrites_.size(), which is equal to it)
  // since that counter is for the number of rewrites
  // currently in the detached state for the current flush window,
  // while this variable is the total that ever got detached over all of the
  // document.
  int64 num_detached_rewrites_ GUARDED_BY(rewrite_mutex());

  // Contains the RewriteContext* that were still running at the deadline.
  // They are said to be in a "detached" state although the RewriteContexts
  // themselves don't know that.  They will continue performing their
  // Rewrite in the RewriteThread, and caching the results.  And until
  // they complete, the RewriteDriver must stay alive and not be Recycled
  // or deleted.  WaitForCompletion() blocks until all detached_rewrites
  // have been retired.
  RewriteContextSet detached_rewrites_ GUARDED_BY(rewrite_mutex());

  // Rewrites that may possibly be satisfied from metadata cache alone.
  int possibly_quick_rewrites_ GUARDED_BY(rewrite_mutex());

  // List of RewriteContext objects for fetch to delete. We do it in
  // clear as a simplification.
  RewriteContextVector fetch_rewrites_;

  // These objects are provided on construction or later, and are
  // owned by the caller.
  FileSystem* file_system_;
  ServerContext* server_context_;
  Scheduler* scheduler_;
  UrlAsyncFetcher* default_url_async_fetcher_;  // the fetcher we got at ctor

  // This is the fetcher we use --- it's either the default_url_async_fetcher_,
  // or whatever it was temporarily overridden to by SetSessionFetcher.
  // This is either owned externally or via owned_url_async_fetchers_.
  UrlAsyncFetcher* url_async_fetcher_;

  // This is the fetcher that is used to distribute rewrites if enabled. This
  // can be NULL if distributed rewriting is not configured. This is owned
  // externally.
  UrlAsyncFetcher* distributed_async_fetcher_;

  // A list of all the UrlAsyncFetchers that we own, as set with
  // SetSessionFetcher.
  std::vector<UrlAsyncFetcher*> owned_url_async_fetchers_;

  DomStatsFilter* dom_stats_filter_;
  scoped_ptr<HtmlWriterFilter> html_writer_filter_;

  ScanFilter scan_filter_;
  scoped_ptr<DomainRewriteFilter> domain_rewriter_;
  scoped_ptr<UrlLeftTrimFilter> url_trim_filter_;

  // Maps rewrite context partition keys to the context responsible for
  // rewriting them, in case a URL occurs more than once.
  typedef std::map<GoogleString, RewriteContext*> PrimaryRewriteContextMap;
  PrimaryRewriteContextMap primary_rewrite_context_map_;

  HtmlResourceSlotSet slots_;
  InlineResourceSlotSet inline_slots_;
  InlineAttributeSlotSet inline_attribute_slots_;
  SrcSetSlotCollectionSet srcset_collections_;

  scoped_ptr<RewriteOptions> options_;

  RewriteDriverPool* controlling_pool_;  // or NULL if this has custom options.

  // Object which manages CacheUrlAsyncFetcher async operations.
  scoped_ptr<CacheUrlAsyncFetcher::AsyncOpHooks>
      cache_url_async_fetcher_async_op_hooks_;

  // The default resource encoder
  UrlSegmentEncoder default_encoder_;

  // The first chain of filters called before waiting for Rewrites to complete.
  FilterList early_pre_render_filters_;
  // The second chain of filters called before waiting for Rewrites to complete.
  FilterList pre_render_filters_;

  // Owned by us.
  std::vector<ResourceUrlClaimant*> resource_claimants_;

  // A container of filters to delete when RewriteDriver is deleted.  This
  // can include pre_render_filters as well as those added to the post-render
  // chain owned by HtmlParse.
  FilterVector filters_to_delete_;

  QueuedWorkerPool::Sequence* html_worker_;
  QueuedWorkerPool::Sequence* rewrite_worker_;
  QueuedWorkerPool::Sequence* low_priority_rewrite_worker_;
  scoped_ptr<Scheduler::Sequence> scheduler_sequence_;

  Writer* writer_;

  // Stores any cached properties associated with the current URL and fallback
  // URL (i.e. without query params).
  FallbackPropertyPage* fallback_property_page_;

  // Boolean value which tells whether property page is owned by driver or not.
  bool owns_property_page_;

  // Per-origin property page, for things which are site-wide.
  scoped_ptr<PropertyPage> origin_property_page_;

  // Device type for the current property page.
  UserAgentMatcher::DeviceType device_type_;

  // The critical image finder and critical selector finder will lazy-init these
  // fields.
  scoped_ptr<CriticalImagesInfo> critical_images_info_;
  scoped_ptr<CriticalSelectorInfo> critical_selector_info_;

  // Memoized computation of whether the current doc has an XHTML mimetype.
  bool xhtml_mimetype_computed_;
  XhtmlStatus xhtml_status_ : 8;

  // Number of images whose low quality images are inlined in the html page by
  // InlinePreviewFilter.
  int num_inline_preview_images_;

  // The total number of bytes for which ParseText is called.
  int num_bytes_in_;

  DebugFilter* debug_filter_;

  scoped_ptr<FlushEarlyInfo> flush_early_info_;
  scoped_ptr<DependencyTracker> dependency_tracker_;

  bool can_rewrite_resources_;
  bool is_nested_;

  // Additional request context that may outlive this RewriteDriver. (Thus,
  // the context is reference counted.)
  RequestContextPtr request_context_;

  // Start time for HTML requests. Used for statistics reporting.
  int64 start_time_ms_;

  scoped_ptr<RequestProperties> request_properties_;

  // Helps make sure RewriteDriver and its children are initialized exactly
  // once, allowing for multiple calls to RewriteDriver::Initialize as long
  // as they are matched to RewriteDriver::Terminate.
  static int initialized_count_;

  // True if this RewriteDriver attempted to distribute the rewrite. This is
  // used to prevent a second attempt in case the first errored out.
  bool tried_to_distribute_fetch_;

  // If false, add data-pagespeed-no-defer attribute to the script inserted by
  // add_instrumentation filter.
  bool defer_instrumentation_script_;

  // Indicates whether this document is determined to be AMP-HTML.
  bool is_amp_;

  // Indicates that task execution has started.
  AtomicBool executing_rewrite_tasks_;

  // Downstream cache object used for issuing purges.
  DownstreamCachePurger downstream_cache_purger_;

  // Any PageSpeed options stripped from the original URL.
  GoogleString pagespeed_query_params_;

  // Any PageSpeed option cookies from the original request.
  GoogleString pagespeed_option_cookies_;

  DISALLOW_COPY_AND_ASSIGN(RewriteDriver);
};

// Subclass of HTTPCache::Callback that incorporates a given RewriteOptions'
// invalidation policy.
//
// The constructor is protected, so this class can only be instantiated through
// a subclass.
class OptionsAwareHTTPCacheCallback : public HTTPCache::Callback {
 public:
  virtual ~OptionsAwareHTTPCacheCallback();
  // NOTE(review): the three virtual methods below presumably override hooks
  // declared on HTTPCache::Callback, which is not visible in this header --
  // confirm against http_cache.h.
  virtual bool IsCacheValid(const GoogleString& key,
                            const ResponseHeaders& headers);
  virtual int64 OverrideCacheTtlMs(const GoogleString& key);
  virtual ResponseHeaders::VaryOption RespectVaryOnResources() const;

  // Validates the specified response for the given key (URL) and request
  // context, given the specified options.  This is for checking whether a
  // cached response can still be used, not for determining whether an entry
  // should be written to an HTTP cache.
  //
  // Being static, this overload takes the options and request context
  // explicitly instead of reading them from an instance.
  static bool IsCacheValid(const GoogleString& key,
                           const RewriteOptions& rewrite_options,
                           const RequestContextPtr& request_ctx,
                           const ResponseHeaders& headers);

 protected:
  // Sub-classes need to ensure that rewrite_options remains valid until
  // Callback::Done finishes.
  OptionsAwareHTTPCacheCallback(
      const RewriteOptions* rewrite_options,
      const RequestContextPtr& request_ctx);

 private:
  // Presumably the rewrite_options passed to the constructor (confirm in the
  // .cc file).  Held as a raw pointer; see the lifetime requirement stated on
  // the constructor.
  const RewriteOptions* rewrite_options_;

  DISALLOW_COPY_AND_ASSIGN(OptionsAwareHTTPCacheCallback);
};

}  // namespace net_instaweb

#endif  // NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_