Files
incubator-pagespeed-ngx/psol/include/net/instaweb/rewriter/public/output_resource.h
T
2013-03-18 12:36:07 -04:00

300 lines
13 KiB
C++

/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: sligocki@google.com (Shawn Ligocki)
//
// Output resources are created by a ResourceManager. They must be able to
// write contents and return their url (so that it can be href'd on a page).
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_OUTPUT_RESOURCE_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_OUTPUT_RESOURCE_H_
#include "base/logging.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_namer.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/queued_worker_pool.h"
#include "net/instaweb/util/public/ref_counted_ptr.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
class CachedResult;
class Function;
class MessageHandler;
class NamedLock;
class ServerContext;
class RewriteOptions;
class Writer;
struct ContentType;
class OutputResource : public Resource {
public:
// Construct an OutputResource. For the moment, we pass in type redundantly
// even though full_name embeds an extension. This reflects current code
// structure rather than a principled stand on anything.
// TODO(jmaessen): remove redundancy.
//
// The 'options' argument can be NULL. This is done in the Fetch path because
// that field is only used for domain sharding, and during the fetch, further
// domain makes no sense.
OutputResource(ServerContext* server_context,
const StringPiece& resolved_base,
const StringPiece& unmapped_base, /* aka source domain */
const StringPiece& original_base, /* aka cnamed domain */
const ResourceNamer& resource_id,
const RewriteOptions* options,
OutputResourceKind kind);
virtual void LoadAndCallback(NotCacheablePolicy not_cacheable_policy,
AsyncCallback* callback,
MessageHandler* handler);
// NOTE: url() will crash if resource has does not have a hash set yet.
// Specifically, this will occur if the resource has not been completely
// written yet. Before that point, the final URL cannot be known.
//
// Note: the OutputResource will never have a query string, even when
// ModPagespeedAddOptionsToUrls is on.
virtual GoogleString url() const;
// Returns the same as url(), but with a spoofed hash in case no hash
// was set yet. Use this for error reporting, etc. where you do not
// know whether the output resource has a valid hash yet.
GoogleString UrlEvenIfHashNotSet();
// Save resource contents to disk, for testing and debugging purposes.
// Precondition: the resource contents must be fully set.
// The resource will be saved under the resource manager's filename_prefix()
// using with URL escaped using its filename_encoder().
void DumpToDisk(MessageHandler* handler);
// Lazily initialize and return creation_lock_. If the resource is expensive
// to create, this lock should be held during its creation to avoid multiple
// rewrites happening at once. The lock will be unlocked on destruction,
// DropCreationLock, or EndWrite (called from ResourceManager::Write)
NamedLock* CreationLock();
// Attempt to obtain a named lock for the resource without blocking. Return
// true if we do so.
bool TryLockForCreation();
// Attempt to obtain a named lock for the resource, scheduling the callback in
// the provided worker if we do so and scheduling a cancellation if locking
// times out.
void LockForCreation(QueuedWorkerPool::Sequence* worker, Function* callback);
// Drops the lock created by above, if any.
void DropCreationLock();
// Update the passed in CachedResult from the CachedResult in this
// OutputResource.
void UpdateCachedResultPreservingInputInfo(CachedResult* to_update) const;
// The NameKey describes the source url and rewriter used, without hash and
// content type information. This is used to find previously-computed filter
// results whose output hash and content type is unknown. The full name of a
// resource is of the form
// path/prefix.encoded_resource_name.hash.extension
// we know prefix and name, but not the hash, and we don't always even have
// the extension, which might have changes as the result of, for example image
// optimization (e.g. gif->png). But We can "remember" the hash/extension for
// as long as the origin URL was cacheable. So we construct this as a key:
// path/prefix.encoded_resource_name
// and use that to map to the hash-code and extension. If we know the
// hash-code then we may also be able to look up the contents in the same
// cache.
virtual GoogleString name_key() const;
// Builds a canonical URL in a form for use with the HTTP cache.
// The DomainLawyer from options is used to find the proper domain in case
// there is a fetch for the unsharded form, or the wrong shard.
//
// For example, if you have a resource styles.css
// ModPagespeedMapRewriteDomain master alias
// ModPagespeedShardDomain master shard1,shard2
// then all HTTP cache puts/gets will use the key "http://master/style.css",
// which can be obtained from an output resource using this method.
GoogleString HttpCacheKey() const;
// output-specific
const GoogleString& resolved_base() const { return resolved_base_; }
const GoogleString& unmapped_base() const { return unmapped_base_; }
const GoogleString& original_base() const { return original_base_; }
const ResourceNamer& full_name() const { return full_name_; }
ResourceNamer* mutable_full_name() { return &full_name_; }
StringPiece name() const { return full_name_.name(); }
StringPiece experiment() const { return full_name_.experiment(); }
StringPiece suffix() const;
StringPiece filter_prefix() const { return full_name_.id(); }
StringPiece hash() const { return full_name_.hash(); }
bool has_hash() const { return !hash().empty(); }
// Some output resources have mangled names derived from input resource(s),
// such as when combining CSS files. When we need to regenerate the output
// resource given just its URL we need to convert the URL back to its
// constituent input resource URLs. Our url() method can return a modified
// version of the input resources' host and path if our resource manager
// has a non-standard url_namer(), so when trying to regenerate the input
// resources' URL we need to reverse that modification. Note that the
// default UrlNamer class doesn't do any modification, and that the decoding
// of the leaf names is done separetly by the UrlMultipartEncoder class.
GoogleString decoded_base() const;
// In a scalable installation where the sprites must be kept in a
// database, we cannot serve HTML that references new resources
// that have not been committed yet, and committing to a database
// may take too long to block on the HTML rewrite. So we will want
// to refactor this to check to see whether the desired resource is
// already known. For now we'll assume we can commit to serving the
// resource during the HTML rewriter.
bool IsWritten() const { return writing_complete_; }
// Sets the type of the output resource, and thus also its suffix.
virtual void SetType(const ContentType* type);
// Whenever output resources are created via RewriteDriver
// (except DecodeOutputResource) it looks up cached
// information on any previous creation of that resource, including
// the full filename and any filter-specific metadata. If such
// information is available, this method will return non-NULL.
//
// Note: cached_result() will also be non-NULL if you explicitly
// create the result from a filter by calling EnsureCachedResultCreated()
//
// The output is const because we do not check that the CachedResult has not
// been written. If you want to modify the CachedResult, use
// EnsureCachedResultCreated instead.
const CachedResult* cached_result() const { return cached_result_; }
// If there is no cached output information, creates an empty one,
// without any information filled in (so no url(), or timestamps).
//
// The primary use of this method is to let filters store any metadata they
// want before calling ResourceManager::Write.
// This never returns null.
// We will DCHECK that the cached result has not been written.
CachedResult* EnsureCachedResultCreated();
void clear_cached_result();
// Sets the cached-result to an already-existing, externally owned
// buffer. We need to make sure not to free it on destruction.
void set_cached_result(CachedResult* cached_result) {
clear_cached_result();
cached_result_ = cached_result;
}
// Transfers up ownership of any cached result and clears pointer to it.
CachedResult* ReleaseCachedResult() {
CHECK(cached_result_owned_);
CachedResult* ret = cached_result_;
cached_result_ = NULL;
cached_result_owned_ = false;
return ret;
}
OutputResourceKind kind() const { return kind_; }
bool has_lock() const;
// This is called by CacheCallback::Done in rewrite_driver.cc.
void SetWritten(bool written) { writing_complete_ = true; }
virtual const RewriteOptions* rewrite_options() const {
return rewrite_options_;
}
// Interface for directly setting the value of the resource.
// It must not have been set otherwise! The return value of
// BeginWrite is owned by this OutputResource.
Writer* BeginWrite(MessageHandler* message_handler);
void EndWrite(MessageHandler* message_handler);
virtual bool UseHttpCache() const { return true; }
protected:
virtual ~OutputResource();
REFCOUNT_FRIEND_DECLARATION(OutputResource);
private:
friend class ResourceManagerTestingPeer;
friend class RewriteDriver;
friend class ServerContext;
friend class ServerContextTest;
void SetHash(const StringPiece& hash);
StringPiece extension() const { return full_name_.ext(); }
// Name of the file used by DumpToDisk.
GoogleString DumpFileName() const;
bool writing_complete_;
// TODO(jmarantz): We have a complicated semantic for CachedResult
// ownership as we transition from rewriting inline while html parsing
// to rewriting asynchronously. In the asynchronous world, the
// CachedResult object will be owned at a higher level. So it is not
// safe to call cached_result_.release() or .reset() directly. Instead,
// go through the clear_cached_result() method.
bool cached_result_owned_;
CachedResult* cached_result_;
// The resolved_base_ is the domain as reported by UrlPartnership.
// It takes into account domain-mapping via ModPagespeedMapRewriteDomain.
// However, the resolved base is not affected by sharding. Shard-selection
// is done when url() is called, relying on the content hash.
// The unmapped_base_ is the same domain as resolved_base_ but before domain
// mapping was applied; it is also known as the source domain since it is
// the domain of the resource's link.
// The original_base_ is the domain of the page that contains the resource
// link; it is also known as the CNAMEd domain since the page's URL is
// one that we manage and is one that we are rwriting.
// For example, given an HTML page with URL http://www.example.com/index.html
// containing elements "<base href='http://static.example.com/'>" and
// "<link rel='stylesheet' href='styles.css'>", and also a rule rewriting
// static.example.com -> cdn.com/example/static, then the OutputResource for
// the link element's href will have:
// resolved_base_ == http://cdn.com/example/static/
// unmapped_base_ == http://static.example.com/
// original_base_ == http://www.example.com/
GoogleString resolved_base_;
GoogleString unmapped_base_;
GoogleString original_base_;
ResourceNamer full_name_;
// Lazily evaluated and cached result of the url() method, which is const.
mutable GoogleString computed_url_;
// Lock guarding resource creation. Lazily initialized by CreationLock(),
// unlocked on destruction, DropCreationLock or EndWrite.
scoped_ptr<NamedLock> creation_lock_;
const RewriteOptions* rewrite_options_;
// Output resource have a 'kind' associated with them that controls the kind
// of caching we would like to be performed on them when written out.
OutputResourceKind kind_;
DISALLOW_COPY_AND_ASSIGN(OutputResource);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_OUTPUT_RESOURCE_H_