Pull out the InputInfo validation functions (including the file system
metadata cache helpers) into a separate file, so I can call them when determining what to preload.
This commit is contained in:
@@ -1601,6 +1601,7 @@
|
||||
'rewriter/inline_attribute_slot.cc',
|
||||
'rewriter/inline_resource_slot.cc',
|
||||
'rewriter/inline_rewrite_context.cc',
|
||||
'rewriter/input_info_utils.cc',
|
||||
'rewriter/insert_amp_link_filter.cc',
|
||||
'rewriter/insert_dns_prefetch_filter.cc',
|
||||
'rewriter/insert_ga_filter.cc',
|
||||
|
||||
@@ -0,0 +1,209 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
//
|
||||
|
||||
#include "net/instaweb/rewriter/public/input_info_utils.h"
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "net/instaweb/rewriter/public/rewrite_options.h"
|
||||
#include "net/instaweb/rewriter/public/server_context.h"
|
||||
#include "pagespeed/kernel/base/file_system.h"
|
||||
#include "pagespeed/kernel/base/hasher.h"
|
||||
#include "pagespeed/kernel/base/proto_util.h"
|
||||
#include "pagespeed/kernel/base/shared_string.h"
|
||||
#include "pagespeed/kernel/base/string.h"
|
||||
#include "pagespeed/kernel/base/string_util.h"
|
||||
#include "pagespeed/kernel/base/timer.h"
|
||||
#include "pagespeed/kernel/cache/cache_interface.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
namespace input_info_utils {
|
||||
|
||||
namespace {
|
||||
|
||||
bool MatchesFileSystemMetadataCacheEntry(
|
||||
const InputInfo& input_info,
|
||||
const InputInfo& fsmdc_info,
|
||||
int64 mtime_ms) {
|
||||
return (fsmdc_info.has_last_modified_time_ms() &&
|
||||
fsmdc_info.has_input_content_hash() &&
|
||||
fsmdc_info.last_modified_time_ms() == mtime_ms &&
|
||||
fsmdc_info.input_content_hash() == input_info.input_content_hash());
|
||||
}
|
||||
|
||||
// Checks if the stat() data about the input_info's file matches that in the
|
||||
// filesystem metadata cache; it needs to be for the input to be "valid".
|
||||
bool IsFilesystemMetadataCacheCurrent(CacheInterface* fsmdc,
|
||||
const GoogleString& file_key,
|
||||
const InputInfo& input_info,
|
||||
int64 mtime_ms) {
|
||||
// Get the filesystem metadata cache (FSMDC) entry for the filename.
|
||||
// If we found an entry,
|
||||
// Extract the FSMDC timestamp and contents hash.
|
||||
// If the FSMDC timestamp == the file's current timestamp,
|
||||
// (the FSMDC contents hash is valid/current/correct)
|
||||
// If the FSMDC content hash == the metadata cache's content hash,
|
||||
// The metadata cache's entry is valid so its input_info is valid.
|
||||
// Else
|
||||
// Return false as the metadata cache's entry is not valid as
|
||||
// someone has changed it on us.
|
||||
// Else
|
||||
// Return false as our FSMDC entry is out of date so we can't
|
||||
// tell if the metadata cache's input_info is valid.
|
||||
// Else
|
||||
// Return false as we can't tell if the metadata cache's input_info is
|
||||
// valid.
|
||||
CacheInterface::SynchronousCallback callback;
|
||||
fsmdc->Get(file_key, &callback);
|
||||
DCHECK(callback.called());
|
||||
if (callback.state() == CacheInterface::kAvailable) {
|
||||
StringPiece val_str = callback.value()->Value();
|
||||
ArrayInputStream input(val_str.data(), val_str.size());
|
||||
InputInfo fsmdc_info;
|
||||
if (fsmdc_info.ParseFromZeroCopyStream(&input)) {
|
||||
// We have a filesystem metadata cache entry: if its timestamp equals
|
||||
// the file's, and its contents hash equals the metadata caches's, then
|
||||
// the input is valid.
|
||||
return MatchesFileSystemMetadataCacheEntry(
|
||||
input_info, fsmdc_info, mtime_ms);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update the filesystem metadata cache with the timestamp and contents hash
|
||||
// of the given input's file (which is read from disk to compute the hash).
|
||||
// Returns false if the file cannot be read.
|
||||
bool UpdateFilesystemMetadataCache(ServerContext* server_context,
|
||||
const GoogleString& file_key,
|
||||
const InputInfo& input_info,
|
||||
int64 mtime_ms,
|
||||
CacheInterface* fsmdc,
|
||||
InputInfo* fsmdc_info) {
|
||||
GoogleString contents;
|
||||
if (!server_context->file_system()->ReadFile(
|
||||
input_info.filename().c_str(), &contents,
|
||||
server_context->message_handler())) {
|
||||
return false;
|
||||
}
|
||||
GoogleString contents_hash =
|
||||
server_context->contents_hasher()->Hash(contents);
|
||||
fsmdc_info->set_type(InputInfo::FILE_BASED);
|
||||
DCHECK_LT(0, mtime_ms);
|
||||
fsmdc_info->set_last_modified_time_ms(mtime_ms);
|
||||
fsmdc_info->set_input_content_hash(contents_hash);
|
||||
GoogleString buf;
|
||||
{
|
||||
// MUST be in a block so that sstream is destructed to finalize buf.
|
||||
StringOutputStream sstream(&buf);
|
||||
fsmdc_info->SerializeToZeroCopyStream(&sstream);
|
||||
}
|
||||
fsmdc->PutSwappingString(file_key, &buf);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Checks whether the given input is still unchanged.
|
||||
bool IsInputValid(
|
||||
ServerContext* server_context, const RewriteOptions* options,
|
||||
bool nested_rewrite, const InputInfo& input_info,
|
||||
int64 now_ms, bool* purged, bool* stale_rewrite) {
|
||||
switch (input_info.type()) {
|
||||
case InputInfo::CACHED: {
|
||||
// It is invalid if cacheable inputs have expired or ...
|
||||
DCHECK(input_info.has_expiration_time_ms());
|
||||
if (input_info.has_url()) {
|
||||
// We do not search wildcards when validating metadata because
|
||||
// that would require N wildcard matches (not even a
|
||||
// FastWildcardGroup) per input dependency.
|
||||
if (!options->IsUrlCacheValid(input_info.url(), input_info.date_ms(),
|
||||
false /* search_wildcards */)) {
|
||||
*purged = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!input_info.has_expiration_time_ms()) {
|
||||
return false;
|
||||
}
|
||||
int64 ttl_ms = input_info.expiration_time_ms() - now_ms;
|
||||
if (ttl_ms > 0) {
|
||||
return true;
|
||||
} else if (
|
||||
!nested_rewrite &&
|
||||
ttl_ms + options->metadata_cache_staleness_threshold_ms() > 0) {
|
||||
*stale_rewrite = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case InputInfo::FILE_BASED: {
|
||||
// ... if file-based inputs have changed.
|
||||
DCHECK(input_info.has_last_modified_time_ms() &&
|
||||
input_info.has_filename());
|
||||
if (!input_info.has_last_modified_time_ms() ||
|
||||
!input_info.has_filename()) {
|
||||
return false;
|
||||
}
|
||||
int64 mtime_sec;
|
||||
server_context->file_system()->Mtime(input_info.filename(), &mtime_sec,
|
||||
server_context->message_handler());
|
||||
int64 mtime_ms = mtime_sec * Timer::kSecondMs;
|
||||
|
||||
CacheInterface* fsmdc = server_context->filesystem_metadata_cache();
|
||||
if (fsmdc != nullptr) {
|
||||
CHECK(fsmdc->IsBlocking());
|
||||
if (!input_info.has_input_content_hash()) {
|
||||
return false;
|
||||
}
|
||||
// Construct a host-specific key. The format is somewhat arbitrary,
|
||||
// all it needs to do is differentiate the same path on different
|
||||
// hosts. If the size of the key becomes a concern we can hash it
|
||||
// and hope.
|
||||
GoogleString file_key;
|
||||
StrAppend(&file_key, "file://", server_context->hostname(),
|
||||
input_info.filename());
|
||||
if (IsFilesystemMetadataCacheCurrent(fsmdc, file_key, input_info,
|
||||
mtime_ms)) {
|
||||
return true;
|
||||
}
|
||||
InputInfo fsmdc_info;
|
||||
if (!UpdateFilesystemMetadataCache(server_context, file_key,
|
||||
input_info, mtime_ms, fsmdc,
|
||||
&fsmdc_info)) {
|
||||
return false;
|
||||
}
|
||||
// Check again now that we KNOW we have the most up-to-date data
|
||||
// in the filesystem metadata cache.
|
||||
return MatchesFileSystemMetadataCacheEntry(
|
||||
input_info, fsmdc_info, mtime_ms);
|
||||
} else {
|
||||
DCHECK_LT(0, input_info.last_modified_time_ms());
|
||||
return (mtime_ms == input_info.last_modified_time_ms());
|
||||
}
|
||||
}
|
||||
case InputInfo::ALWAYS_VALID:
|
||||
return true;
|
||||
}
|
||||
|
||||
LOG(DFATAL) << "Corrupt InputInfo object !?";
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace input_info_utils
|
||||
} // namespace net_instaweb
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright 2016 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: morlovich@google.com (Maks Orlovich)
|
||||
|
||||
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_INPUT_INFO_UTILS_H_
|
||||
#define NET_INSTAWEB_REWRITER_PUBLIC_INPUT_INFO_UTILS_H_
|
||||
|
||||
#include "net/instaweb/rewriter/input_info.pb.h"
|
||||
#include "net/instaweb/rewriter/public/rewrite_options.h"
|
||||
#include "net/instaweb/rewriter/public/server_context.h"
|
||||
#include "pagespeed/kernel/base/basictypes.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
namespace input_info_utils {
|
||||
|
||||
// Computes whether the given input_info is valid at now_ms, with the filesystem
|
||||
// and its metadata cache in server_context, considering invalidation
|
||||
// information and policy in options.
|
||||
//
|
||||
// *purged will be set if the entry was invalidated due to a cache purge.
|
||||
// *stale_rewrite will be set (and true will be returned) if
|
||||
// options->metadata_cache_staleness_threshold_ms() permitted reuse past
|
||||
// expiration at this time, and the rewrite isn't nested.
|
||||
bool IsInputValid(
|
||||
ServerContext* server_context, const RewriteOptions* options,
|
||||
bool nested_rewrite, const InputInfo& input_info,
|
||||
int64 now_ms, bool* purged, bool* stale_rewrite);
|
||||
|
||||
} // namespace input_info_utils
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_REWRITER_PUBLIC_INPUT_INFO_UTILS_H_
|
||||
@@ -41,6 +41,7 @@
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/rewriter/cached_result.pb.h"
|
||||
#include "net/instaweb/rewriter/public/inline_output_resource.h"
|
||||
#include "net/instaweb/rewriter/public/input_info_utils.h"
|
||||
#include "net/instaweb/rewriter/public/output_resource.h"
|
||||
#include "net/instaweb/rewriter/public/resource.h"
|
||||
#include "net/instaweb/rewriter/public/resource_namer.h"
|
||||
@@ -55,7 +56,6 @@
|
||||
#include "pagespeed/kernel/base/base64_util.h"
|
||||
#include "pagespeed/kernel/base/callback.h"
|
||||
#include "pagespeed/kernel/base/dynamic_annotations.h" // RunningOnValgrind
|
||||
#include "pagespeed/kernel/base/file_system.h"
|
||||
#include "pagespeed/kernel/base/function.h"
|
||||
#include "pagespeed/kernel/base/hasher.h"
|
||||
#include "pagespeed/kernel/base/message_handler.h"
|
||||
@@ -297,169 +297,12 @@ class RewriteContext::OutputCacheCallback : public CacheInterface::Callback {
|
||||
}
|
||||
|
||||
private:
|
||||
bool AreInputInfosEqual(const InputInfo& input_info,
|
||||
const InputInfo& fsmdc_info,
|
||||
int64 mtime_ms) {
|
||||
return (fsmdc_info.has_last_modified_time_ms() &&
|
||||
fsmdc_info.has_input_content_hash() &&
|
||||
fsmdc_info.last_modified_time_ms() == mtime_ms &&
|
||||
fsmdc_info.input_content_hash() == input_info.input_content_hash());
|
||||
}
|
||||
|
||||
// Checks if the stat() data about the input_info's file matches that in the
|
||||
// filesystem metadata cache; it needs to be for the input to be "valid".
|
||||
bool IsFilesystemMetadataCacheCurrent(CacheInterface* fsmdc,
|
||||
const GoogleString& file_key,
|
||||
const InputInfo& input_info,
|
||||
int64 mtime_ms) {
|
||||
// Get the filesystem metadata cache (FSMDC) entry for the filename.
|
||||
// If we found an entry,
|
||||
// Extract the FSMDC timestamp and contents hash.
|
||||
// If the FSMDC timestamp == the file's current timestamp,
|
||||
// (the FSMDC contents hash is valid/current/correct)
|
||||
// If the FSMDC content hash == the metadata cache's content hash,
|
||||
// The metadata cache's entry is valid so its input_info is valid.
|
||||
// Else
|
||||
// Return false as the metadata cache's entry is not valid as
|
||||
// someone has changed it on us.
|
||||
// Else
|
||||
// Return false as our FSMDC entry is out of date so we can't
|
||||
// tell if the metadata cache's input_info is valid.
|
||||
// Else
|
||||
// Return false as we can't tell if the metadata cache's input_info is
|
||||
// valid.
|
||||
CacheInterface::SynchronousCallback callback;
|
||||
fsmdc->Get(file_key, &callback);
|
||||
DCHECK(callback.called());
|
||||
if (callback.state() == CacheInterface::kAvailable) {
|
||||
StringPiece val_str = callback.value()->Value();
|
||||
ArrayInputStream input(val_str.data(), val_str.size());
|
||||
InputInfo fsmdc_info;
|
||||
if (fsmdc_info.ParseFromZeroCopyStream(&input)) {
|
||||
// We have a filesystem metadata cache entry: if its timestamp equals
|
||||
// the file's, and its contents hash equals the metadata caches's, then
|
||||
// the input is valid.
|
||||
return AreInputInfosEqual(input_info, fsmdc_info, mtime_ms);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update the filesystem metadata cache with the timestamp and contents hash
|
||||
// of the given input's file (which is read from disk to compute the hash).
|
||||
// Returns false if the file cannot be read.
|
||||
bool UpdateFilesystemMetadataCache(ServerContext* server_context,
|
||||
const GoogleString& file_key,
|
||||
const InputInfo& input_info,
|
||||
int64 mtime_ms,
|
||||
CacheInterface* fsmdc,
|
||||
InputInfo* fsmdc_info) {
|
||||
GoogleString contents;
|
||||
if (!server_context->file_system()->ReadFile(
|
||||
input_info.filename().c_str(), &contents,
|
||||
server_context->message_handler())) {
|
||||
return false;
|
||||
}
|
||||
GoogleString contents_hash =
|
||||
server_context->contents_hasher()->Hash(contents);
|
||||
fsmdc_info->set_type(InputInfo::FILE_BASED);
|
||||
DCHECK_LT(0, mtime_ms);
|
||||
fsmdc_info->set_last_modified_time_ms(mtime_ms);
|
||||
fsmdc_info->set_input_content_hash(contents_hash);
|
||||
GoogleString buf;
|
||||
{
|
||||
// MUST be in a block so that sstream is destructed to finalize buf.
|
||||
StringOutputStream sstream(&buf);
|
||||
fsmdc_info->SerializeToZeroCopyStream(&sstream);
|
||||
}
|
||||
fsmdc->PutSwappingString(file_key, &buf);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Checks whether the given input is still unchanged.
|
||||
bool IsInputValid(const InputInfo& input_info, int64 now_ms, bool* purged,
|
||||
bool* stale_rewrite) {
|
||||
switch (input_info.type()) {
|
||||
case InputInfo::CACHED: {
|
||||
// It is invalid if cacheable inputs have expired or ...
|
||||
DCHECK(input_info.has_expiration_time_ms());
|
||||
const RewriteOptions* options = rewrite_context_->Options();
|
||||
if (input_info.has_url()) {
|
||||
// We do not search wildcards when validating metadata because
|
||||
// that would require N wildcard matches (not even a
|
||||
// FastWildcardGroup) per input dependency.
|
||||
if (!options->IsUrlCacheValid(input_info.url(), input_info.date_ms(),
|
||||
false /* search_wildcards */)) {
|
||||
*purged = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!input_info.has_expiration_time_ms()) {
|
||||
return false;
|
||||
}
|
||||
int64 ttl_ms = input_info.expiration_time_ms() - now_ms;
|
||||
if (ttl_ms > 0) {
|
||||
return true;
|
||||
} else if (
|
||||
!rewrite_context_->has_parent() &&
|
||||
ttl_ms + options->metadata_cache_staleness_threshold_ms() > 0) {
|
||||
*stale_rewrite = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case InputInfo::FILE_BASED: {
|
||||
ServerContext* server_context = rewrite_context_->FindServerContext();
|
||||
|
||||
// ... if file-based inputs have changed.
|
||||
DCHECK(input_info.has_last_modified_time_ms() &&
|
||||
input_info.has_filename());
|
||||
if (!input_info.has_last_modified_time_ms() ||
|
||||
!input_info.has_filename()) {
|
||||
return false;
|
||||
}
|
||||
int64 mtime_sec;
|
||||
server_context->file_system()->Mtime(input_info.filename(), &mtime_sec,
|
||||
server_context->message_handler());
|
||||
int64 mtime_ms = mtime_sec * Timer::kSecondMs;
|
||||
|
||||
CacheInterface* fsmdc = server_context->filesystem_metadata_cache();
|
||||
if (fsmdc != NULL) {
|
||||
CHECK(fsmdc->IsBlocking());
|
||||
if (!input_info.has_input_content_hash()) {
|
||||
return false;
|
||||
}
|
||||
// Construct a host-specific key. The format is somewhat arbitrary,
|
||||
// all it needs to do is differentiate the same path on different
|
||||
// hosts. If the size of the key becomes a concern we can hash it
|
||||
// and hope.
|
||||
GoogleString file_key;
|
||||
StrAppend(&file_key, "file://", server_context->hostname(),
|
||||
input_info.filename());
|
||||
if (IsFilesystemMetadataCacheCurrent(fsmdc, file_key, input_info,
|
||||
mtime_ms)) {
|
||||
return true;
|
||||
}
|
||||
InputInfo fsmdc_info;
|
||||
if (!UpdateFilesystemMetadataCache(server_context, file_key,
|
||||
input_info, mtime_ms, fsmdc,
|
||||
&fsmdc_info)) {
|
||||
return false;
|
||||
}
|
||||
// Check again now that we KNOW we have the most up-to-date data
|
||||
// in the filesystem metadata cache.
|
||||
return AreInputInfosEqual(input_info, fsmdc_info, mtime_ms);
|
||||
} else {
|
||||
DCHECK_LT(0, input_info.last_modified_time_ms());
|
||||
return (mtime_ms == input_info.last_modified_time_ms());
|
||||
}
|
||||
}
|
||||
case InputInfo::ALWAYS_VALID:
|
||||
return true;
|
||||
}
|
||||
|
||||
LOG(DFATAL) << "Corrupt InputInfo object !?";
|
||||
return false;
|
||||
return input_info_utils::IsInputValid(
|
||||
rewrite_context_->FindServerContext(), rewrite_context_->Options(),
|
||||
rewrite_context_->has_parent(), input_info, now_ms, purged,
|
||||
stale_rewrite);
|
||||
}
|
||||
|
||||
// Check that a CachedResult is valid, specifically, that all the inputs are
|
||||
|
||||
Reference in New Issue
Block a user