Collect validity information on dependencies we collect.

(Will be used to verify preload hint URLs, but isn't yet)
This commit is contained in:
Maks Orlovich
2016-08-29 16:10:28 -04:00
parent 7f44c27b6b
commit c511dae590
5 changed files with 232 additions and 40 deletions
@@ -65,6 +65,12 @@ class CollectDependenciesFilter::Context : public RewriteContext {
// We will never produce output, but always want to do stuff.
outputs->push_back(OutputResourcePtr(nullptr));
partitions->add_partition();
ResourcePtr resource(slot(0)->resource());
if (resource->loaded()) {
resource->AddInputInfoToPartition(
Resource::kIncludeInputHash, 0, partitions->mutable_partition(0));
}
return true;
}
@@ -95,7 +101,8 @@ class CollectDependenciesFilter::Context : public RewriteContext {
}
protected:
void ExtractNestedCssDependencies(const ResourcePtr& resource,
void ExtractNestedCssDependencies(const Dependency* parent_dep,
const ResourcePtr& resource,
CachedResult* partition) {
// TODO(morlovich): We should probably look inside <style> blocks like this,
// too?
@@ -125,9 +132,7 @@ class CollectDependenciesFilter::Context : public RewriteContext {
Dependency* dep = partition->add_collected_dependency();
dep->set_url(full_url.Spec().as_string());
dep->set_content_type(DEP_CSS);
// TODO(morlovich): Set validity_info, which should be based on
// the containing CSS. (Could also have some sort of mechanism
// to be conditional on it)
*dep->mutable_validity_info() = parent_dep->validity_info();
}
}
}
@@ -140,20 +145,35 @@ class CollectDependenciesFilter::Context : public RewriteContext {
dep->set_url(slot(0)->resource()->url());
dep->set_content_type(dep_type_);
if (dep_type_ == DEP_CSS) {
ExtractNestedCssDependencies(slot(0)->resource(), partition);
// The framework collected input info from any filter that ran before
// us, but not us (since it will do it after we finish work) --- which
// matters if our input is an unoptimized result, so add in our input info.
for (int i = 0; i < partition->input_size(); ++i) {
slot(0)->ReportInput(partition->input(i));
}
// TODO(morlovich): Set validity_info.
// This is surprisingly complicated, since essentially we have to get info
// from all the steps along the RewriteContext, and the previous
// RewriteContexts already got deleted. (This isn't needed in normal
// operation since invalidation in the middle of a chain would change
// the input URL in the middle of the chain, but we are trying to skip
// to the end).
// (is_pagespeed_resource is also not currently set, but I am not sure
// I actually want that: validity_info may be useful for non-optimized
// resources as well).
if (slot(0)->inputs() != nullptr) {
for (const InputInfo& input : *slot(0)->inputs()) {
InputInfo* stored_copy = dep->add_validity_info();
*stored_copy = input;
// Drop the parts of the info we can't use for checking validity
// of push.
stored_copy->clear_input_content_hash();
stored_copy->clear_disable_further_processing();
stored_copy->clear_index();
}
}
// Note: this needs to happen after the above since we need to propagate
// validity_info.
if (dep_type_ == DEP_CSS) {
ExtractNestedCssDependencies(dep, slot(0)->resource(), partition);
}
// TODO(morlovich): is_pagespeed_resource is not currently set, but I am not
// sure I actually want that: validity_info may be useful for non-optimized
// resources as well, and we set that already.
CHECK(output_resource.get() == nullptr);
CHECK_EQ(0, partition_index);
@@ -272,6 +292,7 @@ void CollectDependenciesFilter::StartElementImpl(HtmlElement* element) {
continue;
}
ResourceSlotPtr slot(driver()->GetSlot(resource, element, attr));
slot->set_need_aggregate_input_info(true);
Context* context = new Context(
attributes[i].category == semantic_type::kStylesheet ?
DEP_CSS : DEP_JAVASCRIPT,
@@ -18,6 +18,8 @@
#include "net/instaweb/rewriter/public/dependency_tracker.h"
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/http/public/http_cache_failure.h"
#include "net/instaweb/rewriter/dependencies.pb.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
@@ -34,8 +36,6 @@
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_parse_test_base.h"
#include "pagespeed/kernel/http/content_type.h"
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/http/public/http_cache_failure.h"
#include "pagespeed/opt/http/request_context.h"
namespace net_instaweb {
@@ -62,12 +62,13 @@ class CollectDependenciesFilterTest : public RewriteTestBase {
SetResponseWithDefaultHeaders("a.css", kContentTypeCss,
" * { display: block }", 100);
SetResponseWithDefaultHeaders("c.css", kContentTypeCss,
" * { display: list-item }", 100);
" * { display: list-item }", 150);
SetResponseWithDefaultHeaders("b.js", kContentTypeJavascript,
" var b = 42", 200);
SetResponseWithDefaultHeaders("d.js", kContentTypeJavascript,
" var d = 32", 200);
" var d = 32", 250);
start_time_ms_ = timer()->NowMs();
}
void ResetDriver() {
@@ -80,8 +81,14 @@ class CollectDependenciesFilterTest : public RewriteTestBase {
SetHtmlMimetype(); // Don't wrap scripts in <![CDATA[ ]]>
}
// Formats, as a decimal string, the absolute time in milliseconds that lies
// delta_sec seconds after start_time_ms_.
GoogleString FormatRelTimeSec(int delta_sec) {
  const int64 absolute_time_ms = start_time_ms_ + delta_sec * Timer::kSecondMs;
  return Integer64ToString(absolute_time_ms);
}
PropertyCache* pcache_;
PropertyPage* page_;
int64 start_time_ms_;
const int64 kYearSec = Timer::kYearMs / Timer::kSecondMs;
};
TEST_F(CollectDependenciesFilterTest, BasicOperation) {
@@ -103,21 +110,45 @@ TEST_F(CollectDependenciesFilterTest, BasicOperation) {
DependencyTracker* tracker = rewrite_driver()->dependency_tracker();
rewrite_driver()->StartParse(kTestDomain);
ASSERT_TRUE(tracker->read_in_info() != nullptr);
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(StrCat(
"dependency {"
"url: 'http://test.com/A.a.css.pagespeed.cf.0.css'"
"content_type: DEP_CSS"
"content_type: DEP_CSS "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(100), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"last_modified_time_ms: ", FormatRelTimeSec(0), " ",
"expiration_time_ms: ", FormatRelTimeSec(kYearSec), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}"
"dependency {"
"url: 'http://test.com/b.js.pagespeed.jm.0.js'"
"content_type: DEP_JAVASCRIPT"
"}"));
"content_type: DEP_JAVASCRIPT "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(200), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"last_modified_time_ms: ", FormatRelTimeSec(0), " ",
"expiration_time_ms: ", FormatRelTimeSec(kYearSec), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}")));
rewrite_driver()->FinishParse();
}
TEST_F(CollectDependenciesFilterTest, MediaTopLevel) {
SetResponseWithDefaultHeaders("e.css", kContentTypeCss,
" * { display: inline-block }", 100);
" * { display: inline-block }", 400);
rewrite_driver()->AddFilters();
@@ -133,15 +164,25 @@ TEST_F(CollectDependenciesFilterTest, MediaTopLevel) {
DependencyTracker* tracker = rewrite_driver()->dependency_tracker();
rewrite_driver()->StartParse(kTestDomain);
ASSERT_TRUE(tracker->read_in_info() != nullptr);
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(StrCat(
"dependency {"
"url: 'http://test.com/a.css'"
"content_type: DEP_CSS"
"content_type: DEP_CSS "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(100), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}"
"dependency {"
"url: 'http://test.com/e.css'"
"content_type: DEP_CSS"
"}"));
"content_type: DEP_CSS "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(400), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}")));
rewrite_driver()->FinishParse();
}
@@ -182,6 +223,39 @@ TEST_F(CollectDependenciesFilterTest, HandleEmptyResources) {
rewrite_driver()->FinishParse();
}
// Checks that dependency information, including validity_info, is recorded
// for resources whose URLs end up unrewritten: the expected proto below lists
// the plain a.css and b.js URLs, each with a single CACHED validity_info.
TEST_F(CollectDependenciesFilterTest, Unoptimized) {
const char kInput[] = "<link rel=stylesheet href=a.css>"
"<script src=b.js></script>";
// NOTE(review): presumably asserts that the output HTML equals the input ---
// confirm against RewriteTestBase.
ValidateNoChanges("unoptimized", kInput);
// Read stuff back in from pcache.
ResetDriver();
DependencyTracker* tracker = rewrite_driver()->dependency_tracker();
rewrite_driver()->StartParse(kTestDomain);
ASSERT_TRUE(tracker->read_in_info() != nullptr);
// The expiration offsets (100 and 200) match the last argument given to
// SetResponseWithDefaultHeaders for a.css and b.js in the fixture setup
// (presumably a TTL in seconds --- confirm); date_ms is the fixture's
// start time.
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(StrCat(
"dependency {"
"url: 'http://test.com/a.css'"
"content_type: DEP_CSS "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(100), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}"
"dependency {"
"url: 'http://test.com/b.js'"
"content_type: DEP_JAVASCRIPT "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(200), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}")));
rewrite_driver()->FinishParse();
}
TEST_F(CollectDependenciesFilterTest, Inliners) {
// Currently we don't collect info on inline resources --- the filters
// themselves are expected to help --- but we should at least behave
@@ -235,11 +309,27 @@ TEST_F(CollectDependenciesFilterTest, Combiners) {
DependencyTracker* tracker = rewrite_driver()->dependency_tracker();
rewrite_driver()->StartParse(kTestDomain);
ASSERT_TRUE(tracker->read_in_info() != nullptr);
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(StrCat(
"dependency {"
"url: 'http://test.com/a.css+c.css.pagespeed.cc.0.css'"
"content_type: DEP_CSS"
"}"));
"content_type: DEP_CSS "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(100), " " // a.css
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(150), " " // c.css
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"last_modified_time_ms: ", FormatRelTimeSec(0), " ", // a + c
"expiration_time_ms: ", FormatRelTimeSec(kYearSec), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}")));
rewrite_driver()->FinishParse();
}
@@ -268,19 +358,63 @@ TEST_F(CollectDependenciesFilterTest, Chain) {
DependencyTracker* tracker = rewrite_driver()->dependency_tracker();
rewrite_driver()->StartParse(kTestDomain);
ASSERT_TRUE(tracker->read_in_info() != nullptr);
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(
EXPECT_THAT(*tracker->read_in_info(), EqualsProto(StrCat(StrCat(
"dependency {"
"url: 'http://test.com/A.a.css+c.css,Mcc.0.css.pagespeed.cf.0.css'"
"content_type: DEP_CSS"
"}"
"content_type: DEP_CSS "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(100), " " // a.css
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(150), " " // c.css
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"last_modified_time_ms: ", FormatRelTimeSec(0), " ", // a + c
"expiration_time_ms: ", FormatRelTimeSec(kYearSec), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"last_modified_time_ms: ", FormatRelTimeSec(0), " ", // (a + c).cf
"expiration_time_ms: ", FormatRelTimeSec(kYearSec), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}"),
"dependency {"
"url: 'http://test.com/b.js.pagespeed.jm.0.js'"
"content_type: DEP_JAVASCRIPT"
"content_type: DEP_JAVASCRIPT "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(200), " " // b.js
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"last_modified_time_ms: ", FormatRelTimeSec(0), " ", // b.js.jm
"expiration_time_ms: ", FormatRelTimeSec(kYearSec), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}"
"dependency {"
"url: 'http://test.com/d.js.pagespeed.jm.0.js'"
"content_type: DEP_JAVASCRIPT"
"}"));
"content_type: DEP_JAVASCRIPT "
"validity_info {"
"type: CACHED "
"expiration_time_ms: ", FormatRelTimeSec(250), " " // d.js
"date_ms: ", FormatRelTimeSec(0),
"}"
"validity_info {"
"type: CACHED "
"last_modified_time_ms: ", FormatRelTimeSec(0), " ", // d.js.jm
"expiration_time_ms: ", FormatRelTimeSec(kYearSec), " "
"date_ms: ", FormatRelTimeSec(0),
"}"
"}")));
rewrite_driver()->FinishParse();
}
+21 -2
View File
@@ -19,9 +19,11 @@
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_SLOT_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_SLOT_H_
#include <memory>
#include <set>
#include <vector>
#include "net/instaweb/rewriter/input_info.pb.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/ref_counted_ptr.h"
@@ -33,7 +35,6 @@
namespace net_instaweb {
class CachedResults;
class HtmlResourceSlot;
class ResourceSlot;
class RewriteContext;
@@ -58,7 +59,8 @@ class ResourceSlot : public RefCounted<ResourceSlot> {
disable_rendering_(false),
should_delete_element_(false),
disable_further_processing_(false),
was_optimized_(false) {
was_optimized_(false),
need_aggregate_input_info_(false) {
}
ResourcePtr resource() const { return resource_; }
@@ -129,6 +131,21 @@ class ResourceSlot : public RefCounted<ResourceSlot> {
return disable_further_processing_;
}
// When set to true, input info for every input affecting this slot is
// gathered from all of the RewriteContexts chained to it.
void set_need_aggregate_input_info(bool x) { need_aggregate_input_info_ = x; }
// Returns the flag last stored by set_need_aggregate_input_info().
bool need_aggregate_input_info() const { return need_aggregate_input_info_; }
// Appends a copy of |input| to this slot's list of reported inputs.
void ReportInput(const InputInfo& input);
// The inputs recorded via ReportInput(). May be nullptr: in the code visible
// here the list is only allocated on the first ReportInput() call.
const std::vector<InputInfo>* inputs() const { return inputs_.get(); }
// Render is not thread-safe. This must be called from the thread that
// owns the DOM or CSS file. The RewriteContext state machine will only
// call ResourceSlot::Render() on slots that were optimized successfully,
@@ -186,11 +203,13 @@ class ResourceSlot : public RefCounted<ResourceSlot> {
private:
ResourcePtr resource_;
std::unique_ptr<std::vector<InputInfo>> inputs_;
bool preserve_urls_;
bool disable_rendering_;
bool should_delete_element_;
bool disable_further_processing_;
bool was_optimized_;
bool need_aggregate_input_info_;
// We track the RewriteContexts that are attempting to rewrite this
// slot, to help us build a dependency graph between ResourceContexts.
+7
View File
@@ -41,6 +41,13 @@ bool ResourceSlot::DirectSetUrl(const StringPiece& url) {
return false;
}
void ResourceSlot::ReportInput(const InputInfo& input) {
if (inputs_ == nullptr) {
inputs_.reset(new std::vector<InputInfo>);
}
inputs_->push_back(input);
}
RewriteContext* ResourceSlot::LastContext() const {
if (contexts_.empty()) {
return NULL;
+11
View File
@@ -2536,6 +2536,17 @@ void RewriteContext::Propagate(bool render_slots) {
ResourceSlotPtr slot = slots_[slot_index];
ResourcePtr resource(outputs_[p]);
slot->SetResource(resource);
if (slot->need_aggregate_input_info()) {
  // The slot asked for aggregated input info, so hand it every input this
  // context depended on.
  //
  // First the dependencies stored on the partition table as a whole. These
  // must be indexed by the loop counter: indexing by p (the index used for
  // outputs_ above) would report a single entry other_dependency_size()
  // times, and could read past the end of the list.
  for (int i = 0; i < partitions_->other_dependency_size(); ++i) {
    slot->ReportInput(partitions_->other_dependency(i));
  }
  // Then the inputs recorded on this particular partition.
  for (int i = 0; i < partition->input_size(); ++i) {
    slot->ReportInput(partition->input(i));
  }
}
if (render_slots && partition->url_relocatable() && !was_too_busy_) {
// This check for relocatable is potentially unsafe in that later
// filters might still try to relocate the resource. We deal with