Do CSS indirect dependency collection in CollectDependenciesFilter, not CssFilter.

This is cleaner code-wise, and makes it easier to do additional things like properly order dependencies, have different domain policy for preload hints, etc.
(It's also not as expensive computationally as I first thought, since the CSS parser has code for just parsing imports)
Testcase for this is TEST_F(PushPreloadFilterTest, IndirectCollected)
This commit is contained in:
Maks Orlovich
2016-08-16 17:53:25 -04:00
parent bf4754060d
commit 0df83712e5
4 changed files with 81 additions and 114 deletions
@@ -22,6 +22,7 @@
#include "base/logging.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/dependencies.pb.h"
#include "net/instaweb/rewriter/public/css_util.h"
#include "net/instaweb/rewriter/public/dependency_tracker.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
@@ -31,14 +32,17 @@
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_result.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/http/data_url.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/semantic_type.h"
#include "util/utf8/public/unicodetext.h"
#include "webutil/css/parser.h"
namespace net_instaweb {
class CollectDependenciesFilter::Context : public RewriteContext {
public:
Context(DependencyType type, RewriteDriver* driver)
@@ -65,6 +69,58 @@ class CollectDependenciesFilter::Context : public RewriteContext {
return true;
}
bool DefinitelyNeededToRender(const std::unique_ptr<Css::Import>& import) {
StringVector media_types;
if (!css_util::ConvertMediaQueriesToStringVector(
import->media_queries(), &media_types)) {
// Something we don't understand. This includes things specifying
// media queries, which we can't evaluate, and therefore conservatively
// assume to be potentially unneeded.
return false;
}
if (media_types.empty()) {
return true; // @import "foo", without media specified.
}
for (auto& medium : media_types) {
if (StringCaseEqual(medium, "all") || StringCaseEqual(medium, "screen")) {
return true;
}
}
return false;
}
void ExtractNestedCssDependencies(const ResourcePtr& resource,
CachedResult* partition) {
Css::Parser parser(resource->ExtractUncompressedContents());
parser.set_preservation_mode(true);
// We avoid quirks-mode so that we do not "fix" something we shouldn't have.
parser.set_quirks_mode(false);
while (true) {
std::unique_ptr<Css::Import> import(parser.ParseNextImport());
if (import == nullptr ||
parser.errors_seen_mask() != Css::Parser::kNoError) {
break;
}
if (DefinitelyNeededToRender(import)) {
GoogleString rel_url(
import->link().utf8_data(), import->link().utf8_length());
GoogleUrl full_url(GoogleUrl(resource->url()), rel_url);
if (full_url.IsWebValid()) {
Dependency* dep = partition->add_collected_dependency();
dep->set_url(full_url.Spec().as_string());
dep->set_content_type(DEP_CSS);
// TODO(morlovich): Set validity_info, which should be based on
// the containing CSS. (Could also have some sort of mechanism
// to be conditional on it)
}
}
}
}
void Rewrite(int partition_index,
CachedResult* partition,
const OutputResourcePtr& output_resource) override {
@@ -72,6 +128,10 @@ class CollectDependenciesFilter::Context : public RewriteContext {
dep->set_url(slot(0)->resource()->url());
dep->set_content_type(dep_type_);
if (dep_type_ == DEP_CSS) {
ExtractNestedCssDependencies(slot(0)->resource(), partition);
}
// TODO(morlovich): Set validity_info.
// This is surprisingly complicated, since essentially we have to get info
// from all the steps along the RewriteContext, and the previous
@@ -113,16 +173,26 @@ class CollectDependenciesFilter::Context : public RewriteContext {
return;
}
if (num_output_partitions() == 1) {
DependencyTracker* dep_tracker = Driver()->dependency_tracker();
// We already allocated dep_id_, so we should report on it, with either
// the first dependency we collected, or nullptr.
if (num_output_partitions() == 1 &&
output_partition(0)->collected_dependency_size() > 0) {
CachedResult* result = output_partition(0);
Driver()->dependency_tracker()->ReportDependencyCandidate(
dep_id_,
result->collected_dependency_size() > 0 ?
&result->collected_dependency(0) : nullptr);
dep_tracker->ReportDependencyCandidate(dep_id_,
&result->collected_dependency(0));
// Any other dependencies get their brand new IDs.
// TODO(morlovich): They actually should be hinted right after dependency
// dep_id_.
for (int c = 1; c < result->collected_dependency_size(); ++c) {
int additional_dep_id = dep_tracker->RegisterDependencyCandidate();
dep_tracker->ReportDependencyCandidate(
additional_dep_id, &result->collected_dependency(c));
}
} else {
Driver()->dependency_tracker()->ReportDependencyCandidate(
dep_id_, nullptr);
dep_tracker->ReportDependencyCandidate(dep_id_, nullptr);
}
reported_ = true;
}
+3 -86
View File
@@ -27,7 +27,6 @@
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/dependencies.pb.h"
#include "net/instaweb/rewriter/public/association_transformer.h"
#include "net/instaweb/rewriter/public/css_absolutify.h"
#include "net/instaweb/rewriter/public/css_flatten_imports_context.h"
@@ -38,7 +37,6 @@
#include "net/instaweb/rewriter/public/css_url_counter.h"
#include "net/instaweb/rewriter/public/css_util.h"
#include "net/instaweb/rewriter/public/data_url_input_resource.h"
#include "net/instaweb/rewriter/public/dependency_tracker.h"
#include "net/instaweb/rewriter/public/image_rewrite_filter.h"
#include "net/instaweb/rewriter/public/image_url_encoder.h"
#include "net/instaweb/rewriter/public/inline_attribute_slot.h"
@@ -72,6 +70,7 @@
#include "pagespeed/kernel/http/data_url.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/response_headers.h"
#include "pagespeed/kernel/http/semantic_type.h"
#include "pagespeed/kernel/util/simple_random.h"
#include "pagespeed/opt/logging/enums.pb.h"
#include "webutil/css/parser.h"
@@ -106,30 +105,6 @@ class SimpleAbsolutifyTransformer : public CssTagScanner::Transformer {
DISALLOW_COPY_AND_ASSIGN(SimpleAbsolutifyTransformer);
};
// Returns true if the stylesheet included in *child will be needed to
// display its parent.
bool ContainsImportThatWillBeNeeded(const CssHierarchy* child) {
// Some kinds of errors (media we can't understand, broken URLs)
// gets marked as !flattening_succeeded.
if (!child->flattening_succeeded()) {
return false;
}
// Now check the media.
if (child->media().empty()) {
// Denotes "all".
return true;
}
for (const GoogleString& medium : child->media()) {
if (StringCaseEqual(medium, "screen")) {
return true;
}
}
return false;
}
// All of the options that can affect image optimization can also affect
// CSS rewriting, due to embedded images. We will merge those in during
// Initialize. There are additional options that affect CSS files. Notably,
@@ -208,16 +183,13 @@ CssFilter::Context::Context(CssFilter* filter, RewriteDriver* driver,
rewrite_inline_char_node_(NULL),
rewrite_inline_attribute_(NULL),
rewrite_inline_css_kind_(kInsideStyleTag),
in_text_size_(-1),
dummy_dep_id_(-1),
deps_reported_(false) {
in_text_size_(-1) {
initial_css_base_gurl_.Reset(filter_->decoded_base_url());
DCHECK(initial_css_base_gurl_.IsWebValid());
initial_css_trim_gurl_.Reset(initial_css_base_gurl_);
}
CssFilter::Context::~Context() {
DCHECK(deps_reported_ || dummy_dep_id_ == -1);
}
// The base URL used when absolutifying sub-resources must be the input
@@ -316,8 +288,6 @@ bool CssFilter::Context::SendFallbackResponse(
}
void CssFilter::Context::Render() {
ReportDeps();
if (num_output_partitions() == 0) {
return;
}
@@ -345,44 +315,6 @@ void CssFilter::Context::Render() {
}
}
void CssFilter::Context::WillNotRender() {
ReportDeps();
}
void CssFilter::Context::Cancel() {
ReportDeps();
}
void CssFilter::Context::ReportDeps() {
if (deps_reported_ || dummy_dep_id_ == -1) {
return;
}
deps_reported_ = true;
DependencyTracker* dep_tracker = Driver()->dependency_tracker();
// Report actual dependencies we noticed.
if (num_output_partitions() == 1) {
CachedResult* result = output_partition(0);
for (int i = 0, n = result->collected_dependency_size(); i < n; ++i) {
int dep_id = dep_tracker->RegisterDependencyCandidate();
dep_tracker->ReportDependencyCandidate(
dep_id, &result->collected_dependency(i));
}
}
// Now drop the refcount one.
dep_tracker->ReportDependencyCandidate(dummy_dep_id_, nullptr);
}
void CssFilter::Context::Initiated() {
// Grab a dummy dependency ID to let dependency tracker we may have some stuff
// to report; we don't know exact number of them yet so we will just abuse
// this one as a refcount.
dummy_dep_id_ = Driver()->dependency_tracker()->RegisterDependencyCandidate();
}
void CssFilter::Context::SetupInlineRewrite(HtmlElement* style_element,
HtmlCharactersNode* text) {
// To handle nested rewrites of inline CSS, we internally handle it
@@ -659,19 +591,6 @@ void CssFilter::Context::Harvest() {
if (hierarchy_.flattening_succeeded() &&
hierarchy_.flattened_result_limit() > 0) {
hierarchy_.RollUpContents();
} else if (Options()->NeedsDependenciesCohort()) {
// If we are not flattening, we may want to push/prefetch the nested
// stylesheets.
for (CssHierarchy* child : hierarchy_.children()) {
if (ContainsImportThatWillBeNeeded(child)) {
Dependency* dep =
output_partition(0)->add_collected_dependency();
dep->set_url(child->url().as_string());
dep->set_content_type(DEP_CSS);
// TODO(morlovich): Set validity_info, which should be based on
// the containing CSS.
}
}
}
// If CSS was successfully parsed.
@@ -1179,9 +1098,7 @@ CssFilter::Context* CssFilter::StartRewriting(const ResourceSlotPtr& slot) {
if (driver()->options()->css_preserve_urls()) {
slot->set_preserve_urls(true);
}
if (driver()->InitiateRewrite(rewriter)) {
rewriter->Initiated();
} else {
if (!driver()->InitiateRewrite(rewriter)) {
rewriter = NULL;
}
return rewriter;
@@ -212,14 +212,6 @@ bool CssImageRewriter::RewriteCss(int64 image_inline_max_bytes,
// failed flattening, so that later RollUps do the right thing (nothing).
// This is not something we need to log in the statistics or in debug.
hierarchy->set_flattening_succeeded(false);
if (options->NeedsDependenciesCohort()) {
// If we're collecting dependencies, and aren't flattening, we want
// to call ExpandChildren so it can record all our child stylesheets and
// whether their media is comparable. Since we marked
// flattening_succeeded(false) above, we will only look inside it for
// dependency collection and not do any flattening work.
hierarchy->ExpandChildren();
}
} else if (hierarchy->flattening_succeeded()) {
// Flattening of this hierarchy might have already failed because of a
// problem detected with the containing charset or media, in particular
-12
View File
@@ -281,13 +281,8 @@ class CssFilter::Context : public SingleRewriteContext {
CssHierarchy* mutable_hierarchy() { return &hierarchy_; }
void Initiated();
protected:
void Render() override;
void WillNotRender() override;
void Cancel() override;
virtual void Harvest();
virtual bool Partition(OutputPartitions* partitions,
OutputResourceVector* outputs);
@@ -303,8 +298,6 @@ class CssFilter::Context : public SingleRewriteContext {
const ResourceContext* resource_context) const;
private:
void ReportDeps();
void GetCssBaseUrlToUse(const ResourcePtr& input_resource,
GoogleUrl* css_base_gurl_to_use);
@@ -412,11 +405,6 @@ class CssFilter::Context : public SingleRewriteContext {
ResourcePtr input_resource_;
OutputResourcePtr output_resource_;
// We use this as a refcount in advance of knowing how many nested CSS
// resources there are.
int dummy_dep_id_;
bool deps_reported_;
DISALLOW_COPY_AND_ASSIGN(Context);
};