Basic CSP parsing and representation. (#1493)

This commit is contained in:
Maks Orlovich
2017-02-13 12:10:58 -05:00
committed by GitHub
parent a41cdab05e
commit 0cb8dad051
7 changed files with 574 additions and 0 deletions
+17
View File
@@ -1173,6 +1173,21 @@
'gperf.gypi',
],
},
{
# Processes rewriter/csp_directive.gperf (the CSP directive-name table)
# using the shared rules pulled in from gperf.gypi below.
'target_name': 'instaweb_rewriter_csp_gperf',
'variables': {
'instaweb_gperf_subdir': 'net/instaweb/rewriter',
},
'sources': [
'rewriter/csp_directive.gperf',
],
'dependencies': [
'<(DEPTH)/pagespeed/kernel.gyp:util',
],
'includes': [
'gperf.gypi',
],
},
{
'target_name': 'instaweb_static_asset_config_pb',
'variables': {
@@ -1534,6 +1549,7 @@
'instaweb_responsive_js_data2c',
'instaweb_responsive_js_opt_data2c',
'instaweb_rewriter_base',
'instaweb_rewriter_csp_gperf',
'instaweb_rewriter_css',
'instaweb_rewriter_image',
'instaweb_rewriter_javascript',
@@ -1567,6 +1583,7 @@
'rewriter/css_move_to_head_filter.cc',
'rewriter/css_outline_filter.cc',
'rewriter/css_tag_scanner.cc',
'rewriter/csp.cc',
'rewriter/data_url_input_resource.cc',
'rewriter/debug_filter.cc',
'rewriter/decode_rewritten_urls_filter.cc',
+176
View File
@@ -0,0 +1,176 @@
/*
* Copyright 2017 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: morlovich@google.com (Maksim Orlovich)
//
// This provides basic parsing and evaluation of a (subset of)
// Content-Security-Policy that's relevant for PageSpeed Automatic
#include "net/instaweb/rewriter/public/csp.h"
#include "net/instaweb/rewriter/public/csp_directive.h"
namespace net_instaweb {
namespace {
// Strips leading and trailing blanks from *input in place.
//
// Only ' ' and '\t' count as blanks here (the whitespace of the HTTP header
// grammar). This is intentionally not the HTML notion of whitespace that
// TrimWhitespace uses.
void TrimCspWhitespace(StringPiece* input) {
  StringPiece& piece = *input;

  // Peel blanks off the front.
  while (!piece.empty()) {
    const char first = piece[0];
    if (first != ' ' && first != '\t') {
      break;
    }
    piece.remove_prefix(1);
  }

  // Peel blanks off the back.
  while (!piece.empty()) {
    const char last = piece[piece.size() - 1];
    if (last != ' ' && last != '\t') {
      break;
    }
    piece.remove_suffix(1);
  }
}
// Returns the final character of |input|. The caller must pass a non-empty
// piece; this is DCHECKed.
char Last(StringPiece input) {
  DCHECK(!input.empty());
  const StringPiece::size_type last_index = input.size() - 1;
  return input[last_index];
}
// Returns true iff |ch| is one of the 52 ASCII letters a-z / A-Z.
inline bool IsAsciiAlpha(char ch) {
  const bool is_lower = ('a' <= ch) && (ch <= 'z');
  const bool is_upper = ('A' <= ch) && (ch <= 'Z');
  return is_lower || is_upper;
}
} // namespace
// Parses a single source expression (one token of a source list).
//
// After trimming blanks the input is classified as follows:
//   * empty                        -> kUnknown
//   * '...' (quoted, non-empty)    -> whatever ParseQuoted() makes of the
//                                     text between the quotes
//   * <scheme>:                    -> kSchemeSource, param is the whole token
//   * anything else                -> kHostSource, param is the whole token
CspSourceExpression CspSourceExpression::Parse(StringPiece input) {
  TrimCspWhitespace(&input);
  if (input.empty()) {
    return CspSourceExpression(kUnknown);
  }

  const size_t length = input.size();
  const char first = input[0];
  const char last = Last(input);

  // Quoted keyword. Requiring more than two characters guarantees that
  // ParseQuoted() receives a non-empty string.
  if (length > 2 && first == '\'' && last == '\'') {
    return ParseQuoted(input.substr(1, length - 2));
  }

  // scheme-source: everything before the trailing ':' must match
  //   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  if (length >= 2 && last == ':' && IsAsciiAlpha(first)) {
    bool rest_is_scheme = true;
    for (size_t i = 1; rest_is_scheme && (i + 1) < length; ++i) {
      const char c = input[i];
      rest_is_scheme =
          IsAsciiAlphaNumeric(c) || (c == '+') || (c == '-') || (c == '.');
    }
    if (rest_is_scheme) {
      return CspSourceExpression(kSchemeSource, input);
    }
  }

  // Everything else is treated as a host-source for now. Whether this needs
  // to be broken down further will become clear once the actual URL matching
  // algorithm is implemented.
  return CspSourceExpression(kHostSource, input);
}
// Maps the text found between single quotes to a keyword source kind.
//
// |input| must be non-empty and must not include the surrounding quotes.
// Matching is case-insensitive. Anything that is not one of the keywords
// below (including hash and nonce sources) yields kUnknown.
CspSourceExpression CspSourceExpression::ParseQuoted(StringPiece input) {
  CHECK(!input.empty());

  // Dispatch on the first character so that at most one keyword family is
  // compared against.
  switch (input[0]) {
    case 'u':
    case 'U':
      if (StringCaseEqual(input, "unsafe-inline")) {
        return CspSourceExpression(kUnsafeInline);
      } else if (StringCaseEqual(input, "unsafe-eval")) {
        return CspSourceExpression(kUnsafeEval);
      } else if (StringCaseEqual(input, "unsafe-hashed-attributes")) {
        return CspSourceExpression(kUnsafeHashedAttributes);
      }
      break;
    case 's':
    case 'S':
      if (StringCaseEqual(input, "self")) {
        return CspSourceExpression(kSelf);
      } else if (StringCaseEqual(input, "strict-dynamic")) {
        return CspSourceExpression(kStrictDynamic);
      }
      break;
    default:
      break;
  }
  return CspSourceExpression(kUnknown);
}
// Parses the value of a source-list directive (e.g. the part of
// "script-src 'self' https:" after the directive name).
//
// The value is split into source expressions on blanks, meaning both ' ' and
// '\t', the same characters TrimCspWhitespace() strips. Splitting on ' '
// alone would glue tab-separated expressions into one bogus host-source.
// Expressions that parse to kUnknown are dropped.
//
// Always returns a non-null list, which may be empty.
std::unique_ptr<CspSourceList> CspSourceList::Parse(StringPiece input) {
  std::unique_ptr<CspSourceList> result(new CspSourceList);
  TrimCspWhitespace(&input);

  StringPieceVector tokens;
  SplitStringPieceToVector(input, " \t", &tokens,
                           true /* omit_empty_strings */);
  for (StringPiece token : tokens) {
    // No per-token trimming is needed: tokens cannot contain separators, and
    // CspSourceExpression::Parse() trims its input anyway.
    CspSourceExpression expr = CspSourceExpression::Parse(token);
    if (expr.kind() != CspSourceExpression::kUnknown) {
      result->expressions_.push_back(expr);
    }
  }
  return result;
}
// Creates a policy with one null source-list slot per source-list directive.
// Slots are indexed by the numeric value of CspDirective.
CspPolicy::CspPolicy()
    : policies_(static_cast<size_t>(CspDirective::kNumSourceListDirectives)) {
}
// Parses one serialized policy (the value of a single Content-Security-Policy
// header) into a CspPolicy.
//
// Returns null if the input contains no directives at all (empty or only
// blanks/semicolons). Otherwise returns a policy in which each recognized
// source-list directive has a (possibly empty) CspSourceList; unrecognized
// directives are skipped.
//
// The directive name ends at the first blank (' ' or '\t'). A directive with
// no value, e.g. "script-src", gets an empty source list rather than being
// dropped: an empty list matches nothing, so ignoring it would make the
// policy look more permissive than it is.
std::unique_ptr<CspPolicy> CspPolicy::Parse(StringPiece input) {
  std::unique_ptr<CspPolicy> policy;
  TrimCspWhitespace(&input);

  StringPieceVector tokens;
  SplitStringPieceToVector(input, ";", &tokens, true /* omit_empty_strings */);

  // TODO(morlovich): This will need some extra-careful testing.
  // Essentially the spec has a notion of a policy with an empty directive set,
  // and it basically gets ignored; but is a policy like
  // tasty-chocolate-src: * an empty one, or not? This is particularly
  // relevant since we may not want to parse worker-src or whatever.
  if (tokens.empty()) {
    return policy;
  }

  policy.reset(new CspPolicy);
  for (StringPiece token : tokens) {
    TrimCspWhitespace(&token);
    if (token.empty()) {
      // E.g. the blank between the semicolons in "a-src x; ;b-src y".
      continue;
    }

    // The name runs up to the first blank; the rest (possibly nothing) is
    // the value. CspSourceList::Parse() trims any leading blanks itself.
    StringPiece::size_type name_end = 0;
    while (name_end < token.size() &&
           token[name_end] != ' ' && token[name_end] != '\t') {
      ++name_end;
    }
    const StringPiece name = token.substr(0, name_end);
    const StringPiece value = token.substr(name_end);

    const CspDirective directive = LookupCspDirective(name);
    if (directive == CspDirective::kNumSourceListDirectives) {
      continue;  // Not a directive we track.
    }

    // Note: repeated directives are ignored per the "Parse a serialized
    // CSP as disposition" algorithm, so only the first occurrence counts.
    std::unique_ptr<CspSourceList>& slot =
        policy->policies_[static_cast<size_t>(directive)];
    if (slot == nullptr) {
      slot = CspSourceList::Parse(value);
    }
  }
  return policy;
}
} // namespace net_instaweb
+44
View File
@@ -0,0 +1,44 @@
%{
// csp_directive.gp.cc is automatically generated from csp_directive.gperf
// Author: morlovich@google.com
#include "base/basictypes.h"
#include "net/instaweb/rewriter/public/csp_directive.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
%}
%compare-strncmp
%define class-name CspDirectiveMapper
%define lookup-function-name Lookup
%define word-array-name CspDirectiveNameTable
%global-table
%ignore-case
%includes
%language=C++
%readonly-tables
%struct-type
struct CspDirectiveInfo {
// Directive name as listed in the keyword table below, e.g. "img-src".
const char* name;
// Enum value that |name| maps to.
CspDirective directive;
};
%%
"child-src", CspDirective::kChildSrc
"connect-src", CspDirective::kConnectSrc
"default-src", CspDirective::kDefaultSrc
"frame-src", CspDirective::kFrameSrc
"img-src", CspDirective::kImgSrc
"script-src", CspDirective::kScriptSrc
"style-src", CspDirective::kStyleSrc
"base-uri", CspDirective::kBaseUri
%%
// Maps a directive name to its CspDirective value using the generated
// CspDirectiveMapper table. Returns CspDirective::kNumSourceListDirectives
// when the name is not in the table.
CspDirective LookupCspDirective(StringPiece name) {
  const CspDirectiveInfo* match =
      CspDirectiveMapper::Lookup(name.data(), name.size());
  if (match == nullptr) {
    return CspDirective::kNumSourceListDirectives;
  }
  return match->directive;
}
} // namespace net_instaweb
+134
View File
@@ -0,0 +1,134 @@
/*
* Copyright 2017 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: morlovich@google.com (Maksim Orlovich)
#include "net/instaweb/rewriter/public/csp.h"
#include <memory>
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/gtest.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
namespace {
// Quoted keyword sources: matching ignores case and surrounding blanks, and
// quoted strings that are not known keywords (e.g. nonces) parse as kUnknown.
TEST(CspParseSourceTest, Quoted) {
  static const struct {
    const char* input;
    CspSourceExpression::Kind expected_kind;
  } kCases[] = {
    {"'self' ", CspSourceExpression::kSelf},
    {" 'sElf' ", CspSourceExpression::kSelf},
    {" \t 'strict-dynamic' ", CspSourceExpression::kStrictDynamic},
    {"'unsafe-inline'", CspSourceExpression::kUnsafeInline},
    {"'unsafe-eval'", CspSourceExpression::kUnsafeEval},
    {"'unsafe-hashed-attribUtes'",
     CspSourceExpression::kUnsafeHashedAttributes},
    {"'nonce-qwertyu12345'", CspSourceExpression::kUnknown},
  };

  for (const auto& test_case : kCases) {
    EXPECT_EQ(CspSourceExpression(test_case.expected_kind),
              CspSourceExpression::Parse(test_case.input))
        << "input: [" << test_case.input << "]";
  }
}
// Unquoted sources: blank input is kUnknown, "<scheme>:" is a scheme-source,
// and everything else (hosts, wildcards, full URLs) is a host-source whose
// parameter is the trimmed token.
TEST(CspParseSourceTest, NonQuoted) {
  EXPECT_EQ(CspSourceExpression(CspSourceExpression::kUnknown),
            CspSourceExpression::Parse(" "));

  static const struct {
    const char* input;
    CspSourceExpression::Kind expected_kind;
    const char* expected_param;
  } kCases[] = {
    {" https:", CspSourceExpression::kSchemeSource, "https:"},
    {"weird-schema+-1.0:", CspSourceExpression::kSchemeSource,
     "weird-schema+-1.0:"},
    {"*.example.com", CspSourceExpression::kHostSource, "*.example.com"},
    {"http://www.example.com/dir", CspSourceExpression::kHostSource,
     "http://www.example.com/dir"},
    {"http://www.example.com/dir/file.js", CspSourceExpression::kHostSource,
     "http://www.example.com/dir/file.js"},
    {"*", CspSourceExpression::kHostSource, "*"},
  };

  for (const auto& test_case : kCases) {
    EXPECT_EQ(CspSourceExpression(test_case.expected_kind,
                                  test_case.expected_param),
              CspSourceExpression::Parse(test_case.input))
        << "input: [" << test_case.input << "]";
  }
}
// Input with no directives at all produces no policy object.
TEST(CspParseTest, Empty) {
  std::unique_ptr<CspPolicy> parsed = CspPolicy::Parse(" ");
  EXPECT_EQ(parsed, nullptr);
}
// Two distinct directives each end up with their own source list.
TEST(CspParseTest, Basic) {
  std::unique_ptr<CspPolicy> policy = CspPolicy::Parse(
      "default-src *; script-src 'unsafe-inline' 'unsafe-eval'");
  ASSERT_TRUE(policy != nullptr);

  // default-src: a single wildcard host-source.
  const CspSourceList* default_list =
      policy->SourceListFor(CspDirective::kDefaultSrc);
  ASSERT_TRUE(default_list != nullptr);
  const std::vector<CspSourceExpression>& default_src =
      default_list->expressions();
  ASSERT_EQ(1, default_src.size());
  EXPECT_EQ(CspSourceExpression::kHostSource, default_src[0].kind());
  EXPECT_EQ("*", default_src[0].param());

  // script-src: both keywords, in order of appearance.
  const CspSourceList* script_list =
      policy->SourceListFor(CspDirective::kScriptSrc);
  ASSERT_TRUE(script_list != nullptr);
  const std::vector<CspSourceExpression>& script_src =
      script_list->expressions();
  ASSERT_EQ(2, script_src.size());
  EXPECT_EQ(CspSourceExpression::kUnsafeInline, script_src[0].kind());
  EXPECT_EQ(CspSourceExpression::kUnsafeEval, script_src[1].kind());
}
// When a directive is repeated within one policy, only its first occurrence
// takes effect; the later 'strict-dynamic' must not show up.
TEST(CspParseTest, Repeated) {
  std::unique_ptr<CspPolicy> policy = CspPolicy::Parse(
      "script-src 'unsafe-inline' 'unsafe-eval'; script-src 'strict-dynamic'");
  ASSERT_TRUE(policy != nullptr);

  const CspSourceList* script_list =
      policy->SourceListFor(CspDirective::kScriptSrc);
  ASSERT_TRUE(script_list != nullptr);
  const std::vector<CspSourceExpression>& script_src =
      script_list->expressions();
  ASSERT_EQ(2, script_src.size());
  EXPECT_EQ(CspSourceExpression::kUnsafeInline, script_src[0].kind());
  EXPECT_EQ(CspSourceExpression::kUnsafeEval, script_src[1].kind());
}
} // namespace
} // namespace net_instaweb
+128
View File
@@ -0,0 +1,128 @@
/*
* Copyright 2017 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: morlovich@google.com (Maksim Orlovich)
//
// This provides basic parsing and evaluation of a (subset of)
// Content-Security-Policy that's relevant for PageSpeed Automatic.
// CspContext is the main class.
#ifndef NET_INSTAWEB_REWRITER_CSP_H_
#define NET_INSTAWEB_REWRITER_CSP_H_
#include <memory>
#include <string>
#include <vector>
#include "net/instaweb/rewriter/public/csp_directive.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
// One entry of a CSP source list: a kind plus, for scheme- and host-sources,
// the text of the expression.
class CspSourceExpression {
 public:
  enum Kind {
    kSelf,
    kSchemeSource,
    kHostSource,
    kUnsafeInline,
    kUnsafeEval,
    kStrictDynamic,
    kUnsafeHashedAttributes,
    kUnknown  // Includes hash-or-nonce sources.
  };

  // A default-constructed expression is kUnknown with an empty parameter.
  CspSourceExpression() : kind_(kUnknown) {}

  // Expression of the given kind with an empty parameter.
  explicit CspSourceExpression(Kind kind) : kind_(kind) {}

  // Expression of the given kind; |input| is copied into param().
  CspSourceExpression(Kind kind, StringPiece input)
      : kind_(kind), param_(input.as_string()) {}

  // Parses a single source expression; surrounding blanks are ignored.
  static CspSourceExpression Parse(StringPiece input);

  // Two expressions are equal when both kind and parameter match.
  bool operator==(const CspSourceExpression& other) const {
    if (kind_ != other.kind_) {
      return false;
    }
    return param_ == other.param_;
  }

  Kind kind() const { return kind_; }
  const GoogleString& param() const { return param_; }

 private:
  // |input| is the text between the quotes; it must be non-empty.
  static CspSourceExpression ParseQuoted(StringPiece input);

  Kind kind_;
  GoogleString param_;
};
// The parsed value of one source-list directive: the source expressions it
// contains, in order of appearance.
class CspSourceList {
 public:
  // Builds a list from the text following a directive name.
  static std::unique_ptr<CspSourceList> Parse(StringPiece input);

  // The parsed expressions; may be empty.
  const std::vector<CspSourceExpression>& expressions() const {
    return expressions_;
  }

 private:
  std::vector<CspSourceExpression> expressions_;
};
// An individual policy. Note that a page is constrained by an intersection
// of some number of these.
class CspPolicy {
 public:
  // Creates a policy in which no directive has a source list yet.
  CspPolicy();

  // Just an example for now: a placeholder that always returns false.
  bool UnsafeEval() const { return false; /* */ }

  // Parses one serialized policy. May return null.
  static std::unique_ptr<CspPolicy> Parse(StringPiece input);

  // Returns the source list parsed for |directive|, or null if the policy
  // has none. Also returns null (instead of indexing out of range) when
  // |directive| is not a real source-list directive, such as the
  // kNumSourceListDirectives value LookupCspDirective() uses for "unknown".
  const CspSourceList* SourceListFor(CspDirective directive) const {
    const size_t index = static_cast<size_t>(directive);
    if (index >= policies_.size()) {
      return nullptr;
    }
    return policies_[index].get();
  }

 private:
  // One slot per source-list directive, indexed by the CspDirective value.
  // The expectation is that some of these may be null.
  std::vector<std::unique_ptr<CspSourceList>> policies_;
};
// A set of all policies (maybe none!) on the page. Note that we do not track
// those with report disposition, only those that actually enforce --- reporting
// seems like it would keep the page author informed about our effects as it is.
class CspContext {
public:
bool UnsafeEval() const {
return AllPermit(&CspPolicy::UnsafeEval);
}
private:
typedef bool (CspPolicy::*SimplePredicateFn)() const;
bool AllPermit(SimplePredicateFn predicate) const {
// Note that empty policies_ means "true" --- there is no policy whatsoever,
// so everything is permitted. If there is more than that, all policies
// must agree, too.
for (const auto& policy : policies_) {
if (!(policy.get()->*predicate)()) {
return false;
}
}
return true;
}
std::vector<std::unique_ptr<CspPolicy>> policies_;
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_CSP_H_
@@ -0,0 +1,74 @@
/*
* Copyright 2017 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: morlovich@google.com (Maksim Orlovich)
//
// Enum for Content-Security-Policy directives
#ifndef NET_INSTAWEB_REWRITER_CSP_DIRECTIVE_H_
#define NET_INSTAWEB_REWRITER_CSP_DIRECTIVE_H_
#include <memory>
#include <string>
#include <vector>
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
struct CspDirectiveInfo;
// Directives mentioned in the spec that we care about (with comments naming
// those we don't).
//
// The numeric values of the source-list enumerators are used as indices into
// CspPolicy's per-directive array, so kNumSourceListDirectives must stay
// directly after the last of them. LookupCspDirective() also returns it as
// its "unrecognized" result.
enum class CspDirective {
// These take source list:
kChildSrc,
kConnectSrc,
kDefaultSrc,
// font-src doesn't actually matter for us since the Google font support only
// touches the loader CSS, not the font URL itself.
kFrameSrc,
kImgSrc,
// manifest-src
// media-src
// object-src
kScriptSrc,
kStyleSrc,
// worker-src
kBaseUri,
// form-action
// frame-ancestors
// Count of the source-list directives above; not a directive itself.
kNumSourceListDirectives
// These take other stuff. If we actually parsed them, we would want
// to distinguish them so we don't stick them into the array of
// CspSourceList* the other stuff goes into.
// plugin-types
// sandbox --- TODO(morlovich): Understand implications of this.
// disown-opener
// report-uri
// report-to
};
// Maps a directive name (e.g. "script-src") to its CspDirective value.
// Returns kNumSourceListDirectives if unrecognized.
CspDirective LookupCspDirective(StringPiece name);
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_CSP_DIRECTIVE_H_
+1
View File
@@ -119,6 +119,7 @@
'rewriter/critical_images_finder_test_base.cc',
'rewriter/critical_selector_filter_test.cc',
'rewriter/critical_selector_finder_test.cc',
'rewriter/csp_test.cc',
'rewriter/css_combine_filter_test.cc',
'rewriter/css_embedded_config_test.cc',
'rewriter/css_filter_test.cc',