Compare commits
605 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6865525deb | |||
| cd8c45fc86 | |||
| 9d6bfad665 | |||
| c253c3ba80 | |||
| 8c7c8a843a | |||
| bcb1eb1dec | |||
| df498ea788 | |||
| 4b4d353156 | |||
| c9d4912746 | |||
| f1a6cd0ade | |||
| 0c01c0644e | |||
| 14aa4fd962 | |||
| 36a7ff9543 | |||
| 706577a34b | |||
| 6703b2d76e | |||
| 40c05b4a4f | |||
| bc875647f1 | |||
| f88a0763fd | |||
| 239ca9aa80 | |||
| 37c51d3b9e | |||
| 2468257e2d | |||
| 4628e1c0ea | |||
| 7f98ab448b | |||
| 103c479fbb | |||
| 887bd8fa98 | |||
| 804f234268 | |||
| ea82f0de19 | |||
| 5f312820e5 | |||
| 7efac4ac4e | |||
| 634b813071 | |||
| d30972985b | |||
| 3a6e833eae | |||
| 90c8ac8a12 | |||
| 6ccc1c513b | |||
| fe47eeb4f3 | |||
| e42f61e263 | |||
| 8bac32c669 | |||
| 4bef2af22c | |||
| 52bd966edd | |||
| 0f7a1f4503 | |||
| 7f477d6575 | |||
| cc10f15f53 | |||
| 9ee745c2ef | |||
| 5337e4b558 | |||
| b31bf7b090 | |||
| 8464a00a77 | |||
| b048ceb858 | |||
| 0fbdf3ee97 | |||
| a9d85960b5 | |||
| cd19284062 | |||
| c2a756049e | |||
| 6b2999dfd4 | |||
| 5658c20327 | |||
| ec2dea84fe | |||
| e206e871aa | |||
| f4fd9a3f7c | |||
| cf88925f1e | |||
| 5ea8fb9f3e | |||
| cd31a6b129 | |||
| 137792e88f | |||
| 91cd587e68 | |||
| e9608b03b7 | |||
| 6e1a9a73a5 | |||
| 3875acf392 | |||
| 4dd70b40ef | |||
| 88d9b5241d | |||
| 23905f8ce9 | |||
| effe857083 | |||
| 599a3bca5c | |||
| c26313e7ab | |||
| bcba22dd95 | |||
| 1ea7e3e568 | |||
| 34341251f6 | |||
| d1f2a43a71 | |||
| 58b1264745 | |||
| 8b6fdf56ef | |||
| 93d70feb3b | |||
| 62c8947b2d | |||
| 22749d7222 | |||
| a8141eadd3 | |||
| 2d28ff38e8 | |||
| d7f1c0dc48 | |||
| 737e9396f5 | |||
| 81029dafa6 | |||
| 001cd2dc4c | |||
| 3efebb7948 | |||
| 02b67b79ad | |||
| d0858790e4 | |||
| 4d945b1c59 | |||
| 7d965aced7 | |||
| a5411a1c7c | |||
| 8a1260a1d1 | |||
| f2e7b15d9c | |||
| 5bc61a7092 | |||
| 03819f4e4a | |||
| 7ef61d2580 | |||
| a73c096950 | |||
| 7bd90f7b3a | |||
| 73981ffe1a | |||
| 58228564dd | |||
| 340c86a49e | |||
| abdafc7aa4 | |||
| 8cf10873d1 | |||
| 2496716e36 | |||
| af364344e9 | |||
| 9da45910c7 | |||
| 5dfe42f3d3 | |||
| bff15040be | |||
| 3c5c5076de | |||
| eab4f8672f | |||
| 852e376130 | |||
| b7529a8d9a | |||
| 5dd93fffae | |||
| 48d23f10ba | |||
| f924bc72d9 | |||
| 1aff187d0e | |||
| 3b64df255a | |||
| 0eeeca6e47 | |||
| 6f68ba2f68 | |||
| e6e70c32a8 | |||
| a3e20a05e2 | |||
| e28255e2ef | |||
| 10ac8ae4a3 | |||
| 1b3ca08e22 | |||
| de5bc5d943 | |||
| afd0f01686 | |||
| 0cdb81fab4 | |||
| b492d3966e | |||
| 232c05a81d | |||
| adbe2cb359 | |||
| 88370cf54d | |||
| b43a973071 | |||
| a524127d49 | |||
| 53a6de6da8 | |||
| 66f1b9aa9b | |||
| 6903286aa4 | |||
| 46dc39f5a9 | |||
| 7d8fefe780 | |||
| 43d1706e2f | |||
| 622d088a3e | |||
| 0290f52a88 | |||
| c94146a54d | |||
| f3a1cb6cf4 | |||
| 5a23347e64 | |||
| 7ce289346a | |||
| 6b4a9dc487 | |||
| a3f2e4766c | |||
| ab0a5e4b65 | |||
| a458a65d4e | |||
| d00911b00f | |||
| ed80427ae6 | |||
| f556ca4926 | |||
| cad618dff7 | |||
| a906411367 | |||
| e6c5bc1b2e | |||
| d6d3b90ea3 | |||
| d7afd2acbc | |||
| 05f6993801 | |||
| fe63bbf53f | |||
| c7aa72d9c5 | |||
| 8f6bb0c9e3 | |||
| 84f2a1884e | |||
| da83f73f19 | |||
| 2bfc05b4c6 | |||
| ea9e209687 | |||
| c073a3120b | |||
| 5e3b2f23a5 | |||
| f41ecf4010 | |||
| 34a62db9cf | |||
| b0efa9f982 | |||
| fb00ac373b | |||
| 9550629768 | |||
| 26a5deb341 | |||
| 7bcd5aadde | |||
| 594ba13606 | |||
| 9d0b896737 | |||
| 89f6465288 | |||
| eb11cfe0cf | |||
| 7a0e45d025 | |||
| 3cf731ee24 | |||
| 1a4ffcf335 | |||
| 72c7e584f7 | |||
| 08f69913dc | |||
| f2d6bcf3ac | |||
| 2c89815084 | |||
| e55b062c17 | |||
| 62ef051844 | |||
| 36b07ca209 | |||
| 1280cd0dfa | |||
| a06551a97d | |||
| 679618f998 | |||
| fb9b355715 | |||
| f71ac7dd0c | |||
| 19f479a6b6 | |||
| 7431946d23 | |||
| 0d760dbf31 | |||
| e9d7b2ad01 | |||
| 0d89df0e80 | |||
| 218d74f206 | |||
| bcefd56941 | |||
| 676344bc8a | |||
| 7293f649d1 | |||
| 10f5093bf6 | |||
| 357eb92683 | |||
| a6c6d13392 | |||
| 9007e582ad | |||
| 1ca04d514c | |||
| 8e08b2b7e0 | |||
| ad1a1594d6 | |||
| d5e37d2d2e | |||
| 4bceb402a6 | |||
| 80a50fe9d8 | |||
| 28192a509e | |||
| 9331815350 | |||
| 727d339dfd | |||
| b3e5297dc3 | |||
| 639a01015c | |||
| 29a43196e8 | |||
| 0b20c91217 | |||
| d654062516 | |||
| b6991fd107 | |||
| bf042f8af9 | |||
| 22950666e0 | |||
| 96f185d472 | |||
| c9a106b4b0 | |||
| 00080ff728 | |||
| b6098ce302 | |||
| 36d285a354 | |||
| 149d1c7a5c | |||
| 9e9bf76164 | |||
| 09bc3b0aba | |||
| 502abb2f10 | |||
| 08bdfb8c65 | |||
| 8a8f62fefa | |||
| 94ab5c33f8 | |||
| 0e766e4096 | |||
| a7fdc86744 | |||
| aed13fc578 | |||
| cd753a22a7 | |||
| 6af5774ef8 | |||
| 117994993a | |||
| 5487b7350c | |||
| 261b602f83 | |||
| ae3cf7a425 | |||
| b4af0738a5 | |||
| ff049a55c3 | |||
| 0772787ad4 | |||
| 7eedc2ce3c | |||
| 7af665b5dd | |||
| 8136a15352 | |||
| 31024cfc24 | |||
| 703fda7eba | |||
| d4bc40007e | |||
| f2e2ab6056 | |||
| e06a65e7e6 | |||
| 3231949e03 | |||
| f5713996f2 | |||
| 185632583b | |||
| c5dbfa16cd | |||
| 33b2e82ec6 | |||
| b87ffce5ba | |||
| 7d5fc11afd | |||
| 7cba7ba4f4 | |||
| cf434f71cc | |||
| ba8c149a88 | |||
| f5bac604a1 | |||
| f18faa3ae1 | |||
| 070ae2aad2 | |||
| 674c70e32c | |||
| b4ef143a88 | |||
| ffdc3c8fd6 | |||
| af10fc3ddb | |||
| e9a099f9c5 | |||
| e4a2933c58 | |||
| 2601232457 | |||
| a3e1810a6a | |||
| 72f3c79fec | |||
| 323e820fde | |||
| a9a7bf56ac | |||
| 2358f957cb | |||
| 7d72a7c89a | |||
| 8830724580 | |||
| c2d07a8d5a | |||
| c3bff67637 | |||
| 5a1a29ee8b | |||
| 61025c5e4f | |||
| d3e7900704 | |||
| 85e19c962b | |||
| 20fd7d18dd | |||
| 4667aa1fc3 | |||
| 6ced8c0f65 | |||
| bfaf4268cf | |||
| 78cf39f9b3 | |||
| 215071a383 | |||
| 630f7d848b | |||
| d90245af69 | |||
| f25569690a | |||
| 707d671826 | |||
| f8b87ea436 | |||
| 86d840f76e | |||
| 0110d33fa7 | |||
| 497594ba7f | |||
| 336352df38 | |||
| 0edd405eb8 | |||
| d004c4d916 | |||
| c754cf1be0 | |||
| 091ef6399b | |||
| 9e4d26da5e | |||
| 027d481817 | |||
| 67106e920e | |||
| 9699caeab5 | |||
| c371d516a8 | |||
| 207eb2d131 | |||
| d9e2142f31 | |||
| bf6c6c0e9b | |||
| e8dd9fd3c3 | |||
| 33d006625e | |||
| 2dcaa95278 | |||
| f5252b569a | |||
| 64eaa2a659 | |||
| 4783144e7d | |||
| 83205c9c31 | |||
| 625e762961 | |||
| c20affe323 | |||
| 7a9e6de802 | |||
| 96cf9a22f7 | |||
| ab83a70a35 | |||
| 6db4d02a91 | |||
| 1354cee4ed | |||
| 658b2cf7a9 | |||
| 092bbf2862 | |||
| 6ccb815df3 | |||
| f04c533df0 | |||
| 8468e4849a | |||
| df5736609d | |||
| 72ddb34a1c | |||
| 7fbb2c61ee | |||
| ae2d4bac7f | |||
| af772c2fe8 | |||
| a4bd9b9c13 | |||
| f87d0f7ae2 | |||
| 501742cb56 | |||
| a669be99b1 | |||
| 328d3afc9b | |||
| 2681c24ee0 | |||
| f86f47fda4 | |||
| 179c81afa3 | |||
| 8d7eb20c89 | |||
| 1fe6c54b94 | |||
| 1667879202 | |||
| 0076e45677 | |||
| 1f3560ea21 | |||
| 54352bff72 | |||
| 329985659c | |||
| db870f7023 | |||
| ed14455412 | |||
| be4d263d10 | |||
| 0bafd6b7e0 | |||
| 9bbe912bd7 | |||
| b78eb8a939 | |||
| e082a01912 | |||
| fa5815e1e8 | |||
| f12af2f03b | |||
| e22fae46bc | |||
| 53a599fbd4 | |||
| 70c47c13a4 | |||
| 8021751d69 | |||
| 6b29b5220d | |||
| 07286005a6 | |||
| 721c49dc71 | |||
| 658a30d836 | |||
| 94688bd1e5 | |||
| 26aaf221c7 | |||
| 530bf3c05e | |||
| dfedc6b025 | |||
| 4e3fe6f35f | |||
| af6706a4c9 | |||
| b6c3001df2 | |||
| b8ff84ed9b | |||
| b83bb636f0 | |||
| 7d6b6c4b4c | |||
| 2629865c59 | |||
| fd546f270b | |||
| 641fb971d1 | |||
| e40cde221c | |||
| f956ef92e9 | |||
| 687e0a7028 | |||
| 7c15cd83f4 | |||
| 8f6ac0660e | |||
| 422d5afa6b | |||
| f653094040 | |||
| 8430a6c40e | |||
| d4fa76463d | |||
| 1e5769df11 | |||
| f1a800d37f | |||
| ee4958e7dd | |||
| dbead4a62e | |||
| 2b2d19e78d | |||
| 9ed9fecc0f | |||
| 06e64dec7a | |||
| 420b58c346 | |||
| 36554ce92f | |||
| c5265b47cf | |||
| c29ac1e6a3 | |||
| 6e86835599 | |||
| f70eb5df40 | |||
| 4650aa4a3f | |||
| 34e3101edd | |||
| fdbdf11bab | |||
| db238e7705 | |||
| a196cfe071 | |||
| 442d319aef | |||
| a48c243e3e | |||
| 5c17cb671c | |||
| 94d004e72c | |||
| 21a9705b78 | |||
| 137182792e | |||
| 3dbb845603 | |||
| ac7a6bce2c | |||
| 9baf3d01d3 | |||
| 1db13030fa | |||
| be2fb57d85 | |||
| 3a99b9adff | |||
| 8cb48034f0 | |||
| 49c96c9202 | |||
| ad2ea9498b | |||
| 3a085e114d | |||
| 3fbb740101 | |||
| fa99ac39b4 | |||
| d0de777452 | |||
| eb9979f048 | |||
| c57c16c499 | |||
| 70284c987e | |||
| 64f2ab6591 | |||
| c739bf29c4 | |||
| bb1093e8b2 | |||
| 51eaeddfba | |||
| 122c234570 | |||
| ddf844b680 | |||
| a245f40bcf | |||
| cafb06c011 | |||
| cbfbb5b6d6 | |||
| 606a75a068 | |||
| 0c26aafc2f | |||
| 8bd4ed2ec2 | |||
| 9c5fb3107e | |||
| 1c86ab66be | |||
| f05e3cf281 | |||
| 177b70b995 | |||
| 3b2b28559a | |||
| 7e89db8fa1 | |||
| 9b3ab25cea | |||
| 54ef25214c | |||
| b9db498199 | |||
| 6503ebfb1a | |||
| baf35dc906 | |||
| f4466918f3 | |||
| 3b6caa38aa | |||
| b1127c2a01 | |||
| ad6429d26f | |||
| 59cdafaf70 | |||
| 34ebc2d179 | |||
| 49c9de63cd | |||
| 502a604a26 | |||
| fca58f2d6f | |||
| b2bbdcc0df | |||
| bdb5de5a2f | |||
| 960d0940bb | |||
| e7a91c2f0a | |||
| e583afdb49 | |||
| 53138d7b34 | |||
| e45acf4613 | |||
| 47e3364332 | |||
| 49760f03a0 | |||
| 2f5afcddd1 | |||
| 67beee1444 | |||
| cd80e92df1 | |||
| e303c92041 | |||
| ccd36dd658 | |||
| 3a9953e25c | |||
| 311ea897cf | |||
| 4cc3e4def3 | |||
| 301a5249a8 | |||
| 4bff89e8af | |||
| fa29f9ff32 | |||
| aeba9eb285 | |||
| 5d2e2d7b67 | |||
| e61c4f722f | |||
| 505d3be9b9 | |||
| bec52734f9 | |||
| 73f7531ed5 | |||
| 96fbca62e0 | |||
| 36aed99813 | |||
| e1cd69e672 | |||
| 41f75282d8 | |||
| aa6ef1d6f2 | |||
| 40fc16b1ea | |||
| db882d350c | |||
| 3edd4f851f | |||
| cd2be9aa9c | |||
| 52e1c189fa | |||
| 0231f021ad | |||
| 25e9fba38c | |||
| 0fc61adb30 | |||
| 84ff166375 | |||
| 04ab9a06e7 | |||
| f8454d500a | |||
| 1904494276 | |||
| a421f6c501 | |||
| 3359910784 | |||
| 1f7bab2faf | |||
| 4d85b56e1e | |||
| 261e46a6d1 | |||
| 556cf508f1 | |||
| a668eafc38 | |||
| a0fdf4b940 | |||
| 6f845f9fa5 | |||
| 7e9c6b2dbc | |||
| d9c19b98b7 | |||
| 841eca0e99 | |||
| 1a819e74c3 | |||
| fcf56710d3 | |||
| c8094915b7 | |||
| 6d0e256f1b | |||
| 8e76e14f4e | |||
| b5bb232044 | |||
| 2503f406b6 | |||
| b04132e341 | |||
| 9b7e2571bd | |||
| 888265b36a | |||
| 6da81d760d | |||
| 08cc94bdb3 | |||
| e010be17a9 | |||
| a898bad02d | |||
| 8e1cbbf967 | |||
| 4a4ba6592b | |||
| b3624743e3 | |||
| e8a284d2e7 | |||
| b557a56613 | |||
| 67321228b1 | |||
| ad6afc0415 | |||
| df6a761324 | |||
| 33b1a53f37 | |||
| 0d6eed588a | |||
| 6e31ad982d | |||
| d19dc5e6c6 | |||
| 6d01bcfe79 | |||
| c0c5d70a6b | |||
| 6e1ab54c6b | |||
| fb637ae5ea | |||
| 89a4c2e572 | |||
| 9c09f92c51 | |||
| 3a01a398e2 | |||
| aaad1c124c | |||
| 7509424226 | |||
| c601c2707f | |||
| 433bfa1549 | |||
| c2255f8b8a | |||
| f7f76b9ec6 | |||
| 59dfeb58b4 | |||
| dd781df6a4 | |||
| f4ea0721dc | |||
| 4feb5ae1ae | |||
| 514117142f | |||
| 51ca3afb67 | |||
| 8c88593be9 | |||
| 4777e0412c | |||
| 8375ffafa1 | |||
| e4f87fd7ef | |||
| 61f32c0294 | |||
| b73744d4ae | |||
| 458fe68871 | |||
| c7ff530834 | |||
| 72acd50b7e | |||
| ec95367677 | |||
| 7f58b6ff11 | |||
| 9f7978da27 | |||
| 05fd02154c | |||
| e0796407e9 | |||
| af1f770b28 | |||
| 4ab1bae8d1 | |||
| dd9b209903 | |||
| 848ae63877 | |||
| bf923a264c | |||
| 7b78855cc1 | |||
| dc4d5272fd | |||
| 807784b414 | |||
| 85a36b8133 | |||
| e1341837ce | |||
| fd1787a6a3 | |||
| cb65e29c34 | |||
| 72d5bc4476 | |||
| e8045a52ad | |||
| 1dce70f548 | |||
| 2e26e7a2fe | |||
| 32e0c3c0b7 | |||
| 1426e918a8 | |||
| 8adc3738e1 | |||
| a4cd2c6568 | |||
| f2a5c5d518 | |||
| 03f7d5d7e9 | |||
| 4043a2b559 | |||
| d495a3f666 | |||
| f21618c824 | |||
| 6cfdc066d9 |
@@ -1 +1,4 @@
|
||||
test/tmp
|
||||
psol/
|
||||
psol-*.tar.gz
|
||||
*.*.*.*.tar.gz
|
||||
|
||||
@@ -19,106 +19,15 @@ optimizations, see our <a href="http://ngxpagespeed.com">demonstration site</a>.
|
||||
|
||||
## How to build
|
||||
|
||||
Because nginx does not support dynamic loading of modules, you need to compile
|
||||
nginx from source to add ngx_pagespeed. Alternatively, if you're using Tengine you can [install ngx_pagespeed without
|
||||
recompiling Tengine](https://github.com/pagespeed/ngx_pagespeed/wiki/Using-ngx_pagespeed-with-Tengine).
|
||||
|
||||
1. Install dependencies:
|
||||
|
||||
```bash
|
||||
# These are for RedHat, CentOS, and Fedora.
|
||||
$ sudo yum install gcc-c++ pcre-dev pcre-devel zlib-devel make
|
||||
|
||||
# These are for Debian. Ubuntu will be similar.
|
||||
$ sudo apt-get install build-essential zlib1g-dev libpcre3 libpcre3-dev
|
||||
```
|
||||
|
||||
2. Download ngx_pagespeed:
|
||||
|
||||
```bash
|
||||
$ cd ~
|
||||
$ wget https://github.com/pagespeed/ngx_pagespeed/archive/release-1.5.27.1-beta.zip
|
||||
$ unzip release-1.5.27.1-beta.zip
|
||||
```
|
||||
|
||||
3. Download and build nginx:
|
||||
|
||||
```bash
|
||||
$ # check http://nginx.org/en/download.html for the latest version
|
||||
$ wget http://nginx.org/download/nginx-1.4.0.tar.gz
|
||||
$ tar -xvzf nginx-1.4.0.tar.gz
|
||||
$ cd nginx-1.4.0/
|
||||
$ ./configure --add-module=$HOME/ngx_pagespeed-release-1.5.27.1-beta
|
||||
$ make
|
||||
$ sudo make install
|
||||
```
|
||||
|
||||
If this doesn't work see the [build
|
||||
troubleshooting](https://github.com/pagespeed/ngx_pagespeed/wiki/Build-Troubleshooting) page.
|
||||
|
||||
This will use a binary PageSpeed Optimization Library, but it's also possible to
|
||||
[build PSOL from
|
||||
source](https://github.com/pagespeed/ngx_pagespeed/wiki/Building-PSOL-From-Source).
|
||||
Follow the steps on <a
|
||||
href="https://developers.google.com/speed/pagespeed/module/build_ngx_pagespeed_from_source">build
|
||||
ngx_pagespeed from source</a>.
|
||||
|
||||
## How to use
|
||||
|
||||
In your `nginx.conf`, add to the main or server block:
|
||||
|
||||
```nginx
|
||||
pagespeed on;
|
||||
|
||||
# needs to exist and be writable by nginx
|
||||
pagespeed FileCachePath /var/ngx_pagespeed_cache;
|
||||
```
|
||||
|
||||
In every server block where pagespeed is enabled add:
|
||||
|
||||
```apache
|
||||
# Ensure requests for pagespeed optimized resources go to the pagespeed
|
||||
# handler and no extraneous headers get set.
|
||||
location ~ "\.pagespeed\.([a-z]\.)?[a-z]{2}\.[^.]{10}\.[^.]+" { add_header "" ""; }
|
||||
location ~ "^/ngx_pagespeed_static/" { }
|
||||
location ~ "^/ngx_pagespeed_beacon$" { }
|
||||
location /ngx_pagespeed_statistics { allow 127.0.0.1; deny all; }
|
||||
location /ngx_pagespeed_message { allow 127.0.0.1; deny all; }
|
||||
```
|
||||
|
||||
To confirm that the module is loaded, fetch a page and check that you see the
|
||||
`X-Page-Speed` header:
|
||||
|
||||
```bash
|
||||
$ curl -I 'http://localhost:8050/some_page/' | grep X-Page-Speed
|
||||
X-Page-Speed: 1.4.0.0-2729
|
||||
```
|
||||
|
||||
Looking at the source of a few pages you should see various changes, such as
|
||||
urls being replaced with new ones like `yellow.css.pagespeed.ce.lzJ8VcVi1l.css`.
|
||||
|
||||
When reading the [mod_pagespeed
|
||||
documentation](https://developers.google.com/speed/docs/mod_pagespeed/using_mod),
|
||||
keep in mind that you need to make a small adjustment to configuration
|
||||
directives: replace **ModPagespeed** with **pagespeed**:
|
||||
|
||||
mod_pagespeed.conf:
|
||||
ModPagespeedEnableFilters collapse_whitespace,add_instrumentation
|
||||
ModPagespeedRunExperiment on
|
||||
ModPagespeedExperimentSpec id=3;percent=50;default
|
||||
ModPagespeedExperimentSpec id=4;percent=50
|
||||
|
||||
ngx_pagespeed.conf:
|
||||
pagespeed EnableFilters collapse_whitespace,add_instrumentation;
|
||||
pagespeed RunExperiment on;
|
||||
pagespeed ExperimentSpec "id=3;percent=50;default";
|
||||
pagespeed ExperimentSpec "id=4;percent=50";
|
||||
|
||||
For more configuration details, see the [differences from mod_pagespeed
|
||||
configuration](https://github.com/pagespeed/ngx_pagespeed/wiki/Configuration-differences-from-mod_pagespeed)
|
||||
and <a href="https://github.com/pagespeed/ngx_pagespeed/wiki/Known-Issues">known
|
||||
issues</a> wiki pages.
|
||||
|
||||
There are extensive system tests which cover most of ngx_pagespeed's
|
||||
functionality. Consider [testing your
|
||||
installation](https://github.com/pagespeed/ngx_pagespeed/wiki/Testing).
|
||||
Follow the steps on <a
|
||||
href="https://developers.google.com/speed/pagespeed/module/configuration">PageSpeed
|
||||
configuration</a>.
|
||||
|
||||
For feedback, questions, and to follow
|
||||
the progress of the project:
|
||||
|
||||
@@ -17,27 +17,30 @@
|
||||
# PSOL_BINARY: absolute path to pagespeed_automatic.a
|
||||
|
||||
mod_pagespeed_dir="${MOD_PAGESPEED_DIR:-unset}"
|
||||
position_aux="${POSITION_AUX:-unset}"
|
||||
|
||||
if [ "$mod_pagespeed_dir" = "unset" ] ; then
|
||||
mod_pagespeed_dir="$ngx_addon_dir/psol/include"
|
||||
build_from_source=false
|
||||
|
||||
if [ ! -e "$mod_pagespeed_dir" ] ; then
|
||||
echo "ngx_pagespeed: pagespeed optimization library not found:"
|
||||
echo ""
|
||||
echo " You need to separately download the pagespeed library:"
|
||||
echo ""
|
||||
echo " $ cd /path/to/ngx_pagespeed"
|
||||
echo " $ wget https://dl.google.com/dl/page-speed/psol/1.10.33.4.tar.gz"
|
||||
echo " $ tar -xzvf 1.10.33.4.tar.gz # expands to psol/"
|
||||
echo ""
|
||||
echo " Or see the installation instructions:"
|
||||
echo " https://github.com/pagespeed/ngx_pagespeed#how-to-build"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
else
|
||||
build_from_source=true
|
||||
fi
|
||||
|
||||
echo "mod_pagespeed_dir=$mod_pagespeed_dir"
|
||||
echo "build_from_source=$build_from_source"
|
||||
|
||||
ngx_feature="psol"
|
||||
ngx_feature_name=""
|
||||
ngx_feature_run=no
|
||||
ngx_feature_incs="
|
||||
#include \"net/instaweb/htmlparse/public/html_parse.h\"
|
||||
#include \"net/instaweb/htmlparse/public/html_writer_filter.h\"
|
||||
#include \"net/instaweb/util/public/string.h\"
|
||||
#include \"net/instaweb/util/public/string_writer.h\"
|
||||
#include \"net/instaweb/util/public/null_message_handler.h\"
|
||||
"
|
||||
|
||||
os_name='unknown_os'
|
||||
arch_name='unknown_arch'
|
||||
uname_os=`uname`
|
||||
@@ -68,36 +71,101 @@ else
|
||||
buildtype=Release
|
||||
fi
|
||||
|
||||
# If the compiler is gcc, we want to use g++ to link, if at all possible,
|
||||
# so that -static-libstdc++ works.
|
||||
# Annoyingly, the feature test doesn't even use $LINK for linking, so that
|
||||
# needs an explicit -lstdc++
|
||||
pagespeed_libs=
|
||||
ps_maybe_gpp_base=`basename $CC| sed s/gcc/g++/`
|
||||
ps_maybe_gpp="`dirname $CC`/$ps_maybe_gpp_base"
|
||||
if [ -n "$NGX_GCC_VER" -a \( -x "$ps_maybe_gpp" \) ]; then
|
||||
LINK=$ps_maybe_gpp
|
||||
NGX_TEST_LD_OPT="$NGX_TEST_LD_OPT -lstdc++"
|
||||
else
|
||||
pagespeed_libs="-lstdc++"
|
||||
fi
|
||||
|
||||
# The compiler needs to know that __sync_add_and_fetch_4 is ok,
|
||||
# and this requires an instruction that didn't exist on i586 or i386.
|
||||
if [ "$uname_arch" = "i686" ]; then
|
||||
FLAG_MARCH='-march=i686'
|
||||
fi
|
||||
|
||||
CFLAGS="$CFLAGS $FLAG_MARCH"
|
||||
|
||||
# For now, standardize on gcc-4.x ABI --- if we don't set this, people building
|
||||
# with new gcc defaulting to gcc-5 C++11 ABI will have build trouble linking
|
||||
# to our libpsol.a
|
||||
# See https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html
|
||||
CFLAGS="$CFLAGS -D_GLIBCXX_USE_CXX11_ABI=0"
|
||||
CC_TEST_FLAGS="$CC_TEST_FLAGS -D_GLIBCXX_USE_CXX11_ABI=0"
|
||||
|
||||
case "$NGX_GCC_VER" in
|
||||
4.8*)
|
||||
# On GCC 4.8 and above, -Wall enables -Wunused-local-typedefs. This breaks
|
||||
# on VerifySizesAreEqual in bit_cast in chromium/src/base/basictypes.h which
|
||||
# has a typedef that is intentionally unused.
|
||||
CFLAGS="$CFLAGS -Wno-unused-local-typedefs"
|
||||
|
||||
# On GCC 4.8 and above, we get the following compiler warning:
|
||||
# chromium/src/base/memory/scoped_ptr.h:133:7: warning: declaration of ‘class scoped_ptr<C>’ [enabled by default]
|
||||
# Based on discussion at http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54055,
|
||||
# this is invalid code, but hasn't been fixed yet in chromium.
|
||||
# Unfortunately, there also does not appear to be a flag for just disabling
|
||||
# that warning, so we add Wno-error to override nginx's default -Werror
|
||||
# option.
|
||||
CFLAGS="$CFLAGS -Wno-error"
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ "$WNO_ERROR" = "YES" ]; then
|
||||
CFLAGS="$CFLAGS -Wno-error"
|
||||
fi
|
||||
|
||||
psol_binary="${PSOL_BINARY:-unset}"
|
||||
if [ "$psol_binary" = "unset" ] ; then
|
||||
if $build_from_source ; then
|
||||
psol_binary="\
|
||||
$mod_pagespeed_dir/pagespeed/automatic/pagespeed_automatic.a"
|
||||
else
|
||||
psol_library_dir="$ngx_addon_dir/psol/lib/$buildtype/$os_name/$arch_name"
|
||||
psol_binary="$psol_library_dir/pagespeed_automatic.a"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "mod_pagespeed_dir=$mod_pagespeed_dir"
|
||||
echo "build_from_source=$build_from_source"
|
||||
|
||||
ngx_feature="psol"
|
||||
ngx_feature_name=""
|
||||
ngx_feature_run=no
|
||||
ngx_feature_incs="
|
||||
#include \"pagespeed/kernel/base/string.h\"
|
||||
#include \"pagespeed/kernel/base/string_writer.h\"
|
||||
#include \"pagespeed/kernel/base/null_message_handler.h\"
|
||||
#include \"pagespeed/kernel/html/html_parse.h\"
|
||||
#include \"pagespeed/kernel/html/html_writer_filter.h\"
|
||||
"
|
||||
|
||||
pagespeed_include="\
|
||||
$mod_pagespeed_dir \
|
||||
$mod_pagespeed_dir/third_party/chromium/src \
|
||||
$mod_pagespeed_dir/third_party/google-sparsehash/src \
|
||||
$mod_pagespeed_dir/third_party/google-sparsehash/gen/arch/$os_name/$arch_name/include \
|
||||
$mod_pagespeed_dir/third_party/protobuf/src \
|
||||
$mod_pagespeed_dir/third_party/re2/src \
|
||||
$mod_pagespeed_dir/out/$buildtype/obj/gen \
|
||||
$mod_pagespeed_dir/out/$buildtype/obj/gen/protoc_out/instaweb \
|
||||
$mod_pagespeed_dir/third_party/apr/src/include \
|
||||
$mod_pagespeed_dir/third_party/aprutil/src/include \
|
||||
$mod_pagespeed_dir/third_party/apr/gen/arch/$os_name/$arch_name/include \
|
||||
$mod_pagespeed_dir/third_party/aprutil/gen/arch/$os_name/$arch_name/include"
|
||||
ngx_feature_path="$pagespeed_include"
|
||||
|
||||
if $build_from_source ; then
|
||||
psol_library_binaries="\
|
||||
$mod_pagespeed_dir/net/instaweb/automatic/pagespeed_automatic.a \
|
||||
$mod_pagespeed_dir/out/$buildtype/obj.target/third_party/serf/libserf.a \
|
||||
$mod_pagespeed_dir/out/$buildtype/obj.target/third_party/aprutil/libaprutil.a \
|
||||
$mod_pagespeed_dir/out/$buildtype/obj.target/third_party/apr/libapr.a"
|
||||
else
|
||||
psol_library_dir="$ngx_addon_dir/psol/lib/$buildtype/$os_name/$arch_name"
|
||||
psol_library_binaries="\
|
||||
$psol_library_dir/pagespeed_automatic.a \
|
||||
$psol_library_dir/libserf.a \
|
||||
$psol_library_dir/libaprutil.a \
|
||||
$psol_library_dir/libapr.a"
|
||||
fi
|
||||
|
||||
pagespeed_libs="-lstdc++ $psol_library_binaries -lrt -pthread -lm"
|
||||
pagespeed_libs="$pagespeed_libs $psol_binary -lrt -pthread -lm"
|
||||
ngx_feature_libs="$pagespeed_libs"
|
||||
ngx_feature_test="
|
||||
|
||||
GoogleString output_buffer;
|
||||
net_instaweb::StringWriter write_to_string(&output_buffer);
|
||||
|
||||
@@ -122,44 +190,106 @@ if [ $ngx_found = yes ]; then
|
||||
ps_src="$ngx_addon_dir/src"
|
||||
ngx_addon_name=ngx_pagespeed
|
||||
NGX_ADDON_DEPS="$NGX_ADDON_DEPS \
|
||||
$ps_src/ngx_pagespeed.h \
|
||||
$ps_src/ngx_fetch.h \
|
||||
$ps_src/ngx_url_async_fetcher.h \
|
||||
$ps_src/log_message_handler.h \
|
||||
$ps_src/ngx_base_fetch.h \
|
||||
$ps_src/ngx_server_context.h \
|
||||
$ps_src/ngx_rewrite_options.h \
|
||||
$ps_src/ngx_rewrite_driver_factory.h \
|
||||
$ps_src/ngx_thread_system.h \
|
||||
$ps_src/ngx_caching_headers.h \
|
||||
$ps_src/ngx_event_connection.h \
|
||||
$ps_src/ngx_fetch.h \
|
||||
$ps_src/ngx_gzip_setter.h \
|
||||
$ps_src/ngx_list_iterator.h \
|
||||
$ps_src/ngx_message_handler.h \
|
||||
$ps_src/pthread_shared_mem.h \
|
||||
$ps_src/ngx_request_context.h \
|
||||
$ps_src/log_message_handler.h"
|
||||
$ps_src/ngx_pagespeed.h \
|
||||
$ps_src/ngx_rewrite_driver_factory.h \
|
||||
$ps_src/ngx_rewrite_options.h \
|
||||
$ps_src/ngx_server_context.h \
|
||||
$ps_src/ngx_url_async_fetcher.h \
|
||||
$psol_binary"
|
||||
NGX_ADDON_SRCS="$NGX_ADDON_SRCS \
|
||||
$ps_src/log_message_handler.cc \
|
||||
$ps_src/ngx_base_fetch.cc \
|
||||
$ps_src/ngx_caching_headers.cc \
|
||||
$ps_src/ngx_event_connection.cc \
|
||||
$ps_src/ngx_fetch.cc \
|
||||
$ps_src/ngx_gzip_setter.cc \
|
||||
$ps_src/ngx_list_iterator.cc \
|
||||
$ps_src/ngx_message_handler.cc \
|
||||
$ps_src/ngx_pagespeed.cc \
|
||||
$ps_src/ngx_rewrite_driver_factory.cc \
|
||||
$ps_src/ngx_rewrite_options.cc \
|
||||
$ps_src/ngx_server_context.cc \
|
||||
$ps_src/ngx_fetch.cc \
|
||||
$ps_src/ngx_url_async_fetcher.cc \
|
||||
$ps_src/ngx_base_fetch.cc \
|
||||
$ps_src/ngx_thread_system.cc \
|
||||
$ps_src/ngx_message_handler.cc \
|
||||
$ps_src/pthread_shared_mem.cc \
|
||||
$ps_src/ngx_request_context.cc \
|
||||
$ps_src/log_message_handler.cc \
|
||||
$mod_pagespeed_dir/net/instaweb/apache/add_headers_fetcher.cc \
|
||||
$mod_pagespeed_dir/net/instaweb/apache/loopback_route_fetcher.cc \
|
||||
$mod_pagespeed_dir/net/instaweb/apache/serf_url_async_fetcher.cc"
|
||||
# Make pagespeed run immediately before gzip.
|
||||
$ps_src/ngx_url_async_fetcher.cc"
|
||||
# Save our sources in a separate var since we may need it in config.make
|
||||
PS_NGX_SRCS="$NGX_ADDON_SRCS"
|
||||
|
||||
if [ "$position_aux" = "true" ] ; then
|
||||
HTTP_AUX_FILTER_MODULES="$HTTP_AUX_FILTER_MODULES $ngx_addon_name"
|
||||
else
|
||||
# Make pagespeed run immediately before gzip and Brotli.
|
||||
if echo $HTTP_FILTER_MODULES | grep ngx_http_brotli_filter_module >/dev/null; then
|
||||
module=ngx_http_brotli_filter_module
|
||||
elif [ $HTTP_GZIP = YES ]; then
|
||||
module=$HTTP_GZIP_FILTER_MODULE
|
||||
else
|
||||
module=$HTTP_RANGE_HEADER_FILTER_MODULE
|
||||
fi
|
||||
|
||||
HTTP_FILTER_MODULES=$(echo $HTTP_FILTER_MODULES |\
|
||||
sed "s/$module/$module $ngx_addon_name/")
|
||||
fi
|
||||
|
||||
# Make the etag header filter run immediately before range header filter.
|
||||
HTTP_FILTER_MODULES=$(echo $HTTP_FILTER_MODULES |\
|
||||
sed "s/$HTTP_GZIP_FILTER_MODULE/$HTTP_GZIP_FILTER_MODULE $ngx_addon_name/")
|
||||
HTTP_FILTER_MODULES=$(echo $HTTP_FILTER_MODULES |\
|
||||
sed "s/$HTTP_HEADER_FILTER_MODULE/$HTTP_HEADER_FILTER_MODULE ngx_pagespeed_copy_filter/")
|
||||
sed "s/$HTTP_RANGE_HEADER_FILTER_MODULE/$HTTP_RANGE_HEADER_FILTER_MODULE ngx_pagespeed_etag_filter/")
|
||||
|
||||
CORE_LIBS="$CORE_LIBS $pagespeed_libs"
|
||||
CORE_INCS="$CORE_INCS $pagespeed_include"
|
||||
echo "List of modules (in reverse order of applicability): "$HTTP_FILTER_MODULES
|
||||
else
|
||||
cat << END
|
||||
$0: error: module ngx_pagespeed requires the pagespeed optimization library
|
||||
$0: error: module ngx_pagespeed requires the pagespeed optimization library.
|
||||
Look in obj/autoconf.err for more details.
|
||||
END
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Test whether the compiler is compatible
|
||||
ngx_feature="psol-compiler-compat"
|
||||
ngx_feature_name=""
|
||||
ngx_feature_run=no
|
||||
ngx_feature_incs=""
|
||||
ngx_feature_path=""
|
||||
ngx_feature_libs="-lstdc++"
|
||||
ngx_feature_test="
|
||||
|
||||
#if defined(__clang__) && defined(__GLIBCXX__)
|
||||
// See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html#abi.versioning
|
||||
// for a list of various values of __GLIBCXX__. Note that they're not monotonic
|
||||
// with respect to version numbers.
|
||||
#if __GLIBCXX__ == 20120322 || __GLIBCXX__ == 20120614
|
||||
#error \"clang is using libstdc++ 4.7.0 or 4.7.1, which can cause binary incompatibility.\"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(__clang__) && defined(__GNUC__)
|
||||
#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
|
||||
#error \"GCC < 4.8 no longer supported. Please use gcc >= 4.8 or clang >= 3.3\"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 3)
|
||||
#error \"Please use gcc >= 4.8 or clang >= 3.3\"
|
||||
#endif
|
||||
#endif
|
||||
"
|
||||
|
||||
. "$ngx_addon_dir/cpp_feature"
|
||||
|
||||
if [ $ngx_found = no ]; then
|
||||
cat << END
|
||||
$0: error: module ngx_pagespeed requires gcc >= 4.8 or clang >= 3.3.
|
||||
See https://developers.google.com/speed/pagespeed/module/build_ngx_pagespeed_from_source for some recommendations.
|
||||
Look in objs/autoconf.err for more details.
|
||||
END
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
if [ -n "$NGX_CLANG_VER" ]; then
|
||||
# Chromium headers assume clang is always in C++11 mode. Oblige it.
|
||||
for ps_src_file in $PS_NGX_SRCS; do
|
||||
ps_obj_file="$NGX_OBJS/addon/src/`basename $ps_src_file .cc`.o"
|
||||
echo "$ps_obj_file : CFLAGS += --std=c++11" >> $NGX_MAKEFILE
|
||||
done
|
||||
fi
|
||||
@@ -1,88 +0,0 @@
|
||||
// Copyright 2001 - 2003 Google Inc. All Rights Reserved
|
||||
|
||||
#ifndef BASE_BASICTYPES_H__
|
||||
#define BASE_BASICTYPES_H__
|
||||
|
||||
typedef unsigned char uint8;
|
||||
typedef unsigned short uint16;
|
||||
typedef unsigned int uint32;
|
||||
|
||||
const uint8 kuint8max = (( uint8) 0xFF);
|
||||
const uint32 kuint32max = ((uint32) 0xFFFFFFFF);
|
||||
|
||||
// The arraysize(arr) macro returns the # of elements in an array arr.
|
||||
// The expression is a compile-time constant, and therefore can be
|
||||
// used in defining new arrays, for example. If you use arraysize on
|
||||
// a pointer by mistake, you will get a compile-time error.
|
||||
//
|
||||
// One caveat is that arraysize() doesn't accept any array of an
|
||||
// anonymous type or a type defined inside a function. In these rare
|
||||
// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
|
||||
// due to a limitation in C++'s template system. The limitation might
|
||||
// eventually be removed, but it hasn't happened yet.
|
||||
|
||||
// This template function declaration is used in defining arraysize.
|
||||
// Note that the function doesn't need an implementation, as we only
|
||||
// use its type.
|
||||
template <typename T, size_t N>
|
||||
char (&ArraySizeHelper(T (&array)[N]))[N];
|
||||
|
||||
// That gcc wants both of these prototypes seems mysterious. VC, for
|
||||
// its part, can't decide which to use (another mystery). Matching of
|
||||
// template overloads: the final frontier.
|
||||
#ifndef _MSC_VER
|
||||
template <typename T, size_t N>
|
||||
char (&ArraySizeHelper(const T (&array)[N]))[N];
|
||||
#endif
|
||||
|
||||
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
|
||||
|
||||
// ARRAYSIZE performs essentially the same calculation as arraysize,
|
||||
// but can be used on anonymous types or types defined inside
|
||||
// functions. It's less safe than arraysize as it accepts some
|
||||
// (although not all) pointers. Therefore, you should use arraysize
|
||||
// whenever possible.
|
||||
//
|
||||
// The expression ARRAYSIZE(a) is a compile-time constant of type
|
||||
// size_t.
|
||||
//
|
||||
// ARRAYSIZE catches a few type errors. If you see a compiler error
|
||||
//
|
||||
// "warning: division by zero in ..."
|
||||
//
|
||||
// when using ARRAYSIZE, you are (wrongfully) giving it a pointer.
|
||||
// You should only use ARRAYSIZE on statically allocated arrays.
|
||||
//
|
||||
// The following comments are on the implementation details, and can
|
||||
// be ignored by the users.
|
||||
//
|
||||
// ARRAYSIZE(arr) works by inspecting sizeof(arr) (the # of bytes in
|
||||
// the array) and sizeof(*(arr)) (the # of bytes in one array
|
||||
// element). If the former is divisible by the latter, perhaps arr is
|
||||
// indeed an array, in which case the division result is the # of
|
||||
// elements in the array. Otherwise, arr cannot possibly be an array,
|
||||
// and we generate a compiler error to prevent the code from
|
||||
// compiling.
|
||||
//
|
||||
// Since the size of bool is implementation-defined, we need to cast
|
||||
// !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
|
||||
// result has type size_t.
|
||||
//
|
||||
// This macro is not perfect as it wrongfully accepts certain
|
||||
// pointers, namely where the pointer size is divisible by the pointee
|
||||
// size. Since all our code has to go through a 32-bit compiler,
|
||||
// where a pointer is 4 bytes, this means all pointers to a type whose
|
||||
// size is 3 or greater than 4 will be (righteously) rejected.
|
||||
//
|
||||
// Starting with Visual C++ 2005, WinNT.h includes ARRAYSIZE.
|
||||
// Element count of |a|. The divisor is 1 when sizeof(a) is a whole multiple
// of the element size and 0 otherwise, so a non-array argument that fails
// that test triggers a division by zero at compile time.
#define ARRAYSIZE_UNSAFE(a) \
  ((sizeof(a) / sizeof(*(a))) / \
   static_cast<size_t>((sizeof(a) % sizeof(*(a))) == 0))
|
||||
|
||||
// Declares, without defining, the copy constructor and the copy-assignment
// operator of TypeName. Use it in the private: section of a class so that
// the class cannot be copied.
#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \
  TypeName(TypeName const&); \
  void operator=(TypeName const&)
|
||||
|
||||
#endif // BASE_BASICTYPES_H__
|
||||
@@ -1,482 +0,0 @@
|
||||
// Copyright 2006 Google Inc. All Rights Reserved.
|
||||
// Author: brettw (Brett Wilson)
|
||||
|
||||
#ifndef BASE_LOGGING_H__
|
||||
#define BASE_LOGGING_H__
|
||||
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <strstream>
|
||||
#include <tchar.h>
|
||||
|
||||
#include "base/basictypes.h"
|
||||
#include "base/scoped_ptr.h"
|
||||
|
||||
// Optional message capabilities
|
||||
// -----------------------------
|
||||
// Assertion failed messages and fatal errors are displayed in a dialog box
|
||||
// before the application exits. However, running this UI creates a message
|
||||
// loop, which causes application messages to be processed and potentially
|
||||
// dispatched to existing application windows. Since the application is in a
|
||||
// bad state when this assertion dialog is displayed, these messages may not
|
||||
// get processed and hang the dialog, or the application might go crazy.
|
||||
//
|
||||
// Therefore, it can be beneficial to display the error dialog in a separate
|
||||
// process from the main application. When the logging system needs to display
|
||||
// a fatal error dialog box, it will look for a program called
|
||||
// "DebugMessage.exe" in the same directory as the application executable. It
|
||||
// will run this application with the message as the command line, and will
|
||||
// not include the name of the application as is traditional for easier
|
||||
// parsing.
|
||||
//
|
||||
// The code for DebugMessage.exe is only one line. In WinMain, do:
|
||||
// MessageBox(NULL, GetCommandLineW(), L"Fatal Error", 0);
|
||||
//
|
||||
// If DebugMessage.exe is not found, the logging code will use a normal
|
||||
// MessageBox, potentially causing the problems discussed above.
|
||||
|
||||
|
||||
// Instructions
|
||||
// ------------
|
||||
//
|
||||
// Make a bunch of macros for logging. The way to log things is to stream
|
||||
// things to LOG(<a particular severity level>). E.g.,
|
||||
//
|
||||
// LOG(INFO) << "Found " << num_cookies << " cookies";
|
||||
//
|
||||
// You can also do conditional logging:
|
||||
//
|
||||
// LOG_IF(INFO, num_cookies > 10) << "Got lots of cookies";
|
||||
//
|
||||
// The above logs the message only when the condition (num_cookies > 10) is
|
||||
// true. (Logging on the 1st, 11th, 21st, ... execution, identified by a
|
||||
// special COUNTER value, is not something LOG_IF does.)
|
||||
//
|
||||
// There are also "debug mode" logging macros like the ones above:
|
||||
//
|
||||
// DLOG(INFO) << "Found cookies";
|
||||
//
|
||||
// DLOG_IF(INFO, num_cookies > 10) << "Got lots of cookies";
|
||||
//
|
||||
// All "debug mode" logging is compiled away to nothing for non-debug mode
|
||||
// compiles. LOG_IF and development flags also work well together
|
||||
// because the code can be compiled away sometimes.
|
||||
//
|
||||
// We also have
|
||||
//
|
||||
// LOG_ASSERT(assertion);
|
||||
// DLOG_ASSERT(assertion);
|
||||
//
|
||||
// which is syntactic sugar for {,D}LOG_IF(FATAL, assert fails) << assertion;
|
||||
//
|
||||
// We also override the standard 'assert' to use 'DLOG_ASSERT'.
|
||||
//
|
||||
// The supported severity levels for macros that allow you to specify one
|
||||
// are (in increasing order of severity) INFO, WARNING, ERROR, and FATAL.
|
||||
//
|
||||
// There is also the special severity of DFATAL, which logs FATAL in
|
||||
// debug mode, ERROR in normal mode.
|
||||
//
|
||||
// Very important: logging a message at the FATAL severity level causes
|
||||
// the program to terminate (after the message is logged).
|
||||
|
||||
namespace logging {
|
||||
|
||||
// Destination(s) of the logging output: a flat file, the system debug log
// (OutputDebugString), or both. The default is LOG_ONLY_TO_FILE.
enum LoggingDestination {
  LOG_ONLY_TO_FILE,
  LOG_ONLY_TO_SYSTEM_DEBUG_LOG,
  LOG_TO_BOTH_FILE_AND_SYSTEM_DEBUG_LOG
};
|
||||
|
||||
// Whether the log file is locked while it is being written. Skipping the
// lock is fine for a single-threaded program. When several threads, or more
// than one process, write to the log, lock the file during writes so that
// each log output is atomic; other writers block in the meantime.
//
// Locking only works properly when every process writing to the log file
// has it enabled. The default is DONT_LOCK_LOG_FILE.
enum LogLockingState {
  LOCK_LOG_FILE,
  DONT_LOCK_LOG_FILE
};
|
||||
|
||||
// What to do on startup with a log file that already exists: delete it or
// append to it. The default is APPEND_TO_OLD_LOG_FILE.
enum OldFileDeletionState {
  DELETE_OLD_LOG_FILE,
  APPEND_TO_OLD_LOG_FILE
};
|
||||
|
||||
// Sets the log file name and other global logging state. Calling this function
|
||||
// is recommended, and is normally done at the beginning of application init.
|
||||
// If you don't call it, all the flags will be initialized to their default
|
||||
// values, and there is a race condition that may leak a critical section
|
||||
// object if two threads try to do the first log at the same time.
|
||||
// See the definition of the enums above for descriptions and default values.
|
||||
//
|
||||
// The default log file is initialized to "debug.log" in the application
|
||||
// directory. You probably don't want this, especially since the program
|
||||
// directory may not be writable on an enduser's system.
|
||||
void InitLogging(const TCHAR* log_file, LoggingDestination logging_dest,
|
||||
LogLockingState lock_log, OldFileDeletionState delete_old);
|
||||
|
||||
// Sets the log level. Anything at or above this level will be written to the
|
||||
// log file/displayed to the user (if applicable). Anything below this level
|
||||
// will be silently ignored. The log level defaults to 0 (everything is logged)
|
||||
// if this function is not called.
|
||||
void SetMinLogLevel(int level);
|
||||
|
||||
// Sets the log filter prefix. Any log message below LOG_ERROR severity that
|
||||
// doesn't start with this prefix will be silently ignored. The filter defaults
|
||||
// to NULL (everything is logged) if this function is not called. Messages
|
||||
// with severity of LOG_ERROR or higher will not be filtered.
|
||||
void SetLogFilterPrefix(char* filter);
|
||||
|
||||
// Chooses the common items that are prepended to each log message. Process
// and thread IDs default to off and the timestamp defaults to on, so if this
// function is never called only the timestamp is written.
void SetLogItems(bool enable_process_id,
                 bool enable_thread_id,
                 bool enable_timestamp,
                 bool enable_tickcount);
|
||||
|
||||
// Installs the handler that is notified of check failures. The default
// handler shows a dialog box; clients may override it with their own
// handling (for example a silent handler for unit tests).
typedef void (*LogAssertHandlerFunction)(std::string const& str);
void SetLogAssertHandler(LogAssertHandlerFunction handler);
|
||||
|
||||
// Severity levels in increasing order of severity. LOG_NUM_SEVERITIES is the
// count of the levels above it.
typedef int LogSeverity;
LogSeverity const LOG_INFO = 0;
LogSeverity const LOG_WARNING = 1;
LogSeverity const LOG_ERROR = 2;
LogSeverity const LOG_FATAL = 3;
LogSeverity const LOG_NUM_SEVERITIES = 4;

// LOG_DFATAL_LEVEL is LOG_FATAL in debug mode and LOG_ERROR in normal
// (NDEBUG) mode.
#ifndef NDEBUG
LogSeverity const LOG_DFATAL_LEVEL = LOG_FATAL;
#else
LogSeverity const LOG_DFATAL_LEVEL = LOG_ERROR;
#endif
|
||||
|
||||
// Small macros that create the LogMessage object behind LOG(), LOG_IF() and
// friends. They are used all over the code, so each one is kept compact.
// The INFO form uses the two-argument constructor; every other form passes
// its severity explicitly.
#define COMPACT_GOOGLE_LOG_INFO logging::LogMessage(__FILE__, __LINE__)
#define COMPACT_GOOGLE_LOG_WARNING \
  logging::LogMessage(__FILE__, __LINE__, logging::LOG_WARNING)
#define COMPACT_GOOGLE_LOG_ERROR \
  logging::LogMessage(__FILE__, __LINE__, logging::LOG_ERROR)
#define COMPACT_GOOGLE_LOG_FATAL \
  logging::LogMessage(__FILE__, __LINE__, logging::LOG_FATAL)
#define COMPACT_GOOGLE_LOG_DFATAL \
  logging::LogMessage(__FILE__, __LINE__, logging::LOG_DFATAL_LEVEL)

// wingdi.h defines ERROR as 0. When ERROR reaches LOG() by way of another
// macro (DLOG, LOG_IF, ...), it has already been replaced by 0 and the
// pasted name is COMPACT_GOOGLE_LOG_0. That name therefore has to mean the
// same thing as COMPACT_GOOGLE_LOG_ERROR. ERROR is also defined here, in
// the same way as the Windows SDK defines it, for consistency.
#define ERROR 0
#define COMPACT_GOOGLE_LOG_0 COMPACT_GOOGLE_LOG_ERROR
|
||||
|
||||
// We use the preprocessor's merging operator, "##", so that, e.g.,
|
||||
// LOG(INFO) becomes the token COMPACT_GOOGLE_LOG_INFO. There's some funny
|
||||
// subtle difference between ostream member streaming functions (e.g.,
|
||||
// ostream::operator<<(int) and ostream non-member streaming functions
|
||||
// (e.g., ::operator<<(ostream&, string&): it turns out that it's
|
||||
// impossible to stream something like a string directly to an unnamed
|
||||
// ostream. We employ a neat hack by calling the stream() member
|
||||
// function of LogMessage which seems to avoid the problem.
|
||||
|
||||
// LOG(severity) yields the ostream of a temporary LogMessage.
#define LOG(severity) COMPACT_GOOGLE_LOG_ ## severity.stream()
#define SYSLOG(severity) LOG(severity)

// Conditional logging. Both arms of ?: are void: when |condition| is false
// nothing after the colon is evaluated, otherwise LogMessageVoidify's
// operator& swallows the stream once all << operands have been applied.
#define LOG_IF(severity, condition) \
  !(condition) ? static_cast<void>(0) \
               : logging::LogMessageVoidify() & LOG(severity)
#define SYSLOG_IF(severity, condition) LOG_IF(severity, condition)

// Logs a FATAL "Assert failed: <condition>. " message when |condition| is
// false.
#define LOG_ASSERT(condition) \
  LOG_IF(FATAL, !(condition)) << "Assert failed: " #condition ". "
#define SYSLOG_ASSERT(condition) \
  SYSLOG_IF(FATAL, !(condition)) << "Assert failed: " #condition ". "
|
||||
|
||||
// Holds a std::string pointer and converts to bool so that it can be used as
// a condition: the conversion yields true exactly when the pointer is
// non-NULL.
struct CheckOpString {
  CheckOpString(std::string* str) : str_(str) {}

  // There is deliberately no destructor: a non-NULL str_ means LOG(FATAL)
  // is about to run, so cleaning up str_ would be pointless.
  operator bool() const { return str_ ? true : false; }

  std::string* str_;
};
|
||||
|
||||
// Builds the error message for a failed comparison, in the form
// "<names> (<v1> vs. <v2>)". It is kept apart from the "Impl" function
// templates because it is not performance critical and can be out of line,
// while the "Impl" code should be inline.
//
// Returns a string allocated with new; the caller receives the pointer.
template<class t1, class t2>
std::string* MakeCheckOpString(const t1& v1, const t2& v2, const char* names) {
  std::ostrstream ss;
  ss << names << " (" << v1 << " vs. " << v2 << ")";
  // str() freezes the stream, which stops ~ostrstream from freeing the
  // character buffer. Copy the text, then unfreeze so the buffer is released
  // when |ss| goes out of scope instead of leaking.
  std::string* message = new std::string(ss.str(), ss.pcount());
  ss.freeze(false);
  return message;
}
|
||||
|
||||
// Helper for int operands. It is only declared in this header; the
// definition is not visible here.
extern std::string* MakeCheckOpStringIntInt(int v1, int v2, const char* names);
|
||||
|
||||
// NOTE(review): this is an overload with two unnamed non-type int template
// parameters, not an explicit specialization of MakeCheckOpString<t1, t2>
// (that would be spelled `template<>` with `MakeCheckOpString<int, int>`).
// Neither parameter can be deduced from the function arguments, so an
// ordinary MakeCheckOpString(a, b, names) call presumably never selects it.
// Confirm the intent, and check how MakeCheckOpStringIntInt is implemented,
// before turning this into a real specialization.
template<int, int>
|
||||
std::string* MakeCheckOpString(const int& v1, const int& v2, const char* names) {
|
||||
return MakeCheckOpStringIntInt(v1, v2, names);
|
||||
}
|
||||
|
||||
// Plus some debug-logging macros that get compiled to nothing for production
|
||||
//
|
||||
// DEBUG_MODE is for uses like
|
||||
// if (DEBUG_MODE) foo.CheckThatFoo();
|
||||
// instead of
|
||||
// #ifndef NDEBUG
|
||||
// foo.CheckThatFoo();
|
||||
// #endif
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
||||
// Debug builds: the D* logging macros simply forward to the always-on forms.
#define DLOG(severity) LOG(severity)
#define DLOG_IF(severity, condition) LOG_IF(severity, condition)
#define DLOG_ASSERT(condition) LOG_ASSERT(condition)

// Debug-only checking; not executed in NDEBUG mode.
enum { DEBUG_MODE = 1 };

// Logs a FATAL "Check failed: <condition>. " message when |condition| is
// false. More text may be streamed after the macro.
#define DCHECK(condition) \
  LOG_IF(FATAL, !(condition)) << "Check failed: " #condition ". "
|
||||
|
||||
// Helper functions for DCHECK_OP macro.
|
||||
// The (int, int) specialization works around the issue that the compiler
|
||||
// will not instantiate the template version of the function on values of
|
||||
// unnamed enum type - see comment below.
|
||||
#define DEFINE_DCHECK_OP_IMPL(name, op) \
|
||||
template <class t1, class t2> \
|
||||
inline std::string* Check##name##Impl(const t1& v1, const t2& v2, \
|
||||
const char* names) { \
|
||||
if (v1 op v2) return NULL; \
|
||||
else return MakeCheckOpString(v1, v2, names); \
|
||||
} \
|
||||
inline std::string* Check##name##Impl(int v1, int v2, const char* names) { \
|
||||
if (v1 op v2) return NULL; \
|
||||
else return MakeCheckOpString(v1, v2, names); \
|
||||
}
|
||||
DEFINE_DCHECK_OP_IMPL(EQ, ==)
|
||||
DEFINE_DCHECK_OP_IMPL(NE, !=)
|
||||
DEFINE_DCHECK_OP_IMPL(LE, <=)
|
||||
DEFINE_DCHECK_OP_IMPL(LT, < )
|
||||
DEFINE_DCHECK_OP_IMPL(GE, >=)
|
||||
DEFINE_DCHECK_OP_IMPL(GT, > )
|
||||
#undef DEFINE_DCHECK_OP_IMPL
|
||||
|
||||
// Helper macro for binary operators.
|
||||
// Don't use this macro directly in your code, use DCHECK_EQ et al below.
|
||||
// Expands to a `while` statement whose condition declares a
// logging::CheckOpString initialised from Check<name>Impl(val1, val2, text),
// where the text is "val1 op val2" as written at the call site. The loop
// body runs only when that result is non-NULL, i.e. the comparison failed,
// and it builds a LogMessage from the result. The macro ends in .stream(),
// so the caller can append `<< extra` text.
#define DCHECK_OP(name, op, val1, val2) \
|
||||
while (logging::CheckOpString _result = \
|
||||
logging::Check##name##Impl((val1), (val2), #val1 " " #op " " #val2)) \
|
||||
logging::LogMessage(__FILE__, __LINE__, _result).stream()
|
||||
|
||||
// Equality/Inequality checks - compare two values, and log a LOG_FATAL message
|
||||
// including the two values when the result is not as expected. The values
|
||||
// must have operator<<(ostream, ...) defined.
|
||||
//
|
||||
// You may append to the error message like so:
|
||||
// CHECK_NE(1, 2) << ": The world must be ending!";
|
||||
//
|
||||
// We are very careful to ensure that each argument is evaluated exactly
|
||||
// once, and that anything which is legal to pass as a function argument is
|
||||
// legal here. In particular, the arguments may be temporary expressions
|
||||
// which will end up being destroyed at the end of the apparent statement,
|
||||
// for example:
|
||||
// CHECK_EQ(string("abc")[1], 'b');
|
||||
//
|
||||
// WARNING: These don't compile correctly if one of the arguments is a pointer
|
||||
// and the other is NULL. To work around this, simply static_cast NULL to the
|
||||
// type of the desired pointer.
|
||||
|
||||
// One macro per comparison operator; each forwards to DCHECK_OP with the
// helper suffix and the operator token.
#define DCHECK_EQ(val1, val2) DCHECK_OP(EQ, ==, val1, val2)
#define DCHECK_NE(val1, val2) DCHECK_OP(NE, !=, val1, val2)
#define DCHECK_LE(val1, val2) DCHECK_OP(LE, <=, val1, val2)
#define DCHECK_LT(val1, val2) DCHECK_OP(LT, <, val1, val2)
#define DCHECK_GE(val1, val2) DCHECK_OP(GE, >=, val1, val2)
#define DCHECK_GT(val1, val2) DCHECK_OP(GT, >, val1, val2)
|
||||
|
||||
// Declarations of the string-comparison helpers, named
// Check<func><expected>Impl. To avoid bloat they are defined in logging.cc.
#define DECLARE_DCHECK_STROP_IMPL(func, expected) \
  std::string* Check##func##expected##Impl( \
      const char* s1, const char* s2, const char* names);
DECLARE_DCHECK_STROP_IMPL(strcmp, true)
DECLARE_DCHECK_STROP_IMPL(strcmp, false)
DECLARE_DCHECK_STROP_IMPL(_stricmp, true)
DECLARE_DCHECK_STROP_IMPL(_stricmp, false)
#undef DECLARE_DCHECK_STROP_IMPL
|
||||
|
||||
// Helper macro for string comparisons.
// Don't use this macro directly in your code, use DCHECK_STREQ et al below.
//
// Expands to a `while` statement whose condition holds the result of
// Check<func><expected>Impl(s1, s2, "s1 op s2"). The body logs the result's
// message at FATAL severity. CheckOpString is spelled with its namespace,
// as in DCHECK_OP, so the macro also compiles when used outside namespace
// logging.
#define DCHECK_STROP(func, op, expected, s1, s2) \
  while (logging::CheckOpString _result = \
         logging::Check##func##expected##Impl((s1), (s2), \
                                              #s1 " " #op " " #s2)) \
    LOG(FATAL) << *_result.str_
|
||||
|
||||
// String (char*) equality/inequality checks.
|
||||
// CASE versions are case-insensitive.
|
||||
//
|
||||
// Note that "s1" and "s2" may be temporary strings which are destroyed
|
||||
// by the compiler at the end of the current "full expression"
|
||||
// (e.g. DCHECK_STREQ(Foo().c_str(), Bar().c_str())).
|
||||
|
||||
// Each macro names the comparison function, the operator shown in the
// message, and the suffix (true/false) of the helper that DCHECK_STROP calls.
#define DCHECK_STREQ(s1, s2) DCHECK_STROP(strcmp, ==, true, s1, s2)
#define DCHECK_STRNE(s1, s2) DCHECK_STROP(strcmp, !=, false, s1, s2)
#define DCHECK_STRCASEEQ(s1, s2) DCHECK_STROP(_stricmp, ==, true, s1, s2)
#define DCHECK_STRCASENE(s1, s2) DCHECK_STROP(_stricmp, !=, false, s1, s2)
|
||||
|
||||
// DCHECK_INDEX checks that I is a valid index into the array A.
// DCHECK_BOUND checks that B does not exceed the number of elements in A.
// The macro arguments are parenthesized so that an argument containing a
// lower-precedence operator (e.g. `c ? i : j`) is compared as a whole.
#define DCHECK_INDEX(I,A) DCHECK((I) < (sizeof(A)/sizeof((A)[0])))
#define DCHECK_BOUND(B,A) DCHECK((B) <= (sizeof(A)/sizeof((A)[0])))
|
||||
|
||||
#else // NDEBUG
|
||||
|
||||
// Non-debug builds: the condition is the constant `true`, so the logging
// arm after the colon is never evaluated.
#define DLOG(severity) \
  true ? static_cast<void>(0) : logging::LogMessageVoidify() & LOG(severity)

#define DLOG_IF(severity, condition) \
  true ? static_cast<void>(0) : logging::LogMessageVoidify() & LOG(severity)

#define DLOG_ASSERT(condition) \
  true ? static_cast<void>(0) : LOG_ASSERT(condition)

// Zero in non-debug builds.
enum { DEBUG_MODE = 0 };
|
||||
|
||||
// In non-debug mode this macro may be followed by a sequence of stream
// parameters. DCHECK and friends use it so that an expression such as
// DCHECK(foo) << "asdf" is still syntactically valid even though it is the
// body of a `while (false)` and never runs.
#define NDEBUG_EAT_STREAM_PARAMETERS \
  logging::LogMessage(__FILE__, __LINE__).stream()

#define DCHECK(condition) while (false) NDEBUG_EAT_STREAM_PARAMETERS

#define DCHECK_EQ(val1, val2) while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_NE(val1, val2) while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_LE(val1, val2) while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_LT(val1, val2) while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_GE(val1, val2) while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_GT(val1, val2) while (false) NDEBUG_EAT_STREAM_PARAMETERS

#define DCHECK_STREQ(str1, str2) while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_STRCASEEQ(str1, str2) \
  while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_STRNE(str1, str2) while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_STRCASENE(str1, str2) \
  while (false) NDEBUG_EAT_STREAM_PARAMETERS

// The debug branch also defines DCHECK_INDEX and DCHECK_BOUND. They get the
// same no-op form here so that code using them still compiles with NDEBUG.
#define DCHECK_INDEX(I,A) while (false) NDEBUG_EAT_STREAM_PARAMETERS
#define DCHECK_BOUND(B,A) while (false) NDEBUG_EAT_STREAM_PARAMETERS
|
||||
|
||||
#endif // NDEBUG
|
||||
|
||||
// Marks a code path that should never execute; expands to DCHECK(false).
#define NOTREACHED() DCHECK(false)
|
||||
|
||||
// Redefine the standard assert to use our nice log files
|
||||
#undef assert
|
||||
// NOTE(review): including <assert.h>/<cassert> after this header presumably
// restores the standard macro; confirm the include order at call sites.
#define assert(x) DLOG_ASSERT(x)
|
||||
|
||||
// This class more or less represents a particular log message. You
|
||||
// create an instance of LogMessage and then stream stuff to it.
|
||||
// When you finish streaming to it, ~LogMessage is called and the
|
||||
// full message gets streamed to the appropriate destination.
|
||||
//
|
||||
// You shouldn't actually use LogMessage's constructor to log things,
|
||||
// though. You should use the LOG() macro (and variants thereof)
|
||||
// above.
|
||||
class LogMessage {
|
||||
public:
|
||||
LogMessage(const char* file, int line, LogSeverity severity, int ctr);
|
||||
|
||||
// Two special constructors that generate reduced amounts of code at
|
||||
// LOG call sites for common cases.
|
||||
//
|
||||
// Used for LOG(INFO): Implied are:
|
||||
// severity = LOG_INFO, ctr = 0
|
||||
//
|
||||
// Using this constructor instead of the more complex constructor above
|
||||
// saves a couple of bytes per call site.
|
||||
LogMessage(const char* file, int line);
|
||||
|
||||
// Used for LOG(severity) where severity != INFO. Implied
|
||||
// are: ctr = 0
|
||||
//
|
||||
// Using this constructor instead of the more complex constructor above
|
||||
// saves a couple of bytes per call site.
|
||||
LogMessage(const char* file, int line, LogSeverity severity);
|
||||
|
||||
// A special constructor used for check failures.
|
||||
// Implied severity = LOG_FATAL
|
||||
LogMessage(const char* file, int line, const CheckOpString& result);
|
||||
|
||||
~LogMessage();
|
||||
|
||||
std::ostream& stream() { return stream_; }
|
||||
|
||||
private:
|
||||
void Init(const char* file, int line);
|
||||
|
||||
LogSeverity severity_;
|
||||
std::ostrstream stream_;
|
||||
int message_start_; // offset of the start of the message (past prefix info).
|
||||
|
||||
DISALLOW_EVIL_CONSTRUCTORS(LogMessage);
|
||||
};
|
||||
|
||||
// A non-macro interface to the log facility; (useful
|
||||
// when the logging level is not a compile-time constant).
|
||||
inline void LogAtLevel(int const log_level, std::string const &msg) {
|
||||
LogMessage(__FILE__, __LINE__, log_level).stream() << msg;
|
||||
}
|
||||
|
||||
// Lets the conditional logging macros explicitly discard a stream value,
// which avoids compiler warnings such as "value computed is not used" and
// "statement has no effect".
class LogMessageVoidify {
 public:
  LogMessageVoidify() {}

  // The operator must bind less tightly than << and more tightly than ?:,
  // which is why & is used.
  void operator&(std::ostream&) {}
};
|
||||
|
||||
// Closes the log file explicitly if open.
|
||||
// NOTE: Since the log file is opened as necessary by the action of logging
|
||||
// statements, there's no guarantee that it will stay closed
|
||||
// after this call.
|
||||
void CloseLogFile();
|
||||
|
||||
} // namespace logging
|
||||
|
||||
// Convenience operators for logging, which is where we use streams (using
// streams elsewhere is against Google style). They make it possible to emit
// non-ASCII Unicode strings to the log file, which is normally ASCII;
// non-ASCII characters are converted to UTF-8. This is relatively slow, so
// try not to use it for common cases.
std::ostream& operator<<(std::ostream& out, const wchar_t* wstr);
inline std::ostream& operator<<(std::ostream& out, const std::wstring& wstr) {
  const wchar_t* const raw = wstr.c_str();
  return out << raw;
}
|
||||
|
||||
#endif // BASE_LOGGING_H__
|
||||
@@ -1,322 +0,0 @@
|
||||
#ifndef BASE_SCOPED_PTR_H
|
||||
#define BASE_SCOPED_PTR_H
|
||||
|
||||
// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999.
|
||||
// Copyright (c) 2001, 2002 Peter Dimov
|
||||
//
|
||||
// Permission to copy, use, modify, sell and distribute this software
|
||||
// is granted provided this copyright notice appears in all copies.
|
||||
// This software is provided "as is" without express or implied
|
||||
// warranty, and with no claim as to its suitability for any purpose.
|
||||
//
|
||||
// See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation.
|
||||
//
|
||||
|
||||
// scoped_ptr mimics a built-in pointer except that it guarantees deletion
|
||||
// of the object pointed to, either on destruction of the scoped_ptr or via
|
||||
// an explicit reset(). scoped_ptr is a simple solution for simple needs;
|
||||
// use shared_ptr or std::auto_ptr if your needs are more complex.
|
||||
|
||||
// *** NOTE ***
|
||||
// If your scoped_ptr is a class member of class FOO pointing to a
|
||||
// forward declared type BAR (as shown below), then you MUST use a non-inlined
|
||||
// version of the destructor. The destructor of a scoped_ptr (called from
|
||||
// FOO's destructor) must have a complete definition of BAR in order to
|
||||
// destroy it. Example:
|
||||
//
|
||||
// -- foo.h --
|
||||
// class BAR;
|
||||
//
|
||||
// class FOO {
|
||||
// public:
|
||||
// FOO();
|
||||
// ~FOO(); // Required for sources that instantiate class FOO to compile!
|
||||
//
|
||||
// private:
|
||||
// scoped_ptr<BAR> bar_;
|
||||
// };
|
||||
//
|
||||
// -- foo.cc --
|
||||
// #include "foo.h"
|
||||
// FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition.
|
||||
|
||||
#include <cstddef> // for std::ptrdiff_t
|
||||
#include <assert.h> // for assert
|
||||
#include <stdlib.h> // for free() decl
|
||||
|
||||
// Holds one object allocated with new and deletes it when the scoped_ptr is
// destroyed or reset(). Copying is disabled.
template <typename T>
class scoped_ptr {
 public:
  typedef T element_type;

  // Stores |p|, which may be null.
  explicit scoped_ptr(T* p = 0) : ptr(p) {}

  ~scoped_ptr() {
    // sizeof(T) requires a complete type, so destroying through an
    // incomplete T is rejected at compile time.
    typedef char type_must_be_complete[sizeof(T)];
    delete ptr;
  }

  // Deletes the held object and stores |p| instead. Does nothing when |p| is
  // the pointer already held.
  void reset(T* p = 0) {
    typedef char type_must_be_complete[sizeof(T)];

    if (ptr == p)
      return;
    delete ptr;
    ptr = p;
  }

  // Dereferencing asserts that a pointer is held.
  T& operator*() const {
    assert(ptr != 0);
    return *ptr;
  }

  T* operator->() const {
    assert(ptr != 0);
    return ptr;
  }

  // Comparison against a raw pointer.
  bool operator==(T* p) const { return ptr == p; }
  bool operator!=(T* p) const { return ptr != p; }

  // Returns the held pointer; the scoped_ptr keeps it.
  T* get() const { return ptr; }

  // Exchanges the held pointers of *this and |b|.
  void swap(scoped_ptr & b) {
    T* const mine = ptr;
    ptr = b.ptr;
    b.ptr = mine;
  }

  // Returns the held pointer and sets the stored pointer to null without
  // deleting anything.
  T* release() {
    T* const released = ptr;
    ptr = 0;
    return released;
  }

 private:
  T* ptr;

  // Not copyable.
  scoped_ptr(scoped_ptr const &);
  scoped_ptr & operator=(scoped_ptr const &);

  // no reason to use these: each scoped_ptr should have its own object
  template <typename U> bool operator==(scoped_ptr<U> const& p) const;
  template <typename U> bool operator!=(scoped_ptr<U> const& p) const;
};

// Free-function form of scoped_ptr::swap.
template<typename T> inline
void swap(scoped_ptr<T>& a, scoped_ptr<T>& b) {
  a.swap(b);
}

// Comparisons with the raw pointer on the left-hand side.
template<typename T> inline
bool operator==(T* p, const scoped_ptr<T>& b) {
  T* const held = b.get();
  return p == held;
}

template<typename T> inline
bool operator!=(T* p, const scoped_ptr<T>& b) {
  T* const held = b.get();
  return p != held;
}
|
||||
|
||||
// scoped_array is the array counterpart of scoped_ptr: the array it points
// to is released with delete[] when the scoped_array is destroyed or
// reset(). Use shared_array or std::vector if your needs are more complex.

template<typename T>
class scoped_array {
 public:
  typedef T element_type;

  // Stores |p|, which may be null.
  explicit scoped_array(T* p = 0) : ptr(p) {}

  ~scoped_array() {
    // sizeof(T) requires a complete element type.
    typedef char type_must_be_complete[sizeof(T)];
    delete[] ptr;
  }

  // Releases the held array with delete[] and stores |p| instead. Does
  // nothing when |p| is the pointer already held.
  void reset(T* p = 0) {
    typedef char type_must_be_complete[sizeof(T)];

    if (ptr == p)
      return;
    delete [] ptr;
    ptr = p;
  }

  // Element access; asserts that an array is held and that |i| is not
  // negative.
  T& operator[](std::ptrdiff_t i) const {
    assert(ptr != 0);
    assert(i >= 0);
    return ptr[i];
  }

  // Comparison against a raw pointer.
  bool operator==(T* p) const { return ptr == p; }
  bool operator!=(T* p) const { return ptr != p; }

  // Returns the held pointer; the scoped_array keeps it.
  T* get() const { return ptr; }

  // Exchanges the held pointers of *this and |b|.
  void swap(scoped_array & b) {
    T* const mine = ptr;
    ptr = b.ptr;
    b.ptr = mine;
  }

  // Returns the held pointer and sets the stored pointer to null without
  // deleting anything.
  T* release() {
    T* const released = ptr;
    ptr = 0;
    return released;
  }

 private:
  T* ptr;

  // Not copyable.
  scoped_array(scoped_array const &);
  scoped_array & operator=(scoped_array const &);

  // no reason to use these: each scoped_array should have its own object
  template <typename U> bool operator==(scoped_array<U> const& p) const;
  template <typename U> bool operator!=(scoped_array<U> const& p) const;
};

// Free-function form of scoped_array::swap.
template<class T> inline
void swap(::scoped_array<T>& a, ::scoped_array<T>& b) {
  a.swap(b);
}

// Comparisons with the raw pointer on the left-hand side.
template<typename T> inline
bool operator==(T* p, const ::scoped_array<T>& b) {
  T* const held = b.get();
  return p == held;
}

template<typename T> inline
bool operator!=(T* p, const ::scoped_array<T>& b) {
  T* const held = b.get();
  return p != held;
}
|
||||
|
||||
|
||||
// Functor around the C library function free(), suitable as the FreeProc
// template argument of scoped_ptr_malloc below.
class ScopedPtrMallocFree {
 public:
  inline void operator()(void* x) const {
    free(x);
  }
};

// scoped_ptr_malloc<> is like scoped_ptr<>, except that the held pointer is
// released by calling a functor of the type given as the second template
// argument.

template<typename T, typename FreeProc = ScopedPtrMallocFree>
class scoped_ptr_malloc {
 public:
  typedef T element_type;

  // Stores |p|, which may be null.
  explicit scoped_ptr_malloc(T* p = 0) : ptr(p) {}

  ~scoped_ptr_malloc() {
    // sizeof(T) requires a complete type.
    typedef char type_must_be_complete[sizeof(T)];
    free_((void*) ptr);
  }

  // Passes the held pointer to the free functor and stores |p| instead.
  // Does nothing when |p| is the pointer already held.
  void reset(T* p = 0) {
    typedef char type_must_be_complete[sizeof(T)];

    if (ptr == p)
      return;
    free_((void*) ptr);
    ptr = p;
  }

  // Dereferencing asserts that a pointer is held.
  T& operator*() const {
    assert(ptr != 0);
    return *ptr;
  }

  T* operator->() const {
    assert(ptr != 0);
    return ptr;
  }

  // Comparison against a raw pointer.
  bool operator==(T* p) const { return ptr == p; }
  bool operator!=(T* p) const { return ptr != p; }

  // Returns the held pointer; the scoped_ptr_malloc keeps it.
  T* get() const { return ptr; }

  // Exchanges the held pointers of *this and |b|.
  void swap(scoped_ptr_malloc & b) {
    T* const mine = ptr;
    ptr = b.ptr;
    b.ptr = mine;
  }

  // Returns the held pointer and sets the stored pointer to null without
  // freeing anything.
  T* release() {
    T* const released = ptr;
    ptr = 0;
    return released;
  }

 private:
  T* ptr;

  // Not copyable.
  scoped_ptr_malloc(scoped_ptr_malloc const &);
  scoped_ptr_malloc & operator=(scoped_ptr_malloc const &);

  // no reason to use these: each scoped_ptr_malloc should have its own object
  template <typename U, typename GP>
  bool operator==(scoped_ptr_malloc<U, GP> const& p) const;
  template <typename U, typename GP>
  bool operator!=(scoped_ptr_malloc<U, GP> const& p) const;

  // Functor instance used to release held pointers.
  static FreeProc const free_;
};

template<typename T, typename FP>
FP const scoped_ptr_malloc<T,FP>::free_ = FP();

// Free-function form of scoped_ptr_malloc::swap.
template<typename T, typename FP> inline
void swap(scoped_ptr_malloc<T,FP>& a, scoped_ptr_malloc<T,FP>& b) {
  a.swap(b);
}

// Comparisons with the raw pointer on the left-hand side.
template<typename T, typename FP> inline
bool operator==(T* p, const scoped_ptr_malloc<T,FP>& b) {
  T* const held = b.get();
  return p == held;
}

template<typename T, typename FP> inline
bool operator!=(T* p, const scoped_ptr_malloc<T,FP>& b) {
  T* const held = b.get();
  return p != held;
}
|
||||
|
||||
#endif // #ifndef BASE_SCOPED_PTR_H
|
||||
@@ -1,192 +0,0 @@
|
||||
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef BASE_STRING16_H_
|
||||
#define BASE_STRING16_H_
|
||||
|
||||
// WHAT:
|
||||
// A version of std::basic_string that provides 2-byte characters even when
|
||||
// wchar_t is not implemented as a 2-byte type. You can access this class as
|
||||
// string16. We also define char16, which string16 is based upon.
|
||||
//
|
||||
// WHY:
|
||||
// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2
|
||||
// data. Plenty of existing code operates on strings encoded as UTF-16.
|
||||
//
|
||||
// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make
|
||||
// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
|
||||
// at run time, because it calls some functions (like wcslen) that come from
|
||||
// the system's native C library -- which was built with a 4-byte wchar_t!
|
||||
// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
|
||||
// entirely improper on those systems where the encoding of wchar_t is defined
|
||||
// as UTF-32.
|
||||
//
|
||||
// Here, we define string16, which is similar to std::wstring but replaces all
|
||||
// libc functions with custom, 2-byte-char compatible routines. It is capable
|
||||
// of carrying UTF-16-encoded data.
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/basictypes.h"
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
typedef wchar_t char16;
|
||||
typedef std::wstring string16;
|
||||
|
||||
#else // !WIN32
|
||||
|
||||
typedef uint16 char16;
|
||||
|
||||
namespace base {
|
||||
|
||||
// char16 versions of the functions required by string16_char_traits; these
|
||||
// are based on the wide character functions of similar names ("w" or "wcs"
|
||||
// instead of "c16").
|
||||
int c16memcmp(const char16* s1, const char16* s2, size_t n);
|
||||
size_t c16len(const char16* s);
|
||||
const char16* c16memchr(const char16* s, char16 c, size_t n);
|
||||
char16* c16memmove(char16* s1, const char16* s2, size_t n);
|
||||
char16* c16memcpy(char16* s1, const char16* s2, size_t n);
|
||||
char16* c16memset(char16* s, char16 c, size_t n);
|
||||
|
||||
struct string16_char_traits {
|
||||
typedef char16 char_type;
|
||||
typedef int int_type;
|
||||
|
||||
typedef std::streamoff off_type;
|
||||
typedef mbstate_t state_type;
|
||||
typedef std::fpos<state_type> pos_type;
|
||||
|
||||
static void assign(char_type& c1, const char_type& c2) {
|
||||
c1 = c2;
|
||||
}
|
||||
|
||||
static bool eq(const char_type& c1, const char_type& c2) {
|
||||
return c1 == c2;
|
||||
}
|
||||
static bool lt(const char_type& c1, const char_type& c2) {
|
||||
return c1 < c2;
|
||||
}
|
||||
|
||||
static int compare(const char_type* s1, const char_type* s2, size_t n) {
|
||||
return c16memcmp(s1, s2, n);
|
||||
}
|
||||
|
||||
static size_t length(const char_type* s) {
|
||||
return c16len(s);
|
||||
}
|
||||
|
||||
static const char_type* find(const char_type* s, size_t n,
|
||||
const char_type& a) {
|
||||
return c16memchr(s, a, n);
|
||||
}
|
||||
|
||||
static char_type* move(char_type* s1, const char_type* s2, int_type n) {
|
||||
return c16memmove(s1, s2, n);
|
||||
}
|
||||
|
||||
static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
|
||||
return c16memcpy(s1, s2, n);
|
||||
}
|
||||
|
||||
static char_type* assign(char_type* s, size_t n, char_type a) {
|
||||
return c16memset(s, a, n);
|
||||
}
|
||||
|
||||
static int_type not_eof(const int_type& c) {
|
||||
return eq_int_type(c, eof()) ? 0 : c;
|
||||
}
|
||||
|
||||
static char_type to_char_type(const int_type& c) {
|
||||
return char_type(c);
|
||||
}
|
||||
|
||||
static int_type to_int_type(const char_type& c) {
|
||||
return int_type(c);
|
||||
}
|
||||
|
||||
static bool eq_int_type(const int_type& c1, const int_type& c2) {
|
||||
return c1 == c2;
|
||||
}
|
||||
|
||||
static int_type eof() {
|
||||
return static_cast<int_type>(EOF);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace base
|
||||
|
||||
// The string class will be explicitly instantiated only once, in string16.cc.
|
||||
//
|
||||
// std::basic_string<> in GNU libstdc++ contains a static data member,
|
||||
// _S_empty_rep_storage, to represent empty strings. When an operation such
|
||||
// as assignment or destruction is performed on a string, causing its existing
|
||||
// data member to be invalidated, it must not be freed if this static data
|
||||
// member is being used. Otherwise, it counts as an attempt to free static
|
||||
// (and not allocated) data, which is a memory error.
|
||||
//
|
||||
// Generally, due to C++ template magic, _S_empty_rep_storage will be marked
|
||||
// as a coalesced symbol, meaning that the linker will combine multiple
|
||||
// instances into a single one when generating output.
|
||||
//
|
||||
// If a string class is used by multiple shared libraries, a problem occurs.
|
||||
// Each library will get its own copy of _S_empty_rep_storage. When strings
|
||||
// are passed across a library boundary for alteration or destruction, memory
|
||||
// errors will result. GNU libstdc++ contains a configuration option,
|
||||
// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which
|
||||
// disables the static data member optimization, but it's a good optimization
|
||||
// and non-STL code is generally at the mercy of the system's STL
|
||||
// configuration. Fully-dynamic strings are not the default for GNU libstdc++
|
||||
// itself or for the libstdc++ installations on the systems we care
|
||||
// about, such as Mac OS X and relevant flavors of Linux.
|
||||
//
|
||||
// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 .
|
||||
//
|
||||
// To avoid problems, string classes need to be explicitly instantiated only
|
||||
// once, in exactly one library. All other string users see it via an "extern"
|
||||
// declaration. This is precisely how GNU libstdc++ handles
|
||||
// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring).
|
||||
//
|
||||
// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2),
|
||||
// in which the linker does not fully coalesce symbols when dead code
|
||||
// stripping is enabled. This bug causes the memory errors described above
|
||||
// to occur even when a std::basic_string<> does not cross shared library
|
||||
// boundaries, such as in statically-linked executables.
|
||||
//
|
||||
// TODO(mark): File this bug with Apple and update this note with a bug number.
|
||||
|
||||
extern template class std::basic_string<char16, base::string16_char_traits>;
|
||||
|
||||
typedef std::basic_string<char16, base::string16_char_traits> string16;
|
||||
|
||||
extern std::ostream& operator<<(std::ostream& out, const string16& str);
|
||||
|
||||
#endif // !WIN32
|
||||
|
||||
#endif // BASE_STRING16_H_
|
||||
@@ -1,392 +0,0 @@
|
||||
// Copyright 2007, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_GURL_H__
|
||||
#define GOOGLEURL_SRC_GURL_H__
|
||||
|
||||
#include <iosfwd>
|
||||
#include <string>
|
||||
|
||||
#include "base/string16.h"
|
||||
#include "googleurl/src/url_canon.h"
|
||||
#include "googleurl/src/url_canon_stdstring.h"
|
||||
#include "googleurl/src/url_common.h"
|
||||
#include "googleurl/src/url_parse.h"
|
||||
|
||||
class GURL {
|
||||
public:
|
||||
typedef url_canon::StdStringReplacements<std::string> Replacements;
|
||||
typedef url_canon::StdStringReplacements<string16> ReplacementsW;
|
||||
|
||||
// Creates an empty, invalid URL.
|
||||
GURL_API GURL();
|
||||
|
||||
// Copy construction is relatively inexpensive, with most of the time going
|
||||
// to reallocating the string. It does not re-parse.
|
||||
GURL_API GURL(const GURL& other);
|
||||
|
||||
// The narrow version requires the input be UTF-8. Invalid UTF-8 input will
|
||||
// result in an invalid URL.
|
||||
//
|
||||
// The wide version should also take an encoding parameter so we know how to
|
||||
// encode the query parameters. It is probably sufficient for the narrow
|
||||
// version to assume the query parameter encoding should be the same as the
|
||||
// input encoding.
|
||||
GURL_API explicit GURL(const std::string& url_string
|
||||
/*, output_param_encoding*/);
|
||||
GURL_API explicit GURL(const string16& url_string
|
||||
/*, output_param_encoding*/);
|
||||
|
||||
// Constructor for URLs that have already been parsed and canonicalized. This
|
||||
// is used for conversions from KURL, for example. The caller must supply all
|
||||
// information associated with the URL, which must be correct and consistent.
|
||||
GURL_API GURL(const char* canonical_spec, size_t canonical_spec_len,
|
||||
const url_parse::Parsed& parsed, bool is_valid);
|
||||
|
||||
GURL_API ~GURL();
|
||||
|
||||
GURL_API GURL& operator=(const GURL& other);
|
||||
|
||||
// Returns true when this object represents a valid parsed URL. When not
|
||||
// valid, other functions will still succeed, but you will not get canonical
|
||||
// data out in the format you may be expecting. Instead, we keep something
|
||||
// "reasonable looking" so that the user can see how it's busted if
|
||||
// displayed to them.
|
||||
bool is_valid() const {
|
||||
return is_valid_;
|
||||
}
|
||||
|
||||
// Returns true if the URL is zero-length. Note that empty URLs are also
|
||||
// invalid, and is_valid() will return false for them. This is provided
|
||||
// because some users may want to treat the empty case differently.
|
||||
bool is_empty() const {
|
||||
return spec_.empty();
|
||||
}
|
||||
|
||||
// Returns the raw spec, i.e., the full text of the URL, in canonical UTF-8,
|
||||
// if the URL is valid. If the URL is not valid, this will assert and return
|
||||
// the empty string (for safety in release builds, to keep them from being
|
||||
// misused which might be a security problem).
|
||||
//
|
||||
// The URL will be ASCII except the reference fragment, which may be UTF-8.
|
||||
// It is guaranteed to be valid UTF-8.
|
||||
//
|
||||
// The exception is for empty() URLs (which are !is_valid()) but this will
|
||||
// return the empty string without asserting.
|
||||
//
|
||||
// Use possibly_invalid_spec() below to get the unusable spec of an invalid URL. This
|
||||
// separation is designed to prevent errors that may cause security problems
|
||||
// that could result from the mistaken use of an invalid URL.
|
||||
GURL_API const std::string& spec() const;
|
||||
|
||||
// Returns the potentially invalid spec for the URL. This spec MUST NOT be
|
||||
// modified or sent over the network. It is designed to be displayed in error
|
||||
// messages to the user, as the appearance of the spec may explain the error.
|
||||
// If the spec is valid, the valid spec will be returned.
|
||||
//
|
||||
// The returned string is guaranteed to be valid UTF-8.
|
||||
const std::string& possibly_invalid_spec() const {
|
||||
return spec_;
|
||||
}
|
||||
|
||||
// Getter for the raw parsed structure. This allows callers to locate parts
|
||||
// of the URL within the spec themselves. Most callers should consider using
|
||||
// the individual component getters below.
|
||||
//
|
||||
// The returned parsed structure will reference into the raw spec, which may
|
||||
// or may not be valid. If you are using this to index into the spec, BE
|
||||
// SURE YOU ARE USING possibly_invalid_spec() to get the spec, and that you
|
||||
// don't do anything "important" with invalid specs.
|
||||
const url_parse::Parsed& parsed_for_possibly_invalid_spec() const {
|
||||
return parsed_;
|
||||
}
|
||||
|
||||
// Defiant equality operator!
|
||||
bool operator==(const GURL& other) const {
|
||||
return spec_ == other.spec_;
|
||||
}
|
||||
bool operator!=(const GURL& other) const {
|
||||
return spec_ != other.spec_;
|
||||
}
|
||||
|
||||
// Allows GURL to be used as a key in STL (for example, a std::set or std::map).
|
||||
bool operator<(const GURL& other) const {
|
||||
return spec_ < other.spec_;
|
||||
}
|
||||
|
||||
// Resolves a URL that's possibly relative to this object's URL, and returns
|
||||
// it. Absolute URLs are also handled according to the rules of URLs on web
|
||||
// pages.
|
||||
//
|
||||
// It may be impossible to resolve the URLs properly. If the input is not
|
||||
// "standard" (SchemeIsStandard() == false) and the input looks relative, we
|
||||
// can't resolve it. In these cases, the result will be an empty, invalid
|
||||
// GURL.
|
||||
//
|
||||
// The result may also be a nonempty, invalid URL if the input has some kind
|
||||
// of encoding error. In these cases, we will try to construct a "good" URL
|
||||
// that may have meaning to the user, but it will be marked invalid.
|
||||
//
|
||||
// It is an error to resolve a URL relative to an invalid URL. The result
|
||||
// will be the empty URL.
|
||||
GURL_API GURL Resolve(const std::string& relative) const;
|
||||
GURL_API GURL Resolve(const string16& relative) const;
|
||||
|
||||
// Like Resolve() above but takes a character set encoder which will be used
|
||||
// for any query text specified in the input. The charset converter parameter
|
||||
// may be NULL, in which case it will be treated as UTF-8.
|
||||
//
|
||||
// TODO(brettw): These should be replaced with versions that take something
|
||||
// more friendly than a raw CharsetConverter (maybe like an ICU character set
|
||||
// name).
|
||||
GURL_API GURL ResolveWithCharsetConverter(
|
||||
const std::string& relative,
|
||||
url_canon::CharsetConverter* charset_converter) const;
|
||||
GURL_API GURL ResolveWithCharsetConverter(
|
||||
const string16& relative,
|
||||
url_canon::CharsetConverter* charset_converter) const;
|
||||
|
||||
// Creates a new GURL by replacing the current URL's components with the
|
||||
// supplied versions. See the Replacements class in url_canon.h for more.
|
||||
//
|
||||
// These are not particularly quick, so avoid doing mutations when possible.
|
||||
// Prefer the 8-bit version when possible.
|
||||
//
|
||||
// It is an error to replace components of an invalid URL. The result will
|
||||
// be the empty URL.
|
||||
//
|
||||
// Note that we use the more general url_canon::Replacements type to give
|
||||
// callers extra flexibility rather than our override.
|
||||
GURL_API GURL ReplaceComponents(
|
||||
const url_canon::Replacements<char>& replacements) const;
|
||||
GURL_API GURL ReplaceComponents(
|
||||
const url_canon::Replacements<char16>& replacements) const;
|
||||
|
||||
// A helper function that is equivalent to replacing the path with a slash
|
||||
// and clearing out everything after that. We sometimes need to know just the
|
||||
// scheme and the authority. If this URL is not a standard URL (it doesn't
|
||||
// have the regular authority and path sections), then the result will be
|
||||
// an empty, invalid GURL. Note that this *does* work for file: URLs, which
|
||||
// some callers may want to filter out before calling this.
|
||||
//
|
||||
// It is an error to get an empty path on an invalid URL. The result
|
||||
// will be the empty URL.
|
||||
GURL_API GURL GetWithEmptyPath() const;
|
||||
|
||||
// A helper function to return a GURL containing just the scheme, host,
|
||||
// and port from a URL. Equivalent to clearing any username and password,
|
||||
// replacing the path with a slash, and clearing everything after that. If
|
||||
// this URL is not a standard URL, then the result will be an empty,
|
||||
// invalid GURL. If the URL has neither username nor password, this
|
||||
// degenerates to GetWithEmptyPath().
|
||||
//
|
||||
// It is an error to get the origin of an invalid URL. The result
|
||||
// will be the empty URL.
|
||||
GURL_API GURL GetOrigin() const;
|
||||
|
||||
// Returns true if the scheme for the current URL is a known "standard"
|
||||
// scheme. Standard schemes have an authority and a path section. This
|
||||
// includes file: and filesystem:, which some callers may want to filter out
|
||||
// explicitly by calling SchemeIsFile[System].
|
||||
GURL_API bool IsStandard() const;
|
||||
|
||||
// Returns true if the given parameter (should be lower-case ASCII to match
|
||||
// the canonicalized scheme) is the scheme for this URL. This call is more
|
||||
// efficient than getting the scheme and comparing it because no copies or
|
||||
// object constructions are done.
|
||||
GURL_API bool SchemeIs(const char* lower_ascii_scheme) const;
|
||||
|
||||
// We often need to know if this is a file URL. File URLs are "standard", but
|
||||
// are often treated separately by some programs.
|
||||
bool SchemeIsFile() const {
|
||||
return SchemeIs("file");
|
||||
}
|
||||
|
||||
// FileSystem URLs need to be treated differently in some cases.
|
||||
bool SchemeIsFileSystem() const {
|
||||
return SchemeIs("filesystem");
|
||||
}
|
||||
|
||||
// If the scheme indicates a secure connection
|
||||
bool SchemeIsSecure() const {
|
||||
return SchemeIs("https") ||
|
||||
(SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure());
|
||||
}
|
||||
|
||||
// Returns true if the hostname is an IP address. Note: this function isn't
|
||||
// as cheap as a simple getter because it re-parses the hostname to verify.
|
||||
// This currently identifies only IPv4 addresses (bug 822685).
|
||||
GURL_API bool HostIsIPAddress() const;
|
||||
|
||||
// Getters for various components of the URL. The returned string will be
|
||||
// empty if the component is empty or is not present.
|
||||
std::string scheme() const { // Not including the colon. See also SchemeIs.
|
||||
return ComponentString(parsed_.scheme);
|
||||
}
|
||||
std::string username() const {
|
||||
return ComponentString(parsed_.username);
|
||||
}
|
||||
std::string password() const {
|
||||
return ComponentString(parsed_.password);
|
||||
}
|
||||
// Note that this may be a hostname, an IPv4 address, or an IPv6 literal
|
||||
// surrounded by square brackets, like "[2001:db8::1]". To exclude these
|
||||
// brackets, use HostNoBrackets() below.
|
||||
std::string host() const {
|
||||
return ComponentString(parsed_.host);
|
||||
}
|
||||
// Text of the port component. Like the other component getters, this is a
// string and is empty when the component is empty or not present; use
// IntPort() for a parsed numeric port.
std::string port() const { // Empty (not -1) when there is no port.
|
||||
return ComponentString(parsed_.port);
|
||||
}
|
||||
std::string path() const { // Including first slash following host
|
||||
return ComponentString(parsed_.path);
|
||||
}
|
||||
std::string query() const { // Stuff following '?'
|
||||
return ComponentString(parsed_.query);
|
||||
}
|
||||
std::string ref() const { // Stuff following '#'
|
||||
return ComponentString(parsed_.ref);
|
||||
}
|
||||
|
||||
// Existence querying. These functions will return true if the corresponding
|
||||
// URL component exists in this URL. Note that existence is different than
|
||||
// being nonempty. http://www.google.com/? has a query that just happens to
|
||||
// be empty, and has_query() will return true.
|
||||
bool has_scheme() const {
|
||||
return parsed_.scheme.len >= 0;
|
||||
}
|
||||
bool has_username() const {
|
||||
return parsed_.username.len >= 0;
|
||||
}
|
||||
bool has_password() const {
|
||||
return parsed_.password.len >= 0;
|
||||
}
|
||||
bool has_host() const {
|
||||
// Note that hosts are special, absence of host means length 0.
|
||||
return parsed_.host.len > 0;
|
||||
}
|
||||
bool has_port() const {
|
||||
return parsed_.port.len >= 0;
|
||||
}
|
||||
bool has_path() const {
|
||||
// Note that "http://www.google.com/" has a path, the path is "/". This can
|
||||
// return false only for invalid or nonstandard URLs.
|
||||
return parsed_.path.len >= 0;
|
||||
}
|
||||
bool has_query() const {
|
||||
return parsed_.query.len >= 0;
|
||||
}
|
||||
bool has_ref() const {
|
||||
return parsed_.ref.len >= 0;
|
||||
}
|
||||
|
||||
// Returns a parsed version of the port. Can also be any of the special
|
||||
// values defined in Parsed for ExtractPort.
|
||||
GURL_API int IntPort() const;
|
||||
|
||||
// Returns the port number of the url, or the default port number.
|
||||
// If the scheme has no concept of port (or unknown default) returns
|
||||
// PORT_UNSPECIFIED.
|
||||
GURL_API int EffectiveIntPort() const;
|
||||
|
||||
// Extracts the filename portion of the path and returns it. The filename
|
||||
// is everything after the last slash in the path. This may be empty.
|
||||
GURL_API std::string ExtractFileName() const;
|
||||
|
||||
// Returns the path that should be sent to the server. This is the path,
|
||||
// parameter, and query portions of the URL. It is guaranteed to be ASCII.
|
||||
GURL_API std::string PathForRequest() const;
|
||||
|
||||
// Returns the host, excluding the square brackets surrounding IPv6 address
|
||||
// literals. This can be useful for passing to getaddrinfo().
|
||||
GURL_API std::string HostNoBrackets() const;
|
||||
|
||||
// Returns true if this URL's host matches or is in the same domain as
|
||||
// the given input string. For example if this URL was "www.google.com",
|
||||
// this would match "com", "google.com", and "www.google.com"
|
||||
// (input domain should be lower-case ASCII to match the canonicalized
|
||||
// scheme). This call is more efficient than getting the host and checking
|
||||
// whether host has the specific domain or not because no copies or
|
||||
// object constructions are done.
|
||||
//
|
||||
// This overload takes an explicit domain_len, so lower_ascii_domain is not
|
||||
// required to be terminated with a NUL character.
|
||||
GURL_API bool DomainIs(const char* lower_ascii_domain, int domain_len) const;
|
||||
|
||||
// If function DomainIs only has parameter lower_ascii_domain, which means
|
||||
// domain string should be terminate with NULL character.
|
||||
bool DomainIs(const char* lower_ascii_domain) const {
|
||||
return DomainIs(lower_ascii_domain,
|
||||
static_cast<int>(strlen(lower_ascii_domain)));
|
||||
}
|
||||
|
||||
// Swaps the contents of this GURL object with the argument without doing
|
||||
// any memory allocations.
|
||||
GURL_API void Swap(GURL* other);
|
||||
|
||||
// Returns a reference to a singleton empty GURL. This object is for callers
|
||||
// who return references but don't have anything to return in some cases.
|
||||
// This function may be called from any thread.
|
||||
GURL_API static const GURL& EmptyGURL();
|
||||
|
||||
// Returns the inner URL of a nested URL [currently only non-null for
|
||||
// filesystem: URLs].
|
||||
const GURL* inner_url() const {
|
||||
return inner_url_;
|
||||
}
|
||||
|
||||
private:
|
||||
// Returns the substring of the input identified by the given component.
|
||||
std::string ComponentString(const url_parse::Component& comp) const {
|
||||
if (comp.len <= 0)
|
||||
return std::string();
|
||||
return std::string(spec_, comp.begin, comp.len);
|
||||
}
|
||||
|
||||
// The actual text of the URL, in canonical ASCII form.
|
||||
std::string spec_;
|
||||
|
||||
// Set when the given URL is valid. Otherwise, we may still have a spec and
|
||||
// components, but they may not identify valid resources (for example, an
|
||||
// invalid port number, invalid characters in the scheme, etc.).
|
||||
bool is_valid_;
|
||||
|
||||
// Identified components of the canonical spec.
|
||||
url_parse::Parsed parsed_;
|
||||
|
||||
// Used for nested schemes [currently only filesystem:].
|
||||
GURL* inner_url_;
|
||||
|
||||
// TODO bug 684583: Add encoding for query params.
|
||||
};
|
||||
|
||||
// Stream operator so GURL can be used in assertion statements.
|
||||
GURL_API std::ostream& operator<<(std::ostream& out, const GURL& url);
|
||||
|
||||
#endif // GOOGLEURL_SRC_GURL_H__
|
||||
@@ -1,912 +0,0 @@
|
||||
// Copyright 2007, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef GOOGLEURL_SRC_URL_CANON_H__
|
||||
#define GOOGLEURL_SRC_URL_CANON_H__
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "base/string16.h"
|
||||
#include "googleurl/src/url_common.h"
|
||||
#include "googleurl/src/url_parse.h"
|
||||
|
||||
namespace url_canon {
|
||||
|
||||
// Canonicalizer output -------------------------------------------------------
|
||||
|
||||
// Base class for the canonicalizer output. It maintains a buffer of T and
// supports simple resizing and append operations on it.
//
// It is VERY IMPORTANT that no virtual function calls be made on the common
// code path. There are only two virtual functions: the destructor and
// Resize(), which is called only when the existing buffer is not big enough.
// The derived class is in charge of setting up the buffer, which this class
// then manages.
template<typename T>
class CanonOutputT {
 public:
  CanonOutputT() : buffer_(NULL), buffer_len_(0), cur_len_(0) {
  }
  virtual ~CanonOutputT() {
  }

  // Implemented to resize the buffer. This function should update the buffer
  // pointer to point to the new buffer, and any old data up to |cur_len_| in
  // the buffer must be copied over.
  //
  // The new size |sz| must be larger than buffer_len_.
  virtual void Resize(int sz) = 0;

  // Accessor for returning a character at a given position. The input offset
  // must be in the valid range. The result has the buffer's character type T
  // (it used to be declared as char, which truncated wider characters).
  inline T at(int offset) const {
    return buffer_[offset];
  }

  // Sets the character at the given position. The given position MUST be less
  // than the length(). |ch| is converted to the buffer's character type.
  inline void set(int offset, int ch) {
    buffer_[offset] = static_cast<T>(ch);
  }

  // Returns the number of characters currently in the buffer.
  inline int length() const {
    return cur_len_;
  }

  // Returns the current capacity of the buffer. The length() is the number of
  // characters that have been declared to be written, but the capacity() is
  // the number that can be written without reallocation. If the caller must
  // write many characters at once, it can make sure there is enough capacity,
  // write the data, then use set_length() to declare the new length().
  int capacity() const {
    return buffer_len_;
  }

  // Called by the user of this class to get the output. The output will NOT
  // be NULL-terminated. Call length() to get the length.
  const T* data() const {
    return buffer_;
  }
  T* data() {
    return buffer_;
  }

  // Shortens the URL to the new length. Used for "backing up" when processing
  // relative paths. This can also be used if an external function writes a lot
  // of data to the buffer (when using the "Raw" version below) beyond the end,
  // to declare the new length.
  //
  // This MUST NOT be used to expand the size of the buffer beyond capacity().
  void set_length(int new_len) {
    cur_len_ = new_len;
  }

  // This is the most performance critical function, since it is called for
  // every character.
  void push_back(T ch) {
    // In VC2005, putting this common case first speeds up execution
    // dramatically because this branch is predicted as taken.
    if (cur_len_ < buffer_len_) {
      buffer_[cur_len_] = ch;
      cur_len_++;
      return;
    }

    // Grow the buffer to hold at least one more item. Hopefully we won't have
    // to do this very often. If growing fails the character is dropped.
    if (!Grow(1))
      return;

    // Actually do the insertion.
    buffer_[cur_len_] = ch;
    cur_len_++;
  }

  // Appends the given string to the output. If the buffer cannot be grown to
  // fit, nothing is appended.
  void Append(const T* str, int str_len) {
    // Compare against the free space rather than summing the lengths, so the
    // check cannot overflow int for very large |str_len|.
    const int available = buffer_len_ - cur_len_;
    if (str_len > available) {
      if (!Grow(str_len - available))
        return;
    }
    for (int i = 0; i < str_len; i++)
      buffer_[cur_len_ + i] = str[i];
    cur_len_ += str_len;
  }

 protected:
  // Grows the given buffer so that it can fit at least |min_additional|
  // characters beyond the current capacity. The capacity is doubled (starting
  // from kMinBufferLen for an empty buffer) until it is large enough, and
  // Resize() is then called once. Returns false, without resizing, if the
  // length reaches 1 << 30 before it is large enough; returns true otherwise.
  bool Grow(int min_additional) {
    static const int kMinBufferLen = 16;
    int new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
    do {
      if (new_len >= (1 << 30))  // Prevent overflow below.
        return false;
      new_len *= 2;
      // new_len never drops below buffer_len_, so the subtraction is safe
      // where adding |min_additional| to buffer_len_ could overflow.
    } while (new_len - buffer_len_ < min_additional);
    Resize(new_len);
    return true;
  }

  T* buffer_;
  int buffer_len_;

  // Used characters in the buffer.
  int cur_len_;
};
|
||||
|
||||
// Simple implementation of the CanonOutput using new[]. This class
|
||||
// also supports a static buffer so if it is allocated on the stack, most
|
||||
// URLs can be canonicalized with no heap allocations.
|
||||
template<typename T, int fixed_capacity = 1024>
|
||||
class RawCanonOutputT : public CanonOutputT<T> {
|
||||
public:
|
||||
RawCanonOutputT() : CanonOutputT<T>() {
|
||||
this->buffer_ = fixed_buffer_;
|
||||
this->buffer_len_ = fixed_capacity;
|
||||
}
|
||||
virtual ~RawCanonOutputT() {
|
||||
if (this->buffer_ != fixed_buffer_)
|
||||
delete[] this->buffer_;
|
||||
}
|
||||
|
||||
virtual void Resize(int sz) {
|
||||
T* new_buf = new T[sz];
|
||||
memcpy(new_buf, this->buffer_,
|
||||
sizeof(T) * (this->cur_len_ < sz ? this->cur_len_ : sz));
|
||||
if (this->buffer_ != fixed_buffer_)
|
||||
delete[] this->buffer_;
|
||||
this->buffer_ = new_buf;
|
||||
this->buffer_len_ = sz;
|
||||
}
|
||||
|
||||
protected:
|
||||
T fixed_buffer_[fixed_capacity];
|
||||
};
|
||||
|
||||
// Normally, all canonicalization output is in narrow characters. We support
|
||||
// the templates so it can also be used internally if a wide buffer is
|
||||
// required.
|
||||
typedef CanonOutputT<char> CanonOutput;
|
||||
typedef CanonOutputT<char16> CanonOutputW;
|
||||
|
||||
// Narrow-character convenience class: a RawCanonOutputT<char> whose inline
// buffer size is chosen by the |fixed_capacity| template argument.
template<int fixed_capacity>
|
||||
class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
|
||||
// Wide-character convenience class: a RawCanonOutputT<char16> whose inline
// buffer size is chosen by the |fixed_capacity| template argument.
template<int fixed_capacity>
|
||||
class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {};
|
||||
|
||||
// Character set converter ----------------------------------------------------
|
||||
//
|
||||
// Converts query strings into a custom encoding. The embedder can supply an
|
||||
// implementation of this class to interface with their own character set
|
||||
// conversion libraries.
|
||||
//
|
||||
// Embedders will want to see the unit test for the ICU version.
|
||||
|
||||
class CharsetConverter {
|
||||
public:
|
||||
CharsetConverter() {}
|
||||
virtual ~CharsetConverter() {}
|
||||
|
||||
// Converts the given input string from UTF-16 to whatever output format the
|
||||
// converter supports. This is used only for the query encoding conversion,
|
||||
// which does not fail. Instead, the converter should insert "invalid
|
||||
// character" characters in the output for invalid sequences, and do the
|
||||
// best it can.
|
||||
//
|
||||
// If the input contains a character not representable in the output
|
||||
// character set, the converter should append the HTML entity sequence in
|
||||
// decimal, (such as "你") with escaping of the ampersand, number
|
||||
// sign, and semicolon (in the previous example it would be
|
||||
// "%26%2320320%3B"). This rule is based on what IE does in this situation.
|
||||
virtual void ConvertFromUTF16(const char16* input,
|
||||
int input_len,
|
||||
CanonOutput* output) = 0;
|
||||
};
|
||||
|
||||
// Whitespace -----------------------------------------------------------------
|
||||
|
||||
// Searches for whitespace that should be removed from the middle of URLs, and
|
||||
// removes it. Removed whitespace are tabs and newlines, but NOT spaces. Spaces
|
||||
// are preserved, which is what most browsers do. A pointer to the output will
|
||||
// be returned, and the length of that output will be in |output_len|.
|
||||
//
|
||||
// This should be called before parsing if whitespace removal is desired (which
|
||||
// it normally is when you are canonicalizing).
|
||||
//
|
||||
// If no whitespace is removed, this function will not use the buffer and will
|
||||
// return a pointer to the input, to avoid the extra copy. If modification is
|
||||
// required, the given |buffer| will be used and the returned pointer will
|
||||
// point to the beginning of the buffer.
|
||||
//
|
||||
// Therefore, callers should not use the buffer, since it may actually be empty,
|
||||
// use the computed pointer and |*output_len| instead.
|
||||
GURL_API const char* RemoveURLWhitespace(const char* input, int input_len,
|
||||
CanonOutputT<char>* buffer,
|
||||
int* output_len);
|
||||
GURL_API const char16* RemoveURLWhitespace(const char16* input, int input_len,
|
||||
CanonOutputT<char16>* buffer,
|
||||
int* output_len);
|
||||
|
||||
// IDN ------------------------------------------------------------------------
|
||||
|
||||
// Converts the Unicode input representing a hostname to ASCII using IDN rules.
|
||||
// The output must fall in the ASCII range, but will be encoded in UTF-16.
|
||||
//
|
||||
// On success, the output will be filled with the ASCII host name and it will
|
||||
// return true. Unlike most other canonicalization functions, this assumes that
|
||||
// the output is empty. The beginning of the host will be at offset 0, and
|
||||
// the length of the output will be set to the length of the new host name.
|
||||
//
|
||||
// On error, returns false. The output in this case is undefined.
|
||||
GURL_API bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);
|
||||
|
||||
// Piece-by-piece canonicalizers ----------------------------------------------
|
||||
//
|
||||
// These individual canonicalizers append the canonicalized versions of the
|
||||
// corresponding URL component to the given std::string. The spec and the
|
||||
// previously-identified range of that component are the input. The range of
|
||||
// the canonicalized component will be written to the output component.
|
||||
//
|
||||
// These functions all append to the output so they can be chained. Make sure
|
||||
// the output is empty when you start.
|
||||
//
|
||||
// These functions return boolean values indicating success. On failure, they
|
||||
// will attempt to write something reasonable to the output so that, if
|
||||
// displayed to the user, they will recognise it as something that's messed up.
|
||||
// Nothing more should ever be done with these invalid URLs, however.
|
||||
|
||||
// Scheme: Appends the scheme and colon to the URL. The output component will
|
||||
// indicate the range of characters up to but not including the colon.
|
||||
//
|
||||
// Canonical URLs always have a scheme. If the scheme is not present in the
|
||||
// input, this will just write the colon to indicate an empty scheme. Does not
|
||||
// append slashes which will be needed before any authority components for most
|
||||
// URLs.
|
||||
//
|
||||
// The 8-bit version requires UTF-8 encoding.
|
||||
GURL_API bool CanonicalizeScheme(const char* spec,
|
||||
const url_parse::Component& scheme,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_scheme);
|
||||
GURL_API bool CanonicalizeScheme(const char16* spec,
|
||||
const url_parse::Component& scheme,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_scheme);
|
||||
|
||||
// User info: username/password. If present, this will add the delimiters so
|
||||
// the output will be "<username>:<password>@" or "<username>@". Empty
|
||||
// username/password pairs, or empty passwords, will get converted to
|
||||
// nonexistent in the canonical version.
|
||||
//
|
||||
// The components for the username and password refer to ranges in the
|
||||
// respective source strings. Usually, these will be the same string, which
|
||||
// is legal as long as the two components don't overlap.
|
||||
//
|
||||
// The 8-bit version requires UTF-8 encoding.
|
||||
GURL_API bool CanonicalizeUserInfo(const char* username_source,
|
||||
const url_parse::Component& username,
|
||||
const char* password_source,
|
||||
const url_parse::Component& password,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_username,
|
||||
url_parse::Component* out_password);
|
||||
GURL_API bool CanonicalizeUserInfo(const char16* username_source,
|
||||
const url_parse::Component& username,
|
||||
const char16* password_source,
|
||||
const url_parse::Component& password,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_username,
|
||||
url_parse::Component* out_password);
|
||||
|
||||
|
||||
// This structure holds detailed state exported from the IP/Host canonicalizers.
|
||||
// Additional fields may be added as callers require them.
|
||||
struct CanonHostInfo {
|
||||
CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}
|
||||
|
||||
// Convenience function to test if family is an IP address.
|
||||
bool IsIPAddress() const { return family == IPV4 || family == IPV6; }
|
||||
|
||||
// This field summarizes how the input was classified by the canonicalizer.
|
||||
enum Family {
|
||||
NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
|
||||
// canonicalizer is concerned, it should be treated as a
|
||||
// hostname.
|
||||
BROKEN, // - Almost an IP, but was not canonicalized. This could be an
|
||||
// IPv4 address where truncation occurred, or something
|
||||
// containing the special characters :[] which did not parse
|
||||
// as an IPv6 address. Never attempt to connect to this
|
||||
// address, because it might actually succeed!
|
||||
IPV4, // - Successfully canonicalized as an IPv4 address.
|
||||
IPV6, // - Successfully canonicalized as an IPv6 address.
|
||||
};
|
||||
Family family;
|
||||
|
||||
// If |family| is IPV4, then this is the number of nonempty dot-separated
|
||||
// components in the input text, from 1 to 4. If |family| is not IPV4,
|
||||
// this value is undefined.
|
||||
int num_ipv4_components;
|
||||
|
||||
// Location of host within the canonicalized output.
|
||||
// CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6.
|
||||
// CanonicalizeHostVerbose() always sets it.
|
||||
url_parse::Component out_host;
|
||||
|
||||
// |address| contains the parsed IP Address (if any) in its first
|
||||
// AddressLength() bytes, in network order. If IsIPAddress() is false
|
||||
// AddressLength() will return zero and the content of |address| is undefined.
|
||||
unsigned char address[16];
|
||||
|
||||
// Convenience function to calculate the length of an IP address corresponding
|
||||
// to the current IP version in |family|, if any. For use with |address|.
|
||||
int AddressLength() const {
|
||||
return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Host.
|
||||
//
|
||||
// The 8-bit version requires UTF-8 encoding. Use this version when you only
|
||||
// need to know whether canonicalization succeeded.
|
||||
GURL_API bool CanonicalizeHost(const char* spec,
|
||||
const url_parse::Component& host,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_host);
|
||||
GURL_API bool CanonicalizeHost(const char16* spec,
|
||||
const url_parse::Component& host,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_host);
|
||||
|
||||
// Extended version of CanonicalizeHost, which returns additional information.
|
||||
// Use this when you need to know whether the hostname was an IP address.
|
||||
// A successful return is indicated by host_info->family != BROKEN. See the
|
||||
// definition of CanonHostInfo above for details.
|
||||
GURL_API void CanonicalizeHostVerbose(const char* spec,
|
||||
const url_parse::Component& host,
|
||||
CanonOutput* output,
|
||||
CanonHostInfo* host_info);
|
||||
GURL_API void CanonicalizeHostVerbose(const char16* spec,
|
||||
const url_parse::Component& host,
|
||||
CanonOutput* output,
|
||||
CanonHostInfo* host_info);
|
||||
|
||||
|
||||
// IP addresses.
|
||||
//
|
||||
// Tries to interpret the given host name as an IPv4 or IPv6 address. If it is
|
||||
// an IP address, it will canonicalize it as such, appending it to |output|.
|
||||
// Additional status information is returned via the |*host_info| parameter.
|
||||
// See the definition of CanonHostInfo above for details.
|
||||
//
|
||||
// This is called AUTOMATICALLY from the host canonicalizer, which ensures that
|
||||
// the input is unescaped and name-prepped, etc. It should not normally be
|
||||
// necessary or wise to call this directly.
|
||||
GURL_API void CanonicalizeIPAddress(const char* spec,
|
||||
const url_parse::Component& host,
|
||||
CanonOutput* output,
|
||||
CanonHostInfo* host_info);
|
||||
GURL_API void CanonicalizeIPAddress(const char16* spec,
|
||||
const url_parse::Component& host,
|
||||
CanonOutput* output,
|
||||
CanonHostInfo* host_info);
|
||||
|
||||
// Port: this function will add the colon for the port if a port is present.
|
||||
// The caller can pass url_parse::PORT_UNSPECIFIED as the
|
||||
// default_port_for_scheme argument if there is no default port.
|
||||
//
|
||||
// The 8-bit version requires UTF-8 encoding.
|
||||
GURL_API bool CanonicalizePort(const char* spec,
|
||||
const url_parse::Component& port,
|
||||
int default_port_for_scheme,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_port);
|
||||
GURL_API bool CanonicalizePort(const char16* spec,
|
||||
const url_parse::Component& port,
|
||||
int default_port_for_scheme,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_port);
|
||||
|
||||
// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
|
||||
// if the scheme is unknown.
|
||||
GURL_API int DefaultPortForScheme(const char* scheme, int scheme_len);
|
||||
|
||||
// Path. If the input does not begin in a slash (including if the input is
|
||||
// empty), we'll prepend a slash to the path to make it canonical.
|
||||
//
|
||||
// The 8-bit version assumes UTF-8 encoding, but does not verify the validity
|
||||
// of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid
|
||||
// characters, etc.). Normally, URLs will come in as UTF-16, so this isn't
|
||||
// an issue. Somebody giving us an 8-bit path is responsible for generating
|
||||
// the path that the server expects (we'll escape high-bit characters), so
|
||||
// if something is invalid, it's their problem.
|
||||
GURL_API bool CanonicalizePath(const char* spec,
|
||||
const url_parse::Component& path,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_path);
|
||||
GURL_API bool CanonicalizePath(const char16* spec,
|
||||
const url_parse::Component& path,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_path);
|
||||
|
||||
// Canonicalizes the input as a file path. This is like CanonicalizePath except
|
||||
// that it also handles Windows drive specs. For example, the path can begin
|
||||
// with "c|\" and it will get properly canonicalized to "C:/".
|
||||
// The string will be appended to |*output| and |*out_path| will be updated.
|
||||
//
|
||||
// The 8-bit version requires UTF-8 encoding.
|
||||
GURL_API bool FileCanonicalizePath(const char* spec,
|
||||
const url_parse::Component& path,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_path);
|
||||
GURL_API bool FileCanonicalizePath(const char16* spec,
|
||||
const url_parse::Component& path,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_path);
|
||||
|
||||
// Query: Prepends the ? if needed.
|
||||
//
|
||||
// The 8-bit version requires the input to be UTF-8 encoding. Incorrectly
|
||||
// encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode
|
||||
// "invalid character." This function can not fail, we always just try to do
|
||||
// our best for crazy input here since web pages can set it themselves.
|
||||
//
|
||||
// This will convert the given input into the output encoding that the given
|
||||
// character set converter object provides. The converter will only be called
|
||||
// if necessary, for ASCII input, no conversions are necessary.
|
||||
//
|
||||
// The converter can be NULL. In this case, the output encoding will be UTF-8.
|
||||
GURL_API void CanonicalizeQuery(const char* spec,
|
||||
const url_parse::Component& query,
|
||||
CharsetConverter* converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_query);
|
||||
GURL_API void CanonicalizeQuery(const char16* spec,
|
||||
const url_parse::Component& query,
|
||||
CharsetConverter* converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_query);
|
||||
|
||||
// Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
|
||||
// canonicalizer that does not produce ASCII output). The output is
|
||||
// guaranteed to be valid UTF-8.
|
||||
//
|
||||
// This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
|
||||
// the "Unicode replacement character" for the confusing bits and copy the rest.
|
||||
GURL_API void CanonicalizeRef(const char* spec,
|
||||
const url_parse::Component& path,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_path);
|
||||
GURL_API void CanonicalizeRef(const char16* spec,
|
||||
const url_parse::Component& path,
|
||||
CanonOutput* output,
|
||||
url_parse::Component* out_path);
|
||||
|
||||
// Full canonicalizer ---------------------------------------------------------
|
||||
//
|
||||
// These functions replace any string contents, rather than append as above.
|
||||
// See the above piece-by-piece functions for information specific to
|
||||
// canonicalizing individual components.
|
||||
//
|
||||
// The output will be ASCII except the reference fragment, which may be UTF-8.
|
||||
//
|
||||
// The 8-bit versions require UTF-8 encoding.
|
||||
|
||||
// Use for standard URLs with authorities and paths.
|
||||
GURL_API bool CanonicalizeStandardURL(const char* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool CanonicalizeStandardURL(const char16* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Use for file URLs.
|
||||
GURL_API bool CanonicalizeFileURL(const char* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool CanonicalizeFileURL(const char16* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Use for filesystem URLs.
|
||||
GURL_API bool CanonicalizeFileSystemURL(const char* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool CanonicalizeFileSystemURL(const char16* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Use for path URLs such as javascript. This does not modify the path in any
|
||||
// way, for example, by escaping it.
|
||||
GURL_API bool CanonicalizePathURL(const char* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool CanonicalizePathURL(const char16* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Use for mailto URLs. This "canonicalizes" the url into a path and query
|
||||
// component. It does not attempt to merge "to" fields. It uses UTF-8 for
|
||||
// the query encoding if there is a query. This is because a mailto URL is
|
||||
// really intended for an external mail program, and the encoding of a page,
|
||||
// etc. which would influence a query encoding normally are irrelevant.
|
||||
GURL_API bool CanonicalizeMailtoURL(const char* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool CanonicalizeMailtoURL(const char16* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Part replacer --------------------------------------------------------------
|
||||
|
||||
// Internal structure used for storing separate strings for each component.
// The basic canonicalization functions use this structure internally so that
// component replacement (different strings for different components) can be
// treated on the same code path as regular canonicalization (the same string
// for each component).
//
// A url_parse::Parsed structure usually goes along with this. Those
// components identify offsets within these strings, so that they can all be
// in the same string, or spread arbitrarily across different ones.
//
// This structure does not own any data. It is the caller's responsibility to
// ensure that the data the pointers point to stays in scope and is not
// modified.
template<typename CHAR>
struct URLComponentSource {
  // Constructor normally used by callers wishing to replace components. This
  // will make them all NULL, which is no replacement. The caller would then
  // override the components they want to replace.
  URLComponentSource() {
    scheme = username = password = host = port = path = query = ref = NULL;
  }

  // Constructor normally used internally to initialize all the components to
  // point to the same spec.
  explicit URLComponentSource(const CHAR* default_value) {
    scheme = username = password = host = port = path = query = ref =
        default_value;
  }

  // One source string per URL component.
  const CHAR* scheme;
  const CHAR* username;
  const CHAR* password;
  const CHAR* host;
  const CHAR* port;
  const CHAR* path;
  const CHAR* query;
  const CHAR* ref;
};
|
||||
|
||||
// This structure encapsulates information on modifying a URL. Each component
|
||||
// may either be left unchanged, replaced, or deleted.
|
||||
//
|
||||
// By default, each component is unchanged. For those components that should be
|
||||
// modified, call either Set* or Clear* to modify it.
|
||||
//
|
||||
// The string passed to Set* functions DOES NOT GET COPIED AND MUST BE KEPT
|
||||
// IN SCOPE BY THE CALLER for as long as this object exists!
|
||||
//
|
||||
// Prefer the 8-bit replacement version if possible since it is more efficient.
|
||||
template<typename CHAR>
|
||||
class Replacements {
|
||||
public:
|
||||
Replacements() {
|
||||
}
|
||||
|
||||
// Scheme
|
||||
void SetScheme(const CHAR* s, const url_parse::Component& comp) {
|
||||
sources_.scheme = s;
|
||||
components_.scheme = comp;
|
||||
}
|
||||
// Note: we don't have a ClearScheme since this doesn't make any sense.
|
||||
bool IsSchemeOverridden() const { return sources_.scheme != NULL; }
|
||||
|
||||
// Username
|
||||
void SetUsername(const CHAR* s, const url_parse::Component& comp) {
|
||||
sources_.username = s;
|
||||
components_.username = comp;
|
||||
}
|
||||
void ClearUsername() {
|
||||
sources_.username = Placeholder();
|
||||
components_.username = url_parse::Component();
|
||||
}
|
||||
bool IsUsernameOverridden() const { return sources_.username != NULL; }
|
||||
|
||||
// Password
|
||||
void SetPassword(const CHAR* s, const url_parse::Component& comp) {
|
||||
sources_.password = s;
|
||||
components_.password = comp;
|
||||
}
|
||||
void ClearPassword() {
|
||||
sources_.password = Placeholder();
|
||||
components_.password = url_parse::Component();
|
||||
}
|
||||
bool IsPasswordOverridden() const { return sources_.password != NULL; }
|
||||
|
||||
// Host
|
||||
void SetHost(const CHAR* s, const url_parse::Component& comp) {
|
||||
sources_.host = s;
|
||||
components_.host = comp;
|
||||
}
|
||||
void ClearHost() {
|
||||
sources_.host = Placeholder();
|
||||
components_.host = url_parse::Component();
|
||||
}
|
||||
bool IsHostOverridden() const { return sources_.host != NULL; }
|
||||
|
||||
// Port
|
||||
void SetPort(const CHAR* s, const url_parse::Component& comp) {
|
||||
sources_.port = s;
|
||||
components_.port = comp;
|
||||
}
|
||||
void ClearPort() {
|
||||
sources_.port = Placeholder();
|
||||
components_.port = url_parse::Component();
|
||||
}
|
||||
bool IsPortOverridden() const { return sources_.port != NULL; }
|
||||
|
||||
// Path
|
||||
void SetPath(const CHAR* s, const url_parse::Component& comp) {
|
||||
sources_.path = s;
|
||||
components_.path = comp;
|
||||
}
|
||||
void ClearPath() {
|
||||
sources_.path = Placeholder();
|
||||
components_.path = url_parse::Component();
|
||||
}
|
||||
bool IsPathOverridden() const { return sources_.path != NULL; }
|
||||
|
||||
// Query
|
||||
void SetQuery(const CHAR* s, const url_parse::Component& comp) {
|
||||
sources_.query = s;
|
||||
components_.query = comp;
|
||||
}
|
||||
void ClearQuery() {
|
||||
sources_.query = Placeholder();
|
||||
components_.query = url_parse::Component();
|
||||
}
|
||||
bool IsQueryOverridden() const { return sources_.query != NULL; }
|
||||
|
||||
// Ref
|
||||
void SetRef(const CHAR* s, const url_parse::Component& comp) {
|
||||
sources_.ref = s;
|
||||
components_.ref = comp;
|
||||
}
|
||||
void ClearRef() {
|
||||
sources_.ref = Placeholder();
|
||||
components_.ref = url_parse::Component();
|
||||
}
|
||||
bool IsRefOverridden() const { return sources_.ref != NULL; }
|
||||
|
||||
// Getters for the itnernal data. See the variables below for how the
|
||||
// information is encoded.
|
||||
const URLComponentSource<CHAR>& sources() const { return sources_; }
|
||||
const url_parse::Parsed& components() const { return components_; }
|
||||
|
||||
private:
|
||||
// Returns a pointer to a static empty string that is used as a placeholder
|
||||
// to indicate a component should be deleted (see below).
|
||||
const CHAR* Placeholder() {
|
||||
static const CHAR empty_string = 0;
|
||||
return &empty_string;
|
||||
}
|
||||
|
||||
// We support three states:
|
||||
//
|
||||
// Action | Source Component
|
||||
// -----------------------+--------------------------------------------------
|
||||
// Don't change component | NULL (unused)
|
||||
// Replace component | (replacement string) (replacement component)
|
||||
// Delete component | (non-NULL) (invalid component: (0,-1))
|
||||
//
|
||||
// We use a pointer to the empty string for the source when the component
|
||||
// should be deleted.
|
||||
URLComponentSource<CHAR> sources_;
|
||||
url_parse::Parsed components_;
|
||||
};
|
||||
|
||||
// The base must be an 8-bit canonical URL.
|
||||
GURL_API bool ReplaceStandardURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char>& replacements,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool ReplaceStandardURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char16>& replacements,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Filesystem URLs can only have the path, query, or ref replaced.
|
||||
// All other components will be ignored.
|
||||
GURL_API bool ReplaceFileSystemURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char>& replacements,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool ReplaceFileSystemURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char16>& replacements,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Replacing some parts of a file URL is not permitted. Everything except
|
||||
// the host, path, query, and ref will be ignored.
|
||||
GURL_API bool ReplaceFileURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char>& replacements,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool ReplaceFileURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char16>& replacements,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Path URLs can only have the scheme and path replaced. All other components
|
||||
// will be ignored.
|
||||
GURL_API bool ReplacePathURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char>& replacements,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool ReplacePathURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char16>& replacements,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Mailto URLs can only have the scheme, path, and query replaced.
|
||||
// All other components will be ignored.
|
||||
GURL_API bool ReplaceMailtoURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char>& replacements,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
GURL_API bool ReplaceMailtoURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const Replacements<char16>& replacements,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* new_parsed);
|
||||
|
||||
// Relative URL ---------------------------------------------------------------
|
||||
|
||||
// Given an input URL or URL fragment |fragment|, determines if it is a
|
||||
// relative or absolute URL and places the result into |*is_relative|. If it is
|
||||
// relative, the relevant portion of the URL will be placed into
|
||||
// |*relative_component| (there may have been trimmed whitespace, for example).
|
||||
// This value is passed to ResolveRelativeURL. If the input is not relative,
|
||||
// this value is UNDEFINED (it may be changed by the function).
|
||||
//
|
||||
// Returns true on success (we successfully determined the URL is relative or
|
||||
// not). Failure means that the combination of URLs doesn't make any sense.
|
||||
//
|
||||
// The base URL should always be canonical, therefore is ASCII.
|
||||
GURL_API bool IsRelativeURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const char* fragment,
|
||||
int fragment_len,
|
||||
bool is_base_hierarchical,
|
||||
bool* is_relative,
|
||||
url_parse::Component* relative_component);
|
||||
GURL_API bool IsRelativeURL(const char* base,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const char16* fragment,
|
||||
int fragment_len,
|
||||
bool is_base_hierarchical,
|
||||
bool* is_relative,
|
||||
url_parse::Component* relative_component);
|
||||
|
||||
// Given a canonical parsed source URL, a URL fragment known to be relative,
|
||||
// and the identified relevant portion of the relative URL (computed by
|
||||
// IsRelativeURL), this produces a new parsed canonical URL in |output| and
|
||||
// |out_parsed|.
|
||||
//
|
||||
// It also requires a flag indicating whether the base URL is a file: URL
|
||||
// which triggers additional logic.
|
||||
//
|
||||
// The base URL should be canonical and have a host (may be empty for file
|
||||
// URLs) and a path. If it doesn't have these, we can't resolve relative
|
||||
// URLs off of it and will return the base as the output with an error flag.
|
||||
// Because it is canonical it should also be ASCII.
|
||||
//
|
||||
// The query charset converter follows the same rules as CanonicalizeQuery.
|
||||
//
|
||||
// Returns true on success. On failure, the output will be "something
|
||||
// reasonable" that will be consistent and valid, just probably not what
|
||||
// was intended by the web page author or caller.
|
||||
GURL_API bool ResolveRelativeURL(const char* base_url,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
bool base_is_file,
|
||||
const char* relative_url,
|
||||
const url_parse::Component& relative_component,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* out_parsed);
|
||||
GURL_API bool ResolveRelativeURL(const char* base_url,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
bool base_is_file,
|
||||
const char16* relative_url,
|
||||
const url_parse::Component& relative_component,
|
||||
CharsetConverter* query_converter,
|
||||
CanonOutput* output,
|
||||
url_parse::Parsed* out_parsed);
|
||||
|
||||
} // namespace url_canon
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_CANON_H__
|
||||
@@ -1,63 +0,0 @@
|
||||
// Copyright 2011, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// ICU integration functions.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_CANON_ICU_H__
|
||||
#define GOOGLEURL_SRC_URL_CANON_ICU_H__
|
||||
|
||||
#include "googleurl/src/url_canon.h"
|
||||
|
||||
typedef struct UConverter UConverter;
|
||||
|
||||
namespace url_canon {
|
||||
|
||||
// An implementation of CharsetConverter that implementations can use to
|
||||
// interface the canonicalizer with ICU's conversion routines.
|
||||
class ICUCharsetConverter : public CharsetConverter {
|
||||
public:
|
||||
// Constructs a converter using an already-existing ICU character set
|
||||
// converter. This converter is NOT owned by this object; the lifetime must
|
||||
// be managed by the creator such that it is alive as long as this is.
|
||||
GURL_API ICUCharsetConverter(UConverter* converter);
|
||||
|
||||
GURL_API virtual ~ICUCharsetConverter();
|
||||
|
||||
GURL_API virtual void ConvertFromUTF16(const char16* input,
|
||||
int input_len,
|
||||
CanonOutput* output);
|
||||
|
||||
private:
|
||||
// The ICU converter, not owned by this class.
|
||||
UConverter* converter_;
|
||||
};
|
||||
|
||||
} // namespace url_canon
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_CANON_ICU_H__
|
||||
@@ -1,461 +0,0 @@
|
||||
// Copyright 2011, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// This file is intended to be included in another C++ file where the character
|
||||
// types are defined. This allows us to write mostly generic code, but not have
|
||||
// template bloat because everything is inlined when anybody calls any of our
|
||||
// functions.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
|
||||
#define GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "googleurl/src/url_canon.h"
|
||||
|
||||
namespace url_canon {
|
||||
|
||||
// Character type handling -----------------------------------------------------
|
||||
|
||||
// Bits that identify different character types. These types identify different
|
||||
// bits that are set for each 8-bit character in the kSharedCharTypeTable.
|
||||
enum SharedCharTypes {
  // May appear unescaped in a query. Anything lacking this bit gets escaped;
  // see url_canon_query.cc
  CHAR_QUERY = 1 << 0,

  // Permitted in the username/password field.
  CHAR_USERINFO = 1 << 1,

  // Permitted in an IPv4 address (digits plus dot and 'x' for hex).
  CHAR_IPV4 = 1 << 2,

  // ASCII representation of a hex digit (as used in %-escapes).
  CHAR_HEX = 1 << 3,

  // ASCII representation of a decimal digit.
  CHAR_DEC = 1 << 4,

  // ASCII representation of an octal digit.
  CHAR_OCT = 1 << 5,

  // May appear unescaped in encodeURIComponent output. Anything lacking this
  // bit gets escaped; see url_util.cc.
  CHAR_COMPONENT = 1 << 6,
};
|
||||
|
||||
// This table contains the flags in SharedCharTypes for each 8-bit character.
|
||||
// Some canonicalization functions have their own specialized lookup table.
|
||||
// For those with simple requirements, we have collected the flags in one
|
||||
// place so there are fewer lookup tables to load into the CPU cache.
|
||||
//
|
||||
// Using an unsigned char type has a small but measurable performance benefit
|
||||
// over using a 32-bit number.
|
||||
extern const unsigned char kSharedCharTypeTable[0x100];
|
||||
|
||||
// More readable wrappers around the character type lookup table.
|
||||
inline bool IsCharOfType(unsigned char c, SharedCharTypes type) {
|
||||
return !!(kSharedCharTypeTable[c] & type);
|
||||
}
|
||||
inline bool IsQueryChar(unsigned char c) {
|
||||
return IsCharOfType(c, CHAR_QUERY);
|
||||
}
|
||||
inline bool IsIPv4Char(unsigned char c) {
|
||||
return IsCharOfType(c, CHAR_IPV4);
|
||||
}
|
||||
inline bool IsHexChar(unsigned char c) {
|
||||
return IsCharOfType(c, CHAR_HEX);
|
||||
}
|
||||
inline bool IsComponentChar(unsigned char c) {
|
||||
return IsCharOfType(c, CHAR_COMPONENT);
|
||||
}
|
||||
|
||||
// Appends the given string to the output, escaping characters that do not
|
||||
// match the given |type| in SharedCharTypes.
|
||||
void AppendStringOfType(const char* source, int length,
|
||||
SharedCharTypes type,
|
||||
CanonOutput* output);
|
||||
void AppendStringOfType(const char16* source, int length,
|
||||
SharedCharTypes type,
|
||||
CanonOutput* output);
|
||||
|
||||
// Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit
|
||||
// that will be used to represent it.
|
||||
GURL_API extern const char kHexCharLookup[0x10];
|
||||
|
||||
// This lookup table allows fast conversion between ASCII hex letters and their
|
||||
// corresponding numerical value. The 8-bit range is divided up into 8
|
||||
// regions of 0x20 characters each. Each of the three character types (numbers,
|
||||
// uppercase, lowercase) falls into different regions of this range. The table
|
||||
// contains the amount to subtract from characters in that range to get at
|
||||
// the corresponding numerical value.
|
||||
//
|
||||
// See HexCharToValue for the lookup.
|
||||
extern const char kCharToHexLookup[8];
|
||||
|
||||
// Assumes the input is a valid hex digit! Call IsHexChar before using this.
|
||||
inline unsigned char HexCharToValue(unsigned char c) {
|
||||
return c - kCharToHexLookup[c / 0x20];
|
||||
}
|
||||
|
||||
// Indicates if the given character is a dot or dot equivalent, returning the
|
||||
// number of characters taken by it. This will be one for a literal dot, 3 for
|
||||
// an escaped dot. If the character is not a dot, this will return 0.
|
||||
template<typename CHAR>
inline int IsDot(const CHAR* spec, int offset, int end) {
  const CHAR lead = spec[offset];
  if (lead == '.')
    return 1;  // Literal dot.

  // An escaped dot is the three characters "%2e" or "%2E", all of which must
  // lie before |end|.
  if (lead != '%' || offset + 3 > end)
    return 0;
  if (spec[offset + 1] != '2')
    return 0;

  const CHAR last = spec[offset + 2];
  return (last == 'e' || last == 'E') ? 3 : 0;
}
|
||||
|
||||
// Returns the canonicalized version of the input character according to scheme
|
||||
// rules. This is implemented alongside the scheme canonicalizer, and is
|
||||
// required for relative URL resolving to test for scheme equality.
|
||||
//
|
||||
// Returns 0 if the input character is not a valid scheme character.
|
||||
char CanonicalSchemeChar(char16 ch);
|
||||
|
||||
// Write a single character, escaped, to the output. This always escapes: it
|
||||
// does no checking that the character requires escaping.
|
||||
// Escaping makes sense only for 8-bit chars, so this code works in all cases of
|
||||
// input parameters (8/16bit).
|
||||
template<typename UINCHAR, typename OUTCHAR>
|
||||
inline void AppendEscapedChar(UINCHAR ch,
|
||||
CanonOutputT<OUTCHAR>* output) {
|
||||
output->push_back('%');
|
||||
output->push_back(kHexCharLookup[(ch >> 4) & 0xf]);
|
||||
output->push_back(kHexCharLookup[ch & 0xf]);
|
||||
}
|
||||
|
||||
// The character we'll substitute for undecodable or invalid characters.
|
||||
extern const char16 kUnicodeReplacementCharacter;
|
||||
|
||||
// UTF-8 functions ------------------------------------------------------------
|
||||
|
||||
// Reads one character in UTF-8 starting at |*begin| in |str| and places
|
||||
// the decoded value into |*code_point_out|. If the character is valid, we will
|
||||
// return true. If invalid, we'll return false and put the
|
||||
// kUnicodeReplacementCharacter into |*code_point_out|.
|
||||
//
|
||||
// |*begin| will be updated to point to the last character consumed so it
|
||||
// can be incremented in a loop and will be ready for the next character.
|
||||
// (for a single-byte ASCII character, it will not be changed).
|
||||
//
|
||||
// Implementation is in url_canon_icu.cc.
|
||||
GURL_API bool ReadUTFChar(const char* str, int* begin, int length,
|
||||
unsigned* code_point_out);
|
||||
|
||||
// Generic To-UTF-8 converter. This will call the given append method for each
|
||||
// character that should be appended, with the given output method. Wrappers
|
||||
// are provided below for escaped and non-escaped versions of this.
|
||||
//
|
||||
// The char_value must have already been checked that it's a valid Unicode
|
||||
// character.
|
||||
template<class Output, void Appender(unsigned char, Output*)>
|
||||
inline void DoAppendUTF8(unsigned char_value, Output* output) {
|
||||
if (char_value <= 0x7f) {
|
||||
Appender(static_cast<unsigned char>(char_value), output);
|
||||
} else if (char_value <= 0x7ff) {
|
||||
// 110xxxxx 10xxxxxx
|
||||
Appender(static_cast<unsigned char>(0xC0 | (char_value >> 6)),
|
||||
output);
|
||||
Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
|
||||
output);
|
||||
} else if (char_value <= 0xffff) {
|
||||
// 1110xxxx 10xxxxxx 10xxxxxx
|
||||
Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)),
|
||||
output);
|
||||
Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
|
||||
output);
|
||||
Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
|
||||
output);
|
||||
} else if (char_value <= 0x10FFFF) { // Max unicode code point.
|
||||
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
|
||||
output);
|
||||
Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),
|
||||
output);
|
||||
Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
|
||||
output);
|
||||
Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
|
||||
output);
|
||||
} else {
|
||||
// Invalid UTF-8 character (>20 bits).
|
||||
NOTREACHED();
|
||||
}
|
||||
}
|
||||
|
||||
// Helper used by AppendUTF8Value below. We use an unsigned parameter so there
|
||||
// are no funny sign problems with the input, but then have to convert it to
|
||||
// a regular char for appending.
|
||||
inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) {
  // The output stores plain chars, so narrow the unsigned value explicitly.
  const char narrow = static_cast<char>(ch);
  output->push_back(narrow);
}
|
||||
|
||||
// Writes the given character to the output as UTF-8. This does NO checking
|
||||
// of the validity of the unicode characters; the caller should ensure that
|
||||
// the value it is appending is valid to append.
|
||||
inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
  // Encode as UTF-8, pushing each resulting byte to |output| unescaped.
  DoAppendUTF8<CanonOutput, AppendCharToOutput>(
      char_value, output);
}
|
||||
|
||||
// Writes the given character to the output as UTF-8, escaping ALL
|
||||
// characters (even when they are ASCII). This does NO checking of the
|
||||
// validity of the unicode characters; the caller should ensure that the value
|
||||
// it is appending is valid to append.
|
||||
inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
  // Encode as UTF-8, writing every resulting byte as a "%XY" escape.
  DoAppendUTF8<CanonOutput, AppendEscapedChar>(
      char_value, output);
}
|
||||
|
||||
// UTF-16 functions -----------------------------------------------------------
|
||||
|
||||
// Reads one character in UTF-16 starting at |*begin| in |str| and places
|
||||
// the decoded value into |*code_point|. If the character is valid, we will
|
||||
// return true. If invalid, we'll return false and put the
|
||||
// kUnicodeReplacementCharacter into |*code_point|.
|
||||
//
|
||||
// |*begin| will be updated to point to the last character consumed so it
|
||||
// can be incremented in a loop and will be ready for the next character.
|
||||
// (for a single-16-bit-word character, it will not be changed).
|
||||
//
|
||||
// Implementation is in url_canon_icu.cc.
|
||||
GURL_API bool ReadUTFChar(const char16* str, int* begin, int length,
|
||||
unsigned* code_point);
|
||||
|
||||
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
|
||||
inline void AppendUTF16Value(unsigned code_point,
|
||||
CanonOutputT<char16>* output) {
|
||||
if (code_point > 0xffff) {
|
||||
output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0));
|
||||
output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00));
|
||||
} else {
|
||||
output->push_back(static_cast<char16>(code_point));
|
||||
}
|
||||
}
|
||||
|
||||
// Escaping functions ---------------------------------------------------------
|
||||
|
||||
// Writes the given character to the output as UTF-8, escaped. Call this
|
||||
// function only when the input is wide. Returns true on success. Failure
|
||||
// means there was some problem with the encoding, we'll still try to
|
||||
// update the |*begin| pointer and add a placeholder character to the
|
||||
// output so processing can continue.
|
||||
//
|
||||
// We will append the character starting at ch[begin] with the buffer ch
|
||||
// being |length|. |*begin| will be updated to point to the last character
|
||||
// consumed (we may consume more than one for UTF-16) so that if called in
|
||||
// a loop, incrementing the pointer will move to the next character.
|
||||
//
|
||||
// Every single output character will be escaped. This means that if you
|
||||
// give it an ASCII character as input, it will be escaped. Some code uses
|
||||
// this when it knows that a character is invalid according to its rules
|
||||
// for validity. If you don't want escaping for ASCII characters, you will
|
||||
// have to filter them out prior to calling this function.
|
||||
//
|
||||
// Assumes that ch[begin] is within range in the array, but does not assume
|
||||
// that any following characters are.
|
||||
inline bool AppendUTF8EscapedChar(const char16* str, int* begin, int length,
|
||||
CanonOutput* output) {
|
||||
// UTF-16 input. Readchar16 will handle invalid characters for us and give
|
||||
// us the kUnicodeReplacementCharacter, so we don't have to do special
|
||||
// checking after failure, just pass through the failure to the caller.
|
||||
unsigned char_value;
|
||||
bool success = ReadUTFChar(str, begin, length, &char_value);
|
||||
AppendUTF8EscapedValue(char_value, output);
|
||||
return success;
|
||||
}
|
||||
|
||||
// Handles UTF-8 input. See the wide version above for usage.
|
||||
inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,
|
||||
CanonOutput* output) {
|
||||
// ReadUTF8Char will handle invalid characters for us and give us the
|
||||
// kUnicodeReplacementCharacter, so we don't have to do special checking
|
||||
// after failure, just pass through the failure to the caller.
|
||||
unsigned ch;
|
||||
bool success = ReadUTFChar(str, begin, length, &ch);
|
||||
AppendUTF8EscapedValue(ch, output);
|
||||
return success;
|
||||
}
|
||||
|
||||
// Given a '%' character at |*begin| in the string |spec|, this will decode
|
||||
// the escaped value and put it into |*unescaped_value| on success (returns
|
||||
// true). On failure, this will return false, and will not write into
|
||||
// |*unescaped_value|.
|
||||
//
|
||||
// |*begin| will be updated to point to the last character of the escape
|
||||
// sequence so that when called with the index of a for loop, the next time
|
||||
// through it will point to the next character to be considered. On failure,
|
||||
// |*begin| will be unchanged.
|
||||
inline bool Is8BitChar(char c) {
|
||||
return true; // this case is specialized to avoid a warning
|
||||
}
|
||||
inline bool Is8BitChar(char16 c) {
|
||||
return c <= 255;
|
||||
}
|
||||
|
||||
template<typename CHAR>
|
||||
inline bool DecodeEscaped(const CHAR* spec, int* begin, int end,
|
||||
unsigned char* unescaped_value) {
|
||||
if (*begin + 3 > end ||
|
||||
!Is8BitChar(spec[*begin + 1]) || !Is8BitChar(spec[*begin + 2])) {
|
||||
// Invalid escape sequence because there's not enough room, or the
|
||||
// digits are not ASCII.
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned char first = static_cast<unsigned char>(spec[*begin + 1]);
|
||||
unsigned char second = static_cast<unsigned char>(spec[*begin + 2]);
|
||||
if (!IsHexChar(first) || !IsHexChar(second)) {
|
||||
// Invalid hex digits, fail.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Valid escape sequence.
|
||||
*unescaped_value = (HexCharToValue(first) << 4) + HexCharToValue(second);
|
||||
*begin += 2;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Appends the given substring to the output, escaping "some" characters that
|
||||
// it feels may not be safe. It assumes the input values are all contained in
|
||||
// 8-bit although it allows any type.
|
||||
//
|
||||
// This is used in error cases to append invalid output so that it looks
|
||||
// approximately correct. Non-error cases should not call this function since
|
||||
// the escaping rules are not guaranteed!
|
||||
void AppendInvalidNarrowString(const char* spec, int begin, int end,
|
||||
CanonOutput* output);
|
||||
void AppendInvalidNarrowString(const char16* spec, int begin, int end,
|
||||
CanonOutput* output);
|
||||
|
||||
// Misc canonicalization helpers ----------------------------------------------
|
||||
|
||||
// Converts between UTF-8 and UTF-16, returning true on successful conversion.
|
||||
// The output will be appended to the given canonicalizer output (so make sure
|
||||
// it's empty if you want to replace).
|
||||
//
|
||||
// On invalid input, this will still write as much output as possible,
|
||||
// replacing the invalid characters with the "invalid character". It will
|
||||
// return false in the failure case, and the caller should not continue as
|
||||
// normal.
|
||||
GURL_API bool ConvertUTF16ToUTF8(const char16* input, int input_len,
|
||||
CanonOutput* output);
|
||||
GURL_API bool ConvertUTF8ToUTF16(const char* input, int input_len,
|
||||
CanonOutputT<char16>* output);
|
||||
|
||||
// Converts from UTF-16 to 8-bit using the character set converter. If the
|
||||
// converter is NULL, this will use UTF-8.
|
||||
void ConvertUTF16ToQueryEncoding(const char16* input,
|
||||
const url_parse::Component& query,
|
||||
CharsetConverter* converter,
|
||||
CanonOutput* output);
|
||||
|
||||
// Applies the replacements to the given component source. The component source
|
||||
// should be pre-initialized to the "old" base. That is, all pointers will
|
||||
// point to the spec of the old URL, and all of the Parsed components will
|
||||
// be indices into that string.
|
||||
//
|
||||
// The pointers and components in the |source| for all non-NULL strings in the
|
||||
// |repl| (replacements) will be updated to reference those strings.
|
||||
// Canonicalizing with the new |source| and |parsed| can then combine URL
|
||||
// components from many different strings.
|
||||
void SetupOverrideComponents(const char* base,
|
||||
const Replacements<char>& repl,
|
||||
URLComponentSource<char>* source,
|
||||
url_parse::Parsed* parsed);
|
||||
|
||||
// Like the above 8-bit version, except that it additionally converts the
|
||||
// UTF-16 input to UTF-8 before doing the overrides.
|
||||
//
|
||||
// The given utf8_buffer is used to store the converted components. They will
|
||||
// be appended one after another, with the parsed structure identifying the
|
||||
// appropriate substrings. This buffer is a parameter because the source has
|
||||
// no storage, so the buffer must have the same lifetime as the source
|
||||
// parameter owned by the caller.
|
||||
//
|
||||
// THE CALLER MUST NOT ADD TO THE |utf8_buffer| AFTER THIS CALL. Members of
|
||||
// |source| will point into this buffer, which could be invalidated if
|
||||
// additional data is added and the CanonOutput resizes its buffer.
|
||||
//
|
||||
// Returns true on success. False means that the input was not valid UTF-16,
|
||||
// although we will have still done the override with "invalid characters" in
|
||||
// place of errors.
|
||||
bool SetupUTF16OverrideComponents(const char* base,
|
||||
const Replacements<char16>& repl,
|
||||
CanonOutput* utf8_buffer,
|
||||
URLComponentSource<char>* source,
|
||||
url_parse::Parsed* parsed);
|
||||
|
||||
// Implemented in url_canon_path.cc, these are required by the relative URL
|
||||
// resolver as well, so we declare them here.
|
||||
bool CanonicalizePartialPath(const char* spec,
|
||||
const url_parse::Component& path,
|
||||
int path_begin_in_output,
|
||||
CanonOutput* output);
|
||||
bool CanonicalizePartialPath(const char16* spec,
|
||||
const url_parse::Component& path,
|
||||
int path_begin_in_output,
|
||||
CanonOutput* output);
|
||||
|
||||
#ifndef WIN32
|
||||
|
||||
// Implementations of Windows' int-to-string conversions
|
||||
GURL_API int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
|
||||
GURL_API int _itow_s(int value, char16* buffer, size_t size_in_chars,
|
||||
int radix);
|
||||
|
||||
// Secure template overloads for these functions
|
||||
template<size_t N>
|
||||
inline int _itoa_s(int value, char (&buffer)[N], int radix) {
|
||||
return _itoa_s(value, buffer, N, radix);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
inline int _itow_s(int value, char16 (&buffer)[N], int radix) {
|
||||
return _itow_s(value, buffer, N, radix);
|
||||
}
|
||||
|
||||
// _strtoui64 and strtoull behave the same
|
||||
inline unsigned long long _strtoui64(const char* nptr,
                                     char** endptr, int base) {
  // Non-Windows stand-in for the Windows function of the same name: every
  // argument is forwarded to strtoull unchanged.
  const unsigned long long parsed = strtoull(nptr, endptr, base);
  return parsed;
}
|
||||
|
||||
#endif // WIN32
|
||||
|
||||
} // namespace url_canon
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
|
||||
@@ -1,157 +0,0 @@
|
||||
// Copyright 2007, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// As with url_canon_internal.h, this file is intended to be included in
|
||||
// another C++ file where the template types are defined. This allows the
|
||||
// programmer to use these functions for their own string
|
||||
// types, without bloating the code by having inline templates used in
|
||||
// every call site.
|
||||
//
|
||||
// *** This file must be included after url_canon_internal as we depend on some
|
||||
// functions in it. ***
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__
|
||||
#define GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__
|
||||
|
||||
#include "googleurl/src/url_file.h"
|
||||
#include "googleurl/src/url_parse_internal.h"
|
||||
|
||||
using namespace url_canon;
|
||||
|
||||
// Given a pointer into the spec, this copies and canonicalizes the drive
|
||||
// letter and colon to the output, if one is found. If there is not a drive
|
||||
// spec, it won't do anything. The index of the next character in the input
|
||||
// spec is returned (after the colon when a drive spec is found, the begin
|
||||
// offset if one is not).
|
||||
template<typename CHAR>
|
||||
static int FileDoDriveSpec(const CHAR* spec, int begin, int end,
|
||||
CanonOutput* output) {
|
||||
// The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
|
||||
// (with backslashes instead of slashes as well).
|
||||
int num_slashes = CountConsecutiveSlashes(spec, begin, end);
|
||||
int after_slashes = begin + num_slashes;
|
||||
|
||||
if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end))
|
||||
return begin; // Haven't consumed any characters
|
||||
|
||||
// DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
|
||||
// and that it is followed by a colon/pipe.
|
||||
|
||||
// Normalize Windows drive letters to uppercase
|
||||
if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
|
||||
output->push_back(spec[after_slashes] - 'a' + 'A');
|
||||
else
|
||||
output->push_back(static_cast<char>(spec[after_slashes]));
|
||||
|
||||
// Normalize the character following it to a colon rather than pipe.
|
||||
output->push_back(':');
|
||||
output->push_back('/');
|
||||
return after_slashes + 2;
|
||||
}
|
||||
|
||||
// FileDoDriveSpec will have already added the first slash, so we need to
|
||||
// write everything following the slashes using the path canonicalizer.
|
||||
template<typename CHAR, typename UCHAR>
|
||||
static void FileDoPath(const CHAR* spec, int begin, int end,
|
||||
CanonOutput* output) {
|
||||
// Normalize the number of slashes after the drive letter. The path
|
||||
// canonicalizer expects the input to begin in a slash already so
|
||||
// doesn't check. We want to handle no-slashes
|
||||
int num_slashes = CountConsecutiveSlashes(spec, begin, end);
|
||||
int after_slashes = begin + num_slashes;
|
||||
|
||||
// Now use the regular path canonicalizer to canonicalize the rest of the
|
||||
// path. We supply it with the path following the slashes. It won't prepend
|
||||
// a slash because it assumes any nonempty path already starts with one.
|
||||
// We explicitly filter out calls with no path here to prevent that case.
|
||||
ParsedURL::Component sub_path(after_slashes, end - after_slashes);
|
||||
if (sub_path.len > 0) {
|
||||
// Give it a fake output component to write into. DoCanonicalizeFile will
|
||||
// compute the full path component.
|
||||
ParsedURL::Component fake_output_path;
|
||||
URLCanonInternal<CHAR, UCHAR>::DoPath(
|
||||
spec, sub_path, output, &fake_output_path);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename CHAR, typename UCHAR>
|
||||
static bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
|
||||
const ParsedURL& parsed,
|
||||
CanonOutput* output,
|
||||
ParsedURL* new_parsed) {
|
||||
// Things we don't set in file: URLs.
|
||||
new_parsed->username = ParsedURL::Component(0, -1);
|
||||
new_parsed->password = ParsedURL::Component(0, -1);
|
||||
new_parsed->port = ParsedURL::Component(0, -1);
|
||||
|
||||
// Scheme (known, so we don't bother running it through the more
|
||||
// complicated scheme canonicalizer).
|
||||
new_parsed->scheme.begin = output->length();
|
||||
output->push_back('f');
|
||||
output->push_back('i');
|
||||
output->push_back('l');
|
||||
output->push_back('e');
|
||||
new_parsed->scheme.len = output->length() - new_parsed->scheme.begin;
|
||||
output->push_back(':');
|
||||
|
||||
// Write the separator for the host.
|
||||
output->push_back('/');
|
||||
output->push_back('/');
|
||||
|
||||
// Append the host. For many file URLs, this will be empty. For UNC, this
|
||||
// will be present.
|
||||
// TODO(brettw) This doesn't do any checking for host name validity. We
|
||||
// should probably handle validity checking of UNC hosts differently than
|
||||
// for regular IP hosts.
|
||||
bool success = URLCanonInternal<CHAR, UCHAR>::DoHost(
|
||||
source.host, parsed.host, output, &new_parsed->host);
|
||||
|
||||
// Write a separator for the start of the path. We'll ignore any slashes
|
||||
// already at the beginning of the path.
|
||||
new_parsed->path.begin = output->length();
|
||||
output->push_back('/');
|
||||
|
||||
// Copies and normalizes the "c:" at the beginning, if present.
|
||||
int after_drive = FileDoDriveSpec(source.path, parsed.path.begin,
|
||||
parsed.path.end(), output);
|
||||
|
||||
// Copies the rest of the path
|
||||
FileDoPath<CHAR, UCHAR>(source.path, after_drive, parsed.path.end(), output);
|
||||
new_parsed->path.len = output->length() - new_parsed->path.begin;
|
||||
|
||||
// Things following the path we can use the standard canonicalizers for.
|
||||
success &= URLCanonInternal<CHAR, UCHAR>::DoQuery(
|
||||
source.query, parsed.query, output, &new_parsed->query);
|
||||
success &= URLCanonInternal<CHAR, UCHAR>::DoRef(
|
||||
source.ref, parsed.ref, output, &new_parsed->ref);
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__
|
||||
@@ -1,109 +0,0 @@
|
||||
// Copyright 2008, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_CANON_IP_H__
|
||||
#define GOOGLEURL_SRC_URL_CANON_IP_H__
|
||||
|
||||
#include "base/string16.h"
|
||||
#include "googleurl/src/url_canon.h"
|
||||
#include "googleurl/src/url_common.h"
|
||||
#include "googleurl/src/url_parse.h"
|
||||
|
||||
namespace url_canon {
|
||||
|
||||
// Writes the given IPv4 address to |output|.
|
||||
GURL_API void AppendIPv4Address(const unsigned char address[4],
|
||||
CanonOutput* output);
|
||||
|
||||
// Writes the given IPv6 address to |output|.
|
||||
GURL_API void AppendIPv6Address(const unsigned char address[16],
|
||||
CanonOutput* output);
|
||||
|
||||
// Searches the host name for the portions of the IPv4 address. On success,
|
||||
// each component will be placed into |components| and it will return true.
|
||||
// It will return false if the host can not be separated as an IPv4 address
|
||||
// or if there are any non-7-bit characters or other characters that can not
|
||||
// be in an IP address. (This is important so we fail as early as possible for
|
||||
// common non-IP hostnames.)
|
||||
//
|
||||
// Not all components may exist. If there are only 3 components, for example,
|
||||
// the last one will have a length of -1 or 0 to indicate it does not exist.
|
||||
//
|
||||
// Note that many platforms' inet_addr will ignore everything after a space
|
||||
// in certain circumstances if the stuff before the space looks like an IP
|
||||
// address. IE6 is included in this. We do NOT handle this case. In many cases,
|
||||
// the browser's canonicalization will get run before this which converts
|
||||
// spaces to %20 (in the case of IE7) or rejects them (in the case of
|
||||
// Mozilla), so this code path never gets hit. Our host canonicalization will
|
||||
// notice these spaces and escape them, which will make IP address finding
|
||||
// fail. This seems like better behavior than stripping after a space.
|
||||
GURL_API bool FindIPv4Components(const char* spec,
|
||||
const url_parse::Component& host,
|
||||
url_parse::Component components[4]);
|
||||
GURL_API bool FindIPv4Components(const char16* spec,
|
||||
const url_parse::Component& host,
|
||||
url_parse::Component components[4]);
|
||||
|
||||
// Converts an IPv4 address to a 32-bit number (network byte order).
|
||||
//
|
||||
// Possible return values:
|
||||
// IPV4 - IPv4 address was successfully parsed.
|
||||
// BROKEN - Input was formatted like an IPv4 address, but overflow occurred
|
||||
// during parsing.
|
||||
// NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address.
|
||||
// It might be an IPv6 address, or a hostname.
|
||||
//
|
||||
// On success, |num_ipv4_components| will be populated with the number of
|
||||
// components in the IPv4 address.
|
||||
GURL_API CanonHostInfo::Family IPv4AddressToNumber(
|
||||
const char* spec,
|
||||
const url_parse::Component& host,
|
||||
unsigned char address[4],
|
||||
int* num_ipv4_components);
|
||||
GURL_API CanonHostInfo::Family IPv4AddressToNumber(
|
||||
const char16* spec,
|
||||
const url_parse::Component& host,
|
||||
unsigned char address[4],
|
||||
int* num_ipv4_components);
|
||||
|
||||
// Converts an IPv6 address to a 128-bit number (network byte order), returning
|
||||
// true on success. False means that the input was not a valid IPv6 address.
|
||||
//
|
||||
// NOTE that |host| is expected to be surrounded by square brackets.
|
||||
// i.e. "[::1]" rather than "::1".
|
||||
GURL_API bool IPv6AddressToNumber(const char* spec,
|
||||
const url_parse::Component& host,
|
||||
unsigned char address[16]);
|
||||
GURL_API bool IPv6AddressToNumber(const char16* spec,
|
||||
const url_parse::Component& host,
|
||||
unsigned char address[16]);
|
||||
|
||||
} // namespace url_canon
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_CANON_IP_H__
|
||||
@@ -1,134 +0,0 @@
|
||||
// Copyright 2007, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// This header file defines a canonicalizer output method class for STL
|
||||
// strings. Because the canonicalizer tries not to be dependent on the STL,
|
||||
// we have segregated it here.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
|
||||
#define GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
|
||||
|
||||
#include <string>
|
||||
#include "googleurl/src/url_canon.h"
|
||||
|
||||
namespace url_canon {
|
||||
|
||||
// Write into a std::string given in the constructor. This object does not own
|
||||
// the string itself, and the user must ensure that the string stays alive
|
||||
// throughout the lifetime of this object.
|
||||
//
|
||||
// The given string will be appended to; any existing data in the string will
|
||||
// be preserved. The caller should reserve() the amount of data in the string
|
||||
// they expect to be written. We will resize if necessary, but that's slow.
|
||||
//
|
||||
// Note that when canonicalization is complete, the string will likely have
|
||||
// unused space at the end because we make the string very big to start out
|
||||
// with (by resizing it to its full capacity). This ends up being important because resize
|
||||
// operations are slow, and because the base class needs to write directly
|
||||
// into the buffer.
|
||||
//
|
||||
// Therefore, the user should call Complete() before using the string that
|
||||
// this class wrote into.
|
||||
class StdStringCanonOutput : public CanonOutput {
|
||||
public:
|
||||
StdStringCanonOutput(std::string* str)
|
||||
: CanonOutput(),
|
||||
str_(str) {
|
||||
cur_len_ = static_cast<int>(str_->size()); // Append to existing data.
|
||||
str_->resize(str_->capacity());
|
||||
buffer_ = str_->empty() ? NULL : &(*str_)[0];
|
||||
buffer_len_ = static_cast<int>(str_->size());
|
||||
}
|
||||
virtual ~StdStringCanonOutput() {
|
||||
// Nothing to do, we don't own the string.
|
||||
}
|
||||
|
||||
// Must be called after writing has completed but before the string is used.
|
||||
void Complete() {
|
||||
str_->resize(cur_len_);
|
||||
buffer_len_ = cur_len_;
|
||||
}
|
||||
|
||||
virtual void Resize(int sz) {
|
||||
str_->resize(sz);
|
||||
buffer_ = str_->empty() ? NULL : &(*str_)[0];
|
||||
buffer_len_ = sz;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string* str_;
|
||||
};
|
||||
|
||||
// An extension of the Replacements class that allows the setters to use
|
||||
// standard strings.
|
||||
//
|
||||
// The strings passed as arguments are not copied and must remain valid until
|
||||
// this class goes out of scope.
|
||||
template<typename STR>
|
||||
class StdStringReplacements :
|
||||
public url_canon::Replacements<typename STR::value_type> {
|
||||
public:
|
||||
void SetSchemeStr(const STR& s) {
|
||||
this->SetScheme(s.data(),
|
||||
url_parse::Component(0, static_cast<int>(s.length())));
|
||||
}
|
||||
void SetUsernameStr(const STR& s) {
|
||||
this->SetUsername(s.data(),
|
||||
url_parse::Component(0, static_cast<int>(s.length())));
|
||||
}
|
||||
void SetPasswordStr(const STR& s) {
|
||||
this->SetPassword(s.data(),
|
||||
url_parse::Component(0, static_cast<int>(s.length())));
|
||||
}
|
||||
void SetHostStr(const STR& s) {
|
||||
this->SetHost(s.data(),
|
||||
url_parse::Component(0, static_cast<int>(s.length())));
|
||||
}
|
||||
void SetPortStr(const STR& s) {
|
||||
this->SetPort(s.data(),
|
||||
url_parse::Component(0, static_cast<int>(s.length())));
|
||||
}
|
||||
void SetPathStr(const STR& s) {
|
||||
this->SetPath(s.data(),
|
||||
url_parse::Component(0, static_cast<int>(s.length())));
|
||||
}
|
||||
void SetQueryStr(const STR& s) {
|
||||
this->SetQuery(s.data(),
|
||||
url_parse::Component(0, static_cast<int>(s.length())));
|
||||
}
|
||||
void SetRefStr(const STR& s) {
|
||||
this->SetRef(s.data(),
|
||||
url_parse::Component(0, static_cast<int>(s.length())));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace url_canon
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
// Copyright 2010, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_COMMON_H__
|
||||
#define GOOGLEURL_SRC_URL_COMMON_H__
|
||||
|
||||
// GURL_IMPLEMENTATION defaults to 0 when the build has not defined it.
#if !defined(GURL_IMPLEMENTATION)
#define GURL_IMPLEMENTATION 0
#endif

// GURL_API decorates the exported declarations of the library.
#if !defined(GURL_DLL)
// Not a DLL.
#define GURL_API
#elif !defined(WIN32)
// Non-Windows DLLs.
#define GURL_API __attribute__((visibility("default")))
#elif GURL_IMPLEMENTATION
// Windows DLL, compiling the library itself.
#define GURL_API __declspec(dllexport)
#else
// Windows DLL, compiling a user of the library.
#define GURL_API __declspec(dllimport)
#endif
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_COMMON_H__
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
// Copyright 2007, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Provides shared functions used by the internals of the parser and
|
||||
// canonicalizer for file URLs. Do not use outside of these modules.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_FILE_H__
|
||||
#define GOOGLEURL_SRC_URL_FILE_H__
|
||||
|
||||
#include "googleurl/src/url_parse_internal.h"
|
||||
|
||||
namespace url_parse {
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
// We allow both "c:" and "c|" as drive identifiers.
|
||||
inline bool IsWindowsDriveSeparator(char16 ch) {
|
||||
return ch == ':' || ch == '|';
|
||||
}
|
||||
// Reports whether |ch| is an ASCII letter (A-Z or a-z), i.e. something that
// can serve as a drive letter.
inline bool IsWindowsDriveLetter(char16 ch) {
  const bool is_upper = ch >= 'A' && ch <= 'Z';
  const bool is_lower = ch >= 'a' && ch <= 'z';
  return is_upper || is_lower;
}
|
||||
|
||||
#endif // WIN32
|
||||
|
||||
// Returns the index of the first slash in |spec| at or after |begin_index|.
// If no slash is found before |spec_len|, the index at which the search
// stopped is returned: |spec_len|, or |begin_index| itself when it already
// lies at or beyond |spec_len|.
template<typename CHAR>
inline int FindNextSlash(const CHAR* spec, int begin_index, int spec_len) {
  int cursor = begin_index;
  for (; cursor < spec_len; ++cursor) {
    if (IsURLSlash(spec[cursor]))
      break;
  }
  return cursor;
}
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
// Reports whether a drive spec such as "c:" (or "c|") begins at
// |start_offset| in |spec|. To simplify callers, a |start_offset| equal to or
// larger than |spec_len| is handled explicitly and yields false.
//
// A true result guarantees that |spec| holds a valid drive letter followed by
// a drive separator starting at |start_offset|.
template<typename CHAR>
inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset,
                                      int spec_len) {
  // There must be room for both the letter and the separator.
  if (spec_len - start_offset < 2)
    return false;

  return IsWindowsDriveLetter(spec[start_offset]) &&
         IsWindowsDriveSeparator(spec[start_offset + 1]);
}
|
||||
|
||||
// Reports whether a UNC path prefix such as "\\" begins at |start_offset| in
// |text|. To simplify callers, a |start_offset| equal to or larger than |len|
// is handled explicitly and yields false.
//
// With |strict_slashes| set, only backslashes are accepted, as is standard
// for Windows. Otherwise forward slashes count as well, which is what a lot
// of the URL handling relies on.
template<typename CHAR>
inline bool DoesBeginUNCPath(const CHAR* text,
                             int start_offset,
                             int len,
                             bool strict_slashes) {
  // Two characters are needed for the prefix.
  if (len - start_offset < 2)
    return false;

  const CHAR first = text[start_offset];
  const CHAR second = text[start_offset + 1];

  if (!strict_slashes)
    return IsURLSlash(first) && IsURLSlash(second);
  return first == '\\' && second == '\\';
}
|
||||
|
||||
#endif // WIN32
|
||||
|
||||
} // namespace url_parse
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_FILE_H__
|
||||
@@ -1,373 +0,0 @@
|
||||
// Copyright 2007, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_PARSE_H__
|
||||
#define GOOGLEURL_SRC_URL_PARSE_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/basictypes.h"
|
||||
#include "base/string16.h"
|
||||
#include "googleurl/src/url_common.h"
|
||||
|
||||
namespace url_parse {
|
||||
|
||||
// Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and
|
||||
// KURLGoogle.cpp still rely on this type.
|
||||
typedef char16 UTF16Char;
|
||||
|
||||
// Component ------------------------------------------------------------------
|
||||
|
||||
// A substring of a URL, described by a starting offset and a length. A length
// of -1 marks the component as unspecified.
struct Component {
  // Creates an unspecified component.
  Component() : begin(0), len(-1) {}

  // Normal constructor: takes an offset and a length.
  Component(int b, int l) : begin(b), len(l) {}

  // Offset just past the component (begin + len).
  int end() const { return begin + len; }

  // Returns true if this component is valid, meaning the length is given.
  // Even valid components may be empty to record the fact that they exist.
  bool is_valid() const { return len != -1; }

  // Returns true if the component is specified and has at least one
  // character. When this returns false, the component is either empty or
  // invalid.
  bool is_nonempty() const { return len > 0; }

  // Puts the component back into the unspecified state.
  void reset() {
    begin = 0;
    len = -1;
  }

  // Components are equal when both offset and length match.
  bool operator==(const Component& other) const {
    return len == other.len && begin == other.begin;
  }

  int begin;  // Byte offset in the string of this component.
  int len;    // Will be -1 if the component is unspecified.
};
|
||||
|
||||
// Helper that returns a component created with the given begin and ending
|
||||
// points. The ending point is non-inclusive.
|
||||
inline Component MakeRange(int begin, int end) {
|
||||
return Component(begin, end - begin);
|
||||
}
|
||||
|
||||
// Parsed ---------------------------------------------------------------------
|
||||
|
||||
// A structure that holds the identified parts of an input URL. This structure
|
||||
// does NOT store the URL itself. The caller will have to store the URL text
|
||||
// and its corresponding Parsed structure separately.
|
||||
//
|
||||
// Typical usage would be:
|
||||
//
|
||||
// url_parse::Parsed parsed;
|
||||
// url_parse::Component scheme;
|
||||
// if (!url_parse::ExtractScheme(url, url_len, &scheme))
|
||||
// return I_CAN_NOT_FIND_THE_SCHEME_DUDE;
|
||||
//
|
||||
// if (IsStandardScheme(url, scheme)) // Not provided by this component
|
||||
// url_parse::ParseStandardURL(url, url_len, &parsed);
|
||||
// else if (IsFileURL(url, scheme)) // Not provided by this component
|
||||
// url_parse::ParseFileURL(url, url_len, &parsed);
|
||||
// else
|
||||
// url_parse::ParsePathURL(url, url_len, &parsed);
|
||||
//
|
||||
struct Parsed {
|
||||
// Identifies different components.
|
||||
enum ComponentType {
|
||||
SCHEME,
|
||||
USERNAME,
|
||||
PASSWORD,
|
||||
HOST,
|
||||
PORT,
|
||||
PATH,
|
||||
QUERY,
|
||||
REF,
|
||||
};
|
||||
|
||||
// The default constructor is sufficient for the components, but inner_parsed_
|
||||
// requires special handling.
|
||||
GURL_API Parsed();
|
||||
GURL_API Parsed(const Parsed&);
|
||||
GURL_API Parsed& operator=(const Parsed&);
|
||||
GURL_API ~Parsed();
|
||||
|
||||
// Returns the length of the URL (the end of the last component).
|
||||
//
|
||||
// Note that for some invalid, non-canonical URLs, this may not be the length
|
||||
// of the string. For example "http://": the parsed structure will only
|
||||
// contain an entry for the four-character scheme, and it doesn't know about
|
||||
// the "://". For all other last-components, it will return the real length.
|
||||
GURL_API int Length() const;
|
||||
|
||||
// Returns the number of characters before the given component if it exists,
|
||||
// or where the component would be if it did exist. This will return the
|
||||
// string length if the component would be appended to the end.
|
||||
//
|
||||
// Note that this can get a little funny for the port, query, and ref
|
||||
// components which have a delimiter that is not counted as part of the
|
||||
// component. The |include_delimiter| flag controls if you want this counted
|
||||
// as part of the component or not when the component exists.
|
||||
//
|
||||
// This example shows the difference between the two flags for two of these
|
||||
// delimited components that is present (the port and query) and one that
|
||||
// isn't (the reference). The components that this flag affects are marked
|
||||
// with a *.
|
||||
// 0 1 2
|
||||
// 012345678901234567890
|
||||
// Example input: http://foo:80/?query
|
||||
// include_delim=true, ...=false ("<-" indicates different)
|
||||
// SCHEME: 0 0
|
||||
// USERNAME: 5 5
|
||||
// PASSWORD: 5 5
|
||||
// HOST: 7 7
|
||||
// *PORT: 10 11 <-
|
||||
// PATH: 13 13
|
||||
// *QUERY: 14 15 <-
|
||||
// *REF: 20 20
|
||||
//
|
||||
GURL_API int CountCharactersBefore(ComponentType type,
|
||||
bool include_delimiter) const;
|
||||
|
||||
// Scheme without the colon: "http://foo"/ would have a scheme of "http".
|
||||
// The length will be -1 if no scheme is specified ("foo.com"), or 0 if there
|
||||
// is a colon but no scheme (":foo"). Note that the scheme is not guaranteed
|
||||
// to start at the beginning of the string if there are preceeding whitespace
|
||||
// or control characters.
|
||||
Component scheme;
|
||||
|
||||
// Username. Specified in URLs with an @ sign before the host. See |password|
|
||||
Component username;
|
||||
|
||||
// Password. The length will be -1 if unspecified, 0 if specified but empty.
|
||||
// Not all URLs with a username have a password, as in "http://me@host/".
|
||||
// The password is separated form the username with a colon, as in
|
||||
// "http://me:secret@host/"
|
||||
Component password;
|
||||
|
||||
// Host name.
|
||||
Component host;
|
||||
|
||||
// Port number.
|
||||
Component port;
|
||||
|
||||
// Path, this is everything following the host name. Length will be -1 if
|
||||
// unspecified. This includes the preceeding slash, so the path on
|
||||
// http://www.google.com/asdf" is "/asdf". As a result, it is impossible to
|
||||
// have a 0 length path, it will be -1 in cases like "http://host?foo".
|
||||
// Note that we treat backslashes the same as slashes.
|
||||
Component path;
|
||||
|
||||
// Stuff between the ? and the # after the path. This does not include the
|
||||
// preceeding ? character. Length will be -1 if unspecified, 0 if there is
|
||||
// a question mark but no query string.
|
||||
Component query;
|
||||
|
||||
// Indicated by a #, this is everything following the hash sign (not
|
||||
// including it). If there are multiple hash signs, we'll use the last one.
|
||||
// Length will be -1 if there is no hash sign, or 0 if there is one but
|
||||
// nothing follows it.
|
||||
Component ref;
|
||||
|
||||
// This is used for nested URL types, currently only filesystem. If you
|
||||
// parse a filesystem URL, the resulting Parsed will have a nested
|
||||
// inner_parsed_ to hold the parsed inner URL's component information.
|
||||
// For all other url types [including the inner URL], it will be NULL.
|
||||
Parsed* inner_parsed() const {
|
||||
return inner_parsed_;
|
||||
}
|
||||
|
||||
void set_inner_parsed(const Parsed& inner_parsed) {
|
||||
if (!inner_parsed_)
|
||||
inner_parsed_ = new Parsed(inner_parsed);
|
||||
else
|
||||
*inner_parsed_ = inner_parsed;
|
||||
}
|
||||
|
||||
void clear_inner_parsed() {
|
||||
if (inner_parsed_) {
|
||||
delete inner_parsed_;
|
||||
inner_parsed_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Parsed* inner_parsed_; // This object is owned and managed by this struct.
|
||||
};
|
||||
|
||||
// Initialization functions ---------------------------------------------------
|
||||
//
|
||||
// These functions parse the given URL, filling in all of the structure's
|
||||
// components. These functions can not fail, they will always do their best
|
||||
// at interpreting the input given.
|
||||
//
|
||||
// The string length of the URL MUST be specified, we do not check for NULLs
|
||||
// at any point in the process, and will actually handle embedded NULLs.
|
||||
//
|
||||
// IMPORTANT: These functions do NOT hang on to the given pointer or copy it
|
||||
// in any way. See the comment above the struct.
|
||||
//
|
||||
// The 8-bit versions require UTF-8 encoding.
|
||||
|
||||
// StandardURL is for when the scheme is known to be one that has an
|
||||
// authority (host) like "http". This function will not handle weird ones
|
||||
// like "about:" and "javascript:", or do the right thing for "file:" URLs.
|
||||
GURL_API void ParseStandardURL(const char* url, int url_len, Parsed* parsed);
|
||||
GURL_API void ParseStandardURL(const char16* url, int url_len, Parsed* parsed);
|
||||
|
||||
// PathURL is for when the scheme is known not to have an authority (host)
|
||||
// section but that aren't file URLs either. The scheme is parsed, and
|
||||
// everything after the scheme is considered as the path. This is used for
|
||||
// things like "about:" and "javascript:"
|
||||
GURL_API void ParsePathURL(const char* url, int url_len, Parsed* parsed);
|
||||
GURL_API void ParsePathURL(const char16* url, int url_len, Parsed* parsed);
|
||||
|
||||
// FileURL is for file URLs. There are some special rules for interpreting
|
||||
// these.
|
||||
GURL_API void ParseFileURL(const char* url, int url_len, Parsed* parsed);
|
||||
GURL_API void ParseFileURL(const char16* url, int url_len, Parsed* parsed);
|
||||
|
||||
// Filesystem URLs are structured differently than other URLs.
|
||||
GURL_API void ParseFileSystemURL(const char* url,
|
||||
int url_len,
|
||||
Parsed* parsed);
|
||||
GURL_API void ParseFileSystemURL(const char16* url,
|
||||
int url_len,
|
||||
Parsed* parsed);
|
||||
|
||||
// MailtoURL is for mailto: urls. They are made up of a scheme, path, and query.
|
||||
GURL_API void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
|
||||
GURL_API void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed);
|
||||
|
||||
// Helper functions -----------------------------------------------------------
|
||||
|
||||
// Locates the scheme according to the URL parser's rules. This function is
|
||||
// designed so the caller can find the scheme and call the correct Parse*
|
||||
// function according to their known scheme types.
|
||||
//
|
||||
// It also does not perform any validation on the scheme.
|
||||
//
|
||||
// This function will return true if the scheme is found and will put the
|
||||
// scheme's range into *scheme. False means no scheme could be found. Note
|
||||
// that a URL beginning with a colon has a scheme, but it is empty, so this
|
||||
// function will return true but *scheme will = (0,0).
|
||||
//
|
||||
// The scheme is found by skipping spaces and control characters at the
|
||||
// beginning, and taking everything from there to the first colon to be the
|
||||
// scheme. The character at scheme.end() will be the colon (we may enhance
|
||||
// this to handle full width colons or something, so don't count on the
|
||||
// actual character value). The character at scheme.end()+1 will be the
|
||||
// beginning of the rest of the URL, be it the authority or the path (or the
|
||||
// end of the string).
|
||||
//
|
||||
// The 8-bit version requires UTF-8 encoding.
|
||||
GURL_API bool ExtractScheme(const char* url, int url_len, Component* scheme);
|
||||
GURL_API bool ExtractScheme(const char16* url, int url_len, Component* scheme);
|
||||
|
||||
// Returns true if ch is a character that terminates the authority segment
|
||||
// of a URL.
|
||||
GURL_API bool IsAuthorityTerminator(char16 ch);
|
||||
|
||||
// Does a best effort parse of input |spec|, in range |auth|. If a particular
|
||||
// component is not found, it will be set to invalid.
|
||||
GURL_API void ParseAuthority(const char* spec,
|
||||
const Component& auth,
|
||||
Component* username,
|
||||
Component* password,
|
||||
Component* hostname,
|
||||
Component* port_num);
|
||||
GURL_API void ParseAuthority(const char16* spec,
|
||||
const Component& auth,
|
||||
Component* username,
|
||||
Component* password,
|
||||
Component* hostname,
|
||||
Component* port_num);
|
||||
|
||||
// Computes the integer port value from the given port component. The port
|
||||
// component should have been identified by one of the init functions on
|
||||
// |Parsed| for the given input url.
|
||||
//
|
||||
// The return value will be a positive integer between 0 and 64K, or one of
|
||||
// the two special values below.
|
||||
enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 };
|
||||
GURL_API int ParsePort(const char* url, const Component& port);
|
||||
GURL_API int ParsePort(const char16* url, const Component& port);
|
||||
|
||||
// Extracts the range of the file name in the given url. The path must
|
||||
// already have been computed by the parse function, and the matching URL
|
||||
// and extracted path are provided to this function. The filename is
|
||||
// defined as being everything from the last slash/backslash of the path
|
||||
// to the end of the path.
|
||||
//
|
||||
// The file name will be empty if the path is empty or there is nothing
|
||||
// following the last slash.
|
||||
//
|
||||
// The 8-bit version requires UTF-8 encoding.
|
||||
GURL_API void ExtractFileName(const char* url,
|
||||
const Component& path,
|
||||
Component* file_name);
|
||||
GURL_API void ExtractFileName(const char16* url,
|
||||
const Component& path,
|
||||
Component* file_name);
|
||||
|
||||
// Extract the first key/value from the range defined by |*query|. Updates
|
||||
// |*query| to start at the end of the extracted key/value pair. This is
|
||||
// designed for use in a loop: you can keep calling it with the same query
|
||||
// object and it will iterate over all items in the query.
|
||||
//
|
||||
// Some key/value pairs may have the key, the value, or both be empty (for
|
||||
// example, the query string "?&"). These will be returned. Note that an empty
|
||||
// last parameter "foo.com?" or "foo.com?a&" will not be returned, this case
|
||||
// is the same as "done."
|
||||
//
|
||||
// The initial query component should not include the '?' (this is the default
|
||||
// for parsed URLs).
|
||||
//
|
||||
// If no key/value are found |*key| and |*value| will be unchanged and it will
|
||||
// return false.
|
||||
GURL_API bool ExtractQueryKeyValue(const char* url,
|
||||
Component* query,
|
||||
Component* key,
|
||||
Component* value);
|
||||
GURL_API bool ExtractQueryKeyValue(const char16* url,
|
||||
Component* query,
|
||||
Component* key,
|
||||
Component* value);
|
||||
|
||||
} // namespace url_parse
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_PARSE_H__
|
||||
@@ -1,112 +0,0 @@
|
||||
// Copyright 2007, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Contains common inline helper functions used by the URL parsing routines.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__
|
||||
#define GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__
|
||||
|
||||
#include "googleurl/src/url_parse.h"
|
||||
|
||||
namespace url_parse {
|
||||
|
||||
// We treat slashes and backslashes the same for IE compatability.
|
||||
inline bool IsURLSlash(char16 ch) {
|
||||
return ch == '/' || ch == '\\';
|
||||
}
|
||||
|
||||
// Returns true if we should trim this character from the URL because it is a
|
||||
// space or a control character.
|
||||
inline bool ShouldTrimFromURL(char16 ch) {
|
||||
return ch <= ' ';
|
||||
}
|
||||
|
||||
// Narrows the range [*begin, *len) of |spec| so that it no longer starts or
// ends with characters for which ShouldTrimFromURL() is true. Both values
// must already be initialized by the caller. Despite its name, |*len| is an
// end position within |spec|, *not* a count of characters measured from
// |*begin|: the range starts at index |*begin| and runs up to |*len|.
template<typename CHAR>
inline void TrimURL(const CHAR* spec, int* begin, int* len) {
  int first = *begin;
  int last = *len;

  // Advance past leading whitespace and control characters.
  for (; first < last && ShouldTrimFromURL(spec[first]); ++first) {
  }

  // Back up over trailing whitespace and control characters. The comparison
  // against |first| stops the scan when the input is all blanks, so we never
  // move in front of the start of the range.
  for (; last > first && ShouldTrimFromURL(spec[last - 1]); --last) {
  }

  *begin = first;
  *len = last;
}
|
||||
|
||||
// Returns how many slash characters (as defined by IsURLSlash) appear in an
// unbroken run beginning at |begin_offset| in |str|, never reading at or
// beyond index |str_len|.
template<typename CHAR>
inline int CountConsecutiveSlashes(const CHAR *str,
                                   int begin_offset, int str_len) {
  int cursor = begin_offset;
  while (cursor < str_len && IsURLSlash(str[cursor]))
    ++cursor;
  return cursor - begin_offset;
}
|
||||
|
||||
// Internal functions in url_parse.cc that parse the path, that is, everything
|
||||
// following the authority section. The input is the range of everything
|
||||
// following the authority section, and the output is the identified ranges.
|
||||
//
|
||||
// This is designed for the file URL parser or other consumers who may do
|
||||
// special stuff at the beginning, but want regular path parsing, it just
|
||||
// maps to the internal parsing function for paths.
|
||||
void ParsePathInternal(const char* spec,
|
||||
const Component& path,
|
||||
Component* filepath,
|
||||
Component* query,
|
||||
Component* ref);
|
||||
void ParsePathInternal(const char16* spec,
|
||||
const Component& path,
|
||||
Component* filepath,
|
||||
Component* query,
|
||||
Component* ref);
|
||||
|
||||
|
||||
// Given a spec and a pointer to the character after the colon following the
|
||||
// scheme, this parses it and fills in the structure. Every item in the parsed
|
||||
// structure is filled EXCEPT for the scheme, which is untouched.
|
||||
void ParseAfterScheme(const char* spec,
|
||||
int spec_len,
|
||||
int after_scheme,
|
||||
Parsed* parsed);
|
||||
void ParseAfterScheme(const char16* spec,
|
||||
int spec_len,
|
||||
int after_scheme,
|
||||
Parsed* parsed);
|
||||
|
||||
} // namespace url_parse
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__
|
||||
@@ -1,78 +0,0 @@
|
||||
// Copyright 2007 Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Convenience functions for string conversions.
|
||||
// These are mostly intended for use in unit tests.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_TEST_UTILS_H__
|
||||
#define GOOGLEURL_SRC_URL_TEST_UTILS_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/string16.h"
|
||||
#include "googleurl/src/url_canon_internal.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
|
||||
namespace url_test_utils {
|
||||
|
||||
// Converts a UTF-16 string from native wchar_t format to char16, by
|
||||
// truncating the high 32 bits. This is not meant to handle true UTF-32
|
||||
// encoded strings.
|
||||
inline string16 WStringToUTF16(const wchar_t* src) {
|
||||
string16 str;
|
||||
int length = static_cast<int>(wcslen(src));
|
||||
for (int i = 0; i < length; ++i) {
|
||||
str.push_back(static_cast<char16>(src[i]));
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
// Converts a string from UTF-8 to UTF-16
|
||||
inline string16 ConvertUTF8ToUTF16(const std::string& src) {
|
||||
int length = static_cast<int>(src.length());
|
||||
EXPECT_LT(length, 1024);
|
||||
url_canon::RawCanonOutputW<1024> output;
|
||||
EXPECT_TRUE(url_canon::ConvertUTF8ToUTF16(src.data(), length, &output));
|
||||
return string16(output.data(), output.length());
|
||||
}
|
||||
|
||||
// Converts a string from UTF-16 to UTF-8
|
||||
inline std::string ConvertUTF16ToUTF8(const string16& src) {
|
||||
std::string str;
|
||||
url_canon::StdStringCanonOutput output(&str);
|
||||
EXPECT_TRUE(url_canon::ConvertUTF16ToUTF8(src.data(),
|
||||
static_cast<int>(src.length()),
|
||||
&output));
|
||||
output.Complete();
|
||||
return str;
|
||||
}
|
||||
|
||||
} // namespace url_test_utils
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_TEST_UTILS_H__
|
||||
@@ -1,228 +0,0 @@
|
||||
// Copyright 2007, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_UTIL_H__
|
||||
#define GOOGLEURL_SRC_URL_UTIL_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/string16.h"
|
||||
#include "googleurl/src/url_common.h"
|
||||
#include "googleurl/src/url_parse.h"
|
||||
#include "googleurl/src/url_canon.h"
|
||||
|
||||
namespace url_util {
|
||||
|
||||
// Init ------------------------------------------------------------------------
|
||||
|
||||
// Initialization is NOT required, it will be implicitly initialized when first
|
||||
// used. However, this implicit initialization is NOT threadsafe. If you are
|
||||
// using this library in a threaded environment and don't have a consistent
|
||||
// "first call" (an example might be calling "AddStandardScheme" with your
|
||||
// special application-specific schemes) then you will want to call initialize
|
||||
// before spawning any threads.
|
||||
//
|
||||
// It is OK to call this function more than once, subsequent calls will simply
|
||||
// "noop", unless Shutdown() was called in the mean time. This will also be a
|
||||
// "noop" if other calls to the library have forced an initialization
|
||||
// beforehand.
|
||||
GURL_API void Initialize();
|
||||
|
||||
// Cleanup is not required, except some strings may leak. For most user
|
||||
// applications, this is fine. If you're using it in a library that may get
|
||||
// loaded and unloaded, you'll want to unload to properly clean up your
|
||||
// library.
|
||||
GURL_API void Shutdown();
|
||||
|
||||
// Schemes --------------------------------------------------------------------
|
||||
|
||||
// Adds an application-defined scheme to the internal list of "standard" URL
|
||||
// schemes. This function is not threadsafe and can not be called concurrently
|
||||
// with any other url_util function. It will assert if the list of standard
|
||||
// schemes has been locked (see LockStandardSchemes).
|
||||
GURL_API void AddStandardScheme(const char* new_scheme);
|
||||
|
||||
// Sets a flag to prevent future calls to AddStandardScheme from succeeding.
|
||||
//
|
||||
// This is designed to help prevent errors for multithreaded applications.
|
||||
// Normal usage would be to call AddStandardScheme for your custom schemes at
|
||||
// the beginning of program initialization, and then LockStandardSchemes. This
|
||||
// prevents future callers from mistakenly calling AddStandardScheme when the
|
||||
// program is running with multiple threads, where such usage would be
|
||||
// dangerous.
|
||||
//
|
||||
// We could have had AddStandardScheme use a lock instead, but that would add
|
||||
// some platform-specific dependencies we don't otherwise have now, and is
|
||||
// overkill considering the normal usage is so simple.
|
||||
GURL_API void LockStandardSchemes();
|
||||
|
||||
// Locates the scheme in the given string and places it into |found_scheme|,
|
||||
// which may be NULL to indicate the caller does not care about the range.
|
||||
//
|
||||
// Returns whether the given |compare| scheme matches the scheme found in the
|
||||
// input (if any). The |compare| scheme must be a valid canonical scheme or
|
||||
// the result of the comparison is undefined.
|
||||
GURL_API bool FindAndCompareScheme(const char* str,
|
||||
int str_len,
|
||||
const char* compare,
|
||||
url_parse::Component* found_scheme);
|
||||
GURL_API bool FindAndCompareScheme(const char16* str,
|
||||
int str_len,
|
||||
const char* compare,
|
||||
url_parse::Component* found_scheme);
|
||||
inline bool FindAndCompareScheme(const std::string& str,
|
||||
const char* compare,
|
||||
url_parse::Component* found_scheme) {
|
||||
return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
|
||||
compare, found_scheme);
|
||||
}
|
||||
inline bool FindAndCompareScheme(const string16& str,
|
||||
const char* compare,
|
||||
url_parse::Component* found_scheme) {
|
||||
return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
|
||||
compare, found_scheme);
|
||||
}
|
||||
|
||||
// Returns true if the given string represents a standard URL. This means that
|
||||
// the scheme is in the list of known standard schemes.
|
||||
GURL_API bool IsStandard(const char* spec,
|
||||
const url_parse::Component& scheme);
|
||||
GURL_API bool IsStandard(const char16* spec,
|
||||
const url_parse::Component& scheme);
|
||||
|
||||
// TODO(brettw) remove this. This is a temporary compatibility hack to avoid
|
||||
// breaking the WebKit build when this version is synced via Chrome.
|
||||
inline bool IsStandard(const char* spec, int spec_len,
|
||||
const url_parse::Component& scheme) {
|
||||
return IsStandard(spec, scheme);
|
||||
}
|
||||
|
||||
// URL library wrappers -------------------------------------------------------
|
||||
|
||||
// Parses the given spec according to the extracted scheme type. Normal users
|
||||
// should use the URL object, although this may be useful if performance is
|
||||
// critical and you don't want to do the heap allocation for the std::string.
|
||||
//
|
||||
// As with the url_canon::Canonicalize* functions, the charset converter can
|
||||
// be NULL to use UTF-8 (it will be faster in this case).
|
||||
//
|
||||
// Returns true if a valid URL was produced, false if not. On failure, the
|
||||
// output and parsed structures will still be filled and will be consistent,
|
||||
// but they will not represent a loadable URL.
|
||||
GURL_API bool Canonicalize(const char* spec,
|
||||
int spec_len,
|
||||
url_canon::CharsetConverter* charset_converter,
|
||||
url_canon::CanonOutput* output,
|
||||
url_parse::Parsed* output_parsed);
|
||||
GURL_API bool Canonicalize(const char16* spec,
|
||||
int spec_len,
|
||||
url_canon::CharsetConverter* charset_converter,
|
||||
url_canon::CanonOutput* output,
|
||||
url_parse::Parsed* output_parsed);
|
||||
|
||||
// Resolves a potentially relative URL relative to the given parsed base URL.
|
||||
// The base MUST be valid. The resulting canonical URL and parsed information
|
||||
// will be placed in to the given out variables.
|
||||
//
|
||||
// The relative need not be relative. If we discover that it's absolute, this
|
||||
// will produce a canonical version of that URL. See Canonicalize() for more
|
||||
// about the charset_converter.
|
||||
//
|
||||
// Returns true if the output is valid, false if the input could not produce
|
||||
// a valid URL.
|
||||
GURL_API bool ResolveRelative(const char* base_spec,
|
||||
int base_spec_len,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const char* relative,
|
||||
int relative_length,
|
||||
url_canon::CharsetConverter* charset_converter,
|
||||
url_canon::CanonOutput* output,
|
||||
url_parse::Parsed* output_parsed);
|
||||
GURL_API bool ResolveRelative(const char* base_spec,
|
||||
int base_spec_len,
|
||||
const url_parse::Parsed& base_parsed,
|
||||
const char16* relative,
|
||||
int relative_length,
|
||||
url_canon::CharsetConverter* charset_converter,
|
||||
url_canon::CanonOutput* output,
|
||||
url_parse::Parsed* output_parsed);
|
||||
|
||||
// Replaces components in the given VALID input url. The new canonical URL info
|
||||
// is written to output and out_parsed.
|
||||
//
|
||||
// Returns true if the resulting URL is valid.
|
||||
GURL_API bool ReplaceComponents(
|
||||
const char* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
const url_canon::Replacements<char>& replacements,
|
||||
url_canon::CharsetConverter* charset_converter,
|
||||
url_canon::CanonOutput* output,
|
||||
url_parse::Parsed* out_parsed);
|
||||
GURL_API bool ReplaceComponents(
|
||||
const char* spec,
|
||||
int spec_len,
|
||||
const url_parse::Parsed& parsed,
|
||||
const url_canon::Replacements<char16>& replacements,
|
||||
url_canon::CharsetConverter* charset_converter,
|
||||
url_canon::CanonOutput* output,
|
||||
url_parse::Parsed* out_parsed);
|
||||
|
||||
// String helper functions ----------------------------------------------------
|
||||
|
||||
// Compare the lower-case form of the given string against the given ASCII
|
||||
// string. This is useful for doing checking if an input string matches some
|
||||
// token, and it is optimized to avoid intermediate string copies.
|
||||
//
|
||||
// The versions of this function that don't take a b_end assume that the b
|
||||
// string is NULL terminated.
|
||||
GURL_API bool LowerCaseEqualsASCII(const char* a_begin,
|
||||
const char* a_end,
|
||||
const char* b);
|
||||
GURL_API bool LowerCaseEqualsASCII(const char* a_begin,
|
||||
const char* a_end,
|
||||
const char* b_begin,
|
||||
const char* b_end);
|
||||
GURL_API bool LowerCaseEqualsASCII(const char16* a_begin,
|
||||
const char16* a_end,
|
||||
const char* b);
|
||||
|
||||
// Unescapes the given string using URL escaping rules.
|
||||
GURL_API void DecodeURLEscapeSequences(const char* input, int length,
|
||||
url_canon::CanonOutputW* output);
|
||||
|
||||
// Escapes the given string as defined by the JS method encodeURIComponent. See
|
||||
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
|
||||
GURL_API void EncodeURIComponent(const char* input, int length,
|
||||
url_canon::CanonOutput* output);
|
||||
|
||||
|
||||
} // namespace url_util
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_UTIL_H__
|
||||
@@ -1,56 +0,0 @@
|
||||
// Copyright 2011, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__
|
||||
#define GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/string16.h"
|
||||
#include "googleurl/src/url_common.h"
|
||||
#include "googleurl/src/url_parse.h"
|
||||
|
||||
namespace url_util {
|
||||
|
||||
extern const char kFileScheme[];
|
||||
extern const char kFileSystemScheme[];
|
||||
extern const char kMailtoScheme[];
|
||||
|
||||
// Given a string and a range inside the string, compares it to the given
|
||||
// lower-case |compare_to| buffer.
|
||||
bool CompareSchemeComponent(const char* spec,
|
||||
const url_parse::Component& component,
|
||||
const char* compare_to);
|
||||
bool CompareSchemeComponent(const char16* spec,
|
||||
const url_parse::Component& component,
|
||||
const char* compare_to);
|
||||
|
||||
} // namespace url_util
|
||||
|
||||
#endif // GOOGLEURL_SRC_URL_UTIL_INTERNAL_H__
|
||||
@@ -1,43 +0,0 @@
|
||||
// Copyright 2012 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jefftk@google.com (Jeff Kaufman)
|
||||
|
||||
#include "net/instaweb/apache/add_headers_fetcher.h"
|
||||
|
||||
#include "net/instaweb/http/public/async_fetch.h"
|
||||
#include "net/instaweb/rewriter/public/rewrite_options.h"
|
||||
#include "net/instaweb/http/public/request_headers.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Remembers the options to read custom fetch headers from and the fetcher
// that requests are handed on to. Both pointers are stored as given.
AddHeadersFetcher::AddHeadersFetcher(const RewriteOptions* options,
                                     UrlAsyncFetcher* backend_fetcher)
    : options_(options),
      backend_fetcher_(backend_fetcher) {}
|
||||
|
||||
// Nothing to release: the destructor does not delete either stored pointer.
AddHeadersFetcher::~AddHeadersFetcher() {
}
|
||||
|
||||
void AddHeadersFetcher::Fetch(const GoogleString& original_url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch) {
|
||||
RequestHeaders* request_headers = fetch->request_headers();
|
||||
for (int i = 0, n = options_->num_custom_fetch_headers(); i < n; ++i) {
|
||||
const RewriteOptions::NameValue* nv = options_->custom_fetch_header(i);
|
||||
request_headers->Replace(nv->name, nv->value);
|
||||
}
|
||||
backend_fetcher_->Fetch(original_url, message_handler, fetch);
|
||||
}
|
||||
|
||||
} // namespace net_instaweb
|
||||
@@ -1,57 +0,0 @@
|
||||
// Copyright 2012 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jefftk@google.com (Jeff Kaufman)
|
||||
|
||||
// This class is a simple wrapper around another fetcher that adds headers to
|
||||
// requests based on settings in the rewrite options before passing them on to
|
||||
// the backend fetcher.
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_ADD_HEADERS_FETCHER_H_
|
||||
#define NET_INSTAWEB_APACHE_ADD_HEADERS_FETCHER_H_
|
||||
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class RewriteOptions;
|
||||
class MessageHandler;
|
||||
|
||||
class AddHeadersFetcher : public UrlAsyncFetcher {
|
||||
public:
|
||||
AddHeadersFetcher(const RewriteOptions* options,
|
||||
UrlAsyncFetcher* backend_fetcher);
|
||||
virtual ~AddHeadersFetcher();
|
||||
|
||||
virtual bool SupportsHttps() const {
|
||||
return backend_fetcher_->SupportsHttps();
|
||||
}
|
||||
|
||||
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* callback);
|
||||
|
||||
private:
|
||||
const RewriteOptions* const options_;
|
||||
UrlAsyncFetcher* const backend_fetcher_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(AddHeadersFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_ADD_HEADERS_FETCHER_H_
|
||||
@@ -1,149 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_CONFIG_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_CONFIG_H_
|
||||
|
||||
#include "net/instaweb/rewriter/public/rewrite_options.h"
|
||||
#include "net/instaweb/system/public/system_rewrite_options.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class Hasher;
|
||||
|
||||
// Establishes a context for VirtualHosts and directory-scoped
|
||||
// options, either via .htaccess or <Directory>...</Directory>.
|
||||
class ApacheConfig : public SystemRewriteOptions {
|
||||
public:
|
||||
static void Initialize();
|
||||
static void Terminate();
|
||||
|
||||
explicit ApacheConfig(const StringPiece& dir);
|
||||
ApacheConfig();
|
||||
~ApacheConfig() {}
|
||||
|
||||
// Human-readable description of what this configuration is for. This
|
||||
// may be a directory, or a string indicating a combination of directives
|
||||
// for multiple directories.
|
||||
StringPiece description() const { return description_; }
|
||||
void set_description(const StringPiece& x) { x.CopyToString(&description_); }
|
||||
|
||||
int64 slurp_flush_limit() const {
|
||||
return slurp_flush_limit_.value();
|
||||
}
|
||||
void set_slurp_flush_limit(int64 x) {
|
||||
set_option(x, &slurp_flush_limit_);
|
||||
}
|
||||
bool slurp_read_only() const {
|
||||
return slurp_read_only_.value();
|
||||
}
|
||||
void set_slurp_read_only(bool x) {
|
||||
set_option(x, &slurp_read_only_);
|
||||
}
|
||||
bool rate_limit_background_fetches() const {
|
||||
return rate_limit_background_fetches_.value();
|
||||
}
|
||||
const GoogleString& slurp_directory() const {
|
||||
return slurp_directory_.value();
|
||||
}
|
||||
void set_slurp_directory(GoogleString x) {
|
||||
set_option(x, &slurp_directory_);
|
||||
}
|
||||
|
||||
// If this is set to true, we'll turn on our fallback proxy-like behavior
|
||||
// on non-.pagespeed. URLs without changing the main fetcher from Serf
|
||||
// (the way the slurp options would).
|
||||
bool test_proxy() const {
|
||||
return test_proxy_.value();
|
||||
}
|
||||
void set_test_proxy(bool x) {
|
||||
set_option(x, &test_proxy_);
|
||||
}
|
||||
|
||||
// This configures the fetcher we use for fallback handling if test_proxy()
|
||||
// is on:
|
||||
// - If this is empty, we use the usual mod_pagespeed fetcher
|
||||
// (e.g. Serf)
|
||||
// - If it's non-empty, the fallback URLs will be fetched from the given
|
||||
// slurp directory. mod_pagespeed resource fetches, however, will still
|
||||
// use the usual fetcher (e.g. Serf).
|
||||
GoogleString test_proxy_slurp() const {
|
||||
return test_proxy_slurp_.value();
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
bool slurping_enabled() const {
|
||||
return !slurp_directory().empty();
|
||||
}
|
||||
|
||||
bool slurping_enabled_read_only() const {
|
||||
return slurping_enabled() && slurp_read_only();
|
||||
}
|
||||
|
||||
bool experimental_fetch_from_mod_spdy() const {
|
||||
return experimental_fetch_from_mod_spdy_.value();
|
||||
}
|
||||
|
||||
// Make an identical copy of these options and return it.
|
||||
virtual ApacheConfig* Clone() const;
|
||||
|
||||
// Returns a suitably down cast version of 'instance' if it is an instance
|
||||
// of this class, NULL if not.
|
||||
static const ApacheConfig* DynamicCast(const RewriteOptions* instance);
|
||||
static ApacheConfig* DynamicCast(RewriteOptions* instance);
|
||||
|
||||
private:
|
||||
// Keeps the properties added by this subclass. These are merged into
|
||||
// RewriteOptions::all_properties_ during Initialize().
|
||||
static Properties* apache_properties_;
|
||||
|
||||
// Adds an option to apache_properties_.
|
||||
template<class RewriteOptionsSubclass, class OptionClass>
|
||||
static void AddApacheProperty(typename OptionClass::ValueType default_value,
|
||||
OptionClass RewriteOptionsSubclass::*offset,
|
||||
const char* id,
|
||||
OptionEnum option_enum,
|
||||
const char* help) {
|
||||
AddProperty(default_value, offset, id, option_enum,
|
||||
RewriteOptions::kServerScope, help,
|
||||
apache_properties_);
|
||||
}
|
||||
|
||||
void InitializeSignaturesAndDefaults();
|
||||
static void AddProperties();
|
||||
void Init();
|
||||
|
||||
GoogleString description_;
|
||||
|
||||
Option<GoogleString> slurp_directory_;
|
||||
Option<GoogleString> test_proxy_slurp_;
|
||||
|
||||
Option<bool> slurp_read_only_;
|
||||
Option<bool> test_proxy_;
|
||||
Option<bool> rate_limit_background_fetches_;
|
||||
Option<bool> experimental_fetch_from_mod_spdy_;
|
||||
|
||||
Option<int64> slurp_flush_limit_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ApacheConfig);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_CONFIG_H_
|
||||
@@ -1,38 +0,0 @@
|
||||
// Copyright 2012 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: morlovich@google.com (Maksim Orlovich)
|
||||
//
|
||||
// Makes sure we include Apache's http_log.h without conflicting with
|
||||
// Google LOG() macros, and with proper per-module logging support in
|
||||
// Apache 2.4
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_LOGGING_INCLUDES_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_LOGGING_INCLUDES_H_
|
||||
|
||||
// When HAVE_SYSLOG is defined, apache http_log.h will include syslog.h, which
|
||||
// #defines LOG_* as numbers. This conflicts with definitions of the LOG(x)
|
||||
// macros in Chromium base.
|
||||
#undef HAVE_SYSLOG
|
||||
#include "http_log.h"
|
||||
|
||||
// Apache 2.4 and later expect modules to invoke APLOG_USE_MODULE so that the
// log level can be configured per module.  The declaration is skipped when
// the macro is not defined.
#if defined(APLOG_USE_MODULE)
extern "C" {
APLOG_USE_MODULE(pagespeed);
}
#endif  // defined(APLOG_USE_MODULE)
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_LOGGING_INCLUDES_H_
|
||||
@@ -1,91 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_MESSAGE_HANDLER_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_MESSAGE_HANDLER_H_
|
||||
|
||||
#include <cstdarg>
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/google_message_handler.h"
|
||||
#include "net/instaweb/util/public/message_handler.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
struct server_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractMutex;
|
||||
class SharedCircularBuffer;
|
||||
class Timer;
|
||||
class Writer;
|
||||
|
||||
// Implementation of an HTML parser message handler that uses Apache
|
||||
// logging to emit messsages.
|
||||
class ApacheMessageHandler : public MessageHandler {
|
||||
public:
|
||||
// version is a string added to each message.
|
||||
// Timer is used to generate timestamp for messages in shared memory.
|
||||
ApacheMessageHandler(const server_rec* server, const StringPiece& version,
|
||||
Timer* timer, AbstractMutex* mutex);
|
||||
|
||||
// Installs a signal handler for common crash signals that tries to print
|
||||
// out a backtrace.
|
||||
static void InstallCrashHandler(server_rec* global_server);
|
||||
|
||||
// When we initialize ApacheMessageHandler in ApacheRewriteDriverFactory,
|
||||
// SharedCircularBuffer of ApacheRewriteDriverFactory is not initialized yet.
|
||||
// We need to set buffer_ later in RootInit() or ChildInit().
|
||||
void set_buffer(SharedCircularBuffer* buff);
|
||||
void SetPidString(const int64 pid) {
|
||||
pid_string_ = StrCat("[", Integer64ToString(pid), "]");
|
||||
}
|
||||
// Dump contents of SharedCircularBuffer.
|
||||
bool Dump(Writer* writer);
|
||||
|
||||
protected:
|
||||
virtual void MessageVImpl(MessageType type, const char* msg, va_list args);
|
||||
|
||||
virtual void FileMessageVImpl(MessageType type, const char* filename,
|
||||
int line, const char* msg, va_list args);
|
||||
|
||||
private:
|
||||
int GetApacheLogLevel(MessageType type);
|
||||
GoogleString Format(const char* msg, va_list args);
|
||||
|
||||
const server_rec* server_rec_;
|
||||
const GoogleString version_;
|
||||
// This timer is used to prepend time when writing a message
|
||||
// to SharedCircularBuffer.
|
||||
Timer* timer_;
|
||||
scoped_ptr<AbstractMutex> mutex_;
|
||||
// String "[pid]".
|
||||
GoogleString pid_string_;
|
||||
// This handler is for internal use.
|
||||
// Some functions of SharedCircularBuffer need MessageHandler as argument,
|
||||
// We do not want to pass in another ApacheMessageHandler to cause infinite
|
||||
// loop.
|
||||
GoogleMessageHandler handler_;
|
||||
SharedCircularBuffer* buffer_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ApacheMessageHandler);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_MESSAGE_HANDLER_H_
|
||||
@@ -1,73 +0,0 @@
|
||||
/*
|
||||
* Copyright 2013 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
//
|
||||
// Captures the Apache request details in our request context, including
|
||||
// the port (used for loopback fetches) and (if enabled & serving spdy)
|
||||
// a factory for generating SPDY fetches.
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_REQUEST_CONTEXT_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_REQUEST_CONTEXT_H_
|
||||
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
struct request_rec;
|
||||
struct spdy_slave_connection_factory;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractMutex;
|
||||
|
||||
class ApacheRequestContext : public RequestContext {
|
||||
public:
|
||||
ApacheRequestContext(AbstractMutex* logging_mutex, request_rec* req);
|
||||
|
||||
// Captures the original URL of the request, which is used to help
|
||||
// authorize domains for fetches we do on behalf of that request.
|
||||
void set_url(StringPiece url) { url.CopyToString(&url_); }
|
||||
|
||||
// Returns rc as an ApacheRequestContext* if it is one and CHECK
|
||||
// fails if it is not. Returns NULL if rc is NULL.
|
||||
static ApacheRequestContext* DynamicCast(RequestContext* rc);
|
||||
|
||||
bool use_spdy_fetcher() const { return use_spdy_fetcher_; }
|
||||
int local_port() const { return local_port_; }
|
||||
const GoogleString& local_ip() const { return local_ip_; }
|
||||
StringPiece url() const { return url_; }
|
||||
spdy_slave_connection_factory* spdy_connection_factory() {
|
||||
return spdy_connection_factory_;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual ~ApacheRequestContext();
|
||||
|
||||
private:
|
||||
bool use_spdy_fetcher_;
|
||||
int local_port_;
|
||||
GoogleString local_ip_;
|
||||
GoogleString url_;
|
||||
spdy_slave_connection_factory* spdy_connection_factory_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ApacheRequestContext);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_REQUEST_CONTEXT_H_
|
||||
@@ -1,406 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
// lsong@google.com (Libo Song)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_REWRITE_DRIVER_FACTORY_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_REWRITE_DRIVER_FACTORY_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#include "net/instaweb/rewriter/public/rewrite_driver_factory.h"
|
||||
#include "net/instaweb/system/public/system_rewrite_driver_factory.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/cache_interface.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/shared_mem_cache.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
struct apr_pool_t;
|
||||
struct server_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractSharedMem;
|
||||
class ApacheConfig;
|
||||
class ApacheMessageHandler;
|
||||
class ApacheServerContext;
|
||||
class FileSystem;
|
||||
class Hasher;
|
||||
class MessageHandler;
|
||||
class ModSpdyFetchController;
|
||||
class NamedLockManager;
|
||||
class QueuedWorkerPool;
|
||||
class RewriteOptions;
|
||||
class SerfUrlAsyncFetcher;
|
||||
class ServerContext;
|
||||
class SharedCircularBuffer;
|
||||
class SharedMemStatistics;
|
||||
class SlowWorker;
|
||||
class StaticAssetManager;
|
||||
class Statistics;
|
||||
class SystemCaches;
|
||||
class Timer;
|
||||
class UrlAsyncFetcher;
|
||||
class UrlFetcher;
|
||||
class UrlPollableAsyncFetcher;
|
||||
class Writer;
|
||||
|
||||
// Creates an Apache RewriteDriver.
|
||||
class ApacheRewriteDriverFactory : public SystemRewriteDriverFactory {
|
||||
public:
|
||||
// Path prefix where we serve static assets (primarily images and js
|
||||
// resources) needed by some filters.
|
||||
static const char kStaticAssetPrefix[];
|
||||
|
||||
ApacheRewriteDriverFactory(server_rec* server, const StringPiece& version);
|
||||
virtual ~ApacheRewriteDriverFactory();
|
||||
|
||||
virtual Hasher* NewHasher();
|
||||
|
||||
// Returns the fetcher that will be used by the filters to load any
|
||||
// resources they need. This either matches the resource manager's
|
||||
// async fetcher or is NULL in case we are configured in a way that
|
||||
// all fetches will succeed immediately. Must be called after the fetchers
|
||||
// have been computed
|
||||
UrlPollableAsyncFetcher* SubResourceFetcher();
|
||||
|
||||
GoogleString hostname_identifier() { return hostname_identifier_; }
|
||||
|
||||
AbstractSharedMem* shared_mem_runtime() const {
|
||||
return shared_mem_runtime_.get();
|
||||
}
|
||||
// Give access to apache_message_handler_ for the cases we need
|
||||
// to use ApacheMessageHandler rather than MessageHandler.
|
||||
// e.g. Use ApacheMessageHandler::Dump()
|
||||
// This is a better choice than cast from MessageHandler.
|
||||
ApacheMessageHandler* apache_message_handler() {
|
||||
return apache_message_handler_;
|
||||
}
|
||||
// For shared memory resources the general setup we follow is to have the
|
||||
// first running process (aka the root) create the necessary segments and
|
||||
// fill in their shared data structures, while processes created to actually
|
||||
// handle requests attach to already existing shared data structures.
|
||||
//
|
||||
// During normal server startup[1], RootInit() is called from the Apache hooks
|
||||
// in the root process for the first task, and then ChildInit() is called in
|
||||
// any child process.
|
||||
//
|
||||
// Keep in mind, however, that when fork() is involved a process may
|
||||
// effectively see both calls, in which case the 'ChildInit' call would
|
||||
// come second and override the previous root status. Both calls are also
|
||||
// invoked in the debug single-process mode (httpd -X).
|
||||
//
|
||||
// Note that these are not static methods --- they are invoked on every
|
||||
// ApacheRewriteDriverFactory instance, which exist for the global
|
||||
// configuration as well as all the vhosts.
|
||||
//
|
||||
// [1] Besides normal startup, Apache also uses a temporary process to
|
||||
// syntax check the config file. That basically looks like a complete
|
||||
// normal startup and shutdown to the code.
|
||||
bool is_root_process() const { return is_root_process_; }
|
||||
void RootInit();
|
||||
void ChildInit();
|
||||
|
||||
// Build global shared-memory statistics. This is invoked if at least
|
||||
// one server context (global or VirtualHost) enables statistics.
|
||||
Statistics* MakeGlobalSharedMemStatistics(bool logging,
|
||||
int64 logging_interval_ms,
|
||||
const GoogleString& logging_file);
|
||||
|
||||
// Creates and ::Initializes a shared memory statistics object.
|
||||
SharedMemStatistics* AllocateAndInitSharedMemStatistics(
|
||||
const StringPiece& name, const bool logging,
|
||||
const int64 logging_interval_ms, const GoogleString& logging_file);
|
||||
|
||||
virtual ApacheServerContext* MakeApacheServerContext(server_rec* server);
|
||||
ServerContext* NewServerContext();
|
||||
|
||||
|
||||
// Makes fetches from PSA to origin-server request
|
||||
// accept-encoding:gzip, even when used in a context when we want
|
||||
// cleartext. We'll decompress as we read the content if needed.
|
||||
void set_fetch_with_gzip(bool x) { fetch_with_gzip_ = x; }
|
||||
bool fetch_with_gzip() const { return fetch_with_gzip_; }
|
||||
|
||||
// Tracks the size of resources fetched from origin and populates the
|
||||
// X-Original-Content-Length header for resources derived from them.
|
||||
void set_track_original_content_length(bool x) {
|
||||
track_original_content_length_ = x;
|
||||
}
|
||||
bool track_original_content_length() const {
|
||||
return track_original_content_length_;
|
||||
}
|
||||
|
||||
void set_num_rewrite_threads(int x) { num_rewrite_threads_ = x; }
|
||||
int num_rewrite_threads() const { return num_rewrite_threads_; }
|
||||
void set_num_expensive_rewrite_threads(int x) {
|
||||
num_expensive_rewrite_threads_ = x;
|
||||
}
|
||||
int num_expensive_rewrite_threads() const {
|
||||
return num_expensive_rewrite_threads_;
|
||||
}
|
||||
|
||||
void set_message_buffer_size(int x) {
|
||||
message_buffer_size_ = x;
|
||||
}
|
||||
|
||||
// When Serf gets a system error during polling, to avoid spamming
|
||||
// the log we just print the number of outstanding fetch URLs. To
|
||||
// debug this it's useful to print the complete set of URLs, in
|
||||
// which case this should be turned on.
|
||||
void list_outstanding_urls_on_error(bool x) {
|
||||
list_outstanding_urls_on_error_ = x;
|
||||
}
|
||||
|
||||
bool use_per_vhost_statistics() const {
|
||||
return use_per_vhost_statistics_;
|
||||
}
|
||||
|
||||
void set_use_per_vhost_statistics(bool x) {
|
||||
use_per_vhost_statistics_ = x;
|
||||
}
|
||||
|
||||
bool enable_property_cache() const {
|
||||
return enable_property_cache_;
|
||||
}
|
||||
|
||||
void set_enable_property_cache(bool x) {
|
||||
enable_property_cache_ = x;
|
||||
}
|
||||
|
||||
// If true, virtual hosts should inherit global configuration.
|
||||
bool inherit_vhost_config() const {
|
||||
return inherit_vhost_config_;
|
||||
}
|
||||
|
||||
void set_inherit_vhost_config(bool x) {
|
||||
inherit_vhost_config_ = x;
|
||||
}
|
||||
|
||||
bool disable_loopback_routing() const {
|
||||
return disable_loopback_routing_;
|
||||
}
|
||||
|
||||
void set_disable_loopback_routing(bool x) {
|
||||
disable_loopback_routing_ = x;
|
||||
}
|
||||
|
||||
bool install_crash_handler() const {
|
||||
return install_crash_handler_;
|
||||
}
|
||||
|
||||
void set_install_crash_handler(bool x) {
|
||||
install_crash_handler_ = x;
|
||||
}
|
||||
|
||||
SystemCaches* caches() { return caches_.get(); }
|
||||
|
||||
// mod_pagespeed uses a beacon handler to collect data for critical images,
|
||||
// css, etc., so filters should be configured accordingly.
|
||||
virtual bool UseBeaconResultsInFilters() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Finds a fetcher for the settings in this config, sharing with
|
||||
// existing fetchers if possible, otherwise making a new one (and
|
||||
// its required thread).
|
||||
UrlAsyncFetcher* GetFetcher(ApacheConfig* config);
|
||||
|
||||
// As above, but just gets a Serf fetcher --- not a slurp fetcher or a rate
|
||||
// limiting one, etc.
|
||||
SerfUrlAsyncFetcher* GetSerfFetcher(ApacheConfig* config);
|
||||
|
||||
// Notification of apache tearing down a context (vhost or top-level)
|
||||
// corresponding to given ApacheServerContext. Returns true if it was
|
||||
// the last context.
|
||||
bool PoolDestroyed(ApacheServerContext* rm);
|
||||
|
||||
// Create a new RewriteOptions. In this implementation it will be an
|
||||
// ApacheConfig.
|
||||
virtual RewriteOptions* NewRewriteOptions();
|
||||
|
||||
// As above, but set a name on the ApacheConfig noting that it came from
|
||||
// a query.
|
||||
virtual RewriteOptions* NewRewriteOptionsForQuery();
|
||||
|
||||
// Initializes all the statistics objects created transitively by
|
||||
// ApacheRewriteDriverFactory, including apache-specific and
|
||||
// platform-independent statistics.
|
||||
static void InitStats(Statistics* statistics);
|
||||
static void Initialize();
|
||||
static void Terminate();
|
||||
|
||||
// Parses a comma-separated list of HTTPS options. If successful, applies
|
||||
// the options to the fetcher and returns true. If the options were invalid,
|
||||
// *error_message is populated and false is returned.
|
||||
//
|
||||
// It is *not* considered an error in this context to attempt to enable HTTPS
|
||||
// when support is not compiled in. However, an error message will be logged
|
||||
// in the server log, and the option-setting will have no effect.
|
||||
bool SetHttpsOptions(StringPiece directive, GoogleString* error_message);
|
||||
|
||||
ModSpdyFetchController* mod_spdy_fetch_controller() {
|
||||
return mod_spdy_fetch_controller_.get();
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual UrlFetcher* DefaultUrlFetcher();
|
||||
virtual UrlAsyncFetcher* DefaultAsyncUrlFetcher();
|
||||
virtual void StopCacheActivity();
|
||||
|
||||
// Provide defaults.
|
||||
virtual MessageHandler* DefaultHtmlParseMessageHandler();
|
||||
virtual MessageHandler* DefaultMessageHandler();
|
||||
virtual FileSystem* DefaultFileSystem();
|
||||
virtual Timer* DefaultTimer();
|
||||
virtual void SetupCaches(ServerContext* resource_manager);
|
||||
virtual NamedLockManager* DefaultLockManager();
|
||||
virtual QueuedWorkerPool* CreateWorkerPool(WorkerPoolCategory pool,
|
||||
StringPiece name);
|
||||
|
||||
// Disable the Resource Manager's filesystem since we have a
|
||||
// write-through http_cache.
|
||||
virtual bool ShouldWriteResourcesToFileSystem() { return false; }
|
||||
|
||||
// This helper method contains init procedures invoked by both RootInit()
|
||||
// and ChildInit()
|
||||
void ParentOrChildInit();
|
||||
// Initialize SharedCircularBuffer and pass it to ApacheMessageHandler and
|
||||
// ApacheHtmlParseMessageHandler. is_root is true if this is invoked from
|
||||
// root (ie. parent) process.
|
||||
void SharedCircularBufferInit(bool is_root);
|
||||
|
||||
// Release all the resources. It also calls the base class ShutDown to release
|
||||
// the base class resources.
|
||||
virtual void ShutDown();
|
||||
|
||||
// Initializes the StaticAssetManager.
|
||||
virtual void InitStaticAssetManager(StaticAssetManager* static_asset_manager);
|
||||
|
||||
private:
|
||||
typedef SharedMemCache<64> MetadataShmCache;
|
||||
struct MetadataShmCacheInfo {
|
||||
MetadataShmCacheInfo() : cache_backend(NULL) {}
|
||||
|
||||
// Note that the fields may be NULL if e.g. initialization failed.
|
||||
scoped_ptr<CacheInterface> cache_to_use; // may be CacheStats or such.
|
||||
MetadataShmCache* cache_backend;
|
||||
};
|
||||
|
||||
// Updates num_rewrite_threads_ and num_expensive_rewrite_threads_
|
||||
// with sensible values if they are not explicitly set.
|
||||
void AutoDetectThreadCounts();
|
||||
|
||||
apr_pool_t* pool_;
|
||||
server_rec* server_rec_;
|
||||
scoped_ptr<SharedMemStatistics> shared_mem_statistics_;
|
||||
scoped_ptr<AbstractSharedMem> shared_mem_runtime_;
|
||||
scoped_ptr<SharedCircularBuffer> shared_circular_buffer_;
|
||||
scoped_ptr<SlowWorker> slow_worker_;
|
||||
|
||||
// TODO(jmarantz): These options could be consolidated in a protobuf or
|
||||
// some other struct, which would keep them distinct from the rest of the
|
||||
// state. Note also that some of the options are in the base class,
|
||||
// RewriteDriverFactory, so we'd have to sort out how that worked.
|
||||
GoogleString version_;
|
||||
|
||||
bool statistics_frozen_;
|
||||
bool is_root_process_;
|
||||
bool fetch_with_gzip_;
|
||||
bool track_original_content_length_;
|
||||
bool list_outstanding_urls_on_error_;
|
||||
|
||||
// hostname_identifier_ equals to "server_hostname:port" of Apache,
|
||||
// it's used to distinguish the name of shared memory,
|
||||
// so that each vhost has its own SharedCircularBuffer.
|
||||
const GoogleString hostname_identifier_;
|
||||
// This will be assigned to message_handler_ when message_handler() or
|
||||
// html_parse_message_handler is invoked for the first time.
|
||||
// We keep an extra link because we need to refer them as
|
||||
// ApacheMessageHandlers rather than just MessageHandler in initialization
|
||||
// process.
|
||||
ApacheMessageHandler* apache_message_handler_;
|
||||
// This will be assigned to html_parse_message_handler_ when
|
||||
// html_parse_message_handler() is invoked for the first time.
|
||||
// Note that apache_message_handler_ and apache_html_parse_message_handler
|
||||
// writes to the same shared memory which is owned by the factory.
|
||||
ApacheMessageHandler* apache_html_parse_message_handler_;
|
||||
|
||||
// Once ServerContexts are initialized via
|
||||
// RewriteDriverFactory::InitServerContext, they will be
|
||||
// managed by the RewriteDriverFactory. But in the root Apache process
|
||||
// the ServerContexts will never be initialized. We track these here
|
||||
// so that ApacheRewriteDriverFactory::ChildInit can iterate over all
|
||||
// the managers that need to be ChildInit'd, and so that we can free
|
||||
// the managers in the Root process that were never ChildInit'd.
|
||||
typedef std::set<ApacheServerContext*> ApacheServerContextSet;
|
||||
ApacheServerContextSet uninitialized_managers_;
|
||||
|
||||
// If true, we'll have a separate statistics object for each vhost
|
||||
// (along with a global aggregate), rather than just a single object
|
||||
// aggregating all of them.
|
||||
bool use_per_vhost_statistics_;
|
||||
|
||||
// Enable the property cache.
|
||||
bool enable_property_cache_;
|
||||
|
||||
// Inherit configuration from global context into vhosts.
|
||||
bool inherit_vhost_config_;
|
||||
|
||||
// If false (default) we will redirect all fetches to unknown hosts to
|
||||
// localhost.
|
||||
bool disable_loopback_routing_;
|
||||
|
||||
// If true, we'll install a signal handler that prints backtraces.
|
||||
bool install_crash_handler_;
|
||||
|
||||
// true iff we ran through AutoDetectThreadCounts()
|
||||
bool thread_counts_finalized_;
|
||||
|
||||
// These are <= 0 if we should autodetect.
|
||||
int num_rewrite_threads_;
|
||||
int num_expensive_rewrite_threads_;
|
||||
|
||||
int max_mod_spdy_fetch_threads_;
|
||||
|
||||
// Size of shared circular buffer for displaying Info messages in
|
||||
// /mod_pagespeed_messages.
|
||||
int message_buffer_size_;
|
||||
|
||||
// Serf fetchers are expensive -- they each cost a thread. Allocate
|
||||
// one for each proxy/slurp-setting. Currently there is no
|
||||
// consistency checking for fetcher timeout.
|
||||
typedef std::map<GoogleString, UrlAsyncFetcher*> FetcherMap;
|
||||
FetcherMap fetcher_map_;
|
||||
typedef std::map<GoogleString, SerfUrlAsyncFetcher*> SerfFetcherMap;
|
||||
SerfFetcherMap serf_fetcher_map_;
|
||||
|
||||
// Helps coordinate direct-to-mod_spdy fetches.
|
||||
scoped_ptr<ModSpdyFetchController> mod_spdy_fetch_controller_;
|
||||
|
||||
GoogleString https_options_;
|
||||
|
||||
// Manages all our caches & lock managers.
|
||||
scoped_ptr<SystemCaches> caches_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ApacheRewriteDriverFactory);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_REWRITE_DRIVER_FACTORY_H_
|
||||
@@ -1,186 +0,0 @@
|
||||
// Copyright 2011 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_SERVER_CONTEXT_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_SERVER_CONTEXT_H_
|
||||
|
||||
#include "net/instaweb/apache/apache_config.h"
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/system/public/system_server_context.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
struct server_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class ApacheRewriteDriverFactory;
|
||||
class Histogram;
|
||||
class ProxyFetchFactory;
|
||||
class RewriteDriverPool;
|
||||
class RewriteDriver;
|
||||
class RewriteStats;
|
||||
class SharedMemStatistics;
|
||||
class Statistics;
|
||||
class UrlAsyncFetcherStats;
|
||||
|
||||
// Apache-specific ServerContext. It differs from the SystemServerContext
|
||||
// base class by adding per-VirtualHost configuration, including:
|
||||
// - file-cache path & limits
|
||||
// - default RewriteOptions.
|
||||
// Additionally, there are startup semantics for apache's prefork model
|
||||
// that require a phased initialization.
|
||||
class ApacheServerContext : public SystemServerContext {
|
||||
public:
|
||||
ApacheServerContext(ApacheRewriteDriverFactory* factory,
|
||||
server_rec* server,
|
||||
const StringPiece& version);
|
||||
virtual ~ApacheServerContext();
|
||||
|
||||
// Returns a copy of hostname_identifier_ (described on the member below).
GoogleString hostname_identifier() { return hostname_identifier_; }
|
||||
// Returns the apache_factory_ pointer.
ApacheRewriteDriverFactory* apache_factory() { return apache_factory_; }
|
||||
ApacheConfig* config();
|
||||
bool InitFileCachePath();
|
||||
|
||||
// These return configuration objects that hold settings from
|
||||
// <ModPagespeedIf spdy> and <ModPagespeedIf !spdy> sections of configuration.
|
||||
// They initialize lazily, so are not thread-safe; however they are only
|
||||
// meant to be used during configuration parsing. These methods should be
|
||||
// called only if there is actually a need to put something in them, since
|
||||
// otherwise we may end up constructing separate SPDY vs. non-SPDY
|
||||
// configurations needlessly.
|
||||
ApacheConfig* SpdyConfigOverlay();
|
||||
ApacheConfig* NonSpdyConfigOverlay();
|
||||
|
||||
// These return true if the given overlays were constructed (in response
|
||||
// to having something in config files to put in them).
|
||||
bool has_spdy_config_overlay() const {
|
||||
return spdy_config_overlay_.get() != NULL;
|
||||
}
|
||||
|
||||
bool has_non_spdy_config_overlay() const {
|
||||
return non_spdy_config_overlay_.get() != NULL;
|
||||
}
|
||||
|
||||
// These two take ownership of their parameters; reset() on the scoped_ptr
// member replaces whatever overlay it held before.
|
||||
void set_spdy_config_overlay(ApacheConfig* x) {
|
||||
spdy_config_overlay_.reset(x);
|
||||
}
|
||||
|
||||
void set_non_spdy_config_overlay(ApacheConfig* x) {
|
||||
non_spdy_config_overlay_.reset(x);
|
||||
}
|
||||
|
||||
// Returns special configuration that should be used for SPDY sessions
|
||||
// instead of config(). Returns NULL if config() should be used instead.
|
||||
ApacheConfig* SpdyConfig() { return spdy_specific_config_.get(); }
|
||||
|
||||
// Pool to pass to NewRewriteDriverFromPool to get a RewriteDriver configured
|
||||
// with SPDY-specific options. May be NULL in case there is no spdy-specific
|
||||
// configuration.
|
||||
RewriteDriverPool* spdy_driver_pool() { return spdy_driver_pool_; }
|
||||
|
||||
// This should be called after all configuration parsing is done to collapse
|
||||
// configuration inside the config overlays into actual ApacheConfig objects.
|
||||
// It will also compute signatures when done.
|
||||
void CollapseConfigOverlaysAndComputeSignatures();
|
||||
|
||||
// Initialize this ServerContext to have its own statistics domain.
|
||||
// Must be called after global_statistics has been created and had
|
||||
// ::Initialize called on it.
|
||||
void CreateLocalStatistics(Statistics* global_statistics);
|
||||
|
||||
// Should be called after the child process is forked.
|
||||
void ChildInit();
|
||||
|
||||
// Returns the initialized_ flag.
bool initialized() const { return initialized_; }
|
||||
|
||||
// Called on notification from Apache on child exit. Returns true
|
||||
// if this is the last ServerContext that exists.
|
||||
bool PoolDestroyed();
|
||||
|
||||
// Accumulate in a histogram the amount of time spent rewriting HTML.
|
||||
// TODO(sligocki): Remove in favor of RewriteStats::rewrite_latency_histogram.
|
||||
void AddHtmlRewriteTimeUs(int64 rewrite_time_us);
|
||||
|
||||
static void InitStats(Statistics* statistics);
|
||||
|
||||
// Returns server_rec_ as a pointer-to-const.
const server_rec* server() const { return server_rec_; }
|
||||
|
||||
virtual RewriteDriverPool* SelectDriverPool(bool using_spdy);
|
||||
|
||||
virtual void ApplySessionFetchers(const RequestContextPtr& req,
|
||||
RewriteDriver* driver);
|
||||
|
||||
// Returns the raw pointer held by proxy_fetch_factory_; the scoped_ptr
// member keeps ownership.
ProxyFetchFactory* proxy_fetch_factory() {
|
||||
return proxy_fetch_factory_.get();
|
||||
}
|
||||
|
||||
void InitProxyFetchFactory();
|
||||
|
||||
// We do not proxy external HTML from mod_pagespeed in Apache using the
|
||||
// ProxyFetch flow. Currently we must rely on a separate module to
|
||||
// let mod_pagespeed behave as an origin fetcher.
|
||||
virtual bool ProxiesHtml() const { return false; }
|
||||
|
||||
private:
|
||||
virtual bool UpdateCacheFlushTimestampMs(int64 timestamp_ms);
|
||||
|
||||
ApacheRewriteDriverFactory* apache_factory_;
|
||||
server_rec* server_rec_;
|
||||
GoogleString version_;
|
||||
|
||||
// hostname_identifier_ is equal to Apache's "server_hostname:port".
|
||||
// It is used to distinguish the name of shared memory,
|
||||
// so that each vhost has its own SharedCircularBuffer.
|
||||
GoogleString hostname_identifier_;
|
||||
|
||||
bool initialized_;
|
||||
|
||||
// Non-NULL if we have per-vhost stats.
|
||||
scoped_ptr<Statistics> split_statistics_;
|
||||
|
||||
// May be NULL. Owned by *split_statistics_.
|
||||
SharedMemStatistics* local_statistics_;
|
||||
|
||||
// These are non-NULL if we have per-vhost stats.
|
||||
scoped_ptr<RewriteStats> local_rewrite_stats_;
|
||||
scoped_ptr<UrlAsyncFetcherStats> stats_fetcher_;
|
||||
|
||||
// May be NULL. Constructed once we see things in config files that should
|
||||
// be stored in these.
|
||||
scoped_ptr<ApacheConfig> spdy_config_overlay_;
|
||||
scoped_ptr<ApacheConfig> non_spdy_config_overlay_;
|
||||
|
||||
// May be NULL if we don't have any special settings for when using SPDY.
|
||||
scoped_ptr<ApacheConfig> spdy_specific_config_;
|
||||
|
||||
// Owned by ServerContext via a call to ManageRewriteDriverPool.
|
||||
// May be NULL if we don't have a spdy-specific configuration.
|
||||
RewriteDriverPool* spdy_driver_pool_;
|
||||
|
||||
// Histogram fed by AddHtmlRewriteTimeUs() (see its comment above).
Histogram* html_rewrite_time_us_histogram_;
|
||||
|
||||
scoped_ptr<ProxyFetchFactory> proxy_fetch_factory_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ApacheServerContext);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_SERVER_CONTEXT_H_
|
||||
@@ -1,32 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_SLURP_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_SLURP_H_
|
||||
|
||||
struct request_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class ApacheServerContext;
|
||||
|
||||
// Loads the URL based on the fetchers and other infrastructure in the
|
||||
// factory.
// NOTE(review): presumably the URL is taken from request `r` and the factory
// is reached through `manager` -- confirm in apache_slurp.cc.
|
||||
void SlurpUrl(ApacheServerContext* manager, request_rec* r);
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_SLURP_H_
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: morlovich@google.com (Maksim Orlovich)
|
||||
//
|
||||
// A wrapper around PthreadThreadSystem for use in Apache that takes care of
|
||||
// some signal masking issues that arise in prefork. We prefer pthreads to APR
|
||||
// as APR mutex, etc., creation requires pools which are generally thread
|
||||
// unsafe, introducing some additional risks.
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_THREAD_SYSTEM_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_THREAD_SYSTEM_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/pthread_thread_system.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class Timer;
|
||||
|
||||
// PthreadThreadSystem subclass for use in Apache; the file-level comment
// above explains the prefork signal-masking motivation.
class ApacheThreadSystem : public PthreadThreadSystem {
|
||||
public:
|
||||
ApacheThreadSystem();
|
||||
virtual ~ApacheThreadSystem();
|
||||
// NOTE(review): presumably returns a newly allocated Timer owned by the
// caller -- confirm against the .cc and the base-class contract.
virtual Timer* NewTimer();
|
||||
|
||||
protected:
|
||||
// NOTE(review): judging by the name, a hook run on a new thread before its
// main routine; presumably where signal masking is applied -- confirm.
virtual void BeforeThreadRunHook();
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ApacheThreadSystem);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_THREAD_SYSTEM_H_
|
||||
@@ -1,76 +0,0 @@
|
||||
// Copyright 2013 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APACHE_WRITER_H_
|
||||
#define NET_INSTAWEB_APACHE_APACHE_WRITER_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/writer.h"
|
||||
#include "httpd.h" // NOLINT
|
||||
|
||||
struct request_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
class ResponseHeaders;
|
||||
|
||||
// Writer object that writes to an Apache Request stream.
|
||||
class ApacheWriter : public Writer {
|
||||
public:
|
||||
explicit ApacheWriter(request_rec* r);
|
||||
virtual ~ApacheWriter();
|
||||
|
||||
virtual bool Write(const StringPiece& str, MessageHandler* handler);
|
||||
virtual bool Flush(MessageHandler* handler);
|
||||
|
||||
// Copies the contents of the specified response_headers to the Apache
|
||||
// headers_out structure. This must be done before any bytes are flushed.
|
||||
//
|
||||
// Note: if strip_cookies is set, the cookies will be stripped here.
|
||||
//
|
||||
// If set_content_length was previously called, this will set a
|
||||
// content length to avoid chunked encoding; otherwise it will clear
|
||||
// any content-length specified in the response headers.
|
||||
void OutputHeaders(ResponseHeaders* response_headers);
|
||||
// Records the content length that OutputHeaders() should emit.
void set_content_length(int64 x) { content_length_ = x; }
|
||||
|
||||
// Disables mod_expires and mod_headers to allow the headers to
|
||||
// be under control of mod_pagespeed. Default is false.
|
||||
void set_disable_downstream_header_filters(bool x) {
|
||||
disable_downstream_header_filters_ = x;
|
||||
}
|
||||
|
||||
// Removes 'Set-Cookie' and 'Set-Cookie2' from the response headers
|
||||
// once they are complete. Default is false.
|
||||
void set_strip_cookies(bool x) {
|
||||
strip_cookies_ = x;
|
||||
}
|
||||
|
||||
private:
|
||||
request_rec* request_;
|
||||
// NOTE(review): presumably true once OutputHeaders() has run -- confirm.
bool headers_out_;
|
||||
bool disable_downstream_header_filters_;
|
||||
bool strip_cookies_;
|
||||
int64 content_length_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ApacheWriter);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APACHE_WRITER_H_
|
||||
@@ -1,105 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: lsong@google.com (Libo Song)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APR_FILE_SYSTEM_H_
|
||||
#define NET_INSTAWEB_APACHE_APR_FILE_SYSTEM_H_
|
||||
|
||||
#include "apr.h"
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/file_system.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
struct apr_finfo_t;
|
||||
struct apr_pool_t;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractMutex;
|
||||
class MessageHandler;
|
||||
class ThreadSystem;
|
||||
class Timer;
|
||||
|
||||
// NOTE(review): judging by the name and parameters, reports an APR failure
// (`error_code`, with `message`) through message_handler, tagged with the
// `filename`/`line` of the call site -- confirm in apr_file_system.cc.
void AprReportError(MessageHandler* message_handler, const char* filename,
|
||||
int line, const char* message, int error_code);
|
||||
|
||||
// FileSystem implementation that calls into APR's file system operations
// (see the comment on mutex_ below).
class AprFileSystem : public FileSystem {
|
||||
public:
|
||||
AprFileSystem(apr_pool_t* pool, ThreadSystem* thread_system);
|
||||
~AprFileSystem();
|
||||
|
||||
virtual int MaxPathLength(const StringPiece& base) const;
|
||||
virtual InputFile* OpenInputFile(
|
||||
const char* file, MessageHandler* message_handler);
|
||||
virtual OutputFile* OpenOutputFileHelper(
|
||||
const char* file, bool append, MessageHandler* message_handler);
|
||||
// See FileSystem interface for specifics of OpenTempFile.
|
||||
virtual OutputFile* OpenTempFileHelper(const StringPiece& prefix_name,
|
||||
MessageHandler* message_handler);
|
||||
|
||||
virtual bool ListContents(const StringPiece& dir, StringVector* files,
|
||||
MessageHandler* handler);
|
||||
// Like POSIX 'mkdir', makes a directory only if parent directory exists.
|
||||
// Fails if directory_path already exists or parent directory doesn't exist.
|
||||
virtual bool MakeDir(const char* directory_path, MessageHandler* handler);
|
||||
virtual bool RemoveDir(const char* directory_path,
|
||||
MessageHandler* message_handler);
|
||||
virtual bool RemoveFile(const char* filename,
|
||||
MessageHandler* message_handler);
|
||||
virtual bool RenameFileHelper(const char* old_filename,
|
||||
const char* new_filename,
|
||||
MessageHandler* message_handler);
|
||||
|
||||
virtual bool Atime(const StringPiece& path,
|
||||
int64* timestamp_sec, MessageHandler* handler);
|
||||
virtual bool Mtime(const StringPiece& path,
|
||||
int64* timestamp_sec, MessageHandler* handler);
|
||||
// Report the disk utilization of the file specified by path. Note that disk
|
||||
// utilization could differ from the apparent size of the file as it depends
|
||||
// on the underlying file system and default block size.
|
||||
virtual bool Size(const StringPiece& path, int64* size,
|
||||
MessageHandler* handler);
|
||||
virtual BoolOrError Exists(const char* path, MessageHandler* handler);
|
||||
virtual BoolOrError IsDir(const char* path, MessageHandler* handler);
|
||||
|
||||
virtual BoolOrError TryLock(const StringPiece& lock_name,
|
||||
MessageHandler* handler);
|
||||
virtual BoolOrError TryLockWithTimeout(const StringPiece& lock_name,
|
||||
int64 timeout_ms,
|
||||
const Timer* timer,
|
||||
MessageHandler* handler);
|
||||
virtual bool Unlock(const StringPiece& lock_name, MessageHandler* handler);
|
||||
|
||||
private:
|
||||
// Used by the Atime, Mtime and Size methods to get file info.
|
||||
bool Stat(const StringPiece& path,
|
||||
apr_finfo_t* file_info, apr_int32_t field_wanted,
|
||||
MessageHandler* handler);
|
||||
|
||||
apr_pool_t* pool_;
|
||||
|
||||
// We use a mutex to protect the pool above when calling into apr's file
|
||||
// system ops, which might otherwise access it concurrently in an unsafe
|
||||
// way.
|
||||
scoped_ptr<AbstractMutex> mutex_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(AprFileSystem);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APR_FILE_SYSTEM_H_
|
||||
@@ -1,38 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
// lsong@google.com (Libo Song)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_APR_TIMER_H_
|
||||
#define NET_INSTAWEB_APACHE_APR_TIMER_H_
|
||||
|
||||
#include "net/instaweb/util/public/timer.h"
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
|
||||
using net_instaweb::Timer;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Timer implementation for the Apache module.
// NOTE(review): presumably backed by APR's time functions -- confirm in
// apr_timer.cc.
class AprTimer : public Timer {
|
||||
public:
|
||||
virtual ~AprTimer();
|
||||
// NOTE(review): by its name, the current time in microseconds -- confirm
// the epoch against the Timer base class.
virtual int64 NowUs() const;
|
||||
// NOTE(review): by its name, sleeps for `us` microseconds -- confirm.
virtual void SleepUs(int64 us);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_APR_TIMER_H_
|
||||
@@ -1,66 +0,0 @@
|
||||
// Copyright 2010 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_HEADER_UTIL_H_
|
||||
#define NET_INSTAWEB_APACHE_HEADER_UTIL_H_
|
||||
|
||||
struct request_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class RequestHeaders;
|
||||
class ResponseHeaders;
|
||||
|
||||
// Converts the Apache header structure of 'request' into *request_headers.
// NOTE(review): presumably reads request.headers_in -- confirm in the .cc.
|
||||
void ApacheRequestToRequestHeaders(const request_rec& request,
|
||||
RequestHeaders* request_headers);
|
||||
|
||||
// Converts Apache header structure (request.headers_out) into the
|
||||
// ResponseHeaders 'headers'. If err_headers is not NULL then
// request.err_headers_out is copied
|
||||
// into it. In the event that headers == err_headers, the headers from
|
||||
// request.err_headers_out will be appended to the list of headers, but no
|
||||
// merging occurs.
|
||||
void ApacheRequestToResponseHeaders(const request_rec& request,
|
||||
ResponseHeaders* headers,
|
||||
ResponseHeaders* err_headers);
|
||||
|
||||
|
||||
|
||||
// Converts ResponseHeaders into an Apache request's headers_out table.
|
||||
// This function does not alter the major/minor version of the Apache request.
// NOTE(review): ok_to_disable_downstream_headers presumably permits a call to
// DisableDownstreamHeaderFilters() -- confirm in header_util.cc.
|
||||
void ResponseHeadersToApacheRequest(const ResponseHeaders& response_headers,
|
||||
bool ok_to_disable_downstream_headers,
|
||||
request_rec* request);
|
||||
|
||||
// Converts ResponseHeaders (headers and err_headers) into Apache request
|
||||
// headers (headers_out and err_headers_out respectively). Either headers or
|
||||
// err_headers may be NULL, but not both. Unlike in
|
||||
// ApacheRequestToResponseHeaders, it does not make sense for headers to equal
|
||||
// err_headers since it will result in duplicate headers being written.
|
||||
void AddResponseHeadersToRequest(const ResponseHeaders* headers,
|
||||
const ResponseHeaders* err_headers,
|
||||
bool ok_to_disable_downstream_headers,
|
||||
request_rec* request);
|
||||
|
||||
// Removes downstream filters that might corrupt our caching headers.
|
||||
void DisableDownstreamHeaderFilters(request_rec* request);
|
||||
|
||||
// Debug utility that prints the Apache headers of 'request' to stdout.
|
||||
void PrintHeaders(request_rec* request);
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_HEADER_UTIL_H_
|
||||
@@ -1,89 +0,0 @@
|
||||
// Copyright 2013 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_IN_PLACE_RESOURCE_RECORDER_H_
|
||||
#define NET_INSTAWEB_APACHE_IN_PLACE_RESOURCE_RECORDER_H_
|
||||
|
||||
#include "net/instaweb/http/public/http_value.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/writer.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HTTPCache;
|
||||
class MessageHandler;
|
||||
class RequestHeaders;
|
||||
class ResponseHeaders;
|
||||
class Statistics;
|
||||
class Variable;
|
||||
|
||||
// Records a copy of a resource streamed through it and saves the result to
|
||||
// the cache if it's cacheable. Used in the In-Place Resource Optimization
|
||||
// (IPRO) flow to get resources into the cache.
|
||||
class InPlaceResourceRecorder : public Writer {
|
||||
public:
|
||||
// Takes ownership of request_headers, but not of cache or handler.
|
||||
// Like other callbacks, InPlaceResourceRecorder is self-owned and will
|
||||
// delete itself when DoneAndSetHeaders() is called.
|
||||
InPlaceResourceRecorder(StringPiece url, RequestHeaders* request_headers,
|
||||
bool respect_vary, HTTPCache* cache,
|
||||
Statistics* statistics, MessageHandler* handler);
|
||||
virtual ~InPlaceResourceRecorder();
|
||||
|
||||
static void InitStats(Statistics* statistics);
|
||||
|
||||
virtual bool Write(const StringPiece& contents, MessageHandler* handler);
|
||||
virtual bool Flush(MessageHandler* handler);
|
||||
|
||||
// Call if something went wrong. The results will not be added to cache.
|
||||
void Fail() { success_ = false; }
|
||||
|
||||
// Call when finished and the final response headers are known.
|
||||
// Because of Apache's quirky filter order, we cannot get both the
|
||||
// uncompressed final contents and the complete headers at the same time.
|
||||
// Does not take ownership of response_headers.
|
||||
//
|
||||
// Deletes itself. Do not use object after calling DoneAndSetHeaders().
|
||||
void DoneAndSetHeaders(ResponseHeaders* response_headers);
|
||||
|
||||
const GoogleString& url() const { return url_; }
|
||||
MessageHandler* handler() { return handler_; }
|
||||
|
||||
private:
|
||||
const GoogleString url_;
|
||||
// Owned: the constructor takes ownership of request_headers.
const scoped_ptr<RequestHeaders> request_headers_;
|
||||
const bool respect_vary_;
|
||||
|
||||
HTTPValue resource_value_;
|
||||
// Cleared by Fail(); see its comment above.
bool success_;
|
||||
|
||||
// Not owned (see the constructor comment).
HTTPCache* cache_;
|
||||
MessageHandler* handler_;
|
||||
|
||||
// NOTE(review): presumably looked up from the Statistics object passed to
// the constructor -- confirm in the .cc.
Variable* num_resources_;
|
||||
Variable* num_inserted_into_cache_;
|
||||
Variable* num_not_cacheable_;
|
||||
Variable* num_failed_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(InPlaceResourceRecorder);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_IN_PLACE_RESOURCE_RECORDER_H_
|
||||
@@ -1,176 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
// lsong@google.com (Libo Song)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_INSTAWEB_CONTEXT_H_
|
||||
#define NET_INSTAWEB_APACHE_INSTAWEB_CONTEXT_H_
|
||||
|
||||
#include "net/instaweb/automatic/public/html_detector.h"
|
||||
#include "net/instaweb/http/public/content_type.h"
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/http/public/response_headers.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/property_cache.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/string_writer.h"
|
||||
#include "net/instaweb/util/public/thread_system.h"
|
||||
|
||||
// The httpd header must be after the
|
||||
// apache_rewrite_driver_factory.h. Otherwise, the compiler will
|
||||
// complain "strtoul_is_not_a_portable_function_use_strtol_instead".
|
||||
#include "httpd.h"
|
||||
#include "apr_pools.h"
|
||||
|
||||
struct apr_bucket_brigade;
|
||||
struct request_rec;
|
||||
struct server_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class ApacheServerContext;
|
||||
class GzipInflater;
|
||||
class RequestHeaders;
|
||||
class RewriteDriver;
|
||||
class RewriteOptions;
|
||||
|
||||
// NOTE(review): presumably the key under which the original request URL is
// stored for later lookup -- confirm against its users.
const char kPagespeedOriginalUrl[] = "mod_pagespeed_original_url";
|
||||
|
||||
// Generic deleter meant to be used with apr_pool_cleanup_register().
// 'object' is cast to T* and destroyed with delete, so it must point to a T
// allocated with new. Always returns APR_SUCCESS.
|
||||
template <class T>
|
||||
apr_status_t apache_cleanup(void* object) {
|
||||
T* resolved = static_cast<T*>(object);
|
||||
delete resolved;
|
||||
return APR_SUCCESS;
|
||||
}
|
||||
|
||||
// Tracks a single property-cache lookup.
|
||||
class PropertyCallback : public PropertyPage {
|
||||
public:
|
||||
PropertyCallback(RewriteDriver* driver,
|
||||
ThreadSystem* thread_system,
|
||||
const StringPiece& key);
|
||||
|
||||
virtual void Done(bool success);
|
||||
|
||||
// NOTE(review): by its name, blocks the caller until Done() has run,
// presumably using mutex_/condvar_/done_ below -- confirm in the .cc.
void BlockUntilDone();
|
||||
|
||||
private:
|
||||
RewriteDriver* driver_;
|
||||
GoogleString url_;
|
||||
bool done_;
|
||||
scoped_ptr<ThreadSystem::CondvarCapableMutex> mutex_;
|
||||
scoped_ptr<ThreadSystem::Condvar> condvar_;
|
||||
DISALLOW_COPY_AND_ASSIGN(PropertyCallback);
|
||||
};
|
||||
|
||||
// Context for an HTML rewrite.
|
||||
//
|
||||
// One is created for responses that appear to be HTML (although there is
|
||||
// a basic sanity check that the first non-space char is '<').
|
||||
//
|
||||
// The rewriter will put the rewritten content into the output string when
|
||||
// flushed or finished. We call Flush when we see the FLUSH bucket, and
|
||||
// call Finish when we see the EOS bucket.
|
||||
//
|
||||
// TODO(sligocki): Factor out similarities between this and ProxyFetch.
|
||||
class InstawebContext {
|
||||
public:
|
||||
enum ContentEncoding { kNone, kGzip, kDeflate, kOther };
|
||||
enum ContentDetectionState { kStart, kHtml, kNotHtml };
|
||||
|
||||
// Takes ownership of request_headers.
|
||||
InstawebContext(request_rec* request,
|
||||
RequestHeaders* request_headers,
|
||||
const ContentType& content_type,
|
||||
ApacheServerContext* server_context,
|
||||
const GoogleString& base_url,
|
||||
const RequestContextPtr& request_context,
|
||||
bool use_custom_options,
|
||||
const RewriteOptions& options);
|
||||
~InstawebContext();
|
||||
|
||||
void Rewrite(const char* input, int size);
|
||||
void Flush();
|
||||
void Finish();
|
||||
|
||||
apr_bucket_brigade* bucket_brigade() const { return bucket_brigade_; }
|
||||
ContentEncoding content_encoding() const { return content_encoding_; }
|
||||
ApacheServerContext* apache_server_context() { return server_context_; }
|
||||
const GoogleString& output() { return output_; }
|
||||
bool empty() const { return output_.empty(); }
|
||||
void clear() { output_.clear(); } // TODO(jmarantz): needed?
|
||||
|
||||
ResponseHeaders* response_headers() {
|
||||
return &response_headers_;
|
||||
}
|
||||
|
||||
bool sent_headers() { return sent_headers_; }
|
||||
void set_sent_headers(bool sent) { sent_headers_ = sent; }
|
||||
|
||||
// Populated response_headers_ with the request's headers_out table.
|
||||
void PopulateHeaders(request_rec* request);
|
||||
|
||||
// Looks up the apache server context from the server rec.
|
||||
// TODO(jmarantz): Is there a better place to put this? It needs to
|
||||
// be used by both mod_instaweb.cc and instaweb_handler.cc.
|
||||
static ApacheServerContext* ServerContextFromServerRec(server_rec* server);
|
||||
|
||||
// Returns a fetchable URI from a request, using the request pool.
|
||||
static const char* MakeRequestUrl(const RewriteOptions& options,
|
||||
request_rec* request);
|
||||
|
||||
bool modify_caching_headers() const { return modify_caching_headers_; }
|
||||
|
||||
private:
|
||||
void ComputeContentEncoding(request_rec* request);
|
||||
|
||||
// Start a new property cache lookup. The caller is responsible for cleaning
|
||||
// up the returned PropertyCallback*.
|
||||
PropertyCallback* InitiatePropertyCacheLookup();
|
||||
void ProcessBytes(const char* input, int size);
|
||||
|
||||
// Checks to see if there was a Furious cookie sent with the request.
|
||||
// If there was not, set one, and add a Set-Cookie header to the
|
||||
// response headers.
|
||||
// If there was one, make sure to set the options state appropriately.
|
||||
void SetFuriousStateAndCookie(request_rec* request, RewriteOptions* options);
|
||||
|
||||
GoogleString output_; // content after instaweb rewritten.
|
||||
apr_bucket_brigade* bucket_brigade_;
|
||||
ContentEncoding content_encoding_;
|
||||
const ContentType content_type_;
|
||||
|
||||
ApacheServerContext* server_context_;
|
||||
RewriteDriver* rewrite_driver_;
|
||||
StringWriter string_writer_;
|
||||
scoped_ptr<GzipInflater> inflater_;
|
||||
HtmlDetector html_detector_;
|
||||
GoogleString absolute_url_;
|
||||
scoped_ptr<RequestHeaders> request_headers_;
|
||||
ResponseHeaders response_headers_;
|
||||
bool started_parse_;
|
||||
bool sent_headers_;
|
||||
bool populated_headers_;
|
||||
bool modify_caching_headers_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(InstawebContext);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_INSTAWEB_CONTEXT_H_
|
||||
@@ -1,59 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: lsong@google.com (Libo Song)
|
||||
// jmarantz@google.com (Joshua Marantz)
|
||||
//
|
||||
// The Apache handler for rewritten resources and a couple other Apache hooks.
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_INSTAWEB_HANDLER_H_
|
||||
#define NET_INSTAWEB_APACHE_INSTAWEB_HANDLER_H_
|
||||
|
||||
#include "apr_pools.h" // for apr_status_t
|
||||
// The httpd header must be after the instaweb_context.h. Otherwise,
|
||||
// the compiler will complain
|
||||
// "strtoul_is_not_a_portable_function_use_strtol_instead".
|
||||
#include "httpd.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class ApacheServerContext;
|
||||
|
||||
// Was this request made by mod_pagespeed itself? If so, we should not try to
|
||||
// handle it, just let Apache deal with it like normal.
|
||||
// Returns true when |request| was issued by mod_pagespeed itself.
bool is_pagespeed_subrequest(request_rec* request);
|
||||
|
||||
// Handle mod_pagespeed-specific requests. Handles both .pagespeed. rewritten
|
||||
// resources and /mod_pagespeed_statistics, /mod_pagespeed_beacon, etc.
|
||||
// TODO(sligocki): Why not make each of these different handlers?
|
||||
// NOTE(review): the returned status is presumably an Apache handler code
// (e.g. OK / DECLINED) -- confirm against the implementation.
apr_status_t instaweb_handler(request_rec* request);
|
||||
|
||||
// Save the original URL as a request "note" before mod_rewrite has
|
||||
// a chance to corrupt mod_pagespeed's generated URLs, which would
|
||||
// prevent instaweb_handler from being able to decode the resource.
|
||||
// This is the entry point registered as the Apache hook; the shared
// implementation is save_url_in_note().
apr_status_t save_url_hook(request_rec *request);
|
||||
|
||||
// Implementation of the Apache 'translate_name' hook. Used by the actual hook
|
||||
// 'save_url_hook' and directly when we already have the server context.
|
||||
// |server_context| is the server context the caller has already looked up
// for |request|.
apr_status_t save_url_in_note(request_rec *request,
|
||||
ApacheServerContext* server_context);
|
||||
|
||||
// By default, apache imposes limitations on URL segments of around
|
||||
// 256 characters that appear to correspond to filename limitations.
|
||||
// To prevent that, we hook map_to_storage for our own purposes.
|
||||
// NOTE(review): presumably only mod_pagespeed's own URLs are claimed here and
// everything else is left to Apache's default mapping -- confirm in the .cc.
apr_status_t instaweb_map_to_storage(request_rec* request);
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_INSTAWEB_HANDLER_H_
|
||||
@@ -1,64 +0,0 @@
|
||||
// Copyright 2012 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: morlovich@google.com (Maksim Orlovich)
|
||||
//
|
||||
// Interfaces with mod_spdy's exported functions.
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_INTERFACE_MOD_SPDY_H_
|
||||
#define NET_INSTAWEB_APACHE_INTERFACE_MOD_SPDY_H_
|
||||
|
||||
#include "util_filter.h"
|
||||
|
||||
#include "third_party/mod_spdy/src/mod_spdy/apache/slave_connection_api.h"
|
||||
|
||||
struct conn_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Needs to be called from an ap_hook_optional_fn_retrieve hook.
// NOTE(review): presumably this is where the optional functions exported by
// mod_spdy (and mod_ssl, see mod_ssl_is_https) are looked up -- confirm.
|
||||
void attach_mod_spdy();
|
||||
|
||||
// If the connection is using SPDY with mod_spdy, returns the protocol
|
||||
// version. Otherwise, returns 0.
|
||||
// |conn| is the Apache connection record to inspect.
int mod_spdy_get_spdy_version(conn_rec* conn);
|
||||
|
||||
// See mod_spdy's slave_connection_api.h for description of the methods below.
|
||||
// These are merely forwarding wrappers with some CHECKS.
|
||||
// Note that mod_spdy_create_slave_connection_factory will return NULL if the
// relevant mod_spdy methods
|
||||
// weren't found registered with Apache. Others, however, will CHECK-fail
|
||||
// (since there is no sensible way to call them if this method failed);
|
||||
// except you can always safely mod_spdy_destroy_slave_connection_factory(NULL).
|
||||
spdy_slave_connection_factory* mod_spdy_create_slave_connection_factory(
|
||||
conn_rec* master_connection);
|
||||
// Safe to call with a NULL factory (see the comment above).
void mod_spdy_destroy_slave_connection_factory(
|
||||
spdy_slave_connection_factory* factory);
|
||||
|
||||
// The filter records and their opaque context pointers are handed through to
// mod_spdy; their meaning is described in slave_connection_api.h.
spdy_slave_connection* mod_spdy_create_slave_connection(
|
||||
spdy_slave_connection_factory* factory,
|
||||
ap_filter_rec_t* input_filter,
|
||||
||||
void* input_filter_ctx,
|
||||
ap_filter_rec_t* output_filter,
|
||||
void* output_filter_ctx);
|
||||
|
||||
void mod_spdy_run_slave_connection(spdy_slave_connection* conn);
|
||||
void mod_spdy_destroy_slave_connection(spdy_slave_connection* conn);
|
||||
|
||||
// Returns true if the given connection is using HTTPS.
|
||||
// (This is actually a mod_ssl function).
|
||||
// NOTE(review): behavior when mod_ssl is not loaded is not visible from this
// header -- confirm in the implementation.
bool mod_ssl_is_https(conn_rec* conn);
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_INTERFACE_MOD_SPDY_H_
|
||||
@@ -1,49 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_LOG_MESSAGE_HANDLER_H_
|
||||
#define NET_INSTAWEB_APACHE_LOG_MESSAGE_HANDLER_H_
|
||||
|
||||
#include <algorithm> // for std::min
|
||||
#include "apr_pools.h"
|
||||
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
struct server_rec;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Process-wide routing of LOG() output into the Apache error log.
namespace log_message_handler {
|
||||
|
||||
// Install a log message handler that routes LOG() messages to the
|
||||
// apache error log. Should be called once at startup.
|
||||
// NOTE(review): |pool| presumably bounds the lifetime of the handler's
// allocations -- confirm.
void Install(apr_pool_t* pool);
|
||||
|
||||
// The log_message_handler is not attached to a specific server_rec, so the
|
||||
// LogLevel is not automatically set for it. Every server_rec instance
|
||||
// should call AddServerConfig and let us decide what level to log at.
|
||||
// Currently we set it to the min LogLevel.
|
||||
// NOTE(review): |version| is presumably the module version string included
// in log output -- confirm.
void AddServerConfig(const server_rec* server, const StringPiece& version);
|
||||
|
||||
// Free the memory from the log message handler.
|
||||
void ShutDown();
|
||||
|
||||
} // namespace log_message_handler
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_LOG_MESSAGE_HANDLER_H_
|
||||
@@ -1,145 +0,0 @@
|
||||
// Copyright 2012 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: morlovich@google.com (Maksim Orlovich)
|
||||
|
||||
#include "net/instaweb/apache/loopback_route_fetcher.h"
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "net/instaweb/http/public/async_fetch.h"
|
||||
#include "net/instaweb/http/public/meta_data.h"
|
||||
#include "net/instaweb/http/public/request_headers.h"
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/rewriter/public/domain_lawyer.h"
|
||||
#include "net/instaweb/rewriter/public/rewrite_options.h"
|
||||
#include "net/instaweb/util/public/google_url.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
#include "apr_network_io.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
|
||||
// Stores the (unowned) options and backend fetcher together with the IP and
// port that unknown-origin fetches get rerouted to.  An empty own_ip falls
// back to the IPv4 loopback address.
LoopbackRouteFetcher::LoopbackRouteFetcher(
    const RewriteOptions* options,
    const GoogleString& own_ip,
    int own_port,
    UrlAsyncFetcher* backend_fetcher)
    : options_(options),
      own_ip_(own_ip.empty() ? GoogleString("127.0.0.1") : own_ip),
      own_port_(own_port),
      backend_fetcher_(backend_fetcher) {
}
|
||||
|
||||
// Nothing to release: none of the pointers held by this class are owned.
LoopbackRouteFetcher::~LoopbackRouteFetcher() {}
|
||||
|
||||
void LoopbackRouteFetcher::Fetch(const GoogleString& original_url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch) {
|
||||
GoogleString url = original_url;
|
||||
GoogleUrl parsed_url(original_url);
|
||||
|
||||
if (!parsed_url.is_valid()) {
|
||||
// Fail immediately in case we can't parse the URL, rather than risk
|
||||
// getting weird handling due to inconsistencies in parsing between us
|
||||
// and backend_fetcher_.
|
||||
LOG(WARNING) << "Can't parse URL:" << original_url;
|
||||
fetch->Done(false);
|
||||
return;
|
||||
}
|
||||
|
||||
RequestHeaders* request_headers = fetch->request_headers();
|
||||
|
||||
// Check to see if the URL we hand to the backend has an origin we were never
|
||||
// explicitly told of, and if so just talk to loopback.
|
||||
// Note that in case of an origin mapping the parsed_url will contain the
|
||||
// fetch host, not the original host, so the domain_lawyer will know about it
|
||||
// and the if body will not run.
|
||||
if (!options_->domain_lawyer()->IsOriginKnown(parsed_url)) {
|
||||
// If there is no host header, make sure to add one, since we are about
|
||||
// to munge the URL.
|
||||
if (request_headers->Lookup1(HttpAttributes::kHost) == NULL) {
|
||||
request_headers->Replace(HttpAttributes::kHost, parsed_url.HostAndPort());
|
||||
}
|
||||
|
||||
GoogleUrl base;
|
||||
StringPiece scheme = parsed_url.Scheme();
|
||||
if ((own_port_ == 80 && scheme == "http") ||
|
||||
(own_port_ == 443 && scheme == "https")) {
|
||||
base.Reset(StrCat(scheme, "://", own_ip_, "/"));
|
||||
} else {
|
||||
base.Reset(
|
||||
StrCat(scheme, "://", own_ip_, ":", IntegerToString(own_port_), "/"));
|
||||
}
|
||||
|
||||
GoogleString rel;
|
||||
parsed_url.PathAndLeaf().CopyToString(&rel);
|
||||
|
||||
parsed_url.Reset(base, rel);
|
||||
parsed_url.Spec().CopyToString(&url);
|
||||
|
||||
// Note that we end up with host: containing the actual URL's host, but
|
||||
// the URL containing just our IP. This is technically wrong, but the
|
||||
// Serf fetcher will interpret it in the way we want it to --- it will
|
||||
// connect to our IP, pass only the path portion to the host, and
|
||||
// keep the host: header matching what's in the request_headers.
|
||||
}
|
||||
|
||||
backend_fetcher_->Fetch(url, message_handler, fetch);
|
||||
}
|
||||
|
||||
bool LoopbackRouteFetcher::IsLoopbackAddr(const apr_sockaddr_t* addr) {
|
||||
if (addr->family == APR_INET) {
|
||||
// 127.0.0.0/8 is the IPv4 loopback.
|
||||
// Note: is network byte order, so we can do char-wide indexing into it
|
||||
// consistently (but not look at the whole thing).
|
||||
const char* ipbytes = reinterpret_cast<const char*>(
|
||||
&addr->sa.sin.sin_addr.s_addr);
|
||||
return (ipbytes[0] == 127);
|
||||
} else if (addr->family == APR_INET6) {
|
||||
const in6_addr& addr_v6 = addr->sa.sin6.sin6_addr;
|
||||
|
||||
// There are a couple of ways we can see loopbacks in IPv6: as the
|
||||
// proper IPv6 loopback, ::1, or as "IPv4-compatible IPv6 address"
|
||||
// of the IPv4 loopback, ::FFFF:127.x.y.z.
|
||||
|
||||
// Regardless, the first 10 bytes ought to be 0.
|
||||
for (int b = 0; b < 10; ++b) {
|
||||
if (addr_v6.s6_addr[b] != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// If first 10 are OK, check the last 6 bytes for the 2 options.
|
||||
return (addr_v6.s6_addr[10] == 0xFF &&
|
||||
addr_v6.s6_addr[11] == 0xFF &&
|
||||
addr_v6.s6_addr[12] == 127) ||
|
||||
(addr_v6.s6_addr[10] == 0 &&
|
||||
addr_v6.s6_addr[11] == 0 &&
|
||||
addr_v6.s6_addr[12] == 0 &&
|
||||
addr_v6.s6_addr[13] == 0 &&
|
||||
addr_v6.s6_addr[14] == 0 &&
|
||||
addr_v6.s6_addr[15] == 1);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace net_instaweb
|
||||
@@ -1,74 +0,0 @@
|
||||
// Copyright 2012 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: morlovich@google.com (Maksim Orlovich)
|
||||
//
|
||||
// This fetcher routes requests to hosts that are not explicitly mentioned in
|
||||
// the DomainLawyer towards our own IP, as extracted from the incoming
|
||||
// connection.
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_LOOPBACK_ROUTE_FETCHER_H_
|
||||
#define NET_INSTAWEB_APACHE_LOOPBACK_ROUTE_FETCHER_H_
|
||||
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
struct apr_sockaddr_t;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class RewriteOptions;
|
||||
class MessageHandler;
|
||||
|
||||
// See file comment.  In short: a UrlAsyncFetcher wrapper that sends fetches
// for origins unknown to the DomainLawyer to this server's own IP and port,
// delegating the actual transfer to backend_fetcher.
|
||||
class LoopbackRouteFetcher : public UrlAsyncFetcher {
|
||||
public:
|
||||
// Does not take ownership of anything. own_port is the port the incoming
|
||||
// request came in on, and own_ip is the same for the IP. If the
|
||||
// backend_fetcher does actual fetching (and is not merely simulating it for
|
||||
// testing purposes) it should be the Serf fetcher, as others may not direct
|
||||
// requests this class produces properly.
|
||||
// (As this fetcher may produce requests that need to connect to some IP
|
||||
// but have a Host: and URL from somewhere else).
|
||||
// An empty own_ip is replaced by "127.0.0.1".
LoopbackRouteFetcher(const RewriteOptions* options,
|
||||
const GoogleString& own_ip,
|
||||
int own_port,
|
||||
UrlAsyncFetcher* backend_fetcher);
|
||||
virtual ~LoopbackRouteFetcher();
|
||||
|
||||
// HTTPS support is whatever the wrapped backend fetcher reports.
virtual bool SupportsHttps() const {
|
||||
return backend_fetcher_->SupportsHttps();
|
||||
}
|
||||
|
||||
// Forwards the fetch to backend_fetcher.  When url's origin is not known to
// the DomainLawyer, the URL is first rewritten to point at own_ip:own_port
// (adding a Host: header if the request has none).  If url cannot be parsed,
// fetch->Done(false) is called and nothing is fetched.
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch);
|
||||
|
||||
// Returns true if the given address is an IPv4 or IPv6 loopback.
|
||||
static bool IsLoopbackAddr(const apr_sockaddr_t* addr);
|
||||
|
||||
private:
|
||||
// Not owned; consulted for its DomainLawyer on every fetch.
const RewriteOptions* const options_;
|
||||
// Target of rerouted fetches; own_ip_ is never empty after construction.
GoogleString own_ip_;
|
||||
int own_port_;
|
||||
// Not owned; performs the actual fetches.
UrlAsyncFetcher* const backend_fetcher_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(LoopbackRouteFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_LOOPBACK_ROUTE_FETCHER_H_
|
||||
@@ -1,42 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jefftk@google.com (Jeff Kaufman)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_MOD_INSTAWEB_H_
|
||||
#define NET_INSTAWEB_APACHE_MOD_INSTAWEB_H_
|
||||
|
||||
#include "http_config.h"
|
||||
#include "httpd.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Names of the Apache filters used by mod_pagespeed.
// Filter used for HTML rewriting.
|
||||
const char kModPagespeedFilterName[] = "MOD_PAGESPEED_OUTPUT_FILTER";
|
||||
// Filter used to fix headers after mod_headers runs.
|
||||
const char kModPagespeedFixHeadersName[] = "MOD_PAGESPEED_FIX_HEADERS_FILTER";
|
||||
// Filters used for In-Place Resource Optimization.
|
||||
// First filter stores un-gzipped contents.
|
||||
const char kModPagespeedInPlaceFilterName[] = "MOD_PAGESPEED_IN_PLACE_FILTER";
|
||||
// Second filter checks headers for cacheability.
|
||||
const char kModPagespeedInPlaceCheckHeadersName[] =
|
||||
"MOD_PAGESPEED_IN_PLACE_CHECK_HEADERS_FILTER";
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
// The Apache module record for mod_pagespeed, declared with C linkage so the
// symbol name is not mangled.
extern "C" {
|
||||
extern module AP_MODULE_DECLARE_DATA pagespeed_module;
|
||||
}
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_MOD_INSTAWEB_H_
|
||||
@@ -1,67 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: morlovich@google.com (Maksim Orlovich)
|
||||
//
|
||||
// ModSpdyFetchController coordinates a threadpool and a rate controller between
|
||||
// multiple ModSpdyFetcher objects. The basic usage pattern is that
|
||||
// ModSpdyFetcher::Fetch calls ModSpdyFetchController::ScheduleBlockingFetch,
|
||||
// which will then cause ModSpdyFetcher::BlockingFetch to be called on a
|
||||
// thread in a hopefully intelligent manner.
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_MOD_SPDY_FETCH_CONTROLLER_H_
|
||||
#define NET_INSTAWEB_APACHE_MOD_SPDY_FETCH_CONTROLLER_H_
|
||||
|
||||
#include "net/instaweb/http/public/rate_controller.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/queued_worker_pool.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class MessageHandler;
|
||||
class ModSpdyFetcher;
|
||||
class Statistics;
|
||||
class ThreadSystem;
|
||||
|
||||
// Coordinates a thread pool and a rate controller between multiple
// ModSpdyFetcher objects (see the file comment for the usage pattern).
class ModSpdyFetchController {
|
||||
public:
|
||||
// Note: RateController::InitStats must have been called before using this.
|
||||
// NOTE(review): num_threads is presumably the size of the worker pool that
// runs the blocking fetches -- confirm in the .cc.
ModSpdyFetchController(int num_threads,
|
||||
ThreadSystem* thread_system,
|
||||
Statistics* statistics);
|
||||
~ModSpdyFetchController();
|
||||
|
||||
// Arranges for fetcher->BlockingFetch to be called on our thread pool.
|
||||
void ScheduleBlockingFetch(
|
||||
ModSpdyFetcher* fetcher, const GoogleString& url,
|
||||
MessageHandler* message_handler, AsyncFetch* fetch);
|
||||
|
||||
// TODO(morlovich): Add a ShutDown(), with semantics matching those
|
||||
// of UrlAsyncFetcher::ShutDown, and invoked similarly.
|
||||
|
||||
private:
|
||||
// Defined in the .cc file.
class FetchDispatcher;
|
||||
|
||||
RateController rate_controller_;
|
||||
QueuedWorkerPool thread_pool_;
|
||||
DISALLOW_COPY_AND_ASSIGN(ModSpdyFetchController);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_MOD_SPDY_FETCH_CONTROLLER_H_
|
||||
@@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: morlovich@google.com (Maksim Orlovich)
|
||||
//
|
||||
// A fetcher that talks to mod_spdy for requests matching a certain
|
||||
// domain (and passes the rest to fallthrough fetcher).
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_MOD_SPDY_FETCHER_H_
|
||||
#define NET_INSTAWEB_APACHE_MOD_SPDY_FETCHER_H_
|
||||
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
|
||||
#include "httpd.h"
|
||||
|
||||
|
||||
#include "net/instaweb/apache/interface_mod_spdy.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
struct request_rec;
|
||||
struct spdy_slave_connection_factory;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class MessageHandler;
|
||||
class ModSpdyFetchController;
|
||||
class RewriteDriver;
|
||||
|
||||
// A fetcher that talks to mod_spdy for requests matching a certain domain
// and passes the rest to a fallback fetcher (see the file comment).
class ModSpdyFetcher : public UrlAsyncFetcher {
|
||||
public:
|
||||
// Initializes various filters this fetcher needs for operation.
|
||||
// This must be called from within a register hooks implementation.
|
||||
static void Initialize();
|
||||
|
||||
// NOTE(review): presumably own_origin_ is derived from |url| and
// fallback_fetcher_ is obtained from |driver| -- confirm in the .cc.
ModSpdyFetcher(ModSpdyFetchController* controller,
|
||||
StringPiece url, RewriteDriver* driver,
|
||||
spdy_slave_connection_factory* connection_factory);
|
||||
virtual ~ModSpdyFetcher();
|
||||
|
||||
// UrlAsyncFetcher interface.  Per the file comment of
// mod_spdy_fetch_controller.h, this schedules BlockingFetch through the
// controller.
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch);
|
||||
|
||||
// Returns true if a ModSpdyFetcher should be installed as a session fetcher
|
||||
// on a given connection.
|
||||
static bool ShouldUseOn(request_rec* req);
|
||||
|
||||
// TODO(morlovich): Implement virtual void ShutDown(),
|
||||
// and give a good story on session fetchers and fetcher shutdowns in general.
|
||||
|
||||
private:
|
||||
// The controller needs access to BlockingFetch.
friend class ModSpdyFetchController;
|
||||
|
||||
// The actual implementation of fetching code, normally called by
|
||||
// ModSpdyFetchController.
|
||||
void BlockingFetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch);
|
||||
|
||||
ModSpdyFetchController* controller_;
|
||||
UrlAsyncFetcher* fallback_fetcher_;
|
||||
GoogleString own_origin_; // empty if we couldn't figure it out.
|
||||
spdy_slave_connection_factory* connection_factory_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ModSpdyFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_MOD_SPDY_FETCHER_H_
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,261 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
// lsong@google.com (Libo Song)
|
||||
|
||||
#ifndef NET_INSTAWEB_APACHE_SERF_URL_ASYNC_FETCHER_H_
|
||||
#define NET_INSTAWEB_APACHE_SERF_URL_ASYNC_FETCHER_H_
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "net/instaweb/http/public/url_pollable_async_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/pool.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/thread_system.h"
|
||||
|
||||
// To enable HTTPS fetching with serf, we must link against OpenSSL,
|
||||
// which is a large library with licensing restrictions not known to
|
||||
// be wholly in line with the Apache license. To enable HTTPS fetching:
|
||||
// 1. Set SERF_HTTPS_FETCHING to 1 here
|
||||
// 2. Uncomment the references to openssl.gyp and ssl_buckets.c in
|
||||
// src/third_party/serf/serf.gyp.
|
||||
// 3. Uncomment both references to openssl in src/DEPS.
|
||||
//
|
||||
// If this is enabled, then the HTTPS fetching can be tested with
|
||||
// install/apache_https_fetch_test.sh
|
||||
// Defaults to 0 (disabled) unless the build already defined it.
#ifndef SERF_HTTPS_FETCHING
|
||||
#define SERF_HTTPS_FETCHING 0
|
||||
#endif
|
||||
|
||||
struct apr_pool_t;
|
||||
struct serf_context_t;
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class MessageHandler;
|
||||
class Statistics;
|
||||
class SerfFetch;
|
||||
class SerfThreadedFetcher;
|
||||
class Timer;
|
||||
class Variable;
|
||||
|
||||
// Names of the statistics variables kept for the Serf fetcher.
// NOTE(review): presumably these are the names registered by
// SerfUrlAsyncFetcher::InitStats -- confirm in the .cc.
struct SerfStats {
|
||||
static const char kSerfFetchRequestCount[];
|
||||
static const char kSerfFetchByteCount[];
|
||||
static const char kSerfFetchTimeDurationMs[];
|
||||
static const char kSerfFetchCancelCount[];
|
||||
static const char kSerfFetchActiveCount[];
|
||||
static const char kSerfFetchTimeoutCount[];
|
||||
static const char kSerfFetchFailureCount[];
|
||||
static const char kSerfFetchCertErrors[];
|
||||
};
|
||||
|
||||
// Identifies the set of HTTPS keywords. This is used in error messages emitted
|
||||
// both from the config parser in this module, and in the directives table in
|
||||
// mod_instaweb.cc which must be statically constructed using a compile-time
|
||||
// concatenation. Hence this must be a literal string and not a const char*.
|
||||
#define SERF_HTTPS_KEYWORDS \
|
||||
"enable,disable,allow_self_signed," \
|
||||
"allow_unknown_certificate_authority,allow_certificate_not_yet_valid"
|
||||
|
||||
// TODO(sligocki): Serf does not seem to act appropriately in IPv6
|
||||
// environments, fix and test this.
|
||||
// Specifically:
|
||||
// (1) It does not attempt to fall-back to IPv4 if IPv6 connection fails;
|
||||
// (2) It may not correctly signal failure, which causes the incoming
|
||||
// connection to hang.
|
||||
class SerfUrlAsyncFetcher : public UrlPollableAsyncFetcher {
|
||||
public:
|
||||
enum WaitChoice {
|
||||
kThreadedOnly,
|
||||
kMainlineOnly,
|
||||
kThreadedAndMainline
|
||||
};
|
||||
|
||||
SerfUrlAsyncFetcher(const char* proxy, apr_pool_t* pool,
|
||||
ThreadSystem* thread_system,
|
||||
Statistics* statistics, Timer* timer, int64 timeout_ms,
|
||||
MessageHandler* handler);
|
||||
SerfUrlAsyncFetcher(SerfUrlAsyncFetcher* parent, const char* proxy);
|
||||
virtual ~SerfUrlAsyncFetcher();
|
||||
|
||||
static void InitStats(Statistics* statistics);
|
||||
|
||||
// Stops all active fetches and prevents further fetches from starting
|
||||
// (they will instead quickly call back to ->Done(false).
|
||||
virtual void ShutDown();
|
||||
|
||||
virtual bool SupportsHttps() const;
|
||||
|
||||
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* callback);
|
||||
|
||||
virtual int Poll(int64 max_wait_ms);
|
||||
|
||||
bool WaitForActiveFetches(int64 max_milliseconds,
|
||||
MessageHandler* message_handler,
|
||||
WaitChoice wait_choice);
|
||||
|
||||
// Remove the completed fetch from the active fetch set, and put it into a
|
||||
// completed fetch list to be cleaned up.
|
||||
void FetchComplete(SerfFetch* fetch);
|
||||
apr_pool_t* pool() const { return pool_; }
|
||||
serf_context_t* serf_context() const { return serf_context_; }
|
||||
|
||||
void PrintActiveFetches(MessageHandler* handler) const;
|
||||
virtual int64 timeout_ms() { return timeout_ms_; }
|
||||
ThreadSystem* thread_system() { return thread_system_; }
|
||||
|
||||
// By default, the Serf fetcher will call
|
||||
// UrlAsyncFetcher::Callback::EnableThreaded() to determine whether
|
||||
// a particular URL fetch should be executed in the fetcher thread.
|
||||
//
|
||||
// Setting this variable causes the fetches to be threaded independent
|
||||
// of the value of UrlAsyncFetcher::Callback::EnableThreaded().
|
||||
void set_force_threaded(bool x) { force_threaded_ = x; }
|
||||
|
||||
// Indicates that Serf should enumerate failing URLs whenever the underlying
|
||||
// Serf library reports an error.
|
||||
void set_list_outstanding_urls_on_error(bool x);
|
||||
|
||||
// Indicates that Serf should track the original content length for
|
||||
// fetched resources.
|
||||
bool track_original_content_length() const {
|
||||
return track_original_content_length_;
|
||||
}
|
||||
void set_track_original_content_length(bool x);
|
||||
|
||||
void set_inflation_content_type_blacklist(
|
||||
const std::set<const ContentType*>& bypass_set) {
|
||||
inflation_content_type_blacklist_ = bypass_set;
|
||||
}
|
||||
|
||||
// Indicates that direct HTTPS fetching should be allowed, and how picky
|
||||
// to be about certificates. The directive is a comma separated list of
|
||||
// these keywords:
|
||||
// enable
|
||||
// disable
|
||||
// allow_self_signed
|
||||
// allow_unknown_certificate_authority
|
||||
// allow_certificate_not_yet_valid
|
||||
// Returns 'false' if the directive does not parse properly.
|
||||
bool SetHttpsOptions(StringPiece directive);
|
||||
|
||||
// Validates the correctness of an https directive. Exposed as a static
|
||||
// method for early exit on mis-specified pagespeed.conf.
|
||||
static bool ValidateHttpsOptions(StringPiece directive,
|
||||
GoogleString* error_message) {
|
||||
uint32 options;
|
||||
return ParseHttpsOptions(directive, &options, error_message);
|
||||
}
|
||||
|
||||
protected:
|
||||
typedef Pool<SerfFetch> SerfFetchPool;
|
||||
|
||||
// Determines whether https is allowed in the current configuration.
|
||||
inline bool allow_https() const;
|
||||
inline bool allow_self_signed() const;
|
||||
inline bool allow_unknown_certificate_authority() const;
|
||||
inline bool allow_certificate_not_yet_valid() const;
|
||||
|
||||
void set_https_options(uint32 https_options) {
|
||||
https_options_ = https_options;
|
||||
}
|
||||
|
||||
void Init(apr_pool_t* parent_pool, const char* proxy);
|
||||
bool SetupProxy(const char* proxy);
|
||||
|
||||
// Start a SerfFetch. Takes ownership of fetch and makes sure callback is
|
||||
// called even if fetch fails to start.
|
||||
//
|
||||
// mutex_ must be held before calling StartFetch.
|
||||
bool StartFetch(SerfFetch* fetch);
|
||||
|
||||
// AnyPendingFetches is accurate only at the time of call; this is
|
||||
// used conservatively during shutdown. It counts fetches that have been
|
||||
// requested by some thread, and can include fetches for which no action
|
||||
// has yet been taken (ie fetches that are not active).
|
||||
virtual bool AnyPendingFetches();
|
||||
// ApproximateNumActiveFetches can under- or over-count and is used only for
|
||||
// error reporting.
|
||||
int ApproximateNumActiveFetches();
|
||||
|
||||
void CancelActiveFetches();
|
||||
void CancelActiveFetchesMutexHeld();
|
||||
bool WaitForActiveFetchesHelper(int64 max_ms,
|
||||
MessageHandler* message_handler);
|
||||
|
||||
// This cleans up the serf resources for fetches that errored out.
|
||||
// Must be called only immediately after running the serf event loop.
|
||||
// Must be called with mutex_ held.
|
||||
void CleanupFetchesWithErrors();
|
||||
|
||||
// These must be accessed with mutex_ held.
|
||||
bool shutdown() const { return shutdown_; }
|
||||
void set_shutdown(bool s) { shutdown_ = s; }
|
||||
|
||||
apr_pool_t* pool_;
|
||||
ThreadSystem* thread_system_;
|
||||
Timer* timer_;
|
||||
|
||||
// mutex_ protects serf_context_ and active_fetches_.
|
||||
ThreadSystem::CondvarCapableMutex* mutex_;
|
||||
serf_context_t* serf_context_;
|
||||
SerfFetchPool active_fetches_;
|
||||
|
||||
typedef std::vector<SerfFetch*> FetchVector;
|
||||
SerfFetchPool completed_fetches_;
|
||||
SerfThreadedFetcher* threaded_fetcher_;
|
||||
|
||||
// This is protected because it's updated along with active_fetches_,
|
||||
// which happens in subclass SerfThreadedFetcher as well as this class.
|
||||
Variable* active_count_;
|
||||
|
||||
private:
|
||||
friend class SerfFetch; // To access stats variables below.
|
||||
|
||||
static bool ParseHttpsOptions(StringPiece directive, uint32* options,
|
||||
GoogleString* error_message);
|
||||
|
||||
Variable* request_count_;
|
||||
Variable* byte_count_;
|
||||
Variable* time_duration_ms_;
|
||||
Variable* cancel_count_;
|
||||
Variable* timeout_count_;
|
||||
Variable* failure_count_;
|
||||
Variable* cert_errors_;
|
||||
const int64 timeout_ms_;
|
||||
bool force_threaded_;
|
||||
bool shutdown_;
|
||||
bool list_outstanding_urls_on_error_;
|
||||
bool track_original_content_length_;
|
||||
uint32 https_options_; // Composed of HttpsOptions ORed together.
|
||||
MessageHandler* message_handler_;
|
||||
|
||||
// Set of content types that will not be inflated, when passing through
|
||||
// inflating fetch.
|
||||
std::set<const ContentType*> inflation_content_type_blacklist_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(SerfUrlAsyncFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_APACHE_SERF_URL_ASYNC_FETCHER_H_
|
||||
@@ -1,177 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: pulkitg@google.com (Pulkit Goyal)
|
||||
|
||||
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_BLINK_FLOW_CRITICAL_LINE_H_
|
||||
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_BLINK_FLOW_CRITICAL_LINE_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/google_url.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractLogRecord;
|
||||
class AsyncFetch;
|
||||
class BlinkCriticalLineData;
|
||||
class BlinkCriticalLineDataFinder;
|
||||
class PropertyPage;
|
||||
class ProxyFetchPropertyCallbackCollector;
|
||||
class ProxyFetchFactory;
|
||||
class ServerContext;
|
||||
class RewriteOptions;
|
||||
class Statistics;
|
||||
class TimedVariable;
|
||||
|
||||
// This class manages the blink flow for looking up BlinkCriticalLineData in
|
||||
// cache, modifying the options for passthru and triggering asynchronous
|
||||
// lookups to compute the critical line and insert it into cache.
|
||||
class BlinkFlowCriticalLine {
|
||||
public:
|
||||
class LogHelper;
|
||||
|
||||
// These strings identify sync-points for reproducing races between foreground
|
||||
// serving request and background blink computation requests in tests.
|
||||
static const char kBackgroundComputationDone[];
|
||||
static const char kUpdateResponseCodeDone[];
|
||||
|
||||
static void Start(const GoogleString& url,
|
||||
AsyncFetch* base_fetch,
|
||||
RewriteOptions* options,
|
||||
ProxyFetchFactory* factory,
|
||||
ServerContext* manager,
|
||||
ProxyFetchPropertyCallbackCollector* property_callback);
|
||||
|
||||
virtual ~BlinkFlowCriticalLine();
|
||||
|
||||
static void InitStats(Statistics* statistics);
|
||||
|
||||
static const char kNumBlinkHtmlCacheHits[];
|
||||
static const char kNumBlinkHtmlCacheMisses[];
|
||||
static const char kNumBlinkSharedFetchesStarted[];
|
||||
static const char kNumBlinkSharedFetchesCompleted[];
|
||||
static const char kNumComputeBlinkCriticalLineDataCalls[];
|
||||
static const char kNumBlinkHtmlMatches[];
|
||||
static const char kNumBlinkHtmlMismatches[];
|
||||
static const char kNumBlinkHtmlMismatchesCacheDeletes[];
|
||||
static const char kNumBlinkHtmlSmartdiffMatches[];
|
||||
static const char kNumBlinkHtmlSmartdiffMismatches[];
|
||||
|
||||
private:
|
||||
BlinkFlowCriticalLine(const GoogleString& url,
|
||||
AsyncFetch* base_fetch,
|
||||
RewriteOptions* options,
|
||||
ProxyFetchFactory* factory,
|
||||
ServerContext* manager,
|
||||
ProxyFetchPropertyCallbackCollector* property_callback);
|
||||
|
||||
// Sets request start time.
|
||||
void SetStartRequestTimings();
|
||||
|
||||
// Sets the server side response start time.
|
||||
void SetResponseStartTime();
|
||||
|
||||
// Function called by the callback collector whenever property cache lookup
|
||||
// is done. Based on the result, it will call either
|
||||
// BlinkCriticalLineDataHit() or BlinkCriticalLineDataMiss().
|
||||
void BlinkCriticalLineDataLookupDone(
|
||||
ProxyFetchPropertyCallbackCollector* collector);
|
||||
|
||||
// Serves the critical html content to the client and triggers the proxy fetch
|
||||
// for non cacheable content.
|
||||
void BlinkCriticalLineDataHit();
|
||||
|
||||
// Serves the request in passthru mode and triggers a background request to
|
||||
// compute BlinkCriticalLineData.
|
||||
void BlinkCriticalLineDataMiss();
|
||||
|
||||
// Creates a rewrite driver and triggers proxy fetch.
|
||||
// critical_line_data_found indicates whether it is a cache hit case, while
|
||||
// serve_non_critical means that non critical needs to be served (i.e., not
|
||||
// yet served).
|
||||
void TriggerProxyFetch(bool critical_line_data_found,
|
||||
bool serve_non_critical);
|
||||
|
||||
void WriteResponseStartAndLookUpTimings();
|
||||
|
||||
// Serves all the panel contents including critical html, critical images json
|
||||
// and non critical json. This is the case when there are no cacheable panels
|
||||
// in the page.
|
||||
void ServeAllPanelContents();
|
||||
|
||||
// Serves critical panel contents including critical html and
|
||||
// critical images json. This is the case when there are cacheable panels
|
||||
// in the page.
|
||||
void ServeCriticalPanelContents();
|
||||
|
||||
// Sends critical html to the client.
|
||||
void SendCriticalHtml(const GoogleString& critical_json_str);
|
||||
|
||||
// Sends inline images json to the client.
|
||||
void SendInlineImagesJson(const GoogleString& pushed_images_str);
|
||||
|
||||
// Sends non critical json to the client.
|
||||
void SendNonCriticalJson(GoogleString* non_critical_json_str);
|
||||
|
||||
// Sends the lazyload filter javascript code.
|
||||
void SendLazyloadImagesJs();
|
||||
|
||||
void WriteString(const StringPiece& str);
|
||||
|
||||
int64 GetTimeElapsedFromStartRequest();
|
||||
|
||||
GoogleString GetAddTimingScriptString(const GoogleString& timing_str,
|
||||
int64 time_ms);
|
||||
|
||||
void Flush();
|
||||
|
||||
// Modify the rewrite options to be used in the background and user-facing
|
||||
// request when BlinkCriticalLineData is found in the cache.
|
||||
void SetFilterOptions(RewriteOptions* options) const;
|
||||
|
||||
// Returns true if property cache has last response code as non 200.
|
||||
bool IsLastResponseCodeInvalid(PropertyPage* page);
|
||||
|
||||
GoogleString url_;
|
||||
GoogleUrl google_url_;
|
||||
GoogleString critical_html_;
|
||||
AsyncFetch* base_fetch_;
|
||||
// Blink needs its own log record since it needs to log even after the main
|
||||
// log record is written out when the request processing is finished.
|
||||
scoped_ptr<AbstractLogRecord> blink_log_record_;
|
||||
RewriteOptions* options_;
|
||||
ProxyFetchFactory* factory_;
|
||||
ServerContext* manager_;
|
||||
ProxyFetchPropertyCallbackCollector* property_callback_;
|
||||
scoped_ptr<BlinkCriticalLineData> blink_critical_line_data_;
|
||||
BlinkCriticalLineDataFinder* finder_;
|
||||
int64 request_start_time_ms_;
|
||||
int64 time_to_start_blink_flow_critical_line_ms_;
|
||||
int64 time_to_critical_line_data_look_up_done_ms_;
|
||||
scoped_ptr<LogHelper> blink_log_helper_;
|
||||
|
||||
TimedVariable* num_blink_html_cache_hits_;
|
||||
TimedVariable* num_blink_shared_fetches_started_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(BlinkFlowCriticalLine);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_BLINK_FLOW_CRITICAL_LINE_H_
|
||||
@@ -1,135 +0,0 @@
|
||||
/*
|
||||
* Copyright 2013 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Authors: mmohabey@google.com (Megha Mohabey)
|
||||
// pulkitg@google.com (Pulkit Goyal)
|
||||
|
||||
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_CACHE_HTML_FLOW_H_
|
||||
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_CACHE_HTML_FLOW_H_
|
||||
|
||||
#include "net/instaweb/rewriter/cache_html_info.pb.h"
|
||||
#include "net/instaweb/util/public/google_url.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractLogRecord;
|
||||
class AsyncFetch;
|
||||
class FallbackPropertyPage;
|
||||
class MessageHandler;
|
||||
class PropertyPage;
|
||||
class ProxyFetchPropertyCallbackCollector;
|
||||
class ProxyFetchFactory;
|
||||
class ServerContext;
|
||||
class RewriteOptions;
|
||||
class RewriteDriver;
|
||||
class Statistics;
|
||||
class TimedVariable;
|
||||
|
||||
// CacheHtmlFlow manages the flow for an html request where we can flush a
|
||||
// cached html to the client before receiving a response from the origin server.
|
||||
// In order to flush the html early before we start getting bytes back from the
|
||||
// fetcher, we lookup property cache for CacheHtmlInfo. If found, we flush
|
||||
// cached html out (with the non cacheable parts removed) and then trigger the
|
||||
// normal ProxyFetch flow which extracts cookies and non cacheable parts from
|
||||
// the page and sends it out. If CacheHtmlInfo is not found in cache, we pass
|
||||
// this request through normal ProxyFetch flow buffering the html. In the
|
||||
// background we create a driver to parse it, remove the non-cacheable parts,
|
||||
// compute CacheHtmlInfo and store it into the property cache.
|
||||
class CacheHtmlFlow {
|
||||
public:
|
||||
class LogHelper;
|
||||
|
||||
// Identifies the sync-point for reproducing races between foreground
|
||||
// serving request and background cache html computation requests in tests.
|
||||
static const char kBackgroundComputationDone[];
|
||||
|
||||
static void Start(const GoogleString& url,
|
||||
AsyncFetch* base_fetch,
|
||||
RewriteDriver* driver,
|
||||
ProxyFetchFactory* factory,
|
||||
ProxyFetchPropertyCallbackCollector* property_callback);
|
||||
|
||||
virtual ~CacheHtmlFlow();
|
||||
|
||||
static void InitStats(Statistics* statistics);
|
||||
|
||||
static const char kNumCacheHtmlHits[];
|
||||
static const char kNumCacheHtmlMisses[];
|
||||
static const char kNumCacheHtmlMatches[];
|
||||
static const char kNumCacheHtmlMismatches[];
|
||||
static const char kNumCacheHtmlMismatchesCacheDeletes[];
|
||||
static const char kNumCacheHtmlSmartdiffMatches[];
|
||||
static const char kNumCacheHtmlSmartdiffMismatches[];
|
||||
|
||||
private:
|
||||
CacheHtmlFlow(const GoogleString& url,
|
||||
AsyncFetch* base_fetch,
|
||||
RewriteDriver* driver,
|
||||
ProxyFetchFactory* factory,
|
||||
ProxyFetchPropertyCallbackCollector* property_callback);
|
||||
|
||||
void CacheHtmlLookupDone();
|
||||
|
||||
void Cancel();
|
||||
|
||||
// Callback that is invoked after we rewrite the cached html.
|
||||
void CacheHtmlRewriteDone(bool flushed_split_js);
|
||||
|
||||
// Serves the cached html content to the client and triggers the proxy fetch
|
||||
// for non cacheable content.
|
||||
// TODO(pulkitg): Change the function GetHtmlCriticalImages to take
|
||||
// AbstractPropertyPage so that dependency on FallbackPropertyPage can be
|
||||
// removed.
|
||||
void CacheHtmlHit(FallbackPropertyPage* page);
|
||||
|
||||
// Serves the request in passthru mode and triggers a background request to
|
||||
// compute CacheHtmlInfo.
|
||||
void CacheHtmlMiss();
|
||||
|
||||
// Triggers proxy fetch.
|
||||
void TriggerProxyFetch();
|
||||
|
||||
// Populates the cache html info from the property cache to cache_html_info_.
|
||||
// It also determines whether this info is stale or not.
|
||||
void PopulateCacheHtmlInfo(PropertyPage* page);
|
||||
|
||||
GoogleString url_;
|
||||
GoogleUrl google_url_;
|
||||
AsyncFetch* base_fetch_;
|
||||
// Cache Html Flow needs its own log record since it needs to log even after
|
||||
// the main log record is written out when the request processing is finished.
|
||||
scoped_ptr<AbstractLogRecord> cache_html_log_record_;
|
||||
RewriteDriver* rewrite_driver_;
|
||||
const RewriteOptions* options_;
|
||||
ProxyFetchFactory* factory_;
|
||||
ServerContext* server_context_;
|
||||
ProxyFetchPropertyCallbackCollector* property_cache_callback_;
|
||||
MessageHandler* handler_;
|
||||
CacheHtmlInfo cache_html_info_;
|
||||
scoped_ptr<LogHelper> cache_html_log_helper_;
|
||||
|
||||
TimedVariable* num_cache_html_misses_;
|
||||
TimedVariable* num_cache_html_hits_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(CacheHtmlFlow);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_CACHE_HTML_FLOW_H_
|
||||
@@ -1,119 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: mmohabey@google.com (Megha Mohabey)
|
||||
|
||||
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_FLUSH_EARLY_FLOW_H_
|
||||
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_FLUSH_EARLY_FLOW_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/string_writer.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class FlushEarlyInfo;
|
||||
class Histogram;
|
||||
class MessageHandler;
|
||||
class ProxyFetchPropertyCallbackCollector;
|
||||
class ProxyFetchFactory;
|
||||
class ServerContext;
|
||||
class RewriteDriver;
|
||||
class Statistics;
|
||||
class TimedVariable;
|
||||
|
||||
// FlushEarlyFlow manages the flow for the rewriters which flush a response to
|
||||
// the client before receiving a response from the origin server. If a request
|
||||
// can be responded to early, then FlushEarlyFlow is initiated. It also has
|
||||
// helper functions to update the property cache with the response headers which
|
||||
// are used when a request is responded to early.
|
||||
class FlushEarlyFlow {
|
||||
public:
|
||||
static const char kNumRequestsFlushedEarly[];
|
||||
static const char kNumResourcesFlushedEarly[];
|
||||
static const char kFlushEarlyRewriteLatencyMs[];
|
||||
static const char kNumFlushEarlyHttpStatusCodeDeemedUnstable[];
|
||||
static const char kNumFlushEarlyRequestsRedirected[];
|
||||
static const char kRedirectPageJs[];
|
||||
|
||||
static void Start(
|
||||
const GoogleString& url,
|
||||
AsyncFetch** base_fetch,
|
||||
RewriteDriver* driver,
|
||||
ProxyFetchFactory* factory,
|
||||
ProxyFetchPropertyCallbackCollector* property_callback);
|
||||
|
||||
static void InitStats(Statistics* stats);
|
||||
|
||||
virtual ~FlushEarlyFlow();
|
||||
|
||||
private:
|
||||
class FlushEarlyAsyncFetch;
|
||||
// Flushes some response for this request before receiving the fetch response
|
||||
// from the origin server.
|
||||
void FlushEarly();
|
||||
|
||||
// Cancels the flush early flow.
|
||||
void Cancel();
|
||||
|
||||
FlushEarlyFlow(const GoogleString& url,
|
||||
AsyncFetch* base_fetch,
|
||||
FlushEarlyAsyncFetch* flush_early_fetch,
|
||||
RewriteDriver* driver,
|
||||
ProxyFetchFactory* factory,
|
||||
ProxyFetchPropertyCallbackCollector* property_cache_callback);
|
||||
|
||||
// Generates response headers from previous values stored in property cache.
|
||||
void GenerateResponseHeaders(const FlushEarlyInfo& flush_early_info);
|
||||
|
||||
// Callback that is invoked after we rewrite the early head.
|
||||
// start_time_ms indicates the time we started rewriting the flush early
|
||||
// head. This is set to -1 if is_experimental_hit is false.
|
||||
void FlushEarlyRewriteDone(int64 start_time_ms,
|
||||
RewriteDriver* flush_early_driver);
|
||||
|
||||
void Write(const StringPiece& val);
|
||||
|
||||
GoogleString url_;
|
||||
GoogleString dummy_head_;
|
||||
StringWriter dummy_head_writer_;
|
||||
int num_resources_flushed_;
|
||||
int num_rewritten_resources_;
|
||||
int64 average_fetch_time_;
|
||||
|
||||
AsyncFetch* base_fetch_;
|
||||
FlushEarlyAsyncFetch* flush_early_fetch_;
|
||||
RewriteDriver* driver_;
|
||||
ProxyFetchFactory* factory_;
|
||||
ServerContext* manager_;
|
||||
ProxyFetchPropertyCallbackCollector* property_cache_callback_;
|
||||
bool should_flush_early_lazyload_script_;
|
||||
MessageHandler* handler_;
|
||||
bool is_mobile_user_agent_;
|
||||
|
||||
TimedVariable* num_requests_flushed_early_;
|
||||
TimedVariable* num_resources_flushed_early_;
|
||||
TimedVariable* num_flush_early_http_status_code_deemed_unstable_;
|
||||
Histogram* flush_early_rewrite_latency_ms_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(FlushEarlyFlow);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_FLUSH_EARLY_FLOW_H_
|
||||
@@ -1,100 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: morlovich@google.com (Maksim Orlovich),
|
||||
// sligocki@google.com (Shawn Ligocki)
|
||||
//
|
||||
// This contains HtmlDetector, which tries to heuristically detect whether
|
||||
// content a server claims to be HTML actually is HTML (it sometimes isn't).
|
||||
|
||||
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_HTML_DETECTOR_H_
|
||||
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_HTML_DETECTOR_H_
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// This class tries to heuristically detect whether something that claims to
|
||||
// be HTML actually is. For now, it merely looks at whether the first
|
||||
// non-whitespace/non-BOM character is <.
|
||||
//
|
||||
// Typical usage:
|
||||
// HtmlDetector detect_html_;
|
||||
//
|
||||
// if (!detect_html_.already_decided() &&
|
||||
// detect_html_.ConsiderInput(data)) {
|
||||
// GoogleString buffered;
|
||||
// detect_html_.ReleaseBuffered(&buffered);
|
||||
// if (detect_html_.probable_html()) {
|
||||
// do html-specific bits with buffered
|
||||
// } else {
|
||||
// do non-html things with buffered
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (detect_html_.already_decided()) {
|
||||
// do appropriate things with data based on detect_html_.probable_html()
|
||||
// }
|
||||
class HtmlDetector {
|
||||
public:
|
||||
HtmlDetector();
|
||||
~HtmlDetector();
|
||||
|
||||
// Processes the data, trying to determine if it's HTML or not. If there is
|
||||
// enough evidence to make a decision, returns true.
|
||||
//
|
||||
// If true is returned, already_decided() will be true as well, and hence
|
||||
// probable_html() will be accessible. buffered() will not be changed.
|
||||
//
|
||||
// If false is returned, data will be accumulated inside buffered().
|
||||
//
|
||||
// Precondition: !already_decided()
|
||||
bool ConsiderInput(const StringPiece& data);
|
||||
|
||||
// Returns true if we have seen enough input to make a guess as to whether
|
||||
// it's HTML or not.
|
||||
bool already_decided() const { return already_decided_; }
|
||||
|
||||
// Precondition: already_decided() true (or ConsiderInput returning true).
|
||||
bool probable_html() const {
|
||||
DCHECK(already_decided_);
|
||||
return probable_html_;
|
||||
}
|
||||
|
||||
// Transfers any data that was buffered by ConsiderInput calls that returned
|
||||
// false into *out_buffer. The old value of out_buffer is overwritten, and
|
||||
// HtmlDetector's internal buffers are cleared.
|
||||
void ReleaseBuffered(GoogleString* out_buffer);
|
||||
|
||||
// Forces already_decided() to true, and probable_html() to match is_html.
|
||||
//
|
||||
// Precondition: !already_decided()
|
||||
void ForceDecision(bool is_html);
|
||||
|
||||
private:
|
||||
GoogleString buffer_;
|
||||
bool already_decided_;
|
||||
bool probable_html_; // valid only if already_decided_.
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlDetector);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_HTML_DETECTOR_H_
|
||||
@@ -1,489 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
//
|
||||
// NOTE: This interface is actively under development and may be
|
||||
// changed extensively. Contact us at mod-pagespeed-discuss@googlegroups.com
|
||||
// if you are interested in using it.
|
||||
|
||||
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_FETCH_H_
|
||||
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_FETCH_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "net/instaweb/automatic/public/html_detector.h"
|
||||
#include "net/instaweb/http/public/async_fetch.h"
|
||||
#include "net/instaweb/http/public/meta_data.h"
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/http/public/user_agent_matcher.h"
|
||||
#include "net/instaweb/util/public/queued_worker_pool.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/fallback_property_page.h"
|
||||
#include "net/instaweb/util/public/gtest_prod.h"
|
||||
#include "net/instaweb/util/public/property_cache.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractClientState;
|
||||
class AbstractLogRecord;
|
||||
class AbstractMutex;
|
||||
class CacheUrlAsyncFetcher;
|
||||
class Function;
|
||||
class MessageHandler;
|
||||
class ProxyFetch;
|
||||
class ProxyFetchPropertyCallbackCollector;
|
||||
class QueuedAlarm;
|
||||
class ServerContext;
|
||||
class ResponseHeaders;
|
||||
class RewriteDriver;
|
||||
class RewriteOptions;
|
||||
class Timer;
|
||||
|
||||
// Factory for creating and starting ProxyFetches. Must outlive all
|
||||
// ProxyFetches it creates.
|
||||
class ProxyFetchFactory {
|
||||
public:
|
||||
explicit ProxyFetchFactory(ServerContext* manager);
|
||||
~ProxyFetchFactory();
|
||||
|
||||
// Convenience method that calls CreateNewProxyFetch and then StartFetch() on
|
||||
// the resulting fetch.
|
||||
void StartNewProxyFetch(
|
||||
const GoogleString& url,
|
||||
AsyncFetch* async_fetch,
|
||||
RewriteDriver* driver,
|
||||
ProxyFetchPropertyCallbackCollector* property_callback,
|
||||
AsyncFetch* original_content_fetch);
|
||||
|
||||
// Creates a new proxy fetch and passes it to the fetcher to start it. If the
|
||||
// UrlNamer doesn't authorize this url it calls CleanUp() on the driver,
|
||||
// Detach() on the property callback, Done() on the async_fetch and
|
||||
// original_content_fetch, and returns NULL.
|
||||
//
|
||||
// If you're using a fetcher for the original request content you should use
|
||||
// StartNewProxyFetch() instead. CreateNewProxyFetch is for callers who will
|
||||
// not be calling StartFetch() and instead will call HeadersComplete(),
|
||||
// Write(), Flush(), and Done() as they get data in from another source.
|
||||
ProxyFetch* CreateNewProxyFetch(
|
||||
const GoogleString& url,
|
||||
AsyncFetch* async_fetch,
|
||||
RewriteDriver* driver,
|
||||
ProxyFetchPropertyCallbackCollector* property_callback,
|
||||
AsyncFetch* original_content_fetch);
|
||||
|
||||
MessageHandler* message_handler() const { return handler_; }
|
||||
|
||||
private:
|
||||
friend class ProxyFetch;
|
||||
|
||||
// Helps track the status of in-flight ProxyFetches. These are intended for
|
||||
// use only by ProxyFetch.
|
||||
//
|
||||
// TODO(jmarantz): Enumerate outstanding fetches in server status page.
|
||||
void RegisterNewFetch(ProxyFetch* proxy_fetch);
|
||||
void RegisterFinishedFetch(ProxyFetch* proxy_fetch);
|
||||
|
||||
ServerContext* manager_;
|
||||
Timer* timer_;
|
||||
MessageHandler* handler_;
|
||||
|
||||
scoped_ptr<AbstractMutex> outstanding_proxy_fetches_mutex_;
|
||||
std::set<ProxyFetch*> outstanding_proxy_fetches_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ProxyFetchFactory);
|
||||
};
|
||||
|
||||
// Tracks a single property-cache lookup. These lookups are initiated
|
||||
// immediately upon handling the request, in parallel with determining
|
||||
// domain-specific RewriteOptions and fetching the HTTP headers for the HTML.
|
||||
//
|
||||
// Request handling can proceed in parallel with the property-cache lookups,
|
||||
// including RewriteOptions lookup and initiating the HTTP fetch.  However,
|
||||
// handling incoming bytes will be blocked waiting for property-cache lookups
|
||||
// to complete.
|
||||
class ProxyFetchPropertyCallback : public PropertyPage {
|
||||
public:
|
||||
// The cache type associated with this callback.
|
||||
enum PageType {
|
||||
kPropertyCachePage,
|
||||
kPropertyCacheFallbackPage,
|
||||
kClientPropertyCachePage,
|
||||
kDevicePropertyCachePage,
|
||||
};
|
||||
|
||||
ProxyFetchPropertyCallback(PageType page_type,
|
||||
PropertyCache* property_cache,
|
||||
const StringPiece& key,
|
||||
UserAgentMatcher::DeviceType device_type,
|
||||
ProxyFetchPropertyCallbackCollector* collector,
|
||||
AbstractMutex* mutex);
|
||||
|
||||
PageType page_type() const { return page_type_; }
|
||||
|
||||
UserAgentMatcher::DeviceType device_type() const { return device_type_; }
|
||||
|
||||
// Delegates to collector_'s IsCacheValid.
|
||||
virtual bool IsCacheValid(int64 write_timestamp_ms) const;
|
||||
|
||||
virtual void Done(bool success);
|
||||
|
||||
// Adds logs for the given PropertyPage to the specified cohort info index.
|
||||
virtual void LogPageCohortInfo(AbstractLogRecord* log_record,
|
||||
int cohort_index);
|
||||
|
||||
private:
|
||||
PageType page_type_;
|
||||
UserAgentMatcher::DeviceType device_type_;
|
||||
ProxyFetchPropertyCallbackCollector* collector_;
|
||||
GoogleString url_;
|
||||
DISALLOW_COPY_AND_ASSIGN(ProxyFetchPropertyCallback);
|
||||
};
|
||||
|
||||
// Tracks a collection of property-cache lookups occurring in parallel.
|
||||
class ProxyFetchPropertyCallbackCollector {
|
||||
public:
|
||||
ProxyFetchPropertyCallbackCollector(ServerContext* manager,
|
||||
const StringPiece& url,
|
||||
const RequestContextPtr& req_ctx,
|
||||
const RewriteOptions* options,
|
||||
UserAgentMatcher::DeviceType device_type);
|
||||
virtual ~ProxyFetchPropertyCallbackCollector();
|
||||
|
||||
// Add a callback to be handled by this collector.
|
||||
// Transfers ownership of the callback to the collector.
|
||||
void AddCallback(ProxyFetchPropertyCallback* callback);
|
||||
|
||||
// In our flow, we initiate the property-cache lookup prior to
|
||||
// creating a proxy-fetch, so that RewriteOptions lookup can proceed
|
||||
// in parallel. If/when we determine that ProxyFetch is associated
|
||||
// with HTML content, we connect it to this callback. Note that if
|
||||
// the property cache lookups have completed, this will result in
|
||||
// a direct call into proxy_fetch->PropertyCacheComplete.
|
||||
void ConnectProxyFetch(ProxyFetch* proxy_fetch);
|
||||
|
||||
// If for any reason we decide *not* to initiate a ProxyFetch for a
|
||||
// request, then we need to 'detach' this request so that we can
|
||||
// delete it once it completes, rather than waiting for a
|
||||
// ProxyFetch to be inserted. The status code of the response is passed from
|
||||
// ProxyFetch to the Collector. In case the status code is unknown then pass
|
||||
// RewriteDriver::kStatusCodeUnknown.
|
||||
void Detach(HttpStatus::Code status_code);
|
||||
|
||||
// Returns the actual property page.
|
||||
PropertyPage* property_page() {
|
||||
return fallback_property_page_ == NULL ?
|
||||
NULL : fallback_property_page_->actual_property_page();
|
||||
}
|
||||
|
||||
// Returns the fallback property page.
|
||||
FallbackPropertyPage* fallback_property_page() {
|
||||
return fallback_property_page_.get();
|
||||
}
|
||||
|
||||
// Returns the collected PropertyPage with the corresponding page_type.
|
||||
// Ownership of the object is transferred to the caller.
|
||||
PropertyPage* ReleasePropertyPage(
|
||||
ProxyFetchPropertyCallback::PageType page_type);
|
||||
|
||||
// Releases the ownership of fallback property page.
|
||||
FallbackPropertyPage* ReleaseFallbackPropertyPage() {
|
||||
return fallback_property_page_.release();
|
||||
}
|
||||
|
||||
// In our flow, property-page will be available via RewriteDriver only after
|
||||
// ProxyFetch is set. But there may be instances where the result may be
|
||||
// required even before proxy-fetch is created. Any task that depends on the
|
||||
// PropertyCache result will be executed as soon as PropertyCache lookup is
|
||||
// done.
|
||||
// func is guaranteed to execute after PropertyCache lookup has completed, as
|
||||
// long as ProxyFetch is not set before PropertyCache lookup is done. One
|
||||
// should use PropertyCache result via RewriteDriver if some other thread can
|
||||
// initiate SetProxyFetch().
|
||||
void AddPostLookupTask(Function* func);
|
||||
|
||||
// If options_ is NULL returns true. Else, returns true if (url_,
|
||||
// write_timestamp_ms) is valid as per URL cache invalidation entries is
|
||||
// options_.
|
||||
bool IsCacheValid(int64 write_timestamp_ms) const;
|
||||
|
||||
// Called by a ProxyFetchPropertyCallback when the former is complete.
|
||||
void Done(ProxyFetchPropertyCallback* callback, bool success);
|
||||
|
||||
// Updates the status code of response in property cache.
|
||||
void UpdateStatusCodeInPropertyCache();
|
||||
|
||||
const RequestContextPtr& request_context() { return request_context_; }
|
||||
|
||||
// Returns DeviceType from device property page.
|
||||
UserAgentMatcher::DeviceType device_type() { return device_type_; }
|
||||
|
||||
private:
|
||||
std::set<ProxyFetchPropertyCallback*> pending_callbacks_;
|
||||
std::map<ProxyFetchPropertyCallback::PageType, PropertyPage*>
|
||||
property_pages_;
|
||||
scoped_ptr<AbstractMutex> mutex_;
|
||||
ServerContext* server_context_;
|
||||
GoogleString url_;
|
||||
RequestContextPtr request_context_;
|
||||
UserAgentMatcher::DeviceType device_type_;
|
||||
bool detached_; // protected by mutex_.
|
||||
bool done_; // protected by mutex_.
|
||||
bool success_; // protected by mutex_; accessed after quiescence.
|
||||
ProxyFetch* proxy_fetch_; // protected by mutex_.
|
||||
// protected by mutex_.
|
||||
scoped_ptr<std::vector<Function*> > post_lookup_task_vector_;
|
||||
const RewriteOptions* options_; // protected by mutex_;
|
||||
HttpStatus::Code status_code_; // status_code_ of the response.
|
||||
scoped_ptr<FallbackPropertyPage> fallback_property_page_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ProxyFetchPropertyCallbackCollector);
|
||||
};
|
||||
|
||||
// Manages a single fetch of an HTML or resource file from the original server.
|
||||
// If it is an HTML file, it is rewritten.
|
||||
// Fetch is initialized by calling ProxyFetchFactory::StartNewProxyFetch().
|
||||
// For fetching pagespeed rewritten resources, use ResourceFetch.
|
||||
// This is only meant to be used by ProxyInterface.
|
||||
//
|
||||
// Takes ownership of custom_options.
|
||||
//
|
||||
// The ProxyFetch passes through non-HTML directly to base_writer.
|
||||
//
|
||||
// For HTML, the sequence is this:
|
||||
// 1. HeadersComplete is called, allowing us to establish we've got HTML.
|
||||
// 2. Some number of calls to Write occur.
|
||||
// 3. Optional: Flush is called, followed by more Writes. Repeat.
|
||||
// 4. Done is called.
|
||||
// These virtual methods are called from some arbitrary thread, e.g. a
|
||||
// dedicated fetcher thread. We use a QueuedWorkerPool::Sequence to
|
||||
// offload them to a worker-thread. This implementation bundles together
|
||||
// multiple Writes, and depending on the timing, may move Flushes to
|
||||
// follow Writes and collapse multiple Flushes into one.
|
||||
class ProxyFetch : public SharedAsyncFetch {
|
||||
public:
|
||||
// These strings identify sync-points for reproducing races between
|
||||
// PropertyCache lookup completion and Origin HTML Fetch completion.
|
||||
static const char kCollectorDone[];
|
||||
static const char kCollectorPrefix[];
|
||||
static const char kCollectorReady[];
|
||||
static const char kCollectorDelete[];
|
||||
static const char kCollectorDetach[];
|
||||
static const char kCollectorDoneDelete[];
|
||||
|
||||
// These strings identify sync-points for introducing races between
|
||||
// PropertyCache lookup completion and HeadersComplete.
|
||||
static const char kHeadersSetupRaceAlarmQueued[];
|
||||
static const char kHeadersSetupRaceDone[];
|
||||
static const char kHeadersSetupRaceFlush[];
|
||||
static const char kHeadersSetupRacePrefix[];
|
||||
static const char kHeadersSetupRaceWait[];
|
||||
|
||||
// Number of milliseconds to wait, in a test, for an event that we
|
||||
// are hoping does not occur, specifically an inappropriate call to
|
||||
// base_fetch()->HeadersComplete() while we are still mutating
|
||||
// response headers in SetupForHtml.
|
||||
//
|
||||
// This is used only for testing.
|
||||
static const int kTestSignalTimeoutMs = 200;
|
||||
|
||||
protected:
|
||||
// protected interface from AsyncFetch.
|
||||
virtual void HandleHeadersComplete();
|
||||
virtual bool HandleWrite(const StringPiece& content, MessageHandler* handler);
|
||||
virtual bool HandleFlush(MessageHandler* handler);
|
||||
virtual void HandleDone(bool success);
|
||||
virtual bool IsCachedResultValid(const ResponseHeaders& headers);
|
||||
|
||||
private:
|
||||
friend class ProxyFetchFactory;
|
||||
friend class ProxyFetchPropertyCallbackCollector;
|
||||
friend class MockProxyFetch;
|
||||
FRIEND_TEST(ProxyFetchTest, TestInhibitParsing);
|
||||
|
||||
// Called by ProxyFetchPropertyCallbackCollector when all property-cache
|
||||
// fetches are complete. This function takes ownership of collector.
|
||||
virtual void PropertyCacheComplete(
|
||||
bool success, ProxyFetchPropertyCallbackCollector* collector);
|
||||
|
||||
// Returns the AbstractClientState object carried by the property cache
|
||||
// callback collector, if any. Returns NULL if no AbstractClientState
|
||||
// is found. This method assumes that the property cache is enabled and
|
||||
// the client state property cache lookup has completed.
|
||||
AbstractClientState* GetClientState(
|
||||
ProxyFetchPropertyCallbackCollector* collector);
|
||||
|
||||
// If cross_domain is true, we're requested under a domain different from
|
||||
// the underlying host, using proxy mode in UrlNamer.
|
||||
ProxyFetch(const GoogleString& url,
|
||||
bool cross_domain,
|
||||
ProxyFetchPropertyCallbackCollector* property_cache_callback,
|
||||
AsyncFetch* async_fetch,
|
||||
AsyncFetch* original_content_fetch,
|
||||
RewriteDriver* driver,
|
||||
ServerContext* manager,
|
||||
Timer* timer,
|
||||
ProxyFetchFactory* factory);
|
||||
virtual ~ProxyFetch();
|
||||
|
||||
const RewriteOptions* Options();
|
||||
|
||||
// Once we have decided this is HTML, begin parsing and set headers.
|
||||
void SetupForHtml();
|
||||
|
||||
// Adds a pagespeed header to response_headers if enabled.
|
||||
void AddPagespeedHeader();
|
||||
|
||||
// Sets up driver_, registering the writer and start parsing url.
|
||||
// Returns whether we started parsing successfully or not.
|
||||
bool StartParse();
|
||||
|
||||
// Start the fetch which includes preparing the request.
|
||||
void StartFetch();
|
||||
|
||||
// Actually do the fetch, called from callback of StartFetch.
|
||||
void DoFetch();
|
||||
|
||||
// Handles buffered HTML writes, flushes, and done calls
|
||||
// in the QueuedWorkerPool::Sequence sequence_.
|
||||
void ExecuteQueued();
|
||||
|
||||
// Schedules the task to run any buffered work, if needed. Assumes mutex
|
||||
// held.
|
||||
void ScheduleQueueExecutionIfNeeded();
|
||||
|
||||
// Frees up the RewriteDriver (via FinishParse or ReleaseRewriteDriver),
|
||||
// calls the callback (nulling out callback_ to ensure that we don't
|
||||
// do it again), notifies the ProxyInterface that the fetch is
|
||||
// complete, and deletes the ProxyFetch.
|
||||
void Finish(bool success);
|
||||
|
||||
// Used to wrap up the FinishParseAsync invocation.
|
||||
void CompleteFinishParse(bool success);
|
||||
|
||||
// Callback we give to ExecuteFlushIfRequestedAsync to notify us when
|
||||
// it's done with its work.
|
||||
void FlushDone();
|
||||
|
||||
// Management functions for idle_alarm_. Must only be called from
|
||||
// within sequence_.
|
||||
|
||||
// Cancels any previous alarm.
|
||||
void CancelIdleAlarm();
|
||||
|
||||
// Cancels previous alarm and starts next one.
|
||||
void QueueIdleAlarm();
|
||||
|
||||
// Handler for the alarm; run in sequence_.
|
||||
void HandleIdleAlarm();
|
||||
|
||||
GoogleString url_;
|
||||
ServerContext* server_context_;
|
||||
Timer* timer_;
|
||||
|
||||
scoped_ptr<CacheUrlAsyncFetcher> cache_fetcher_;
|
||||
|
||||
// True if we're handling a cross-domain request in proxy mode, which
|
||||
// should do some additional checking.
|
||||
bool cross_domain_;
|
||||
|
||||
// Does page claim to be "Content-Type: text/html"? (It may be lying)
|
||||
bool claims_html_;
|
||||
|
||||
// Has a call to StartParse succeeded? We'll only do this if we actually
|
||||
// decide it is HTML.
|
||||
bool started_parse_;
|
||||
|
||||
// Has a call to RewriteDriver::ParseText been made yet.
|
||||
bool parse_text_called_;
|
||||
|
||||
// Tracks whether Done() has been called.
|
||||
bool done_called_;
|
||||
|
||||
HtmlDetector html_detector_;
|
||||
|
||||
// Tracks a set of outstanding property-cache lookups. This is NULLed
|
||||
// when the property-cache completes or when we detach it. We use
|
||||
// this to detach the callback if we decide we don't care about the
|
||||
// property-caches because we discovered we are not working with HTML.
|
||||
ProxyFetchPropertyCallbackCollector* property_cache_callback_;
|
||||
|
||||
// Fetch where raw original headers and contents are sent.
|
||||
// To contrast, base_fetch() is sent rewritten contents and headers.
|
||||
// If NULL, original_content_fetch_ is ignored.
|
||||
AsyncFetch* original_content_fetch_;
|
||||
|
||||
// ProxyFetch is responsible for getting RewriteDrivers from the pool and
|
||||
// putting them back.
|
||||
RewriteDriver* driver_;
|
||||
|
||||
// True if we have queued up ExecuteQueued but did not
|
||||
// execute it yet.
|
||||
bool queue_run_job_created_;
|
||||
|
||||
// As the UrlAsyncFetcher calls our Write & Flush methods, we collect
|
||||
// the text in text_queue, and note the Flush call in
|
||||
// network_flush_requested_, returning control to the fetcher as quickly
|
||||
// as possible so it can continue to process incoming network traffic.
|
||||
//
|
||||
// We offload the handling of the incoming text events to a
|
||||
// QueuedWorkerPool::Sequence. Note that we may receive a new chunk
|
||||
// of text while we are still processing an old chunk. The sequentiality
|
||||
// is preserved by QueuedWorkerPool::Sequence.
|
||||
//
|
||||
// The Done callback is also indirected through this Sequence.
|
||||
scoped_ptr<AbstractMutex> mutex_;
|
||||
StringStarVector text_queue_;
|
||||
bool network_flush_outstanding_;
|
||||
QueuedWorkerPool::Sequence* sequence_;
|
||||
|
||||
// done_oustanding_ will be true if we got called with ::Done but didn't
|
||||
// invoke Finish yet.
|
||||
bool done_outstanding_;
|
||||
|
||||
// Finish is true if we started Finish, perhaps doing FinishParseAsync.
|
||||
// Accessed only from within context of sequence_.
|
||||
bool finishing_;
|
||||
|
||||
// done_result_ is used to store the result of ::Done if we're deferring
|
||||
// handling it until the driver finishes handling a Flush.
|
||||
bool done_result_;
|
||||
|
||||
// We may also end up receiving new events in between calling FlushAsync
|
||||
// and getting the callback called. In that case, we want to hold off
|
||||
// on actually dispatching things queued up above.
|
||||
bool waiting_for_flush_to_finish_;
|
||||
|
||||
// Alarm used to keep track of inactivity, in order to help issue
|
||||
// flushes. Must only be accessed from the thread context of sequence_
|
||||
QueuedAlarm* idle_alarm_;
|
||||
|
||||
ProxyFetchFactory* factory_;
|
||||
|
||||
// Whether PrepareRequest() to url_namer succeeded.
|
||||
bool prepare_success_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ProxyFetch);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_FETCH_H_
|
||||
@@ -1,139 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
//
|
||||
// Simple interface for running Page Speed Automatic as a proxy.
|
||||
//
|
||||
// When implementing a Page Speed Automatic proxy, simply construct a
|
||||
// ProxyInterface at start up time and call Fetch for every
|
||||
// requested resource. Fetch decides how to deal with requests
|
||||
// (pagespeed resources will be computed, HTML pages will be proxied
|
||||
// and rewritten, and other resources will just be proxied).
|
||||
|
||||
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_H_
|
||||
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_H_
|
||||
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class GoogleUrl;
|
||||
class MessageHandler;
|
||||
class ProxyFetchPropertyCallbackCollector;
|
||||
class ProxyFetchFactory;
|
||||
class ServerContext;
|
||||
class RewriteOptions;
|
||||
class Statistics;
|
||||
class TimedVariable;
|
||||
class Timer;
|
||||
|
||||
// TODO(sligocki): Rename as per style-guide.
|
||||
class ProxyInterface : public UrlAsyncFetcher {
|
||||
public:
|
||||
ProxyInterface(const StringPiece& hostname, int port,
|
||||
ServerContext* manager, Statistics* stats);
|
||||
virtual ~ProxyInterface();
|
||||
|
||||
// Initializes statistics variables associated with this class.
|
||||
static void InitStats(Statistics* statistics);
|
||||
|
||||
// All requests use this interface. We decide internally whether the
|
||||
// request is a pagespeed resource, HTML page to be rewritten or another
|
||||
// resource to be proxied directly.
|
||||
virtual void Fetch(const GoogleString& requested_url,
|
||||
MessageHandler* handler,
|
||||
AsyncFetch* async_fetch);
|
||||
|
||||
// Callback function passed to UrlNamer to finish handling requests once we
|
||||
// have rewrite_options for requests that are being proxied.
|
||||
void ProxyRequestCallback(
|
||||
bool is_resource_fetch,
|
||||
GoogleUrl* request_url,
|
||||
AsyncFetch* async_fetch,
|
||||
RewriteOptions* domain_options,
|
||||
RewriteOptions* query_options,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Is this url_string well-formed enough to proxy through?
|
||||
bool IsWellFormedUrl(const GoogleUrl& url);
|
||||
|
||||
static const char kBlinkRequestCount[];
|
||||
static const char kBlinkCriticalLineRequestCount[];
|
||||
static const char kCacheHtmlRequestCount[];
|
||||
|
||||
// Initiates the PropertyCache look up.
|
||||
virtual ProxyFetchPropertyCallbackCollector* InitiatePropertyCacheLookup(
|
||||
bool is_resource_fetch,
|
||||
const GoogleUrl& request_url,
|
||||
RewriteOptions* options,
|
||||
AsyncFetch* async_fetch,
|
||||
bool* added_page_property_callback = NULL);
|
||||
|
||||
private:
|
||||
friend class ProxyInterfaceTest;
|
||||
|
||||
// Handle requests that are being proxied.
|
||||
// * HTML requests are rewritten.
|
||||
// * Resource requests are proxied verbatim.
|
||||
void ProxyRequest(bool is_resource_fetch,
|
||||
const GoogleUrl& requested_url,
|
||||
AsyncFetch* async_fetch,
|
||||
MessageHandler* handler);
|
||||
|
||||
// If the URL and port are for this server, don't proxy those (to avoid
|
||||
// infinite fetching loops). This might be the favicon or something...
|
||||
bool UrlAndPortMatchThisServer(const GoogleUrl& url);
|
||||
|
||||
// References to unowned objects.
|
||||
ServerContext* server_context_; // thread-safe
|
||||
UrlAsyncFetcher* fetcher_; // thread-safe
|
||||
Timer* timer_; // thread-safe
|
||||
MessageHandler* handler_; // thread-safe
|
||||
|
||||
// This server's hostname and port (to avoid making circular requests).
|
||||
// TODO(sligocki): This assumes we will only be called as one hostname,
|
||||
// there could be multiple DNS entries pointing at us.
|
||||
const GoogleString hostname_;
|
||||
const int port_;
|
||||
|
||||
// Varz variables
|
||||
// Total requests.
|
||||
TimedVariable* all_requests_;
|
||||
// Total Pagespeed requests.
|
||||
TimedVariable* pagespeed_requests_;
|
||||
// Blink requests.
|
||||
TimedVariable* blink_requests_;
|
||||
// Blink requests in the critical line flow.
|
||||
TimedVariable* blink_critical_line_requests_;
|
||||
// Cache Html requests.
|
||||
TimedVariable* cache_html_flow_requests_;
|
||||
// Rejected requests counter.
|
||||
TimedVariable* rejected_requests_;
|
||||
|
||||
scoped_ptr<ProxyFetchFactory> proxy_fetch_factory_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ProxyInterface);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_H_
|
||||
@@ -1,255 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: mmohabey@google.com (Megha Mohabey)
|
||||
|
||||
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_TEST_BASE_H_
|
||||
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_TEST_BASE_H_
|
||||
|
||||
#include "net/instaweb/automatic/public/proxy_interface.h"
|
||||
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
|
||||
#include "net/instaweb/http/public/async_fetch.h"
|
||||
#include "net/instaweb/http/public/response_headers.h"
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/rewriter/public/rewrite_options.h"
|
||||
#include "net/instaweb/rewriter/public/rewrite_test_base.h"
|
||||
#include "net/instaweb/rewriter/public/test_rewrite_driver_factory.h"
|
||||
#include "net/instaweb/rewriter/public/url_namer.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/worker_test_base.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractClientState;
|
||||
class CriticalImagesFinder;
|
||||
class GoogleUrl;
|
||||
class HtmlElement;
|
||||
class HtmlFilter;
|
||||
class MessageHandler;
|
||||
class PropertyValue;
|
||||
class RequestHeaders;
|
||||
class RewriteDriver;
|
||||
|
||||
// Relative URL of a test page.  NOTE(review): its users are not visible in
// this header -- confirm against the test sources.
const char kPageUrl[] = "page.html";
|
||||
// Name of the response header that BackgroundFetchCheckingAsyncFetch adds;
// its value is "1" for a background fetch and "0" otherwise.
const char kBackgroundFetchHeader[] = "X-Background-Fetch";
|
||||
|
||||
// Creates a proxy URL naming rule that encodes an "owner" domain and an
|
||||
// "origin" domain, all inside a fixed proxy-domain.
|
||||
class ProxyUrlNamer : public UrlNamer {
|
||||
public:
|
||||
static const char kProxyHost[];
|
||||
|
||||
ProxyUrlNamer() : authorized_(true), options_(NULL) {}
|
||||
|
||||
// Given the request_url, generate the original url.
|
||||
virtual bool Decode(const GoogleUrl& gurl,
|
||||
GoogleUrl* domain,
|
||||
GoogleString* decoded) const;
|
||||
|
||||
virtual bool IsAuthorized(const GoogleUrl& gurl,
|
||||
const RewriteOptions& options) const {
|
||||
return authorized_;
|
||||
}
|
||||
|
||||
// Given the request url and request headers, generate the rewrite options.
|
||||
virtual void DecodeOptions(const GoogleUrl& request_url,
|
||||
const RequestHeaders& request_headers,
|
||||
Callback* callback,
|
||||
MessageHandler* handler) const {
|
||||
callback->Done((options_ == NULL) ? NULL : options_->Clone());
|
||||
}
|
||||
|
||||
void set_authorized(bool authorized) { authorized_ = authorized; }
|
||||
void set_options(RewriteOptions* options) { options_ = options; }
|
||||
|
||||
private:
|
||||
bool authorized_;
|
||||
RewriteOptions* options_;
|
||||
DISALLOW_COPY_AND_ASSIGN(ProxyUrlNamer);
|
||||
};
|
||||
|
||||
// Mock filter which gets passed to the new rewrite driver created in
|
||||
// proxy_fetch.
|
||||
//
|
||||
// This is used to check the flow for injecting data into filters via the
|
||||
// ProxyInterface, including:
|
||||
// property_cache.
|
||||
class MockFilter : public EmptyHtmlFilter {
|
||||
public:
|
||||
explicit MockFilter(RewriteDriver* driver)
|
||||
: driver_(driver),
|
||||
num_elements_(0),
|
||||
num_elements_property_(NULL),
|
||||
client_state_(NULL) {
|
||||
}
|
||||
|
||||
virtual void StartDocument();
|
||||
|
||||
virtual void StartElement(HtmlElement* element);
|
||||
|
||||
virtual void EndDocument();
|
||||
|
||||
virtual const char* Name() const { return "MockFilter"; }
|
||||
|
||||
private:
|
||||
RewriteDriver* driver_;
|
||||
int num_elements_;
|
||||
PropertyValue* num_elements_property_;
|
||||
GoogleString client_id_;
|
||||
AbstractClientState* client_state_;
|
||||
DISALLOW_COPY_AND_ASSIGN(MockFilter);
|
||||
};
|
||||
|
||||
// Hook provided to TestRewriteDriverFactory to add a new filter when
|
||||
// a rewrite_driver is created.
|
||||
class CreateFilterCallback
|
||||
: public TestRewriteDriverFactory::CreateFilterCallback {
|
||||
public:
|
||||
CreateFilterCallback() {}
|
||||
virtual ~CreateFilterCallback() {}
|
||||
|
||||
virtual HtmlFilter* Done(RewriteDriver* driver) {
|
||||
return new MockFilter(driver);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(CreateFilterCallback);
|
||||
};
|
||||
|
||||
// Subclass of AsyncFetch that adds a response header indicating whether the
|
||||
// fetch is for a user-facing request, or a background rewrite.
|
||||
class BackgroundFetchCheckingAsyncFetch : public SharedAsyncFetch {
|
||||
public:
|
||||
explicit BackgroundFetchCheckingAsyncFetch(AsyncFetch* base_fetch)
|
||||
: SharedAsyncFetch(base_fetch),
|
||||
async_fetch_(base_fetch) {}
|
||||
virtual ~BackgroundFetchCheckingAsyncFetch() {}
|
||||
|
||||
virtual void HandleHeadersComplete() {
|
||||
SharedAsyncFetch::HandleHeadersComplete();
|
||||
response_headers()->Add(kBackgroundFetchHeader,
|
||||
async_fetch_->IsBackgroundFetch() ? "1" : "0");
|
||||
// Call ComputeCaching again since Add sets cache_fields_dirty_ to true.
|
||||
response_headers()->ComputeCaching();
|
||||
}
|
||||
|
||||
virtual void HandleDone(bool success) {
|
||||
SharedAsyncFetch::HandleDone(success);
|
||||
delete this;
|
||||
}
|
||||
|
||||
private:
|
||||
AsyncFetch* async_fetch_;
|
||||
DISALLOW_COPY_AND_ASSIGN(BackgroundFetchCheckingAsyncFetch);
|
||||
};
|
||||
|
||||
// Subclass of UrlAsyncFetcher that wraps the AsyncFetch with a
|
||||
// BackgroundFetchCheckingAsyncFetch.
|
||||
class BackgroundFetchCheckingUrlAsyncFetcher : public UrlAsyncFetcher {
|
||||
public:
|
||||
explicit BackgroundFetchCheckingUrlAsyncFetcher(UrlAsyncFetcher* fetcher)
|
||||
: base_fetcher_(fetcher),
|
||||
num_background_fetches_(0) {}
|
||||
virtual ~BackgroundFetchCheckingUrlAsyncFetcher() {}
|
||||
|
||||
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch) {
|
||||
if (fetch->IsBackgroundFetch()) {
|
||||
num_background_fetches_++;
|
||||
}
|
||||
BackgroundFetchCheckingAsyncFetch* new_fetch =
|
||||
new BackgroundFetchCheckingAsyncFetch(fetch);
|
||||
base_fetcher_->Fetch(url, message_handler, new_fetch);
|
||||
}
|
||||
|
||||
int num_background_fetches() { return num_background_fetches_; }
|
||||
void clear_num_background_fetches() { num_background_fetches_ = 0; }
|
||||
|
||||
private:
|
||||
UrlAsyncFetcher* base_fetcher_;
|
||||
int num_background_fetches_;
|
||||
DISALLOW_COPY_AND_ASSIGN(BackgroundFetchCheckingUrlAsyncFetcher);
|
||||
};
|
||||
|
||||
// TODO(morlovich): This currently relies on RewriteTestBase to help
|
||||
// setup fetchers; and also indirectly to prevent any rewrites from timing out
|
||||
// (as it runs the tests with real scheduler but mock timer). It would probably
|
||||
// be better to port this away to use TestRewriteDriverFactory directly.
|
||||
class ProxyInterfaceTestBase : public RewriteTestBase {
|
||||
public:
|
||||
void TestHeadersSetupRace();
|
||||
|
||||
protected:
|
||||
static const int kHtmlCacheTimeSec = 5000;
|
||||
|
||||
ProxyInterfaceTestBase();
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
void FetchFromProxy(const StringPiece& url,
|
||||
const RequestHeaders& request_headers,
|
||||
bool expect_success,
|
||||
GoogleString* string_out,
|
||||
ResponseHeaders* headers_out);
|
||||
|
||||
void FetchFromProxy(const StringPiece& url,
|
||||
bool expect_success,
|
||||
GoogleString* string_out,
|
||||
ResponseHeaders* headers_out);
|
||||
|
||||
void FetchFromProxyLoggingFlushes(const StringPiece& url,
|
||||
bool expect_success,
|
||||
GoogleString* string_out);
|
||||
|
||||
void FetchFromProxyNoWait(const StringPiece& url,
|
||||
const RequestHeaders& request_headers,
|
||||
bool expect_success,
|
||||
bool log_flush,
|
||||
ResponseHeaders* headers_out);
|
||||
|
||||
void WaitForFetch();
|
||||
|
||||
void TestPropertyCache(const StringPiece& url,
|
||||
bool delay_pcache, bool thread_pcache,
|
||||
bool expect_success);
|
||||
|
||||
void TestPropertyCacheWithHeadersAndOutput(
|
||||
const StringPiece& url, bool delay_pcache, bool thread_pcache,
|
||||
bool expect_success, bool check_stats, bool add_create_filter_callback,
|
||||
bool expect_detach_before_pcache, const RequestHeaders& request_headers,
|
||||
ResponseHeaders* response_headers, GoogleString* output);
|
||||
|
||||
void SetCriticalImagesInFinder(StringSet* critical_images);
|
||||
void SetCssCriticalImagesInFinder(StringSet* css_critical_images);
|
||||
|
||||
scoped_ptr<ProxyInterface> proxy_interface_;
|
||||
scoped_ptr<WorkerTestBase::SyncPoint> sync_;
|
||||
ResponseHeaders callback_response_headers_;
|
||||
GoogleString callback_buffer_;
|
||||
bool callback_done_value_;
|
||||
|
||||
private:
|
||||
friend class FilterCallback;
|
||||
|
||||
CriticalImagesFinder* fake_critical_images_finder_;
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_PROXY_INTERFACE_TEST_BASE_H_
|
||||
@@ -1,96 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_AUTOMATIC_PUBLIC_STATIC_REWRITER_H_
|
||||
#define NET_INSTAWEB_AUTOMATIC_PUBLIC_STATIC_REWRITER_H_
|
||||
|
||||
#include "net/instaweb/rewriter/public/rewrite_driver_factory.h"
|
||||
#include "net/instaweb/rewriter/public/rewrite_gflags.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/simple_stats.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class FileSystem;
|
||||
class Hasher;
|
||||
class MessageHandler;
|
||||
class ServerContext;
|
||||
class Statistics;
|
||||
class UrlAsyncFetcher;
|
||||
class UrlFetcher;
|
||||
class Writer;
|
||||
|
||||
// Implements a baseline RewriteDriverFactory with the simplest possible
|
||||
// options for cache, fetchers, & system interface.
|
||||
//
|
||||
// TODO(jmarantz): fill out enough functionality so that this will be
|
||||
// a functional static rewriter that could optimize an HTML file
|
||||
// passed as a command-line parameter or via stdin.
|
||||
class FileRewriter : public RewriteDriverFactory {
|
||||
public:
|
||||
FileRewriter(const RewriteGflags* gflags,
|
||||
bool echo_errors_to_stdout);
|
||||
virtual ~FileRewriter();
|
||||
virtual Hasher* NewHasher();
|
||||
virtual UrlFetcher* DefaultUrlFetcher();
|
||||
virtual UrlAsyncFetcher* DefaultAsyncUrlFetcher();
|
||||
virtual MessageHandler* DefaultHtmlParseMessageHandler();
|
||||
virtual MessageHandler* DefaultMessageHandler();
|
||||
virtual FileSystem* DefaultFileSystem();
|
||||
virtual void SetupCaches(ServerContext* resource_manager);
|
||||
virtual Statistics* statistics();
|
||||
virtual ServerContext* NewServerContext();
|
||||
virtual bool UseBeaconResultsInFilters() const { return false; }
|
||||
|
||||
private:
|
||||
const RewriteGflags* gflags_;
|
||||
SimpleStats simple_stats_;
|
||||
bool echo_errors_to_stdout_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(FileRewriter);
|
||||
};
|
||||
|
||||
// Encapsulates the instantiation of a FileRewriter & a simple one-shot
|
||||
// interface to rewrite some HTML text.
|
||||
class StaticRewriter {
|
||||
public:
|
||||
StaticRewriter(int* argc, char*** argv);
|
||||
StaticRewriter();
|
||||
~StaticRewriter();
|
||||
|
||||
bool ParseText(const StringPiece& text,
|
||||
const StringPiece& url,
|
||||
const StringPiece& id,
|
||||
const StringPiece& output_dir,
|
||||
Writer* writer);
|
||||
|
||||
FileSystem* file_system();
|
||||
MessageHandler* message_handler();
|
||||
|
||||
private:
|
||||
RewriteGflags gflags_;
|
||||
FileRewriter file_rewriter_;
|
||||
ServerContext* server_context_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(StaticRewriter);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_AUTOMATIC_PUBLIC_STATIC_REWRITER_H_
|
||||
@@ -1,220 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_HTML_EVENT_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_HTML_EVENT_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/htmlparse/public/html_element.h"
|
||||
#include "net/instaweb/htmlparse/public/html_filter.h"
|
||||
#include "net/instaweb/htmlparse/public/html_node.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlEvent {
|
||||
public:
|
||||
explicit HtmlEvent(int line_number) : line_number_(line_number) {
|
||||
}
|
||||
virtual ~HtmlEvent();
|
||||
virtual void Run(HtmlFilter* filter) = 0;
|
||||
virtual void ToString(GoogleString* buffer) = 0;
|
||||
|
||||
// If this is a StartElement event, returns the HtmlElement that is being
|
||||
// started. Otherwise returns NULL.
|
||||
virtual HtmlElement* GetElementIfStartEvent() { return NULL; }
|
||||
|
||||
// If this is an EndElement event, returns the HtmlElement that is being
|
||||
// ended. Otherwise returns NULL.
|
||||
virtual HtmlElement* GetElementIfEndEvent() { return NULL; }
|
||||
|
||||
virtual HtmlLeafNode* GetLeafNode() { return NULL; }
|
||||
virtual HtmlNode* GetNode() { return NULL; }
|
||||
virtual HtmlCharactersNode* GetCharactersNode() { return NULL; }
|
||||
void DebugPrint();
|
||||
|
||||
int line_number() const { return line_number_; }
|
||||
|
||||
private:
|
||||
int line_number_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlEvent);
|
||||
};
|
||||
|
||||
class HtmlStartDocumentEvent: public HtmlEvent {
|
||||
public:
|
||||
explicit HtmlStartDocumentEvent(int line_number) : HtmlEvent(line_number) {}
|
||||
virtual void Run(HtmlFilter* filter) { filter->StartDocument(); }
|
||||
virtual void ToString(GoogleString* str) { *str += "StartDocument"; }
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlStartDocumentEvent);
|
||||
};
|
||||
|
||||
class HtmlEndDocumentEvent: public HtmlEvent {
|
||||
public:
|
||||
explicit HtmlEndDocumentEvent(int line_number) : HtmlEvent(line_number) {}
|
||||
virtual void Run(HtmlFilter* filter) { filter->EndDocument(); }
|
||||
virtual void ToString(GoogleString* str) { *str += "EndDocument"; }
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlEndDocumentEvent);
|
||||
};
|
||||
|
||||
class HtmlStartElementEvent: public HtmlEvent {
|
||||
public:
|
||||
HtmlStartElementEvent(HtmlElement* element, int line_number)
|
||||
: HtmlEvent(line_number),
|
||||
element_(element) {
|
||||
}
|
||||
virtual void Run(HtmlFilter* filter) { filter->StartElement(element_); }
|
||||
virtual void ToString(GoogleString* str) {
|
||||
*str += "StartElement ";
|
||||
*str += element_->name_str();
|
||||
}
|
||||
virtual HtmlElement* GetElementIfStartEvent() { return element_; }
|
||||
virtual HtmlElement* GetNode() { return element_; }
|
||||
private:
|
||||
HtmlElement* element_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlStartElementEvent);
|
||||
};
|
||||
|
||||
class HtmlEndElementEvent: public HtmlEvent {
|
||||
public:
|
||||
HtmlEndElementEvent(HtmlElement* element, int line_number)
|
||||
: HtmlEvent(line_number),
|
||||
element_(element) {
|
||||
}
|
||||
virtual void Run(HtmlFilter* filter) { filter->EndElement(element_); }
|
||||
virtual void ToString(GoogleString* str) {
|
||||
*str += "EndElement ";
|
||||
*str += element_->name_str();
|
||||
}
|
||||
virtual HtmlElement* GetElementIfEndEvent() { return element_; }
|
||||
virtual HtmlElement* GetNode() { return element_; }
|
||||
private:
|
||||
HtmlElement* element_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlEndElementEvent);
|
||||
};
|
||||
|
||||
class HtmlLeafNodeEvent: public HtmlEvent {
|
||||
public:
|
||||
explicit HtmlLeafNodeEvent(int line_number) : HtmlEvent(line_number) { }
|
||||
virtual HtmlNode* GetNode() { return GetLeafNode(); }
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlLeafNodeEvent);
|
||||
};
|
||||
|
||||
class HtmlIEDirectiveEvent: public HtmlLeafNodeEvent {
|
||||
public:
|
||||
HtmlIEDirectiveEvent(HtmlIEDirectiveNode* directive, int line_number)
|
||||
: HtmlLeafNodeEvent(line_number),
|
||||
directive_(directive) {
|
||||
}
|
||||
virtual void Run(HtmlFilter* filter) { filter->IEDirective(directive_); }
|
||||
virtual void ToString(GoogleString* str) {
|
||||
*str += "IEDirective ";
|
||||
*str += directive_->contents();
|
||||
}
|
||||
virtual HtmlLeafNode* GetLeafNode() { return directive_; }
|
||||
private:
|
||||
HtmlIEDirectiveNode* directive_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlIEDirectiveEvent);
|
||||
};
|
||||
|
||||
class HtmlCdataEvent: public HtmlLeafNodeEvent {
|
||||
public:
|
||||
HtmlCdataEvent(HtmlCdataNode* cdata, int line_number)
|
||||
: HtmlLeafNodeEvent(line_number),
|
||||
cdata_(cdata) {
|
||||
}
|
||||
virtual void Run(HtmlFilter* filter) { filter->Cdata(cdata_); }
|
||||
virtual void ToString(GoogleString* str) {
|
||||
*str += "Cdata ";
|
||||
*str += cdata_->contents();
|
||||
}
|
||||
virtual HtmlLeafNode* GetLeafNode() { return cdata_; }
|
||||
private:
|
||||
HtmlCdataNode* cdata_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlCdataEvent);
|
||||
};
|
||||
|
||||
class HtmlCommentEvent: public HtmlLeafNodeEvent {
|
||||
public:
|
||||
HtmlCommentEvent(HtmlCommentNode* comment, int line_number)
|
||||
: HtmlLeafNodeEvent(line_number),
|
||||
comment_(comment) {
|
||||
}
|
||||
virtual void Run(HtmlFilter* filter) { filter->Comment(comment_); }
|
||||
virtual void ToString(GoogleString* str) {
|
||||
*str += "Comment ";
|
||||
*str += comment_->contents();
|
||||
}
|
||||
virtual HtmlLeafNode* GetLeafNode() { return comment_; }
|
||||
|
||||
private:
|
||||
HtmlCommentNode* comment_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlCommentEvent);
|
||||
};
|
||||
|
||||
class HtmlCharactersEvent: public HtmlLeafNodeEvent {
|
||||
public:
|
||||
HtmlCharactersEvent(HtmlCharactersNode* characters, int line_number)
|
||||
: HtmlLeafNodeEvent(line_number),
|
||||
characters_(characters) {
|
||||
}
|
||||
virtual void Run(HtmlFilter* filter) { filter->Characters(characters_); }
|
||||
virtual void ToString(GoogleString* str) {
|
||||
*str += "Characters ";
|
||||
*str += characters_->contents();
|
||||
}
|
||||
virtual HtmlLeafNode* GetLeafNode() { return characters_; }
|
||||
virtual HtmlCharactersNode* GetCharactersNode() { return characters_; }
|
||||
private:
|
||||
HtmlCharactersNode* characters_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlCharactersEvent);
|
||||
};
|
||||
|
||||
class HtmlDirectiveEvent: public HtmlLeafNodeEvent {
|
||||
public:
|
||||
HtmlDirectiveEvent(HtmlDirectiveNode* directive, int line_number)
|
||||
: HtmlLeafNodeEvent(line_number),
|
||||
directive_(directive) {
|
||||
}
|
||||
virtual void Run(HtmlFilter* filter) { filter->Directive(directive_); }
|
||||
virtual void ToString(GoogleString* str) {
|
||||
*str += "Directive: ";
|
||||
*str += directive_->contents();
|
||||
}
|
||||
virtual HtmlLeafNode* GetLeafNode() { return directive_; }
|
||||
private:
|
||||
HtmlDirectiveNode* directive_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlDirectiveEvent);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_HTML_EVENT_H_
|
||||
@@ -1,248 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_HTML_LEXER_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_HTML_LEXER_H_
|
||||
|
||||
#include <vector>
|
||||
#include "net/instaweb/htmlparse/public/html_name.h"
|
||||
#include "net/instaweb/htmlparse/public/doctype.h"
|
||||
#include "net/instaweb/htmlparse/public/html_element.h"
|
||||
#include "net/instaweb/http/public/content_type.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/printf_format.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlParse;
|
||||
|
||||
// Constructs a re-entrant HTML lexer. This lexer minimally parses tags,
|
||||
// attributes, and comments. It is intended to parse the Wild West of the
|
||||
// Web. It's designed to be tolerant of syntactic transgressions, merely
|
||||
// passing through unparseable chunks as Characters.
|
||||
//
|
||||
// TODO(jmarantz): refactor this with html_parse, so that this class owns
|
||||
// the symbol table and the event queue, and no longer needs to mutually
|
||||
// depend on HtmlParse. That will make it easier to unit-test.
|
||||
class HtmlLexer {
|
||||
public:
|
||||
explicit HtmlLexer(HtmlParse* html_parse);
|
||||
~HtmlLexer();
|
||||
|
||||
// Initialize a new parse session, id is only used for error messages.
|
||||
void StartParse(const StringPiece& id, const ContentType& content_type);
|
||||
|
||||
// Parse a chunk of text, adding events to the parser by calling
|
||||
// html_parse_->AddEvent(...).
|
||||
void Parse(const char* text, int size);
|
||||
|
||||
// Completes parse, reporting any leftover text as a final HtmlCharacterEvent.
|
||||
void FinishParse();
|
||||
|
||||
// Determines whether a tag should be terminated in HTML.
|
||||
bool IsImplicitlyClosedTag(HtmlName::Keyword keyword) const;
|
||||
|
||||
// Determines whether a tag should be interpreted as a 'literal'
|
||||
// tag. That is, a tag whose contents are not parsed until a
|
||||
// corresponding matching end tag is encountered.
|
||||
static bool IsLiteralTag(HtmlName::Keyword keyword);
|
||||
|
||||
// Determines whether a tag is interpreted as a 'literal' tag in
|
||||
// some user agents. Since some user agents will interpret the
|
||||
// contents of these tags, our lexer never treats them as literal
|
||||
// tags.
|
||||
static bool IsSometimesLiteralTag(HtmlName::Keyword keyword);
|
||||
|
||||
// Determines whether a tag can be terminated briefly (e.g. <tag/>)
|
||||
bool TagAllowsBriefTermination(HtmlName::Keyword keyword) const;
|
||||
|
||||
// Determines whether it's OK to leave a tag unclosed.
|
||||
bool IsOptionallyClosedTag(HtmlName::Keyword keyword) const;
|
||||
|
||||
// Print element stack to stdout (for debugging).
|
||||
void DebugPrintStack();
|
||||
|
||||
// Returns the current lowest-level parent element in the element stack, or
|
||||
// NULL if the stack is empty.
|
||||
HtmlElement* Parent() const;
|
||||
|
||||
// Return the current assumed doctype of the document (based on the content
|
||||
// type and any HTML directives encountered so far).
|
||||
const DocType& doctype() const { return doctype_; }
|
||||
|
||||
// Sets the limit on the maximum number of bytes that should be parsed.
|
||||
void set_size_limit(int64 x) { size_limit_ = x; }
|
||||
|
||||
// Indicates whether we have exceeded the limit on the maximum number of bytes
|
||||
// that we should parse.
|
||||
bool size_limit_exceeded() const { return size_limit_exceeded_; }
|
||||
|
||||
private:
|
||||
// Most of these routines expect c to be the last character of literal_
|
||||
inline void EvalStart(char c);
|
||||
inline void EvalTag(char c);
|
||||
inline void EvalTagOpen(char c);
|
||||
inline void EvalTagClose(char c);
|
||||
inline void EvalTagCloseTerminate(char c);
|
||||
inline void EvalTagBriefClose(char c);
|
||||
inline void EvalTagBriefCloseAttr(char c);
|
||||
inline void EvalCommentStart1(char c);
|
||||
inline void EvalCommentStart2(char c);
|
||||
inline void EvalCommentBody(char c);
|
||||
inline void EvalCommentEnd1(char c);
|
||||
inline void EvalCommentEnd2(char c);
|
||||
inline void EvalCdataStart1(char c);
|
||||
inline void EvalCdataStart2(char c);
|
||||
inline void EvalCdataStart3(char c);
|
||||
inline void EvalCdataStart4(char c);
|
||||
inline void EvalCdataStart5(char c);
|
||||
inline void EvalCdataStart6(char c);
|
||||
inline void EvalCdataBody(char c);
|
||||
inline void EvalCdataEnd1(char c);
|
||||
inline void EvalCdataEnd2(char c);
|
||||
inline void EvalAttribute(char c);
|
||||
inline void EvalAttrName(char c);
|
||||
inline void EvalAttrEq(char c);
|
||||
inline void EvalAttrVal(char c);
|
||||
inline void EvalAttrValSq(char c);
|
||||
inline void EvalAttrValDq(char c);
|
||||
inline void EvalLiteralTag(char c);
|
||||
inline void EvalDirective(char c);
|
||||
|
||||
// Makes an element based on token_, which will be parsed as the tag
|
||||
// name.
|
||||
void MakeElement();
|
||||
|
||||
void MakeAttribute(bool has_value);
|
||||
void FinishAttribute(char c, bool has_value, bool brief_close);
|
||||
|
||||
void EmitCdata();
|
||||
void EmitComment();
|
||||
void EmitLiteral();
|
||||
void EmitTagOpen(bool allow_implicit_close); // expects element_ != NULL.
|
||||
void EmitTagClose(HtmlElement::CloseStyle close_style);
|
||||
void EmitTagBriefClose();
|
||||
void EmitDirective();
|
||||
void Restart(char c);
|
||||
|
||||
// Emits a syntax error message.
|
||||
void SyntaxError(const char* format, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
|
||||
|
||||
// Tries to find a HTML element on the stack matching a tag. If it
|
||||
// finds it, it pops all the intervening elements off the stack,
|
||||
// issuing warnings for each discarded tag, the matching element is
|
||||
// also popped off the stack, and returned.
|
||||
//
|
||||
// If the tag is not matched, then no mutations are done to the stack,
|
||||
// and NULL is returned.
|
||||
//
|
||||
// The tag name should be interned.
|
||||
// TODO(jmarantz): use type system
|
||||
HtmlElement* PopElementMatchingTag(const StringPiece& tag);
|
||||
|
||||
HtmlElement* PopElement();
|
||||
void CloseElement(HtmlElement* element, HtmlElement::CloseStyle close_style);
|
||||
|
||||
// Minimal i18n analysis. With utf-8 and gb2312 we can do this
|
||||
// context-free, and thus the method can be static. If we add
|
||||
// more encodings we may need to turn this into a non-static method.
|
||||
static inline bool IsI18nChar(char c) {return (((c) & 0x80) != 0); }
|
||||
|
||||
// Determines whether a character can be used in a tag name as first char ...
|
||||
static inline bool IsLegalTagFirstChar(char c);
|
||||
// ... or subsequent char.
|
||||
static inline bool IsLegalTagChar(char c);
|
||||
|
||||
// Determines whether a character can be used in an attribute name.
|
||||
static inline bool IsLegalAttrNameChar(char c);
|
||||
|
||||
// The lexer is implemented as a pure state machine. There is
|
||||
// no lookahead. The state is understood primarily in this
|
||||
// enum, although there are a few state flavors that are managed
|
||||
// by the other member variables, notably: has_attr_value_ and
|
||||
// attr_name_.empty(). Those could be eliminated by adding
|
||||
// a few more explicit states.
|
||||
enum State {
|
||||
START,
|
||||
TAG, // "<"
|
||||
TAG_CLOSE, // "</"
|
||||
TAG_CLOSE_TERMINATE, // "</x "
|
||||
TAG_OPEN, // "<x"
|
||||
TAG_BRIEF_CLOSE, // "<x/"
|
||||
TAG_BRIEF_CLOSE_ATTR, // "<x /" or "<x y/" or "x y=/z" etc
|
||||
COMMENT_START1, // "<!"
|
||||
COMMENT_START2, // "<!-"
|
||||
COMMENT_BODY, // "<!--"
|
||||
COMMENT_END1, // "-"
|
||||
COMMENT_END2, // "--"
|
||||
CDATA_START1, // "<!["
|
||||
CDATA_START2, // "<![C"
|
||||
CDATA_START3, // "<![CD"
|
||||
CDATA_START4, // "<![CDA"
|
||||
CDATA_START5, // "<![CDAT"
|
||||
CDATA_START6, // "<![CDATA"
|
||||
CDATA_BODY, // "<![CDATA["
|
||||
CDATA_END1, // "]"
|
||||
CDATA_END2, // "]]"
|
||||
TAG_ATTRIBUTE, // "<x "
|
||||
TAG_ATTR_NAME, // "<x y"
|
||||
TAG_ATTR_NAME_SPACE, // "<x y "
|
||||
TAG_ATTR_EQ, // "<x y="
|
||||
TAG_ATTR_VAL, // "<x y=x" value terminated by whitespace or >
|
||||
TAG_ATTR_VALDQ, // '<x y="' value terminated by double-quote
|
||||
TAG_ATTR_VALSQ, // "<x y='" value terminated by single-quote
|
||||
LITERAL_TAG, // "<script " or "<iframe "
|
||||
DIRECTIVE // "<!x"
|
||||
};
|
||||
|
||||
HtmlParse* html_parse_;
|
||||
State state_;
|
||||
GoogleString token_; // accmulates tag names and comments
|
||||
GoogleString literal_; // accumulates raw text to pass through
|
||||
GoogleString attr_name_; // accumulates attribute name
|
||||
GoogleString attr_value_; // accumulates attribute value
|
||||
HtmlElement::QuoteStyle attr_quote_; // quote used to delimit attribute
|
||||
bool has_attr_value_; // distinguishes <a n=> from <a n>
|
||||
HtmlElement* element_; // current element; used to collect attributes
|
||||
int line_;
|
||||
int tag_start_line_; // line at which we last transitioned to TAG state
|
||||
GoogleString id_;
|
||||
GoogleString literal_close_; // specific tag go close, e.g </script>
|
||||
|
||||
ContentType content_type_;
|
||||
DocType doctype_;
|
||||
|
||||
std::vector<HtmlElement*> element_stack_;
|
||||
|
||||
// Indicates that we have exceeded the enforced size limit on the maximum
|
||||
// number of input HTML that we can parse.
|
||||
bool size_limit_exceeded_;
|
||||
// Whether we should skip parsing of all subsequent bytes. HtmlParse calls
|
||||
// this once it has started or ended an HtmlElement.
|
||||
bool skip_parsing_;
|
||||
int64 num_bytes_parsed_;
|
||||
int64 size_limit_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlLexer);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_HTML_LEXER_H_
|
||||
@@ -1,58 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_HTML_TESTING_PEER_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_HTML_TESTING_PEER_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/htmlparse/public/html_node.h"
|
||||
#include "net/instaweb/htmlparse/public/html_parse.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlElement;
|
||||
class HtmlEvent;
|
||||
|
||||
class HtmlTestingPeer {
|
||||
public:
|
||||
HtmlTestingPeer() { }
|
||||
|
||||
static void SetNodeParent(HtmlNode* node, HtmlElement* parent) {
|
||||
node->set_parent(parent);
|
||||
}
|
||||
static void AddEvent(HtmlParse* parser, HtmlEvent* event) {
|
||||
parser->AddEvent(event);
|
||||
}
|
||||
static void SetCurrent(HtmlParse* parser, HtmlNode* node) {
|
||||
parser->SetCurrent(node);
|
||||
}
|
||||
static void set_coalesce_characters(HtmlParse* parser, bool x) {
|
||||
parser->set_coalesce_characters(x);
|
||||
}
|
||||
static size_t symbol_table_size(HtmlParse* parser) {
|
||||
return parser->symbol_table_size();
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlTestingPeer);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_HTML_TESTING_PEER_H_
|
||||
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_CANONICAL_ATTRIBUTES_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_CANONICAL_ATTRIBUTES_H_
|
||||
|
||||
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlElement;
|
||||
class HtmlParse;
|
||||
|
||||
// Rewrites every attribute-value that can be safely decoded. This helps us
|
||||
// determine whether our attribute value parsing is problematic.
|
||||
class CanonicalAttributes : public EmptyHtmlFilter {
|
||||
public:
|
||||
explicit CanonicalAttributes(HtmlParse* html_parse);
|
||||
virtual ~CanonicalAttributes();
|
||||
|
||||
virtual void StartDocument();
|
||||
virtual void StartElement(HtmlElement* element);
|
||||
virtual const char* Name() const { return "CanonicalAttributes"; }
|
||||
int num_changes() const { return num_changes_; }
|
||||
int num_errors() const { return num_errors_; }
|
||||
|
||||
private:
|
||||
HtmlParse* html_parse_;
|
||||
int num_changes_;
|
||||
int num_errors_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(CanonicalAttributes);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_CANONICAL_ATTRIBUTES_H_
|
||||
@@ -1,90 +0,0 @@
|
||||
// Copyright 2010 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Author: mdsteele@google.com (Matthew D. Steele)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_DOCTYPE_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_DOCTYPE_H_
|
||||
|
||||
#include "net/instaweb/util/public/string_util.h" // for StringPiece
|
||||
|
||||
namespace net_instaweb {
|
||||
struct ContentType;
|
||||
|
||||
// Holds an HTML Doctype declaration, providing a parsing mechanism and queries
|
||||
// for properties.
|
||||
class DocType {
|
||||
public:
|
||||
DocType() : doctype_(UNKNOWN) {}
|
||||
DocType(const DocType& src) : doctype_(src.doctype_) {}
|
||||
~DocType() {}
|
||||
|
||||
DocType& operator=(const DocType& src) {
|
||||
if (&src != this) {
|
||||
doctype_ = src.doctype_;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator==(const DocType& other) const {
|
||||
return doctype_ == other.doctype_;
|
||||
}
|
||||
|
||||
bool operator!=(const DocType& other) const {
|
||||
return doctype_ != other.doctype_;
|
||||
}
|
||||
|
||||
// Return true iff this is a known XHTML doctype (of some version).
|
||||
bool IsXhtml() const;
|
||||
// Return true iff this is an HTML 5 or XHTML 5 doctype.
|
||||
bool IsVersion5() const;
|
||||
// TODO(mdsteele): Add more such methods as necessary.
|
||||
|
||||
static const DocType kUnknown;
|
||||
static const DocType kHTML5;
|
||||
static const DocType kHTML4Strict;
|
||||
static const DocType kHTML4Transitional;
|
||||
static const DocType kXHTML5;
|
||||
static const DocType kXHTML11;
|
||||
static const DocType kXHTML10Strict;
|
||||
static const DocType kXHTML10Transitional;
|
||||
|
||||
// Given the contents of an HTML directive and the content type of the file
|
||||
// it appears in, update this DocType to match that specified by the
|
||||
// directive and return true. If the directive is not a doctype directive,
|
||||
// return false and don't alter the DocType.
|
||||
bool Parse(const StringPiece& directive,
|
||||
const ContentType& content_type);
|
||||
|
||||
private:
|
||||
enum DocTypeEnum {
|
||||
UNKNOWN = 0,
|
||||
HTML_5,
|
||||
HTML_4_STRICT,
|
||||
HTML_4_TRANSITIONAL,
|
||||
XHTML_5,
|
||||
XHTML_1_1,
|
||||
XHTML_1_0_STRICT,
|
||||
XHTML_1_0_TRANSITIONAL,
|
||||
OTHER_XHTML,
|
||||
};
|
||||
|
||||
explicit DocType(DocTypeEnum doctype) : doctype_(doctype) {}
|
||||
|
||||
DocTypeEnum doctype_;
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_DOCTYPE_H_
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_EMPTY_HTML_FILTER_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_EMPTY_HTML_FILTER_H_
|
||||
|
||||
#include "net/instaweb/htmlparse/public/html_filter.h"
|
||||
|
||||
// This 'string' is not needed for this include. But removing it breaks
|
||||
// PageSpeed Insights. See
|
||||
// third_party/pagespeed/src/pagespeed/html/external_resource_filter.h
|
||||
// Remove this once that one is fixed.
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlCdataNode;
|
||||
class HtmlCharactersNode;
|
||||
class HtmlCommentNode;
|
||||
class HtmlDirectiveNode;
|
||||
class HtmlElement;
|
||||
class HtmlIEDirectiveNode;
|
||||
|
||||
// TODO(jmarantz): remove this -- it's IWYU-superfluous but is needed until
|
||||
// PageSpeed Insights puts this declaration in. Note that IWYU annoyingly
|
||||
// ignores the pragma on forward declarations, but the intent should be clear.
|
||||
class HtmlParse;
|
||||
|
||||
// Base class for rewriting filters that don't need to be sure to
|
||||
// override every filter method. Other filters that need to be sure
|
||||
// they override every method would derive directly from HtmlFilter.
|
||||
class EmptyHtmlFilter : public HtmlFilter {
|
||||
public:
|
||||
EmptyHtmlFilter();
|
||||
virtual ~EmptyHtmlFilter();
|
||||
|
||||
virtual void StartDocument();
|
||||
virtual void EndDocument();
|
||||
virtual void StartElement(HtmlElement* element);
|
||||
virtual void EndElement(HtmlElement* element);
|
||||
virtual void Cdata(HtmlCdataNode* cdata);
|
||||
virtual void Comment(HtmlCommentNode* comment);
|
||||
virtual void IEDirective(HtmlIEDirectiveNode* directive);
|
||||
virtual void Characters(HtmlCharactersNode* characters);
|
||||
virtual void Directive(HtmlDirectiveNode* directive);
|
||||
virtual void Flush();
|
||||
virtual void DetermineEnabled();
|
||||
|
||||
// Note -- this does not provide an implementation for Name(). This
|
||||
// must be supplied by derived classes.
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_EMPTY_HTML_FILTER_H_
|
||||
@@ -1,50 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_EXPLICIT_CLOSE_TAG_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_EXPLICIT_CLOSE_TAG_H_
|
||||
|
||||
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlElement;
|
||||
|
||||
// Filter that makes every tag explicitly or briefly closed, so that when we
|
||||
// re-serialize we can see the structure as interpreted by the parser.
|
||||
//
|
||||
// This is intended for validation & unit-testing so that we can see
|
||||
// the DOM-structure output of the parser in the serialized output.
|
||||
// In general we will not want to turn this filter on in production
|
||||
// because it makes the HTML bigger.
|
||||
class ExplicitCloseTag : public EmptyHtmlFilter {
|
||||
public:
|
||||
ExplicitCloseTag() {}
|
||||
virtual ~ExplicitCloseTag();
|
||||
|
||||
// The only parser callback this filter overrides. The body is not in this
// header; presumably this is where the element's close style is rewritten
// -- confirm in the .cc file.
virtual void EndElement(HtmlElement* element);
|
||||
// Returns the fixed filter name "ExplicitCloseTag".
virtual const char* Name() const { return "ExplicitCloseTag"; }
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ExplicitCloseTag);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_EXPLICIT_CLOSE_TAG_H_
|
||||
@@ -1,79 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_FILE_DRIVER_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_FILE_DRIVER_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/htmlparse/public/html_writer_filter.h"
|
||||
#include "net/instaweb/htmlparse/public/logging_html_filter.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class FileSystem;
|
||||
class HtmlParse;
|
||||
class MessageHandler;
|
||||
class StatisticsLog;
|
||||
|
||||
// Framework for reading an input HTML file, running it through
|
||||
// a chain of HTML filters, and writing an output file.
|
||||
class FileDriver {
|
||||
public:
|
||||
// Ownership of html_parse and file_system is not visible from this header;
// presumably both are borrowed and must outlive the driver -- confirm.
FileDriver(HtmlParse* html_parse, FileSystem* file_system);
|
||||
|
||||
// Return the parser. This can be used to add filters.
|
||||
HtmlParse* html_parse() { return html_parse_; }
|
||||
|
||||
// Helper function to generate an output .html filename from
|
||||
// an input filename. Given "/a/b/c.html" returns "a/b/c.out.html".
|
||||
// Returns false if the input file does not contain a "."
|
||||
// NOTE(review): the example output drops the leading "/" of the input;
// presumably "/a/b/c.out.html" was meant -- confirm against the
// implementation.
static bool GenerateOutputFilename(
|
||||
const char* infilename, GoogleString* outfilename);
|
||||
|
||||
// Helper function to generate an output .stats filename from
|
||||
// an input filename. Given "/a/b/c.html" returns "a/b/c.stats".
|
||||
// Returns false if the input file does not contain a "."
|
||||
// NOTE(review): same leading-"/" inconsistency in the example as above.
static bool GenerateStatsFilename(
|
||||
const char* infilename, GoogleString* statsfilename);
|
||||
|
||||
// Parses infilename; outfilename and statsfilename name the output files.
// Error messages are sent to the message handler, and true is returned
|
||||
// if the file was parsed successfully.
|
||||
bool ParseFile(const char* infilename,
|
||||
const char* outfilename,
|
||||
const char* statsfilename,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Indicates that we should Flush the parser every x bytes. Disabled if x<=0.
|
||||
void set_flush_byte_count(int x) { flush_byte_count_ = x; }
|
||||
|
||||
private:
|
||||
// Parser handed in at construction and exposed via html_parse().
HtmlParse* html_parse_;
|
||||
LoggingFilter logging_filter_;
|
||||
StatisticsLog* stats_log_;
|
||||
HtmlWriterFilter html_write_filter_;
|
||||
// Presumably records whether the member filters above have already been
// registered with the parser -- confirm in the .cc file.
bool filters_added_;
|
||||
FileSystem* file_system_;
|
||||
// Flush interval in bytes, set by set_flush_byte_count(); <= 0 disables it.
int flush_byte_count_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(FileDriver);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_FILE_DRIVER_H_
|
||||
@@ -1,48 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmaessen@google.com (Jan Maessen)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_FILE_STATISTICS_LOG_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_FILE_STATISTICS_LOG_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/htmlparse/public/statistics_log.h"
|
||||
#include "net/instaweb/util/public/file_system.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
|
||||
// Statistics logger that sends its output to a file.
|
||||
class FileStatisticsLog : public StatisticsLog {
|
||||
public:
|
||||
// Note: calling context responsible for closing & cleaning up file.
|
||||
// Both pointers are stored as raw members, so they presumably must outlive
// this object -- confirm against the callers.
explicit FileStatisticsLog(FileSystem::OutputFile* file,
|
||||
MessageHandler* message_handler);
|
||||
virtual ~FileStatisticsLog();
|
||||
// Logs one named integer statistic. The body is not in this header;
// presumably it writes to file_ and reports problems via message_handler_.
virtual void LogStat(const char *statName, int value);
|
||||
|
||||
private:
|
||||
// Destination file; closed and cleaned up by the calling context (see the
// constructor note).
FileSystem::OutputFile* file_;
|
||||
MessageHandler* message_handler_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(FileStatisticsLog);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_FILE_STATISTICS_LOG_H_
|
||||
@@ -1,400 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_ELEMENT_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_ELEMENT_H_
|
||||
|
||||
#include "net/instaweb/htmlparse/public/html_name.h"
|
||||
#include "net/instaweb/htmlparse/public/html_node.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/inline_slist.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Represents an HTML tag, including all its attributes. These are never
|
||||
// constructed independently, but are managed by class HtmlParse. They
|
||||
// are constructed when parsing an HTML document, and they can also be
|
||||
// synthesized via HtmlParse::NewElement.
|
||||
//
|
||||
// Note that HtmlElement* saved during filter execution are valid only until
|
||||
// a Flush occurs. HtmlElement* can still be fully accessed during a Flush, but
|
||||
// after that, to save memory, the contents of the HtmlElement* are cleared.
|
||||
// After that, the only legal operation on the pointer is to call
|
||||
// HtmlParse::IsRewritable(), which will return false.
|
||||
class HtmlElement : public HtmlNode {
|
||||
public:
|
||||
// Tags can be closed in three ways: implicitly (e.g. <img ..>),
|
||||
// briefly (e.g. <br/>), or explicitly (<a...>...</a>). The
|
||||
// Lexer will always record the way it parsed a tag, but synthesized
|
||||
// elements will have AUTO_CLOSE, and rewritten elements may
|
||||
// no longer qualify for the closing style with which they were
|
||||
// parsed.
|
||||
enum CloseStyle {
|
||||
AUTO_CLOSE, // synthesized tag, or not yet closed in source
|
||||
IMPLICIT_CLOSE, // E.g. <img...> <meta...> <link...> <br...> <input...>
|
||||
EXPLICIT_CLOSE, // E.g. <a href=...>anchor</a>
|
||||
BRIEF_CLOSE, // E.g. <head/>
|
||||
UNCLOSED // Was never closed in source
|
||||
};
|
||||
|
||||
// Various ways things can be quoted (or not)
|
||||
enum QuoteStyle {
|
||||
NO_QUOTE,
|
||||
SINGLE_QUOTE,
|
||||
DOUBLE_QUOTE
|
||||
};
|
||||
|
||||
// One name/value pair on an element, stored as a node of the element's
// intrusive attribute list.
class Attribute : public InlineSListElement<Attribute> {
|
||||
public:
|
||||
// A large quantity of HTML in the wild has attributes that are
|
||||
// improperly escaped. Browsers are generally tolerant of this.
|
||||
// But we want to avoid corrupting pages we do not understand.
|
||||
|
||||
// The result of DecodedValueOrNull() and escaped_value() is still
|
||||
// owned by this, and will be invalidated by a subsequent call to
|
||||
// SetValue() or SetEscapedValue().
|
||||
|
||||
// Returns the attribute name, which is not guaranteed to be case-folded.
|
||||
// Compare keyword() to the Keyword constant found in html_name.h for
|
||||
// fast attribute comparisons.
|
||||
const char* name_str() const { return name_.c_str(); }
|
||||
|
||||
// Returns the HTML keyword enum. If this attribute name is not
|
||||
// recognized, returns HtmlName::kNotAKeyword, and you can examine
|
||||
// name_str().
|
||||
HtmlName::Keyword keyword() const { return name_.keyword(); }
|
||||
|
||||
// Returns a copy of the attribute's name.
HtmlName name() const { return name_; }
|
||||
void set_name(const HtmlName& name) { name_ = name; }
|
||||
|
||||
// Returns the value in its original form, directly from the HTML source.
|
||||
// This may have HTML escapes in it, such as "&amp;".
|
||||
const char* escaped_value() const { return escaped_value_.get(); }
|
||||
|
||||
// The result of DecodedValueOrNull() is still owned by this, and
|
||||
// will be invalidated by a subsequent call to SetValue().
|
||||
//
|
||||
// The result will be a NUL-terminated string containing the value of the
|
||||
// attribute, or NULL if the attribute has no value at all (this is
|
||||
// distinct from having the empty string for a value), or there is
|
||||
// a decoding error. E.g.
|
||||
// <tag a="val"> --> "val"
|
||||
// <tag a="&amp;"> --> "&"
|
||||
// <tag a=""> --> ""
|
||||
// <tag a> --> NULL
|
||||
// <tag a="muñecos"> --> NULL (decoding_error()==true)
|
||||
//
|
||||
// Returns the unescaped value, suitable for directly operating on
|
||||
// in filters as URLs or other data. Note that decoding_error() is
|
||||
// true if the parsed value from HTML could not be decoded. This
|
||||
// might occur if:
|
||||
// - the charset is not known
|
||||
// - the charset is not supported. Currently none are supported and
|
||||
// only values that fall in 7-bit ascii can be interpreted.
|
||||
// - the charset is known & supported but the value does not appear to be
|
||||
// legal.
|
||||
//
|
||||
// The decoded value uses 8-bit characters to represent any unicode
|
||||
// code-point less than 256.
|
||||
//
// The decoded value is computed lazily, on the first call.
const char* DecodedValueOrNull() const {
|
||||
if (!decoded_value_computed_) {
|
||||
ComputeDecodedValue();
|
||||
}
|
||||
return decoded_value_.get();
|
||||
}
|
||||
|
||||
// Overwrites the stored decoding-error flag.
// NOTE(review): this does not touch decoded_value_computed_, so a later
// lazy ComputeDecodedValue() may overwrite the flag -- confirm.
void set_decoding_error(bool x) { decoding_error_ = x; }
|
||||
// Reports the decoding-error flag, computing the decoded value first if
// that has not happened yet.
bool decoding_error() const {
|
||||
if (!decoded_value_computed_) {
|
||||
ComputeDecodedValue();
|
||||
}
|
||||
return decoding_error_;
|
||||
}
|
||||
|
||||
// Returns the style of quotation mark (none, single, or double)
|
||||
// recorded for this attribute's value.
|
||||
QuoteStyle quote_style() const { return quote_style_; }
|
||||
|
||||
// Textual form of quote for printing.
|
||||
const char* quote_str() const;
|
||||
|
||||
// Two related methods to modify the value of attribute (eg to rewrite
|
||||
// dest of src or href). As with the constructor, copies the string in,
|
||||
// so caller retains ownership of value.
|
||||
//
|
||||
// A StringPiece pointing to an empty string (that is, a char array {'\0'})
|
||||
// indicates that the attribute value is the empty string (e.g. <foo
|
||||
// bar="">); however, a StringPiece with a data() pointer of NULL indicates
|
||||
// that the attribute has no value at all (e.g. <foo bar>). This is an
|
||||
// important distinction.
|
||||
//
|
||||
// Note that passing a value containing NULs in the middle will cause
|
||||
// breakage, but this isn't currently checked for.
|
||||
// TODO(mdsteele): Perhaps we should check for this?
|
||||
|
||||
// Sets the value of the attribute. No HTML escaping is expected.
|
||||
// This call causes the HTML-escaped value to be automatically computed
|
||||
// by scanning the value and escaping any characters required in HTML
|
||||
// attributes.
|
||||
void SetValue(const StringPiece& value);
|
||||
|
||||
// Sets the escaped value. This is intended to be called from the HTML
|
||||
// Lexer, and results in the decoded value being computed automatically by
|
||||
// scanning the value for escape sequences.
|
||||
void SetEscapedValue(const StringPiece& value);
|
||||
|
||||
void set_quote_style(QuoteStyle new_quote_style) {
|
||||
quote_style_ = new_quote_style;
|
||||
}
|
||||
|
||||
friend class HtmlElement;
|
||||
|
||||
private:
|
||||
// Const because it is invoked lazily from const accessors; the fields it
// fills in are declared mutable for that reason.
void ComputeDecodedValue() const;
|
||||
|
||||
// This should only be called from AddAttribute
|
||||
Attribute(const HtmlName& name, const StringPiece& escaped_value,
|
||||
QuoteStyle quote_style);
|
||||
|
||||
static inline void CopyValue(const StringPiece& src,
|
||||
scoped_array<char>* dst);
|
||||
|
||||
HtmlName name_;
|
||||
QuoteStyle quote_style_ : 8;
|
||||
mutable bool decoding_error_;
|
||||
mutable bool decoded_value_computed_;
|
||||
|
||||
// Attribute value represented as ascii and
|
||||
// HTML-escape-sequences, typically parsed directly from an HTML
|
||||
// file. This is the canonical representation, and it can handle
|
||||
// any arbitrary multi-byte characters.
|
||||
//
|
||||
// Note that it is acceptable to have 8-bit characters in escape
|
||||
// sequences (typically iso8859). However we will not be able to
|
||||
// decode such attributes.
|
||||
scoped_array<char> escaped_value_;
|
||||
|
||||
// An 8-bit representation of the escaped_value. Escape sequences
|
||||
// that contain character-codes >= 256 are not decoded, and will
|
||||
// result in decoding_error_==true. Also note that a literal 8-bit
|
||||
// code in escaped_value_ cannot be decoded either.
|
||||
//
|
||||
// We can get fewer decoding errors if we are careful to track the
|
||||
// character-encoding for the document, and implement some of the
|
||||
// popular ones, e.g. utf8, gb2312 and iso8859. Note that failing
|
||||
// to decode an attribute value does not impact our ability to
|
||||
// parse and reserialize the document. It just prevents us from
|
||||
// looking at the decoded value, which is a requirement primarily
|
||||
// for tags referencing URLs, e.g. <img src=...>.
|
||||
//
|
||||
// Note that we do not decode non-ASCII characters but we can
|
||||
// represent them in escaped_value_. We can get 8-bit characters
|
||||
// into decoded_value_ via escape sequences for code-points below 256
// (e.g. &#233;).
|
||||
mutable scoped_array<char> decoded_value_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(Attribute);
|
||||
};
|
||||
|
||||
typedef InlineSList<Attribute> AttributeList;
|
||||
typedef InlineSList<Attribute>::Iterator AttributeIterator;
|
||||
typedef InlineSList<Attribute>::ConstIterator AttributeConstIterator;
|
||||
|
||||
virtual ~HtmlElement();
|
||||
|
||||
// Determines whether this node is still accessible via API. Note that
|
||||
// when a FLUSH occurs after an open-element, the element will be live()
|
||||
// but will not be rewritable. Specifically, node->live() can be true when
|
||||
// html_parse->IsRewritable(node) is false. Once a node is closed, a FLUSH
|
||||
// will cause the node's data to be freed, which triggers this method
|
||||
// returning false.
|
||||
virtual bool live() const { return (data_.get() != NULL) && data_->live_; }
|
||||
|
||||
virtual void MarkAsDead(const HtmlEventListIterator& end);
|
||||
|
||||
// Add a copy of an attribute to this element. The attribute may come
|
||||
// from this element, or another one.
|
||||
void AddAttribute(const Attribute& attr);
|
||||
|
||||
// Unconditionally add attribute, copying value.
|
||||
// For binary attributes (those without values) use value=NULL.
|
||||
// TODO(sligocki): StringPiece(NULL) seems fragile because what it is or
|
||||
// how it's treated is not documented.
|
||||
//
|
||||
// Doesn't check for attribute duplication (which is illegal in html).
|
||||
//
|
||||
// The value, if non-null, is assumed to be unescaped. See also
|
||||
// AddEscapedAttribute.
|
||||
void AddAttribute(const HtmlName& name,
|
||||
const StringPiece& decoded_value,
|
||||
QuoteStyle quote_style);
|
||||
// As AddAttribute, but assumes value has been escaped for html output.
|
||||
void AddEscapedAttribute(const HtmlName& name,
|
||||
const StringPiece& escaped_value,
|
||||
QuoteStyle quote_style);
|
||||
|
||||
// Remove the attribute with the given name. Return true if the attribute
|
||||
// was deleted, false if it wasn't there to begin with.
|
||||
bool DeleteAttribute(HtmlName::Keyword keyword);
|
||||
|
||||
// Look up attribute by name. NULL if no attribute exists.
|
||||
// Use this for attributes whose value you might want to change
|
||||
// after lookup.
|
||||
const Attribute* FindAttribute(HtmlName::Keyword keyword) const;
|
||||
// Non-const overload: forwards to the const version above and casts away
// the constness of the result.
Attribute* FindAttribute(HtmlName::Keyword keyword) {
|
||||
const HtmlElement* const_this = this;
|
||||
const Attribute* result = const_this->FindAttribute(keyword);
|
||||
return const_cast<Attribute*>(result);
|
||||
}
|
||||
|
||||
// Look up decoded attribute value by name.
|
||||
// Returns NULL if:
|
||||
// 1. no attribute exists
|
||||
// 2. the attribute has no value.
|
||||
// 3. the attribute has a value, but it cannot currently be safely decoded.
|
||||
// If you care about this distinction, call FindAttribute.
|
||||
// Use this only if you don't intend to change the attribute value;
|
||||
// if you might change the attribute value, use FindAttribute instead
|
||||
// (this avoids a double lookup).
|
||||
const char* AttributeValue(HtmlName::Keyword name) const {
|
||||
const Attribute* attribute = FindAttribute(name);
|
||||
if (attribute != NULL) {
|
||||
return attribute->DecodedValueOrNull();
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Look up escaped attribute value by name.
|
||||
// Returns NULL if:
|
||||
// 1. no attribute exists
|
||||
// 2. the attribute has no value.
|
||||
// If you care about this distinction, call FindAttribute.
|
||||
// Use this only if you don't intend to change the attribute value;
|
||||
// if you might change the attribute value, use FindAttribute instead
|
||||
// (this avoids a double lookup).
|
||||
const char* EscapedAttributeValue(HtmlName::Keyword name) const {
|
||||
const Attribute* attribute = FindAttribute(name);
|
||||
if (attribute != NULL) {
|
||||
return attribute->escaped_value();
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The accessors below dereference data_ without a NULL check, so they must
// not be called once the element's data has been freed (see FreeData()).

// Returns the element tag name, which is not guaranteed to be
|
||||
// case-folded. Compare keyword() to the Keyword constant found in
|
||||
// html_name.h for fast tag name comparisons.
|
||||
const char* name_str() const { return data_->name_.c_str(); }
|
||||
|
||||
// Returns the HTML keyword enum. If this tag name is not
|
||||
// recognized, returns HtmlName::kNotAKeyword, and you can
|
||||
// examine name_str().
|
||||
HtmlName::Keyword keyword() const { return data_->name_.keyword(); }
|
||||
|
||||
const HtmlName& name() const { return data_->name_; }
|
||||
|
||||
// Changing the tag of an element should only occur if the caller knows
|
||||
// that the old attributes make sense for the new tag. E.g. a div could
|
||||
// be changed to a span.
|
||||
void set_name(const HtmlName& new_tag) { data_->name_ = new_tag; }
|
||||
|
||||
const AttributeList& attributes() const { return data_->attributes_; }
|
||||
AttributeList* mutable_attributes() { return &data_->attributes_; }
|
||||
|
||||
friend class HtmlParse;
|
||||
friend class HtmlLexer;
|
||||
|
||||
CloseStyle close_style() const { return data_->close_style_; }
|
||||
void set_close_style(CloseStyle style) { data_->close_style_ = style; }
|
||||
|
||||
// Render an element as a string for debugging. This is not
|
||||
// intended as a fully legal serialization.
|
||||
void ToString(GoogleString* buf) const;
|
||||
void DebugPrint() const;
|
||||
|
||||
int begin_line_number() const { return data_->begin_line_number_; }
|
||||
int end_line_number() const { return data_->end_line_number_; }
|
||||
|
||||
protected:
|
||||
virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
|
||||
HtmlEventList* queue);
|
||||
|
||||
virtual HtmlEventListIterator begin() const { return data_->begin_; }
|
||||
virtual HtmlEventListIterator end() const { return data_->end_; }
|
||||
|
||||
private:
|
||||
// All of the data associated with an HtmlElement is indirected through this
|
||||
// class, so we can delete it on Flush after a CloseElement event.
|
||||
struct Data {
|
||||
Data(const HtmlName& name,
|
||||
const HtmlEventListIterator& begin,
|
||||
const HtmlEventListIterator& end);
|
||||
~Data();
|
||||
|
||||
// Pack four fields into 64 bits using bitfields. Warning: this
|
||||
// stuff is quite sensitive to details, so make sure to look at
|
||||
// object sizes before changing! Interleaving the 24-bit and
|
||||
// 8-bit member variables gives a total size of 8 bytes for these
|
||||
// 4 variables on a gcc 64-bit compile. But putting the two
|
||||
// 24-bit integers together gives a total size of 16 bytes, so
|
||||
// we interleave.
|
||||
//
|
||||
// HtmlParse::DeleteElement will set live_ to false without
|
||||
// deleting element->data_. Flushing an ElementClose deletes
|
||||
// data_ but HtmlElement knows that null data_ implies !live().
|
||||
unsigned begin_line_number_ : 24;
|
||||
unsigned live_ : 8;
|
||||
unsigned end_line_number_ : 24;
|
||||
CloseStyle close_style_ : 8;
|
||||
|
||||
HtmlName name_;
|
||||
AttributeList attributes_;
|
||||
HtmlEventListIterator begin_;
|
||||
HtmlEventListIterator end_;
|
||||
};
|
||||
|
||||
// Begin/end event iterators are used by HtmlParse to keep track
|
||||
// of the span of events underneath an element. This is primarily to
|
||||
// help delete the element. Events are not public.
|
||||
void set_begin(const HtmlEventListIterator& begin) { data_->begin_ = begin; }
|
||||
void set_end(const HtmlEventListIterator& end) { data_->end_ = end; }
|
||||
|
||||
// The line numbers are stored in 24-bit bitfields (see Data), so larger
// values are truncated on assignment.
void set_begin_line_number(int line) { data_->begin_line_number_ = line; }
|
||||
void set_end_line_number(int line) { data_->end_line_number_ = line; }
|
||||
|
||||
// construct via HtmlParse::NewElement
|
||||
HtmlElement(HtmlElement* parent, const HtmlName& name,
|
||||
const HtmlEventListIterator& begin,
|
||||
const HtmlEventListIterator& end);
|
||||
|
||||
// HtmlElement data is held in HtmlElement::Data*, which is freed
|
||||
// when a CloseElement is Flushed. The pointers themselves are
|
||||
// retained and can correctly answer html_parse->IsRewritable(element) and
|
||||
// element->live(), but the rest of the data (attributes etc)
|
||||
// is deleted.
|
||||
void FreeData() { data_.reset(NULL); }
|
||||
|
||||
scoped_ptr<Data> data_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlElement);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_ELEMENT_H_
|
||||
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_FILTER_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_FILTER_H_
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlCdataNode;
|
||||
class HtmlCharactersNode;
|
||||
class HtmlCommentNode;
|
||||
class HtmlDirectiveNode;
|
||||
class HtmlElement;
|
||||
class HtmlIEDirectiveNode;
|
||||
|
||||
// Base-class used to register for HTML Parser Callbacks. Derive from this
|
||||
// class and register with HtmlParse::AddFilter to use the HTML Parser.
|
||||
class HtmlFilter {
|
||||
public:
|
||||
HtmlFilter();
|
||||
virtual ~HtmlFilter();
|
||||
|
||||
// Starts a new document. Filters should clear their state in this function,
|
||||
// as the same Filter instance may be used for multiple HTML documents.
|
||||
virtual void StartDocument() = 0;
|
||||
// Note: EndDocument will be called immediately before the last Flush call.
|
||||
virtual void EndDocument() = 0;
|
||||
|
||||
// When an HTML element is encountered during parsing, each filter's
|
||||
// StartElement method is called. The HtmlElement lives for the entire
|
||||
// duration of the document.
|
||||
// NOTE(review): html_element.h says an element's contents are cleared by a
// Flush after it is closed, so presumably only the pointer itself stays
// valid that long -- confirm.
//
|
||||
// TODO(jmarantz): consider passing handles rather than pointers and
|
||||
// reference-counting them instead to save memory on long documents.
|
||||
virtual void StartElement(HtmlElement* element) = 0;
|
||||
virtual void EndElement(HtmlElement* element) = 0;
|
||||
|
||||
// Called for CDATA blocks (e.g. <![CDATA[foobar]]>)
|
||||
virtual void Cdata(HtmlCdataNode* cdata) = 0;
|
||||
|
||||
// Called for HTML comments that aren't IE directives (e.g. <!--foobar-->).
|
||||
virtual void Comment(HtmlCommentNode* comment) = 0;
|
||||
|
||||
// Called for an IE directive; typically used for CSS styling.
|
||||
// See http://msdn.microsoft.com/en-us/library/ms537512(VS.85).aspx
|
||||
//
|
||||
// TODO(mdsteele): Should we try to maintain the nested structure of
|
||||
// the conditionals, in the same way that we maintain nesting of elements?
|
||||
virtual void IEDirective(HtmlIEDirectiveNode* directive) = 0;
|
||||
|
||||
// Called for raw characters between tags.
|
||||
virtual void Characters(HtmlCharactersNode* characters) = 0;
|
||||
|
||||
// Called for HTML directives (e.g. <!doctype foobar>).
|
||||
virtual void Directive(HtmlDirectiveNode* directive) = 0;
|
||||
|
||||
// Notifies the Filter that a flush is occurring. A filter that's
|
||||
// generating streamed output should flush at this time. A filter
|
||||
// that's mutating elements can mutate any element seen since the
|
||||
// most recent flush; once an element is flushed it is already on
|
||||
// the wire to its destination and it's too late to mutate. Flush
|
||||
// is initiated by an application calling HtmlParse::Flush().
|
||||
//
|
||||
// Flush() is called after all other handlers during a HtmlParse::Flush(),
|
||||
// except RenderDone(), which (if in use) happens after Flush().
|
||||
virtual void Flush() = 0;
|
||||
|
||||
// Notifies a filter that an asynchronous rewrite & render computation
|
||||
// phase has finished. This is not used by HtmlParse itself, but only by
|
||||
// RewriteDriver for pre-render filters. Happens after the corresponding
|
||||
// flush, for every flush window. Default implementation does nothing.
|
||||
// TODO(morlovich): Push this down into CommonFilter and convert all the
|
||||
// pre-render filters to inherit off it.
|
||||
virtual void RenderDone();
|
||||
|
||||
// Invoked by rewrite driver where all filters should determine whether
|
||||
// they are enabled for this request.
|
||||
virtual void DetermineEnabled() = 0;
|
||||
|
||||
// Returns whether this filter is enabled, i.e. the flag most recently
|
||||
// stored via set_is_enabled().
|
||||
bool is_enabled() const { return is_enabled_; }
|
||||
|
||||
// The name of this filter -- used for logging and debugging.
|
||||
virtual const char* Name() const = 0;
|
||||
|
||||
protected:
|
||||
// Intended to be called from DetermineEnabled implementations in filters.
void set_is_enabled(bool is_enabled) { is_enabled_ = is_enabled; }
|
||||
|
||||
private:
|
||||
// Initial value is set by the constructor, which is not defined in this
// header.
bool is_enabled_;
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_FILTER_H_
|
||||
@@ -1,206 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/htmlparse/public/html_name.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_hash.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "pagespeed/kernel/util/sparse_hash_map.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Helper class for HtmlParser to recognize HTML keywords, handle escaping
|
||||
// and unescaping, and assist the lexer in understanding how to interpret
|
||||
// unbalanced tags.
|
||||
class HtmlKeywords {
|
||||
public:
|
||||
// Initialize a singleton instance of this class. This call is
|
||||
// inherently thread unsafe, but only the first time it is called.
|
||||
// If multi-threaded programs call this function before spawning
|
||||
// threads then there will be no races.
|
||||
static void Init();
|
||||
|
||||
// Tear down the singleton instance of this class, freeing any
|
||||
// allocated memory. This call is inherently thread unsafe.
|
||||
static void ShutDown();
|
||||
|
||||
// Returns an HTML keyword as a string, or NULL if not a keyword.
|
||||
static const char* KeywordToString(HtmlName::Keyword keyword) {
|
||||
return singleton_->keyword_vector_[keyword];
|
||||
}
|
||||
|
||||
// Take a raw text and escape it so it's safe for an HTML attribute,
|
||||
// e.g. a&b --> a&b
|
||||
static StringPiece Escape(const StringPiece& unescaped, GoogleString* buf) {
|
||||
return singleton_->EscapeHelper(unescaped, buf);
|
||||
}
|
||||
|
||||
// Take escaped text and unescape it so its value can be interpreted,
|
||||
// e.g. "http://myhost.com/p?v&w" --> "http://myhost.com/p?v&w"
|
||||
//
|
||||
// *decoding_error is set to true if the escaped string could not be
|
||||
// safely transformed into a simple stream of bytes.
|
||||
//
|
||||
// TODO(jmarantz): Support a variant where we unescape to UTF-8.
|
||||
static StringPiece Unescape(const StringPiece& escaped, GoogleString* buf,
|
||||
bool* decoding_error) {
|
||||
return singleton_->UnescapeHelper(escaped, buf, decoding_error);
|
||||
}
|
||||
|
||||
// Note that Escape and Unescape are not guaranteed to be inverses of
|
||||
// one another. For example, Unescape("")=="&", but Escape("&")="&".
|
||||
// However, note that Unescape(Escape(s)) == s.
|
||||
//
|
||||
// Another case to be wary of is when the argument to Unescape is not
|
||||
// properly escaped. The result will be that the string is returned
|
||||
// unmodified. For example, Unescape("a&b")=="a&b", butthen re-escaping
|
||||
// that will give "a&b". Hence, the careful maintainer of an HTML
|
||||
// parsing and rewriting system will need to maintain the original escaped
|
||||
// text parsed from HTML files, and pass that to browsers.
|
||||
|
||||
// Determines whether an open tag of type k1 should be automatically closed
|
||||
// if a StartElement for tag k2 is encountered. E.g. <tr><tbody> should
|
||||
// be transformed to <tr></tr><tbody>.
|
||||
static bool IsAutoClose(HtmlName::Keyword k1, HtmlName::Keyword k2) {
|
||||
return std::binary_search(singleton_->auto_close_.begin(),
|
||||
singleton_->auto_close_.end(),
|
||||
MakeKeywordPair(k1, k2));
|
||||
}
|
||||
|
||||
// Determines whether an open tag of type k1 should be automatically closed
|
||||
// if an EndElement for tag k2 is encountered. E.g. <tbody></table> should
|
||||
// be transformed into <tbody></tbody></table>.
|
||||
static bool IsContained(HtmlName::Keyword k1, HtmlName::Keyword k2) {
|
||||
return std::binary_search(singleton_->contained_.begin(),
|
||||
singleton_->contained_.end(),
|
||||
MakeKeywordPair(k1, k2));
|
||||
}
|
||||
|
||||
// Determines whether the specified HTML keyword is closed automatically
|
||||
// by the parser if the close-tag is omitted. E.g. <head> must be closed,
|
||||
// but formatting elements such as <p> do not need to be closed. Also note
|
||||
// the distinction with tags which are *implicitly* closed in HTML such as
|
||||
// <img> and <br>.
|
||||
static bool IsOptionallyClosedTag(HtmlName::Keyword keyword) {
|
||||
return std::binary_search(singleton_->optionally_closed_.begin(),
|
||||
singleton_->optionally_closed_.end(),
|
||||
keyword);
|
||||
}
|
||||
|
||||
private:
|
||||
typedef int32 KeywordPair; // Encoded via shift & OR.
|
||||
typedef std::vector<KeywordPair> KeywordPairVec;
|
||||
typedef std::vector<HtmlName::Keyword> KeywordVec;
|
||||
|
||||
HtmlKeywords();
|
||||
const char* UnescapeAttributeValue();
|
||||
void InitEscapeSequences();
|
||||
void InitAutoClose();
|
||||
void InitContains();
|
||||
void InitOptionallyClosedKeywords();
|
||||
|
||||
// Translate the escape sequence and append the corresponding character
|
||||
// into *buf.
|
||||
//
|
||||
// accumulate_numeric_code==true means that the sequence has been accumulated
|
||||
// into numeric_value and that will be used to form a character for appending
|
||||
// to *buf.
|
||||
//
|
||||
// accumulate_numeric_code==false means that the sequence is in 'escape' and
|
||||
// that will be looked up in the keyword tables to get the character to append
|
||||
// to *buf.
|
||||
//
|
||||
// was_terminated indicates that the escape-sequence was properly terminated
|
||||
// by a semicolon. This affects handling of unknown escape sequences, where
|
||||
// we will need to retain the ";".
|
||||
//
|
||||
// Returns false iff the escape-sequence is a valid multi-byte sequence,
|
||||
// which we can't currently represent in our 8-bit format.
|
||||
bool TryUnescape(bool accumulate_numeric_code,
|
||||
uint32 numeric_value,
|
||||
const GoogleString& escape,
|
||||
bool was_terminated,
|
||||
GoogleString* buf) const;
|
||||
|
||||
// Encodes two keyword enums as a KeywordPair, represented as an int32.
|
||||
static KeywordPair MakeKeywordPair(HtmlName::Keyword k1,
|
||||
HtmlName::Keyword k2) {
|
||||
return (static_cast<KeywordPair>(k1) << 16) | static_cast<KeywordPair>(k2);
|
||||
}
|
||||
|
||||
// Adds all combinations of the members of k1_list and k2_list to
|
||||
// kmap. The lists are represented as space-delimited keywords.
|
||||
// E.g. if k1_list="a b" and k2_list="c d", then this adds (a,c),
|
||||
// (b,c), (a,d), (b,d) to kmap.
|
||||
void AddCrossProduct(const StringPiece& k1_list, const StringPiece& k2_list,
|
||||
KeywordPairVec* kmap);
|
||||
void AddAutoClose(const StringPiece& k1_list, const StringPiece& k2_list) {
|
||||
AddCrossProduct(k1_list, k2_list, &auto_close_);
|
||||
}
|
||||
void AddContained(const StringPiece& k1_list, const StringPiece& k2_list) {
|
||||
AddCrossProduct(k1_list, k2_list, &contained_);
|
||||
}
|
||||
|
||||
// Adds every space-delimited token in klist to kset.
|
||||
void AddToSet(const StringPiece& klist, KeywordVec* kset);
|
||||
|
||||
static HtmlKeywords* singleton_;
|
||||
|
||||
StringPiece EscapeHelper(const StringPiece& unescaped,
|
||||
GoogleString* buf) const;
|
||||
StringPiece UnescapeHelper(const StringPiece& escaped,
|
||||
GoogleString* buf,
|
||||
bool* decoding_error) const;
|
||||
|
||||
// Conventional wisdom suggests this application calls for dense_hash_map,
|
||||
// but my microbenchmarks show that sparse_hash_map has better performance.
|
||||
// My theory is that the maps are sufficiently small that the algorithmic
|
||||
// differences are not dominant, but keeping the data small helps the
|
||||
// processor cache behavior.
|
||||
typedef sparse_hash_map<
|
||||
GoogleString, const char*,
|
||||
CaseFoldStringHash,
|
||||
CaseFoldStringEqual> StringStringSparseHashMapInsensitive;
|
||||
typedef sparse_hash_map<
|
||||
GoogleString, const char*,
|
||||
CasePreserveStringHash> StringStringSparseHashMapSensitive;
|
||||
|
||||
StringStringSparseHashMapInsensitive unescape_insensitive_map_;
|
||||
StringStringSparseHashMapSensitive unescape_sensitive_map_;
|
||||
StringStringSparseHashMapSensitive escape_map_;
|
||||
|
||||
CharStarVector keyword_vector_;
|
||||
|
||||
// These vectors of KeywordPair and Keyword are sorted numerically during
|
||||
// construction to enable binary-search during parsing.
|
||||
KeywordPairVec auto_close_;
|
||||
KeywordPairVec contained_;
|
||||
KeywordVec optionally_closed_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlKeywords);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_KEYWORDS_H_
|
||||
@@ -1,281 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_NAME_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_NAME_H_
|
||||
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// HTML names are case insensitive. However, in the parser, we keep
|
||||
// the original parsed case of the name, in addition to the html
|
||||
// keyword enumeration, if any. Thus for both tags and attribute
|
||||
// names, we have an enum representation which is used in filters
|
||||
// for scanning, plus we have the original string representation.
|
||||
class HtmlName {
|
||||
public:
|
||||
// We keep both attribute names and tag names in the same space
|
||||
// for convenience. This list must be kept in alpha-order and
|
||||
// in sync with the static array in html_name.cc.
|
||||
//
|
||||
// Note that this list does not need to cover all HTML keywords --
|
||||
// only the ones that we are interested in for rewriting.
|
||||
enum Keyword {
|
||||
kXml, // ?Xml
|
||||
kA,
|
||||
kAbbr,
|
||||
kAction,
|
||||
kAddress,
|
||||
kAlt,
|
||||
kArea,
|
||||
kArticle,
|
||||
kAside,
|
||||
kAsync,
|
||||
kAudio,
|
||||
kAutocomplete,
|
||||
kAutofocus,
|
||||
kAutoplay,
|
||||
kB,
|
||||
kBackground,
|
||||
kBase,
|
||||
kBdi,
|
||||
kBdo,
|
||||
kBlockquote,
|
||||
kBody,
|
||||
kBr,
|
||||
kButton,
|
||||
kCharset,
|
||||
kChecked,
|
||||
kCite,
|
||||
kClass,
|
||||
kCode,
|
||||
kCol,
|
||||
kColgroup,
|
||||
kColspan,
|
||||
kCommand,
|
||||
kContent,
|
||||
kControls,
|
||||
kData,
|
||||
kDataPagespeedFlushStyle,
|
||||
kDataPagespeedHref,
|
||||
kDataPagespeedPrioritize,
|
||||
kDataSrc,
|
||||
kDd,
|
||||
kDeclare,
|
||||
kDefaultchecked,
|
||||
kDefaultselected,
|
||||
kDefer,
|
||||
kDel,
|
||||
kDetails,
|
||||
kDfn,
|
||||
kDir,
|
||||
kDisabled,
|
||||
kDisplay,
|
||||
kDiv,
|
||||
kDl,
|
||||
kDt,
|
||||
kEm,
|
||||
kEmbed,
|
||||
kEnctype,
|
||||
kEvent,
|
||||
kFieldset,
|
||||
kFont,
|
||||
kFooter,
|
||||
kFor,
|
||||
kForm,
|
||||
kFormaction,
|
||||
kFormnovalidate,
|
||||
kFrame,
|
||||
kFrameborder,
|
||||
kH1,
|
||||
kH2,
|
||||
kH3,
|
||||
kH4,
|
||||
kH5,
|
||||
kH6,
|
||||
kHead,
|
||||
kHeader,
|
||||
kHeight,
|
||||
kHgroup,
|
||||
kHr,
|
||||
kHref,
|
||||
kHtml,
|
||||
kHttpEquiv,
|
||||
kI,
|
||||
kIcon,
|
||||
kId,
|
||||
kIframe,
|
||||
kImg,
|
||||
kIndeterminate,
|
||||
kIns,
|
||||
kInput,
|
||||
kIsmap,
|
||||
kKbd,
|
||||
kKeygen,
|
||||
kKeytype,
|
||||
kLang,
|
||||
kLanguage,
|
||||
kLi,
|
||||
kLink,
|
||||
kLoop,
|
||||
kManifest,
|
||||
kMark,
|
||||
kMarquee,
|
||||
kMedia,
|
||||
kMenu,
|
||||
kMeta,
|
||||
kMethod,
|
||||
kMultiple,
|
||||
kMuted,
|
||||
kName,
|
||||
kNav,
|
||||
kNoembed,
|
||||
kNoframes,
|
||||
kNohref,
|
||||
kNoresize,
|
||||
kNoscript,
|
||||
kNovalidate,
|
||||
kObject,
|
||||
kOl,
|
||||
kOnclick,
|
||||
kOnload,
|
||||
kOpen,
|
||||
kOptgroup,
|
||||
kOption,
|
||||
kOther,
|
||||
kP,
|
||||
kPagespeedBlankSrc,
|
||||
kPagespeedHighResSrc,
|
||||
kPagespeedIframe,
|
||||
kPagespeedInlineSrc,
|
||||
kPagespeedLazySrc,
|
||||
kPagespeedLowResSrc,
|
||||
kPagespeedLscExpiry,
|
||||
kPagespeedLscHash,
|
||||
kPagespeedLscUrl,
|
||||
kPagespeedNoDefer,
|
||||
kPagespeedNoTransform,
|
||||
kPagespeedOrigSrc,
|
||||
kPagespeedOrigType,
|
||||
kPagespeedSize,
|
||||
kPagespeedUrlHash,
|
||||
kParam,
|
||||
kPre,
|
||||
kProfile,
|
||||
kQ,
|
||||
kReadonly,
|
||||
kRel,
|
||||
kRequired,
|
||||
kReversed,
|
||||
kRowspan,
|
||||
kRp,
|
||||
kRt,
|
||||
kRuby,
|
||||
kS,
|
||||
kSamp,
|
||||
kScoped,
|
||||
kScript,
|
||||
kScrolling,
|
||||
kSeamless,
|
||||
kSection,
|
||||
kSelect,
|
||||
kSelected,
|
||||
kShape,
|
||||
kSmall,
|
||||
kSource,
|
||||
kSpan,
|
||||
kSrc,
|
||||
kStrong,
|
||||
kStyle,
|
||||
kSub,
|
||||
kTable,
|
||||
kTag,
|
||||
kTbody,
|
||||
kTd,
|
||||
kTest,
|
||||
kTextarea,
|
||||
kTfoot,
|
||||
kTh,
|
||||
kThead,
|
||||
kTime,
|
||||
kTitle,
|
||||
kTr,
|
||||
kTrack,
|
||||
kType,
|
||||
kU,
|
||||
kUl,
|
||||
kValuetype,
|
||||
kVar,
|
||||
kVideo,
|
||||
kWbr,
|
||||
kWidth,
|
||||
kWrap,
|
||||
kXmp,
|
||||
kNotAKeyword
|
||||
};
|
||||
|
||||
// Constructs an HTML name given a keyword, which can be
|
||||
// HtmlName::kNotAKeyword, and 'const char* str'. 'str'
|
||||
// is used to retain the case-sensitive spelling of the
|
||||
// keyword. The storage for 'str' must be managed, and
|
||||
// must be guaranteed valid throughout the life of the HtmlName.
|
||||
HtmlName(Keyword keyword, const char* str)
|
||||
: keyword_(keyword), c_str_(str) {
|
||||
}
|
||||
|
||||
// Returns the keyword enumeration for this HTML Name. Note that
|
||||
// keyword lookup is case-insensitive.
|
||||
Keyword keyword() const { return keyword_; }
|
||||
|
||||
// Return the atom string, which may not be case folded.
|
||||
const char* c_str() const { return c_str_; }
|
||||
|
||||
// Limited iterator (not an STL iterator). Example usage:
|
||||
// for (HtmlName::Iterator iter; !iter.AtEnd(); iter.Next()) {
|
||||
// use(iter.keyword(), iter.name());
|
||||
// }
|
||||
class Iterator {
|
||||
public:
|
||||
Iterator() : index_(-1) { Next(); }
|
||||
bool AtEnd() const;
|
||||
void Next();
|
||||
Keyword keyword() const;
|
||||
const char* name() const;
|
||||
|
||||
private:
|
||||
int index_;
|
||||
|
||||
// Implicit copy and assign ok. The members can be safely copied by bits.
|
||||
};
|
||||
|
||||
static int num_keywords();
|
||||
static Keyword Lookup(const StringPiece& name);
|
||||
|
||||
private:
|
||||
friend class HtmlNameTest;
|
||||
|
||||
Keyword keyword_;
|
||||
const char* c_str_;
|
||||
|
||||
// Implicit copy and assign ok. The members can be safely copied by bits.
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_NAME_H_
|
||||
@@ -1,252 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: mdsteele@google.com (Matthew D. Steele)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_NODE_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_NODE_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <list>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "net/instaweb/util/public/arena.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlElement;
|
||||
class HtmlEvent;
|
||||
|
||||
typedef std::list<HtmlEvent*> HtmlEventList;
|
||||
typedef HtmlEventList::iterator HtmlEventListIterator;
|
||||
|
||||
// Base class for HtmlElement and HtmlLeafNode. Generally represents all
|
||||
// lexical tokens in HTML, except for subclass HtmlElement, which
|
||||
// represents both the opening & closing token.
|
||||
class HtmlNode {
|
||||
public:
|
||||
virtual ~HtmlNode();
|
||||
friend class HtmlParse;
|
||||
|
||||
HtmlElement* parent() const { return parent_; }
|
||||
virtual bool live() const = 0;
|
||||
|
||||
// Marks a node as dead. The queue's end iterator should be passed in,
|
||||
// to remove references to stale iterators, and to force IsRewritable to
|
||||
// return false.
|
||||
virtual void MarkAsDead(const HtmlEventListIterator& end) = 0;
|
||||
|
||||
void* operator new(size_t size, Arena<HtmlNode>* arena) {
|
||||
return arena->Allocate(size);
|
||||
}
|
||||
|
||||
void operator delete(void* ptr, Arena<HtmlNode>* arena) {
|
||||
LOG(FATAL) << "HtmlNode must not be deleted directly.";
|
||||
}
|
||||
|
||||
protected:
|
||||
// TODO(jmarantz): jmaessen suggests instantiating the html nodes
|
||||
// without parents and computing them from context at the time they
|
||||
// are instantiated from the lexer. This is a little more difficult
|
||||
// when synthesizing new nodes, however. We assert sanity, however,
|
||||
// when calling HtmlParse::ApplyFilter.
|
||||
explicit HtmlNode(HtmlElement* parent) : parent_(parent) {}
|
||||
|
||||
// Create new event object(s) representing this node, and insert them into
|
||||
// the queue just before the given iterator; also, update this node object as
|
||||
// necessary so that begin() and end() will return iterators pointing to
|
||||
// the new event(s). The line number for each event should probably be -1.
|
||||
virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
|
||||
HtmlEventList* queue) = 0;
|
||||
|
||||
// Return an iterator pointing to the first event associated with this node.
|
||||
virtual HtmlEventListIterator begin() const = 0;
|
||||
// Return an iterator pointing to the last event associated with this node.
|
||||
virtual HtmlEventListIterator end() const = 0;
|
||||
|
||||
// Version that affects visibility of the destructor.
|
||||
void operator delete(void* ptr) {
|
||||
LOG(FATAL) << "HtmlNode must not be deleted directly.";
|
||||
}
|
||||
|
||||
private:
|
||||
friend class HtmlLexer;
|
||||
friend class HtmlTestingPeer;
|
||||
|
||||
// Note: setting the parent doesn't change the DOM -- it just updates
|
||||
// the pointer. This is intended to be called only from the DOM manipulation
|
||||
// methods in HtmlParse.
|
||||
void set_parent(HtmlElement* parent) { parent_ = parent; }
|
||||
|
||||
HtmlElement* parent_;
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlNode);
|
||||
};
|
||||
|
||||
class HtmlLeafNode : public HtmlNode {
|
||||
public:
|
||||
virtual ~HtmlLeafNode();
|
||||
virtual bool live() const { return (data_.get() != NULL) && data_->is_live_; }
|
||||
virtual void MarkAsDead(const HtmlEventListIterator& end);
|
||||
|
||||
const GoogleString& contents() const { return data_->contents_; }
|
||||
virtual HtmlEventListIterator begin() const {
|
||||
return data_->iter_;
|
||||
}
|
||||
virtual HtmlEventListIterator end() const {
|
||||
return data_->iter_;
|
||||
}
|
||||
void set_iter(const HtmlEventListIterator& iter) {
|
||||
data_->iter_ = iter;
|
||||
}
|
||||
|
||||
void FreeData() { data_.reset(NULL); }
|
||||
|
||||
protected:
|
||||
HtmlLeafNode(HtmlElement* parent, const HtmlEventListIterator& iter,
|
||||
const StringPiece& contents);
|
||||
|
||||
// Write-access to the contents is protected by default, and made
|
||||
// accessible by subclasses that need to expose this method.
|
||||
GoogleString* mutable_contents() { return &data_->contents_; }
|
||||
|
||||
private:
|
||||
struct Data {
|
||||
Data(const HtmlEventListIterator& iter, const StringPiece& contents)
|
||||
: contents_(contents.data(), contents.size()),
|
||||
is_live_(true),
|
||||
iter_(iter) {
|
||||
}
|
||||
GoogleString contents_;
|
||||
bool is_live_;
|
||||
HtmlEventListIterator iter_;
|
||||
};
|
||||
|
||||
scoped_ptr<Data> data_;
|
||||
};
|
||||
|
||||
// Leaf node representing a CDATA section
|
||||
class HtmlCdataNode : public HtmlLeafNode {
|
||||
public:
|
||||
virtual ~HtmlCdataNode();
|
||||
friend class HtmlParse;
|
||||
|
||||
protected:
|
||||
virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
|
||||
HtmlEventList* queue);
|
||||
|
||||
private:
|
||||
HtmlCdataNode(HtmlElement* parent,
|
||||
const StringPiece& contents,
|
||||
const HtmlEventListIterator& iter)
|
||||
: HtmlLeafNode(parent, iter, contents) {
|
||||
}
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlCdataNode);
|
||||
};
|
||||
|
||||
// Leaf node representing raw characters in HTML
|
||||
class HtmlCharactersNode : public HtmlLeafNode {
|
||||
public:
|
||||
virtual ~HtmlCharactersNode();
|
||||
void Append(const StringPiece& str) {
|
||||
mutable_contents()->append(str.data(), str.size());
|
||||
}
|
||||
friend class HtmlParse;
|
||||
|
||||
// Expose writable contents for Characters nodes.
|
||||
using HtmlLeafNode::mutable_contents;
|
||||
|
||||
protected:
|
||||
virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
|
||||
HtmlEventList* queue);
|
||||
|
||||
private:
|
||||
HtmlCharactersNode(HtmlElement* parent,
|
||||
const StringPiece& contents,
|
||||
const HtmlEventListIterator& iter)
|
||||
: HtmlLeafNode(parent, iter, contents) {
|
||||
}
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlCharactersNode);
|
||||
};
|
||||
|
||||
// Leaf node representing an HTML comment
|
||||
class HtmlCommentNode : public HtmlLeafNode {
|
||||
public:
|
||||
virtual ~HtmlCommentNode();
|
||||
friend class HtmlParse;
|
||||
|
||||
protected:
|
||||
virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
|
||||
HtmlEventList* queue);
|
||||
|
||||
private:
|
||||
HtmlCommentNode(HtmlElement* parent,
|
||||
const StringPiece& contents,
|
||||
const HtmlEventListIterator& iter)
|
||||
: HtmlLeafNode(parent, iter, contents) {
|
||||
}
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlCommentNode);
|
||||
};
|
||||
|
||||
// Leaf node representing an HTML IE directive
|
||||
class HtmlIEDirectiveNode : public HtmlLeafNode {
|
||||
public:
|
||||
virtual ~HtmlIEDirectiveNode();
|
||||
friend class HtmlParse;
|
||||
|
||||
protected:
|
||||
virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
|
||||
HtmlEventList* queue);
|
||||
|
||||
private:
|
||||
HtmlIEDirectiveNode(HtmlElement* parent,
|
||||
const StringPiece& contents,
|
||||
const HtmlEventListIterator& iter)
|
||||
: HtmlLeafNode(parent, iter, contents) {
|
||||
}
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlIEDirectiveNode);
|
||||
};
|
||||
|
||||
// Leaf node representing an HTML directive
|
||||
class HtmlDirectiveNode : public HtmlLeafNode {
|
||||
public:
|
||||
virtual ~HtmlDirectiveNode();
|
||||
friend class HtmlParse;
|
||||
|
||||
protected:
|
||||
virtual void SynthesizeEvents(const HtmlEventListIterator& iter,
|
||||
HtmlEventList* queue);
|
||||
|
||||
private:
|
||||
HtmlDirectiveNode(HtmlElement* parent,
|
||||
const StringPiece& contents,
|
||||
const HtmlEventListIterator& iter)
|
||||
: HtmlLeafNode(parent, iter, contents) {
|
||||
}
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlDirectiveNode);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_NODE_H_
|
||||
@@ -1,487 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_PARSE_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_PARSE_H_
|
||||
|
||||
#include <cstdarg>
|
||||
#include <cstddef>
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/htmlparse/public/html_element.h"
|
||||
#include "net/instaweb/htmlparse/public/html_name.h"
|
||||
#include "net/instaweb/htmlparse/public/html_node.h"
|
||||
#include "net/instaweb/http/public/content_type.h"
|
||||
#include "net/instaweb/util/public/arena.h"
|
||||
#include "net/instaweb/util/public/google_url.h"
|
||||
#include "net/instaweb/util/public/printf_format.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/symbol_table.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class DocType;
|
||||
class HtmlEvent;
|
||||
class HtmlFilter;
|
||||
class HtmlLexer;
|
||||
class MessageHandler;
|
||||
class Timer;
|
||||
|
||||
typedef std::set <const HtmlEvent*> ConstHtmlEventSet;
|
||||
|
||||
// Streaming Html Parser API. Callbacks defined in HtmlFilter are
|
||||
// called on each parser token.
|
||||
//
|
||||
// Any number of filters can be added to the Html Parser; they are
|
||||
// organized in a chain. Each filter processes a stream of SAX events
|
||||
// (HtmlEvent), interspersed by Flushes. The filter operates on the
|
||||
// sequence of events between flushes (a flush-window), and the system
|
||||
// passes the (possibly mutated) event-stream to the next filter.
|
||||
//
|
||||
// An HTML Event is a lexical token provided by the parser, including:
|
||||
// begin document
|
||||
// end document
|
||||
// begin element
|
||||
// end element
|
||||
// whitespace
|
||||
// characters
|
||||
// cdata
|
||||
// comment
|
||||
//
|
||||
// The parser retains the sequence of events as a data structure:
|
||||
// list<HtmlEvent>. HtmlEvents are sent to filters (HtmlFilter), as follows:
|
||||
// foreach filter in filter-chain
|
||||
// foreach event in flush-window
|
||||
// apply filter to event
|
||||
//
|
||||
// Filters may mutate the event streams as they are being processed,
|
||||
// and these mutations will be seen by downstream filters. The filters can
|
||||
// mutate any event that has not been flushed. Supported mutations include:
|
||||
// - Removing an HTML element whose begin/end tags are both within
|
||||
// the flush window. This will also remove any nested elements.
|
||||
// - Removing other HTML events
|
||||
// - Inserting new elements (automatically inserts begin/end events)
|
||||
// before or after "current" event
|
||||
// - Inserting new events, before or after "current" event
|
||||
class HtmlParse {
|
||||
public:
|
||||
explicit HtmlParse(MessageHandler* message_handler);
|
||||
virtual ~HtmlParse();
|
||||
|
||||
// Application methods for parsing functions and adding filters
|
||||
|
||||
// Add a new html filter to the filter-chain, without taking ownership
|
||||
// of it.
|
||||
void AddFilter(HtmlFilter* filter);
|
||||
|
||||
// Initiate a chunked parsing session. Finish with FinishParse. The
|
||||
// url is only used to resolve relative URLs; the contents are not
|
||||
// directly fetched. The caller must supply the text and call ParseText.
|
||||
//
|
||||
// Returns whether the URL is valid.
|
||||
bool StartParse(const StringPiece& url) {
|
||||
return StartParseWithType(url, kContentTypeHtml);
|
||||
}
|
||||
bool StartParseWithType(const StringPiece& url,
|
||||
const ContentType& content_type) {
|
||||
return StartParseId(url, url, content_type);
|
||||
}
|
||||
|
||||
// Returns whether the google_url() URL is valid.
|
||||
bool is_url_valid() const { return url_valid_; }
|
||||
|
||||
// Mostly useful for file-based rewriters so that messages can reference
|
||||
// the HTML file and produce navigable errors.
|
||||
//
|
||||
// Returns whether the URL is valid.
|
||||
virtual bool StartParseId(const StringPiece& url, const StringPiece& id,
|
||||
const ContentType& content_type);
|
||||
|
||||
// Parses an arbitrary block of an html file, queuing up the events. Call
|
||||
// Flush to send the events through the Filter.
|
||||
//
|
||||
// To parse an entire file, first call StartParse(), then call
|
||||
// ParseText on the file contents (in whatever size chunks are convenient),
|
||||
// then call FinishParse().
|
||||
//
|
||||
// It is invalid to call ParseText when the StartParse* routines returned
|
||||
// false.
|
||||
void ParseText(const char* content, int size) {
|
||||
ParseTextInternal(content, size);
|
||||
}
|
||||
void ParseText(const StringPiece& sp) {
|
||||
ParseTextInternal(sp.data(), sp.size());
|
||||
}
|
||||
|
||||
// Flush the currently queued events through the filters. It is desirable
|
||||
// for large web pages, particularly dynamically generated ones, to start
|
||||
// getting delivered to the browser as soon as they are ready. On the
|
||||
// other hand, rewriting is more powerful when more of the content can
|
||||
// be considered for image/css/js spriting. This method should be called
|
||||
// when the controlling network process wants to induce a new chunk of
|
||||
// output. The less you call this function the better the rewriting will
|
||||
// be.
|
||||
//
|
||||
// It is invalid to call Flush when the StartParse* routines returned
|
||||
// false.
|
||||
//
|
||||
// If this is called from a Filter, the request will be deferred until after
|
||||
// currently active filters are completed.
|
||||
virtual void Flush();
|
||||
|
||||
// Finish a chunked parsing session. This also induces a Flush.
|
||||
//
|
||||
// It is invalid to call FinishParse when the StartParse* routines returned
|
||||
// false.
|
||||
virtual void FinishParse();
|
||||
|
||||
|
||||
// Utility methods for implementing filters
|
||||
|
||||
HtmlCdataNode* NewCdataNode(HtmlElement* parent,
|
||||
const StringPiece& contents);
|
||||
HtmlCharactersNode* NewCharactersNode(HtmlElement* parent,
|
||||
const StringPiece& literal);
|
||||
HtmlCommentNode* NewCommentNode(HtmlElement* parent,
|
||||
const StringPiece& contents);
|
||||
HtmlDirectiveNode* NewDirectiveNode(HtmlElement* parent,
|
||||
const StringPiece& contents);
|
||||
HtmlIEDirectiveNode* NewIEDirectiveNode(HtmlElement* parent,
|
||||
const StringPiece& contents);
|
||||
|
||||
// DOM-manipulation methods.
|
||||
// TODO(sligocki): Find Javascript equivalents and list them or even change
|
||||
// our names to be consistent.
|
||||
|
||||
// TODO(mdsteele): Rename these methods to e.g. InsertNodeBeforeNode.
|
||||
// This and downstream filters will then see inserted elements but upstream
|
||||
// filters will not.
|
||||
// Note: In Javascript the first is called insertBefore and takes the arg
|
||||
// in the opposite order.
|
||||
// Note: new_node must not already be in the DOM.
|
||||
void InsertElementBeforeElement(const HtmlNode* existing_node,
|
||||
HtmlNode* new_node);
|
||||
void InsertElementAfterElement(const HtmlNode* existing_node,
|
||||
HtmlNode* new_node);
|
||||
|
||||
// Add a new child element at the beginning or end of existing_parent's
|
||||
// children. Named after Javascript's appendChild method.
|
||||
// Note: new_child must not already be in the DOM.
|
||||
void PrependChild(const HtmlElement* existing_parent, HtmlNode* new_child);
|
||||
void AppendChild(const HtmlElement* existing_parent, HtmlNode* new_child);
|
||||
|
||||
// Insert a new element before the current one. current_ remains unchanged.
|
||||
// Note: new_node must not already be in the DOM.
|
||||
void InsertElementBeforeCurrent(HtmlNode* new_node);
|
||||
|
||||
// Insert a new element after the current one, moving current_ to the new
|
||||
// element. In a Filter, the flush-loop will advance past this on
|
||||
// the next iteration.
|
||||
// Note: new_node must not already be in the DOM.
|
||||
void InsertElementAfterCurrent(HtmlNode* new_node);
|
||||
|
||||
// Enclose element around two elements in a sequence. The first
|
||||
// element must be the same as, or precede the last element in the
|
||||
// event-stream, and this is not checked, but the two elements do
|
||||
// not need to be adjacent. They must have the same parent to start
|
||||
// with.
|
||||
bool AddParentToSequence(HtmlNode* first, HtmlNode* last,
|
||||
HtmlElement* new_parent);
|
||||
|
||||
// Moves current node (and all children) to an already-existing parent,
|
||||
// where they will be placed as the last elements in that parent.
|
||||
// Returns false if the operation could not be performed because either
|
||||
// the node or its parent was partially or wholly flushed.
|
||||
// Note: Will not work if called from StartElement() event.
|
||||
//
|
||||
// This differs from AppendChild() because it moves the current node,
|
||||
// which is already in the DOM, rather than adding a new node.
|
||||
bool MoveCurrentInto(HtmlElement* new_parent);
|
||||
|
||||
// Moves current node (and all children) directly before existing_node.
|
||||
// Note: Will not work if called from StartElement() event.
|
||||
//
|
||||
// This differs from InsertElementBeforeElement() because it moves the
|
||||
// current node, which is already in the DOM, rather than adding a new node.
|
||||
bool MoveCurrentBefore(HtmlNode* existing_node);
|
||||
|
||||
// If the given node is rewritable, delete it and all of its children (if
|
||||
// any) and return true; otherwise, do nothing and return false.
|
||||
// Note: Javascript appears to use removeChild for this.
|
||||
bool DeleteElement(HtmlNode* node);
|
||||
|
||||
// Delete a parent element, retaining any children and moving them to
|
||||
// reside under the parent's parent.
|
||||
bool DeleteSavingChildren(HtmlElement* element);
|
||||
|
||||
// Determines whether the element, in the context of its flush
|
||||
// window, has children. If the element is not rewritable, or
|
||||
// has not been closed yet, or inserted into the DOM event stream,
|
||||
// then 'false' is returned.
|
||||
//
|
||||
// Note that the concept of the Flush Window is important because the
|
||||
// knowledge of an element's children is not limited to the current
|
||||
// event being presented to a Filter. A Filter can call this method
|
||||
// in the StartElement of an event to see if any children are going
|
||||
// to be coming. Of course, if the StartElement is at the end of a
|
||||
// Flush window, then we won't know about the children, but IsRewritable
|
||||
// will also be false.
|
||||
bool HasChildrenInFlushWindow(HtmlElement* element);
|
||||
|
||||
// If possible, replace the existing node with the new node and return true;
|
||||
// otherwise, do nothing and return false.
|
||||
bool ReplaceNode(HtmlNode* existing_node, HtmlNode* new_node);
|
||||
|
||||
// Creates another element with the same name and attributes as in_element.
|
||||
// Does not duplicate the children or insert it anywhere.
|
||||
HtmlElement* CloneElement(HtmlElement* in_element);
|
||||
|
||||
// Convenience overload taking the element name as text: the name is
// converted with MakeName() and the call is forwarded to the HtmlName
// overload of NewElement.
HtmlElement* NewElement(HtmlElement* parent, const StringPiece& str) {
  const HtmlName element_name = MakeName(str);
  return NewElement(parent, element_name);
}
|
||||
// Convenience overload taking the element name as a keyword: the keyword is
// converted with MakeName() and the call is forwarded to the HtmlName
// overload of NewElement.
HtmlElement* NewElement(HtmlElement* parent, HtmlName::Keyword keyword) {
  const HtmlName element_name = MakeName(keyword);
  return NewElement(parent, element_name);
}
|
||||
HtmlElement* NewElement(HtmlElement* parent, const HtmlName& name);
|
||||
|
||||
// Adds an attribute to `element`.  The attribute name is built from
// `keyword` via MakeName(), and the attribute is always added with
// HtmlElement::DOUBLE_QUOTE as its quote style.
void AddAttribute(HtmlElement* element, HtmlName::Keyword keyword,
                  const StringPiece& value) {
  const HtmlName attr_name = MakeName(keyword);
  element->AddAttribute(attr_name, value, HtmlElement::DOUBLE_QUOTE);
}
|
||||
// Same as AddAttribute(), except that the value is handed to
// HtmlElement::AddEscapedAttribute().  The attribute name is built from
// `keyword` via MakeName() and the quote style is always
// HtmlElement::DOUBLE_QUOTE.
void AddEscapedAttribute(HtmlElement* element, HtmlName::Keyword keyword,
                         const StringPiece& escaped_value) {
  const HtmlName attr_name = MakeName(keyword);
  element->AddEscapedAttribute(attr_name, escaped_value,
                               HtmlElement::DOUBLE_QUOTE);
}
|
||||
// Integer flavor of AddAttribute(): formats `value` with IntegerToString()
// and forwards to the StringPiece overload.
void AddAttribute(HtmlElement* element, HtmlName::Keyword keyword,
                  int value) {
  const GoogleString value_text = IntegerToString(value);
  AddAttribute(element, keyword, value_text);
}
|
||||
// Renames `attribute`: its name is replaced with the HtmlName that
// MakeName() produces for `keyword`.
void SetAttributeName(HtmlElement::Attribute* attribute,
                      HtmlName::Keyword keyword) {
  const HtmlName new_name = MakeName(keyword);
  attribute->set_name(new_name);
}
|
||||
|
||||
HtmlName MakeName(const StringPiece& str);
|
||||
HtmlName MakeName(HtmlName::Keyword keyword);
|
||||
|
||||
bool IsRewritable(const HtmlNode* node) const;
|
||||
|
||||
void ClearElements();
|
||||
|
||||
// Log the HtmlEvent queue_ to the message_handler_ for debugging.
|
||||
void DebugLogQueue();
|
||||
|
||||
// Print the HtmlEvent queue_ to stdout for debugging.
|
||||
void DebugPrintQueue();
|
||||
|
||||
// Implementation helper with detailed knowledge of html parsing libraries
|
||||
friend class HtmlLexer;
|
||||
|
||||
// Determines whether a tag should be terminated in HTML, e.g. <meta ..>.
|
||||
// We do not expect to see a close-tag for meta and should never insert one.
|
||||
bool IsImplicitlyClosedTag(HtmlName::Keyword keyword) const;
|
||||
|
||||
// Determines whether a tag should be interpreted as a 'literal'
|
||||
// tag. That is, a tag whose contents are not parsed until a
|
||||
// corresponding matching end tag is encountered.
|
||||
static bool IsLiteralTag(HtmlName::Keyword keyword);
|
||||
|
||||
// Determines whether a tag is interpreted as a 'literal' tag in
|
||||
// some user agents. Since some user agents will interpret the
|
||||
// contents of these tags, our parser never treats them as literal
|
||||
// tags. However, a filter that wants to insert new tags that should
|
||||
// be processed by all user agents should not insert those tags into
|
||||
// a tag that is sometimes parsed as a literal tag. Those filters
|
||||
// can use this method to determine if they are within such a tag.
|
||||
static bool IsSometimesLiteralTag(HtmlName::Keyword keyword);
|
||||
|
||||
// An optionally closed tag ranges from <p>, which is typically not closed,
|
||||
// but we infer the closing from context. Also consider <html>, which usually
|
||||
// is closed but not always. E.g. www.google.com does not close its html tag.
|
||||
bool IsOptionallyClosedTag(HtmlName::Keyword keyword) const;
|
||||
|
||||
// Determines whether a tag allows brief termination in HTML, e.g. <tag/>
|
||||
bool TagAllowsBriefTermination(HtmlName::Keyword keyword) const;
|
||||
|
||||
// Returns the MessageHandler pointer held in message_handler_.
MessageHandler* message_handler() const {
  return message_handler_;
}
|
||||
// Gets the current location information; typically to help with error
|
||||
// messages.
|
||||
const char* url() const { return url_.c_str(); }
|
||||
// Gets a parsed GoogleUrl& corresponding to url().
|
||||
const GoogleUrl& google_url() const { return google_url_; }
|
||||
// Returns id_, the per-request identifier used in error messages, as a
// C string.
const char* id() const {
  return id_.c_str();
}
|
||||
// Returns the value of line_number_.
int line_number() const {
  return line_number_;
}
|
||||
// Returns URL (or id) and line number as a string, to be used in messages.
|
||||
GoogleString UrlLine() const {
|
||||
return StringPrintf("%s:%d", id(), line_number());
|
||||
}
|
||||
|
||||
// Return the current assumed doctype of the document (based on the content
|
||||
// type and any HTML directives encountered so far).
|
||||
const DocType& doctype() const;
|
||||
|
||||
// Interface for any caller to report an error message via the message handler
|
||||
void Info(const char* filename, int line, const char* msg, ...)
|
||||
INSTAWEB_PRINTF_FORMAT(4, 5);
|
||||
void Warning(const char* filename, int line, const char* msg, ...)
|
||||
INSTAWEB_PRINTF_FORMAT(4, 5);
|
||||
void Error(const char* filename, int line, const char* msg, ...)
|
||||
INSTAWEB_PRINTF_FORMAT(4, 5);
|
||||
void FatalError(const char* filename, int line, const char* msg, ...)
|
||||
INSTAWEB_PRINTF_FORMAT(4, 5);
|
||||
|
||||
void InfoV(const char* file, int line, const char *msg, va_list args);
|
||||
void WarningV(const char* file, int line, const char *msg, va_list args);
|
||||
void ErrorV(const char* file, int line, const char *msg, va_list args);
|
||||
void FatalErrorV(const char* file, int line, const char* msg, va_list args);
|
||||
|
||||
// Report error message with current parsing filename and linenumber.
|
||||
void InfoHere(const char* msg, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
|
||||
void WarningHere(const char* msg, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
|
||||
void ErrorHere(const char* msg, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
|
||||
void FatalErrorHere(const char* msg, ...) INSTAWEB_PRINTF_FORMAT(2, 3);
|
||||
|
||||
// If set_log_rewrite_timing(true) has been called, logs the given message
|
||||
// at info level with a timestamp offset from the parsing start time.
|
||||
void ShowProgress(const char* message);
|
||||
|
||||
// va_list form of InfoHere(): forwards to InfoV(), using the current id and
// line number as the reported file/line location.
void InfoHereV(const char *msg, va_list args) {
  const char* const where = id_.c_str();
  InfoV(where, line_number_, msg, args);
}
|
||||
// va_list form of WarningHere(): forwards to WarningV(), using the current
// id and line number as the reported file/line location.
void WarningHereV(const char *msg, va_list args) {
  const char* const where = id_.c_str();
  WarningV(where, line_number_, msg, args);
}
|
||||
// va_list form of ErrorHere(): forwards to ErrorV(), using the current id
// and line number as the reported file/line location.
void ErrorHereV(const char *msg, va_list args) {
  const char* const where = id_.c_str();
  ErrorV(where, line_number_, msg, args);
}
|
||||
// va_list form of FatalErrorHere(): forwards to FatalErrorV(), using the
// current id and line number as the reported file/line location.
void FatalErrorHereV(const char* msg, va_list args) {
  const char* const where = id_.c_str();
  FatalErrorV(where, line_number_, msg, args);
}
|
||||
|
||||
void AddElement(HtmlElement* element, int line_number);
|
||||
void CloseElement(HtmlElement* element, HtmlElement::CloseStyle close_style,
|
||||
int line_number);
|
||||
|
||||
// Run a filter on the current queue of parse nodes.
|
||||
void ApplyFilter(HtmlFilter* filter);
|
||||
|
||||
// Provide timer to helping to report timing of each filter. You must also
|
||||
// set_log_rewrite_timing(true) to turn on this reporting.
|
||||
void set_timer(Timer* timer) { timer_ = timer; }
|
||||
// Returns the timer pointer stored by set_timer().
Timer* timer() const {
  return timer_;
}
|
||||
// Stores `x` in log_rewrite_timing_, the flag that says whether the speed of
// parsing should be timed.
void set_log_rewrite_timing(bool x) {
  log_rewrite_timing_ = x;
}
|
||||
|
||||
// Adds a filter to be called during parsing as new events are added.
|
||||
// Takes ownership of the HtmlFilter passed in.
|
||||
void add_event_listener(HtmlFilter* listener);
|
||||
|
||||
// Inserts a comment before or after the current node. The function tries to
|
||||
// pick an intelligent place depending on the document structure and
|
||||
// whether the current node is a start-element, end-element, or a leaf.
|
||||
void InsertComment(const StringPiece& sp);
|
||||
|
||||
// Sets the limit on the maximum number of bytes that should be parsed.
|
||||
void set_size_limit(int64 x);
|
||||
// Returns whether we have exceeded the size limit.
|
||||
bool size_limit_exceeded() const;
|
||||
|
||||
protected:
|
||||
typedef std::vector<HtmlFilter*> FilterVector;
|
||||
typedef std::list<HtmlFilter*> FilterList;
|
||||
|
||||
// HtmlParse::FinishParse() is equivalent to the sequence of
|
||||
// BeginFinishParse(); Flush(); EndFinishParse().
|
||||
// Split up to permit asynchronous versions.
|
||||
void BeginFinishParse();
|
||||
void EndFinishParse();
|
||||
|
||||
// Returns the number of events on the event queue.
|
||||
size_t GetEventQueueSize();
|
||||
|
||||
virtual void ParseTextInternal(const char* content, int size);
|
||||
|
||||
// Allow filters to determine whether they are enabled for this request.
|
||||
void DetermineEnabledFilters(FilterVector* filters) const;
|
||||
|
||||
private:
|
||||
void ApplyFilterHelper(HtmlFilter* filter);
|
||||
HtmlEventListIterator Last(); // Last element in queue
|
||||
bool IsInEventWindow(const HtmlEventListIterator& iter) const;
|
||||
void InsertElementBeforeEvent(const HtmlEventListIterator& event,
|
||||
HtmlNode* new_node);
|
||||
void InsertElementAfterEvent(const HtmlEventListIterator& event,
|
||||
HtmlNode* new_node);
|
||||
bool MoveCurrentBeforeEvent(const HtmlEventListIterator& move_to);
|
||||
bool IsDescendantOf(const HtmlNode* possible_child,
|
||||
const HtmlNode* possible_parent);
|
||||
void SanityCheck();
|
||||
void CheckEventParent(HtmlEvent* event, HtmlElement* expect,
|
||||
HtmlElement* actual);
|
||||
void CheckParentFromAddEvent(HtmlEvent* event);
|
||||
void FixParents(const HtmlEventListIterator& begin,
|
||||
const HtmlEventListIterator& end_inclusive,
|
||||
HtmlElement* new_parent);
|
||||
void CoalesceAdjacentCharactersNodes();
|
||||
void ClearEvents();
|
||||
void EmitQueue(MessageHandler* handler);
|
||||
|
||||
// Visible for testing only, via HtmlTestingPeer
|
||||
friend class HtmlTestingPeer;
|
||||
void AddEvent(HtmlEvent* event);
|
||||
void SetCurrent(HtmlNode* node);
|
||||
// Stores `x` in coalesce_characters_.
void set_coalesce_characters(bool x) {
  coalesce_characters_ = x;
}
|
||||
size_t symbol_table_size() const {
|
||||
return string_table_.string_bytes_allocated();
|
||||
}
|
||||
|
||||
FilterVector event_listeners_;
|
||||
SymbolTableSensitive string_table_;
|
||||
FilterVector filters_;
|
||||
HtmlLexer* lexer_;
|
||||
Arena<HtmlNode> nodes_;
|
||||
HtmlEventList queue_;
|
||||
HtmlEventListIterator current_;
|
||||
// NOTE: deleted_current_ (declared below) records whether current_ has been
// deleted; if it has, certain manipulations of it must be avoided.
|
||||
MessageHandler* message_handler_;
|
||||
GoogleString url_;
|
||||
GoogleUrl google_url_;
|
||||
GoogleString id_; // Per-request identifier string used in error messages.
|
||||
int line_number_;
|
||||
bool deleted_current_;
|
||||
bool need_sanity_check_;
|
||||
bool coalesce_characters_;
|
||||
bool need_coalesce_characters_;
|
||||
bool url_valid_;
|
||||
bool log_rewrite_timing_; // Should we time the speed of parsing?
|
||||
bool running_filters_;
|
||||
int64 parse_start_time_us_;
|
||||
Timer* timer_;
|
||||
int first_filter_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlParse);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_PARSE_H_
|
||||
@@ -1,169 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
// Infrastructure for testing html parsing and rewriting.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_PARSE_TEST_BASE_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_PARSE_TEST_BASE_H_
|
||||
|
||||
#include "net/instaweb/htmlparse/public/html_parse.h"
|
||||
#include "net/instaweb/htmlparse/public/html_writer_filter.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/gtest.h"
|
||||
#include "net/instaweb/util/public/mock_message_handler.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/string_writer.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Shared infrastructure for unit-testing the HTML parser.
|
||||
class HtmlParseTestBaseNoAlloc : public testing::Test {
|
||||
protected:
|
||||
static const char kTestDomain[];
|
||||
static const char kXhtmlDtd[]; // DOCTYPE string for claiming XHTML
|
||||
|
||||
HtmlParseTestBaseNoAlloc()
|
||||
: write_to_string_(&output_buffer_),
|
||||
added_filter_(false) {
|
||||
}
|
||||
virtual ~HtmlParseTestBaseNoAlloc();
|
||||
|
||||
// To make the tests more concise, we generally omit the <html>...</html>
|
||||
// tags bracketing the input. The libxml parser will add those in
|
||||
// if we don't have them. To avoid having that make the test data more
|
||||
// verbose, we automatically add them in the test infrastructure, both
|
||||
// for stimulus and expected response.
|
||||
//
|
||||
// This flag controls whether we also add <body>...</body> tags. In
|
||||
// the case html_parse_test, we go ahead and add them in. In the
|
||||
// case of the rewriter tests, we want to explicitly control/observe
|
||||
// the head and the body so we don't add the body tags in
|
||||
// automatically. So classes that derive from HtmlParseTestBase must
|
||||
// override this variable to indicate which they prefer.
|
||||
virtual bool AddBody() const = 0;
|
||||
|
||||
// If true, prepends "<html>\n" and appends "\n</html>" to input text
|
||||
// prior to parsing it. This was originally done for consistency with
|
||||
// libxml2 but that's long since been made irrelevant and we should probably
|
||||
// just stop doing it. Adding the virtual function here should help us
|
||||
// incrementally update tests & their gold results.
|
||||
virtual bool AddHtmlTags() const { return true; }
|
||||
|
||||
// Set a doctype string (e.g. "<!doctype html>") to be inserted before the
|
||||
// rest of the document (for the current test only). If none is set, it
|
||||
// defaults to the empty string.
|
||||
void SetDoctype(const StringPiece& directive) {
|
||||
directive.CopyToString(&doctype_string_);
|
||||
}
|
||||
|
||||
virtual GoogleString AddHtmlBody(const StringPiece& html) {
|
||||
GoogleString ret;
|
||||
if (AddHtmlTags()) {
|
||||
ret = AddBody() ? "<html><body>\n" : "<html>\n";
|
||||
StrAppend(&ret, html, (AddBody() ? "\n</body></html>\n" : "\n</html>"));
|
||||
} else {
|
||||
html.CopyToString(&ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Check that the output HTML is serialized to string-compare
|
||||
// precisely with the input.
|
||||
void ValidateNoChanges(const StringPiece& case_id,
|
||||
const GoogleString& html_input) {
|
||||
ValidateExpected(case_id, html_input, html_input);
|
||||
}
|
||||
|
||||
// Fail to ValidateNoChanges.
|
||||
void ValidateNoChangesFail(const StringPiece& case_id,
|
||||
const GoogleString& html_input) {
|
||||
ValidateExpectedFail(case_id, html_input, html_input);
|
||||
}
|
||||
|
||||
void SetupWriter() {
|
||||
SetupWriter(&html_writer_filter_);
|
||||
}
|
||||
|
||||
void SetupWriter(scoped_ptr<HtmlWriterFilter>* html_writer_filter) {
|
||||
output_buffer_.clear();
|
||||
if (html_writer_filter->get() == NULL) {
|
||||
html_writer_filter->reset(new HtmlWriterFilter(html_parse()));
|
||||
(*html_writer_filter)->set_writer(&write_to_string_);
|
||||
html_parse()->AddFilter(html_writer_filter->get());
|
||||
}
|
||||
}
|
||||
|
||||
// Parse html_input, the result is stored in output_buffer_.
|
||||
void Parse(const StringPiece& case_id, const GoogleString& html_input) {
|
||||
// HtmlParser needs a valid HTTP URL to evaluate relative paths,
|
||||
// so we create a dummy URL.
|
||||
GoogleString dummy_url = StrCat(kTestDomain, case_id, ".html");
|
||||
ParseUrl(dummy_url, html_input);
|
||||
}
|
||||
|
||||
// Parse given an explicit URL rather than an id to build URL around.
|
||||
virtual void ParseUrl(const StringPiece& url, const StringPiece& html_input);
|
||||
|
||||
// Validate that the output HTML serializes as specified in
|
||||
// 'expected', which might not be identical to the input.
|
||||
// Also, returns true if result came out as expected.
|
||||
bool ValidateExpected(const StringPiece& case_id,
|
||||
const GoogleString& html_input,
|
||||
const GoogleString& expected);
|
||||
|
||||
// Same as ValidateExpected, but with an explicit URL rather than an id.
|
||||
bool ValidateExpectedUrl(const StringPiece& url,
|
||||
const GoogleString& html_input,
|
||||
const GoogleString& expected);
|
||||
|
||||
// Fail to ValidateExpected.
|
||||
void ValidateExpectedFail(const StringPiece& case_id,
|
||||
const GoogleString& html_input,
|
||||
const GoogleString& expected);
|
||||
|
||||
virtual HtmlParse* html_parse() = 0;
|
||||
|
||||
MockMessageHandler message_handler_;
|
||||
StringWriter write_to_string_;
|
||||
GoogleString output_buffer_;
|
||||
bool added_filter_;
|
||||
scoped_ptr<HtmlWriterFilter> html_writer_filter_;
|
||||
GoogleString doctype_string_;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlParseTestBaseNoAlloc);
|
||||
};
|
||||
|
||||
class HtmlParseTestBase : public HtmlParseTestBaseNoAlloc {
|
||||
public:
|
||||
HtmlParseTestBase() : html_parse_(&message_handler_) {
|
||||
};
|
||||
protected:
|
||||
virtual HtmlParse* html_parse() { return &html_parse_; }
|
||||
|
||||
HtmlParse html_parse_;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlParseTestBase);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_PARSE_TEST_BASE_H_
|
||||
@@ -1,107 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_WRITER_FILTER_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_WRITER_FILTER_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/htmlparse/public/html_element.h"
|
||||
#include "net/instaweb/htmlparse/public/html_filter.h"
|
||||
#include "net/instaweb/htmlparse/public/html_name.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlCdataNode;
|
||||
class HtmlCharactersNode;
|
||||
class HtmlCommentNode;
|
||||
class HtmlDirectiveNode;
|
||||
class HtmlIEDirectiveNode;
|
||||
class HtmlParse;
|
||||
class Writer;
|
||||
|
||||
// Filter that serializes HTML to a Writer stream.
|
||||
class HtmlWriterFilter : public HtmlFilter {
|
||||
public:
|
||||
explicit HtmlWriterFilter(HtmlParse* html_parse);
|
||||
|
||||
void set_writer(Writer* writer) { writer_ = writer; }
|
||||
virtual ~HtmlWriterFilter();
|
||||
|
||||
virtual void StartDocument();
|
||||
virtual void EndDocument();
|
||||
virtual void StartElement(HtmlElement* element);
|
||||
virtual void EndElement(HtmlElement* element);
|
||||
virtual void Cdata(HtmlCdataNode* cdata);
|
||||
virtual void Comment(HtmlCommentNode* comment);
|
||||
virtual void IEDirective(HtmlIEDirectiveNode* directive);
|
||||
virtual void Characters(HtmlCharactersNode* characters);
|
||||
virtual void Directive(HtmlDirectiveNode* directive);
|
||||
virtual void Flush();
|
||||
virtual void DetermineEnabled();
|
||||
|
||||
void set_max_column(int max_column) { max_column_ = max_column; }
|
||||
void set_case_fold(bool case_fold) { case_fold_ = case_fold; }
|
||||
|
||||
virtual const char* Name() const { return "HtmlWriter"; }
|
||||
|
||||
protected:
|
||||
// Clear various variables for rewriting a new html file.
|
||||
virtual void Clear();
|
||||
|
||||
Writer* writer() { return writer_; }
|
||||
|
||||
// Terminates the current lazy close element if it is not already terminated.
|
||||
void TerminateLazyCloseElement();
|
||||
|
||||
private:
|
||||
void EmitBytes(const StringPiece& str);
|
||||
|
||||
// Emits an HTML name, possibly case-folded depending on the
|
||||
// caller-specified option.
|
||||
void EmitName(const HtmlName& name);
|
||||
|
||||
HtmlElement::CloseStyle GetCloseStyle(HtmlElement* element);
|
||||
|
||||
// Escapes arbitrary text as HTML, e.g. turning & into &. If quoteChar
|
||||
// is non-zero, e.g. '"', then it would escape " as well.
|
||||
void EncodeBytes(const GoogleString& val, int quoteChar);
|
||||
|
||||
HtmlParse* html_parse_;
|
||||
Writer* writer_;
|
||||
|
||||
// Helps writer exploit shortcuts like <img .../> rather than writing
|
||||
// <img ...></img>. At the end of StartElement, we defer writing the ">"
|
||||
// until we see what's coming next. If it's the matching end_tag, then
|
||||
// we can emit />. If something else comes first, then we have to
|
||||
// first emit the delayed ">" before continuing.
|
||||
HtmlElement* lazy_close_element_;
|
||||
|
||||
int column_;
|
||||
int max_column_;
|
||||
int write_errors_;
|
||||
bool case_fold_;
|
||||
GoogleString case_fold_buffer_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HtmlWriterFilter);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_HTML_WRITER_FILTER_H_
|
||||
@@ -1,118 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmaessen@google.com (Jan Maessen)
|
||||
|
||||
// html_filter that passes data through unmodified, but
|
||||
// logs statistics about the data as it goes by.
|
||||
// It should be possible to create many instances of this
|
||||
// class and insert them at different points in the rewriting flow
|
||||
// Goal is to log:
|
||||
// NUM_EXPLICIT_CLOSED - <tag> </tag> pairs
|
||||
// NUM_IMPLICIT_CLOSED - <tag> for implicitly-closed tag
|
||||
// NUM_BRIEF_CLOSED - <tag/>
|
||||
// NUM_CLOSED - Sum of above three
|
||||
// NUM_UNCLOSED - <tag> without matching </tag>
|
||||
// NUM_SPURIOUS_CLOSED - </tag> without preceding <tag>; UNCOUNTED RIGHT NOW!
|
||||
// NUM_TAGS - Total number of opening tags
|
||||
// NUM_CDATA - cdata sections
|
||||
// NUM_COMMENTS - comments
|
||||
// NUM_DIRECTIVES - directives
|
||||
// NUM_DOCUMENTS - started documents
|
||||
// NUM_IE_DIRECTIVES - ie directives
|
||||
// NUM_IMG_TAGS - Number of IMG tags.
|
||||
// NUM_INLINED_IMG_TAGS - Number of IMG tags which have inlined data.
|
||||
// Reporting:
|
||||
// We report this information via a StatisticsLog: filter.ToString(log)
|
||||
// Two sets of statistics (eg before and after processing) can be
|
||||
// compared using before.Equals(after).
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_LOGGING_HTML_FILTER_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_LOGGING_HTML_FILTER_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/htmlparse/public/empty_html_filter.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HtmlCdataNode;
|
||||
class HtmlCommentNode;
|
||||
class HtmlDirectiveNode;
|
||||
class HtmlElement;
|
||||
class HtmlIEDirectiveNode;
|
||||
class StatisticsLog;
|
||||
|
||||
// Counts some basic statistics observed as HTML is parsed.
|
||||
class LoggingFilter : public EmptyHtmlFilter {
|
||||
public:
|
||||
// internal names of statistics.
|
||||
// NOTE: must match string names in kStatisticNames at top of
|
||||
// logging_html_filter.c
|
||||
enum Statistic {
|
||||
MIN_STAT = 0,
|
||||
NUM_EXPLICIT_CLOSED = 0,
|
||||
NUM_IMPLICIT_CLOSED,
|
||||
NUM_BRIEF_CLOSED,
|
||||
NUM_CLOSED,
|
||||
NUM_UNCLOSED,
|
||||
NUM_SPURIOUS_CLOSED,
|
||||
NUM_TAGS,
|
||||
NUM_CDATA,
|
||||
NUM_COMMENTS,
|
||||
NUM_DIRECTIVES,
|
||||
NUM_DOCUMENTS,
|
||||
NUM_IE_DIRECTIVES,
|
||||
NUM_IMG_TAGS,
|
||||
NUM_INLINED_IMG_TAGS,
|
||||
MAX_STAT
|
||||
};
|
||||
|
||||
LoggingFilter();
|
||||
|
||||
// HtmlFilter methods.
|
||||
virtual void StartDocument();
|
||||
virtual void StartElement(HtmlElement* element);
|
||||
virtual void EndElement(HtmlElement* element);
|
||||
virtual void Cdata(HtmlCdataNode* cdata);
|
||||
virtual void Comment(HtmlCommentNode* comment);
|
||||
virtual void IEDirective(HtmlIEDirectiveNode* directive);
|
||||
virtual void Directive(HtmlDirectiveNode* directive);
|
||||
virtual const char* Name() const { return "Logging"; }
|
||||
|
||||
// Getter for individual statistics; NO BOUNDS CHECKS.
|
||||
inline int get(const Statistic statistic) const {
|
||||
return stats_[statistic];
|
||||
}
|
||||
|
||||
// Logging, diffing, and aggregation
|
||||
|
||||
// Report all statistics
|
||||
void LogStatistics(StatisticsLog *statistics_log) const;
|
||||
|
||||
int num_img_tags() const { return stats_[NUM_IMG_TAGS]; }
|
||||
int num_inlined_img_tags() const { return stats_[NUM_INLINED_IMG_TAGS]; }
|
||||
|
||||
void Reset();
|
||||
|
||||
private:
|
||||
int stats_[MAX_STAT];
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(LoggingFilter);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_LOGGING_HTML_FILTER_H_
|
||||
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmaessen@google.com (Jan Maessen)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTMLPARSE_PUBLIC_STATISTICS_LOG_H_
|
||||
#define NET_INSTAWEB_HTMLPARSE_PUBLIC_STATISTICS_LOG_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class StatisticsLog {
|
||||
public:
|
||||
StatisticsLog() { }
|
||||
virtual ~StatisticsLog();
|
||||
virtual void LogStat(const char *statName, int value) = 0;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(StatisticsLog);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTMLPARSE_PUBLIC_STATISTICS_LOG_H_
|
||||
@@ -1,392 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
// sligocki@google.com (Shawn Ligocki)
|
||||
//
|
||||
// AsyncFetch represents the context of a single fetch.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_ASYNC_FETCH_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_ASYNC_FETCH_H_
|
||||
|
||||
#include "net/instaweb/http/public/http_value.h"
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/http/public/request_headers.h"
|
||||
#include "net/instaweb/http/public/response_headers.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/writer.h"
|
||||
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractLogRecord;
|
||||
class MessageHandler;
|
||||
class Variable;
|
||||
|
||||
// Abstract base class for encapsulating streaming, asynchronous HTTP fetches.
|
||||
//
|
||||
// If you want to fetch a resource, implement this interface, create an
|
||||
// instance and call UrlAsyncFetcher::Fetch() with it.
|
||||
//
|
||||
// It combines the 3 callbacks we expect to get from fetchers
|
||||
// (Write, Flush and Done) and adds a HeadersComplete indicator that is
|
||||
// useful in any place where we want to deal with and send headers before
|
||||
// Write or Done are called.
|
||||
//
|
||||
// Note that it automatically invokes HeadersComplete before the first call to
|
||||
// Write, Flush or Done.
|
||||
class AsyncFetch : public Writer {
|
||||
public:
|
||||
static const int kContentLengthUnknown = -1;
|
||||
|
||||
AsyncFetch();
|
||||
explicit AsyncFetch(const RequestContextPtr& request_ctx);
|
||||
|
||||
virtual ~AsyncFetch();
|
||||
|
||||
// Called when ResponseHeaders have been set, but before writing contents.
|
||||
// Contract: Must be called (at most once) before Write, Flush or Done.
|
||||
// Automatically invoked (if necessary) before the first call to Write,
|
||||
// Flush, or Done. This interface is intended for callers (e.g. Fetchers).
|
||||
// Implementors of the AsyncFetch interface must override
|
||||
// HandleHeadersComplete.
|
||||
void HeadersComplete();
|
||||
|
||||
// Fetch complete. This interface is intended for callers
|
||||
// (e.g. Fetchers). Implementors must override HandleDone.
|
||||
void Done(bool success);
|
||||
|
||||
// Data available. This interface is intended for callers. Implementors
|
||||
// must override HandleWrite and HandleFlush.
|
||||
virtual bool Write(const StringPiece& content, MessageHandler* handler);
|
||||
virtual bool Flush(MessageHandler* handler);
|
||||
|
||||
// Is the cache entry corresponding to headers valid? Default is that it is
|
||||
// valid. Sub-classes can provide specific implementations, e.g., based on
|
||||
// cache invalidation timestamp in domain specific options.
|
||||
// Used by CacheUrlAsyncFetcher.
|
||||
// TODO(nikhilmadan): Consider making this pure virtual so that subclass authors
|
||||
// are forced to look at this function.
|
||||
virtual bool IsCachedResultValid(const ResponseHeaders& headers) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns a pointer to the request-headers, lazily constructing
|
||||
// them if needed. If they are constructed here (as opposed to
|
||||
// being set with set_request_headers) then they will be owned by
|
||||
// the class instance.
|
||||
RequestHeaders* request_headers();
|
||||
|
||||
// Sets the request-headers to the specified pointer. The caller must
|
||||
// guarantee that the pointed-to headers remain valid as long as the
|
||||
// AsyncFetch is running.
|
||||
//
|
||||
// Does not take ownership of headers.
|
||||
void set_request_headers(RequestHeaders* headers);
|
||||
|
||||
// Same as above, but takes ownership.
|
||||
void SetRequestHeadersTakingOwnership(RequestHeaders* headers);
|
||||
|
||||
// Returns the request_headers as a const pointer: it is required
|
||||
// that the RequestHeaders be pre-initialized via non-const
|
||||
// request_headers() or via set_request_headers before calling this.
|
||||
const RequestHeaders* request_headers() const;
|
||||
|
||||
// See doc for request_headers and set_request_headers.
|
||||
ResponseHeaders* response_headers();
|
||||
void set_response_headers(ResponseHeaders* headers);
|
||||
|
||||
// Returns extra response headers which may be modified between
|
||||
// calls to HeadersComplete() and Done(). This is used to allow
|
||||
// a fetch to provide additional headers which cannot be determined
|
||||
// when HeadersComplete() has been invoked, e.g., X-Original-Content-Length.
|
||||
// This is needed because it is not safe for the producer to modify
|
||||
// response_headers() once HeadersComplete() has been called.
|
||||
ResponseHeaders* extra_response_headers();
|
||||
void set_extra_response_headers(ResponseHeaders* headers);
|
||||
|
||||
virtual bool EnableThreaded() const { return false; }
|
||||
|
||||
// Indicates whether the request is a background fetch. These can be scheduled
|
||||
// differently by the fetcher.
|
||||
virtual bool IsBackgroundFetch() const { return false; }
|
||||
|
||||
// Resets the 'headers_complete_' flag.
|
||||
// TODO(jmarantz): should this also clear the response headers?
|
||||
virtual void Reset() { headers_complete_ = false; }
|
||||
|
||||
bool headers_complete() const { return headers_complete_; }
|
||||
|
||||
// Keep track of whether the content-length is known before the
|
||||
// body is sent, so that a server can decide whether it needs chunked.
|
||||
//
|
||||
// Note that this is not necessarily the same as the Content-Length
|
||||
// attribute in the response-headers, which might reflect pre-optimized
|
||||
// or pre-compressed sizes.
|
||||
bool content_length_known() const {
|
||||
return content_length_ != kContentLengthUnknown;
|
||||
}
|
||||
int64 content_length() const { return content_length_; }
|
||||
void set_content_length(int64 x) { content_length_ = x; }
|
||||
|
||||
// Returns logging information in a string eg. c1:0;c2:2;hf:45;.
|
||||
// c1 is cache 1, c2 is cache 2, hf is headers fetch.
|
||||
GoogleString LoggingString();
|
||||
|
||||
// Returns the request context associated with this fetch, if any, or
|
||||
// NULL if no request context exists.
|
||||
virtual const RequestContextPtr& request_context() { return request_ctx_; }
|
||||
|
||||
// Returns a pointer to a log record that wraps this fetch's logging
|
||||
// info.
|
||||
virtual AbstractLogRecord* log_record();
|
||||
|
||||
protected:
|
||||
virtual bool HandleWrite(const StringPiece& sp, MessageHandler* handler) = 0;
|
||||
virtual bool HandleFlush(MessageHandler* handler) = 0;
|
||||
virtual void HandleDone(bool success) = 0;
|
||||
virtual void HandleHeadersComplete() = 0;
|
||||
|
||||
private:
|
||||
RequestHeaders* request_headers_;
|
||||
ResponseHeaders* response_headers_;
|
||||
ResponseHeaders* extra_response_headers_;
|
||||
RequestContextPtr request_ctx_;
|
||||
bool owns_request_headers_;
|
||||
bool owns_response_headers_;
|
||||
bool owns_extra_response_headers_;
|
||||
bool headers_complete_;
|
||||
int64 content_length_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(AsyncFetch);
|
||||
};
|
||||
|
||||
// Class to represent an Async fetch that collects the response-data into
|
||||
// a string, which can be accessed via buffer() and cleared via Reset().
|
||||
//
|
||||
// TODO(jmarantz): move StringAsyncFetch into its own file.
|
||||
class StringAsyncFetch : public AsyncFetch {
|
||||
public:
|
||||
explicit StringAsyncFetch(const RequestContextPtr& request_ctx)
|
||||
: AsyncFetch(request_ctx), buffer_pointer_(&buffer_) {
|
||||
Init();
|
||||
}
|
||||
|
||||
StringAsyncFetch(const RequestContextPtr& request_ctx, GoogleString* buffer)
|
||||
: AsyncFetch(request_ctx), buffer_pointer_(buffer) {
|
||||
Init();
|
||||
}
|
||||
|
||||
virtual ~StringAsyncFetch();
|
||||
|
||||
// Appends each chunk of response content to the target string
// (*buffer_pointer_).  'handler' is not used here; always returns true.
virtual bool HandleWrite(const StringPiece& content,
|
||||
MessageHandler* handler) {
|
||||
content.AppendToString(buffer_pointer_);
|
||||
return true;
|
||||
}
|
||||
virtual bool HandleFlush(MessageHandler* handler) { return true; }
|
||||
virtual void HandleHeadersComplete() {}
|
||||
// Records the outcome of the fetch and marks it finished; the values are
// exposed through success() and done().
virtual void HandleDone(bool success) {
|
||||
success_ = success;
|
||||
done_ = true;
|
||||
}
|
||||
|
||||
bool success() const { return success_; }
|
||||
bool done() const { return done_; }
|
||||
const GoogleString& buffer() const { return *buffer_pointer_; }
|
||||
|
||||
// Prepares the fetch for reuse: clears the completion flags, the collected
// content and the request, response and extra-response headers, then resets
// the base class.
virtual void Reset() {
|
||||
done_ = false;
|
||||
success_ = false;
|
||||
// Empties the target string.  When an external buffer was passed to the
// constructor, this clears the caller's string, not the internal buffer_.
buffer_pointer_->clear();
|
||||
// NOTE(review): request_headers() is documented as constructing the headers
// lazily; presumably the response accessors do the same, so these calls may
// allocate headers just to clear them -- confirm in the .cc.
response_headers()->Clear();
|
||||
extra_response_headers()->Clear();
|
||||
request_headers()->Clear();
|
||||
// The base-class Reset() clears headers_complete_.
AsyncFetch::Reset();
|
||||
}
|
||||
|
||||
protected:
|
||||
// For subclasses that need to use complex logic to set success_ and done_.
|
||||
// Most subclasses should not need these.
|
||||
void set_success(bool success) { success_ = success; }
|
||||
void set_done(bool done) { done_ = done; }
|
||||
|
||||
private:
|
||||
void Init() {
|
||||
success_ = false;
|
||||
done_ = false;
|
||||
}
|
||||
|
||||
GoogleString buffer_;
|
||||
GoogleString* buffer_pointer_;
|
||||
bool success_;
|
||||
bool done_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(StringAsyncFetch);
|
||||
};
|
||||
|
||||
// Creates an AsyncFetch object using an existing Writer* object,
|
||||
// which is used to delegate Write and Flush operations. This
|
||||
// class is still abstract: inheritors must still implement HandleDone() and
// HandleHeadersComplete().
|
||||
class AsyncFetchUsingWriter : public AsyncFetch {
|
||||
public:
|
||||
AsyncFetchUsingWriter(const RequestContextPtr& request_context,
|
||||
Writer* writer)
|
||||
: AsyncFetch(request_context),
|
||||
writer_(writer) {}
|
||||
virtual ~AsyncFetchUsingWriter();
|
||||
|
||||
protected:
|
||||
virtual bool HandleWrite(const StringPiece& sp, MessageHandler* handler);
|
||||
virtual bool HandleFlush(MessageHandler* handler);
|
||||
|
||||
private:
|
||||
Writer* writer_;
|
||||
DISALLOW_COPY_AND_ASSIGN(AsyncFetchUsingWriter);
|
||||
};
|
||||
|
||||
// Creates an AsyncFetch object using an existing AsyncFetch*,
|
||||
// sharing the response & request headers, and by default delegating
|
||||
// all 4 Handle methods to the base fetch. Any one of them can
|
||||
// be overridden by inheritors of this class, but to propagate the
|
||||
// callbacks to the base-fetch, overrides should upcall this class,
|
||||
// e.g. SharedAsyncFetch::HandleWrite(...).
|
||||
class SharedAsyncFetch : public AsyncFetch {
|
||||
public:
|
||||
explicit SharedAsyncFetch(AsyncFetch* base_fetch);
|
||||
virtual ~SharedAsyncFetch();
|
||||
|
||||
// Returns the request context of the wrapped base fetch rather than this
// object's own request_ctx_.
virtual const RequestContextPtr& request_context() {
|
||||
return base_fetch_->request_context();
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void HandleDone(bool success) {
|
||||
base_fetch_->Done(success);
|
||||
}
|
||||
|
||||
virtual bool HandleWrite(const StringPiece& content,
|
||||
MessageHandler* handler) {
|
||||
return base_fetch_->Write(content, handler);
|
||||
}
|
||||
|
||||
virtual bool HandleFlush(MessageHandler* handler) {
|
||||
return base_fetch_->Flush(handler);
|
||||
}
|
||||
|
||||
virtual void HandleHeadersComplete();
|
||||
|
||||
virtual bool EnableThreaded() const {
|
||||
return base_fetch_->EnableThreaded();
|
||||
}
|
||||
|
||||
virtual bool IsCachedResultValid(const ResponseHeaders& headers) {
|
||||
return base_fetch_->IsCachedResultValid(headers);
|
||||
}
|
||||
|
||||
virtual bool IsBackgroundFetch() const {
|
||||
return base_fetch_->IsBackgroundFetch();
|
||||
}
|
||||
|
||||
// Propagates any set_content_length from this to the base fetch.
|
||||
void PropagateContentLength();
|
||||
|
||||
private:
|
||||
AsyncFetch* base_fetch_;
|
||||
DISALLOW_COPY_AND_ASSIGN(SharedAsyncFetch);
|
||||
};
|
||||
|
||||
// Creates a SharedAsyncFetch object using an existing AsyncFetch and a fallback
|
||||
// value that is used in case the fetched response is an error. Note that in
|
||||
// case the fetched response is an error and we have a non-empty fallback value,
|
||||
// we completely ignore the fetched response.
|
||||
// Also, note that this class gets deleted when HandleDone is called.
|
||||
class FallbackSharedAsyncFetch : public SharedAsyncFetch {
|
||||
public:
|
||||
// Warning header to be added if a stale response is served.
|
||||
static const char kStaleWarningHeaderValue[];
|
||||
|
||||
FallbackSharedAsyncFetch(AsyncFetch* base_fetch, HTTPValue* fallback,
|
||||
MessageHandler* handler);
|
||||
virtual ~FallbackSharedAsyncFetch();
|
||||
|
||||
void set_fallback_responses_served(Variable* x) {
|
||||
fallback_responses_served_ = x;
|
||||
}
|
||||
|
||||
bool serving_fallback() const { return serving_fallback_; }
|
||||
|
||||
protected:
|
||||
virtual void HandleDone(bool success);
|
||||
virtual bool HandleWrite(const StringPiece& content, MessageHandler* handler);
|
||||
virtual bool HandleFlush(MessageHandler* handler);
|
||||
virtual void HandleHeadersComplete();
|
||||
|
||||
private:
|
||||
// Note that this is only used while serving the fallback response.
|
||||
MessageHandler* handler_;
|
||||
HTTPValue fallback_;
|
||||
bool serving_fallback_;
|
||||
Variable* fallback_responses_served_; // may be NULL.
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(FallbackSharedAsyncFetch);
|
||||
};
|
||||
|
||||
// Creates a SharedAsyncFetch object using an existing AsyncFetch and a cached
|
||||
// value (that may be stale) that is used to conditionally check if the resource
|
||||
// at the origin has changed. If the resource hasn't changed and we get a 304,
|
||||
// we serve the cached response, thus avoiding the download of the entire
|
||||
// content.
|
||||
// Note that if you want the conditionally validated resource to be treated
|
||||
// as newly fetched with the original TTL, you should use this fetch such that
|
||||
// the fixing of date headers happens in the base fetch.
|
||||
// Also, note that this class gets deleted when HandleDone is called.
|
||||
class ConditionalSharedAsyncFetch : public SharedAsyncFetch {
|
||||
public:
|
||||
ConditionalSharedAsyncFetch(AsyncFetch* base_fetch, HTTPValue* cached_value,
|
||||
MessageHandler* handler);
|
||||
virtual ~ConditionalSharedAsyncFetch();
|
||||
|
||||
void set_num_conditional_refreshes(Variable* x) {
|
||||
num_conditional_refreshes_ = x;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void HandleDone(bool success);
|
||||
virtual bool HandleWrite(const StringPiece& content, MessageHandler* handler);
|
||||
virtual bool HandleFlush(MessageHandler* handler);
|
||||
virtual void HandleHeadersComplete();
|
||||
|
||||
private:
|
||||
// Note that this is only used while serving the cached response.
|
||||
MessageHandler* handler_;
|
||||
HTTPValue cached_value_;
|
||||
// Indicates that we received a 304 from the origin and are serving out the
|
||||
// cached value.
|
||||
bool serving_cached_value_;
|
||||
// Indicates that we added conditional headers to the request.
|
||||
bool added_conditional_headers_to_request_;
|
||||
|
||||
Variable* num_conditional_refreshes_; // may be NULL.
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ConditionalSharedAsyncFetch);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_ASYNC_FETCH_H_
|
||||
@@ -1,34 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: fangfei@google.com (Fangfei Zhou)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_BOT_CHECKER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_BOT_CHECKER_H_
|
||||
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
namespace net_instaweb {
|
||||
|
||||
// Bot names are case sensitive.
|
||||
// We keep the exact BOT names in .gperf table.
|
||||
// Helper class exposing a single static lookup; it declares no data members.
class BotChecker {
|
||||
public:
|
||||
// NOTE(review): presumably returns true when 'user_agent' matches one of the
// bot names kept in the .gperf table; the implementation is not in this
// header -- confirm the exact matching rule there.
static bool Lookup(const StringPiece& user_agent);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_BOT_CHECKER_H_
|
||||
@@ -1,141 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_CACHE_URL_ASYNC_FETCHER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_CACHE_URL_ASYNC_FETCHER_H_
|
||||
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class Histogram;
|
||||
class HTTPCache;
|
||||
class MessageHandler;
|
||||
class Variable;
|
||||
|
||||
// Composes an asynchronous URL fetcher with an http cache, to
|
||||
// generate an asynchronous caching URL fetcher.
|
||||
//
|
||||
// This fetcher will asynchronously check the cache. If the url
|
||||
// is found in cache and is still valid, the fetch's callback will be
|
||||
// called right away. Otherwise (if fetcher != NULL) an async fetch
|
||||
// will be performed in the fetcher, the result of which will be written
|
||||
// into the cache. In case the fetch fails and there is a stale response
|
||||
// in the cache, we serve the stale response.
|
||||
//
|
||||
// If fetcher == NULL, this will only perform a cache lookup and then call
|
||||
// the callback immediately.
|
||||
//
|
||||
// TODO(sligocki): In order to use this for fetching resources for rewriting
|
||||
// we'd need to integrate resource locking in this class. Do we want that?
|
||||
class CacheUrlAsyncFetcher : public UrlAsyncFetcher {
|
||||
public:
|
||||
// Stores the 'cache' and 'fetcher' pointers as given.  All optional
// statistics hooks start out NULL and every behavioural flag starts out
// false; use the set_*() methods to change them after construction.
CacheUrlAsyncFetcher(HTTPCache* cache, UrlAsyncFetcher* fetcher)
|
||||
: http_cache_(cache),
|
||||
fetcher_(fetcher),
|
||||
backend_first_byte_latency_(NULL),
|
||||
fallback_responses_served_(NULL),
|
||||
num_conditional_refreshes_(NULL),
|
||||
respect_vary_(false),
|
||||
ignore_recent_fetch_failed_(false),
|
||||
serve_stale_if_fetch_error_(false),
|
||||
default_cache_html_(false) {
|
||||
}
|
||||
virtual ~CacheUrlAsyncFetcher();
|
||||
|
||||
// Forwards the HTTPS-capability query to the wrapped fetcher.
// NOTE(review): fetcher_ is dereferenced unconditionally, although the class
// comment allows fetcher == NULL (cache-only mode) -- confirm that callers
// never ask this of a cache-only instance, or add a NULL guard.
virtual bool SupportsHttps() const { return fetcher_->SupportsHttps(); }
|
||||
|
||||
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* base_fetch);
|
||||
|
||||
// HTTP status code used to indicate that we failed the Fetch because
|
||||
// result was not found in cache. (Only happens if fetcher_ == NULL).
|
||||
static const int kNotInCacheStatus;
|
||||
|
||||
HTTPCache* http_cache() const { return http_cache_; }
|
||||
UrlAsyncFetcher* fetcher() const { return fetcher_; }
|
||||
|
||||
void set_backend_first_byte_latency_histogram(Histogram* x) {
|
||||
backend_first_byte_latency_ = x;
|
||||
}
|
||||
|
||||
Histogram* backend_first_byte_latency_histogram() const {
|
||||
return backend_first_byte_latency_;
|
||||
}
|
||||
|
||||
void set_fallback_responses_served(Variable* x) {
|
||||
fallback_responses_served_ = x;
|
||||
}
|
||||
|
||||
Variable* fallback_responses_served() const {
|
||||
return fallback_responses_served_;
|
||||
}
|
||||
|
||||
void set_num_conditional_refreshes(Variable* x) {
|
||||
num_conditional_refreshes_ = x;
|
||||
}
|
||||
|
||||
Variable* num_conditional_refreshes() const {
|
||||
return num_conditional_refreshes_;
|
||||
}
|
||||
|
||||
void set_respect_vary(bool x) { respect_vary_ = x; }
|
||||
bool respect_vary() const { return respect_vary_; }
|
||||
|
||||
void set_ignore_recent_fetch_failed(bool x) {
|
||||
ignore_recent_fetch_failed_ = x;
|
||||
}
|
||||
bool ignore_recent_fetch_failed() const {
|
||||
return ignore_recent_fetch_failed_;
|
||||
}
|
||||
|
||||
void set_serve_stale_if_fetch_error(bool x) {
|
||||
serve_stale_if_fetch_error_ = x;
|
||||
}
|
||||
|
||||
bool serve_stale_if_fetch_error() const {
|
||||
return serve_stale_if_fetch_error_;
|
||||
}
|
||||
|
||||
void set_default_cache_html(bool x) { default_cache_html_ = x; }
|
||||
bool default_cache_html() const { return default_cache_html_; }
|
||||
|
||||
private:
|
||||
// Not owned by CacheUrlAsyncFetcher.
|
||||
HTTPCache* http_cache_;
|
||||
UrlAsyncFetcher* fetcher_;
|
||||
|
||||
Histogram* backend_first_byte_latency_; // may be NULL.
|
||||
Variable* fallback_responses_served_; // may be NULL.
|
||||
Variable* num_conditional_refreshes_; // may be NULL.
|
||||
|
||||
bool respect_vary_;
|
||||
bool ignore_recent_fetch_failed_;
|
||||
bool serve_stale_if_fetch_error_;
|
||||
bool default_cache_html_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(CacheUrlAsyncFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_CACHE_URL_ASYNC_FETCHER_H_
|
||||
@@ -1,139 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
//
|
||||
// A collection of content-types and their attributes.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_CONTENT_TYPE_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_CONTENT_TYPE_H_
|
||||
|
||||
#include <set>
|
||||
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
struct ContentType {
|
||||
public:
|
||||
// The MIME types we process.
|
||||
enum Type {
|
||||
kHtml,
|
||||
kXhtml,
|
||||
kCeHtml, // See http://en.wikipedia.org/wiki/CE-HTML
|
||||
kJavascript,
|
||||
kCss,
|
||||
kText,
|
||||
kXml,
|
||||
kPng,
|
||||
kGif,
|
||||
kJpeg,
|
||||
kSwf,
|
||||
kWebp,
|
||||
kIco,
|
||||
kJson,
|
||||
kPdf,
|
||||
kVideo,
|
||||
kOctetStream, // Binary resources.
|
||||
kOther, // Used to specify a new local ContentType in one test file.
|
||||
};
|
||||
|
||||
// Returns the maximum extension length of any resource types our filters
|
||||
// can create. Does not count the ".".
|
||||
// See RewriteDriver::CreateOutputResourceWithPath()
|
||||
static int MaxProducedExtensionLength();
|
||||
|
||||
const char* mime_type() const { return mime_type_; }
|
||||
// TODO(sligocki): Stop returning '.' in file_extension().
|
||||
const char* file_extension() const { return file_extension_; }
|
||||
Type type() const { return type_; }
|
||||
|
||||
// Return true iff this content type is CSS.
|
||||
bool IsCss() const;
|
||||
|
||||
// Return true iff this content type is HTML, or XHTML, or some other such
|
||||
// thing (e.g. CE-HTML) that we can rewrite.
|
||||
bool IsHtmlLike() const;
|
||||
|
||||
// Return true iff this content type is XML of some kind (either XHTML or
|
||||
// some other XML).
|
||||
bool IsXmlLike() const;
|
||||
|
||||
// Return true iff this content type is Flash.
|
||||
bool IsFlash() const;
|
||||
|
||||
// Return true iff this content type is Image.
|
||||
bool IsImage() const;
|
||||
|
||||
// Return true iff this content type is Video.
|
||||
bool IsVideo() const;
|
||||
|
||||
// These fields should be private; we leave them public only so we can use
|
||||
// struct literals in content_type.cc. Other code should use the above
|
||||
// accessor methods instead of accessing these fields directly.
|
||||
const char* mime_type_;
|
||||
const char* file_extension_; // includes ".", e.g. ".ext"
|
||||
Type type_;
|
||||
};
|
||||
|
||||
// HTML-like (i.e. rewritable) text:
|
||||
extern const ContentType& kContentTypeHtml;
|
||||
extern const ContentType& kContentTypeXhtml;
|
||||
extern const ContentType& kContentTypeCeHtml;
|
||||
// Other text:
|
||||
extern const ContentType& kContentTypeJavascript;
|
||||
extern const ContentType& kContentTypeCss;
|
||||
extern const ContentType& kContentTypeText;
|
||||
extern const ContentType& kContentTypeXml;
|
||||
extern const ContentType& kContentTypeJson;
|
||||
// Images:
|
||||
extern const ContentType& kContentTypePng;
|
||||
extern const ContentType& kContentTypeGif;
|
||||
extern const ContentType& kContentTypeJpeg;
|
||||
extern const ContentType& kContentTypeSwf;
|
||||
extern const ContentType& kContentTypeWebp;
|
||||
extern const ContentType& kContentTypeIco;
|
||||
// PDF:
|
||||
extern const ContentType& kContentTypePdf;
|
||||
|
||||
// Binary/octet-stream.
|
||||
extern const ContentType& kContentTypeBinaryOctetStream;
|
||||
|
||||
// Given a name (file or url), see if it has the canonical extension
|
||||
// corresponding to a particular content type.
|
||||
const ContentType* NameExtensionToContentType(const StringPiece& name);
|
||||
const ContentType* MimeTypeToContentType(const StringPiece& mime_type);
|
||||
|
||||
// Extracts mime_type and charset from a string of the form
|
||||
// "<mime_type>; charset=<charset>".
|
||||
// If mime_type or charset is not specified, they will be populated
|
||||
// with the empty string.
|
||||
// Returns true if either a mime_type or a charset was extracted.
|
||||
bool ParseContentType(const StringPiece& content_type_str,
|
||||
GoogleString* mime_type,
|
||||
GoogleString* charset);
|
||||
|
||||
// Splits comma-separated string to elements and tries to match each one with
|
||||
// a recognized content type. The out set will be cleared first and must be
|
||||
// present.
|
||||
void MimeTypeListToContentTypeSet(
|
||||
const GoogleString& in,
|
||||
std::set<const ContentType*>* out);
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_CONTENT_TYPE_H_
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
//
|
||||
// Wraps an asynchronous fetcher, but keeps track of success/failure count.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_COUNTING_URL_ASYNC_FETCHER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_COUNTING_URL_ASYNC_FETCHER_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class MessageHandler;
|
||||
|
||||
class CountingUrlAsyncFetcher : public UrlAsyncFetcher {
|
||||
public:
|
||||
// Wraps 'fetcher' (pointer is stored as given; it can be replaced later with
// set_fetcher()).  The counters have no initializers of their own, so Clear()
// is called to give them their starting values -- presumably zero; Clear() is
// defined outside this header.
explicit CountingUrlAsyncFetcher(UrlAsyncFetcher* fetcher)
|
||||
: fetcher_(fetcher) {
|
||||
Clear();
|
||||
}
|
||||
virtual ~CountingUrlAsyncFetcher();
|
||||
|
||||
void set_fetcher(UrlAsyncFetcher* fetcher) { fetcher_ = fetcher; }
|
||||
|
||||
virtual bool SupportsHttps() const { return fetcher_->SupportsHttps(); }
|
||||
|
||||
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch);
|
||||
|
||||
int fetch_count() const { return fetch_count_; }
|
||||
int byte_count() const { return byte_count_; }
|
||||
int failure_count() const { return failure_count_; }
|
||||
|
||||
void Clear();
|
||||
|
||||
class CountingFetch;
|
||||
friend class CountingFetch;
|
||||
|
||||
private:
|
||||
UrlAsyncFetcher* fetcher_;
|
||||
int fetch_count_;
|
||||
int byte_count_;
|
||||
int failure_count_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(CountingUrlAsyncFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_COUNTING_URL_ASYNC_FETCHER_H_
|
||||
@@ -1,111 +0,0 @@
|
||||
// Copyright 2012 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_DEVICE_PROPERTIES_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_DEVICE_PROPERTIES_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "net/instaweb/http/public/user_agent_matcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/gtest_prod.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// This class keeps track of the device properties of the client, which are
|
||||
// for the most part learned from the UserAgent string.
|
||||
class DeviceProperties {
|
||||
public:
|
||||
explicit DeviceProperties(UserAgentMatcher* matcher);
|
||||
virtual ~DeviceProperties();
|
||||
|
||||
void set_user_agent(const StringPiece& user_agent_string);
|
||||
bool SupportsImageInlining() const;
|
||||
bool SupportsLazyloadImages() const;
|
||||
bool SupportsCriticalImagesBeacon() const;
|
||||
bool SupportsJsDefer(bool enable_mobile) const;
|
||||
bool SupportsWebp() const;
|
||||
bool SupportsWebpLosslessAlpha() const;
|
||||
bool IsBot() const;
|
||||
bool SupportsSplitHtml(bool enable_mobile) const;
|
||||
bool CanPreloadResources() const;
|
||||
bool GetScreenResolution(int* width, int* height) const;
|
||||
UserAgentMatcher::DeviceType GetDeviceType() const;
|
||||
// Convenience predicate: true iff GetDeviceType() reports
// UserAgentMatcher::kMobile.
bool IsMobile() const {
|
||||
return GetDeviceType() == UserAgentMatcher::kMobile;
|
||||
}
|
||||
|
||||
enum ImageQualityPreference {
|
||||
// Server uses its own default image quality.
|
||||
kImageQualityDefault,
|
||||
// The request asks for low image quality.
|
||||
kImageQualityLow,
|
||||
// The request asks for medium image quality.
|
||||
kImageQualityMedium,
|
||||
// The request asks for high image quality.
|
||||
kImageQualityHigh,
|
||||
};
|
||||
static const int kMediumScreenWidthThreshold = 720;
|
||||
static const int kLargeScreenWidthThreshold = 1500;
|
||||
|
||||
// Does not own the vectors. Callers must ensure the lifetime of vectors
|
||||
// exceeds that of the DeviceProperties.
|
||||
void SetPreferredImageQualities(
|
||||
const std::vector<int>* webp, const std::vector<int>* jpeg);
|
||||
// Returns true iff WebP and Jpeg image quality are set for the preference.
|
||||
bool GetPreferredImageQualities(
|
||||
ImageQualityPreference preference, int* webp, int* jpeg) const;
|
||||
static int GetPreferredImageQualityCount();
|
||||
|
||||
private:
|
||||
friend class ImageRewriteTest;
|
||||
FRIEND_TEST(ImageRewriteTest, SquashImagesForMobileScreen);
|
||||
FRIEND_TEST(DevicePropertiesTest, GetScreenGroupIndex);
|
||||
|
||||
// Returns true if a valid screen_index is returned for the screen_width.
|
||||
// The returned screen_index represents a small, medium or large screen group.
|
||||
static bool GetScreenGroupIndex(int screen_width, int* screen_index);
|
||||
void SetScreenResolution(int width, int height) const;
|
||||
// Returns true if there are valid preferred image qualities.
|
||||
bool HasPreferredImageQualities() const;
|
||||
|
||||
GoogleString user_agent_;
|
||||
UserAgentMatcher* ua_matcher_;
|
||||
|
||||
mutable LazyBool supports_image_inlining_;
|
||||
mutable LazyBool supports_js_defer_;
|
||||
mutable LazyBool supports_lazyload_images_;
|
||||
mutable LazyBool supports_webp_;
|
||||
mutable LazyBool supports_webp_lossless_alpha_;
|
||||
mutable LazyBool is_bot_;
|
||||
mutable LazyBool is_mobile_user_agent_;
|
||||
mutable LazyBool supports_split_html_;
|
||||
mutable LazyBool supports_flush_early_;
|
||||
mutable LazyBool screen_dimensions_set_;
|
||||
mutable int screen_width_;
|
||||
mutable int screen_height_;
|
||||
const std::vector<int>* preferred_webp_qualities_;
|
||||
const std::vector<int>* preferred_jpeg_qualities_;
|
||||
// Used to lazily set device_type_.
|
||||
mutable LazyBool device_type_set_;
|
||||
mutable UserAgentMatcher::DeviceType device_type_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(DeviceProperties);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_DEVICE_PROPERTIES_H_
|
||||
@@ -1,90 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Authors: jmarantz@google.com (Joshua Marantz)
|
||||
// vchudnov@google.com (Victor Chudnovsky)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_EXTERNAL_URL_FETCHER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_EXTERNAL_URL_FETCHER_H_
|
||||
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
#include "net/instaweb/http/public/url_fetcher.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
class RequestHeaders;
|
||||
class ResponseHeaders;
|
||||
class Writer;
|
||||
|
||||
// Runs an external command ('wget' by default, or 'curl') via popen
|
||||
// for blocking URL fetches.
|
||||
|
||||
// TODO(vchudnov): Incorporate NetcatUrlFetcher functionality into
|
||||
// this class.
|
||||
class ExternalUrlFetcher : public UrlFetcher {
|
||||
public:
|
||||
ExternalUrlFetcher() {}
|
||||
virtual ~ExternalUrlFetcher() {}
|
||||
|
||||
// TODO(sligocki): Allow protocol version number (e.g. HTTP/1.1)
|
||||
// and request type (e.g. GET, POST, etc.) to be specified.
|
||||
virtual bool StreamingFetchUrl(const GoogleString& url,
|
||||
const RequestHeaders& request_headers,
|
||||
ResponseHeaders* response_headers,
|
||||
Writer* writer,
|
||||
MessageHandler* message_handler,
|
||||
const RequestContextPtr& request_context);
|
||||
|
||||
|
||||
// Default user agent to use.
|
||||
static const char kDefaultUserAgent[];
|
||||
|
||||
// Sets the path to "binary" when fetching using "how".
|
||||
void set_binary(const GoogleString& binary);
|
||||
|
||||
|
||||
protected:
|
||||
// Appends to escaped_headers one header line for each Name, Value
|
||||
// pair in request_headers.
|
||||
virtual void AppendHeaders(const RequestHeaders& request_headers,
|
||||
StringVector* escaped_headers);
|
||||
|
||||
GoogleString binary_;
|
||||
|
||||
private:
|
||||
virtual const char* GetFetchLabel() = 0;
|
||||
|
||||
// Returns the external command to run in order to fetch a URL. The
|
||||
// URL and the vector of header lines must be already escaped in
|
||||
// escaped_url and escaped_headers, respectively. In addition to the
|
||||
// specified headers, the User-Agent is also explicitly set to the
|
||||
// value of user_agent, unless the latter is NULL.
|
||||
virtual GoogleString ConstructFetchCommand(
|
||||
const GoogleString& escaped_url,
|
||||
const char* user_agent,
|
||||
const StringVector& escaped_headers) = 0;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ExternalUrlFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_EXTERNAL_URL_FETCHER_H_
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
//
|
||||
// UrlFetcher is an interface for asynchronously fetching urls. The
|
||||
// caller must supply a callback to be called when the fetch is complete.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_FAKE_URL_ASYNC_FETCHER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_FAKE_URL_ASYNC_FETCHER_H_
|
||||
|
||||
#include "net/instaweb/http/public/url_pollable_async_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class MessageHandler;
|
||||
class UrlFetcher;
|
||||
|
||||
// Constructs an async fetcher using a synchronous fetcher, blocking
|
||||
// on a fetch and then the 'done' callback directly. It's also
|
||||
// possible to construct a real async interface using a synchronous
|
||||
// fetcher in a thread, but this does not do that: it blocks.
|
||||
//
|
||||
// This is intended for functional regression tests only.
|
||||
class FakeUrlAsyncFetcher : public UrlPollableAsyncFetcher {
|
||||
public:
|
||||
explicit FakeUrlAsyncFetcher(UrlFetcher* url_fetcher)
|
||||
: url_fetcher_(url_fetcher),
|
||||
fetcher_supports_https_(true) {
|
||||
}
|
||||
virtual ~FakeUrlAsyncFetcher();
|
||||
|
||||
virtual bool SupportsHttps() const { return fetcher_supports_https_; }
|
||||
|
||||
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch);
|
||||
|
||||
// Since the underlying fetcher is blocking, there can never be
|
||||
// any outstanding fetches.
|
||||
virtual int Poll(int64 max_wait_ms) { return 0; }
|
||||
|
||||
void set_fetcher_supports_https(bool val) { fetcher_supports_https_ = val; }
|
||||
|
||||
private:
|
||||
UrlFetcher* url_fetcher_;
|
||||
bool fetcher_supports_https_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(FakeUrlAsyncFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_FAKE_URL_ASYNC_FETCHER_H_
|
||||
@@ -1,190 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
// Unit-test framework for wget fetcher
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_FETCHER_TEST_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_FETCHER_TEST_H_
|
||||
|
||||
#include <utility> // for pair
|
||||
#include <vector>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "net/instaweb/http/public/async_fetch.h"
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/http/public/url_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/google_message_handler.h"
|
||||
#include "net/instaweb/util/public/gtest.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/thread_system.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
class RequestHeaders;
|
||||
class ResponseHeaders;
|
||||
class SimpleStats;
|
||||
class Writer;
|
||||
|
||||
class FetcherTest : public testing::Test {
|
||||
protected:
|
||||
static const char kStartDate[];
|
||||
static const char kHtmlContent[];
|
||||
static const char kGoodUrl[];
|
||||
static const char kNotCachedUrl[];
|
||||
static const char kBadUrl[];
|
||||
static const char kHeaderName[];
|
||||
static const char kHeaderValue[];
|
||||
static const char kErrorMessage[];
|
||||
|
||||
FetcherTest();
|
||||
|
||||
static void SetUpTestCase();
|
||||
static void TearDownTestCase();
|
||||
|
||||
// Helpful classes for testing.
|
||||
|
||||
// This mock fetcher will only fetch kGoodUrl, returning kHtmlContent.
|
||||
// If you ask for any other URL it will fail.
|
||||
class MockFetcher : public UrlFetcher {
|
||||
public:
|
||||
MockFetcher() : num_fetches_(0) {}
|
||||
|
||||
virtual bool StreamingFetchUrl(const GoogleString& url,
|
||||
const RequestHeaders& request_headers,
|
||||
ResponseHeaders* response_headers,
|
||||
Writer* response_writer,
|
||||
MessageHandler* message_handler,
|
||||
const RequestContextPtr& request_context);
|
||||
|
||||
int num_fetches() const { return num_fetches_; }
|
||||
|
||||
private:
|
||||
bool Populate(const char* cache_control, ResponseHeaders* response_headers,
|
||||
Writer* writer, MessageHandler* message_handler);
|
||||
|
||||
int num_fetches_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(MockFetcher);
|
||||
};
|
||||
|
||||
// This is a pseudo-asynchronous interface to MockFetcher. It performs
|
||||
// fetches instantly, but defers calling the callback until the user
|
||||
// calls CallCallbacks(). Then it will execute the deferred callbacks.
|
||||
class MockAsyncFetcher : public UrlAsyncFetcher {
|
||||
public:
|
||||
explicit MockAsyncFetcher(UrlFetcher* url_fetcher)
|
||||
: url_fetcher_(url_fetcher) {}
|
||||
|
||||
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* handler,
|
||||
AsyncFetch* fetch);
|
||||
|
||||
void CallCallbacks();
|
||||
|
||||
private:
|
||||
UrlFetcher* url_fetcher_;
|
||||
std::vector<std::pair<bool, AsyncFetch*> > deferred_callbacks_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(MockAsyncFetcher);
|
||||
};
|
||||
|
||||
// Callback that just checks correct Done status and keeps track of whether
|
||||
// it has been called yet or not.
|
||||
class CheckCallback : public StringAsyncFetch {
|
||||
public:
|
||||
CheckCallback(const RequestContextPtr& ctx, bool expect_success,
|
||||
bool* callback_called)
|
||||
: StringAsyncFetch(ctx),
|
||||
expect_success_(expect_success),
|
||||
callback_called_(callback_called) {
|
||||
}
|
||||
|
||||
virtual void HandleDone(bool success) {
|
||||
*callback_called_ = true;
|
||||
CHECK_EQ(expect_success_, success);
|
||||
ValidateMockFetcherResponse(success, true, buffer(), *response_headers());
|
||||
delete this;
|
||||
}
|
||||
|
||||
bool expect_success_;
|
||||
bool* callback_called_;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(CheckCallback);
|
||||
};
|
||||
|
||||
static void ValidateMockFetcherResponse(
|
||||
bool success, bool check_error_message, const GoogleString& content,
|
||||
const ResponseHeaders& response_headers);
|
||||
|
||||
// Do a URL fetch, and return the number of times the mock fetcher
|
||||
// had to be run to perform the fetch.
|
||||
// Note: You must override sync_fetcher() to return the correct fetcher.
|
||||
int CountFetchesSync(const StringPiece& url, bool expect_success,
|
||||
bool check_error_message);
|
||||
// Use an explicit fetcher (you don't need to override sync_fetcher()).
|
||||
int CountFetchesSync(const StringPiece& url, UrlFetcher* fetcher,
|
||||
bool expect_success, bool check_error_message);
|
||||
|
||||
// Initiate an async URL fetch, and return the number of times the mock
|
||||
// fetcher had to be run to perform the fetch.
|
||||
// Note: You must override async_fetcher() to return the correct fetcher.
|
||||
int CountFetchesAsync(const StringPiece& url, bool expect_success,
|
||||
bool* callback_called);
|
||||
|
||||
// Override these to allow CountFetchesSync or Async respectively.
|
||||
// These are not abstract (= 0) because they only need to be overridden by
|
||||
// classes which want to use CountFetchersSync/Async without specifying the
|
||||
// fetcher in each call.
|
||||
virtual UrlFetcher* sync_fetcher() {
|
||||
LOG(FATAL) << "sync_fetcher() must be overridden before use.";
|
||||
return NULL;
|
||||
};
|
||||
virtual UrlAsyncFetcher* async_fetcher() {
|
||||
LOG(FATAL) << "async_fetcher() must be overridden before use.";
|
||||
return NULL;
|
||||
};
|
||||
|
||||
GoogleString TestFilename() {
|
||||
return (GTestSrcDir() +
|
||||
"/net/instaweb/http/testdata/google.http");
|
||||
}
|
||||
|
||||
// This validation code is hard-coded to the http request capture in
|
||||
// testdata/google.http.
|
||||
void ValidateOutput(const GoogleString& content,
|
||||
const ResponseHeaders& response_headers);
|
||||
|
||||
GoogleMessageHandler message_handler_;
|
||||
MockFetcher mock_fetcher_;
|
||||
MockAsyncFetcher mock_async_fetcher_;
|
||||
scoped_ptr<ThreadSystem> thread_system_;
|
||||
static SimpleStats* statistics_;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(FetcherTest);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_FETCHER_TEST_H_
|
||||
@@ -1,163 +0,0 @@
|
||||
// Copyright 2011 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_HEADERS_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_HEADERS_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/proto_util.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
class NameValue;
|
||||
class StringMultiMapInsensitive;
|
||||
class Writer;
|
||||
|
||||
// Read/write API for HTTP headers (shared base class)
|
||||
template<class Proto> class Headers {
|
||||
public:
|
||||
Headers();
|
||||
virtual ~Headers();
|
||||
|
||||
virtual void Clear();
|
||||
|
||||
int major_version() const;
|
||||
bool has_major_version() const;
|
||||
int minor_version() const;
|
||||
void set_major_version(int major_version);
|
||||
void set_minor_version(int major_version);
|
||||
|
||||
// Raw access for random access to attribute name/value pairs.
|
||||
int NumAttributes() const;
|
||||
const GoogleString& Name(int i) const;
|
||||
const GoogleString& Value(int i) const;
|
||||
|
||||
// Lookup attributes with provided name. Attribute values are stored in
|
||||
// values. Returns true iff there were any attributes with provided name.
|
||||
//
|
||||
// Note that Lookup, though declared const, is NOT thread-safe. This
|
||||
// is because it lazily generates a map.
|
||||
// TODO(jmarantz): this is a problem waiting to happen, but I believe it
|
||||
// will not be a problem in the immediate future. We can refactor our way
|
||||
// around this problem by moving the Map to an explicit separate class that
|
||||
// can be instantiated to assist with Lookups and Remove. But that should
|
||||
// be done in a separate CL from the one I'm typing into now.
|
||||
bool Lookup(const StringPiece& name, ConstStringStarVector* values) const;
|
||||
|
||||
// Looks up a single attribute value. Returns NULL if the attribute is
|
||||
// not found, or if more than one attribute is found.
|
||||
const char* Lookup1(const StringPiece& name) const;
|
||||
|
||||
// Does there exist a header with given name.
|
||||
bool Has(const StringPiece& name) const;
|
||||
|
||||
// Is value one of the values in Lookup(name)?
|
||||
bool HasValue(const StringPiece& name, const StringPiece& value) const;
|
||||
|
||||
// NumAttributeNames is also const but not thread-safe.
|
||||
int NumAttributeNames() const;
|
||||
|
||||
// Remove all instances of cookie_name in all the cookie headers.
|
||||
// Empty cookie headers will be removed.
|
||||
// It might be better for performance if this function is called after
|
||||
// checking that the cookie is present.
|
||||
// CAVEAT: Double quoted values are not necessarily treated as one token.
|
||||
// Please refer to the test cases in headers_cookie_util_test.cc for more
|
||||
// details.
|
||||
void RemoveCookie(const StringPiece& cookie_name);
|
||||
|
||||
// Adds a new header, even if a header with the 'name' exists already.
|
||||
virtual void Add(const StringPiece& name, const StringPiece& value);
|
||||
|
||||
// Remove headers by name and value. Return true if anything was removed.
|
||||
// Note: If the original headers were:
|
||||
// attr: val1
|
||||
// attr: val2
|
||||
// attr: val3
|
||||
// and you Remove(attr, val2), your new headers will be:
|
||||
// attr: val1, val3 (if attr is a comma-separated field)
|
||||
// and -
|
||||
// attr: val1
|
||||
// attr: val3 (otherwise).
|
||||
virtual bool Remove(const StringPiece& name, const StringPiece& value);
|
||||
|
||||
// Removes all headers by name. Return true if anything was removed.
|
||||
virtual bool RemoveAll(const StringPiece& name);
|
||||
|
||||
// Removes all headers whose name is in |names|.
|
||||
// Return true if anything was removed.
|
||||
virtual bool RemoveAllFromSet(const StringSetInsensitive& names);
|
||||
|
||||
// Removes all headers whose name is in |names|.
|
||||
static void RemoveFromHeaders(const StringSetInsensitive& names,
|
||||
protobuf::RepeatedPtrField<NameValue>* headers);
|
||||
|
||||
// Removes all headers whose name starts with prefix.
|
||||
virtual void RemoveAllWithPrefix(const StringPiece& prefix);
|
||||
|
||||
// Similar to RemoveAll followed by Add. Note that the attribute
|
||||
// order may be changed as a side effect of this operation.
|
||||
virtual void Replace(const StringPiece& name, const StringPiece& value);
|
||||
|
||||
// Merge headers. Replaces all headers specified both here and in
|
||||
// other with the version in other. Useful for updating headers
|
||||
// when recieving 304 Not Modified responses.
|
||||
// Note: This is order-scrambling.
|
||||
virtual void UpdateFrom(const Headers<Proto>& other);
|
||||
|
||||
// Serialize HTTP header to a binary stream.
|
||||
virtual bool WriteAsBinary(Writer* writer, MessageHandler* message_handler);
|
||||
|
||||
// Read HTTP header from a binary string.
|
||||
virtual bool ReadFromBinary(const StringPiece& buf, MessageHandler* handler);
|
||||
|
||||
// Serialize HTTP headers in HTTP format so it can be re-parsed
|
||||
virtual bool WriteAsHttp(Writer* writer, MessageHandler* handler) const;
|
||||
|
||||
protected:
|
||||
void PopulateMap() const; // const is a lie, mutates map_.
|
||||
|
||||
// We have two represenations for the name/value pairs. The
|
||||
// HttpResponseHeader protobuf contains a simple string-pair vector, but
|
||||
// lacks a fast associative lookup. So we will build structures for
|
||||
// associative lookup lazily, and keep them up-to-date if they are
|
||||
// present.
|
||||
mutable scoped_ptr<StringMultiMapInsensitive> map_;
|
||||
scoped_ptr<Proto> proto_;
|
||||
|
||||
private:
|
||||
bool IsCommaSeparatedField(const StringPiece& name) const;
|
||||
|
||||
// If name is a comma-separated field (above), then split value at commas,
|
||||
// and add name, val for each of the comma-separated values
|
||||
// (removing whitespace and commas).
|
||||
// Otherwise, add the name, value pair to the map_.
|
||||
// const is a lie
|
||||
// NOTE: the map will contain the comma-split values, but the protobuf
|
||||
// will contain the original pairs including comma-separated values.
|
||||
void AddToMap(const StringPiece& name, const StringPiece& value) const;
|
||||
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(Headers);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_HEADERS_H_
|
||||
@@ -1,343 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_HTTP_CACHE_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_HTTP_CACHE_H_
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "net/instaweb/http/public/http_value.h"
|
||||
#include "net/instaweb/http/public/meta_data.h"
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/http/public/response_headers.h"
|
||||
#include "net/instaweb/util/public/atomic_bool.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractLogRecord;
|
||||
class CacheInterface;
|
||||
class Hasher;
|
||||
class MessageHandler;
|
||||
class RequestHeaders;
|
||||
class Statistics;
|
||||
class Timer;
|
||||
class Variable;
|
||||
|
||||
// Implements HTTP caching semantics, including cache expiration and
|
||||
// retention of the originally served cache headers.
|
||||
class HTTPCache {
|
||||
public:
|
||||
// Names of statistics variables: exported for tests.
|
||||
static const char kCacheTimeUs[];
|
||||
static const char kCacheHits[];
|
||||
static const char kCacheMisses[];
|
||||
static const char kCacheFallbacks[];
|
||||
static const char kCacheExpirations[];
|
||||
static const char kCacheInserts[];
|
||||
static const char kCacheDeletes[];
|
||||
|
||||
// The prefix used for Etags.
|
||||
static const char kEtagPrefix[];
|
||||
|
||||
// Format that is used while generating Etags.
|
||||
static const char kEtagFormat[];
|
||||
|
||||
// Does not take ownership of any inputs.
|
||||
HTTPCache(CacheInterface* cache, Timer* timer, Hasher* hasher,
|
||||
Statistics* stats);
|
||||
virtual ~HTTPCache();
|
||||
|
||||
// When a lookup is done in the HTTP Cache, it returns one of these values.
|
||||
enum FindResult {
|
||||
kFound,
|
||||
kNotFound,
|
||||
// Helps avoid frequent refetching of resources which have error status
|
||||
// codes or are not cacheable.
|
||||
kRecentFetchFailed,
|
||||
kRecentFetchNotCacheable,
|
||||
};
|
||||
|
||||
virtual void set_hasher(Hasher* hasher) { hasher_ = hasher; }
|
||||
|
||||
// Class to handle an asynchronous cache lookup response.
|
||||
//
|
||||
// TODO(jmarantz): consider inheriting from AsyncFetch with an implementation
|
||||
// of Write/Flush/HeadersComplete -- we'd have to make Done take true/false so
|
||||
// this would impact callers.
|
||||
class Callback {
|
||||
public:
|
||||
explicit Callback(const RequestContextPtr& request_ctx)
|
||||
: response_headers_(NULL),
|
||||
owns_response_headers_(false),
|
||||
request_ctx_(request_ctx),
|
||||
log_timing_(true) {
|
||||
}
|
||||
virtual ~Callback();
|
||||
virtual void Done(FindResult find_result) = 0;
|
||||
// A method that allows client Callbacks to apply invalidation checks. We
|
||||
// first (in http_cache.cc) check whether the entry is expired using normal
|
||||
// http semantics, and if it is not expired, then this check is called --
|
||||
// thus callbacks can apply any further invalidation semantics it wants on
|
||||
// otherwise valid entries. But there's no way for a callback to override
|
||||
// when the HTTP semantics say the entry is expired.
|
||||
//
|
||||
// See also OptionsAwareHTTPCacheCallback in rewrite_driver.h for an
|
||||
// implementation you probably want to use.
|
||||
virtual bool IsCacheValid(const GoogleString& key,
|
||||
const ResponseHeaders& headers) = 0;
|
||||
|
||||
// A method that allows client Callbacks to check if the response in cache
|
||||
// is fresh enough, in addition to it being valid. This is used while
|
||||
// freshening resources to check that the response in cache is not only
|
||||
// valid, but is also not going to expire anytime soon.
|
||||
// Note that if the response in cache is valid but not fresh, the HTTPCache
|
||||
// calls Callback::Done with find_result = kNotFound and fills in
|
||||
// fallback_http_value() with the cached response.
|
||||
virtual bool IsFresh(const ResponseHeaders& headers) { return true; }
|
||||
|
||||
// Overrides the cache ttl of the cached response with the given value. Note
|
||||
// that this has no effect if the returned value is negative or less than
|
||||
// the cache ttl of the stored value.
|
||||
virtual int64 OverrideCacheTtlMs(const GoogleString& key) { return -1; }
|
||||
|
||||
// TODO(jmarantz): specify the dataflow between http_value and
|
||||
// response_headers.
|
||||
HTTPValue* http_value() { return &http_value_; }
|
||||
ResponseHeaders* response_headers() {
|
||||
if (response_headers_ == NULL) {
|
||||
response_headers_ = new ResponseHeaders;
|
||||
owns_response_headers_ = true;
|
||||
}
|
||||
return response_headers_;
|
||||
}
|
||||
const ResponseHeaders* response_headers() const {
|
||||
return const_cast<Callback*>(this)->response_headers();
|
||||
}
|
||||
void set_response_headers(ResponseHeaders* headers) {
|
||||
DCHECK(!owns_response_headers_);
|
||||
if (owns_response_headers_) {
|
||||
delete response_headers_;
|
||||
}
|
||||
response_headers_ = headers;
|
||||
owns_response_headers_ = false;
|
||||
}
|
||||
HTTPValue* fallback_http_value() { return &fallback_http_value_; }
|
||||
|
||||
AbstractLogRecord* log_record();
|
||||
const RequestContextPtr& request_context() { return request_ctx_; }
|
||||
void set_log_timing(bool t) { log_timing_ = t; }
|
||||
bool log_timing() const { return log_timing_; }
|
||||
|
||||
virtual void SetTimingMs(int64 timing_value_ms);
|
||||
|
||||
private:
|
||||
HTTPValue http_value_;
|
||||
// Stale value that can be used in case a fetch fails. Note that Find()
|
||||
// may fill in a stale value here but it will still return kNotFound.
|
||||
HTTPValue fallback_http_value_;
|
||||
ResponseHeaders* response_headers_;
|
||||
bool owns_response_headers_;
|
||||
RequestContextPtr request_ctx_;
|
||||
bool log_timing_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(Callback);
|
||||
};
|
||||
|
||||
// Makes the cache ignore put requests that do not record successes.
|
||||
virtual void SetIgnoreFailurePuts();
|
||||
|
||||
// Non-blocking Find. Calls callback when done. 'handler' must
|
||||
// stay valid until callback->Done() is called.
|
||||
virtual void Find(const GoogleString& key, MessageHandler* handler,
|
||||
Callback* callback);
|
||||
|
||||
// Note that Put takes a non-const pointer for HTTPValue so it can
|
||||
// bump the reference count.
|
||||
virtual void Put(const GoogleString& key, HTTPValue* value,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Note that Put takes a non-const pointer for ResponseHeaders* so it
|
||||
// can update the caching fields prior to storing.
|
||||
// If you call this method, you must be certain that the outgoing
|
||||
// request was not sent with Authorization:.
|
||||
virtual void Put(const GoogleString& key, ResponseHeaders* headers,
|
||||
const StringPiece& content, MessageHandler* handler);
|
||||
|
||||
// Deletes an element in the cache.
|
||||
virtual void Delete(const GoogleString& key);
|
||||
|
||||
virtual void set_force_caching(bool force) { force_caching_ = force; }
|
||||
bool force_caching() const { return force_caching_; }
|
||||
virtual void set_disable_html_caching_on_https(bool x) {
|
||||
disable_html_caching_on_https_ = x;
|
||||
}
|
||||
Timer* timer() const { return timer_; }
|
||||
|
||||
// Tell the HTTP Cache to remember that a particular key is not cacheable
|
||||
// because the URL was marked with Cache-Control 'nocache' or Cache-Control
|
||||
// 'private'. We would like to avoid DOSing the origin server or spinning our
|
||||
// own wheels trying to re-fetch this resource.
|
||||
// The not-cacheable setting will be 'remembered' for
|
||||
// remember_not_cacheable_ttl_seconds_.
|
||||
// Note that we remember whether the response was originally a "200 OK" so
|
||||
// that we can check if the cache TTL can be overridden.
|
||||
virtual void RememberNotCacheable(const GoogleString& key,
|
||||
bool is_200_status_code,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Tell the HTTP Cache to remember that a particular key is not cacheable
|
||||
// because the associated URL failed to fetch.
|
||||
//
|
||||
// The not-cacheable setting will be 'remembered' for
|
||||
// remember_fetch_failed_ttl_seconds_.
|
||||
virtual void RememberFetchFailed(const GoogleString& key,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Tell the HTTP Cache to remember that we had to give up on doing a
|
||||
// background fetch due to load. This will remember it for
|
||||
// remember_fetch_dropped_ttl_seconds_.
|
||||
virtual void RememberFetchDropped(const GoogleString& key,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Indicates if the response is within the cacheable size limit. Clients of
|
||||
// HTTPCache must check if they will be eventually able to cache their entries
|
||||
// before buffering them in memory. If the content length header is not found
|
||||
// then consider it as cacheable. This could be a chunked response.
|
||||
bool IsCacheableContentLength(ResponseHeaders* headers) const;
|
||||
// Indicates if the response body is within the cacheable size limit. If the
|
||||
// response headers do not have content length header, then the clients of
|
||||
// HTTPCache must check if the received response body is of cacheable size
|
||||
// before buffering them in memory.
|
||||
bool IsCacheableBodySize(int64 body_size) const;
|
||||
|
||||
// Initialize statistics variables for the cache
|
||||
static void InitStats(Statistics* statistics);
|
||||
|
||||
// Returns true if the resource is already at the point of expiration
|
||||
// (or not cacheable by us), and would never be used if inserted into the
|
||||
// cache. Otherwise, returns false. If the entry was rejected because of
|
||||
// expiration but would otherwise have been cacheable, this also increments
|
||||
// the cache expirations statistic.
|
||||
//
|
||||
// request_headers is used to check for resources requested with
|
||||
// authorization. It is OK to pass NULL if you're certain that the fetch
|
||||
// was done without authorization headers.
|
||||
bool IsAlreadyExpired(const RequestHeaders* request_headers,
|
||||
const ResponseHeaders& headers);
|
||||
|
||||
Variable* cache_time_us() { return cache_time_us_; }
|
||||
Variable* cache_hits() { return cache_hits_; }
|
||||
Variable* cache_misses() { return cache_misses_; }
|
||||
Variable* cache_fallbacks() { return cache_fallbacks_; }
|
||||
Variable* cache_expirations() { return cache_expirations_; }
|
||||
Variable* cache_inserts() { return cache_inserts_; }
|
||||
Variable* cache_deletes() { return cache_deletes_; }
|
||||
|
||||
int64 remember_not_cacheable_ttl_seconds() {
|
||||
return remember_not_cacheable_ttl_seconds_;
|
||||
}
|
||||
|
||||
virtual void set_remember_not_cacheable_ttl_seconds(int64 value) {
|
||||
DCHECK_LE(0, value);
|
||||
if (value >= 0) {
|
||||
remember_not_cacheable_ttl_seconds_ = value;
|
||||
}
|
||||
}
|
||||
|
||||
int64 remember_fetch_failed_ttl_seconds() {
|
||||
return remember_fetch_failed_ttl_seconds_;
|
||||
}
|
||||
|
||||
virtual void set_remember_fetch_failed_ttl_seconds(int64 value) {
|
||||
DCHECK_LE(0, value);
|
||||
if (value >= 0) {
|
||||
remember_fetch_failed_ttl_seconds_ = value;
|
||||
}
|
||||
}
|
||||
|
||||
int64 remember_fetch_dropped_ttl_seconds() {
|
||||
return remember_fetch_dropped_ttl_seconds_;
|
||||
}
|
||||
|
||||
virtual void set_remember_fetch_dropped_ttl_seconds(int64 value) {
|
||||
DCHECK_LE(0, value);
|
||||
if (value >= 0) {
|
||||
remember_fetch_dropped_ttl_seconds_ = value;
|
||||
}
|
||||
}
|
||||
|
||||
int max_cacheable_response_content_length() {
|
||||
return max_cacheable_response_content_length_;
|
||||
}
|
||||
|
||||
virtual void set_max_cacheable_response_content_length(int64 value);
|
||||
|
||||
virtual const char* Name() const { return name_.c_str(); }
|
||||
|
||||
protected:
|
||||
virtual void PutInternal(const GoogleString& key, int64 start_us,
|
||||
HTTPValue* value);
|
||||
|
||||
private:
|
||||
friend class HTTPCacheCallback;
|
||||
friend class WriteThroughHTTPCache;
|
||||
|
||||
bool IsCurrentlyValid(const RequestHeaders* request_headers,
|
||||
const ResponseHeaders& headers, int64 now_ms);
|
||||
|
||||
bool MayCacheUrl(const GoogleString& url, const ResponseHeaders& headers);
|
||||
// Requires either content or value to be non-NULL.
|
||||
// Applies changes to headers. If the headers are actually changed or if value
|
||||
// is NULL then it builds and returns a new HTTPValue. If content is NULL
|
||||
// then content is extracted from value.
|
||||
HTTPValue* ApplyHeaderChangesForPut(
|
||||
const GoogleString& key, int64 start_us, const StringPiece* content,
|
||||
ResponseHeaders* headers, HTTPValue* value, MessageHandler* handler);
|
||||
void UpdateStats(FindResult result, bool has_fallback, int64 delta_us);
|
||||
void RememberFetchFailedorNotCacheableHelper(
|
||||
const GoogleString& key, MessageHandler* handler, HttpStatus::Code code,
|
||||
int64 ttl_sec);
|
||||
|
||||
CacheInterface* cache_; // Owned by the caller.
|
||||
Timer* timer_;
|
||||
Hasher* hasher_;
|
||||
bool force_caching_;
|
||||
// Whether to disable caching of HTML content fetched via https.
|
||||
bool disable_html_caching_on_https_;
|
||||
Variable* cache_time_us_;
|
||||
Variable* cache_hits_;
|
||||
Variable* cache_misses_;
|
||||
Variable* cache_fallbacks_;
|
||||
Variable* cache_expirations_;
|
||||
Variable* cache_inserts_;
|
||||
Variable* cache_deletes_;
|
||||
GoogleString name_;
|
||||
int64 remember_not_cacheable_ttl_seconds_;
|
||||
int64 remember_fetch_failed_ttl_seconds_;
|
||||
int64 remember_fetch_dropped_ttl_seconds_;
|
||||
int64 max_cacheable_response_content_length_;
|
||||
AtomicBool ignore_failure_puts_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HTTPCache);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_HTTP_CACHE_H_
|
||||
@@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_ASYNC_WRITER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_ASYNC_WRITER_H_
|
||||
|
||||
#include "net/instaweb/http/public/url_async_fetcher.h"
|
||||
#include "net/instaweb/http/public/http_dump_url_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AsyncFetch;
|
||||
class FileSystem;
|
||||
class MessageHandler;
|
||||
class Timer;
|
||||
|
||||
// HttpDumpWriter checks to see whether the HTTP dump is available on the
|
||||
// filesystem. If not, it fetches it from another fetcher (e.g. one that
|
||||
// uses the network) and writes it to the filesystem so that HttpDumpFetcher
|
||||
// can find it.
|
||||
class HttpDumpUrlAsyncWriter : public UrlAsyncFetcher {
|
||||
public:
|
||||
HttpDumpUrlAsyncWriter(const StringPiece& root_dir,
|
||||
UrlAsyncFetcher* base_fetcher,
|
||||
FileSystem* file_system,
|
||||
Timer* timer)
|
||||
: dump_fetcher_(root_dir, file_system, timer),
|
||||
base_fetcher_(base_fetcher),
|
||||
file_system_(file_system),
|
||||
accept_gzip_(true) {
|
||||
root_dir.CopyToString(&root_dir_);
|
||||
}
|
||||
virtual ~HttpDumpUrlAsyncWriter();
|
||||
|
||||
virtual bool SupportsHttps() const { return base_fetcher_->SupportsHttps(); }
|
||||
|
||||
// This is a synchronous/blocking implementation.
|
||||
virtual void Fetch(const GoogleString& url,
|
||||
MessageHandler* handler,
|
||||
AsyncFetch* base_fetch);
|
||||
|
||||
// Controls whether we will request and save gzipped content to the
|
||||
// file system. Note that http_dump_url_fetcher will inflate on
|
||||
// read if its caller does not want gzipped output.
|
||||
void set_accept_gzip(bool x) { accept_gzip_ = x; }
|
||||
|
||||
private:
|
||||
// Helper class to manage individual fetches.
|
||||
class DumpFetch;
|
||||
|
||||
HttpDumpUrlFetcher dump_fetcher_;
|
||||
// Used to fetch urls that aren't in the dump yet.
|
||||
UrlAsyncFetcher* base_fetcher_;
|
||||
GoogleString root_dir_; // Root directory of the HTTP dumps.
|
||||
FileSystem* file_system_;
|
||||
bool accept_gzip_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HttpDumpUrlAsyncWriter);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_ASYNC_WRITER_H_
|
||||
@@ -1,110 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_FETCHER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_FETCHER_H_
|
||||
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/http/public/url_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/file_system.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class GoogleUrl;
|
||||
class MessageHandler;
|
||||
class RequestHeaders;
|
||||
class ResponseHeaders;
|
||||
class Timer;
|
||||
class Writer;
|
||||
|
||||
// TODO(sligocki): Can we forward declare these somehow?
|
||||
// class FileSystem;
|
||||
// class FileSystem::InputFile;
|
||||
|
||||
// HttpDumpFetcher fetches raw HTTP dumps from the filesystem.
|
||||
// These dumps could be compressed or chunked, the fetcher does not
|
||||
// decompress or de-chunk them.
|
||||
class HttpDumpUrlFetcher : public UrlFetcher {
|
||||
public:
|
||||
// When the slurped data is gzipped, but request headers are made
|
||||
// that don't include 'gzip' in an Accept-Encoding header, then
|
||||
// this fetcher inflates the gzipped output as it streams. It
|
||||
// also captures the original gzipped size in this attribute in
|
||||
// the response headers.
|
||||
static const char kGzipContentLengthAttribute[];
|
||||
|
||||
HttpDumpUrlFetcher(const StringPiece& root_dir, FileSystem* file_system,
|
||||
Timer* timer);
|
||||
virtual ~HttpDumpUrlFetcher();
|
||||
|
||||
// Converts URL into filename the way that Latency Lab does.
|
||||
// Note: root_dir must be standardized to have a / at end already.
|
||||
static bool GetFilenameFromUrl(const StringPiece& root_dir,
|
||||
const GoogleUrl& url,
|
||||
GoogleString* filename,
|
||||
MessageHandler* message_handler);
|
||||
|
||||
// Non-static version that uses the fetcher's root dir.
|
||||
bool GetFilename(const GoogleUrl& url,
|
||||
GoogleString* filename,
|
||||
MessageHandler* message_handler) {
|
||||
return GetFilenameFromUrl(root_dir_, url, filename, message_handler);
|
||||
}
|
||||
|
||||
// This is a synchronous/blocking implementation.
|
||||
virtual bool StreamingFetchUrl(const GoogleString& url,
|
||||
const RequestHeaders& request_headers,
|
||||
ResponseHeaders* response_headers,
|
||||
Writer* fetched_content_writer,
|
||||
MessageHandler* message_handler,
|
||||
const RequestContextPtr& request_context);
|
||||
|
||||
// Parse file into response_headers and response_writer as if it were bytes
|
||||
// off the wire.
|
||||
bool ParseFile(FileSystem::InputFile* file,
|
||||
ResponseHeaders* response_headers,
|
||||
Writer* response_writer,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Helper function to return a generic error response.
|
||||
void RespondError(ResponseHeaders* response_headers, Writer* response_writer,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Print URLs each time they are fetched.
|
||||
void set_print_urls(bool on);
|
||||
|
||||
private:
|
||||
GoogleString root_dir_; // Root directory of the HTTP dumps.
|
||||
FileSystem* file_system_;
|
||||
Timer* timer_;
|
||||
|
||||
// Response to use if something goes wrong.
|
||||
GoogleString error_body_;
|
||||
|
||||
scoped_ptr<StringSet> urls_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HttpDumpUrlFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_FETCHER_H_
|
||||
@@ -1,82 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_WRITER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_WRITER_H_
|
||||
|
||||
#include "net/instaweb/http/public/http_dump_url_fetcher.h"
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/http/public/url_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class FileSystem;
|
||||
class MessageHandler;
|
||||
class RequestHeaders;
|
||||
class ResponseHeaders;
|
||||
class Timer;
|
||||
class Writer;
|
||||
|
||||
// HttpDumpWriter checks to see whether the HTTP dump is available on the
|
||||
// filesystem. If not, it fetches it from another fetcher (e.g. one that
|
||||
// uses the network) and writes it to the filesystem so that HttpDumpFetcher
|
||||
// can find it.
|
||||
class HttpDumpUrlWriter : public UrlFetcher {
|
||||
public:
|
||||
HttpDumpUrlWriter(const StringPiece& root_dir, UrlFetcher* base_fetcher,
|
||||
FileSystem* file_system, Timer* timer)
|
||||
: dump_fetcher_(root_dir, file_system, timer),
|
||||
base_fetcher_(base_fetcher),
|
||||
file_system_(file_system),
|
||||
accept_gzip_(true) {
|
||||
root_dir.CopyToString(&root_dir_);
|
||||
}
|
||||
virtual ~HttpDumpUrlWriter();
|
||||
|
||||
// This is a synchronous/blocking implementation.
|
||||
virtual bool StreamingFetchUrl(const GoogleString& url,
|
||||
const RequestHeaders& request_headers,
|
||||
ResponseHeaders* response_headers,
|
||||
Writer* response_writer,
|
||||
MessageHandler* message_handler,
|
||||
const RequestContextPtr& request_context);
|
||||
|
||||
// Controls whether we will request and save gzipped content to the
|
||||
// file system. Note that http_dump_url_fetcher will inflate on
|
||||
// read if its caller does not want gzipped output.
|
||||
void set_accept_gzip(bool x) { accept_gzip_ = x; }
|
||||
|
||||
// Print URLs each time they are fetched.
|
||||
void set_print_urls(bool on) { dump_fetcher_.set_print_urls(on); }
|
||||
|
||||
private:
|
||||
HttpDumpUrlFetcher dump_fetcher_;
|
||||
UrlFetcher* base_fetcher_; // Used to fetch urls that aren't in the dump yet.
|
||||
GoogleString root_dir_; // Root directory of the HTTP dumps.
|
||||
FileSystem* file_system_;
|
||||
bool accept_gzip_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HttpDumpUrlWriter);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_HTTP_DUMP_URL_WRITER_H_
|
||||
@@ -1,76 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_HTTP_RESPONSE_PARSER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_HTTP_RESPONSE_PARSER_H_
|
||||
|
||||
#include <cstdio> // for FILE
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
// TODO(sligocki): Find a way to forward declare FileSystem::InputFile.
|
||||
#include "net/instaweb/http/public/response_headers_parser.h"
|
||||
#include "net/instaweb/util/public/file_system.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
class ResponseHeaders;
|
||||
class Writer;
|
||||
|
||||
// Helper class to facilitate parsing a raw streaming HTTP response including
|
||||
// headers and body.
|
||||
class HttpResponseParser {
|
||||
public:
|
||||
HttpResponseParser(ResponseHeaders* response_headers, Writer* writer,
|
||||
MessageHandler* handler)
|
||||
: reading_headers_(true),
|
||||
ok_(true),
|
||||
response_headers_(response_headers),
|
||||
writer_(writer),
|
||||
handler_(handler),
|
||||
parser_(response_headers) {
|
||||
}
|
||||
|
||||
// Parse complete HTTP response from a file.
|
||||
bool ParseFile(FileSystem::InputFile* file);
|
||||
|
||||
// Parse complete HTTP response from a FILE stream.
|
||||
// TODO(sligocki): We need a Readable abstraction (like Writer)
|
||||
bool Parse(FILE* stream);
|
||||
|
||||
// Read a chunk of HTTP response, populating response_headers and calling
|
||||
// writer on output body, returning true if the status is ok.
|
||||
bool ParseChunk(const StringPiece& data);
|
||||
|
||||
bool ok() const { return ok_; }
|
||||
bool headers_complete() const { return parser_.headers_complete(); }
|
||||
|
||||
private:
|
||||
bool reading_headers_;
|
||||
bool ok_;
|
||||
ResponseHeaders* response_headers_;
|
||||
Writer* writer_;
|
||||
MessageHandler* handler_;
|
||||
ResponseHeadersParser parser_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HttpResponseParser);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_HTTP_RESPONSE_PARSER_H_
|
||||
@@ -1,121 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_HTTP_VALUE_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_HTTP_VALUE_H_
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/shared_string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
#include "net/instaweb/util/public/writer.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class ResponseHeaders;
|
||||
class MessageHandler;
|
||||
|
||||
// Provides shared, ref-counted, copy-on-write storage for HTTP
|
||||
// contents, to aid sharing between active fetches and filters, and
|
||||
// the cache, from which data may be evicted at any time.
|
||||
class HTTPValue : public Writer {
|
||||
public:
|
||||
HTTPValue() : contents_size_(0) {}
|
||||
|
||||
// Clears the value (both headers and content)
|
||||
void Clear();
|
||||
|
||||
// Returns true if this HTTPValue is empty.
|
||||
bool Empty() const { return storage_.empty(); }
|
||||
|
||||
// Sets the HTTP headers for this value. This method may only
|
||||
// be called once and must be called before or after all of the
|
||||
// contents are set (using the streaming interface Write).
|
||||
//
|
||||
// If Clear() is called, then SetHeaders() can be called once again.
|
||||
//
|
||||
// Does NOT take ownership of headers.
|
||||
// A non-const pointer is required for the response headers so that
|
||||
// the cache fields can be updated if necessary.
|
||||
void SetHeaders(ResponseHeaders* headers);
|
||||
|
||||
// Writes contents into the HTTPValue object. Write can be called
|
||||
// multiple times to append more data, and can be called before
|
||||
// or after SetHeaders. However, SetHeaders cannot be interleaved
|
||||
// in between calls to Write.
|
||||
virtual bool Write(const StringPiece& str, MessageHandler* handler);
|
||||
virtual bool Flush(MessageHandler* handler);
|
||||
|
||||
// Retrieves the headers, returning false if empty.
|
||||
bool ExtractHeaders(ResponseHeaders* headers, MessageHandler* handler) const;
|
||||
|
||||
// Retrieves the contents, returning false if empty. Note that the
|
||||
// contents are only guaranteed valid as long as the HTTPValue
|
||||
// object is in scope.
|
||||
bool ExtractContents(StringPiece* str) const;
|
||||
|
||||
// Tests whether this reference is the only active one to the string object.
|
||||
bool unique() const { return storage_.unique(); }
|
||||
|
||||
// Assigns the storage of an HTTPValue based on the provided storage. This
|
||||
// can be used for a cache Get. Returns false if the string is not
|
||||
// well-formed.
|
||||
//
|
||||
// Extracts the headers into the provided ResponseHeaders buffer.
|
||||
bool Link(SharedString* src, ResponseHeaders* headers,
|
||||
MessageHandler* handler);
|
||||
|
||||
// Links two HTTPValues together, using the contents of 'src' and discarding
|
||||
// the contents of this.
|
||||
void Link(HTTPValue* src) {
|
||||
if (src != this) {
|
||||
storage_ = src->storage_; // SharedString links via assignment.
|
||||
contents_size_ = src->contents_size();
|
||||
}
|
||||
}
|
||||
|
||||
// Access the shared string, for insertion into a cache via Put.
|
||||
SharedString* share() { return &storage_; }
|
||||
|
||||
size_t size() const { return storage_.size(); }
|
||||
int64 contents_size() { return contents_size_; }
|
||||
|
||||
private:
|
||||
friend class HTTPValueTest;
|
||||
|
||||
// Must be called with storage_ non-empty.
|
||||
char type_identifier() const { return *storage_.data(); }
|
||||
|
||||
unsigned int SizeOfFirstChunk() const;
|
||||
void SetSizeOfFirstChunk(unsigned int size);
|
||||
int64 ComputeContentsSize() const;
|
||||
|
||||
// Disconnects this HTTPValue from other HTTPValues that may share the
|
||||
// underlying storage, allowing a new buffer.
|
||||
void CopyOnWrite();
|
||||
|
||||
SharedString storage_;
|
||||
// Member variable to keep the size of body in storage.
|
||||
int64 contents_size_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HTTPValue);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_HTTP_VALUE_H_
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
// Author: mmohabey@google.com (Megha Mohabey)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_HTTP_VALUE_WRITER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_HTTP_VALUE_WRITER_H_
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class HTTPCache;
|
||||
class HTTPValue;
|
||||
class MessageHandler;
|
||||
class ResponseHeaders;
|
||||
|
||||
// Wrapper for buffering an HTTPValue. HTTPValueWriter ensures that an
|
||||
// HTTPValue which cannot be eventually cached is not buffered.
|
||||
class HTTPValueWriter {
|
||||
public:
|
||||
HTTPValueWriter(HTTPValue* value, HTTPCache* cache)
|
||||
: value_(value),
|
||||
cache_(cache),
|
||||
has_buffered_(true) {}
|
||||
|
||||
void SetHeaders(ResponseHeaders* headers);
|
||||
|
||||
bool Write(const StringPiece& str, MessageHandler* handler);
|
||||
|
||||
bool has_buffered() const { return has_buffered_; }
|
||||
|
||||
// Checks if the http_value should be buffered or not depending on whether we
|
||||
// can eventually cache it. It also clears the http_value if it can not be
|
||||
// buffered. Note that this only checks the size constraints, not cache
|
||||
// headers.
|
||||
bool CheckCanCacheElseClear(ResponseHeaders* headers);
|
||||
|
||||
// Checks if we can write the string to the HttpValue without going over
|
||||
// limits.
|
||||
bool CanCacheContent(const StringPiece& str) const;
|
||||
|
||||
private:
|
||||
HTTPValue* value_;
|
||||
HTTPCache* cache_;
|
||||
bool has_buffered_;
|
||||
DISALLOW_COPY_AND_ASSIGN(HTTPValueWriter);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_HTTP_VALUE_WRITER_H_
|
||||
@@ -1,99 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_INFLATING_FETCH_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_INFLATING_FETCH_H_
|
||||
|
||||
#include "net/instaweb/http/public/async_fetch.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/gzip_inflater.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
|
||||
// This Fetch layer helps work with origin servers that serve gzipped
|
||||
// content even when request-headers do not include
|
||||
// accept-encoding:gzip. In that scenario, this class inflates the
|
||||
// content and strips the content-encoding:gzip response header.
|
||||
//
|
||||
// Some servers will serve gzipped content even to clients that didn't
|
||||
// ask for it. Depending on the serving environment, we may also want
|
||||
// to ask backend servers for gzipped content even if we want cleartext
|
||||
// to be sent to the Write methods. Users of this class can force this
|
||||
// by calling EnableGzipFromBackend.
|
||||
class InflatingFetch : public SharedAsyncFetch {
|
||||
public:
|
||||
explicit InflatingFetch(AsyncFetch* fetch);
|
||||
virtual ~InflatingFetch();
|
||||
|
||||
// Use this one cautiously, since it may cause resources to be corrupted
|
||||
// if you use it with anything other than the IPRO path. Note that if NULL
|
||||
// is contained in the bypass_set then resources with unknown content type
|
||||
// will not be inflated.
|
||||
void set_inflation_content_type_blacklist(
|
||||
const std::set<const ContentType*>& bypass_set) {
|
||||
inflation_content_type_blacklist_ = bypass_set;
|
||||
}
|
||||
|
||||
// Adds accept-encoding:gzip to the request headers sent to the
|
||||
// origin. The data is inflated as we Write it. If deflate
|
||||
// or gzip was already in the request then this has no effect.
|
||||
void EnableGzipFromBackend();
|
||||
|
||||
protected:
|
||||
// If inflation is required, inflates and passes bytes to the linked fetch,
|
||||
// otherwise just passes bytes.
|
||||
virtual bool HandleWrite(const StringPiece& sp, MessageHandler* handler);
|
||||
|
||||
// Analyzes headers and depending on the request settings and flags will
|
||||
// either set up the inflater or not.
|
||||
virtual void HandleHeadersComplete();
|
||||
virtual void HandleDone(bool success);
|
||||
virtual void Reset();
|
||||
|
||||
private:
|
||||
void InitInflater(GzipInflater::InflateType, const StringPiece& value);
|
||||
|
||||
// If this returns true, it means that we should not inflate incoming data and
|
||||
// pass it to the caller as is, since that is what caller requested.
|
||||
bool IsCompressionAllowedInRequest();
|
||||
|
||||
scoped_ptr<GzipInflater> inflater_;
|
||||
|
||||
// Caching gate inside IsCompressionAllowedInRequest().
|
||||
bool request_checked_for_accept_encoding_;
|
||||
|
||||
// Will be set to true if accepted encoding included gzip and/or deflate.
|
||||
bool compression_desired_;
|
||||
|
||||
// Whether any kind of error happened to the inflater. Once set to true, never
|
||||
// gets reset.
|
||||
bool inflate_failure_;
|
||||
|
||||
// Set of content types that will not be inflated.
|
||||
std::set<const ContentType*> inflation_content_type_blacklist_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(InflatingFetch);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_INFLATING_FETCH_H_
|
||||
@@ -1,382 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: marq@google.com (Mark Cogan)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_LOG_RECORD_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_LOG_RECORD_H_
|
||||
|
||||
#include <map>
|
||||
// TODO(gee): Should this be in public? Do we really care?
|
||||
#include "net/instaweb/util/enums.pb.h"
|
||||
#include "net/instaweb/http/public/logging_proto.h"
|
||||
#include "net/instaweb/http/public/logging_proto_impl.h"
|
||||
#include "net/instaweb/rewriter/image_types.pb.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/gtest_prod.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
// If your .cc file needs to use the types declared in logging_proto.h,
|
||||
// you must also include net/instaweb/http/public/logging_proto_impl.h
|
||||
// See that header file for an explanation of why this is necessary.
|
||||
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractMutex;
|
||||
|
||||
// This class is a wrapper around a protobuf used to collect logging
|
||||
// information. It also provides a simple aggregation mechanism for
|
||||
// collecting the ids of applied rewriters.
|
||||
//
|
||||
// Care and feeding of log records:
|
||||
// (1) All logging must be done through log records. No class should
|
||||
// have static members of any logging proto class. Log records
|
||||
// can either create the logging protos, or will take ownership of them.
|
||||
// (2) All access and manipulation of log data must be guarded by the log
|
||||
// record's mutex. Commonly repeated logging operations should be factored
|
||||
// into functions in this class (and be so guarded therein).
|
||||
// (3) In most cases, log records should be created and owned by request
|
||||
// contexts.
|
||||
|
||||
// Subclasses may wrap some other type of protobuf; they must still provide
|
||||
// access to a LoggingInfo instance, however.
|
||||
class AbstractLogRecord {
|
||||
public:
|
||||
// Construct an AbstractLogRecord with a new LoggingInfo proto and caller-
|
||||
// supplied mutex. This class takes ownership of the mutex.
|
||||
explicit AbstractLogRecord(AbstractMutex* mutex);
|
||||
virtual ~AbstractLogRecord();
|
||||
|
||||
// For compatibility with older logging methods, returns a comma-joined string
|
||||
// concatenating the sorted coalesced rewriter ids of APPLIED_OK entries in
|
||||
// the rewriter_info array. Each id will appear once in the string if any
|
||||
// number of successful rewrites for that id have been logged.
|
||||
GoogleString AppliedRewritersString();
|
||||
|
||||
// Create a new rewriter logging submessage for |rewriter_id|, returning a
|
||||
// pointer to it for later access. Note that this can return NULL if the
|
||||
// size of rewriter_info has grown too large. It is the caller's
|
||||
// responsibility to handle this safely.
|
||||
RewriterInfo* NewRewriterInfo(const char* rewriter_id);
|
||||
|
||||
// Creates a new rewriter logging submessage for |rewriter_id|,
|
||||
// and sets its status.
|
||||
void SetRewriterLoggingStatus(
|
||||
const char* rewriter_id, RewriterApplication::Status status);
|
||||
|
||||
// Creates a new rewriter logging submessage for |rewriter_id|,
|
||||
// sets status and the url index.
|
||||
// Inline convenience overload taking a URL in addition to the rewriter id and
// status. It forwards all three arguments unchanged to
// SetRewriterLoggingStatusHelper() and discards the RewriterInfo* that the
// helper returns.
void SetRewriterLoggingStatus(
|
||||
const char* rewriter_id, const GoogleString& url,
|
||||
RewriterApplication::Status status) {
|
||||
SetRewriterLoggingStatusHelper(rewriter_id, url, status);
|
||||
}
|
||||
|
||||
// Log the HTML level status for a filter. This should be called only once
|
||||
// per filter, at the point where it is determined the filter is either
|
||||
// active or not.
|
||||
void LogRewriterHtmlStatus(const char* rewriter_id,
|
||||
RewriterHtmlApplication::Status status);
|
||||
|
||||
// Log the status of a rewriter application on a resource.
|
||||
// TODO(gee): I'd really prefer rewriter_id was an enum.
|
||||
void LogRewriterApplicationStatus(
|
||||
const char* rewriter_id, RewriterApplication::Status status);
|
||||
|
||||
// TODO(gee): Deprecate raw access to proto.
|
||||
// Return the LoggingInfo proto wrapped by this class. Calling code must
|
||||
// guard any reads and writes to this using mutex().
|
||||
virtual LoggingInfo* logging_info() = 0;
|
||||
|
||||
// TODO(huibao): Rename LogImageBackgroundRewriteActivity() to make it clear
|
||||
// that it will log even when the rewriting finishes in the line-of-request.
|
||||
|
||||
// Log image rewriting activity, which may not finish when the request
|
||||
// processing is done. The outcome is a new log record with request type
|
||||
// set to "BACKGROUND_REWRITE".
|
||||
void LogImageBackgroundRewriteActivity(
|
||||
RewriterApplication::Status status,
|
||||
const GoogleString& url,
|
||||
const char* id,
|
||||
int original_size,
|
||||
int optimized_size,
|
||||
bool is_recompressed,
|
||||
ImageType original_image_type,
|
||||
ImageType optimized_image_type,
|
||||
bool is_resized);
|
||||
|
||||
// Atomically sets is_html_response in the logging proto.
|
||||
void SetIsHtml(bool is_html);
|
||||
|
||||
// Adds a new cohort info with the given cohort name and returns its index.
|
||||
int AddPropertyCohortInfo(const GoogleString& cohort);
|
||||
|
||||
// Updates the cohort info at the specified index, to include the given
|
||||
// property in the list of properties found in the cache.
|
||||
void AddFoundPropertyToCohortInfo(int index, const GoogleString& property);
|
||||
|
||||
// Updates the cohort info at the specified index, to indicate whether it was
|
||||
// a cache hit.
|
||||
void SetCacheStatusForCohortInfo(int index, bool found, int key_state);
|
||||
|
||||
// Updates the cohort info at the specified index with the device and cache
|
||||
// type.
|
||||
void SetDeviceAndCacheTypeForCohortInfo(
|
||||
int index, int device_type, int cache_type);
|
||||
|
||||
// Mutex-guarded log mutation convenience methods. The rule of thumb is that
|
||||
// if a single-field update to a logging proto occurs multiple times, it
|
||||
// should be factored out into a method on this class.
|
||||
void SetBlinkRequestFlow(int flow);
|
||||
void SetCacheHtmlRequestFlow(int flow);
|
||||
void SetIsOriginalResourceCacheable(bool cacheable);
|
||||
void SetTimingRequestStartMs(int64 ms);
|
||||
void SetTimingHeaderFetchMs(int64 ms);
|
||||
void SetTimingFetchMs(int64 ms);
|
||||
int64 GetTimingFetchMs();
|
||||
void SetTimingProcessingTimeMs(int64 ms);
|
||||
// Sets time_to_start_fetch_ms in the TimingInfo submessage as an offset from
|
||||
// timing_info.request_start_ms (|start_time_ms| is an absolute time value
|
||||
// and is converted into the offset). If request_start_ms is unset, this is a
|
||||
// silent no-op. This may be called several times in succession, for example
|
||||
// in the case of retried fetches. In that case, if time_to_start_fetch_ms has
|
||||
// already been set in the log record, this is again a silent no-op.
|
||||
void UpdateTimingInfoWithFetchStartTime(int64 start_time_ms);
|
||||
|
||||
// Override SetBlinkInfoImpl if necessary.
|
||||
void SetBlinkInfo(const GoogleString& user_agent);
|
||||
|
||||
// Override SetCacheHtmlInfoImpl if necessary.
|
||||
void SetCacheHtmlLoggingInfo(const GoogleString& user_agent);
|
||||
|
||||
// Log a RewriterInfo for the flush early filter.
|
||||
void LogFlushEarlyActivity(
|
||||
const char* id,
|
||||
const GoogleString& url,
|
||||
RewriterApplication::Status status,
|
||||
FlushEarlyResourceInfo::ContentType content_type,
|
||||
FlushEarlyResourceInfo::ResourceType resource_type,
|
||||
bool is_bandwidth_affected,
|
||||
bool in_head);
|
||||
|
||||
// Log a RewriterInfo for the image rewrite filter.
|
||||
void LogImageRewriteActivity(
|
||||
const char* id,
|
||||
const GoogleString& url,
|
||||
RewriterApplication::Status status,
|
||||
bool is_image_inlined,
|
||||
bool is_critical_image,
|
||||
bool try_low_res_src_insertion,
|
||||
bool low_res_src_inserted,
|
||||
int low_res_data_size);
|
||||
|
||||
// TODO(gee): Change the callsites.
|
||||
void LogJsDisableFilter(const char* id, bool has_pagespeed_no_defer);
|
||||
|
||||
void LogLazyloadFilter(const char* id,
|
||||
RewriterApplication::Status status,
|
||||
bool is_blacklisted, bool is_critical);
|
||||
|
||||
// Mutex-guarded log-writing operations. Derived classes should override
|
||||
// *Impl methods. Returns false if the log write attempt failed.
|
||||
bool WriteLog();
|
||||
|
||||
// Return the mutex associated with this instance. Calling code should
|
||||
// guard reads and writes of AbstractLogRecords with it.
|
||||
AbstractMutex* mutex() { return mutex_.get(); }
|
||||
|
||||
// Sets the maximum number of RewriterInfo submessages that can accumulate in
|
||||
// the LoggingInfo proto wrapped by this class.
|
||||
void SetRewriterInfoMaxSize(int x);
|
||||
|
||||
// Sets whether urls should be logged. This could potentially generate a lot
|
||||
// of logs data, so this should be switched on only for debugging.
|
||||
void SetAllowLoggingUrls(bool allow_logging_urls);
|
||||
|
||||
// Sets whether URL indices should be logged for every rewriter application
|
||||
// or not.
|
||||
void SetLogUrlIndices(bool log_url_indices);
|
||||
|
||||
// Sets the number of critical images in HTML.
|
||||
void SetNumHtmlCriticalImages(int num_html_critical_images);
|
||||
|
||||
// Sets the number of critical images in CSS.
|
||||
void SetNumCssCriticalImages(int num_css_critical_images);
|
||||
|
||||
// Sets image related statistics.
|
||||
void SetImageStats(int num_img_tags, int num_inlined_img_tags);
|
||||
|
||||
// Sets critical CSS related byte counts (all uncompressed).
|
||||
void SetCriticalCssInfo(int critical_inlined_bytes,
|
||||
int original_external_bytes,
|
||||
int overhead_bytes);
|
||||
|
||||
// Log information related to the user agent and device making the request.
|
||||
void LogDeviceInfo(
|
||||
int device_type,
|
||||
bool supports_image_inlining,
|
||||
bool supports_lazyload_images,
|
||||
bool supports_critical_images_beacon,
|
||||
bool supports_deferjs,
|
||||
bool supports_webp,
|
||||
bool supports_webplossless_alpha,
|
||||
bool is_bot,
|
||||
bool supports_split_html,
|
||||
bool can_preload_resources);
|
||||
|
||||
// Sets initial information for background rewrite log.
|
||||
virtual void SetBackgroundRewriteInfo(
|
||||
bool log_urls,
|
||||
bool log_url_indices,
|
||||
int max_rewrite_info_log_size);
|
||||
|
||||
|
||||
// Sets the time from the start of the request till it begins getting
|
||||
// processed.
|
||||
// Delegates to SetTimeFromRequestStart(), passing a pointer to the TimingInfo
// setter for time_to_start_processing_ms together with |end_ms|.
// NOTE(review): |end_ms| is presumably an absolute timestamp that the helper
// converts into an offset from the request start -- confirm in the .cc file.
void SetTimeToStartProcessing(int64 end_ms) {
|
||||
SetTimeFromRequestStart(
|
||||
&TimingInfo::set_time_to_start_processing_ms, end_ms);
|
||||
}
|
||||
|
||||
// Sets the time from the start of the request till the start of parsing.
|
||||
// Delegates to SetTimeFromRequestStart(), passing a pointer to the TimingInfo
// setter for time_to_start_parse_ms together with |end_ms|.
// NOTE(review): |end_ms| is presumably an absolute timestamp that the helper
// converts into an offset from the request start -- confirm in the .cc file.
void SetTimeToStartParse(int64 end_ms) {
|
||||
SetTimeFromRequestStart(
|
||||
&TimingInfo::set_time_to_start_parse_ms, end_ms);
|
||||
}
|
||||
|
||||
// Sets the time from the start of the request till the start of the pcache
|
||||
// lookup.
|
||||
// Delegates to SetTimeFromRequestStart(), passing a pointer to the TimingInfo
// setter for time_to_pcache_lookup_start_ms together with |end_ms|.
// NOTE(review): |end_ms| is presumably an absolute timestamp that the helper
// converts into an offset from the request start -- confirm in the .cc file.
void SetTimeToPcacheStart(int64 end_ms) {
|
||||
SetTimeFromRequestStart(
|
||||
&TimingInfo::set_time_to_pcache_lookup_start_ms, end_ms);
|
||||
}
|
||||
|
||||
// Sets the time from the start of the request till the end of the pcache
|
||||
// lookup.
|
||||
// Delegates to SetTimeFromRequestStart(), passing a pointer to the TimingInfo
// setter for time_to_pcache_lookup_end_ms together with |end_ms|.
// NOTE(review): |end_ms| is presumably an absolute timestamp that the helper
// converts into an offset from the request start -- confirm in the .cc file.
void SetTimeToPcacheEnd(int64 end_ms) {
|
||||
SetTimeFromRequestStart(
|
||||
&TimingInfo::set_time_to_pcache_lookup_end_ms, end_ms);
|
||||
}
|
||||
|
||||
protected:
|
||||
// Implements setting Blink specific log information; base impl is a no-op.
|
||||
virtual void SetBlinkInfoImpl(const GoogleString& user_agent) {}
|
||||
|
||||
// Implements setting Cache Html specific log information; base impl is a no-op.
|
||||
virtual void SetCacheHtmlLoggingInfoImpl(const GoogleString& user_agent) {}
|
||||
// Implements writing a log; pure virtual, so subclasses must define it. Returns false if
|
||||
// writing failed.
|
||||
virtual bool WriteLogImpl() = 0;
|
||||
|
||||
private:
|
||||
typedef void (TimingInfo::*SetTimeFromStartFn)(int64);
|
||||
|
||||
// Called on construction.
|
||||
void InitLogging();
|
||||
|
||||
void PopulateUrl(
|
||||
const GoogleString& url, RewriteResourceInfo* rewrite_resource_info);
|
||||
|
||||
// Fill LoggingInfo proto with information collected from LogRewriterStatus
|
||||
// and LogRewrite.
|
||||
void PopulateRewriterStatusCounts();
|
||||
|
||||
void SetTimeFromRequestStart(SetTimeFromStartFn fn, int64 end_ms);
|
||||
|
||||
// Helper function which creates a new rewriter logging submessage for
|
||||
// |rewriter_id|, sets status and the url index. It is intended to be called
|
||||
// only inside logging code.
|
||||
RewriterInfo* SetRewriterLoggingStatusHelper(
|
||||
const char* rewriter_id, const GoogleString& url,
|
||||
RewriterApplication::Status status);
|
||||
|
||||
// This must be set. Implementation constructors must minimally default this
|
||||
// to a NullMutex.
|
||||
scoped_ptr<AbstractMutex> mutex_;
|
||||
|
||||
// The maximum number of rewrite info logs stored for a single request.
|
||||
int rewriter_info_max_size_;
|
||||
|
||||
// Allow urls to be logged.
|
||||
bool allow_logging_urls_;
|
||||
|
||||
// Allow url indices to be logged.
|
||||
bool log_url_indices_;
|
||||
|
||||
// Map which maintains the url to index for logging urls.
|
||||
StringIntMap url_index_map_;
|
||||
|
||||
// Stats collected from calls to LogRewrite.
|
||||
typedef std::map<RewriterApplication::Status, int> RewriteStatusCountMap;
|
||||
struct RewriterStatsInternal {
|
||||
RewriterHtmlApplication::Status html_status;
|
||||
|
||||
// RewriterApplication::Status -> count.
|
||||
RewriteStatusCountMap status_counts;
|
||||
|
||||
RewriterStatsInternal()
|
||||
: html_status(RewriterHtmlApplication::UNKNOWN_STATUS) {}
|
||||
};
|
||||
typedef std::map<GoogleString, RewriterStatsInternal> RewriterStatsMap;
|
||||
RewriterStatsMap rewriter_stats_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(AbstractLogRecord);
|
||||
};
|
||||
|
||||
// Simple AbstractLogRecord implementation which owns a LoggingInfo protobuf.
|
||||
// Concrete AbstractLogRecord that holds its LoggingInfo in a scoped_ptr member
// and whose WriteLogImpl() does nothing beyond reporting success.
class LogRecord : public AbstractLogRecord {
|
||||
public:
|
||||
// |mutex| is the mutex used to guard this record.
// NOTE(review): presumably forwarded to AbstractLogRecord, whose constructor
// comment says it takes ownership -- the definition is not visible here.
explicit LogRecord(AbstractMutex* mutex);
|
||||
|
||||
virtual ~LogRecord();
|
||||
|
||||
// Returns the raw pointer held by logging_info_; ownership stays with this
// object's scoped_ptr.
LoggingInfo* logging_info() { return logging_info_.get(); }
|
||||
|
||||
// Writes nothing; unconditionally returns true.
bool WriteLogImpl() { return true; }
|
||||
|
||||
private:
|
||||
// The LoggingInfo proto exposed through logging_info().
scoped_ptr<LoggingInfo> logging_info_;
|
||||
};
|
||||
|
||||
// TODO(gee): I'm pretty sure the functionality can be provided by the previous
|
||||
// ALR implementation, but for the time being leave this around to make the
|
||||
// refactoring as limited as possible.
|
||||
// AbstractLogRecord that copies logging_info() when in WriteLog. This should
|
||||
// be useful for testing any logging flow where an owned subordinate log record
|
||||
// is needed.
|
||||
// LogRecord variant whose WriteLogImpl() copies the record's own LoggingInfo
// into an externally supplied LoggingInfo, so the caller can inspect what was
// logged.
class CopyOnWriteLogRecord : public LogRecord {
|
||||
public:
|
||||
// |logging_mutex| is passed through to the LogRecord constructor.
// |logging_info| is the destination written by WriteLogImpl(); only the
// pointer is stored, so it must remain valid whenever WriteLogImpl() runs.
CopyOnWriteLogRecord(AbstractMutex* logging_mutex, LoggingInfo* logging_info)
|
||||
: LogRecord(logging_mutex), logging_info_copy_(logging_info) {}
|
||||
|
||||
protected:
|
||||
// Copies *logging_info() into the caller-supplied proto via CopyFrom() and
// always returns true. logging_info_copy_ is dereferenced without a null
// check.
virtual bool WriteLogImpl() {
|
||||
logging_info_copy_->CopyFrom(*logging_info());
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
// Destination proto supplied at construction.
LoggingInfo* logging_info_copy_; // Not owned by us.
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(CopyOnWriteLogRecord);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_LOG_RECORD_H_
|
||||
@@ -1,43 +0,0 @@
|
||||
// Copyright 2012 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Author: Mark Cogan (marq@google.com)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_LOGGING_PROTO_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_LOGGING_PROTO_H_
|
||||
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class BlinkInfo;
|
||||
class CacheHtmlLoggingInfo;
|
||||
class CriticalCssInfo;
|
||||
class DeviceInfo;
|
||||
class FlushEarlyFilterInfo;
|
||||
class FlushEarlyResourceInfo;
|
||||
class ImageRewriteResourceInfo;
|
||||
class LoggingInfo;
|
||||
class MetadataCacheInfo;
|
||||
class PropertyCohortInfo;
|
||||
class PropertyPageInfo;
|
||||
class RewriterInfo;
|
||||
class RewriteStatusCount;
|
||||
class RewriterStats;
|
||||
class RewriteResourceInfo;
|
||||
class TimingInfo;
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_LOGGING_PROTO_H_
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: Mark Cogan (marq@google.com)
|
||||
|
||||
// This header should be included (in addition to log_record.h) whenever
|
||||
// code needs access to the implementations of the logging_info protobuf.
|
||||
// This file needs to be separate from log_record.h because both apache's
|
||||
// httpd.h and any pb.h #define incompatible |OK| macros.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_LOGGING_PROTO_IMPL_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_LOGGING_PROTO_IMPL_H_
|
||||
|
||||
|
||||
#include "net/instaweb/http/logging.pb.h"
|
||||
|
||||
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_LOGGING_PROTO_IMPL_H_
|
||||
@@ -1,190 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: jmarantz@google.com (Joshua Marantz)
|
||||
//
|
||||
// Meta-data associated with a rewriting resource. This is
|
||||
// primarily a key-value store, but additionally we want to
|
||||
// get easy access to the cache expiration time.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_META_DATA_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_META_DATA_H_
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Global constants for common HTTP attribute names and values.
|
||||
//
|
||||
// TODO(jmarantz): Proactively change all the occurrences of the static strings
|
||||
// to use these shared constants.
|
||||
struct HttpAttributes {
|
||||
static const char kAcceptEncoding[];
|
||||
static const char kAllow[];
|
||||
static const char kAuthorization[];
|
||||
static const char kCacheControl[];
|
||||
static const char kConnection[];
|
||||
static const char kContentEncoding[];
|
||||
static const char kContentLanguage[];
|
||||
static const char kContentLength[];
|
||||
static const char kContentType[];
|
||||
static const char kCookie[];
|
||||
static const char kCookie2[];
|
||||
static const char kDate[];
|
||||
static const char kDeflate[];
|
||||
static const char kDnt[];
|
||||
static const char kEtag[];
|
||||
static const char kExpires[];
|
||||
static const char kGzip[];
|
||||
static const char kHost[];
|
||||
static const char kIfModifiedSince[];
|
||||
static const char kIfNoneMatch[];
|
||||
static const char kLastModified[];
|
||||
static const char kLocation[];
|
||||
static const char kNoCache[];
|
||||
static const char kPragma[];
|
||||
static const char kProxyAuthorization[];
|
||||
static const char kReferer[]; // sic
|
||||
static const char kServer[];
|
||||
static const char kSetCookie[];
|
||||
static const char kSetCookie2[];
|
||||
static const char kTransferEncoding[];
|
||||
static const char kUserAgent[];
|
||||
static const char kVary[];
|
||||
static const char kWarning[];
|
||||
static const char kXmlHttpRequest[];
|
||||
static const char kXAssociatedContent[];
|
||||
static const char kXForwardedFor[];
|
||||
static const char kXForwardedProto[];
|
||||
static const char kXGooglePagespeedClientId[];
|
||||
static const char kXGoogleRequestEventId[];
|
||||
// If this header's value matches the configured blocking rewrite key, then
|
||||
// all rewrites are completed before the response is sent to the client.
|
||||
static const char kXPsaBlockingRewrite[];
|
||||
|
||||
// A request header for client to specify client options.
|
||||
static const char kXPsaClientOptions[];
|
||||
|
||||
// This header is set in distributed rewrite requests that originated from
|
||||
// fetch requests (.pagespeed. and IPRO).
|
||||
static const char kXPsaDistributedRewriteFetch[];
|
||||
|
||||
// This header is set in distributed rewrite requests that originated from
|
||||
// HTML requests (HTML and nested filters).
|
||||
static const char kXPsaDistributedRewriteHtml[];
|
||||
|
||||
// This header is set on optional fetches that got dropped due to load.
|
||||
static const char kXPsaLoadShed[];
|
||||
|
||||
// If this header is present on an incoming request it will be treated as if
|
||||
// it came over a SPDY connection for purposes of applying special
|
||||
// configuration or optimizations.
|
||||
static const char kXPsaOptimizeForSpdy[];
|
||||
|
||||
// This header is set in a distributed rewrite task to ask for metadata
|
||||
// in the response.
|
||||
static const char kXPsaRequestMetadata[];
|
||||
|
||||
// This header is set in a distributed rewrite response and the value
|
||||
// is the serialized metadata.
|
||||
static const char kXPsaResponseMetadata[];
|
||||
|
||||
static const char kXRequestedWith[];
|
||||
|
||||
// This header is set on optimized responses to indicate the original
|
||||
// content length.
|
||||
static const char kXOriginalContentLength[];
|
||||
static const char kXUACompatible[];
|
||||
};
|
||||
|
||||
namespace HttpStatus {
|
||||
// Http status codes.
|
||||
// Grokked from http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
|
||||
enum Code {
|
||||
kContinue = 100,
|
||||
kSwitchingProtocols = 101,
|
||||
|
||||
kOK = 200,
|
||||
kCreated = 201,
|
||||
kAccepted = 202,
|
||||
kNonAuthoritative = 203,
|
||||
kNoContent = 204,
|
||||
kResetContent = 205,
|
||||
kPartialContent = 206,
|
||||
|
||||
kMultipleChoices = 300,
|
||||
kMovedPermanently = 301,
|
||||
kFound = 302,
|
||||
kSeeOther = 303,
|
||||
kNotModified = 304,
|
||||
kUseProxy = 305,
|
||||
kSwitchProxy = 306, // In old spec; no longer used.
|
||||
kTemporaryRedirect = 307,
|
||||
|
||||
kBadRequest = 400,
|
||||
kUnauthorized = 401,
|
||||
kPaymentRequired = 402,
|
||||
kForbidden = 403,
|
||||
kNotFound = 404,
|
||||
kMethodNotAllowed = 405,
|
||||
kNotAcceptable = 406,
|
||||
kProxyAuthRequired = 407,
|
||||
kRequestTimeout = 408,
|
||||
kConflict = 409,
|
||||
kGone = 410,
|
||||
kLengthRequired = 411,
|
||||
kPreconditionFailed = 412,
|
||||
kEntityTooLarge = 413,
|
||||
kUriTooLong = 414,
|
||||
kUnsupportedMediaType = 415,
|
||||
kRangeNotSatisfiable = 416,
|
||||
kExpectationFailed = 417,
|
||||
kImATeapot = 418,
|
||||
|
||||
kInternalServerError = 500,
|
||||
kNotImplemented = 501,
|
||||
kBadGateway = 502,
|
||||
kUnavailable = 503,
|
||||
kGatewayTimeout = 504,
|
||||
kHttpVersionNotSupported = 505,
|
||||
|
||||
// Instaweb-specific proxy failure constants.
|
||||
kProxyPublisherFailure = 520,
|
||||
kProxyFailure = 521,
|
||||
kProxyConfigurationFailure = 522,
|
||||
kProxyDeclinedRequest = 523,
|
||||
kProxyDnsLookupFailure = 524,
|
||||
|
||||
// Instaweb-specific response codes: these are intentionally chosen to be
|
||||
// outside the normal HTTP range, but we consider these response codes
|
||||
// to be 'cacheable' in our own cache.
|
||||
kRememberFetchFailedStatusCode = 10001,
|
||||
// Note that this includes all non-200 status code responses that are not
|
||||
// cacheable.
|
||||
kRememberNotCacheableStatusCode = 10002,
|
||||
// This includes all 200 status code responses that are not cacheable.
|
||||
kRememberNotCacheableAnd200StatusCode = 10003,
|
||||
// Status code used when the actual status code of the response is unknown at
|
||||
// the time of ProxyFetchPropertyCallbackCollector::Detach().
|
||||
kUnknownStatusCode = 10004,
|
||||
};
|
||||
|
||||
// Transform a status code into the equivalent reason phrase.
|
||||
const char* GetReasonPhrase(Code rc);
|
||||
|
||||
} // namespace HttpStatus
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_META_DATA_H_
|
||||
@@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
// Callbacks used for testing.
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_MOCK_CALLBACK_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_MOCK_CALLBACK_H_
|
||||
|
||||
#include "net/instaweb/http/public/async_fetch.h"
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/gtest.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
// Callback that can be used for testing resource fetches which makes sure
|
||||
// that Done() is called exactly once and with the expected success value.
|
||||
// Can be used multiple times by calling Reset in between.
|
||||
// StringAsyncFetch for tests that checks, via gtest EXPECT_* macros, that the
// fetch completes once with the expected success value.
class ExpectStringAsyncFetch : public StringAsyncFetch {
|
||||
public:
|
||||
// |expect_success| is the value HandleDone() is expected to receive.
// |request_context| is passed through to the StringAsyncFetch constructor.
ExpectStringAsyncFetch(bool expect_success,
|
||||
const RequestContextPtr& request_context)
|
||||
: StringAsyncFetch(request_context), expect_success_(expect_success) {}
|
||||
// Records a test failure if the fetch was destroyed while done() is false.
virtual ~ExpectStringAsyncFetch() {
|
||||
EXPECT_TRUE(done());
|
||||
}
|
||||
|
||||
// Checks that the fetch is not already done, then delegates to
// StringAsyncFetch::HandleDone(), and finally compares |success| with the
// expected value. The done() check has to precede the base call.
// NOTE(review): presumably the base HandleDone() is what makes done() true --
// confirm in StringAsyncFetch.
virtual void HandleDone(bool success) {
|
||||
EXPECT_FALSE(done()) << "Already Done; perhaps you reused without Reset()";
|
||||
StringAsyncFetch::HandleDone(success);
|
||||
EXPECT_EQ(expect_success_, success);
|
||||
}
|
||||
|
||||
// Changes the success value expected by subsequent HandleDone() calls.
void set_expect_success(bool x) { expect_success_ = x; }
|
||||
|
||||
private:
|
||||
// Success value that HandleDone() is expected to be called with.
bool expect_success_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(ExpectStringAsyncFetch);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_MOCK_CALLBACK_H_
|
||||
@@ -1,168 +0,0 @@
|
||||
/*
|
||||
* Copyright 2010 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: sligocki@google.com (Shawn Ligocki)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_MOCK_URL_FETCHER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_MOCK_URL_FETCHER_H_
|
||||
|
||||
#include <map>
|
||||
#include "net/instaweb/http/public/request_context.h"
|
||||
#include "net/instaweb/http/public/response_headers.h"
|
||||
#include "net/instaweb/http/public/url_fetcher.h"
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
#include "net/instaweb/util/public/string_util.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class MessageHandler;
|
||||
class RequestHeaders;
|
||||
class Timer;
|
||||
class Writer;
|
||||
|
||||
// Simple UrlFetcher meant for tests; you can set responses for individual URLs.
|
||||
// Meant only for testing.
|
||||
class MockUrlFetcher : public UrlFetcher {
|
||||
public:
|
||||
MockUrlFetcher() : enabled_(true), fail_on_unexpected_(true),
|
||||
update_date_headers_(false), omit_empty_writes_(false),
|
||||
fail_after_headers_(false), verify_host_header_(false),
|
||||
split_writes_(false), timer_(NULL) {}
|
||||
virtual ~MockUrlFetcher();
|
||||
|
||||
void SetResponse(const StringPiece& url,
|
||||
const ResponseHeaders& response_header,
|
||||
const StringPiece& response_body);
|
||||
|
||||
// Adds a new response-header attribute name/value pair to an existing
|
||||
// response. If the response does not already exist, the method check-fails.
|
||||
void AddToResponse(const StringPiece& url,
|
||||
const StringPiece& name,
|
||||
const StringPiece& value);
|
||||
|
||||
// Set a conditional response which will either respond with the supplied
|
||||
// response_headers and response_body or a simple 304 Not Modified depending
|
||||
// upon last_modified_time and conditional GET "If-Modified-Since" headers.
|
||||
void SetConditionalResponse(const StringPiece& url,
|
||||
int64 last_modified_date,
|
||||
const GoogleString& etag,
|
||||
const ResponseHeaders& response_header,
|
||||
const StringPiece& response_body);
|
||||
|
||||
// Fetching unset URLs will cause EXPECT failures as well as return false.
|
||||
virtual bool StreamingFetchUrl(const GoogleString& url,
|
||||
const RequestHeaders& request_headers,
|
||||
ResponseHeaders* response_headers,
|
||||
Writer* response_writer,
|
||||
MessageHandler* message_handler,
|
||||
const RequestContextPtr& request_context);
|
||||
|
||||
// Indicates that the specified URL should respond with headers and data,
|
||||
// but still return a 'false' status. This is similar to a live fetcher
|
||||
// that times out or disconnects while streaming data.
|
||||
//
|
||||
// This differs from set_fail_after_headers in that it's specific to a
|
||||
// URL, and writes the body first before returning failure.
|
||||
void SetResponseFailure(const StringPiece& url);
|
||||
|
||||
// Clear all set responses.
|
||||
void Clear();
|
||||
|
||||
// Remove a single response. Will be a no-op if no response was set for url.
|
||||
void RemoveResponse(const StringPiece& url);
|
||||
|
||||
// When disabled, the fetcher will fail (but not crash) for all requests.
|
||||
// Use to simulate temporarily not having access to resources, for example.
|
||||
void Disable() { enabled_ = false; }
|
||||
// Sets enabled_ back to true, undoing a previous Disable().
void Enable() { enabled_ = true; }
|
||||
|
||||
// Set to false if you don't want the fetcher to EXPECT-fail when a URL with
// no registered response is fetched.
|
||||
// Useful in the MockUrlFetcher unit test itself.
|
||||
void set_fail_on_unexpected(bool x) { fail_on_unexpected_ = x; }
|
||||
|
||||
// If true, update the response headers' Date using the supplied timer.
|
||||
// Note: a timer must also be provided via set_timer().
|
||||
void set_update_date_headers(bool x) { update_date_headers_ = x; }
|
||||
|
||||
// If set to true (defaults to false) the fetcher will not emit writes of
|
||||
// length 0.
|
||||
void set_omit_empty_writes(bool x) { omit_empty_writes_ = x; }
|
||||
|
||||
// If set to true (defaults to false) the fetcher will fail after outputting
|
||||
// the headers. See also SetResponseFailure which fails after writing
|
||||
// the body.
|
||||
void set_fail_after_headers(bool x) { fail_after_headers_ = x; }
|
||||
|
||||
// If set to true (defaults to false) the fetcher will verify that the Host:
|
||||
// header is present, and matches the host/port of the requested URL.
|
||||
void set_verify_host_header(bool x) { verify_host_header_ = x; }
|
||||
|
||||
// Sets the timer used for updating header dates (see
// set_update_date_headers).  Only the pointer is stored.
void set_timer(Timer* timer) { timer_ = timer; }
|
||||
|
||||
// If true then each time the fetcher writes it will split the write in half
|
||||
// and write each half separately. This is needed to test that Ajax's
|
||||
// RecordingFetch caches writes properly and recovers from failure.
|
||||
void set_split_writes(bool val) { split_writes_ = val; }
|
||||
|
||||
private:
|
||||
class HttpResponse {
|
||||
public:
|
||||
HttpResponse(int64 last_modified_time, const GoogleString& etag,
|
||||
const ResponseHeaders& in_header, const StringPiece& in_body)
|
||||
: last_modified_time_(last_modified_time),
|
||||
etag_(etag),
|
||||
body_(in_body.data(), in_body.size()),
|
||||
success_(true) {
|
||||
header_.CopyFrom(in_header);
|
||||
}
|
||||
|
||||
const int64 last_modified_time() const { return last_modified_time_; }
|
||||
const GoogleString& etag() const { return etag_; }
|
||||
const ResponseHeaders& header() const { return header_; }
|
||||
ResponseHeaders* mutable_header() { return &header_; }
|
||||
const GoogleString& body() const { return body_; }
|
||||
void set_success(bool success) { success_ = success; }
|
||||
bool success() const { return success_; }
|
||||
|
||||
private:
|
||||
int64 last_modified_time_;
|
||||
GoogleString etag_;
|
||||
ResponseHeaders header_;
|
||||
GoogleString body_;
|
||||
bool success_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(HttpResponse);
|
||||
};
|
||||
typedef std::map<const GoogleString, HttpResponse*> ResponseMap;
|
||||
|
||||
ResponseMap response_map_;
|
||||
|
||||
bool enabled_;
|
||||
bool fail_on_unexpected_; // Should we EXPECT if unexpected url called?
|
||||
bool update_date_headers_; // Should we update Date headers from timer?
|
||||
bool omit_empty_writes_; // Should we skip ->Write calls of length 0?
|
||||
bool fail_after_headers_; // Should we call Done(false) after headers?
|
||||
bool verify_host_header_; // Should we verify the Host: header?
|
||||
bool split_writes_; // Should we turn one write into multiple?
|
||||
Timer* timer_; // Timer to use for updating header dates.
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(MockUrlFetcher);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_MOCK_URL_FETCHER_H_
|
||||
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 Google Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Author: nikhilmadan@google.com (Nikhil Madan)
|
||||
|
||||
#ifndef NET_INSTAWEB_HTTP_PUBLIC_RATE_CONTROLLER_H_
|
||||
#define NET_INSTAWEB_HTTP_PUBLIC_RATE_CONTROLLER_H_
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "net/instaweb/util/public/basictypes.h"
|
||||
#include "net/instaweb/util/public/ref_counted_ptr.h"
|
||||
#include "net/instaweb/util/public/scoped_ptr.h"
|
||||
#include "net/instaweb/util/public/string.h"
|
||||
|
||||
namespace net_instaweb {
|
||||
|
||||
class AbstractMutex;
|
||||
class AsyncFetch;
|
||||
class MessageHandler;
|
||||
class Statistics;
|
||||
class ThreadSystem;
|
||||
class TimedVariable;
|
||||
class UrlAsyncFetcher;
|
||||
class Variable;
|
||||
|
||||
// Controller which limits the number of outgoing fetches per domain. If the
|
||||
// fetch is for a user-facing request, this sends the request out anyway and
|
||||
// updates the count for number of outgoing fetches.
|
||||
// For non-user facing requests, this checks that the number of outgoing fetches
|
||||
// for this domain is less than the limit. If less than the limit, it sends
|
||||
// the fetch out and updates the count. If greater than the per-domain limit,
|
||||
// and if the global queue size is within the limit, it queues the request up.
|
||||
// However, if the global queue size is above the limit, it drops the request.
|
||||
// If a request is dropped, the response will have HttpAttributes::kXPsaLoadShed
|
||||
// set on the response headers.
|
||||
//
|
||||
// Note: this class needs a functioning Statistics implementation to operate.
|
||||
// Declaration only: every member function here is defined outside this header.
class RateController {
|
||||
public:
|
||||
// String constants declared here and defined elsewhere.
// NOTE(review): presumably the names under which the matching statistics
// variables are registered by InitStats() -- confirm in the implementation.
static const char kQueuedFetchCount[];
|
||||
static const char kDroppedFetchCount[];
|
||||
static const char kCurrentGlobalFetchQueueSize[];
|
||||
|
||||
// max_global_queue_size: maximum permissible size of the global queue.
// per_host_outgoing_request_threshold: maximum number of outgoing requests
//   allowed per host.
// per_host_queued_request_threshold: maximum number of queued requests
//   allowed per host.
// thread_system, statistics: NOTE(review): presumably used to create mutex_
//   and to look up the statistics variables held below; ownership is not
//   visible from this header -- confirm in the implementation.
RateController(int max_global_queue_size,
|
||||
int per_host_outgoing_request_threshold,
|
||||
int per_host_queued_request_threshold,
|
||||
ThreadSystem* thread_system,
|
||||
Statistics* statistics);
|
||||
|
||||
virtual ~RateController();
|
||||
|
||||
// Applies our shaping policies, and either (eventually) asks fetcher to
|
||||
// fetch the given URL or drops it.
|
||||
void Fetch(UrlAsyncFetcher* fetcher,
|
||||
const GoogleString& url,
|
||||
MessageHandler* message_handler,
|
||||
AsyncFetch* fetch);
|
||||
|
||||
// Initializes statistics variables associated with this class.
|
||||
static void InitStats(Statistics* statistics);
|
||||
|
||||
private:
|
||||
// Helper classes, forward-declared here and defined elsewhere.  CustomFetch
// is befriended, which gives it access to this class's private members.
class HostFetchInfo;
|
||||
class CustomFetch;
|
||||
friend class CustomFetch;
|
||||
|
||||
// Reference-counted handle to a HostFetchInfo.
typedef RefCountedPtr<HostFetchInfo> HostFetchInfoPtr;
|
||||
|
||||
// Maps a string key to a raw pointer to a HostFetchInfoPtr.
// NOTE(review): the key is presumably the host name, and who deletes the
// pointed-to HostFetchInfoPtr is not visible here -- confirm in the
// implementation.
typedef std::map<GoogleString, HostFetchInfoPtr*> HostFetchInfoMap;
|
||||
|
||||
// Delete the fetch info from fetch_info_map_ if possible.
|
||||
void DeleteFetchInfoIfPossible(const HostFetchInfoPtr& fetch_info);
|
||||
|
||||
// The maximum permissible size of the global queue.
|
||||
const int max_global_queue_size_;
|
||||
// The maximum number of outgoing requests allowed per host.
|
||||
const int per_host_outgoing_request_threshold_;
|
||||
// The maximum number of queued requests allowed per host.
|
||||
const int per_host_queued_request_threshold_;
|
||||
ThreadSystem* thread_system_;
|
||||
|
||||
// Map containing per-host information tracking outgoing and queued fetches.
|
||||
HostFetchInfoMap fetch_info_map_;
|
||||
// NOTE(review): presumably guards fetch_info_map_ -- confirm in the
// implementation.
scoped_ptr<AbstractMutex> mutex_;
|
||||
|
||||
// Statistics handles, held as raw pointers.
TimedVariable* queued_fetch_count_;
|
||||
TimedVariable* dropped_fetch_count_;
|
||||
// Using a variable here, since we want to be able to track this in the server
|
||||
// statistics.
|
||||
Variable* current_global_fetch_queue_size_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(RateController);
|
||||
};
|
||||
|
||||
} // namespace net_instaweb
|
||||
|
||||
#endif // NET_INSTAWEB_HTTP_PUBLIC_RATE_CONTROLLER_H_
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user