Skip to content

Commit 1bb8edf

Browse files
committed
fix(fetch): close redirect/FQDN bypasses in domain filtering
Three issues found while reviewing the domain-filtering feature: 1. SSRF via redirect (critical). The http.Client had no CheckRedirect, so an allow-listed origin returning a 3xx to a forbidden host would be followed and its body returned to the caller — a classic bypass. Now every redirect target is re-checked against the same lists; a regression test against http://169.254.169.254/ (AWS metadata IP) demonstrates the fix. 2. FQDN trailing-dot bypass. URLs in FQDN form ("http://host./") kept the trailing dot in url.URL.Hostname() and slipped past patterns like "host". The matcher now strips trailing dots from both inputs. 3. Empty/whitespace domain entries silently rejected every URL in allowed_domains and matched nothing in blocked_domains. Validation now rejects them at config-load time with a clear error. Also documented the IP-encoding limitation (decimal/hex/octal IPv4) in the user-facing fetch tool docs. Assisted-By: docker-agent
1 parent 7a516b4 commit 1bb8edf

5 files changed

Lines changed: 133 additions & 2 deletions

File tree

docs/tools/fetch/index.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,18 @@ Domain patterns in `allowed_domains` and `blocked_domains` use the following rul
4141
- **Bare domain** — `example.com` matches the host `example.com` _and_ any subdomain such as `docs.example.com`. It does **not** match unrelated hosts that share a suffix (e.g. `badexample.com`).
4242
- **Leading dot** — `.example.com` matches **only** strict subdomains (`docs.example.com`, `a.b.example.com`), not the apex `example.com`.
4343
- **IP literal** — IP addresses are matched exactly (`169.254.169.254`).
44+
- **Trailing dots** in FQDN-form URLs (`http://example.com./`) are stripped before matching, so they cannot bypass a deny-list entry.
4445

4546
The lists are mutually exclusive: a single fetch toolset may set either `allowed_domains` or `blocked_domains`, but not both.
4647

48+
When a list is configured, every redirect target is re-checked against the same list. A request to an allowed origin that redirects to a forbidden host is rejected before any data is read from the redirect.
49+
50+
<div class="callout callout-warning" markdown="1">
51+
<div class="callout-title">⚠️ Limitations
52+
</div>
53+
<p>Matching is purely string-based on the URL host. It does <strong>not</strong> perform DNS resolution and does <strong>not</strong> normalise alternative IP encodings (decimal <code>2852039166</code>, hex <code>0xa9.0xfe.0xa9.0xfe</code>, octal, IPv4-mapped IPv6, etc.). If you need to deny access to a specific IP, also list its alternative encodings, or block at the network layer.</p>
54+
</div>
55+
4756
### Custom Timeout
4857

4958
```yaml

pkg/config/latest/validate.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ func (t *Toolset) validate() error {
8888
if len(t.AllowedDomains) > 0 && len(t.BlockedDomains) > 0 {
8989
return errors.New("allowed_domains and blocked_domains are mutually exclusive")
9090
}
91+
if err := validateDomainPatterns("allowed_domains", t.AllowedDomains); err != nil {
92+
return err
93+
}
94+
if err := validateDomainPatterns("blocked_domains", t.BlockedDomains); err != nil {
95+
return err
96+
}
9197
if len(t.Models) > 0 && t.Type != "model_picker" {
9298
return errors.New("models can only be used with type 'model_picker'")
9399
}
@@ -203,6 +209,18 @@ func (t *Toolset) validate() error {
203209
return nil
204210
}
205211

212+
// validateDomainPatterns rejects empty / whitespace-only entries in a fetch
// allow- or block-list. Such entries silently match nothing and turn the list
// into a foot-gun (e.g. allowed_domains: [""] would reject every URL), so we
// surface them as a config-load error instead.
func validateDomainPatterns(field string, patterns []string) error {
	for idx, pattern := range patterns {
		if trimmed := strings.TrimSpace(pattern); trimmed != "" {
			continue
		}
		return fmt.Errorf("%s[%d] must not be empty", field, idx)
	}
	return nil
}
223+
206224
// isLoopbackHost reports whether host is a loopback address (with or without
207225
// a port component). It accepts IPv4 loopback, IPv6 loopback, and the literal
208226
// "localhost".

pkg/config/latest/validate_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,35 @@ agents:
298298
`,
299299
wantErr: "blocked_domains can only be used with type 'fetch'",
300300
},
301+
{
302+
name: "empty allowed_domains entry is rejected",
303+
config: `
304+
version: "8"
305+
agents:
306+
root:
307+
model: "openai/gpt-4"
308+
toolsets:
309+
- type: fetch
310+
allowed_domains:
311+
- ""
312+
- docker.com
313+
`,
314+
wantErr: "allowed_domains[0] must not be empty",
315+
},
316+
{
317+
name: "whitespace-only blocked_domains entry is rejected",
318+
config: `
319+
version: "8"
320+
agents:
321+
root:
322+
model: "openai/gpt-4"
323+
toolsets:
324+
- type: fetch
325+
blocked_domains:
326+
- " "
327+
`,
328+
wantErr: "blocked_domains[0] must not be empty",
329+
},
301330
}
302331

303332
for _, tt := range tests {

pkg/tools/builtin/fetch.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ func (h *fetchHandler) CallTool(ctx context.Context, params FetchToolArgs) (*too
5555
client := &http.Client{
5656
Timeout: h.timeout,
5757
Transport: remote.NewTransport(ctx),
58+
// Re-check the domain allow/deny lists on every redirect: without this,
59+
// an allowed origin could redirect into a denied one and bypass the
60+
// policy. The 10-redirect cap mirrors the net/http default.
61+
CheckRedirect: func(req *http.Request, via []*http.Request) error {
62+
if len(via) >= 10 {
63+
return errors.New("stopped after 10 redirects")
64+
}
65+
return h.checkDomainAllowed(req.URL)
66+
},
5867
}
5968
if params.Timeout > 0 {
6069
client.Timeout = time.Duration(params.Timeout) * time.Second
@@ -291,9 +300,13 @@ func (h *fetchHandler) checkDomainAllowed(u *url.URL) error {
291300
// ("docs.example.com"); it does NOT match unrelated hosts that share a suffix
292301
// ("badexample.com"). A pattern with a leading dot (".example.com") matches
293302
// strict subdomains only — the apex "example.com" is excluded.
303+
//
304+
// Trailing dots used in FQDN form ("example.com.") are stripped from both
305+
// host and pattern before matching, so a URL like http://example.com./ cannot
306+
// be used to bypass a deny-list entry for example.com.
294307
func matchesDomain(host, pattern string) bool {
295-
host = strings.ToLower(strings.TrimSpace(host))
296-
pattern = strings.ToLower(strings.TrimSpace(pattern))
308+
host = strings.TrimSuffix(strings.ToLower(strings.TrimSpace(host)), ".")
309+
pattern = strings.TrimSuffix(strings.ToLower(strings.TrimSpace(pattern)), ".")
297310
if host == "" || pattern == "" || pattern == "." {
298311
return false
299312
}

pkg/tools/builtin/fetch_test.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"net/http"
77
"net/http/httptest"
8+
neturl "net/url"
89
"testing"
910
"time"
1011

@@ -433,6 +434,11 @@ func TestMatchesDomain(t *testing.T) {
433434
{"only-dot pattern matches nothing", "example.com", ".", false},
434435
{"whitespace tolerated", " example.com ", " example.com ", true},
435436
{"ip address exact", "169.254.169.254", "169.254.169.254", true},
437+
// FQDN trailing dot (regression: must not bypass the matcher).
438+
{"trailing dot host matches apex pattern", "example.com.", "example.com", true},
439+
{"trailing dot host matches subdomain pattern", "docs.example.com.", "example.com", true},
440+
{"trailing dot pattern matches apex host", "example.com", "example.com.", true},
441+
{"trailing dot host matches strict-subdomain pattern", "docs.example.com.", ".example.com", true},
436442
}
437443

438444
for _, tc := range tests {
@@ -523,3 +529,59 @@ func TestFetch_BlockedDomains_DeniesIgnoringRobots(t *testing.T) {
523529
assert.Contains(t, result.Output, "is blocked by blocked_domains")
524530
assert.False(t, robotsRequested, "blocked URLs must not trigger any network call, including robots.txt")
525531
}
532+
533+
// TestFetch_AllowedDomains_RejectsRedirectToBlockedHost is a regression test for an
534+
// SSRF-style bypass: an allow-listed origin returning a redirect to a host
535+
// that is NOT in the allow-list must be rejected before the redirect is
536+
// followed, otherwise the policy is hollow.
537+
func TestFetch_AllowedDomains_RejectsRedirectToBlockedHost(t *testing.T) {
538+
redirected := false
539+
url := runHTTPServer(t, func(w http.ResponseWriter, r *http.Request) {
540+
if r.URL.Path == "/robots.txt" {
541+
http.NotFound(w, r)
542+
return
543+
}
544+
redirected = true
545+
http.Redirect(w, r, "http://attacker.example.com/secret", http.StatusFound)
546+
})
547+
parsed, err := neturl.Parse(url)
548+
require.NoError(t, err)
549+
550+
// Allow only the test server's host. The redirect target must be
551+
// rejected without any network call to attacker.example.com.
552+
tool := NewFetchTool(WithAllowedDomains([]string{parsed.Hostname()}))
553+
554+
result, err := tool.handler.CallTool(t.Context(), FetchToolArgs{
555+
URLs: []string{url + "/start"},
556+
Format: "text",
557+
})
558+
require.NoError(t, err)
559+
assert.True(t, redirected, "the test server should have been hit at least once to issue the redirect")
560+
assert.Contains(t, result.Output, "Error fetching")
561+
assert.Contains(t, result.Output, "attacker.example.com", "the error should mention the rejected redirect target")
562+
assert.Contains(t, result.Output, "is not in allowed_domains")
563+
}
564+
565+
// TestFetch_BlockedDomains_RejectsRedirectToBlockedHost mirrors the allow-list
566+
// regression test for the deny-list path: a redirect to a deny-listed host
567+
// must not be followed.
568+
func TestFetch_BlockedDomains_RejectsRedirectToBlockedHost(t *testing.T) {
569+
url := runHTTPServer(t, func(w http.ResponseWriter, r *http.Request) {
570+
if r.URL.Path == "/robots.txt" {
571+
http.NotFound(w, r)
572+
return
573+
}
574+
http.Redirect(w, r, "http://169.254.169.254/metadata", http.StatusFound)
575+
})
576+
577+
tool := NewFetchTool(WithBlockedDomains([]string{"169.254.169.254"}))
578+
579+
result, err := tool.handler.CallTool(t.Context(), FetchToolArgs{
580+
URLs: []string{url + "/innocent"},
581+
Format: "text",
582+
})
583+
require.NoError(t, err)
584+
assert.Contains(t, result.Output, "Error fetching")
585+
assert.Contains(t, result.Output, "is blocked by blocked_domains")
586+
assert.Contains(t, result.Output, "169.254.169.254")
587+
}

0 commit comments

Comments
 (0)