Added robots file
[ghost-theme-work.git] / robots.txt
#
# robots.txt, based on the one for http://www.wikipedia.org/ and friends
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaved spiders out there that go _way_ too fast. If you're
# irresponsible, your access to the site may be blocked.
#

# advertising-related bots:
User-agent: Mediapartners-Google*
Disallow: /

# Wikipedia work bots:
User-agent: IsraBot
Disallow: /

User-agent: Orthogaffe
Disallow: /

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

#
# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
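# As a rough sketch (example.org is a placeholder, not this site), a
# polite recursive fetch might look like:
#   wget --recursive --wait=2 --limit-rate=50k https://example.org/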
#
User-agent: wget
Disallow: /

#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /

# Prevent TurnItIn
User-agent: TurnitinBot
Disallow: /

# Disallow AI training / harvesting bots
User-agent: CCBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: FacebookBot
Disallow: /

# Don't allow the Wayback Machine to index user pages
#User-agent: ia_archiver
#Disallow: /wiki/User
#Disallow: /wiki/Benutzer

#
# Friendly, low-speed bots are welcome to view article pages, but please
# stay away from dynamically generated pages.
#
# Inktomi's "Slurp" understands a minimum delay between hits; if your
# bot supports such a thing via 'Crawl-delay' or another instruction,
# please let us know.
#
User-agent: *
Disallow: /mediawiki/
Disallow: /trap/
Disallow: /Special
Disallow: /Special:Random
Disallow: /Special%3ARandom
Disallow: /Special:Search
Disallow: /Special%3ASearch

## *at least* 1 second please, preferably more :D
Crawl-delay: 123