From ff645ddc75ab23534b25c4935838b5e63d3c3548 Mon Sep 17 00:00:00 2001
From: Neil Smith
Date: Sat, 30 Sep 2023 15:39:59 +0100
Subject: [PATCH] Added robots file

---
 assets/js/main.js |   4 +-
 package.json      |   2 +-
 robots.txt        | 169 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 3 deletions(-)
 create mode 100644 robots.txt

diff --git a/assets/js/main.js b/assets/js/main.js
index 6c250d8..fee7225 100644
--- a/assets/js/main.js
+++ b/assets/js/main.js
@@ -7,11 +7,11 @@ jQuery(document).ready(function($) {
     var config = {
         'share-selected-text': true,
         'load-more': true,
-        'infinite-scroll': false,
+        'infinite-scroll': true,
         'infinite-scroll-step': 3,
         'disqus-shortname': 'hauntedthemes-demo',
         'content-api-host': 'https://work.njae.me.uk',
-        'content-api-key': '',
+        'content-api-key': 'fef214bac9c7c7e49bcc56f57b',
     };
 
     var w = Math.max(document.documentElement.clientWidth, window.innerWidth || 0),
diff --git a/package.json b/package.json
index c41617f..df71b4b 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
     "name": "tawau-neil",
     "description": "We love this one! It would be best described as the realm of the wordsmiths where content is king. It's easier to check it out and get convinced for yourself.",
     "demo": "https://tawau.hauntedthemes.com/",
-    "version": "2.0.2",
+    "version": "2.0.3",
     "license": "GNU General Public License, Version 2",
     "engines": {
         "ghost": ">=2.0.0"
diff --git a/robots.txt b/robots.txt
new file mode 100644
index 0000000..7ce703b
--- /dev/null
+++ b/robots.txt
@@ -0,0 +1,169 @@
+#
+# robots.txt, based on the one for http://www.wikipedia.org/ and friends
+#
+# Please note: There are a lot of pages on this site, and there are
+# some misbehaved spiders out there that go _way_ too fast. If you're
+# irresponsible, your access to the site may be blocked.
+#
+
+# advertising-related bots:
+User-agent: Mediapartners-Google*
+Disallow: /
+
+# Wikipedia work bots:
+User-agent: IsraBot
+Disallow: /
+
+User-agent: Orthogaffe
+Disallow: /
+
+# Crawlers that are kind enough to obey, but which we'd rather not have
+# unless they're feeding search engines.
+User-agent: UbiCrawler
+Disallow: /
+
+User-agent: DOC
+Disallow: /
+
+User-agent: Zao
+Disallow: /
+
+# Some bots are known to be trouble, particularly those designed to copy
+# entire sites. Please obey robots.txt.
+User-agent: sitecheck.internetseer.com
+Disallow: /
+
+User-agent: Zealbot
+Disallow: /
+
+User-agent: MSIECrawler
+Disallow: /
+
+User-agent: SiteSnagger
+Disallow: /
+
+User-agent: WebStripper
+Disallow: /
+
+User-agent: WebCopier
+Disallow: /
+
+User-agent: Fetch
+Disallow: /
+
+User-agent: Offline Explorer
+Disallow: /
+
+User-agent: Teleport
+Disallow: /
+
+User-agent: TeleportPro
+Disallow: /
+
+User-agent: WebZIP
+Disallow: /
+
+User-agent: linko
+Disallow: /
+
+User-agent: HTTrack
+Disallow: /
+
+User-agent: Microsoft.URL.Control
+Disallow: /
+
+User-agent: Xenu
+Disallow: /
+
+User-agent: larbin
+Disallow: /
+
+User-agent: libwww
+Disallow: /
+
+User-agent: ZyBORG
+Disallow: /
+
+User-agent: Download Ninja
+Disallow: /
+
+#
+# Sorry, wget in its recursive mode is a frequent problem.
+# Please read the man page and use it properly; there is a
+# --wait option you can use to set the delay between hits,
+# for instance.
+#
+User-agent: wget
+Disallow: /
+
+#
+# The 'grub' distributed client has been *very* poorly behaved.
+#
+User-agent: grub-client
+Disallow: /
+
+#
+# Doesn't follow robots.txt anyway, but...
+#
+User-agent: k2spider
+Disallow: /
+
+#
+# Hits many times per second, not acceptable
+# http://www.nameprotect.com/botinfo.html
+User-agent: NPBot
+Disallow: /
+
+# A capture bot, downloads gazillions of pages with no public benefit
+# http://www.webreaper.net/
+User-agent: WebReaper
+Disallow: /
+
+# Prevent TurnItIn
+User-agent: TurnitinBot
+Disallow: /
+
+# Disable AI harvesting bots
+User-agent: CCBot
+Disallow: /
+
+User-agent: ChatGPT-User
+Disallow: /
+
+User-agent: GPTBot
+Disallow: /
+
+User-agent: Google-Extended
+Disallow: /
+
+User-agent: Omgilibot
+Disallow: /
+
+User-agent: FacebookBot
+Disallow: /
+
+
+# Don't allow the Wayback Machine to index user pages
+#User-agent: ia_archiver
+#Disallow: /wiki/User
+#Disallow: /wiki/Benutzer
+
+#
+# Friendly, low-speed bots are welcome viewing article pages, but not
+# dynamically-generated pages please.
+#
+# Inktomi's "Slurp" can read a minimum delay between hits; if your
+# bot supports such a thing using the 'Crawl-delay' or another
+# instruction, please let us know.
+#
+User-agent: *
+Disallow: /mediawiki/
+Disallow: /trap/
+Disallow: /Special
+Disallow: /Special:Random
+Disallow: /Special%3ARandom
+Disallow: /Special:Search
+Disallow: /Special%3ASearch
+
+## *at least* 1 second please. preferably more :D
+Crawl-delay: 123
-- 
2.34.1
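
Once this robots.txt is deployed, the rules can be sanity-checked with Python's standard-library urllib.robotparser. The sketch below is a minimal check, not part of the theme; it assumes the file is served at the root of the content-api-host configured above (https://work.njae.me.uk), and the post URL and the "SomeFriendlyBot" user agent are illustrative placeholders.

    # Minimal sketch: verify the deployed robots.txt behaves as intended.
    # Assumes https://work.njae.me.uk/robots.txt serves the file added in this patch.
    from urllib import robotparser

    rp = robotparser.RobotFileParser()
    rp.set_url("https://work.njae.me.uk/robots.txt")
    rp.read()  # fetch and parse the live file

    # AI-harvesting bots are blocked site-wide.
    print(rp.can_fetch("GPTBot", "https://work.njae.me.uk/"))                    # expect False
    # A generic, well-behaved crawler may read posts but not the trap paths.
    print(rp.can_fetch("SomeFriendlyBot", "https://work.njae.me.uk/a-post/"))    # expect True
    print(rp.can_fetch("SomeFriendlyBot", "https://work.njae.me.uk/trap/page"))  # expect False
    # The catch-all group requests a long delay between hits.
    print(rp.crawl_delay("SomeFriendlyBot"))                                     # expect 123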