#
# robots.txt -- crawler exclusion rules for this site.
#
# Originally this file restricted access to only known and registered robots.
# Modified by Daniel - took out all whitelisted bots; we can add blacklists
# here and in the web server if needed...
#
# Formatting notes:
#   * One directive per line. A line that starts with '#' is a comment in its
#     entirety, so directives must never share a line with a leading comment.
#   * No blank lines inside the rule group below: some parsers treat a blank
#     line as the end of a record, which would orphan the later Disallow lines.
#
User-agent: *
Disallow: /cgi-bin/
Disallow: /cgi-comment/
Disallow: /cgi-systems/
Disallow: /systems/platforms/linux/software/package/
#
# Page fragments included by the CMS
Disallow: /webapps/page_body/
Disallow: /webapps/page_title/
#
# cms is the editor; only content is public
Disallow: /webapps/cms
Disallow: /webapps/directory
#
# Staging server
Disallow: /webapps-dev/
#
# Googlebot was abusing the webcalendar.
# Other bots were abusing it as well, so the rule applies to every agent
# (the Googlebot-specific group header is intentionally left commented out).
#User-agent: Googlebot
Disallow: /webcalendar/
Disallow: /webcalendar?*
#
# Added 2015/06/09
# Google Search Appliance seems to abuse the classroom calendar.
Disallow: /webapps/classrooms/
#
# I believe the following will eliminate many of the 404's that result from
# crawling javascript such as
#   var AUTH_TOKEN = 'm0IBKGTI83RXdNSm25OtcWWCyfDE6SLQWkkBosLVvmA=';
# ('*' matches any character sequence and '$' anchors the end of the URL, so
# this blocks every URL ending in '='. Both are pattern extensions; crawlers
# that do not implement them will not honor this rule.)
Disallow: /*=$
#
# Added 2016/09/06
# Block Michael Walfish's old profile, which contained some information he
# wanted kept private. New profile is at WALFISH__Michael.html
Disallow: /people/profiles/WALFISH_Michael.html
#
# Added 2018/10/26
Disallow: /webapps/content/systems/platforms/linux/softare/dropbox-workaround
# NOTE(review): "softare" above looks like a typo for "software" (compare the
# /systems/platforms/linux/software/package/ rule near the top). The corrected
# spelling is blocked as well; confirm which URL actually exists and drop the
# other.
Disallow: /webapps/content/systems/platforms/linux/software/dropbox-workaround