browsh/interfacer/src/browsh/raw_text_server.go
Thomas Buckley-Houston 4b2ce6abbc Gofmt formatting
2018-07-17 10:10:08 +08:00

175 lines
5.2 KiB
Go

package browsh
import (
"crypto/rand"
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"github.com/NYTimes/gziphandler"
"github.com/ulule/limiter"
"github.com/ulule/limiter/drivers/middleware/stdlib"
"github.com/ulule/limiter/drivers/store/memory"
)
// rawTextRequests is the hand-over point between an incoming HTTP request and the
// websocket exchange with the real browser that renders the page. Entries are keyed by
// the per-request ID and hold the text that should be written back to the HTTP client.
//
// NOTE(review): the reader in this file (waitForResponse) touches this map without any
// locking, and net/http serves each request on its own goroutine — confirm how writers
// elsewhere in the package synchronise.
var rawTextRequests = map[string]string{}
// HTTPServerStart runs Browsh as an HTTP service, which is separate from the usual
// interactive TTY app. It accepts ordinary HTTP requests and treats the path portion of
// the request URL as the text to enter into the Browsh URL bar, then responds with a simple
// line-broken text version of whatever the browser loads. For example, requesting
// `curl browsh-http-service.com/http://something.com` returns:
// `Something `
//
// Every request passes through the rate limiter and gzip compression before reaching
// handleHTTPServerRequest. The call blocks while the server is listening; when
// http.ListenAndServe returns, its error is handed to Shutdown.
func HTTPServerStart() {
	initialise()
	startFirefox()
	go startWebSocketServer()
	Log("Starting Browsh HTTP server")

	compressed := gziphandler.GzipHandler(http.HandlerFunc(handleHTTPServerRequest))
	mux := http.NewServeMux()
	mux.Handle("/", setupRateLimiter().Handler(compressed))

	// slashFix sits in front of the mux so that URLs embedded in the path keep their
	// double slashes.
	listenAddr := *httpServerBind + ":" + *HTTPServerPort
	err := http.ListenAndServe(listenAddr, &slashFix{mux})
	if err != nil {
		Shutdown(err)
	}
}
// setupRateLimiter builds the HTTP middleware that caps clients at 10 requests per
// minute. Counters are kept in an in-memory store, and the middleware is created with the
// forward-header option enabled.
func setupRateLimiter() *stdlib.Middleware {
	// TODO: Centralise store amongst instances with Redis
	perMinute := limiter.Rate{Period: time.Minute, Limit: 10}
	instance := limiter.New(memory.NewStore(), perMinute)
	return stdlib.NewMiddleware(instance, stdlib.WithForwardHeader(true))
}
// pseudoUUID returns 16 bytes from crypto/rand rendered as upper-case hex in the familiar
// 8-4-4-4-12 grouping. It is only UUID-shaped: no version or variant bits are set.
// If the random source fails, the error is printed to stdout and the empty string is
// returned.
func pseudoUUID() (uuid string) {
	var raw [16]byte
	if _, err := rand.Read(raw[:]); err != nil {
		fmt.Println("Error: ", err)
		return ""
	}
	return fmt.Sprintf("%X-%X-%X-%X-%X", raw[:4], raw[4:6], raw[6:8], raw[8:10], raw[10:])
}
// slashFix wraps the real router so that request paths containing whole URLs survive
// routing intact.
type slashFix struct {
	mux http.Handler
}

// ServeHTTP rewrites the request path before delegating to the wrapped handler.
//
// The default net/http router collapses double slashes in URL paths into a single slash,
// which breaks requests that carry a URL in the path, eg;
// https://domain.com/http://anotherdomain.com
// As a workaround the whole request URI (path plus any query string) is escaped into a
// single path segment, so the router passes it through unchanged. The downstream handler
// is expected to unescape it again.
func (h *slashFix) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	requested := strings.TrimPrefix(r.URL.RequestURI(), "/")
	escaped := url.PathEscape(requested)
	r.URL.Path = "/" + escaped
	h.mux.ServeHTTP(w, r)
}
// handleHTTPServerRequest turns the path of an incoming HTTP request into a URL for the
// real browser to load, and writes the rendered text back to the client.
//
// Before anything is sent to the browser a few special cases are answered directly, in
// this order: an invalid URL, plain-HTTP requests to a brow.sh host (redirected to HTTPS),
// favicon.ico, disallowed URLs (redirected to "/"), an empty path (welcome text on "text."
// hosts, otherwise the hosted welcome page is rendered), and robots.txt.
func handleHTTPServerRequest(w http.ResponseWriter, r *http.Request) {
	// url.PathUnescape returns an empty string when it fails, so a malformed path is
	// treated exactly like an empty one further down.
	requested, _ := url.PathUnescape(strings.TrimPrefix(r.URL.Path, "/"))
	target, invalid := deRecurseURL(requested)
	if invalid {
		io.WriteString(w, "Invalid URL")
		return
	}

	switch {
	case isProductionHTTP(r):
		http.Redirect(w, r, "https://"+r.Host+"/"+target, http.StatusMovedPermanently)
		return
	case target == "favicon.ico":
		http.Redirect(w, r, "https://www.brow.sh/assets/favicon-16x16.png", http.StatusMovedPermanently)
		return
	}

	// Everything from here on, including the redirect for disallowed URLs, is sent with
	// a ten minute public cache lifetime.
	w.Header().Set("Cache-Control", "public, max-age=600")

	switch {
	case isDisallowedURL(target):
		http.Redirect(w, r, "/", http.StatusMovedPermanently)
		return
	case strings.TrimSpace(target) == "":
		if strings.Contains(r.Host, "text.") {
			io.WriteString(w, "Welcome to the Browsh plain text client.\n"+
				"You can use it by appending URLs like this;\n"+
				"https://text.brow.sh/https://www.brow.sh")
			return
		}
		target = "https://www.brow.sh/html-service-welcome"
	case target == "robots.txt":
		io.WriteString(w, "User-agent: *\nAllow: /$\nDisallow: /\n")
		return
	}

	// Ask the web extension to render the page, then block until its answer has been
	// stored under this request's ID.
	requestID := pseudoUUID()
	command := "/raw_text_request," + requestID + "," + getRawTextMode(r) + "," + target
	sendMessageToWebExtension(command)
	waitForResponse(requestID, w)
}
// deRecurseURL prevents the service from being asked to render itself, e.g.
// https://html.brow.sh/html.brow.sh/... or https://html.brow.sh/https://text.brow.sh/...
//
// While urlForBrowsh points at html.brow.sh or text.brow.sh, that leading layer is peeled
// off and the remaining path and query are examined again. Both the fully qualified form
// ("https://html.brow.sh/...") and the scheme-less form ("html.brow.sh/...") are
// recognised, and the host is compared without regard to letter case or an explicit port.
//
// Anything that is not such a URL, including input that net/url cannot parse (it may be a
// search term), is returned unchanged. The boolean result reports an invalid URL; it is
// currently always false.
func deRecurseURL(urlForBrowsh string) (string, bool) {
	for {
		nested, err := url.Parse(urlForBrowsh)
		if err != nil {
			return urlForBrowsh, false
		}
		if nested.Scheme == "" && nested.Host == "" {
			// Without a scheme net/url treats "html.brow.sh/..." as a bare path and
			// leaves Host empty, so re-parse it as a network-path reference to expose
			// the host.
			nested, err = url.Parse("//" + urlForBrowsh)
			if err != nil {
				return urlForBrowsh, false
			}
		}
		host := nested.Hostname()
		if !strings.EqualFold(host, "html.brow.sh") && !strings.EqualFold(host, "text.brow.sh") {
			return urlForBrowsh, false
		}
		urlForBrowsh = strings.TrimPrefix(nested.RequestURI(), "/")
	}
}
// disallowedURLPattern matches the Google mail and account hosts, ignoring letter case.
// It is compiled once at start-up instead of on every request.
var disallowedURLPattern = regexp.MustCompile(`(?i)(mail|accounts)\.google\.com`)

// isDisallowedURL reports whether urlForBrowsh refers to mail.google.com or
// accounts.google.com, which the service refuses to render.
//
// The previous pattern, "[mail|accounts].google.com", was a character class followed by
// unescaped dots. It therefore matched any of those letters before ".google.com" and
// wrongly blocked hosts such as docs.google.com and news.google.com.
func isDisallowedURL(urlForBrowsh string) bool {
	return disallowedURLPattern.MatchString(urlForBrowsh)
}
// isProductionHTTP reports whether a request for a brow.sh host arrived with an
// X-Forwarded-Proto header of exactly "http". Requests for any other host are never
// considered production traffic.
// NOTE(review): presumably the header is set by a TLS-terminating proxy in front of the
// service — confirm against the deployment.
func isProductionHTTP(r *http.Request) bool {
	onBrowshHost := strings.Contains(r.Host, "brow.sh")
	return onBrowshHost && r.Header.Get("X-Forwarded-Proto") == "http"
}
// getRawTextMode picks the output mode for a request.
// 'PLAIN' mode returns raw text without any HTML whatsoever.
// 'HTML' mode returns some basic HTML tags for things like anchor links.
// PLAIN is selected when the request host contains "text." or when the
// X-Browsh-Raw-Mode header is exactly "PLAIN"; everything else gets HTML.
func getRawTextMode(r *http.Request) string {
	wantsPlain := strings.Contains(r.Host, "text.") ||
		r.Header.Get("X-Browsh-Raw-Mode") == "PLAIN"
	if wantsPlain {
		return "PLAIN"
	}
	return "HTML"
}
// waitForResponse blocks until an entry for rawTextRequestID appears in rawTextRequests,
// writes that entry to w and removes it from the map. The map is polled every 10ms.
//
// NOTE(review): there is no timeout or cancellation, so the call never returns if no
// response is ever stored. The map is also read and modified here without locking —
// confirm how the writer side synchronises.
func waitForResponse(rawTextRequestID string, w http.ResponseWriter) {
	const pollInterval = 10 * time.Millisecond
	for {
		body, found := rawTextRequests[rawTextRequestID]
		if !found {
			time.Sleep(pollInterval)
			continue
		}
		io.WriteString(w, body)
		delete(rawTextRequests, rawTextRequestID)
		return
	}
}