8 files changed, 1662 insertions, 72 deletions
diff --git a/include/NetUtils.hpp b/include/NetUtils.hpp
index 4770fb4..e719c82 100644
--- a/include/NetUtils.hpp
+++ b/include/NetUtils.hpp
@@ -4,8 +4,15 @@
 #include <vector>
 
 namespace QuickMedia {
+    struct Range { // Span inside a string, stored as offset + count; extract_urls() returns one per detected url.
+        size_t start; // Index of the first element of the span. Presumably a byte index until convert_utf8_to_utf32_ranges() is applied - TODO confirm.
+        size_t length; // Number of elements in the span, in the same unit as |start|.
+    };
+
     void html_escape_sequences(std::string &str);
     void html_unescape_sequences(std::string &str);
     std::string url_param_encode(const std::string &param);
-    void extract_urls(const std::string &str, std::vector<std::string> &urls);
+    std::vector<Range> extract_urls(const std::string &str);
+    std::vector<std::string> ranges_get_strings(const std::string &str, const std::vector<Range> &ranges);
+    void convert_utf8_to_utf32_ranges(const std::string &str, std::vector<Range> &ranges);
 }
 \ No newline at end of file
diff --git a/include/Text.hpp b/include/Text.hpp
index 3981f13..c74607c 100644
--- a/include/Text.hpp
+++ b/include/Text.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "NetUtils.hpp"
 #include <SFML/Graphics/VertexArray.hpp>
 #include <SFML/System/String.hpp>
 #include <SFML/System/Clock.hpp>
@@ -160,5 +161,7 @@ namespace QuickMedia
         sf::Vector2u renderTargetSize;
 
         std::vector<VertexRef> vertices_linear; // TODO: Use textElements instead
+
+        std::vector<Range> url_ranges;
     };
 }
diff --git a/src/FileAnalyzer.cpp b/src/FileAnalyzer.cpp
index ccad221..0059233 100644
--- a/src/FileAnalyzer.cpp
+++ b/src/FileAnalyzer.cpp
@@ -128,7 +128,7 @@ namespace QuickMedia {
             return false;
         }
 
-        if(width > 0 || height > 0) {
+        if(width > 0 && height > 0) {
             if(create_thumbnail(destination_path_tmp, destination_path, sf::Vector2i(width, height))) {
                 remove(destination_path_tmp.data.c_str());
                 return true;
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
index 5ca6d3e..d5795c2 100644
--- a/src/NetUtils.cpp
+++ b/src/NetUtils.cpp
@@ -3,8 +3,1516 @@
 #include <array>
 #include <sstream>
 #include <iomanip>
+#include <assert.h>
+#include <unordered_set>
 
 namespace QuickMedia {
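+    // Top-level domains accepted by extract_urls, stored in lowercase and looked up with an exact (case-sensitive) match. Source: https://data.iana.org/TLD/tlds-alpha-by-domain.txt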
+    static const std::unordered_set<std::string> TLDS = {
+        "aaa",
+        "aarp",
+        "abarth",
+        "abb",
+        "abbott",
+        "abbvie",
+        "abc",
+        "able",
+        "abogado",
+        "abudhabi",
+        "ac",
+        "academy",
+        "accenture",
+        "accountant",
+        "accountants",
+        "aco",
+        "actor",
+        "ad",
+        "adac",
+        "ads",
+        "adult",
+        "ae",
+        "aeg",
+        "aero",
+        "aetna",
+        "af",
+        "afamilycompany",
+        "afl",
+        "africa",
+        "ag",
+        "agakhan",
+        "agency",
+        "ai",
+        "aig",
+        "airbus",
+        "airforce",
+        "airtel",
+        "akdn",
+        "al",
+        "alfaromeo",
+        "alibaba",
+        "alipay",
+        "allfinanz",
+        "allstate",
+        "ally",
+        "alsace",
+        "alstom",
+        "am",
+        "amazon",
+        "americanexpress",
+        "americanfamily",
+        "amex",
+        "amfam",
+        "amica",
+        "amsterdam",
+        "analytics",
+        "android",
+        "anquan",
+        "anz",
+        "ao",
+        "aol",
+        "apartments",
+        "app",
+        "apple",
+        "aq",
+        "aquarelle",
+        "ar",
+        "arab",
+        "aramco",
+        "archi",
+        "army",
+        "arpa",
+        "art",
+        "arte",
+        "as",
+        "asda",
+        "asia",
+        "associates",
+        "at",
+        "athleta",
+        "attorney",
+        "au",
+        "auction",
+        "audi",
+        "audible",
+        "audio",
+        "auspost",
+        "author",
+        "auto",
+        "autos",
+        "avianca",
+        "aw",
+        "aws",
+        "ax",
+        "axa",
+        "az",
+        "azure",
+        "ba",
+        "baby",
+        "baidu",
+        "banamex",
+        "bananarepublic",
+        "band",
+        "bank",
+        "bar",
+        "barcelona",
+        "barclaycard",
+        "barclays",
+        "barefoot",
+        "bargains",
+        "baseball",
+        "basketball",
+        "bauhaus",
+        "bayern",
+        "bb",
+        "bbc",
+        "bbt",
+        "bbva",
+        "bcg",
+        "bcn",
+        "bd",
+        "be",
+        "beats",
+        "beauty",
+        "beer",
+        "bentley",
+        "berlin",
+        "best",
+        "bestbuy",
+        "bet",
+        "bf",
+        "bg",
+        "bh",
+        "bharti",
+        "bi",
+        "bible",
+        "bid",
+        "bike",
+        "bing",
+        "bingo",
+        "bio",
+        "biz",
+        "bj",
+        "black",
+        "blackfriday",
+        "blockbuster",
+        "blog",
+        "bloomberg",
+        "blue",
+        "bm",
+        "bms",
+        "bmw",
+        "bn",
+        "bnpparibas",
+        "bo",
+        "boats",
+        "boehringer",
+        "bofa",
+        "bom",
+        "bond",
+        "boo",
+        "book",
+        "booking",
+        "bosch",
+        "bostik",
+        "boston",
+        "bot",
+        "boutique",
+        "box",
+        "br",
+        "bradesco",
+        "bridgestone",
+        "broadway",
+        "broker",
+        "brother",
+        "brussels",
+        "bs",
+        "bt",
+        "budapest",
+        "bugatti",
+        "build",
+        "builders",
+        "business",
+        "buy",
+        "buzz",
+        "bv",
+        "bw",
+        "by",
+        "bz",
+        "bzh",
+        "ca",
+        "cab",
+        "cafe",
+        "cal",
+        "call",
+        "calvinklein",
+        "cam",
+        "camera",
+        "camp",
+        "cancerresearch",
+        "canon",
+        "capetown",
+        "capital",
+        "capitalone",
+        "car",
+        "caravan",
+        "cards",
+        "care",
+        "career",
+        "careers",
+        "cars",
+        "casa",
+        "case",
+        "cash",
+        "casino",
+        "cat",
+        "catering",
+        "catholic",
+        "cba",
+        "cbn",
+        "cbre",
+        "cbs",
+        "cc",
+        "cd",
+        "center",
+        "ceo",
+        "cern",
+        "cf",
+        "cfa",
+        "cfd",
+        "cg",
+        "ch",
+        "chanel",
+        "channel",
+        "charity",
+        "chase",
+        "chat",
+        "cheap",
+        "chintai",
+        "christmas",
+        "chrome",
+        "church",
+        "ci",
+        "cipriani",
+        "circle",
+        "cisco",
+        "citadel",
+        "citi",
+        "citic",
+        "city",
+        "cityeats",
+        "ck",
+        "cl",
+        "claims",
+        "cleaning",
+        "click",
+        "clinic",
+        "clinique",
+        "clothing",
+        "cloud",
+        "club",
+        "clubmed",
+        "cm",
+        "cn",
+        "co",
+        "coach",
+        "codes",
+        "coffee",
+        "college",
+        "cologne",
+        "com",
+        "comcast",
+        "commbank",
+        "community",
+        "company",
+        "compare",
+        "computer",
+        "comsec",
+        "condos",
+        "construction",
+        "consulting",
+        "contact",
+        "contractors",
+        "cooking",
+        "cookingchannel",
+        "cool",
+        "coop",
+        "corsica",
+        "country",
+        "coupon",
+        "coupons",
+        "courses",
+        "cpa",
+        "cr",
+        "credit",
+        "creditcard",
+        "creditunion",
+        "cricket",
+        "crown",
+        "crs",
+        "cruise",
+        "cruises",
+        "csc",
+        "cu",
+        "cuisinella",
+        "cv",
+        "cw",
+        "cx",
+        "cy",
+        "cymru",
+        "cyou",
+        "cz",
+        "dabur",
+        "dad",
+        "dance",
+        "data",
+        "date",
+        "dating",
+        "datsun",
+        "day",
+        "dclk",
+        "dds",
+        "de",
+        "deal",
+        "dealer",
+        "deals",
+        "degree",
+        "delivery",
+        "dell",
+        "deloitte",
+        "delta",
+        "democrat",
+        "dental",
+        "dentist",
+        "desi",
+        "design",
+        "dev",
+        "dhl",
+        "diamonds",
+        "diet",
+        "digital",
+        "direct",
+        "directory",
+        "discount",
+        "discover",
+        "dish",
+        "diy",
+        "dj",
+        "dk",
+        "dm",
+        "dnp",
+        "do",
+        "docs",
+        "doctor",
+        "dog",
+        "domains",
+        "dot",
+        "download",
+        "drive",
+        "dtv",
+        "dubai",
+        "duck",
+        "dunlop",
+        "dupont",
+        "durban",
+        "dvag",
+        "dvr",
+        "dz",
+        "earth",
+        "eat",
+        "ec",
+        "eco",
+        "edeka",
+        "edu",
+        "education",
+        "ee",
+        "eg",
+        "email",
+        "emerck",
+        "energy",
+        "engineer",
+        "engineering",
+        "enterprises",
+        "epson",
+        "equipment",
+        "er",
+        "ericsson",
+        "erni",
+        "es",
+        "esq",
+        "estate",
+        "et",
+        "etisalat",
+        "eu",
+        "eurovision",
+        "eus",
+        "events",
+        "exchange",
+        "expert",
+        "exposed",
+        "express",
+        "extraspace",
+        "fage",
+        "fail",
+        "fairwinds",
+        "faith",
+        "family",
+        "fan",
+        "fans",
+        "farm",
+        "farmers",
+        "fashion",
+        "fast",
+        "fedex",
+        "feedback",
+        "ferrari",
+        "ferrero",
+        "fi",
+        "fiat",
+        "fidelity",
+        "fido",
+        "film",
+        "final",
+        "finance",
+        "financial",
+        "fire",
+        "firestone",
+        "firmdale",
+        "fish",
+        "fishing",
+        "fit",
+        "fitness",
+        "fj",
+        "fk",
+        "flickr",
+        "flights",
+        "flir",
+        "florist",
+        "flowers",
+        "fly",
+        "fm",
+        "fo",
+        "foo",
+        "food",
+        "foodnetwork",
+        "football",
+        "ford",
+        "forex",
+        "forsale",
+        "forum",
+        "foundation",
+        "fox",
+        "fr",
+        "free",
+        "fresenius",
+        "frl",
+        "frogans",
+        "frontdoor",
+        "frontier",
+        "ftr",
+        "fujitsu",
+        "fun",
+        "fund",
+        "furniture",
+        "futbol",
+        "fyi",
+        "ga",
+        "gal",
+        "gallery",
+        "gallo",
+        "gallup",
+        "game",
+        "games",
+        "gap",
+        "garden",
+        "gay",
+        "gb",
+        "gbiz",
+        "gd",
+        "gdn",
+        "ge",
+        "gea",
+        "gent",
+        "genting",
+        "george",
+        "gf",
+        "gg",
+        "ggee",
+        "gh",
+        "gi",
+        "gift",
+        "gifts",
+        "gives",
+        "giving",
+        "gl",
+        "glade",
+        "glass",
+        "gle",
+        "global",
+        "globo",
+        "gm",
+        "gmail",
+        "gmbh",
+        "gmo",
+        "gmx",
+        "gn",
+        "godaddy",
+        "gold",
+        "goldpoint",
+        "golf",
+        "goo",
+        "goodyear",
+        "goog",
+        "google",
+        "gop",
+        "got",
+        "gov",
+        "gp",
+        "gq",
+        "gr",
+        "grainger",
+        "graphics",
+        "gratis",
+        "green",
+        "gripe",
+        "grocery",
+        "group",
+        "gs",
+        "gt",
+        "gu",
+        "guardian",
+        "gucci",
+        "guge",
+        "guide",
+        "guitars",
+        "guru",
+        "gw",
+        "gy",
+        "hair",
+        "hamburg",
+        "hangout",
+        "haus",
+        "hbo",
+        "hdfc",
+        "hdfcbank",
+        "health",
+        "healthcare",
+        "help",
+        "helsinki",
+        "here",
+        "hermes",
+        "hgtv",
+        "hiphop",
+        "hisamitsu",
+        "hitachi",
+        "hiv",
+        "hk",
+        "hkt",
+        "hm",
+        "hn",
+        "hockey",
+        "holdings",
+        "holiday",
+        "homedepot",
+        "homegoods",
+        "homes",
+        "homesense",
+        "honda",
+        "horse",
+        "hospital",
+        "host",
+        "hosting",
+        "hot",
+        "hoteles",
+        "hotels",
+        "hotmail",
+        "house",
+        "how",
+        "hr",
+        "hsbc",
+        "ht",
+        "hu",
+        "hughes",
+        "hyatt",
+        "hyundai",
+        "ibm",
+        "icbc",
+        "ice",
+        "icu",
+        "id",
+        "ie",
+        "ieee",
+        "ifm",
+        "ikano",
+        "il",
+        "im",
+        "imamat",
+        "imdb",
+        "immo",
+        "immobilien",
+        "in",
+        "inc",
+        "industries",
+        "infiniti",
+        "info",
+        "ing",
+        "ink",
+        "institute",
+        "insurance",
+        "insure",
+        "int",
+        "international",
+        "intuit",
+        "investments",
+        "io",
+        "ipiranga",
+        "iq",
+        "ir",
+        "irish",
+        "is",
+        "ismaili",
+        "ist",
+        "istanbul",
+        "it",
+        "itau",
+        "itv",
+        "iveco",
+        "jaguar",
+        "java",
+        "jcb",
+        "je",
+        "jeep",
+        "jetzt",
+        "jewelry",
+        "jio",
+        "jll",
+        "jm",
+        "jmp",
+        "jnj",
+        "jo",
+        "jobs",
+        "joburg",
+        "jot",
+        "joy",
+        "jp",
+        "jpmorgan",
+        "jprs",
+        "juegos",
+        "juniper",
+        "kaufen",
+        "kddi",
+        "ke",
+        "kerryhotels",
+        "kerrylogistics",
+        "kerryproperties",
+        "kfh",
+        "kg",
+        "kh",
+        "ki",
+        "kia",
+        "kim",
+        "kinder",
+        "kindle",
+        "kitchen",
+        "kiwi",
+        "km",
+        "kn",
+        "koeln",
+        "komatsu",
+        "kosher",
+        "kp",
+        "kpmg",
+        "kpn",
+        "kr",
+        "krd",
+        "kred",
+        "kuokgroup",
+        "kw",
+        "ky",
+        "kyoto",
+        "kz",
+        "la",
+        "lacaixa",
+        "lamborghini",
+        "lamer",
+        "lancaster",
+        "lancia",
+        "land",
+        "landrover",
+        "lanxess",
+        "lasalle",
+        "lat",
+        "latino",
+        "latrobe",
+        "law",
+        "lawyer",
+        "lb",
+        "lc",
+        "lds",
+        "lease",
+        "leclerc",
+        "lefrak",
+        "legal",
+        "lego",
+        "lexus",
+        "lgbt",
+        "li",
+        "lidl",
+        "life",
+        "lifeinsurance",
+        "lifestyle",
+        "lighting",
+        "like",
+        "lilly",
+        "limited",
+        "limo",
+        "lincoln",
+        "linde",
+        "link",
+        "lipsy",
+        "live",
+        "living",
+        "lixil",
+        "lk",
+        "llc",
+        "llp",
+        "loan",
+        "loans",
+        "locker",
+        "locus",
+        "loft",
+        "lol",
+        "london",
+        "lotte",
+        "lotto",
+        "love",
+        "lpl",
+        "lplfinancial",
+        "lr",
+        "ls",
+        "lt",
+        "ltd",
+        "ltda",
+        "lu",
+        "lundbeck",
+        "luxe",
+        "luxury",
+        "lv",
+        "ly",
+        "ma",
+        "macys",
+        "madrid",
+        "maif",
+        "maison",
+        "makeup",
+        "man",
+        "management",
+        "mango",
+        "map",
+        "market",
+        "marketing",
+        "markets",
+        "marriott",
+        "marshalls",
+        "maserati",
+        "mattel",
+        "mba",
+        "mc",
+        "mckinsey",
+        "md",
+        "me",
+        "med",
+        "media",
+        "meet",
+        "melbourne",
+        "meme",
+        "memorial",
+        "men",
+        "menu",
+        "merckmsd",
+        "mg",
+        "mh",
+        "miami",
+        "microsoft",
+        "mil",
+        "mini",
+        "mint",
+        "mit",
+        "mitsubishi",
+        "mk",
+        "ml",
+        "mlb",
+        "mls",
+        "mm",
+        "mma",
+        "mn",
+        "mo",
+        "mobi",
+        "mobile",
+        "moda",
+        "moe",
+        "moi",
+        "mom",
+        "monash",
+        "money",
+        "monster",
+        "mormon",
+        "mortgage",
+        "moscow",
+        "moto",
+        "motorcycles",
+        "mov",
+        "movie",
+        "mp",
+        "mq",
+        "mr",
+        "ms",
+        "msd",
+        "mt",
+        "mtn",
+        "mtr",
+        "mu",
+        "museum",
+        "mutual",
+        "mv",
+        "mw",
+        "mx",
+        "my",
+        "mz",
+        "na",
+        "nab",
+        "nagoya",
+        "name",
+        "nationwide",
+        "natura",
+        "navy",
+        "nba",
+        "nc",
+        "ne",
+        "nec",
+        "net",
+        "netbank",
+        "netflix",
+        "network",
+        "neustar",
+        "new",
+        "news",
+        "next",
+        "nextdirect",
+        "nexus",
+        "nf",
+        "nfl",
+        "ng",
+        "ngo",
+        "nhk",
+        "ni",
+        "nico",
+        "nike",
+        "nikon",
+        "ninja",
+        "nissan",
+        "nissay",
+        "nl",
+        "no",
+        "nokia",
+        "northwesternmutual",
+        "norton",
+        "now",
+        "nowruz",
+        "nowtv",
+        "np",
+        "nr",
+        "nra",
+        "nrw",
+        "ntt",
+        "nu",
+        "nyc",
+        "nz",
+        "obi",
+        "observer",
+        "off",
+        "office",
+        "okinawa",
+        "olayan",
+        "olayangroup",
+        "oldnavy",
+        "ollo",
+        "om",
+        "omega",
+        "one",
+        "ong",
+        "onl",
+        "online",
+        "onyourside",
+        "ooo",
+        "open",
+        "oracle",
+        "orange",
+        "org",
+        "organic",
+        "origins",
+        "osaka",
+        "otsuka",
+        "ott",
+        "ovh",
+        "pa",
+        "page",
+        "panasonic",
+        "paris",
+        "pars",
+        "partners",
+        "parts",
+        "party",
+        "passagens",
+        "pay",
+        "pccw",
+        "pe",
+        "pet",
+        "pf",
+        "pfizer",
+        "pg",
+        "ph",
+        "pharmacy",
+        "phd",
+        "philips",
+        "phone",
+        "photo",
+        "photography",
+        "photos",
+        "physio",
+        "pics",
+        "pictet",
+        "pictures",
+        "pid",
+        "pin",
+        "ping",
+        "pink",
+        "pioneer",
+        "pizza",
+        "pk",
+        "pl",
+        "place",
+        "play",
+        "playstation",
+        "plumbing",
+        "plus",
+        "pm",
+        "pn",
+        "pnc",
+        "pohl",
+        "poker",
+        "politie",
+        "porn",
+        "post",
+        "pr",
+        "pramerica",
+        "praxi",
+        "press",
+        "prime",
+        "pro",
+        "prod",
+        "productions",
+        "prof",
+        "progressive",
+        "promo",
+        "properties",
+        "property",
+        "protection",
+        "pru",
+        "prudential",
+        "ps",
+        "pt",
+        "pub",
+        "pw",
+        "pwc",
+        "py",
+        "qa",
+        "qpon",
+        "quebec",
+        "quest",
+        "qvc",
+        "racing",
+        "radio",
+        "raid",
+        "re",
+        "read",
+        "realestate",
+        "realtor",
+        "realty",
+        "recipes",
+        "red",
+        "redstone",
+        "redumbrella",
+        "rehab",
+        "reise",
+        "reisen",
+        "reit",
+        "reliance",
+        "ren",
+        "rent",
+        "rentals",
+        "repair",
+        "report",
+        "republican",
+        "rest",
+        "restaurant",
+        "review",
+        "reviews",
+        "rexroth",
+        "rich",
+        "richardli",
+        "ricoh",
+        "ril",
+        "rio",
+        "rip",
+        "rmit",
+        "ro",
+        "rocher",
+        "rocks",
+        "rodeo",
+        "rogers",
+        "room",
+        "rs",
+        "rsvp",
+        "ru",
+        "rugby",
+        "ruhr",
+        "run",
+        "rw",
+        "rwe",
+        "ryukyu",
+        "sa",
+        "saarland",
+        "safe",
+        "safety",
+        "sakura",
+        "sale",
+        "salon",
+        "samsclub",
+        "samsung",
+        "sandvik",
+        "sandvikcoromant",
+        "sanofi",
+        "sap",
+        "sarl",
+        "sas",
+        "save",
+        "saxo",
+        "sb",
+        "sbi",
+        "sbs",
+        "sc",
+        "sca",
+        "scb",
+        "schaeffler",
+        "schmidt",
+        "scholarships",
+        "school",
+        "schule",
+        "schwarz",
+        "science",
+        "scjohnson",
+        "scot",
+        "sd",
+        "se",
+        "search",
+        "seat",
+        "secure",
+        "security",
+        "seek",
+        "select",
+        "sener",
+        "services",
+        "ses",
+        "seven",
+        "sew",
+        "sex",
+        "sexy",
+        "sfr",
+        "sg",
+        "sh",
+        "shangrila",
+        "sharp",
+        "shaw",
+        "shell",
+        "shia",
+        "shiksha",
+        "shoes",
+        "shop",
+        "shopping",
+        "shouji",
+        "show",
+        "showtime",
+        "si",
+        "silk",
+        "sina",
+        "singles",
+        "site",
+        "sj",
+        "sk",
+        "ski",
+        "skin",
+        "sky",
+        "skype",
+        "sl",
+        "sling",
+        "sm",
+        "smart",
+        "smile",
+        "sn",
+        "sncf",
+        "so",
+        "soccer",
+        "social",
+        "softbank",
+        "software",
+        "sohu",
+        "solar",
+        "solutions",
+        "song",
+        "sony",
+        "soy",
+        "spa",
+        "space",
+        "sport",
+        "spot",
+        "spreadbetting",
+        "sr",
+        "srl",
+        "ss",
+        "st",
+        "stada",
+        "staples",
+        "star",
+        "statebank",
+        "statefarm",
+        "stc",
+        "stcgroup",
+        "stockholm",
+        "storage",
+        "store",
+        "stream",
+        "studio",
+        "study",
+        "style",
+        "su",
+        "sucks",
+        "supplies",
+        "supply",
+        "support",
+        "surf",
+        "surgery",
+        "suzuki",
+        "sv",
+        "swatch",
+        "swiftcover",
+        "swiss",
+        "sx",
+        "sy",
+        "sydney",
+        "systems",
+        "sz",
+        "tab",
+        "taipei",
+        "talk",
+        "taobao",
+        "target",
+        "tatamotors",
+        "tatar",
+        "tattoo",
+        "tax",
+        "taxi",
+        "tc",
+        "tci",
+        "td",
+        "tdk",
+        "team",
+        "tech",
+        "technology",
+        "tel",
+        "temasek",
+        "tennis",
+        "teva",
+        "tf",
+        "tg",
+        "th",
+        "thd",
+        "theater",
+        "theatre",
+        "tiaa",
+        "tickets",
+        "tienda",
+        "tiffany",
+        "tips",
+        "tires",
+        "tirol",
+        "tj",
+        "tjmaxx",
+        "tjx",
+        "tk",
+        "tkmaxx",
+        "tl",
+        "tm",
+        "tmall",
+        "tn",
+        "to",
+        "today",
+        "tokyo",
+        "tools",
+        "top",
+        "toray",
+        "toshiba",
+        "total",
+        "tours",
+        "town",
+        "toyota",
+        "toys",
+        "tr",
+        "trade",
+        "trading",
+        "training",
+        "travel",
+        "travelchannel",
+        "travelers",
+        "travelersinsurance",
+        "trust",
+        "trv",
+        "tt",
+        "tube",
+        "tui",
+        "tunes",
+        "tushu",
+        "tv",
+        "tvs",
+        "tw",
+        "tz",
+        "ua",
+        "ubank",
+        "ubs",
+        "ug",
+        "uk",
+        "unicom",
+        "university",
+        "uno",
+        "uol",
+        "ups",
+        "us",
+        "uy",
+        "uz",
+        "va",
+        "vacations",
+        "vana",
+        "vanguard",
+        "vc",
+        "ve",
+        "vegas",
+        "ventures",
+        "verisign",
+        "versicherung",
+        "vet",
+        "vg",
+        "vi",
+        "viajes",
+        "video",
+        "vig",
+        "viking",
+        "villas",
+        "vin",
+        "vip",
+        "virgin",
+        "visa",
+        "vision",
+        "viva",
+        "vivo",
+        "vlaanderen",
+        "vn",
+        "vodka",
+        "volkswagen",
+        "volvo",
+        "vote",
+        "voting",
+        "voto",
+        "voyage",
+        "vu",
+        "vuelos",
+        "wales",
+        "walmart",
+        "walter",
+        "wang",
+        "wanggou",
+        "watch",
+        "watches",
+        "weather",
+        "weatherchannel",
+        "webcam",
+        "weber",
+        "website",
+        "wed",
+        "wedding",
+        "weibo",
+        "weir",
+        "wf",
+        "whoswho",
+        "wien",
+        "wiki",
+        "williamhill",
+        "win",
+        "windows",
+        "wine",
+        "winners",
+        "wme",
+        "wolterskluwer",
+        "woodside",
+        "work",
+        "works",
+        "world",
+        "wow",
+        "ws",
+        "wtc",
+        "wtf",
+        "xbox",
+        "xerox",
+        "xfinity",
+        "xihuan",
+        "xin",
+        "xn--11b4c3d",
+        "xn--1ck2e1b",
+        "xn--1qqw23a",
+        "xn--2scrj9c",
+        "xn--30rr7y",
+        "xn--3bst00m",
+        "xn--3ds443g",
+        "xn--3e0b707e",
+        "xn--3hcrj9c",
+        "xn--3oq18vl8pn36a",
+        "xn--3pxu8k",
+        "xn--42c2d9a",
+        "xn--45br5cyl",
+        "xn--45brj9c",
+        "xn--45q11c",
+        "xn--4dbrk0ce",
+        "xn--4gbrim",
+        "xn--54b7fta0cc",
+        "xn--55qw42g",
+        "xn--55qx5d",
+        "xn--5su34j936bgsg",
+        "xn--5tzm5g",
+        "xn--6frz82g",
+        "xn--6qq986b3xl",
+        "xn--80adxhks",
+        "xn--80ao21a",
+        "xn--80aqecdr1a",
+        "xn--80asehdb",
+        "xn--80aswg",
+        "xn--8y0a063a",
+        "xn--90a3ac",
+        "xn--90ae",
+        "xn--90ais",
+        "xn--9dbq2a",
+        "xn--9et52u",
+        "xn--9krt00a",
+        "xn--b4w605ferd",
+        "xn--bck1b9a5dre4c",
+        "xn--c1avg",
+        "xn--c2br7g",
+        "xn--cck2b3b",
+        "xn--cckwcxetd",
+        "xn--cg4bki",
+        "xn--clchc0ea0b2g2a9gcd",
+        "xn--czr694b",
+        "xn--czrs0t",
+        "xn--czru2d",
+        "xn--d1acj3b",
+        "xn--d1alf",
+        "xn--e1a4c",
+        "xn--eckvdtc9d",
+        "xn--efvy88h",
+        "xn--fct429k",
+        "xn--fhbei",
+        "xn--fiq228c5hs",
+        "xn--fiq64b",
+        "xn--fiqs8s",
+        "xn--fiqz9s",
+        "xn--fjq720a",
+        "xn--flw351e",
+        "xn--fpcrj9c3d",
+        "xn--fzc2c9e2c",
+        "xn--fzys8d69uvgm",
+        "xn--g2xx48c",
+        "xn--gckr3f0f",
+        "xn--gecrj9c",
+        "xn--gk3at1e",
+        "xn--h2breg3eve",
+        "xn--h2brj9c",
+        "xn--h2brj9c8c",
+        "xn--hxt814e",
+        "xn--i1b6b1a6a2e",
+        "xn--imr513n",
+        "xn--io0a7i",
+        "xn--j1aef",
+        "xn--j1amh",
+        "xn--j6w193g",
+        "xn--jlq480n2rg",
+        "xn--jlq61u9w7b",
+        "xn--jvr189m",
+        "xn--kcrx77d1x4a",
+        "xn--kprw13d",
+        "xn--kpry57d",
+        "xn--kput3i",
+        "xn--l1acc",
+        "xn--lgbbat1ad8j",
+        "xn--mgb9awbf",
+        "xn--mgba3a3ejt",
+        "xn--mgba3a4f16a",
+        "xn--mgba7c0bbn0a",
+        "xn--mgbaakc7dvf",
+        "xn--mgbaam7a8h",
+        "xn--mgbab2bd",
+        "xn--mgbah1a3hjkrd",
+        "xn--mgbai9azgqp6j",
+        "xn--mgbayh7gpa",
+        "xn--mgbbh1a",
+        "xn--mgbbh1a71e",
+        "xn--mgbc0a9azcg",
+        "xn--mgbca7dzdo",
+        "xn--mgbcpq6gpa1a",
+        "xn--mgberp4a5d4ar",
+        "xn--mgbgu82a",
+        "xn--mgbi4ecexp",
+        "xn--mgbpl2fh",
+        "xn--mgbt3dhd",
+        "xn--mgbtx2b",
+        "xn--mgbx4cd0ab",
+        "xn--mix891f",
+        "xn--mk1bu44c",
+        "xn--mxtq1m",
+        "xn--ngbc5azd",
+        "xn--ngbe9e0a",
+        "xn--ngbrx",
+        "xn--node",
+        "xn--nqv7f",
+        "xn--nqv7fs00ema",
+        "xn--nyqy26a",
+        "xn--o3cw4h",
+        "xn--ogbpf8fl",
+        "xn--otu796d",
+        "xn--p1acf",
+        "xn--p1ai",
+        "xn--pgbs0dh",
+        "xn--pssy2u",
+        "xn--q7ce6a",
+        "xn--q9jyb4c",
+        "xn--qcka1pmc",
+        "xn--qxa6a",
+        "xn--qxam",
+        "xn--rhqv96g",
+        "xn--rovu88b",
+        "xn--rvc1e0am3e",
+        "xn--s9brj9c",
+        "xn--ses554g",
+        "xn--t60b56a",
+        "xn--tckwe",
+        "xn--tiq49xqyj",
+        "xn--unup4y",
+        "xn--vermgensberater-ctb",
+        "xn--vermgensberatung-pwb",
+        "xn--vhquv",
+        "xn--vuq861b",
+        "xn--w4r85el8fhu5dnra",
+        "xn--w4rs40l",
+        "xn--wgbh1c",
+        "xn--wgbl6a",
+        "xn--xhq521b",
+        "xn--xkc2al3hye2a",
+        "xn--xkc2dl3a5ee0h",
+        "xn--y9a3aq",
+        "xn--yfro4i67o",
+        "xn--ygbi2ammx",
+        "xn--zfr164b",
+        "xxx",
+        "xyz",
+        "yachts",
+        "yahoo",
+        "yamaxun",
+        "yandex",
+        "ye",
+        "yodobashi",
+        "yoga",
+        "yokohama",
+        "you",
+        "youtube",
+        "yt",
+        "yun",
+        "za",
+        "zappos",
+        "zara",
+        "zero",
+        "zip",
+        "zm",
+        "zone",
+        "zuerich",
+        "zw"
+    };
+
     struct HtmlEscapeSequence {
         char unescape_char;
         std::string escape_sequence;
@@ -53,6 +1561,10 @@ namespace QuickMedia {
         return c >= '0' && c <= '9';
     }
 
+    static bool is_whitespace(char c) { // Returns true only for space, horizontal tab and line feed.
+        return c == ' ' || c == '\t' || c == '\n'; // NOTE(review): '\r', '\v' and '\f' are not matched here; confirm that CRLF-terminated text is handled as intended.
+    }
+
     std::string url_param_encode(const std::string &param) {
         std::ostringstream result;
         result.fill('0');
@@ -74,50 +1586,18 @@ namespace QuickMedia {
         return is_alpha(c) || is_digit(c);
     }
 
-    static bool is_url_character(char c) {
-        switch(c) {
-            case '%':
-            // Reserved
-            case ':':
-            case '/':
-            case '?':
-            case '#':
-            case '[':
-            case ']':
-            case '@':
-            case '!':
-            case '$':
-            case '&':
-            case '\'':
-            case '(':
-            case ')':
-            case '*':
-            case '+':
-            case ',':
-            case ';':
-            case '=':
-            // Unreserved:
-            case '-':
-            case '.':
-            case '_':
-            case '~':
-                return true;
-            default:
-                return is_alpha(c) || is_digit(c);
-        }
-    }
-
     // Implementation follows URI standard in general: https://tools.ietf.org/html/rfc3986#section-2.2.
     // Also checks for balanced parentheses to allow text such as: (see: example.com/) that excludes the last parenthesis.
-    void extract_urls(const std::string &str, std::vector<std::string> &urls) {
+    std::vector<Range> extract_urls(const std::string &str) {
+        std::vector<Range> ranges;
+
         int parentheses_depth = 0;
+        bool is_valid_url = false;
         size_t url_start = std::string::npos;
-        size_t url_dot_index = std::string::npos;
+
         // str.size() is fine, we want to include the NULL character so we can extract url at the end of the string
         for(size_t i = 0; i < (size_t)str.size() + 1; ++i) {
             char c = str[i];
-            if(c == '.' && url_start != std::string::npos && url_dot_index == std::string::npos)
-                url_dot_index = i;
 
             if(url_start != std::string::npos) {
                 if(c == '(')
@@ -126,25 +1606,103 @@ namespace QuickMedia {
                     --parentheses_depth;
             }
 
+            if(url_start != std::string::npos && !is_valid_url && (is_whitespace(c) || c == '/' || c == ',' || c == ':' || c == ')' || c == '\0' || (c == '.' && i == str.size()))) {
+                size_t tld_end = i - 1;
+                char prev_char = str[i - 1];
+                // We want to remove the last . or , because the string could contain for example "click on this link: example.com. There you can..."
+                // and we want those links to work, I guess?
+                if(prev_char == '.' || prev_char == ',')
+                    --tld_end;
+                else if(prev_char == ')' && parentheses_depth != 0)
+                    --tld_end;
+                size_t tld_start = tld_end;
+                
+                while(tld_start > url_start) {
+                    if(str[tld_start] == '.')
+                        break;
+                    --tld_start;
+                }
+
+                if(tld_start > url_start && TLDS.find(str.substr(tld_start + 1, tld_end - tld_start)) != TLDS.end())
+                    is_valid_url = true;
+            }
+
             if(url_start == std::string::npos && is_url_start_char(c)) {
                 url_start = i;
-            } else if(url_start != std::string::npos && !is_url_character(c)) {
+            } else if(url_start != std::string::npos && (is_whitespace(c) || c == '\0')) {
                 // Its only an url if there is a dot and the dot is not the last character in the url, for example "example.com" is an url but "example." is not.
-                if(url_dot_index != std::string::npos && url_dot_index != i - 1) {
+                if(is_valid_url) {
                     size_t url_length = i - url_start;
                     char prev_char = str[i - 1];
-                    // We want to remove the last . or , because the string could contain for example "click on this like: example.com. There you can..."
+                    // We want to remove the last . or , because the string could contain for example "click on this link: example.com. There you can..."
                     // and we want those links to work, I guess?
                     if(prev_char == '.' || prev_char == ',')
                         --url_length;
-                    if(prev_char == ')' && parentheses_depth != 0)
+                    else if(prev_char == ')' && parentheses_depth != 0)
                         --url_length;
                     if(url_length > 0)
-                        urls.push_back(str.substr(url_start, url_length));
+                        ranges.push_back({url_start, url_length});
                 }
+
                 url_start = std::string::npos;
-                url_dot_index = std::string::npos;
+                is_valid_url = false;
             }
+        }
+
+        return ranges;
+    }
+
+    // Returns the bytes of |str| covered by each range, in order. substr clamps a length that runs past the end of |str|.
+    std::vector<std::string> ranges_get_strings(const std::string &str, const std::vector<Range> &ranges) {
+        std::vector<std::string> strings;
+        strings.reserve(ranges.size());
+        for(const Range &range : ranges)
+            strings.push_back(str.substr(range.start, range.length));
+        return strings;
     }
+
+    // Returns true if |c| is the leading byte of a UTF-8 encoded codepoint:
+    // 0xxxxxxx (ASCII), 110xxxxx (2 bytes), 1110xxxx (3 bytes) or 11110xxx (4 bytes).
+    // Continuation bytes (10xxxxxx) and bytes above 0xF7 give false.
+    static bool is_start_of_utf8_codepoint(uint8_t c) {
+        const bool is_ascii = (c & 0x80) == 0;
+        const bool is_lead_of_2 = (c & 0xE0) == 0xC0;
+        const bool is_lead_of_3 = (c & 0xF0) == 0xE0;
+        const bool is_lead_of_4 = (c & 0xF8) == 0xF0;
+
+        // Anything else is a continuation byte or a byte that never starts a sequence
+        return is_ascii || is_lead_of_2 || is_lead_of_3 || is_lead_of_4;
+    }
+
+    // Converts |ranges| in place from UTF-8 byte offsets and lengths into |str| to codepoint (UTF-32)
+    // offsets and lengths, in a single pass over |str|.
+    // |ranges| must be sorted by start and must not overlap, which is how extract_urls produces them.
+    // A byte only counts as a codepoint if is_start_of_utf8_codepoint returns true for it.
+    // Nothing is changed if |ranges| is empty.
+    void convert_utf8_to_utf32_ranges(const std::string &str, std::vector<Range> &ranges) {
+        size_t byte_index = 0;
+        size_t num_codepoints = 0;
+
+        for(Range &range : ranges) {
+            // Clamp the byte range to the string so a bad range can't make us read out of bounds
+            const size_t byte_start = range.start < str.size() ? range.start : str.size();
+            const size_t max_length = str.size() - byte_start;
+            const size_t byte_end = byte_start + (range.length < max_length ? range.length : max_length);
+
+            // The number of codepoints that start before the range is the new start
+            for(; byte_index < byte_start; ++byte_index) {
+                if(is_start_of_utf8_codepoint(static_cast<uint8_t>(str[byte_index])))
+                    ++num_codepoints;
+            }
+            range.start = num_codepoints;
+
+            // The number of codepoints that start inside the range is the new length. This is done for
+            // every range, including the last one and one that ends at the end of the string
+            for(; byte_index < byte_end; ++byte_index) {
+                if(is_start_of_utf8_codepoint(static_cast<uint8_t>(str[byte_index])))
+                    ++num_codepoints;
+            }
+            range.length = num_codepoints - range.start;
+        }
     }
 }
 \ No newline at end of file
diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp
index fbc2699..ea1292d 100644
--- a/src/QuickMedia.cpp
+++ b/src/QuickMedia.cpp
@@ -4232,8 +4232,7 @@ namespace QuickMedia {
             }
 
             // TODO: If content type is a file, show file-manager prompt where it should be saved and asynchronously save it instead
-            std::vector<std::string> urls;
-            extract_urls(selected->get_description(), urls);
+            std::vector<std::string> urls = ranges_get_strings(selected->get_description(), extract_urls(selected->get_description()));
             if(urls.size() == 1) {
                 launch_url(urls[0]);
                 return true;
diff --git a/src/Text.cpp b/src/Text.cpp
index 171bd70..a9f1147 100644
--- a/src/Text.cpp
+++ b/src/Text.cpp
@@ -14,6 +14,7 @@ namespace QuickMedia
 {
     static const float TAB_WIDTH = 4.0f;
     static const float WORD_WRAP_MIN_SIZE = 80.0f;
+    static const sf::Color URL_COLOR(35, 140, 245);
 
     size_t StringViewUtf32::find(const StringViewUtf32 &other, size_t offset) const {
         if(offset >= size)
@@ -322,6 +323,12 @@ namespace QuickMedia
             assert(dirty);
             dirtyText = false;
             splitTextByFont();
+            // TODO: Optimize. The bytes are copied since a basic_string of sf::Uint8 can't be used as a std::string
+            const auto u8 = str.toUtf8();
+            const std::string u8_str(u8.begin(), u8.end());
+            url_ranges = extract_urls(u8_str);
+            convert_utf8_to_utf32_ranges(u8_str, url_ranges);
+            dirty = true;
         }
 
         if(!update_even_if_not_dirty && !dirty)
@@ -343,9 +350,12 @@ namespace QuickMedia
         float latin_font_height = latin_font->getGlyph(' ', characterSize, false).advance;
         float hspace = latin_font_height + characterSpacing;
         float vspace = latin_font->getLineSpacing(characterSize); // TODO: What about japanese font???
+
+        size_t url_range_index = 0;
         
         sf::Vector2f glyphPos;
         sf::Uint32 prevCodePoint = 0;
+        // TODO: Only do this if dirtyText
         for(usize textElementIndex = 0; textElementIndex < textElements.size(); ++textElementIndex)
         {
             TextElement &textElement = textElements[textElementIndex];
@@ -392,6 +402,17 @@ namespace QuickMedia
             textElement.position = glyphPos;
             for(size_t i = 0; i < textElement.text.size; ++i)
             {
+                sf::Color text_color = color;
+                if(url_range_index < url_ranges.size()) {
+                    size_t string_offset = (textElement.text.data + i) - str.getData();
+                    if(string_offset >= url_ranges[url_range_index].start && string_offset < url_ranges[url_range_index].start + url_ranges[url_range_index].length) {
+                        text_color = URL_COLOR;
+                        text_color.a = color.a;
+                        if(string_offset + 1 == url_ranges[url_range_index].start + url_ranges[url_range_index].length)
+                            ++url_range_index;
+                    }
+                }
+
                 sf::Uint32 codePoint = textElement.text[i];
                 // TODO: Make this work when combining multiple different fonts (for example latin and japanese).
                 // For japanese we could use a hack, because all japanese characters are monospace (exception being half-width characters).
@@ -473,12 +494,12 @@ namespace QuickMedia
                 sf::Vector2f textureBottomLeft(glyph.textureRect.left, glyph.textureRect.top + glyph.textureRect.height);
                 sf::Vector2f textureBottomRight(glyph.textureRect.left + glyph.textureRect.width, glyph.textureRect.top + glyph.textureRect.height);
                 
-                vertices[vertices_index].append({ vertexTopRight, color, textureTopRight });
-                vertices[vertices_index].append({ vertexTopLeft, color, textureTopLeft });
-                vertices[vertices_index].append({ vertexBottomLeft, color, textureBottomLeft });
-                vertices[vertices_index].append({ vertexBottomLeft, color, textureBottomLeft });
-                vertices[vertices_index].append({ vertexBottomRight, color, textureBottomRight });
-                vertices[vertices_index].append({ vertexTopRight, color, textureTopRight });
+                vertices[vertices_index].append({ vertexTopRight, text_color, textureTopRight });
+                vertices[vertices_index].append({ vertexTopLeft, text_color, textureTopLeft });
+                vertices[vertices_index].append({ vertexBottomLeft, text_color, textureBottomLeft });
+                vertices[vertices_index].append({ vertexBottomLeft, text_color, textureBottomLeft });
+                vertices[vertices_index].append({ vertexBottomRight, text_color, textureBottomRight });
+                vertices[vertices_index].append({ vertexTopRight, text_color, textureTopRight });
                 
                 glyphPos.x += glyph.advance + characterSpacing;
                 vertices_linear.push_back({vertices_index, vertexStart, 0, codePoint});
@@ -559,6 +580,7 @@ namespace QuickMedia
         }
         boundingBox.height = num_lines * line_height;
 
+        //url_ranges.clear();
         if(!editable)
             vertices_linear.clear();
     }
diff --git a/src/plugins/Matrix.cpp b/src/plugins/Matrix.cpp
index daae545..583bad8 100644
--- a/src/plugins/Matrix.cpp
+++ b/src/plugins/Matrix.cpp
@@ -2143,7 +2143,7 @@ namespace QuickMedia {
             message_content_extract_thumbnail_size(*content_json, message->thumbnail_size);
             message->type = MessageType::VIDEO;
             if(message->thumbnail_url.empty())
-                prefix = "🎥 play ";
+                prefix = "🎥 Play ";
         } else if(strcmp(content_type.GetString(), "m.audio") == 0) {
             const rapidjson::Value &url_json = GetMember(*content_json, "url");
             if(!url_json.IsString() || strncmp(url_json.GetString(), "mxc://", 6) != 0)
@@ -2151,7 +2151,7 @@ namespace QuickMedia {
 
             message->url = homeserver + "/_matrix/media/r0/download/" + (url_json.GetString() + 6);
             message->type = MessageType::AUDIO;
-            prefix = "🎵 play ";
+            prefix = "🎵 Play ";
         } else if(strcmp(content_type.GetString(), "m.file") == 0) {
             const rapidjson::Value &url_json = GetMember(*content_json, "url");
             if(!url_json.IsString() || strncmp(url_json.GetString(), "mxc://", 6) != 0)
@@ -2159,7 +2159,7 @@ namespace QuickMedia {
 
             message->url = homeserver + "/_matrix/media/r0/download/" + (url_json.GetString() + 6);
             message->type = MessageType::FILE;
-            prefix = "💾 download ";
+            prefix = "💾 Download ";
         } else if(strcmp(content_type.GetString(), "m.emote") == 0) { // this is a /me message, TODO: show /me messages differently
             message->type = MessageType::TEXT;
             prefix = "*" + room_data->get_user_display_name(user) + "* ";
diff --git a/tests/main.cpp b/tests/main.cpp
index 647fdff..c5138e3 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -6,44 +6,45 @@
 
 int main() {
     std::vector<std::string> urls;
+    const char *str;
 
-    urls.clear();
-    QuickMedia::extract_urls("example.com", urls);
+    str = "example.com";
+    urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
     assert_equals(urls.size(), 1);
     assert_equals(urls[0], "example.com");
 
-    urls.clear();
-    QuickMedia::extract_urls("example.com, is where I like to go", urls);
+    str = "example.com, is where I like to go";
+    urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
     assert_equals(urls.size(), 1);
     assert_equals(urls[0], "example.com");
 
-    urls.clear();
-    QuickMedia::extract_urls("The website I like to go to is example.com", urls);
+    str = "The website I like to go to is example.com";
+    urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
     assert_equals(urls.size(), 1);
     assert_equals(urls[0], "example.com");
 
-    urls.clear();
-    QuickMedia::extract_urls("example.com. Is also a website", urls);
+    str = "example.com. Is also a website";
+    urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
     assert_equals(urls.size(), 1);
     assert_equals(urls[0], "example.com");
 
-    urls.clear();
-    QuickMedia::extract_urls("these. are. not. websites.", urls);
+    str = "these. are. not. websites.";
+    urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
     assert_equals(urls.size(), 0);
 
-    urls.clear();
-    QuickMedia::extract_urls("This is not an url: example.", urls);
+    str = "This is not an url: example.";
+    urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
     assert_equals(urls.size(), 0);
 
-    urls.clear();
-    QuickMedia::extract_urls("the.se/~#423-_/2f.no/3df a.re considered sub.websit.es", urls);
+    str = "the.se/~#423-_/2f.no/3df a.re considered sub.websit.es, this.is.not";
+    urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
     assert_equals(urls.size(), 3);
     assert_equals(urls[0], "the.se/~#423-_/2f.no/3df");
     assert_equals(urls[1], "a.re");
     assert_equals(urls[2], "sub.websit.es");
 
-    urls.clear();
-    QuickMedia::extract_urls("(see https://emojipedia.org/emoji/%23%EF%B8%8F%E2%83%A3/)", urls);
+    str = "(see https://emojipedia.org/emoji/%23%EF%B8%8F%E2%83%A3/)";
+    urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
     assert_equals(urls.size(), 1);
     assert_equals(urls[0], "https://emojipedia.org/emoji/%23%EF%B8%8F%E2%83%A3/");
     return 0;