about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/NetUtils.hpp 9
-rw-r--r-- include/Text.hpp 3
-rw-r--r-- src/FileAnalyzer.cpp 2
-rw-r--r-- src/NetUtils.cpp 1644
-rw-r--r-- src/QuickMedia.cpp 3
-rw-r--r-- src/Text.cpp 34
-rw-r--r-- src/plugins/Matrix.cpp 6
-rw-r--r-- tests/main.cpp 33
8 files changed, 1662 insertions, 72 deletions
diff --git a/include/NetUtils.hpp b/include/NetUtils.hpp
index 4770fb4..e719c82 100644
--- a/include/NetUtils.hpp
+++ b/include/NetUtils.hpp
@@ -4,8 +4,15 @@
#include <vector>
namespace QuickMedia {
+ struct Range { // A span inside a string, described by where it begins and how many units it covers.
+ size_t start; // Index of the first unit. extract_urls fills this with a byte offset; convert_utf8_to_utf32_ranges rewrites it as a codepoint index.
+ size_t length; // Number of units in the span, counted in the same unit as start.
+ };
+
void html_escape_sequences(std::string &str);
void html_unescape_sequences(std::string &str);
std::string url_param_encode(const std::string &param);
- void extract_urls(const std::string &str, std::vector<std::string> &urls);
+ std::vector<Range> extract_urls(const std::string &str);
+ std::vector<std::string> ranges_get_strings(const std::string &str, const std::vector<Range> &ranges);
+ void convert_utf8_to_utf32_ranges(const std::string &str, std::vector<Range> &ranges);
} \ No newline at end of file
diff --git a/include/Text.hpp b/include/Text.hpp
index 3981f13..c74607c 100644
--- a/include/Text.hpp
+++ b/include/Text.hpp
@@ -1,5 +1,6 @@
#pragma once
+#include "NetUtils.hpp"
#include <SFML/Graphics/VertexArray.hpp>
#include <SFML/System/String.hpp>
#include <SFML/System/Clock.hpp>
@@ -160,5 +161,7 @@ namespace QuickMedia
sf::Vector2u renderTargetSize;
std::vector<VertexRef> vertices_linear; // TODO: Use textElements instead
+
+ std::vector<Range> url_ranges;
};
}
diff --git a/src/FileAnalyzer.cpp b/src/FileAnalyzer.cpp
index ccad221..0059233 100644
--- a/src/FileAnalyzer.cpp
+++ b/src/FileAnalyzer.cpp
@@ -128,7 +128,7 @@ namespace QuickMedia {
return false;
}
- if(width > 0 || height > 0) {
+ if(width > 0 && height > 0) {
if(create_thumbnail(destination_path_tmp, destination_path, sf::Vector2i(width, height))) {
remove(destination_path_tmp.data.c_str());
return true;
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
index 5ca6d3e..d5795c2 100644
--- a/src/NetUtils.cpp
+++ b/src/NetUtils.cpp
@@ -3,8 +3,1516 @@
#include <array>
#include <sstream>
#include <iomanip>
+#include <assert.h>
+#include <unordered_set>
namespace QuickMedia {
+ // Source: https://data.iana.org/TLD/tlds-alpha-by-domain.txt
+ static const std::unordered_set<std::string> TLDS = {
+ "aaa",
+ "aarp",
+ "abarth",
+ "abb",
+ "abbott",
+ "abbvie",
+ "abc",
+ "able",
+ "abogado",
+ "abudhabi",
+ "ac",
+ "academy",
+ "accenture",
+ "accountant",
+ "accountants",
+ "aco",
+ "actor",
+ "ad",
+ "adac",
+ "ads",
+ "adult",
+ "ae",
+ "aeg",
+ "aero",
+ "aetna",
+ "af",
+ "afamilycompany",
+ "afl",
+ "africa",
+ "ag",
+ "agakhan",
+ "agency",
+ "ai",
+ "aig",
+ "airbus",
+ "airforce",
+ "airtel",
+ "akdn",
+ "al",
+ "alfaromeo",
+ "alibaba",
+ "alipay",
+ "allfinanz",
+ "allstate",
+ "ally",
+ "alsace",
+ "alstom",
+ "am",
+ "amazon",
+ "americanexpress",
+ "americanfamily",
+ "amex",
+ "amfam",
+ "amica",
+ "amsterdam",
+ "analytics",
+ "android",
+ "anquan",
+ "anz",
+ "ao",
+ "aol",
+ "apartments",
+ "app",
+ "apple",
+ "aq",
+ "aquarelle",
+ "ar",
+ "arab",
+ "aramco",
+ "archi",
+ "army",
+ "arpa",
+ "art",
+ "arte",
+ "as",
+ "asda",
+ "asia",
+ "associates",
+ "at",
+ "athleta",
+ "attorney",
+ "au",
+ "auction",
+ "audi",
+ "audible",
+ "audio",
+ "auspost",
+ "author",
+ "auto",
+ "autos",
+ "avianca",
+ "aw",
+ "aws",
+ "ax",
+ "axa",
+ "az",
+ "azure",
+ "ba",
+ "baby",
+ "baidu",
+ "banamex",
+ "bananarepublic",
+ "band",
+ "bank",
+ "bar",
+ "barcelona",
+ "barclaycard",
+ "barclays",
+ "barefoot",
+ "bargains",
+ "baseball",
+ "basketball",
+ "bauhaus",
+ "bayern",
+ "bb",
+ "bbc",
+ "bbt",
+ "bbva",
+ "bcg",
+ "bcn",
+ "bd",
+ "be",
+ "beats",
+ "beauty",
+ "beer",
+ "bentley",
+ "berlin",
+ "best",
+ "bestbuy",
+ "bet",
+ "bf",
+ "bg",
+ "bh",
+ "bharti",
+ "bi",
+ "bible",
+ "bid",
+ "bike",
+ "bing",
+ "bingo",
+ "bio",
+ "biz",
+ "bj",
+ "black",
+ "blackfriday",
+ "blockbuster",
+ "blog",
+ "bloomberg",
+ "blue",
+ "bm",
+ "bms",
+ "bmw",
+ "bn",
+ "bnpparibas",
+ "bo",
+ "boats",
+ "boehringer",
+ "bofa",
+ "bom",
+ "bond",
+ "boo",
+ "book",
+ "booking",
+ "bosch",
+ "bostik",
+ "boston",
+ "bot",
+ "boutique",
+ "box",
+ "br",
+ "bradesco",
+ "bridgestone",
+ "broadway",
+ "broker",
+ "brother",
+ "brussels",
+ "bs",
+ "bt",
+ "budapest",
+ "bugatti",
+ "build",
+ "builders",
+ "business",
+ "buy",
+ "buzz",
+ "bv",
+ "bw",
+ "by",
+ "bz",
+ "bzh",
+ "ca",
+ "cab",
+ "cafe",
+ "cal",
+ "call",
+ "calvinklein",
+ "cam",
+ "camera",
+ "camp",
+ "cancerresearch",
+ "canon",
+ "capetown",
+ "capital",
+ "capitalone",
+ "car",
+ "caravan",
+ "cards",
+ "care",
+ "career",
+ "careers",
+ "cars",
+ "casa",
+ "case",
+ "cash",
+ "casino",
+ "cat",
+ "catering",
+ "catholic",
+ "cba",
+ "cbn",
+ "cbre",
+ "cbs",
+ "cc",
+ "cd",
+ "center",
+ "ceo",
+ "cern",
+ "cf",
+ "cfa",
+ "cfd",
+ "cg",
+ "ch",
+ "chanel",
+ "channel",
+ "charity",
+ "chase",
+ "chat",
+ "cheap",
+ "chintai",
+ "christmas",
+ "chrome",
+ "church",
+ "ci",
+ "cipriani",
+ "circle",
+ "cisco",
+ "citadel",
+ "citi",
+ "citic",
+ "city",
+ "cityeats",
+ "ck",
+ "cl",
+ "claims",
+ "cleaning",
+ "click",
+ "clinic",
+ "clinique",
+ "clothing",
+ "cloud",
+ "club",
+ "clubmed",
+ "cm",
+ "cn",
+ "co",
+ "coach",
+ "codes",
+ "coffee",
+ "college",
+ "cologne",
+ "com",
+ "comcast",
+ "commbank",
+ "community",
+ "company",
+ "compare",
+ "computer",
+ "comsec",
+ "condos",
+ "construction",
+ "consulting",
+ "contact",
+ "contractors",
+ "cooking",
+ "cookingchannel",
+ "cool",
+ "coop",
+ "corsica",
+ "country",
+ "coupon",
+ "coupons",
+ "courses",
+ "cpa",
+ "cr",
+ "credit",
+ "creditcard",
+ "creditunion",
+ "cricket",
+ "crown",
+ "crs",
+ "cruise",
+ "cruises",
+ "csc",
+ "cu",
+ "cuisinella",
+ "cv",
+ "cw",
+ "cx",
+ "cy",
+ "cymru",
+ "cyou",
+ "cz",
+ "dabur",
+ "dad",
+ "dance",
+ "data",
+ "date",
+ "dating",
+ "datsun",
+ "day",
+ "dclk",
+ "dds",
+ "de",
+ "deal",
+ "dealer",
+ "deals",
+ "degree",
+ "delivery",
+ "dell",
+ "deloitte",
+ "delta",
+ "democrat",
+ "dental",
+ "dentist",
+ "desi",
+ "design",
+ "dev",
+ "dhl",
+ "diamonds",
+ "diet",
+ "digital",
+ "direct",
+ "directory",
+ "discount",
+ "discover",
+ "dish",
+ "diy",
+ "dj",
+ "dk",
+ "dm",
+ "dnp",
+ "do",
+ "docs",
+ "doctor",
+ "dog",
+ "domains",
+ "dot",
+ "download",
+ "drive",
+ "dtv",
+ "dubai",
+ "duck",
+ "dunlop",
+ "dupont",
+ "durban",
+ "dvag",
+ "dvr",
+ "dz",
+ "earth",
+ "eat",
+ "ec",
+ "eco",
+ "edeka",
+ "edu",
+ "education",
+ "ee",
+ "eg",
+ "email",
+ "emerck",
+ "energy",
+ "engineer",
+ "engineering",
+ "enterprises",
+ "epson",
+ "equipment",
+ "er",
+ "ericsson",
+ "erni",
+ "es",
+ "esq",
+ "estate",
+ "et",
+ "etisalat",
+ "eu",
+ "eurovision",
+ "eus",
+ "events",
+ "exchange",
+ "expert",
+ "exposed",
+ "express",
+ "extraspace",
+ "fage",
+ "fail",
+ "fairwinds",
+ "faith",
+ "family",
+ "fan",
+ "fans",
+ "farm",
+ "farmers",
+ "fashion",
+ "fast",
+ "fedex",
+ "feedback",
+ "ferrari",
+ "ferrero",
+ "fi",
+ "fiat",
+ "fidelity",
+ "fido",
+ "film",
+ "final",
+ "finance",
+ "financial",
+ "fire",
+ "firestone",
+ "firmdale",
+ "fish",
+ "fishing",
+ "fit",
+ "fitness",
+ "fj",
+ "fk",
+ "flickr",
+ "flights",
+ "flir",
+ "florist",
+ "flowers",
+ "fly",
+ "fm",
+ "fo",
+ "foo",
+ "food",
+ "foodnetwork",
+ "football",
+ "ford",
+ "forex",
+ "forsale",
+ "forum",
+ "foundation",
+ "fox",
+ "fr",
+ "free",
+ "fresenius",
+ "frl",
+ "frogans",
+ "frontdoor",
+ "frontier",
+ "ftr",
+ "fujitsu",
+ "fun",
+ "fund",
+ "furniture",
+ "futbol",
+ "fyi",
+ "ga",
+ "gal",
+ "gallery",
+ "gallo",
+ "gallup",
+ "game",
+ "games",
+ "gap",
+ "garden",
+ "gay",
+ "gb",
+ "gbiz",
+ "gd",
+ "gdn",
+ "ge",
+ "gea",
+ "gent",
+ "genting",
+ "george",
+ "gf",
+ "gg",
+ "ggee",
+ "gh",
+ "gi",
+ "gift",
+ "gifts",
+ "gives",
+ "giving",
+ "gl",
+ "glade",
+ "glass",
+ "gle",
+ "global",
+ "globo",
+ "gm",
+ "gmail",
+ "gmbh",
+ "gmo",
+ "gmx",
+ "gn",
+ "godaddy",
+ "gold",
+ "goldpoint",
+ "golf",
+ "goo",
+ "goodyear",
+ "goog",
+ "google",
+ "gop",
+ "got",
+ "gov",
+ "gp",
+ "gq",
+ "gr",
+ "grainger",
+ "graphics",
+ "gratis",
+ "green",
+ "gripe",
+ "grocery",
+ "group",
+ "gs",
+ "gt",
+ "gu",
+ "guardian",
+ "gucci",
+ "guge",
+ "guide",
+ "guitars",
+ "guru",
+ "gw",
+ "gy",
+ "hair",
+ "hamburg",
+ "hangout",
+ "haus",
+ "hbo",
+ "hdfc",
+ "hdfcbank",
+ "health",
+ "healthcare",
+ "help",
+ "helsinki",
+ "here",
+ "hermes",
+ "hgtv",
+ "hiphop",
+ "hisamitsu",
+ "hitachi",
+ "hiv",
+ "hk",
+ "hkt",
+ "hm",
+ "hn",
+ "hockey",
+ "holdings",
+ "holiday",
+ "homedepot",
+ "homegoods",
+ "homes",
+ "homesense",
+ "honda",
+ "horse",
+ "hospital",
+ "host",
+ "hosting",
+ "hot",
+ "hoteles",
+ "hotels",
+ "hotmail",
+ "house",
+ "how",
+ "hr",
+ "hsbc",
+ "ht",
+ "hu",
+ "hughes",
+ "hyatt",
+ "hyundai",
+ "ibm",
+ "icbc",
+ "ice",
+ "icu",
+ "id",
+ "ie",
+ "ieee",
+ "ifm",
+ "ikano",
+ "il",
+ "im",
+ "imamat",
+ "imdb",
+ "immo",
+ "immobilien",
+ "in",
+ "inc",
+ "industries",
+ "infiniti",
+ "info",
+ "ing",
+ "ink",
+ "institute",
+ "insurance",
+ "insure",
+ "int",
+ "international",
+ "intuit",
+ "investments",
+ "io",
+ "ipiranga",
+ "iq",
+ "ir",
+ "irish",
+ "is",
+ "ismaili",
+ "ist",
+ "istanbul",
+ "it",
+ "itau",
+ "itv",
+ "iveco",
+ "jaguar",
+ "java",
+ "jcb",
+ "je",
+ "jeep",
+ "jetzt",
+ "jewelry",
+ "jio",
+ "jll",
+ "jm",
+ "jmp",
+ "jnj",
+ "jo",
+ "jobs",
+ "joburg",
+ "jot",
+ "joy",
+ "jp",
+ "jpmorgan",
+ "jprs",
+ "juegos",
+ "juniper",
+ "kaufen",
+ "kddi",
+ "ke",
+ "kerryhotels",
+ "kerrylogistics",
+ "kerryproperties",
+ "kfh",
+ "kg",
+ "kh",
+ "ki",
+ "kia",
+ "kim",
+ "kinder",
+ "kindle",
+ "kitchen",
+ "kiwi",
+ "km",
+ "kn",
+ "koeln",
+ "komatsu",
+ "kosher",
+ "kp",
+ "kpmg",
+ "kpn",
+ "kr",
+ "krd",
+ "kred",
+ "kuokgroup",
+ "kw",
+ "ky",
+ "kyoto",
+ "kz",
+ "la",
+ "lacaixa",
+ "lamborghini",
+ "lamer",
+ "lancaster",
+ "lancia",
+ "land",
+ "landrover",
+ "lanxess",
+ "lasalle",
+ "lat",
+ "latino",
+ "latrobe",
+ "law",
+ "lawyer",
+ "lb",
+ "lc",
+ "lds",
+ "lease",
+ "leclerc",
+ "lefrak",
+ "legal",
+ "lego",
+ "lexus",
+ "lgbt",
+ "li",
+ "lidl",
+ "life",
+ "lifeinsurance",
+ "lifestyle",
+ "lighting",
+ "like",
+ "lilly",
+ "limited",
+ "limo",
+ "lincoln",
+ "linde",
+ "link",
+ "lipsy",
+ "live",
+ "living",
+ "lixil",
+ "lk",
+ "llc",
+ "llp",
+ "loan",
+ "loans",
+ "locker",
+ "locus",
+ "loft",
+ "lol",
+ "london",
+ "lotte",
+ "lotto",
+ "love",
+ "lpl",
+ "lplfinancial",
+ "lr",
+ "ls",
+ "lt",
+ "ltd",
+ "ltda",
+ "lu",
+ "lundbeck",
+ "luxe",
+ "luxury",
+ "lv",
+ "ly",
+ "ma",
+ "macys",
+ "madrid",
+ "maif",
+ "maison",
+ "makeup",
+ "man",
+ "management",
+ "mango",
+ "map",
+ "market",
+ "marketing",
+ "markets",
+ "marriott",
+ "marshalls",
+ "maserati",
+ "mattel",
+ "mba",
+ "mc",
+ "mckinsey",
+ "md",
+ "me",
+ "med",
+ "media",
+ "meet",
+ "melbourne",
+ "meme",
+ "memorial",
+ "men",
+ "menu",
+ "merckmsd",
+ "mg",
+ "mh",
+ "miami",
+ "microsoft",
+ "mil",
+ "mini",
+ "mint",
+ "mit",
+ "mitsubishi",
+ "mk",
+ "ml",
+ "mlb",
+ "mls",
+ "mm",
+ "mma",
+ "mn",
+ "mo",
+ "mobi",
+ "mobile",
+ "moda",
+ "moe",
+ "moi",
+ "mom",
+ "monash",
+ "money",
+ "monster",
+ "mormon",
+ "mortgage",
+ "moscow",
+ "moto",
+ "motorcycles",
+ "mov",
+ "movie",
+ "mp",
+ "mq",
+ "mr",
+ "ms",
+ "msd",
+ "mt",
+ "mtn",
+ "mtr",
+ "mu",
+ "museum",
+ "mutual",
+ "mv",
+ "mw",
+ "mx",
+ "my",
+ "mz",
+ "na",
+ "nab",
+ "nagoya",
+ "name",
+ "nationwide",
+ "natura",
+ "navy",
+ "nba",
+ "nc",
+ "ne",
+ "nec",
+ "net",
+ "netbank",
+ "netflix",
+ "network",
+ "neustar",
+ "new",
+ "news",
+ "next",
+ "nextdirect",
+ "nexus",
+ "nf",
+ "nfl",
+ "ng",
+ "ngo",
+ "nhk",
+ "ni",
+ "nico",
+ "nike",
+ "nikon",
+ "ninja",
+ "nissan",
+ "nissay",
+ "nl",
+ "no",
+ "nokia",
+ "northwesternmutual",
+ "norton",
+ "now",
+ "nowruz",
+ "nowtv",
+ "np",
+ "nr",
+ "nra",
+ "nrw",
+ "ntt",
+ "nu",
+ "nyc",
+ "nz",
+ "obi",
+ "observer",
+ "off",
+ "office",
+ "okinawa",
+ "olayan",
+ "olayangroup",
+ "oldnavy",
+ "ollo",
+ "om",
+ "omega",
+ "one",
+ "ong",
+ "onl",
+ "online",
+ "onyourside",
+ "ooo",
+ "open",
+ "oracle",
+ "orange",
+ "org",
+ "organic",
+ "origins",
+ "osaka",
+ "otsuka",
+ "ott",
+ "ovh",
+ "pa",
+ "page",
+ "panasonic",
+ "paris",
+ "pars",
+ "partners",
+ "parts",
+ "party",
+ "passagens",
+ "pay",
+ "pccw",
+ "pe",
+ "pet",
+ "pf",
+ "pfizer",
+ "pg",
+ "ph",
+ "pharmacy",
+ "phd",
+ "philips",
+ "phone",
+ "photo",
+ "photography",
+ "photos",
+ "physio",
+ "pics",
+ "pictet",
+ "pictures",
+ "pid",
+ "pin",
+ "ping",
+ "pink",
+ "pioneer",
+ "pizza",
+ "pk",
+ "pl",
+ "place",
+ "play",
+ "playstation",
+ "plumbing",
+ "plus",
+ "pm",
+ "pn",
+ "pnc",
+ "pohl",
+ "poker",
+ "politie",
+ "porn",
+ "post",
+ "pr",
+ "pramerica",
+ "praxi",
+ "press",
+ "prime",
+ "pro",
+ "prod",
+ "productions",
+ "prof",
+ "progressive",
+ "promo",
+ "properties",
+ "property",
+ "protection",
+ "pru",
+ "prudential",
+ "ps",
+ "pt",
+ "pub",
+ "pw",
+ "pwc",
+ "py",
+ "qa",
+ "qpon",
+ "quebec",
+ "quest",
+ "qvc",
+ "racing",
+ "radio",
+ "raid",
+ "re",
+ "read",
+ "realestate",
+ "realtor",
+ "realty",
+ "recipes",
+ "red",
+ "redstone",
+ "redumbrella",
+ "rehab",
+ "reise",
+ "reisen",
+ "reit",
+ "reliance",
+ "ren",
+ "rent",
+ "rentals",
+ "repair",
+ "report",
+ "republican",
+ "rest",
+ "restaurant",
+ "review",
+ "reviews",
+ "rexroth",
+ "rich",
+ "richardli",
+ "ricoh",
+ "ril",
+ "rio",
+ "rip",
+ "rmit",
+ "ro",
+ "rocher",
+ "rocks",
+ "rodeo",
+ "rogers",
+ "room",
+ "rs",
+ "rsvp",
+ "ru",
+ "rugby",
+ "ruhr",
+ "run",
+ "rw",
+ "rwe",
+ "ryukyu",
+ "sa",
+ "saarland",
+ "safe",
+ "safety",
+ "sakura",
+ "sale",
+ "salon",
+ "samsclub",
+ "samsung",
+ "sandvik",
+ "sandvikcoromant",
+ "sanofi",
+ "sap",
+ "sarl",
+ "sas",
+ "save",
+ "saxo",
+ "sb",
+ "sbi",
+ "sbs",
+ "sc",
+ "sca",
+ "scb",
+ "schaeffler",
+ "schmidt",
+ "scholarships",
+ "school",
+ "schule",
+ "schwarz",
+ "science",
+ "scjohnson",
+ "scot",
+ "sd",
+ "se",
+ "search",
+ "seat",
+ "secure",
+ "security",
+ "seek",
+ "select",
+ "sener",
+ "services",
+ "ses",
+ "seven",
+ "sew",
+ "sex",
+ "sexy",
+ "sfr",
+ "sg",
+ "sh",
+ "shangrila",
+ "sharp",
+ "shaw",
+ "shell",
+ "shia",
+ "shiksha",
+ "shoes",
+ "shop",
+ "shopping",
+ "shouji",
+ "show",
+ "showtime",
+ "si",
+ "silk",
+ "sina",
+ "singles",
+ "site",
+ "sj",
+ "sk",
+ "ski",
+ "skin",
+ "sky",
+ "skype",
+ "sl",
+ "sling",
+ "sm",
+ "smart",
+ "smile",
+ "sn",
+ "sncf",
+ "so",
+ "soccer",
+ "social",
+ "softbank",
+ "software",
+ "sohu",
+ "solar",
+ "solutions",
+ "song",
+ "sony",
+ "soy",
+ "spa",
+ "space",
+ "sport",
+ "spot",
+ "spreadbetting",
+ "sr",
+ "srl",
+ "ss",
+ "st",
+ "stada",
+ "staples",
+ "star",
+ "statebank",
+ "statefarm",
+ "stc",
+ "stcgroup",
+ "stockholm",
+ "storage",
+ "store",
+ "stream",
+ "studio",
+ "study",
+ "style",
+ "su",
+ "sucks",
+ "supplies",
+ "supply",
+ "support",
+ "surf",
+ "surgery",
+ "suzuki",
+ "sv",
+ "swatch",
+ "swiftcover",
+ "swiss",
+ "sx",
+ "sy",
+ "sydney",
+ "systems",
+ "sz",
+ "tab",
+ "taipei",
+ "talk",
+ "taobao",
+ "target",
+ "tatamotors",
+ "tatar",
+ "tattoo",
+ "tax",
+ "taxi",
+ "tc",
+ "tci",
+ "td",
+ "tdk",
+ "team",
+ "tech",
+ "technology",
+ "tel",
+ "temasek",
+ "tennis",
+ "teva",
+ "tf",
+ "tg",
+ "th",
+ "thd",
+ "theater",
+ "theatre",
+ "tiaa",
+ "tickets",
+ "tienda",
+ "tiffany",
+ "tips",
+ "tires",
+ "tirol",
+ "tj",
+ "tjmaxx",
+ "tjx",
+ "tk",
+ "tkmaxx",
+ "tl",
+ "tm",
+ "tmall",
+ "tn",
+ "to",
+ "today",
+ "tokyo",
+ "tools",
+ "top",
+ "toray",
+ "toshiba",
+ "total",
+ "tours",
+ "town",
+ "toyota",
+ "toys",
+ "tr",
+ "trade",
+ "trading",
+ "training",
+ "travel",
+ "travelchannel",
+ "travelers",
+ "travelersinsurance",
+ "trust",
+ "trv",
+ "tt",
+ "tube",
+ "tui",
+ "tunes",
+ "tushu",
+ "tv",
+ "tvs",
+ "tw",
+ "tz",
+ "ua",
+ "ubank",
+ "ubs",
+ "ug",
+ "uk",
+ "unicom",
+ "university",
+ "uno",
+ "uol",
+ "ups",
+ "us",
+ "uy",
+ "uz",
+ "va",
+ "vacations",
+ "vana",
+ "vanguard",
+ "vc",
+ "ve",
+ "vegas",
+ "ventures",
+ "verisign",
+ "versicherung",
+ "vet",
+ "vg",
+ "vi",
+ "viajes",
+ "video",
+ "vig",
+ "viking",
+ "villas",
+ "vin",
+ "vip",
+ "virgin",
+ "visa",
+ "vision",
+ "viva",
+ "vivo",
+ "vlaanderen",
+ "vn",
+ "vodka",
+ "volkswagen",
+ "volvo",
+ "vote",
+ "voting",
+ "voto",
+ "voyage",
+ "vu",
+ "vuelos",
+ "wales",
+ "walmart",
+ "walter",
+ "wang",
+ "wanggou",
+ "watch",
+ "watches",
+ "weather",
+ "weatherchannel",
+ "webcam",
+ "weber",
+ "website",
+ "wed",
+ "wedding",
+ "weibo",
+ "weir",
+ "wf",
+ "whoswho",
+ "wien",
+ "wiki",
+ "williamhill",
+ "win",
+ "windows",
+ "wine",
+ "winners",
+ "wme",
+ "wolterskluwer",
+ "woodside",
+ "work",
+ "works",
+ "world",
+ "wow",
+ "ws",
+ "wtc",
+ "wtf",
+ "xbox",
+ "xerox",
+ "xfinity",
+ "xihuan",
+ "xin",
+ "xn--11b4c3d",
+ "xn--1ck2e1b",
+ "xn--1qqw23a",
+ "xn--2scrj9c",
+ "xn--30rr7y",
+ "xn--3bst00m",
+ "xn--3ds443g",
+ "xn--3e0b707e",
+ "xn--3hcrj9c",
+ "xn--3oq18vl8pn36a",
+ "xn--3pxu8k",
+ "xn--42c2d9a",
+ "xn--45br5cyl",
+ "xn--45brj9c",
+ "xn--45q11c",
+ "xn--4dbrk0ce",
+ "xn--4gbrim",
+ "xn--54b7fta0cc",
+ "xn--55qw42g",
+ "xn--55qx5d",
+ "xn--5su34j936bgsg",
+ "xn--5tzm5g",
+ "xn--6frz82g",
+ "xn--6qq986b3xl",
+ "xn--80adxhks",
+ "xn--80ao21a",
+ "xn--80aqecdr1a",
+ "xn--80asehdb",
+ "xn--80aswg",
+ "xn--8y0a063a",
+ "xn--90a3ac",
+ "xn--90ae",
+ "xn--90ais",
+ "xn--9dbq2a",
+ "xn--9et52u",
+ "xn--9krt00a",
+ "xn--b4w605ferd",
+ "xn--bck1b9a5dre4c",
+ "xn--c1avg",
+ "xn--c2br7g",
+ "xn--cck2b3b",
+ "xn--cckwcxetd",
+ "xn--cg4bki",
+ "xn--clchc0ea0b2g2a9gcd",
+ "xn--czr694b",
+ "xn--czrs0t",
+ "xn--czru2d",
+ "xn--d1acj3b",
+ "xn--d1alf",
+ "xn--e1a4c",
+ "xn--eckvdtc9d",
+ "xn--efvy88h",
+ "xn--fct429k",
+ "xn--fhbei",
+ "xn--fiq228c5hs",
+ "xn--fiq64b",
+ "xn--fiqs8s",
+ "xn--fiqz9s",
+ "xn--fjq720a",
+ "xn--flw351e",
+ "xn--fpcrj9c3d",
+ "xn--fzc2c9e2c",
+ "xn--fzys8d69uvgm",
+ "xn--g2xx48c",
+ "xn--gckr3f0f",
+ "xn--gecrj9c",
+ "xn--gk3at1e",
+ "xn--h2breg3eve",
+ "xn--h2brj9c",
+ "xn--h2brj9c8c",
+ "xn--hxt814e",
+ "xn--i1b6b1a6a2e",
+ "xn--imr513n",
+ "xn--io0a7i",
+ "xn--j1aef",
+ "xn--j1amh",
+ "xn--j6w193g",
+ "xn--jlq480n2rg",
+ "xn--jlq61u9w7b",
+ "xn--jvr189m",
+ "xn--kcrx77d1x4a",
+ "xn--kprw13d",
+ "xn--kpry57d",
+ "xn--kput3i",
+ "xn--l1acc",
+ "xn--lgbbat1ad8j",
+ "xn--mgb9awbf",
+ "xn--mgba3a3ejt",
+ "xn--mgba3a4f16a",
+ "xn--mgba7c0bbn0a",
+ "xn--mgbaakc7dvf",
+ "xn--mgbaam7a8h",
+ "xn--mgbab2bd",
+ "xn--mgbah1a3hjkrd",
+ "xn--mgbai9azgqp6j",
+ "xn--mgbayh7gpa",
+ "xn--mgbbh1a",
+ "xn--mgbbh1a71e",
+ "xn--mgbc0a9azcg",
+ "xn--mgbca7dzdo",
+ "xn--mgbcpq6gpa1a",
+ "xn--mgberp4a5d4ar",
+ "xn--mgbgu82a",
+ "xn--mgbi4ecexp",
+ "xn--mgbpl2fh",
+ "xn--mgbt3dhd",
+ "xn--mgbtx2b",
+ "xn--mgbx4cd0ab",
+ "xn--mix891f",
+ "xn--mk1bu44c",
+ "xn--mxtq1m",
+ "xn--ngbc5azd",
+ "xn--ngbe9e0a",
+ "xn--ngbrx",
+ "xn--node",
+ "xn--nqv7f",
+ "xn--nqv7fs00ema",
+ "xn--nyqy26a",
+ "xn--o3cw4h",
+ "xn--ogbpf8fl",
+ "xn--otu796d",
+ "xn--p1acf",
+ "xn--p1ai",
+ "xn--pgbs0dh",
+ "xn--pssy2u",
+ "xn--q7ce6a",
+ "xn--q9jyb4c",
+ "xn--qcka1pmc",
+ "xn--qxa6a",
+ "xn--qxam",
+ "xn--rhqv96g",
+ "xn--rovu88b",
+ "xn--rvc1e0am3e",
+ "xn--s9brj9c",
+ "xn--ses554g",
+ "xn--t60b56a",
+ "xn--tckwe",
+ "xn--tiq49xqyj",
+ "xn--unup4y",
+ "xn--vermgensberater-ctb",
+ "xn--vermgensberatung-pwb",
+ "xn--vhquv",
+ "xn--vuq861b",
+ "xn--w4r85el8fhu5dnra",
+ "xn--w4rs40l",
+ "xn--wgbh1c",
+ "xn--wgbl6a",
+ "xn--xhq521b",
+ "xn--xkc2al3hye2a",
+ "xn--xkc2dl3a5ee0h",
+ "xn--y9a3aq",
+ "xn--yfro4i67o",
+ "xn--ygbi2ammx",
+ "xn--zfr164b",
+ "xxx",
+ "xyz",
+ "yachts",
+ "yahoo",
+ "yamaxun",
+ "yandex",
+ "ye",
+ "yodobashi",
+ "yoga",
+ "yokohama",
+ "you",
+ "youtube",
+ "yt",
+ "yun",
+ "za",
+ "zappos",
+ "zara",
+ "zero",
+ "zip",
+ "zm",
+ "zone",
+ "zuerich",
+ "zw"
+ };
+
struct HtmlEscapeSequence {
char unescape_char;
std::string escape_sequence;
@@ -53,6 +1561,10 @@ namespace QuickMedia {
return c >= '0' && c <= '9';
}
+ static bool is_whitespace(char c) { // Returns true if |c| is one of the six ASCII whitespace characters, any of which ends a URL.
+     return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v' || c == '\f'; // '\r' matters for CRLF text: otherwise "example.com\r\n" keeps the '\r' inside the URL and its TLD.
+ }
+
std::string url_param_encode(const std::string &param) {
std::ostringstream result;
result.fill('0');
@@ -74,50 +1586,18 @@ namespace QuickMedia {
return is_alpha(c) || is_digit(c);
}
- static bool is_url_character(char c) {
- switch(c) {
- case '%':
- // Reserved
- case ':':
- case '/':
- case '?':
- case '#':
- case '[':
- case ']':
- case '@':
- case '!':
- case '$':
- case '&':
- case '\'':
- case '(':
- case ')':
- case '*':
- case '+':
- case ',':
- case ';':
- case '=':
- // Unreserved:
- case '-':
- case '.':
- case '_':
- case '~':
- return true;
- default:
- return is_alpha(c) || is_digit(c);
- }
- }
-
// Implementation follows URI standard in general: https://tools.ietf.org/html/rfc3986#section-2.2.
// Also checks for balanced parentheses to allow text such as: (see: example.com/) that excludes the last parenthesis.
- void extract_urls(const std::string &str, std::vector<std::string> &urls) {
+ std::vector<Range> extract_urls(const std::string &str) {
+ std::vector<Range> ranges;
+
int parentheses_depth = 0;
+ bool is_valid_url = false;
size_t url_start = std::string::npos;
- size_t url_dot_index = std::string::npos;
+
// str.size() is fine, we want to include the NULL character so we can extract url at the end of the string
for(size_t i = 0; i < (size_t)str.size() + 1; ++i) {
char c = str[i];
- if(c == '.' && url_start != std::string::npos && url_dot_index == std::string::npos)
- url_dot_index = i;
if(url_start != std::string::npos) {
if(c == '(')
@@ -126,25 +1606,103 @@ namespace QuickMedia {
--parentheses_depth;
}
+ if(url_start != std::string::npos && !is_valid_url && (is_whitespace(c) || c == '/' || c == ',' || c == ':' || c == ')' || c == '\0' || (c == '.' && i == str.size()))) {
+ size_t tld_end = i - 1;
+ char prev_char = str[i - 1];
+ // We want to remove the last . or , because the string could contain for example "click on this link: example.com. There you can..."
+ // and we want those links to work, I guess?
+ if(prev_char == '.' || prev_char == ',')
+ --tld_end;
+ else if(prev_char == ')' && parentheses_depth != 0)
+ --tld_end;
+ size_t tld_start = tld_end;
+
+ while(tld_start > url_start) {
+ if(str[tld_start] == '.')
+ break;
+ --tld_start;
+ }
+
+ if(tld_start > url_start && TLDS.find(str.substr(tld_start + 1, tld_end - tld_start)) != TLDS.end())
+ is_valid_url = true;
+ }
+
if(url_start == std::string::npos && is_url_start_char(c)) {
url_start = i;
- } else if(url_start != std::string::npos && !is_url_character(c)) {
+ } else if(url_start != std::string::npos && (is_whitespace(c) || c == '\0')) {
// Its only an url if there is a dot and the dot is not the last character in the url, for example "example.com" is an url but "example." is not.
- if(url_dot_index != std::string::npos && url_dot_index != i - 1) {
+ if(is_valid_url) {
size_t url_length = i - url_start;
char prev_char = str[i - 1];
- // We want to remove the last . or , because the string could contain for example "click on this like: example.com. There you can..."
+ // We want to remove the last . or , because the string could contain for example "click on this link: example.com. There you can..."
// and we want those links to work, I guess?
if(prev_char == '.' || prev_char == ',')
--url_length;
- if(prev_char == ')' && parentheses_depth != 0)
+ else if(prev_char == ')' && parentheses_depth != 0)
--url_length;
if(url_length > 0)
- urls.push_back(str.substr(url_start, url_length));
+ ranges.push_back({url_start, url_length});
}
+
url_start = std::string::npos;
- url_dot_index = std::string::npos;
+ is_valid_url = false;
}
+ }
+
+ return ranges;
+ }
+
+ std::vector<std::string> ranges_get_strings(const std::string &str, const std::vector<Range> &ranges) { // Returns the substring of |str| covered by each range, in the order of |ranges|.
+     std::vector<std::string> result;
+     result.reserve(ranges.size());
+     for(const Range &span : ranges) {
+         result.emplace_back(str.data() + span.start, span.length); // Copies span.length chars starting at offset span.start; offsets are not bounds-checked.
+     }
+     return result;
}
+
+ // Returns true if |c| is the first byte of a UTF-8 encoded codepoint: either
+ // an ASCII byte (0xxxxxxx) or the lead byte of a 2-, 3- or 4-byte sequence
+ // (110xxxxx, 1110xxxx, 11110xxx).
+ // Returns false for continuation bytes (10xxxxxx) and for the byte values
+ // 11111xxx, which match none of the patterns above.
+ static bool is_start_of_utf8_codepoint(uint8_t c) {
+     const bool is_ascii = (c & 0x80) == 0x00;
+     const bool is_lead_of_two = (c & 0xE0) == 0xC0;
+     const bool is_lead_of_three = (c & 0xF0) == 0xE0;
+     const bool is_lead_of_four = (c & 0xF8) == 0xF0;
+     return is_ascii || is_lead_of_two || is_lead_of_three || is_lead_of_four;
+ }
+
+ // Converts |ranges| in place from UTF-8 byte offsets/lengths within |str| to
+ // codepoint (UTF-32) indices/lengths, so they can index the decoded text.
+ // |ranges| must be sorted by start and must not overlap, which is the order
+ // extract_urls produces. Bytes past the end of |str| are ignored.
+ // Every range is converted, including the last one and a range that ends
+ // exactly at the end of |str|.
+ void convert_utf8_to_utf32_ranges(const std::string &str, std::vector<Range> &ranges) {
+     size_t byte_index = 0;
+     // Number of codepoints that begin before byte_index.
+     size_t num_codepoints = 0;
+
+     // Moves byte_index forward to byte_limit (or to the end of |str|) and
+     // counts the codepoints that begin in the bytes passed over.
+     const auto advance_to = [&](size_t byte_limit) {
+         for(; byte_index < byte_limit && byte_index < str.size(); ++byte_index) {
+             if(is_start_of_utf8_codepoint(static_cast<uint8_t>(str[byte_index])))
+                 ++num_codepoints;
+         }
+     };
+
+     for(Range &range : ranges) {
+         const size_t byte_end = range.start + range.length;
+
+         advance_to(range.start);
+         const size_t codepoint_start = num_codepoints;
+         advance_to(byte_end);
+
+         range.start = codepoint_start;
+         range.length = num_codepoints - codepoint_start;
+     }
}
} \ No newline at end of file
diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp
index fbc2699..ea1292d 100644
--- a/src/QuickMedia.cpp
+++ b/src/QuickMedia.cpp
@@ -4232,8 +4232,7 @@ namespace QuickMedia {
}
// TODO: If content type is a file, show file-manager prompt where it should be saved and asynchronously save it instead
- std::vector<std::string> urls;
- extract_urls(selected->get_description(), urls);
+ std::vector<std::string> urls = ranges_get_strings(selected->get_description(), extract_urls(selected->get_description()));
if(urls.size() == 1) {
launch_url(urls[0]);
return true;
diff --git a/src/Text.cpp b/src/Text.cpp
index 171bd70..a9f1147 100644
--- a/src/Text.cpp
+++ b/src/Text.cpp
@@ -14,6 +14,7 @@ namespace QuickMedia
{
static const float TAB_WIDTH = 4.0f;
static const float WORD_WRAP_MIN_SIZE = 80.0f;
+ static const sf::Color URL_COLOR(35, 140, 245);
size_t StringViewUtf32::find(const StringViewUtf32 &other, size_t offset) const {
if(offset >= size)
@@ -322,6 +323,12 @@ namespace QuickMedia
assert(dirty);
dirtyText = false;
splitTextByFont();
+ // TODO: Optimize
+ auto u8 = str.toUtf8();
+ std::string *u8_str = (std::string*)&u8;
+ url_ranges = extract_urls(*u8_str);
+ convert_utf8_to_utf32_ranges(*u8_str, url_ranges);
+ dirty = true;
}
if(!update_even_if_not_dirty && !dirty)
@@ -343,9 +350,12 @@ namespace QuickMedia
float latin_font_height = latin_font->getGlyph(' ', characterSize, false).advance;
float hspace = latin_font_height + characterSpacing;
float vspace = latin_font->getLineSpacing(characterSize); // TODO: What about japanese font???
+
+ size_t url_range_index = 0;
sf::Vector2f glyphPos;
sf::Uint32 prevCodePoint = 0;
+ // TODO: Only do this if dirtyText
for(usize textElementIndex = 0; textElementIndex < textElements.size(); ++textElementIndex)
{
TextElement &textElement = textElements[textElementIndex];
@@ -392,6 +402,17 @@ namespace QuickMedia
textElement.position = glyphPos;
for(size_t i = 0; i < textElement.text.size; ++i)
{
+ sf::Color text_color = color;
+ if(url_range_index < url_ranges.size()) {
+ size_t string_offset = (textElement.text.data + i) - str.getData();
+ if(string_offset >= url_ranges[url_range_index].start && string_offset < url_ranges[url_range_index].start + url_ranges[url_range_index].length) {
+ text_color = URL_COLOR;
+ text_color.a = color.a;
+ if(string_offset + 1 == url_ranges[url_range_index].start + url_ranges[url_range_index].length)
+ ++url_range_index;
+ }
+ }
+
sf::Uint32 codePoint = textElement.text[i];
// TODO: Make this work when combining multiple different fonts (for example latin and japanese).
// For japanese we could use a hack, because all japanese characters are monospace (exception being half-width characters).
@@ -473,12 +494,12 @@ namespace QuickMedia
sf::Vector2f textureBottomLeft(glyph.textureRect.left, glyph.textureRect.top + glyph.textureRect.height);
sf::Vector2f textureBottomRight(glyph.textureRect.left + glyph.textureRect.width, glyph.textureRect.top + glyph.textureRect.height);
- vertices[vertices_index].append({ vertexTopRight, color, textureTopRight });
- vertices[vertices_index].append({ vertexTopLeft, color, textureTopLeft });
- vertices[vertices_index].append({ vertexBottomLeft, color, textureBottomLeft });
- vertices[vertices_index].append({ vertexBottomLeft, color, textureBottomLeft });
- vertices[vertices_index].append({ vertexBottomRight, color, textureBottomRight });
- vertices[vertices_index].append({ vertexTopRight, color, textureTopRight });
+ vertices[vertices_index].append({ vertexTopRight, text_color, textureTopRight });
+ vertices[vertices_index].append({ vertexTopLeft, text_color, textureTopLeft });
+ vertices[vertices_index].append({ vertexBottomLeft, text_color, textureBottomLeft });
+ vertices[vertices_index].append({ vertexBottomLeft, text_color, textureBottomLeft });
+ vertices[vertices_index].append({ vertexBottomRight, text_color, textureBottomRight });
+ vertices[vertices_index].append({ vertexTopRight, text_color, textureTopRight });
glyphPos.x += glyph.advance + characterSpacing;
vertices_linear.push_back({vertices_index, vertexStart, 0, codePoint});
@@ -559,6 +580,7 @@ namespace QuickMedia
}
boundingBox.height = num_lines * line_height;
+ //url_ranges.clear();
if(!editable)
vertices_linear.clear();
}
diff --git a/src/plugins/Matrix.cpp b/src/plugins/Matrix.cpp
index daae545..583bad8 100644
--- a/src/plugins/Matrix.cpp
+++ b/src/plugins/Matrix.cpp
@@ -2143,7 +2143,7 @@ namespace QuickMedia {
message_content_extract_thumbnail_size(*content_json, message->thumbnail_size);
message->type = MessageType::VIDEO;
if(message->thumbnail_url.empty())
- prefix = "🎥 play ";
+ prefix = "🎥 Play ";
} else if(strcmp(content_type.GetString(), "m.audio") == 0) {
const rapidjson::Value &url_json = GetMember(*content_json, "url");
if(!url_json.IsString() || strncmp(url_json.GetString(), "mxc://", 6) != 0)
@@ -2151,7 +2151,7 @@ namespace QuickMedia {
message->url = homeserver + "/_matrix/media/r0/download/" + (url_json.GetString() + 6);
message->type = MessageType::AUDIO;
- prefix = "🎵 play ";
+ prefix = "🎵 Play ";
} else if(strcmp(content_type.GetString(), "m.file") == 0) {
const rapidjson::Value &url_json = GetMember(*content_json, "url");
if(!url_json.IsString() || strncmp(url_json.GetString(), "mxc://", 6) != 0)
@@ -2159,7 +2159,7 @@ namespace QuickMedia {
message->url = homeserver + "/_matrix/media/r0/download/" + (url_json.GetString() + 6);
message->type = MessageType::FILE;
- prefix = "💾 download ";
+ prefix = "💾 Download ";
} else if(strcmp(content_type.GetString(), "m.emote") == 0) { // this is a /me message, TODO: show /me messages differently
message->type = MessageType::TEXT;
prefix = "*" + room_data->get_user_display_name(user) + "* ";
diff --git a/tests/main.cpp b/tests/main.cpp
index 647fdff..c5138e3 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -6,44 +6,45 @@
int main() {
std::vector<std::string> urls;
+ const char *str;
- urls.clear();
- QuickMedia::extract_urls("example.com", urls);
+ str = "example.com";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 1);
assert_equals(urls[0], "example.com");
- urls.clear();
- QuickMedia::extract_urls("example.com, is where I like to go", urls);
+ str = "example.com, is where I like to go";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 1);
assert_equals(urls[0], "example.com");
- urls.clear();
- QuickMedia::extract_urls("The website I like to go to is example.com", urls);
+ str = "The website I like to go to is example.com";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 1);
assert_equals(urls[0], "example.com");
- urls.clear();
- QuickMedia::extract_urls("example.com. Is also a website", urls);
+ str = "example.com. Is also a website";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 1);
assert_equals(urls[0], "example.com");
- urls.clear();
- QuickMedia::extract_urls("these. are. not. websites.", urls);
+ str = "these. are. not. websites.";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 0);
- urls.clear();
- QuickMedia::extract_urls("This is not an url: example.", urls);
+ str = "This is not an url: example.";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 0);
- urls.clear();
- QuickMedia::extract_urls("the.se/~#423-_/2f.no/3df a.re considered sub.websit.es", urls);
+ str = "the.se/~#423-_/2f.no/3df a.re considered sub.websit.es, this.is.not";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 3);
assert_equals(urls[0], "the.se/~#423-_/2f.no/3df");
assert_equals(urls[1], "a.re");
assert_equals(urls[2], "sub.websit.es");
- urls.clear();
- QuickMedia::extract_urls("(see https://emojipedia.org/emoji/%23%EF%B8%8F%E2%83%A3/)", urls);
+ str = "(see https://emojipedia.org/emoji/%23%EF%B8%8F%E2%83%A3/)";
+ urls = QuickMedia::ranges_get_strings(str, QuickMedia::extract_urls(str));
assert_equals(urls.size(), 1);
assert_equals(urls[0], "https://emojipedia.org/emoji/%23%EF%B8%8F%E2%83%A3/");
return 0;