// Package parser extracts cycling distances from free-text Discord messages. package parser import ( "regexp" "strconv" "strings" ) const miToKM = 1.60934 // thousandsRe detects numbers where comma is a thousands separator (e.g. 1,000 or 5,981.9) var thousandsRe = regexp.MustCompile(`^\d{1,3}(,\d{3})+`) var ( // Matches: 25km, 25.5 km, 25,5km, 25KM, 25 kilometers, 25 kilometres kmPattern = regexp.MustCompile( `(?i)\b(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+(?:[.,]\d+)?)\s*(?:km|kms|kilometer|kilometers|kilometre|kilometres)\b`, ) // Matches standalone "k" used in cycling context: "did a 100k", "50k ride" // Only match when followed by a word boundary and a non-unit word (ride, loop, etc.) // or preceded by cycling verbs. kPattern = regexp.MustCompile( `(?i)\b(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+(?:[.,]\d+)?)\s*k\b`, ) // Matches: 25mi, 25 mi, 25 miles, 25mile miPattern = regexp.MustCompile( `(?i)\b(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+(?:[.,]\d+)?)\s*(?:mi|mile|miles)\b`, ) ) // ParseKM extracts the first distance found in text and returns it in KM. // Returns 0, false if no distance could be parsed. func ParseKM(text string) (float64, bool) { if km, ok := firstMatch(kmPattern, text, 1.0); ok { return km, true } if km, ok := firstMatch(miPattern, text, miToKM); ok { return km, true } // "k" alone is ambiguous; only accept it when the message looks cycling-related if looksLikeCycling(text) { if km, ok := firstMatch(kPattern, text, 1.0); ok { return km, true } } return 0, false } func firstMatch(re *regexp.Regexp, text string, multiplier float64) (float64, bool) { m := re.FindStringSubmatch(text) if m == nil { return 0, false } numStr := m[1] if strings.Contains(numStr, ",") { if thousandsRe.MatchString(numStr) { // e.g. "1,000" or "5,981.9" — comma is thousands separator numStr = strings.ReplaceAll(numStr, ",", "") } else { // e.g. "25,5" — comma is decimal separator numStr = strings.ReplaceAll(numStr, ",", ".") } } v, err := strconv.ParseFloat(numStr, 64) if err != nil { return 0, false } return v * multiplier, true } var cyclingKeywords = []string{ "ride", "rode", "cycl", "bike", "biked", "biking", "cycle", "zwift", "strava", "trainer", "gravel", "mtb", "road", "spin", "century", "loop", "route", "segment", "climb", "climbing", } func looksLikeCycling(text string) bool { lower := strings.ToLower(text) for _, kw := range cyclingKeywords { if strings.Contains(lower, kw) { return true } } return false }