Files
cyclingbot/parser/parser.go
2026-04-25 19:17:58 -05:00

89 lines
2.5 KiB
Go

// Package parser extracts cycling distances from free-text Discord messages.
package parser
import (
"regexp"
"strconv"
"strings"
)
// miToKM is the factor ParseKM passes to firstMatch to convert a distance
// given in miles into kilometres. One international mile is exactly
// 1.609344 km; the value here is that figure rounded to five decimal places.
const miToKM = 1.60934
// thousandsRe reports whether a captured number uses the comma as a thousands
// separator (e.g. "1,000", "12,345,678" or "5,981.9").
//
// The pattern is anchored at both ends so that the whole number has to be
// made of one to three leading digits, one or more ",ddd" groups and an
// optional ".ddd" fraction. Without the end anchor a decimal-comma value such
// as "1,2345" would be accepted because of its "1,234" prefix and would then
// be read as 12345 instead of 1.2345.
var thousandsRe = regexp.MustCompile(`^\d{1,3}(?:,\d{3})+(?:\.\d+)?$`)
// distancePrefix is the part shared by every distance pattern below: the
// case-insensitive flag, a leading word boundary, capture group 1 holding the
// number, and any whitespace between the number and its unit.
//
// The number is either thousands-grouped with an optional fraction
// ("1,000", "5,981.9") or a plain run of digits with an optional "." or ","
// decimal part ("25", "25.5", "25,5").
const distancePrefix = `(?i)\b(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+(?:[.,]\d+)?)\s*`

var (
	// kmPattern matches an explicit kilometre unit:
	// 25km, 25.5 km, 25,5km, 25KM, 25 kms, 25 kilometers, 25 kilometres.
	kmPattern = regexp.MustCompile(
		distancePrefix + `(?:km|kms|kilometer|kilometers|kilometre|kilometres)\b`,
	)

	// kPattern matches a number followed by a bare "k", as in "did a 100k"
	// or "50k ride". The pattern itself places no restriction on the
	// surrounding words; ParseKM only consults it when looksLikeCycling
	// accepts the message.
	kPattern = regexp.MustCompile(
		distancePrefix + `k\b`,
	)

	// miPattern matches a mile unit: 25mi, 25 mi, 25 miles, 25mile.
	miPattern = regexp.MustCompile(
		distancePrefix + `(?:mi|mile|miles)\b`,
	)
)
// ParseKM looks for a distance in text and reports it in kilometres.
//
// Unit families are tried in a fixed order, and the first family that yields
// a value wins regardless of where it sits in the message:
//
//  1. explicit kilometre units (kmPattern), taken as-is;
//  2. mile units (miPattern), scaled by miToKM;
//  3. a bare "k" suffix (kPattern), but only when looksLikeCycling accepts
//     the message, because "k" on its own is ambiguous.
//
// Within a family the leftmost match is used. The result is 0, false when no
// distance could be parsed.
func ParseKM(text string) (float64, bool) {
	dist, found := firstMatch(kmPattern, text, 1.0)
	if !found {
		dist, found = firstMatch(miPattern, text, miToKM)
	}
	if !found && looksLikeCycling(text) {
		dist, found = firstMatch(kPattern, text, 1.0)
	}
	return dist, found
}
// firstMatch applies re to text and converts capture group 1 of the leftmost
// match into a float64 multiplied by multiplier.
//
// A comma in the captured number is interpreted in one of two ways: when
// thousandsRe accepts the number the commas are dropped ("1,000" -> "1000"),
// otherwise each comma is treated as a decimal point ("25,5" -> "25.5").
//
// It returns 0, false when re does not match or the normalised number cannot
// be parsed by strconv.ParseFloat.
func firstMatch(re *regexp.Regexp, text string, multiplier float64) (float64, bool) {
	groups := re.FindStringSubmatch(text)
	if groups == nil {
		return 0, false
	}

	digits := groups[1]
	if strings.Contains(digits, ",") {
		// Default to the decimal-comma reading; switch to removing the
		// commas only for thousands-grouped numbers.
		replacement := "."
		if thousandsRe.MatchString(digits) {
			replacement = ""
		}
		digits = strings.ReplaceAll(digits, ",", replacement)
	}

	value, err := strconv.ParseFloat(digits, 64)
	if err != nil {
		return 0, false
	}
	return value * multiplier, true
}
// cyclingKeywords lists the lower-case fragments that looksLikeCycling
// searches for. They are matched as substrings, so a stem such as "cycl"
// also covers "cycling" and "cyclist".
var cyclingKeywords = []string{
	// Riding verbs and nouns.
	"ride",
	"rode",
	"cycl",
	"bike",
	"biked",
	"biking",
	"cycle",

	// Platforms, equipment and disciplines.
	"zwift",
	"strava",
	"trainer",
	"gravel",
	"mtb",
	"road",
	"spin",

	// Ride and route vocabulary.
	"century",
	"loop",
	"route",
	"segment",
	"climb",
	"climbing",
}
// looksLikeCycling reports whether text, compared case-insensitively,
// contains any entry of cyclingKeywords as a substring.
func looksLikeCycling(text string) bool {
	haystack := strings.ToLower(text)
	for idx := range cyclingKeywords {
		if !strings.Contains(haystack, cyclingKeywords[idx]) {
			continue
		}
		return true
	}
	return false
}