Files
cyclingbot/parser/parser.go
Blake Ridgway ba1770b493 first commit
2026-04-11 14:06:59 -05:00

78 lines
2.0 KiB
Go

// Package parser extracts cycling distances from free-text Discord messages.
package parser
import (
"regexp"
"strconv"
"strings"
)
// miToKM is the number of kilometres in one mile. The international mile is
// defined as exactly 1.609344 km, so the exact factor is used rather than a
// rounded one.
const miToKM = 1.609344
// Distance patterns. All three are case-insensitive, start at a word boundary,
// and capture the number (with an optional "." or "," decimal part) in
// group 1; the unit itself is not captured.
var (
	// kmPattern recognises a number followed by an explicit kilometre unit:
	// 25km, 25.5 km, 25,5km, 25KM, 25 kms, 25 kilometers, 25 kilometres.
	kmPattern = regexp.MustCompile(`(?i)\b(\d+(?:[.,]\d+)?)\s*(?:km|kms|kilometer|kilometers|kilometre|kilometres)\b`)

	// kPattern recognises a number followed by a bare "k", as in
	// "did a 100k" or "50k ride". The expression itself performs no context
	// check; ParseKM only consults it for messages that looksLikeCycling
	// accepts, because a lone "k" is ambiguous.
	kPattern = regexp.MustCompile(`(?i)\b(\d+(?:[.,]\d+)?)\s*k\b`)

	// miPattern recognises a number followed by a mile unit:
	// 25mi, 25 mi, 25 miles, 25mile.
	miPattern = regexp.MustCompile(`(?i)\b(\d+(?:[.,]\d+)?)\s*(?:mi|mile|miles)\b`)
)
// ParseKM looks for a cycling distance in text and reports it in kilometres.
//
// Units are tried in a fixed order of preference rather than by position in
// the message: explicit kilometre units first, then mile units (scaled by
// miToKM), and finally a bare "k" suffix. The bare "k" is ambiguous, so it is
// only honoured when looksLikeCycling accepts the message. Within each unit
// the leftmost occurrence is used.
//
// The second result is false, and the distance 0, when nothing could be
// parsed.
func ParseKM(text string) (float64, bool) {
	dist, found := firstMatch(kmPattern, text, 1.0)
	if !found {
		dist, found = firstMatch(miPattern, text, miToKM)
	}
	if !found && looksLikeCycling(text) {
		dist, found = firstMatch(kPattern, text, 1.0)
	}
	if !found {
		return 0, false
	}
	return dist, true
}
// firstMatch applies re to text and converts capture group 1 of the leftmost
// match to a float64, scaled by multiplier. A decimal comma in the captured
// number ("25,5") is treated as a decimal point. It returns 0, false when re
// does not match or the captured text is not a parsable number.
func firstMatch(re *regexp.Regexp, text string, multiplier float64) (float64, bool) {
	groups := re.FindStringSubmatch(text)
	if groups == nil {
		return 0, false
	}

	// strconv only understands "." as the decimal separator.
	digits := strings.Replace(groups[1], ",", ".", -1)
	if value, err := strconv.ParseFloat(digits, 64); err == nil {
		return value * multiplier, true
	}
	return 0, false
}
// cyclingStems are lower-case fragments that mark a message as
// cycling-related wherever they occur, including in the middle of a word
// ("bicycle", "ebike", "mountainbiking").
var cyclingStems = []string{"cycl", "bike", "biking"}

// cyclingKeywords are lower-case words that mark a message as cycling-related
// only when they begin a word. Inflected forms still match ("rides",
// "climbing", "roads"), but unrelated words that merely contain a keyword do
// not ("pride", "abroad", "strode").
var cyclingKeywords = []string{
	"ride", "rode", "zwift", "strava", "trainer", "gravel", "mtb", "road",
	"spin", "century", "loop", "route", "segment", "climb",
}

// looksLikeCycling reports whether text mentions cycling, judged by the
// presence of a cycling stem anywhere in the message or a cycling keyword at
// the start of a word. The comparison is case-insensitive.
func looksLikeCycling(text string) bool {
	lower := strings.ToLower(text)
	for _, stem := range cyclingStems {
		if strings.Contains(lower, stem) {
			return true
		}
	}
	for _, kw := range cyclingKeywords {
		if containsAtWordStart(lower, kw) {
			return true
		}
	}
	return false
}

// containsAtWordStart reports whether word occurs in s at a position that is
// not immediately preceded by an ASCII letter. Digits, punctuation (including
// "_" and "*" used for chat formatting) and non-ASCII bytes all count as
// separators, so "_ride_" and "50kride" match while "pride" does not.
// s is expected to be lower-cased already.
func containsAtWordStart(s, word string) bool {
	for from := 0; from < len(s); {
		i := strings.Index(s[from:], word)
		if i < 0 {
			return false
		}
		start := from + i
		if start == 0 || !isLowerASCIILetter(s[start-1]) {
			return true
		}
		// This occurrence sits inside a longer word; keep searching after it.
		from = start + 1
	}
	return false
}

// isLowerASCIILetter reports whether b is one of 'a'..'z'.
func isLowerASCIILetter(b byte) bool {
	return b >= 'a' && b <= 'z'
}