get a little stricter

This commit is contained in:
Blake Ridgway
2025-12-01 07:52:23 -06:00
parent fd2cd5b79d
commit 3f7814d9c8

View File

@@ -370,51 +370,58 @@ func isEnglishText(text string) bool {
return true return true
} }
englishCharCount := 0
nonASCIICount := 0
totalCharCount := 0
// Common English words to boost score
commonEnglish := []string{
"the ", "and ", "is ", "to ", "of ", "for ", "that ", "with ", "this ", "have ",
"from ", "would ", "could ", "about ", "more ", "which ", "been ", "their ",
}
lowerText := strings.ToLower(text) lowerText := strings.ToLower(text)
englishWordBoost := 0
for _, word := range commonEnglish { // Very common English words that should appear in legitimate English messages
if strings.Contains(lowerText, word) { requiredEnglishWords := []string{
englishWordBoost += 10 "the", "and", "is", "to", "of", "for", "that", "with", "this", "have",
} "from", "be", "are", "was", "were", "been", "i", "you", "he", "she",
"we", "they", "my", "your", "his", "her", "it", "what", "which", "who",
"when", "where", "why", "how", "can", "will", "would", "should", "could",
"do", "does", "did", "get", "got", "go", "going", "make", "made", "know",
"think", "want", "need", "like", "help", "work", "use", "ask", "say", "tell",
"give", "find", "tell", "become", "leave", "feel", "try", "ask", "need",
"meet", "include", "continue", "set", "learn", "change", "lead", "understand",
} }
for _, r := range text { englishWordCount := 0
if unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsSpace(r) || unicode.IsPunct(r) { totalWords := 0
totalCharCount++
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || // Split into words
r == ' ' || r == '.' || r == ',' || r == '!' || r == '?' || r == '-' || r == '\'' || r == '"' || words := strings.FieldsFunc(lowerText, func(r rune) bool {
r == ';' || r == ':' || r == '(' || r == ')' || r == '\n' || r == '\t' { return !unicode.IsLetter(r) && !unicode.IsNumber(r)
englishCharCount++ })
} else if r > 127 {
nonASCIICount++ for _, word := range words {
if len(word) > 0 {
totalWords++
// Check if word is in our English word list
for _, engWord := range requiredEnglishWords {
if word == engWord {
englishWordCount++
break
}
} }
} }
} }
if totalCharCount == 0 { // For short messages (less than 50 characters), be more lenient
return true if len(text) < 50 {
return englishWordCount >= 1
} }
// If more than 3 non-ASCII characters, likely spam/bot // For medium messages (50-200 chars), require at least 2 English words
if nonASCIICount > 3 { if len(text) < 200 {
return false return englishWordCount >= 2
} }
englishPercentage := float64(englishCharCount) / float64(totalCharCount) // For longer messages, require at least 10% of words to be common English words
if totalWords > 0 {
englishPercentage := float64(englishWordCount) / float64(totalWords)
return englishPercentage >= 0.1
}
// Stricter requirements with word boost return true
return englishPercentage >= 0.75 || (englishPercentage >= 0.65 && englishWordBoost > 0)
} }
// isSpamMessage checks if a message looks like spam // isSpamMessage checks if a message looks like spam