get a little stricter

This commit is contained in:
Blake Ridgway
2025-12-01 07:52:23 -06:00
parent fd2cd5b79d
commit 3f7814d9c8

View File

@@ -370,53 +370,60 @@ func isEnglishText(text string) bool {
return true return true
} }
englishCharCount := 0
nonASCIICount := 0
totalCharCount := 0
// Common English words to boost score
commonEnglish := []string{
"the ", "and ", "is ", "to ", "of ", "for ", "that ", "with ", "this ", "have ",
"from ", "would ", "could ", "about ", "more ", "which ", "been ", "their ",
}
lowerText := strings.ToLower(text) lowerText := strings.ToLower(text)
englishWordBoost := 0
for _, word := range commonEnglish { // Very common English words that should appear in legitimate English messages
if strings.Contains(lowerText, word) { requiredEnglishWords := []string{
englishWordBoost += 10 "the", "and", "is", "to", "of", "for", "that", "with", "this", "have",
} "from", "be", "are", "was", "were", "been", "i", "you", "he", "she",
"we", "they", "my", "your", "his", "her", "it", "what", "which", "who",
"when", "where", "why", "how", "can", "will", "would", "should", "could",
"do", "does", "did", "get", "got", "go", "going", "make", "made", "know",
"think", "want", "need", "like", "help", "work", "use", "ask", "say", "tell",
"give", "find", "tell", "become", "leave", "feel", "try", "ask", "need",
"meet", "include", "continue", "set", "learn", "change", "lead", "understand",
} }
for _, r := range text { englishWordCount := 0
if unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsSpace(r) || unicode.IsPunct(r) { totalWords := 0
totalCharCount++
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || // Split into words
r == ' ' || r == '.' || r == ',' || r == '!' || r == '?' || r == '-' || r == '\'' || r == '"' || words := strings.FieldsFunc(lowerText, func(r rune) bool {
r == ';' || r == ':' || r == '(' || r == ')' || r == '\n' || r == '\t' { return !unicode.IsLetter(r) && !unicode.IsNumber(r)
englishCharCount++ })
} else if r > 127 {
nonASCIICount++ for _, word := range words {
if len(word) > 0 {
totalWords++
// Check if word is in our English word list
for _, engWord := range requiredEnglishWords {
if word == engWord {
englishWordCount++
break
}
} }
} }
} }
if totalCharCount == 0 { // For short messages (less than 50 characters), be more lenient
if len(text) < 50 {
return englishWordCount >= 1
}
// For medium messages (50-200 chars), require at least 2 English words
if len(text) < 200 {
return englishWordCount >= 2
}
// For longer messages, require at least 10% of words to be common English words
if totalWords > 0 {
englishPercentage := float64(englishWordCount) / float64(totalWords)
return englishPercentage >= 0.1
}
return true return true
} }
// If more than 3 non-ASCII characters, likely spam/bot
if nonASCIICount > 3 {
return false
}
englishPercentage := float64(englishCharCount) / float64(totalCharCount)
// Stricter requirements with word boost
return englishPercentage >= 0.75 || (englishPercentage >= 0.65 && englishWordBoost > 0)
}
// isSpamMessage checks if a message looks like spam // isSpamMessage checks if a message looks like spam
func isSpamMessage(message string) bool { func isSpamMessage(message string) bool {
// Convert to lowercase for checks // Convert to lowercase for checks