get a little stricter
This commit is contained in:
@@ -370,53 +370,60 @@ func isEnglishText(text string) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
englishCharCount := 0
|
|
||||||
nonASCIICount := 0
|
|
||||||
totalCharCount := 0
|
|
||||||
|
|
||||||
// Common English words to boost score
|
|
||||||
commonEnglish := []string{
|
|
||||||
"the ", "and ", "is ", "to ", "of ", "for ", "that ", "with ", "this ", "have ",
|
|
||||||
"from ", "would ", "could ", "about ", "more ", "which ", "been ", "their ",
|
|
||||||
}
|
|
||||||
|
|
||||||
lowerText := strings.ToLower(text)
|
lowerText := strings.ToLower(text)
|
||||||
englishWordBoost := 0
|
|
||||||
for _, word := range commonEnglish {
|
// Very common English words that should appear in legitimate English messages
|
||||||
if strings.Contains(lowerText, word) {
|
requiredEnglishWords := []string{
|
||||||
englishWordBoost += 10
|
"the", "and", "is", "to", "of", "for", "that", "with", "this", "have",
|
||||||
}
|
"from", "be", "are", "was", "were", "been", "i", "you", "he", "she",
|
||||||
|
"we", "they", "my", "your", "his", "her", "it", "what", "which", "who",
|
||||||
|
"when", "where", "why", "how", "can", "will", "would", "should", "could",
|
||||||
|
"do", "does", "did", "get", "got", "go", "going", "make", "made", "know",
|
||||||
|
"think", "want", "need", "like", "help", "work", "use", "ask", "say", "tell",
|
||||||
|
"give", "find", "tell", "become", "leave", "feel", "try", "ask", "need",
|
||||||
|
"meet", "include", "continue", "set", "learn", "change", "lead", "understand",
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, r := range text {
|
englishWordCount := 0
|
||||||
if unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsSpace(r) || unicode.IsPunct(r) {
|
totalWords := 0
|
||||||
totalCharCount++
|
|
||||||
|
|
||||||
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') ||
|
// Split into words
|
||||||
r == ' ' || r == '.' || r == ',' || r == '!' || r == '?' || r == '-' || r == '\'' || r == '"' ||
|
words := strings.FieldsFunc(lowerText, func(r rune) bool {
|
||||||
r == ';' || r == ':' || r == '(' || r == ')' || r == '\n' || r == '\t' {
|
return !unicode.IsLetter(r) && !unicode.IsNumber(r)
|
||||||
englishCharCount++
|
})
|
||||||
} else if r > 127 {
|
|
||||||
nonASCIICount++
|
for _, word := range words {
|
||||||
|
if len(word) > 0 {
|
||||||
|
totalWords++
|
||||||
|
// Check if word is in our English word list
|
||||||
|
for _, engWord := range requiredEnglishWords {
|
||||||
|
if word == engWord {
|
||||||
|
englishWordCount++
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if totalCharCount == 0 {
|
// For short messages (less than 50 characters), be more lenient
|
||||||
|
if len(text) < 50 {
|
||||||
|
return englishWordCount >= 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// For medium messages (50-200 chars), require at least 2 English words
|
||||||
|
if len(text) < 200 {
|
||||||
|
return englishWordCount >= 2
|
||||||
|
}
|
||||||
|
|
||||||
|
// For longer messages, require at least 10% of words to be common English words
|
||||||
|
if totalWords > 0 {
|
||||||
|
englishPercentage := float64(englishWordCount) / float64(totalWords)
|
||||||
|
return englishPercentage >= 0.1
|
||||||
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// If more than 3 non-ASCII characters, likely spam/bot
|
|
||||||
if nonASCIICount > 3 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
englishPercentage := float64(englishCharCount) / float64(totalCharCount)
|
|
||||||
|
|
||||||
// Stricter requirements with word boost
|
|
||||||
return englishPercentage >= 0.75 || (englishPercentage >= 0.65 && englishWordBoost > 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// isSpamMessage checks if a message looks like spam
|
// isSpamMessage checks if a message looks like spam
|
||||||
func isSpamMessage(message string) bool {
|
func isSpamMessage(message string) bool {
|
||||||
// Convert to lowercase for checks
|
// Convert to lowercase for checks
|
||||||
|
|||||||
Reference in New Issue
Block a user