61 lines
1.4 KiB
Go
61 lines
1.4 KiB
Go
package fetcher
|
|
|
|
import(
|
|
"project.hechon.fr/internal/config"
|
|
"log"
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
func StripAlternativeVersification(rawChapter config.Chapter) config.Chapter{
|
|
/* a handful of verses have 2 traditions of numbering, the alternative
|
|
* rendering appears then between brackets. This is a simple use of
|
|
* regexp to get rid of it
|
|
*/
|
|
|
|
|
|
regExp:=regexp.MustCompile(`\(\d+:\d+\)`)
|
|
|
|
var treatedChapter config.Chapter
|
|
treatedChapter.Number = rawChapter.Number
|
|
for _, verse := range rawChapter.Verses{
|
|
modifiedVerse := string(regExp.ReplaceAll([]byte(verse), []byte("")))
|
|
log.Printf("%v\n", modifiedVerse)
|
|
treatedChapter.Verses = append(treatedChapter.Verses, modifiedVerse)
|
|
}
|
|
|
|
return treatedChapter
|
|
}
|
|
|
|
// SplitInPart cuts verse into parts, starting a new part at every word that
// begins with an upper-case letter and is not a known proper name.
//
// verse is split into words on any run of whitespace, so leading, trailing
// and repeated spaces never produce empty words. properName is the set of
// words that must NOT trigger a split even though they are capitalised; a
// word is looked up after its trailing ".,!;:" punctuation has been removed.
// A nil properName is treated as an empty set.
//
// The first word never triggers a split. Every returned part is the
// space-joined run of its words, without leading or trailing spaces, and the
// last part is always included. An empty or whitespace-only verse yields nil.
func SplitInPart(verse string, properName map[string]struct{}) []string {
	words := strings.Fields(verse)
	if len(words) == 0 {
		return nil
	}

	var parts []string
	start := 0 // index of the first word of the part being accumulated

	// Start at 1: the first word always belongs to the first part.
	for i := 1; i < len(words); i++ {
		word := words[i]

		r, _ := utf8.DecodeRuneInString(word)
		if !unicode.IsUpper(r) {
			continue
		}

		// Capitalised proper names (ignoring trailing punctuation) stay
		// inside the current part.
		if _, isProperName := properName[strings.TrimRight(word, ".,!;:")]; isProperName {
			continue
		}

		parts = append(parts, strings.Join(words[start:i], " "))
		start = i
	}

	// The words from start onwards are still pending; flush them so the
	// final part is never lost, even when it is a single capitalised word.
	return append(parts, strings.Join(words[start:], " "))
}
|