package fetcher

import (
	"log"
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"

	"project.hechon.fr/internal/config"
)

// alternativeVersificationRe matches an alternative verse reference written
// between parentheses, e.g. "(5:1)". It is compiled once at package scope
// instead of on every call.
var alternativeVersificationRe = regexp.MustCompile(`\(\d+:\d+\)`)

// StripAlternativeVersification removes alternative verse references from
// every verse of rawChapter.
//
// A handful of verses have two traditions of numbering; the alternative
// reference then appears in the text between parentheses, as in "(5:1)".
// Each such marker is deleted; the rest of the verse is left untouched.
//
// The returned chapter is built from a zero value and only its Verses field
// is populated: any other field of rawChapter is not carried over. Each
// cleaned verse is also written to the standard logger.
func StripAlternativeVersification(rawChapter config.Chapter) config.Chapter {
	var treatedChapter config.Chapter
	for _, verse := range rawChapter.Verses {
		modifiedVerse := alternativeVersificationRe.ReplaceAllString(verse, "")
		log.Printf("%v\n", modifiedVerse)
		treatedChapter.Verses = append(treatedChapter.Verses, modifiedVerse)
	}
	return treatedChapter
}

// SplitInPart splits verse into parts, starting a new part at every word
// (other than the first) that begins with an upper-case letter and is not a
// known proper name.
//
// properName is the set of proper names; a word is looked up in it after
// stripping trailing '.' and ',' characters, so "Moses," matches the entry
// "Moses". Capitalised proper names therefore never start a new part.
//
// Words are separated by any run of white space and are re-joined with a
// single space, so parts never carry leading, trailing or doubled spaces.
// The final part is always returned, including when it consists of a single
// capitalised word. An empty or white-space-only verse yields nil.
func SplitInPart(verse string, properName map[string]struct{}) []string {
	words := strings.Fields(verse)
	if len(words) == 0 {
		return nil
	}

	var parts []string
	start := 0 // index of the first word of the part being accumulated
	for i := 1; i < len(words); i++ {
		if startsNewPart(words[i], properName) {
			parts = append(parts, strings.Join(words[start:i], " "))
			start = i
		}
	}
	// Flush the part still being accumulated; this must happen even when
	// the last word itself opened a new part, otherwise that word is lost.
	return append(parts, strings.Join(words[start:], " "))
}

// startsNewPart reports whether word begins with an upper-case letter and is
// not listed in properName once trailing '.' and ',' are removed.
func startsNewPart(word string, properName map[string]struct{}) bool {
	first, _ := utf8.DecodeRuneInString(word)
	if !unicode.IsUpper(first) {
		return false
	}
	_, isProperName := properName[strings.TrimRight(word, ".,")]
	return !isProperName
}