Java源码示例:zemberek.morphology.TurkishMorphology
示例1
/**
 * Registers the {@code POST /generate_word} route.
 *
 * <p>The handler reads the {@code word} and {@code morphemes} query parameters, builds a
 * cache-less morphology whose dictionary lines contain the requested word, generates surface
 * forms for the requested morpheme sequence and returns them serialized with
 * {@code jsonConverter}.
 */
public void initializeController() {
  post("/generate_word", (req, res) -> {
    String word = req.queryParams("word");
    // '|', '→' and '+' are all treated as morpheme separators.
    String[] morphemeIds = req.queryParams("morphemes").split("[|→+]");

    TurkishMorphology generatorMorphology = TurkishMorphology.builder()
        .addDictionaryLines(word)
        .disableCache()
        .build();
    // Only the first lexicon entry matching the word is used as the stem.
    DictionaryItem stem = generatorMorphology.getLexicon().getMatchingItems(word).get(0);

    List<GenerateWordResult> response = new ArrayList<>();
    List<Result> generated =
        generatorMorphology.getWordGenerator().generate(stem, morphemeIds);
    for (Result generatedForm : generated) {
      GenerateWordResult entry = new GenerateWordResult();
      entry.word = word;
      entry.no_surface = generatedForm.analysis.formatMorphemesLexical();
      entry.surface = generatedForm.surface;
      entry.analysis = generatedForm.analysis.formatLong();
      response.add(entry);
    }
    return jsonConverter.toJson(response);
  });
}
示例2
/**
 * Registers the {@code POST /analyze_word} route.
 *
 * <p>The handler analyzes the {@code word} query parameter with the given morphology and
 * returns every analysis, serialized with the given JSON converter.
 *
 * @param jsonConverter converter used to serialize the response object
 * @param morphology morphology used to analyze the requested word
 */
public void initializeController(Gson jsonConverter, TurkishMorphology morphology) {
  post("/analyze_word", (req, res) -> {
    String input = req.queryParams("word");
    AnalyzeWordResults response = new AnalyzeWordResults();
    // The input is echoed back only when showInput(req) says so.
    if (showInput(req)) {
      response.input = input;
    }

    List<AnalyzeWordItem> items = new ArrayList<>();
    for (SingleAnalysis analysis : morphology.analyze(input)) {
      items.add(AnalyzeWordItem.fromSingleAnalysis(analysis));
    }
    response.results = items;

    return jsonConverter.toJson(response);
  });
}
示例3
/**
 * Application entry point: builds the shared JSON converter and default Turkish morphology,
 * then instantiates every controller with them.
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  // JSON converter shared by all controllers.
  GsonBuilder gsonBuilder = new GsonBuilder();
  gsonBuilder.addSerializationExclusionStrategy(new ZemberekExclusionStrategy());
  gsonBuilder.disableInnerClassSerialization();
  Gson gson = gsonBuilder.create();

  // Default Turkish morphology shared by the controllers that need one.
  TurkishMorphology turkishMorphology = TurkishMorphology.createWithDefaults();

  // Controllers are created in the same order as before.
  new FindPOSController(gson, turkishMorphology);
  new SentenceBoundaryDetectionController(gson);
  new TurkishTokenizationController(gson);
  new SpellingController(gson, turkishMorphology);
  new StemmingAndLemmatizationController(gson, turkishMorphology);
  new AnalyzeWordController(gson, turkishMorphology);
  new AnalyzeSentenceController(gson, turkishMorphology);
  new GenerateWordsController(gson);
}
示例4
/**
 * Analyzes {@code word} and prints every analysis with its morpheme count, stems, lemmas and
 * stem/ending split, followed by the stem chosen by {@code Zemberek3StemFilter} using the
 * "maxLength" strategy. Prints only the summary line when there is no analysis.
 *
 * @param word word to analyze
 * @param morphology morphology used for the analysis
 */
private static void parse(String word, TurkishMorphology morphology) {
  WordAnalysis analyses = morphology.analyze(word);
  int solutionCount = analyses.analysisCount();
  System.out.println("Word = " + word + " has " + solutionCount + " many solutions");
  if (solutionCount == 0) {
    return;
  }

  System.out.println("Parses: ");
  for (SingleAnalysis analysis : analyses) {
    int morphemeCount = analysis.getMorphemeDataList().size();
    System.out.println("number of morphemes = " + morphemeCount);
    System.out.println(analysis.formatLong());
    System.out.println("\tStems = " + analysis.getStems());
    System.out.println("\tLemmas = " + analysis.getLemmas());
    System.out.println("\tStemAndEnding = " + analysis.getStemAndEnding());
    System.out.println("-------------------");
  }

  System.out.println("final selected stem : " + Zemberek3StemFilter.stem(analyses, "maxLength"));
  System.out.println("==================================");
}
示例5
/**
 * Runs three {@code AddNewDictionaryItem} tests against the default morphology: two
 * proper-noun items and one verb item.
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  AddNewDictionaryItem app = new AddNewDictionaryItem(TurkishMorphology.createWithDefaults());

  Log.info("Proper Noun Test - 1 :");
  DictionaryItem meydan = new DictionaryItem(
      "Meydan", "meydan", "meydan", PrimaryPos.Noun, SecondaryPos.ProperNoun);
  app.test("Meydan'a", meydan);
  Log.info("----");

  Log.info("Proper Noun Test - 2 :");
  DictionaryItem meeeydan = new DictionaryItem(
      "Meeeydan", "meeeydan", "meeeydan", PrimaryPos.Noun, SecondaryPos.ProperNoun);
  app.test("Meeeydan'a", meeeydan);
  Log.info("----");

  Log.info("Verb Test : ");
  DictionaryItem tweetlemek = new DictionaryItem(
      "tweetlemek", "tweetle", "tivitle", PrimaryPos.Verb, SecondaryPos.None);
  app.test("tweetleyeyazdım", tweetlemek);
}
示例6
/**
 * Analyzes a fixed sentence word by word, logs every analysis of every word, then
 * disambiguates the sentence and logs the best analysis for each word.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  String sentence = "Bol baharatlı bir yemek yaptıralım.";
  Log.info("Sentence = " + sentence);

  List<WordAnalysis> wordAnalyses = morphology.analyzeSentence(sentence);
  Log.info("Sentence word analysis result:");
  for (WordAnalysis wordAnalysis : wordAnalyses) {
    Log.info("Word = " + wordAnalysis.getInput());
    for (SingleAnalysis candidate : wordAnalysis) {
      Log.info(candidate.formatLong());
    }
  }

  SentenceAnalysis disambiguated = morphology.disambiguate(sentence, wordAnalyses);
  Log.info("\nAfter ambiguity resolution : ");
  for (SingleAnalysis best : disambiguated.bestAnalysis()) {
    Log.info(best);
  }
}
示例7
/**
 * Analyzes the word "simidime" and, for each analysis, regenerates the word with the same
 * morphemes on the stem "poğaça", logging the input analysis and each generated form.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  // First lexicon entry matching the replacement stem.
  DictionaryItem replacementStem = morphology.getLexicon().getMatchingItems("poğaça").get(0);

  String word = "simidime";
  Log.info("Input Word = " + word);

  for (SingleAnalysis inputAnalysis : morphology.analyze(word)) {
    List<Result> regenerated = morphology.getWordGenerator()
        .generate(replacementStem, inputAnalysis.getMorphemes());
    for (Result form : regenerated) {
      Log.info("Input analysis: " + inputAnalysis.formatLong());
      Log.info("After stem change, word = " + form.surface);
      Log.info("After stem change, Analysis = " + form.analysis.formatLong());
    }
  }
}
示例8
/**
 * Prints the surface forms generated for "armut" for every combination of the number,
 * possessive and case morphemes listed below.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  String[] numberMorphemes = {"A3sg", "A3pl"};
  String[] possessiveMorphemes = {"P1sg", "P2sg", "P3sg"};
  String[] caseMorphemes = {"Dat", "Loc", "Abl"};

  // Cache-less morphology whose lexicon is built from the single line "armut".
  TurkishMorphology morphology = TurkishMorphology.builder()
      .setLexicon("armut")
      .disableCache()
      .build();
  DictionaryItem armut = morphology.getLexicon().getMatchingItems("armut").get(0);

  for (String numberId : numberMorphemes) {
    for (String possessiveId : possessiveMorphemes) {
      for (String caseId : caseMorphemes) {
        List<Result> generated =
            morphology.getWordGenerator().generate(armut, numberId, possessiveId, caseId);
        for (Result form : generated) {
          System.out.println(form.surface);
        }
      }
    }
  }
}
示例9
/**
 * Demonstrates {@code TurkishSpellChecker}: logs the {@code check} result for a few words,
 * then the {@code suggestForWord} result for a few misspelled words.
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  TurkishSpellChecker checker =
      new TurkishSpellChecker(TurkishMorphology.createWithDefaults());

  Log.info("Check if written correctly.");
  String[] wordsToCheck = {"Ankara'ya", "Ankar'aya", "yapbileceksen", "yapabileceğinizden"};
  for (String candidate : wordsToCheck) {
    String verdict = candidate + " -> " + checker.check(candidate);
    Log.info(verdict);
  }

  Log.info();

  Log.info("Give suggestions.");
  String[] misspelled = {"Kraamanda", "okumuştk", "yapbileceksen", "oukyamıyorum"};
  for (String candidate : misspelled) {
    String suggestions = candidate + " -> " + checker.suggestForWord(candidate);
    Log.info(suggestions);
  }
}
示例10
/**
 * Trains a perceptron NER model from bracket-annotated data sets and saves it as text.
 *
 * <p>Expects the files {@code ner-train} and {@code ner-test} in the working directory and
 * writes the model under {@code my-model}, creating that directory if needed.
 *
 * @param args command line arguments (unused)
 * @throws IOException propagated from the data-set loading, directory creation or model
 *     saving calls
 */
public static void main(String[] args) throws IOException {
  // You will need ner-train and ner-test files to run this example.
  Path trainPath = Paths.get("ner-train");
  Path testPath = Paths.get("ner-test");
  Path modelRoot = Paths.get("my-model");

  NerDataSet trainingSet = NerDataSet.load(trainPath, AnnotationStyle.BRACKET);
  Log.info(trainingSet.info()); // prints information
  NerDataSet testSet = NerDataSet.load(testPath, AnnotationStyle.BRACKET);
  Log.info(testSet.info());

  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();

  // Training hyper-parameters. The previous comment claimed 7 iterations although 13 was
  // passed; naming the values keeps comment and code from drifting apart again.
  int iterationCount = 13;
  float learningRate = 0.1f;

  // Training occurs here. Result is a PerceptronNer instance.
  PerceptronNer ner = new PerceptronNerTrainer(morphology)
      .train(trainingSet, testSet, iterationCount, learningRate);

  Files.createDirectories(modelRoot);
  ner.saveModelAsText(modelRoot);
}
示例11
/**
 * Runs three {@code AddNewDictionaryItem} tests against the default morphology: two
 * proper-noun items and one verb item.
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  AddNewDictionaryItem app = new AddNewDictionaryItem(TurkishMorphology.createWithDefaults());

  Log.info("Proper Noun Test - 1 :");
  DictionaryItem meydan = new DictionaryItem(
      "Meydan", "meydan", "meydan", PrimaryPos.Noun, SecondaryPos.ProperNoun);
  app.test("Meydan'a", meydan);
  Log.info("----");

  Log.info("Proper Noun Test - 2 :");
  DictionaryItem meeeydan = new DictionaryItem(
      "Meeeydan", "meeeydan", "meeeydan", PrimaryPos.Noun, SecondaryPos.ProperNoun);
  app.test("Meeeydan'a", meeeydan);
  Log.info("----");

  Log.info("Verb Test : ");
  DictionaryItem tweetlemek = new DictionaryItem(
      "tweetlemek", "tweetle", "tivitle", PrimaryPos.Verb, SecondaryPos.None);
  app.test("tweetleyeyazdım", tweetlemek);
}
示例12
/**
 * Analyzes a fixed sentence word by word, logs every analysis of every word, then
 * disambiguates the sentence and logs the best analysis for each word.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  String sentence = "Bol baharatlı bir yemek yaptıralım.";
  Log.info("Sentence = " + sentence);

  List<WordAnalysis> wordAnalyses = morphology.analyzeSentence(sentence);
  Log.info("Sentence word analysis result:");
  for (WordAnalysis wordAnalysis : wordAnalyses) {
    Log.info("Word = " + wordAnalysis.getInput());
    for (SingleAnalysis candidate : wordAnalysis) {
      Log.info(candidate.formatLong());
    }
  }

  SentenceAnalysis disambiguated = morphology.disambiguate(sentence, wordAnalyses);
  Log.info("\nAfter ambiguity resolution : ");
  for (SingleAnalysis best : disambiguated.bestAnalysis()) {
    Log.info(best);
  }
}
示例13
/**
 * Analyzes the word "simidime" and, for each analysis, regenerates the word with the same
 * morphemes on the stem "poğaça", logging the input analysis and each generated form.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  // First lexicon entry matching the replacement stem.
  DictionaryItem replacementStem = morphology.getLexicon().getMatchingItems("poğaça").get(0);

  String word = "simidime";
  Log.info("Input Word = " + word);

  for (SingleAnalysis inputAnalysis : morphology.analyze(word)) {
    List<Result> regenerated = morphology.getWordGenerator()
        .generate(replacementStem, inputAnalysis.getMorphemes());
    for (Result form : regenerated) {
      Log.info("Input analysis: " + inputAnalysis.formatLong());
      Log.info("After stem change, word = " + form.surface);
      Log.info("After stem change, Analysis = " + form.analysis.formatLong());
    }
  }
}
示例14
/**
 * Prints the surface forms generated for "armut" for every combination of the number,
 * possessive and case morphemes listed below.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  String[] numberMorphemes = {"A3sg", "A3pl"};
  String[] possessiveMorphemes = {"P1sg", "P2sg", "P3sg"};
  String[] caseMorphemes = {"Dat", "Loc", "Abl"};

  // Cache-less morphology whose lexicon is built from the single line "armut".
  TurkishMorphology morphology = TurkishMorphology.builder()
      .setLexicon("armut")
      .disableCache()
      .build();
  DictionaryItem armut = morphology.getLexicon().getMatchingItems("armut").get(0);

  for (String numberId : numberMorphemes) {
    for (String possessiveId : possessiveMorphemes) {
      for (String caseId : caseMorphemes) {
        List<Result> generated =
            morphology.getWordGenerator().generate(armut, numberId, possessiveId, caseId);
        for (Result form : generated) {
          System.out.println(form.surface);
        }
      }
    }
  }
}
示例15
/**
 * Demonstrates {@code TurkishSpellChecker}: logs the {@code check} result for a few words,
 * then the {@code suggestForWord} result for a few misspelled words.
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  TurkishSpellChecker checker =
      new TurkishSpellChecker(TurkishMorphology.createWithDefaults());

  Log.info("Check if written correctly.");
  String[] wordsToCheck = {"Ankara'ya", "Ankar'aya", "yapbileceksen", "yapabileceğinizden"};
  for (String candidate : wordsToCheck) {
    String verdict = candidate + " -> " + checker.check(candidate);
    Log.info(verdict);
  }

  Log.info();

  Log.info("Give suggestions.");
  String[] misspelled = {"Kraamanda", "okumuştk", "yapbileceksen", "oukyamıyorum"};
  for (String candidate : misspelled) {
    String suggestions = candidate + " -> " + checker.suggestForWord(candidate);
    Log.info(suggestions);
  }
}
示例16
/**
 * Trains a perceptron NER model from bracket-annotated data sets and saves it as text.
 *
 * <p>Expects the files {@code ner-train} and {@code ner-test} in the working directory and
 * writes the model under {@code my-model}, creating that directory if needed.
 *
 * @param args command line arguments (unused)
 * @throws IOException propagated from the data-set loading, directory creation or model
 *     saving calls
 */
public static void main(String[] args) throws IOException {
  // You will need ner-train and ner-test files to run this example.
  Path trainPath = Paths.get("ner-train");
  Path testPath = Paths.get("ner-test");
  Path modelRoot = Paths.get("my-model");

  NerDataSet trainingSet = NerDataSet.load(trainPath, AnnotationStyle.BRACKET);
  Log.info(trainingSet.info()); // prints information
  NerDataSet testSet = NerDataSet.load(testPath, AnnotationStyle.BRACKET);
  Log.info(testSet.info());

  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();

  // Training hyper-parameters. The previous comment claimed 7 iterations although 13 was
  // passed; naming the values keeps comment and code from drifting apart again.
  int iterationCount = 13;
  float learningRate = 0.1f;

  // Training occurs here. Result is a PerceptronNer instance.
  PerceptronNer ner = new PerceptronNerTrainer(morphology)
      .train(trainingSet, testSet, iterationCount, learningRate);

  Files.createDirectories(modelRoot);
  ner.saveModelAsText(modelRoot);
}
示例17
/**
 * Initializes {@code morphology} from the configured dictionary files.
 *
 * <p>When {@code dictionaryFiles} is null or blank, or the listed files yield no lines, the
 * default dictionaries shipped with Zemberek3 are used. Otherwise the lexicon is built from
 * the collected lines.
 *
 * @param loader resource loader used to read each dictionary file
 * @throws IOException declared by the original signature; presumably raised while reading
 *     the dictionary files
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  List<String> dictionaryLines = new ArrayList<>();

  boolean filesConfigured = dictionaryFiles != null && !dictionaryFiles.trim().isEmpty();
  if (filesConfigured) {
    for (String fileName : splitFileNames(dictionaryFiles)) {
      dictionaryLines.addAll(getLines(loader, fileName.trim()));
    }
  }

  if (dictionaryLines.isEmpty()) {
    // Use default dictionaries shipped with Zemberek3.
    this.morphology = TurkishMorphology.createWithDefaults();
    return;
  }

  String[] lexiconLines = dictionaryLines.toArray(new String[0]);
  morphology = new TurkishMorphology.Builder().setLexicon(lexiconLines).build();
}
示例18
/**
 * Analyzes and disambiguates a fixed sentence, then logs each word together with the primary
 * part of speech of its best analysis.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  String sentence = "Keşke yarın hava güzel olsa.";
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  Log.info("Sentence = " + sentence);

  SentenceAnalysis sentenceAnalysis = morphology.analyzeAndDisambiguate(sentence);
  for (SentenceWordAnalysis wordEntry : sentenceAnalysis) {
    PrimaryPos pos = wordEntry.getBestAnalysis().getPos();
    String token = wordEntry.getWordAnalysis().getInput();
    Log.info("%s : %s ", token, pos);
  }
}
示例19
/**
 * Analyzes the word "kalemi" and logs each analysis in three formats: long (lexical and
 * surface), lexical only, and Oflazer style.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  String word = "kalemi";
  Log.info("Word = " + word);

  for (SingleAnalysis analysis : morphology.analyze(word)) {
    String longForm = analysis.formatLong();
    Log.info("Lexical and Surface : " + longForm);
    String lexicalForm = analysis.formatLexical();
    Log.info("Only Lexical : " + lexicalForm);
    String oflazerForm = AnalysisFormatters.OFLAZER_STYLE.format(analysis);
    Log.info("Oflazer style : " + oflazerForm);
    Log.info();
  }
}
示例20
/**
 * Analyzes the word "kutucuğumuz" and logs each analysis with its stems and lemmas.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  String word = "kutucuğumuz";
  Log.info("Word = " + word);
  Log.info("Results: ");

  for (SingleAnalysis analysis : morphology.analyze(word)) {
    Log.info(analysis.formatLong());
    Log.info("\tStems = " + analysis.getStems());
    Log.info("\tLemmas = " + analysis.getLemmas());
  }
}
示例21
/**
 * Builds a morphology over the default lexicon with {@code ignoreDiacriticsInAnalysis()}
 * enabled and prints every analysis of the input "kisi".
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  TurkishMorphology diacriticsIgnoringMorphology =
      TurkishMorphology.builder()
          .ignoreDiacriticsInAnalysis()
          .setLexicon(RootLexicon.getDefault())
          .build();

  diacriticsIgnoringMorphology
      .analyze("kisi")
      .forEach(analysis -> System.out.println(analysis));
}
示例22
/**
 * Normalizes a handful of noisy Turkish sentences with {@code TurkishSentenceNormalizer} and
 * prints each original sentence followed by its normalized form and a blank line.
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  String[] noisySentences = {
      "Yrn okua gidicem",
      "Tmm, yarin havuza giricem ve aksama kadar yaticam :)",
      "ah aynen ya annemde fark ettı siz evinizden cıkmayın diyo",
      "gercek mı bu? Yuh! Artık unutulması bile beklenmiyo",
      "Hayır hayat telaşm olmasa alacam buraları gökdelen dikicem.",
      "yok hocam kesınlıkle oyle birşey yok",
      "herseyi soyle hayatında olmaması gerek bence boyle ınsanların falan baskı yapıyosa"
  };

  // Replace these paths with your normalization data root folder and language model file.
  // Example: https://drive.google.com/drive/folders/1tztjRiUs9BOTH-tb1v7FWyixl-iUpydW
  // Download the lm and normalization folders to some local directory.
  Path dataRoot = Paths.get("/home/aaa/zemberek-data");
  Path normalizationRoot = dataRoot.resolve("normalization");
  Path languageModel = dataRoot.resolve("lm/lm.2gram.slm");

  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  TurkishSentenceNormalizer sentenceNormalizer =
      new TurkishSentenceNormalizer(morphology, normalizationRoot, languageModel);

  for (String noisy : noisySentences) {
    System.out.println(noisy);
    System.out.println(sentenceNormalizer.normalize(noisy));
    System.out.println();
  }
}
示例23
/**
 * Analyzes and disambiguates a fixed sentence, then logs each word together with the primary
 * part of speech of its best analysis.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  String sentence = "Keşke yarın hava güzel olsa.";
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  Log.info("Sentence = " + sentence);

  SentenceAnalysis sentenceAnalysis = morphology.analyzeAndDisambiguate(sentence);
  for (SentenceWordAnalysis wordEntry : sentenceAnalysis) {
    PrimaryPos pos = wordEntry.getBestAnalysis().getPos();
    String token = wordEntry.getWordAnalysis().getInput();
    Log.info("%s : %s ", token, pos);
  }
}
示例24
/**
 * Analyzes the word "kalemi" and logs each analysis in three formats: long (lexical and
 * surface), lexical only, and Oflazer style.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  String word = "kalemi";
  Log.info("Word = " + word);

  for (SingleAnalysis analysis : morphology.analyze(word)) {
    String longForm = analysis.formatLong();
    Log.info("Lexical and Surface : " + longForm);
    String lexicalForm = analysis.formatLexical();
    Log.info("Only Lexical : " + lexicalForm);
    String oflazerForm = AnalysisFormatters.OFLAZER_STYLE.format(analysis);
    Log.info("Oflazer style : " + oflazerForm);
    Log.info();
  }
}
示例25
/**
 * Analyzes the word "kutucuğumuz" and logs each analysis with its stems and lemmas.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  String word = "kutucuğumuz";
  Log.info("Word = " + word);
  Log.info("Results: ");

  for (SingleAnalysis analysis : morphology.analyze(word)) {
    Log.info(analysis.formatLong());
    Log.info("\tStems = " + analysis.getStems());
    Log.info("\tLemmas = " + analysis.getLemmas());
  }
}
示例26
/**
 * Builds a morphology over the default lexicon with {@code ignoreDiacriticsInAnalysis()}
 * enabled and prints every analysis of the input "kisi".
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  TurkishMorphology diacriticsIgnoringMorphology =
      TurkishMorphology.builder()
          .ignoreDiacriticsInAnalysis()
          .setLexicon(RootLexicon.getDefault())
          .build();

  diacriticsIgnoringMorphology
      .analyze("kisi")
      .forEach(analysis -> System.out.println(analysis));
}
示例27
/**
 * Normalizes a handful of noisy Turkish sentences with {@code TurkishSentenceNormalizer} and
 * prints each original sentence followed by its normalized form and a blank line.
 *
 * @param args command line arguments (unused)
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
  String[] noisySentences = {
      "Yrn okua gidicem",
      "Tmm, yarin havuza giricem ve aksama kadar yaticam :)",
      "ah aynen ya annemde fark ettı siz evinizden cıkmayın diyo",
      "gercek mı bu? Yuh! Artık unutulması bile beklenmiyo",
      "Hayır hayat telaşm olmasa alacam buraları gökdelen dikicem.",
      "yok hocam kesınlıkle oyle birşey yok",
      "herseyi soyle hayatında olmaması gerek bence boyle ınsanların falan baskı yapıyosa"
  };

  // Replace these paths with your normalization data root folder and language model file.
  // Example: https://drive.google.com/drive/folders/1tztjRiUs9BOTH-tb1v7FWyixl-iUpydW
  // Download the lm and normalization folders to some local directory.
  Path dataRoot = Paths.get("/home/aaa/zemberek-data");
  Path normalizationRoot = dataRoot.resolve("normalization");
  Path languageModel = dataRoot.resolve("lm/lm.2gram.slm");

  TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
  TurkishSentenceNormalizer sentenceNormalizer =
      new TurkishSentenceNormalizer(morphology, normalizationRoot, languageModel);

  for (String noisy : noisySentences) {
    System.out.println(noisy);
    System.out.println(sentenceNormalizer.normalize(noisy));
    System.out.println();
  }
}
示例28
/**
 * Creates the controller: hands the JSON converter to the superclass constructor, then calls
 * {@code initializeController} with the converter and the morphology.
 *
 * @param jsonConverter JSON converter passed to the superclass and to the initializer
 * @param morphology morphology passed to the initializer
 */
public AnalyzeSentenceController(Gson jsonConverter, TurkishMorphology morphology) {
super(jsonConverter);
initializeController(jsonConverter, morphology);
}
示例29
/**
 * Creates the controller: hands the JSON converter to the superclass constructor, then calls
 * {@code initializeController} with the morphology.
 *
 * @param jsonConverter JSON converter passed to the superclass
 * @param morphology morphology passed to the initializer
 */
public StemmingAndLemmatizationController(Gson jsonConverter, TurkishMorphology morphology) {
super(jsonConverter);
initializeController(morphology);
}
示例30
/**
 * Creates the controller: hands the JSON converter to the superclass constructor, stores the
 * morphology in this instance, then calls the no-argument {@code initializeController}.
 *
 * @param jsonConverter JSON converter passed to the superclass
 * @param morphology morphology stored in the {@code morphology} field
 */
public FindPOSController(Gson jsonConverter, TurkishMorphology morphology) {
super(jsonConverter);
// Assigned before initializeController() runs, so the field is already set at that point.
this.morphology = morphology;
initializeController();
}