| Analysis | |
|---|---|
| Morphology | Distribution of nominal morphemes over EG and ES vocabulary | 
| Predictability by word | Concordances, 3D scatterplots and statistics on word embeddings | 
| Predictability by text | Heatmaps, Oracc links | 
| More info | Information about the process and data. | 
| Acknowledgements: Niek Veldhuis, Steve Tinney, Noah Kröll, Sebastian Fink, Krister Lindén (PI) | |
generated with emesal_vectors.py -- asahala 2022
def demo():
    """Run prediction, statistics and morphology-table generation on a
    filtered set of 2nd-millennium texts.

    Texts are taken from ``EmesalFinder.find_texts()`` and kept only when
    they are tagged as ``'2nd'`` millennium, contain more than 30 words,
    have an Emesal ratio of at least 0.1 and a lacuna ratio of at most
    0.33.  Every step receives the ``'2nd_mill_'`` prefix (presumably used
    to name the generated files -- confirm against the callees).

    The commented-out calls are optional steps that are currently
    disabled; ``vector_window`` and ``split_lines`` are referenced only by
    those calls.
    """
    prefix = '2nd_mill_'
    threshold = 10
    context_window = 10  # prediction context window
    vector_window = 3    # vector window (used by the disabled steps only)
    split_lines = True   # used by the disabled batch_process step only

    # purge(prefix)

    # Select the texts to process.  The checks run in the same order as a
    # chained ``and`` would, so later attributes are only read when the
    # earlier checks have passed.
    selected = []
    for text in EmesalFinder.find_texts():
        if not (text.millennium == '2nd'):
            continue
        if not (text.word_count > 30):
            continue
        if not (text.emesal_ratio >= 0.1):
            continue
        if not (text.lacuna_ratio <= 0.33):
            continue
        selected.append(text)

    # Disabled steps -- uncomment to run them:
    # batch_process(n=5, threshold=threshold, data=selected, filename=prefix,
    #               vector_window=vector_window, window=context_window,
    #               split_lines=split_lines)
    # general_vectors_makedata(prefix, vector_window=vector_window)
    # general_vectors_similarities(prefix)

    predict_emesal(prefix, selected, window=context_window)
    generate_statistics(prefix, selected, threshold)
    generate_morphology_table(prefix, selected, threshold)
# Script entry point: run the demo pipeline.  NOTE: this call is not wrapped
# in an ``if __name__ == "__main__":`` guard, so it also runs on import.
demo()