SoLiXG:Greece’s recovery and resilience plan: Difference between revisions
Jump to navigation
Jump to search
(Created page with "=== Word frequencies === ===== Script ===== <syntaxhighlight lang="python"> import re from wordfreq import word_frequency <nowiki>#</nowiki> this is a script to find the m...") |
|||
Line 3: | Line 3: | ||
===== Script ===== | ===== Script ===== | ||
<syntaxhighlight lang="python"> | <syntaxhighlight lang="python"> | ||
import re | import re | ||
from wordfreq import word_frequency | from wordfreq import word_frequency | ||
# this is a script to find the most frequent words in a text file | |||
# Count word frequencies in gr-policy.txt and append them, most frequent
# first, to output.txt as "(word, count)" lines.
from collections import Counter

# Read the whole policy text; `with` guarantees the handle is closed
# even if read() raises (the original left the file open on error).
with open('gr-policy.txt', 'r') as lines:
    text = lines.read()

# Split into sentence-like chunks on '.', then whitespace-tokenise each
# chunk into individual words.
text_list = text.replace('\n', ' ').split(".")
sep_words = [w for chunk in text_list for w in chunk.split()]

# Counter tallies every word in a single O(n) pass. The original called
# sep_words.count(word) once per word, an O(n^2) scan over the list.
all_freq = Counter(sep_words)

# most_common() returns (word, count) pairs sorted by count, descending —
# exactly what sorted(..., key=item[1], reverse=True) produced before.
new_list = all_freq.most_common()

with open("output.txt", "a") as f:
    print(*new_list, sep="\n", file=f)
</syntaxhighlight> | </syntaxhighlight> |
Revision as of 17:14, 15 February 2023
Word frequencies
Script
<syntaxhighlight lang="python"> import re
from wordfreq import word_frequency
# this is a script to find the most frequent words in a text file
# Count word frequencies in gr-policy.txt and append them, most frequent
# first, to output.txt as "(word, count)" lines.
from collections import Counter

# Read the whole policy text; `with` guarantees the handle is closed
# even if read() raises (the original left the file open on error).
with open('gr-policy.txt', 'r') as lines:
    text = lines.read()

# Split into sentence-like chunks on '.', then whitespace-tokenise each
# chunk into individual words.
text_list = text.replace('\n', ' ').split(".")
sep_words = [w for chunk in text_list for w in chunk.split()]

# Counter tallies every word in a single O(n) pass. The original called
# sep_words.count(word) once per word, an O(n^2) scan over the list.
all_freq = Counter(sep_words)

# most_common() returns (word, count) pairs sorted by count, descending —
# exactly what sorted(..., key=item[1], reverse=True) produced before.
new_list = all_freq.most_common()

with open("output.txt", "a") as f:
    print(*new_list, sep="\n", file=f)
</syntaxhighlight>