SoLiXG:Greece’s recovery and resilience plan: Difference between revisions

From titipi
Jump to navigation Jump to search
(Created page with "=== Word frequencies === ===== Script ===== <syntaxhighlight lang="python"> import re from wordfreq import word_frequency <nowiki>#</nowiki> this is a script to find the m...")
 
Line 3: Line 3:
===== Script =====
===== Script =====
<syntaxhighlight lang="python">
<syntaxhighlight lang="python">
import re
import re


from wordfreq import word_frequency
from wordfreq import word_frequency
 
# this is a script to find the most frequent words in a textfile
<nowiki>#</nowiki> this is a script to find the most frequent words in a textfile


lines = open('gr-policy.txt', 'r')
lines = open('gr-policy.txt', 'r')
text=lines.read()
text=lines.read()
text_list=text.replace('\n', ' ').split(".")
text_list=text.replace('\n', ' ').split(".")
<nowiki>#</nowiki> text_list=text_first.replace('(', ' ')


lines.close()
lines.close()


sep_words=[]
sep_words=[]
new_list=[]
new_list=[]
all_freq={}
all_freq={}


frequency={}
frequency={}
with open("output.txt", "a") as f:
with open("output.txt", "a") as f:
   for l in text_list:
   for l in text_list:
       for w in l.split():
       for w in l.split():
           sep_words.append(w)
           sep_words.append(w)
   for word in sep_words:
   for word in sep_words:
       freq = sep_words.count(word)  
       freq = sep_words.count(word)  
       frequency={word:freq}
       frequency={word:freq}
       all_freq.update(frequency)    
       all_freq.update(frequency)    
           # all_freq.append(frequency)
           # all_freq.append(frequency)
   new_list=sorted(all_freq.items(), key=lambda item: item[1], reverse=True )
   new_list=sorted(all_freq.items(), key=lambda item: item[1], reverse=True )
   print(*new_list, sep = "\n", file=f)
   print(*new_list, sep = "\n", file=f)


</syntaxhighlight>
</syntaxhighlight>

Revision as of 17:14, 15 February 2023

Word frequencies

Script

<syntaxhighlight lang="python"> import re

from wordfreq import word_frequency

# this is a script to find the most frequent words in a text file

from collections import Counter

# Default file names used when the script is run directly.
SOURCE_PATH = 'gr-policy.txt'
OUTPUT_PATH = "output.txt"


def count_words(text):
    """Return ``(word, count)`` pairs for *text*, most frequent first.

    Full stops are treated as separators and the remaining text is split on
    any whitespace (including newlines). Words are compared exactly as they
    appear: no lower-casing and no stripping of other punctuation. Words with
    equal counts keep the order in which they first occur in *text*.

    Args:
        text: The text to analyse.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count; an
        empty list when *text* contains no words.
    """
    # Replacing "." with a space and splitting once yields the same tokens as
    # splitting into sentences first and then splitting every sentence.
    words = text.replace(".", " ").split()
    # Counter tallies in a single pass; most_common() is a stable descending
    # sort, so ties stay in first-occurrence order.
    return Counter(words).most_common()


def main(source_path=SOURCE_PATH, output_path=OUTPUT_PATH):
    """Count the words in *source_path* and append the ranking to *output_path*.

    One ``(word, count)`` tuple is written per line. The output file is opened
    in append mode, so results of earlier runs are kept.

    Args:
        source_path: Path of the text file to read.
        output_path: Path of the file the ranking is appended to.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # The context manager closes the input even if reading fails.
    with open(source_path, 'r') as source:
        text = source.read()

    ranking = count_words(text)

    with open(output_path, "a") as f:
        print(*ranking, sep="\n", file=f)


if __name__ == "__main__":
    main()

</syntaxhighlight>