The group you are posting to is a
Usenet group . Messages posted to this group will make your email address visible to anyone on the Internet.
Your reply message has not been sent.
Your post was successful
From:
way4thesub <kapel... @gmail.com>
Date: Tue, 6 Jan 2009 15:16:45 -0800 (PST)
Local: Tues 6 Jan 2009 23:16
Subject: translated to Ruby
Hello all,
I've translated the php-text-statistics package to Ruby; you can view
the files below. Please note that I couldn't get the Gunning Fog Score to
work 100%.
Regards,
Adam
############### Code
require 'collections/sequenced_hash'
module ReadabilityIndices
class Readability
# Precision handed to `round` by each of the index methods below.
NumDecimalPlaces = 1

# Maps each index method name (as a symbol) to its human-readable title.
# NOTE(review): SequencedHash comes from the `collections` gem and presumably
# preserves insertion order -- confirm.
# The two multi-word titles had been split across lines by mail wrapping,
# which embedded a newline in the literal; they are rejoined here.
Titles = SequencedHash.new
Titles[:flesch_kincaid_grade_level] = 'Flesch-Kincaid Grade level'
Titles[:flesch_kincaid_reading_ease] = 'Flesch-Kincaid Reading Ease'
Titles[:gunning_fog_score] = 'Gunning-Fog score'
Titles[:coleman_liau_index] = 'Coleman-Liau Index'
Titles[:smog_index] = 'SMOG Index'
Titles[:automated_readability_index] = 'Automated Readability Index'
attr_accessor :text
# Stores the normalised form of +text+ (see clean_text) as this
# instance's text. Defaults to an empty string.
def initialize(text = '')
  normalised = clean_text(text)
  self.text = normalised
end
# Returns true when +index+ has a truthy entry in Titles, false otherwise.
def valid_index?(index)
  !!Titles[index]
end
# Flesch-Kincaid grade level:
#   0.39 * (words per sentence) + 11.8 * (syllables per word) - 15.59
# rounded to NumDecimalPlaces.
def flesch_kincaid_grade_level
  sentence_term = 0.39 * average_words_per_sentence
  syllable_term = 11.8 * average_syllables_per_word
  round(sentence_term + syllable_term - 15.59, NumDecimalPlaces)
end
# Flesch-Kincaid reading ease:
#   206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word)
# rounded to NumDecimalPlaces.
def flesch_kincaid_reading_ease
  sentence_penalty = 1.015 * average_words_per_sentence
  syllable_penalty = 84.6 * average_syllables_per_word
  round(206.835 - sentence_penalty - syllable_penalty, NumDecimalPlaces)
end
# Gunning-Fog score:
#   0.4 * (words per sentence + percentage of words with 3+ syllables)
# The percentage is requested with count_proper_nouns = false.
def gunning_fog_score
  fog_base = average_words_per_sentence + percentage_words_with_three_syllables(false)
  round(fog_base * 0.4, NumDecimalPlaces)
end
# Coleman-Liau index as implemented here:
#   5.89 * letters / words - 0.3 * sentences / words - 15.8
# rounded to NumDecimalPlaces.
# NOTE(review): the published formula scales the sentence term per 100 words
# (roughly 29.6 * sentences / words); the 0.3 coefficient here looks like it
# was carried over from the PHP original -- confirm before changing.
def coleman_liau_index
  letters_term = 5.89 * letter_count / word_count
  sentences_term = 0.3 * sentence_count / word_count
  round(letters_term - sentences_term - 15.8, NumDecimalPlaces)
end
# SMOG index as implemented here:
#   1.043 * sqrt(polysyllabic_words * (30 / sentences) + 3.1291)
# rounded to NumDecimalPlaces.
#
# The 30 / sentences ratio is computed in floating point. With integer
# operands Ruby truncates the quotient, which collapsed the ratio to 0 for
# any text longer than 30 sentences and skewed it for most other lengths.
#
# NOTE(review): the published SMOG formula adds 3.1291 outside the square
# root; the placement here looks like it mirrors the PHP original -- confirm
# before changing.
def smog_index
  scaled_polysyllables = words_with_three_syllables * (30.0 / sentence_count)
  round(1.043 * Math.sqrt(scaled_polysyllables + 3.1291), NumDecimalPlaces)
end
# Automated Readability Index:
#   4.71 * letters / words + 0.5 * words / sentences - 21.43
# rounded to NumDecimalPlaces.
def automated_readability_index
  letters_term = 4.71 * letter_count / word_count
  words_term = 0.5 * word_count / sentence_count
  round(letters_term + words_term - 21.43, NumDecimalPlaces)
end
# Text placed between a label and its value, and between report fields.
Colon = ": ".freeze
Separator = ", ".freeze

# Renders the requested readability indices as one line of text.
#
# indices     - array of index method names (symbols); when empty, every key
#               of Titles is reported.
# diagnostics - when true, the word, sentence and letter counts are reported
#               ahead of the indices.
#
# Each field is "<label><Colon> <value>" and fields are joined with
# Separator. The interpolations had been split across lines by mail wrapping
# ("#" + newline + "{Colon}"), which made them literal text; they are rebuilt
# here as single-line strings.
def get_indices_as_string(indices = [], diagnostics = true)
  indices = Titles.keys if indices.empty?
  fields = indices.map { |index| "#{Titles[index]}#{Colon} #{send(index)}" }
  if diagnostics
    fields = [
      "words#{Colon} #{word_count}",
      "sentences#{Colon} #{sentence_count}",
      "characters#{Colon} #{letter_count}"
    ] + fields
  end
  fields.join(Separator)
end
# private
# Normalises raw text so the counting methods can rely on a simple layout:
# words separated by single spaces, and every sentence ending in ". ".
#
# text - the raw String; it is not modified (a cleaned copy is returned).
#
# Returns the cleaned String.
def clean_text(text)
  # Replace commas, hyphens etc. (count them as spaces).
  cleaned = text.gsub(/[,:;()-]/, ' ')
  # Unify terminators.
  cleaned = cleaned.gsub(/[.!?]/, '.')
  # Add a final terminator, just in case it's missing.
  cleaned = cleaned.strip + '.'
  # Replace new lines with spaces.
  cleaned = cleaned.gsub(/[ ]*(\n|\r\n|\r)[ ]*/, ' ')
  # Collapse duplicated terminators.
  cleaned = cleaned.gsub(/\.[.\s?]+/, '.')
  # Pad sentence terminators. A literal replacement is used: interpolating $1
  # into the replacement string reads the match left by the *previous* gsub
  # (or nil), which dropped the terminator altogether.
  cleaned = cleaned.gsub(/[ ]*\./, '. ')
  # Remove multiple spaces.
  cleaned = cleaned.gsub(/[ ]+/, ' ')
  # Lower-case the word following each terminator (for the Gunning-Fog score,
  # which treats capitalised words as proper nouns). This is the step the
  # commented-out PHP preg_replace_callback line described.
  cleaned = cleaned.gsub(/\. [^ ]+/) { |match| match.downcase }
  cleaned.strip
end
# Rounds +num+ to +decimals+ decimal places and returns a Float.
#
# The scale factor is 10 raised to +decimals+. Multiplying 10 by +decimals+
# only happens to agree for decimals == 1; it scaled by 20 for 2 places and
# divided by zero (NaN) for 0 places.
def round(num, decimals)
  factor = 10.0**decimals
  (num * factor).round / factor
end
# Number of ASCII letters (A-Z, a-z) in the text.
def letter_count
  text.count('A-Za-z')
end
# Number of pieces the text splits into on /\.!?/, never less than 1.
def sentence_count
  pieces = text.split(/\.!?/).length
  pieces < 1 ? 1 : pieces
end
# Number of words returned by get_words.
def word_count
  get_words.size
end
# Returns the text split on runs of whitespace.
#
# The result is cached, but the cache is tied to the text it was built from:
# `text` is publicly writable, so a plain `@words ||=` memo kept returning
# the words of the old text after a reassignment.
def get_words
  current = text
  unless @words && @words_source == current
    # Keep a private copy so in-place edits of the text are detected too.
    @words_source = current.dup
    @words = current.split(/\s+/)
  end
  @words
end
# Mean number of words per sentence, as a Float.
def average_words_per_sentence
  word_count.to_f / sentence_count
end
# Mean number of syllables per word, as a Float.
def average_syllables_per_word
  total_syllables.to_f / get_words.size
end
# Sum of syllable_count over every word from get_words (0 for no words).
def total_syllables
  total = 0
  get_words.each { |word| total += syllable_count(word) }
  total
end
# Counts the words whose syllable_count is 3 or more.
#
# count_proper_nouns - when false, a qualifying word is only counted if its
#                      first character equals its own lower-cased form
#                      (i.e. it is not capitalised).
def words_with_three_syllables(count_proper_nouns = true)
  long_words = get_words.select do |word|
    next false if syllable_count(word) < 3
    count_proper_nouns || word[0..0] == word[0..0].downcase
  end
  long_words.size
end
# Percentage (0-100, Float) of words that have three or more syllables.
#
# count_proper_nouns - forwarded to words_with_three_syllables.
#
# The "* 100" factor had been pushed onto its own line by mail wrapping,
# detaching it from the division; the expression is kept on one line here.
def percentage_words_with_three_syllables(count_proper_nouns = true)
  words_with_three_syllables(count_proper_nouns) / word_count.to_f * 100
end
# Words whose syllable count is looked up directly instead of being computed.
ProblemWords = {
  'simile' => 3,
  'forever' => 3,
  'shoreline' => 2
}.freeze

# Each match of one of these patterns subtracts one syllable from the
# vowel-group count (letter groups that read as a single syllable).
MultiSyllablesThatAreOne = [
  /cial/,
  /tia/,
  /cius/,
  /cious/,
  /giu/,
  /ion/,
  /iou/,
  /sia$/,
  /[^aeiuoyt]{2,}ed$/,
  /.ely$/,
  /[cg]h?e[rsd]?$/,
  /rved?$/,
  /[aeiouy][dt]es?$/,
  /[aeiouy][^aeiouydt]e[rsd]?$/,
  /^[dr]e[aeiou][^aeiou]+$/, # Sorts out deal, deign etc
  /[aeiouy]rse$/ # Purse, hears
].freeze

# Each match of one of these patterns adds one syllable to the vowel-group
# count (letter groups that read as two syllables).
UniSyllablesThatAreTwo = [
  /ia/,
  /riet/,
  /dien/,
  /iu/,
  /io/,
  /ii/,
  /[aeiouym]bl$/,
  /[aeiou]{3}/,
  /^mc/,
  /ism$/,
  /([^aeiouy])\1l$/,
  /[^l]lien/,
  /^coa[dglx]./,
  /[^gq]ua[^auieo]/,
  /dnt$/,
  /uity$/,
  /ie(r|st)$/
].freeze

# Affixes that are counted as one syllable each and then stripped from the
# word before the remaining vowel groups are counted.
PrefixesAndSuffixes = [
  /^un/,
  /^fore/,
  /ly$/,
  /less$/,
  /ful$/,
  /ers?$/,
  /ings?$/
].freeze

# Estimates the number of syllables in +word+ (always at least 1).
#
# word - a String; case and surrounding whitespace are ignored and the
#        argument itself is not modified.
#
# Steps: look the word up in ProblemWords; count and strip the affixes in
# PrefixesAndSuffixes; drop everything that is not a-z; count the vowel
# groups; then adjust by the two pattern tables above.
def syllable_count(word)
  word = word.downcase.strip

  # Handle problem words first.
  known = ProblemWords[word]
  return known if known

  # Count the prefixes and suffixes, deleting each one as it is counted so
  # later patterns see the shortened word.
  num_syllables = 0
  PrefixesAndSuffixes.each do |affix|
    num_syllables += word.scan(affix).length
    word = word.gsub(affix, '')
  end

  # Remove non-letter characters. The pattern carries no flags: the PHP
  # original's "s" modifier is an encoding flag in Ruby, not dot-all, and
  # the word is already lower case so "i" is not needed either.
  word = word.gsub(/[^a-z]/, '')

  # Count word parts (runs of vowels). String#empty? is used so the method
  # does not depend on ActiveSupport's blank?.
  vowel_groups = word.split(/[^aeiouy]+/).reject { |part| part.empty? }
  num_syllables += vowel_groups.length

  # Subtract syllables that are really one.
  MultiSyllablesThatAreOne.each { |pattern| num_syllables -= word.scan(pattern).length }

  # Add syllables that are really two.
  UniSyllablesThatAreTwo.each { |pattern| num_syllables += word.scan(pattern).length }

  [1, num_syllables].max
end
end
end
############### RSpec tests
include ReadabilityIndices
describe "readability indices" do
before(:each) do
@readability_blank = Readability.new
end
it "should count simple syllable words correctly" do
  # [word, expected syllable count] pairs, checked in the listed order.
  [
    ['a', 1],
    ['was', 1],
    ['the', 1],
    ['and', 1],
    ['foobar', 2],
    ['hello', 2],
    ['world', 1],
    ['wonderful', 3],
    ['simple', 2],
    ['easy', 2],
    ['hard', 1],
    ['quick', 1],
    ['brown', 1],
    ['fox', 1],
    ['jumped', 1],
    ['over', 2],
    ['lazy', 2],
    ['dog', 1],
    ['camera', 3]
  ].each do |word, expected|
    @readability_blank.syllable_count(word).should == expected
  end
end
it "should count syllables on programmed exceptions" do
  # Words answered from the ProblemWords lookup table.
  [
    ['simile', 3],
    ['shoreline', 2],
    ['forever', 3]
  ].each do |word, expected|
    @readability_blank.syllable_count(word).should == expected
  end
end
it "should count complex syllable words correctly" do
@readability_blank.syllable_count
('antidisestablishmentarianism').should == 12
@readability_blank.syllable_count
('supercalifragilisticexpialidocious').should == 14
@readability_blank.syllable_count
('chlorofluorocarbonation').should == 8
@readability_blank.syllable_count('forethoughtfulness').should
== 4
@readability_blank.syllable_count('phosphorescent').should == 4
@readability_blank.syllable_count('theoretician').should == 5
@readability_blank.syllable_count('promiscuity').should == 5
@readability_blank.syllable_count('unbutlering').should == 4
@readability_blank.syllable_count('continuity').should == 5
@readability_blank.syllable_count('craunched').should == 1
@readability_blank.syllable_count('squelched').should == 1
@readability_blank.syllable_count('scrounge').should == 1
@readability_blank.syllable_count('coughed').should == 1
@readability_blank.syllable_count('smile').should == 1
@readability_blank.syllable_count('monopoly').should == 4
@readability_blank.syllable_count('doughey').should == 2
@readability_blank.syllable_count('doughier').should == 3
@readability_blank.syllable_count('leguminous').should == 4
@readability_blank.syllable_count('thoroughbreds').should == 3
@readability_blank.syllable_count('special').should == 2
@readability_blank.syllable_count('delicious').should == 3
@readability_blank.syllable_count('spatial').should == 2
@readability_blank.syllable_count('pacifism').should == 4
@readability_blank.syllable_count('coagulant').should == 4
@readability_blank.syllable_count('shouldn\'t').should == 2
@readability_blank.syllable_count('mcdonald').should == 3
@readability_blank.syllable_count('audience').should == 3
@readability_blank.syllable_count('finance').should == 2
@readability_blank.syllable_count('prevalence').should == 3
@readability_blank.syllable_count('impropriety').should == 5
@readability_blank.syllable_count('alien').should
...
read more »
You must
Sign in before you can post messages.
You do not have the permission required to post.
From:
"David Child" <d... @addedbytes.com>
Date: Wed, 7 Jan 2009 10:02:25 +0000
Local: Wed 7 Jan 2009 10:02
Subject: Re: translated to Ruby
Great work Adam!
On Tue, Jan 6, 2009 at 11:16 PM, way4thesub <kapel
... @gmail.com> wrote:
> Hello all,
> I've translated the php-text-statistics package to Ruby, you can view
> the files below. Please note I couldn't get the Gunning Fog Score to
> work 100%
> Regards,
> Adam
> ############### Code
> require 'collections/sequenced_hash'
> module ReadabilityIndices
> class Readability
> NumDecimalPlaces = 1
> Titles = SequencedHash.new
> Titles[:flesch_kincaid_grade_level] = 'Flesch-Kincaid Grade level'
> Titles[:flesch_kincaid_reading_ease] = 'Flesch-Kincaid Reading
> Ease'
> Titles[:gunning_fog_score] = 'Gunning-Fog score'
> Titles[:coleman_liau_index] = 'Coleman-Liau Index'
> Titles[:smog_index] = 'SMOG Index'
> Titles[:automated_readability_index] = 'Automated Readability
> Index'
> attr_accessor :text
> def initialize(text = '')
> self.text = clean_text(text)
> end
> def valid_index?(index)
> Titles[index] ? true : false
> end
> def flesch_kincaid_grade_level
> round(0.39 * average_words_per_sentence + 11.8 *
> average_syllables_per_word - 15.59, NumDecimalPlaces)
> end
> def flesch_kincaid_reading_ease
> round(206.835 - 1.015 * average_words_per_sentence - 84.6 *
> average_syllables_per_word, NumDecimalPlaces)
> end
> def gunning_fog_score
> round((average_words_per_sentence +
> percentage_words_with_three_syllables(false)) * 0.4, NumDecimalPlaces)
> end
> def coleman_liau_index
> round(5.89 * letter_count / word_count - 0.3 * sentence_count /
> word_count - 15.8, NumDecimalPlaces)
> end
> def smog_index
> round(1.043 * Math.sqrt((words_with_three_syllables * (30 /
> sentence_count)) + 3.1291), NumDecimalPlaces)
> end
> def automated_readability_index
> round(4.71 * letter_count / word_count + 0.5 * word_count /
> sentence_count - 21.43, NumDecimalPlaces)
> end
> Colon = ": "
> Separator = ", "
> def get_indices_as_string(indices = [], diagnostics = true)
> indices = (indices.empty? ? Titles.keys : indices)
> str = indices.inject([]){|arr, index| arr << "#{Titles[index]}#
> {Colon} #{self.send(index)}"; arr}.join(Separator)
> return diagnostics ? "words#{Colon} #{word_count}#{Separator}
> sentences#{Colon} #{sentence_count}#{Separator} characters#{Colon} #
> {letter_count}#{Separator}" + str : str
> end
> # private
> def clean_text(text)
> text.gsub!(/[,:;()-]/, ' ') # Replace commans, hyphens etc
> (count them as spaces)
> text.gsub!(/[\.!?]/, '.') # Unify terminators
> text = text.strip + '.' # Add final terminator, just in case
> it's missing.
> text.gsub!(/[ ]*(\n|\r\n|\r)[ ]*/, ' ') # Replace new lines with
> spaces
> text.gsub!(/([\.])[\.\s?]+/, ".") # Check for duplicated
> terminators
> text.gsub!(/[ ]*([\.])/, "#{$1} ") # Pad sentence terminators
> text.gsub!(/[ ]+/, ' ') # Remove multiple spaces
> #$strText = preg_replace_callback('/\. [^ ]+/', create_function
> ('$matches', 'return strtolower($matches[0]);'), $strText); // Lower
> case all words following terminators (for gunning fog score)
> return text.strip
> end
> def round(num, decimals)
> (num * 10 * decimals).round / (10 * decimals).to_f
> end
> def letter_count
> self.text.gsub(/[^A-Za-z]+/, '').length.to_i
> end
> def sentence_count
> [1, self.text.split(/\.!?/).length].max
> end
> def word_count
> get_words.length
> end
> def get_words
> @words ||= self.text.split(/\s+/)
> end
> def average_words_per_sentence
> word_count / sentence_count.to_f
> end
> def average_syllables_per_word
> total_syllables / get_words.length.to_f
> end
> def total_syllables
> get_words.inject(0){|sum, word| sum + syllable_count(word)}
> end
> def words_with_three_syllables(count_proper_nouns = true)
> get_words.inject(0) do |sum, word|
> if syllable_count(word) >= 3
> if count_proper_nouns
> sum += 1
> else
> sum += 1 if word[0..0] == word[0..0].downcase
> end
> end
> sum
> end
> end
> def percentage_words_with_three_syllables(count_proper_nouns =
> true)
> words_with_three_syllables(count_proper_nouns) / word_count.to_f
> * 100
> end
> ProblemWords = {
> 'simile' => 3,
> 'forever' => 3,
> 'shoreline' => 2
> }
> MultiSyllablesThatAreOne = [
> /cial/,
> /tia/,
> /cius/,
> /cious/,
> /giu/,
> /ion/,
> /iou/,
> /sia$/,
> /[^aeiuoyt]{2,}ed$/,
> /.ely$/,
> /[cg]h?e[rsd]?$/,
> /rved?$/,
> /[aeiouy][dt]es?$/,
> /[aeiouy][^aeiouydt]e[rsd]?$/,
> /^[dr]e[aeiou][^aeiou]+$/, #Sorts out deal, deign etc
> /[aeiouy]rse$/ #Purse, hears
> ]
> UniSyllablesThatAreTwo = [
> /ia/,
> /riet/,
> /dien/,
> /iu/,
> /io/,
> /ii/,
> /[aeiouym]bl$/,
> /[aeiou]{3}/,
> /^mc/,
> /ism$/,
> /([^aeiouy])\1l$/,
> /[^l]lien/,
> /^coa[dglx]./,
> /[^gq]ua[^auieo]/,
> /dnt$/,
> /uity$/,
> /ie(r|st)$/
> ]
> PrefixesAndSuffixes = [
> /^un/,
> /^fore/,
> /ly$/,
> /less$/,
> /ful$/,
> /ers?$/,
> /ings?$/
> ]
> def syllable_count(word)
> word = word.downcase.strip
> #handle problem words first
> return ProblemWords[word] if ProblemWords[word]
> #find number and delete prefixes and suffixes
> num_syllables = PrefixesAndSuffixes.inject(0) do |sum, prefix|
> word.scan(prefix){sum += 1}
> word.gsub!(prefix, '')
> sum
> end
> #remove non-word chars
> word.gsub!(/[^a-z]/is, '')
> #count word parts:
> num_syllables += word.split(/[^aeiouy]+/).inject(0){|sum,
> word_part| sum + (word_part.blank? ? 0 : 1)}
> #subtract out syllables that are really one:
> MultiSyllablesThatAreOne.each{|syl| word.scan(syl){num_syllables
> -= 1}}
> #add syllables that are really two:
> UniSyllablesThatAreTwo.each{|syl| word.scan(syl){num_syllables
> += 1}}
> return [1, num_syllables].max
> end
> end
> end
> ############### RSpec tests
> include ReadabilityIndices
> describe "readability indices" do
> before(:each) do
> @readability_blank = Readability.new
> end
> it "should count simple syllable words correctly" do
> @readability_blank.syllable_count('a').should == 1
> @readability_blank.syllable_count('was').should == 1
> @readability_blank.syllable_count('the').should == 1
> @readability_blank.syllable_count('and').should == 1
> @readability_blank.syllable_count('foobar').should == 2
> @readability_blank.syllable_count('hello').should == 2
> @readability_blank.syllable_count('world').should == 1
> @readability_blank.syllable_count('wonderful').should == 3
> @readability_blank.syllable_count('simple').should == 2
> @readability_blank.syllable_count('easy').should == 2
> @readability_blank.syllable_count('hard').should == 1
> @readability_blank.syllable_count('quick').should == 1
> @readability_blank.syllable_count('brown').should == 1
> @readability_blank.syllable_count('fox').should == 1
> @readability_blank.syllable_count('jumped').should == 1
> @readability_blank.syllable_count('over').should == 2
> @readability_blank.syllable_count('lazy').should == 2
> @readability_blank.syllable_count('dog').should == 1
> @readability_blank.syllable_count('camera').should == 3
> end
> it "should count syllables on programmed exceptions" do
> @readability_blank.syllable_count('simile').should == 3
> @readability_blank.syllable_count('shoreline').should == 2
> @readability_blank.syllable_count('forever').should == 3
> end
> it "should count complex syllable words correctly" do
> @readability_blank.syllable_count
> ('antidisestablishmentarianism').should == 12
> @readability_blank.syllable_count
> ('supercalifragilisticexpialidocious').should == 14
> @readability_blank.syllable_count
> ('chlorofluorocarbonation').should == 8
> @readability_blank.syllable_count('forethoughtfulness').should
> == 4
> @readability_blank.syllable_count('phosphorescent').should == 4
> @readability_blank.syllable_count('theoretician').should == 5
> @readability_blank.syllable_count('promiscuity').should == 5
> @readability_blank.syllable_count('unbutlering').should == 4
> @readability_blank.syllable_count('continuity').should == 5
> @readability_blank.syllable_count('craunched').should == 1
> @readability_blank.syllable_count('squelched').should == 1
> @readability_blank.syllable_count('scrounge').should == 1
> @readability_blank.syllable_count('coughed').should == 1
> @readability_blank.syllable_count('smile').should == 1
> @readability_blank.syllable_count('monopoly').should == 4
> @readability_blank.syllable_count('doughey').should == 2
> @readability_blank.syllable_count('doughier').should == 3
> @readability_blank.syllable_count('leguminous').should == 4
> @readability_blank.syllable_count('thoroughbreds').should == 3
> @readability_blank.syllable_count('special').should == 2
> @readability_blank.syllable_count('delicious').should == 3
> @readability_blank.syllable_count('spatial').should == 2
> @readability_blank.syllable_count('pacifism').should == 4
> @readability_blank.syllable_count('coagulant').should == 4
...
read more »
You must
Sign in before you can post messages.
You do not have the permission required to post.
From:
"Adam Kapelner" <kapel... @gmail.com>
Date: Wed, 7 Jan 2009 10:46:37 -0800
Local: Wed 7 Jan 2009 18:46
Subject: Re: translated to Ruby
My pleasure.
I couldn't figure out how to translate that one line in the clean text
function:
#$strText = preg_replace_callback('/\. [^ ]+/', create_function
('$matches', 'return strtolower($matches[0]);'), $strText); // Lower
case all words following terminators (for gunning fog score)
So I guess that's the reason why my Gunning-Fog is off a bit.
Would you mind adding these two files to the repository? I won't have any
time for the next two months.
Thanks!
Adam
On Wed, Jan 7, 2009 at 2:02 AM, David Child <d
... @addedbytes.com> wrote:
> Great work Adam!
> On Tue, Jan 6, 2009 at 11:16 PM, way4thesub <kapel... @gmail.com> wrote:
> > Hello all,
> > I've translated the php-text-statistics package to Ruby, you can view
> > the files below. Please note I couldn't get the Gunning Fog Score to
> > work 100%
> > Regards,
> > Adam
> > ############### Code
> > require 'collections/sequenced_hash'
> > module ReadabilityIndices
> > class Readability
> > NumDecimalPlaces = 1
> > Titles = SequencedHash.new
> > Titles[:flesch_kincaid_grade_level] = 'Flesch-Kincaid Grade level'
> > Titles[:flesch_kincaid_reading_ease] = 'Flesch-Kincaid Reading
> > Ease'
> > Titles[:gunning_fog_score] = 'Gunning-Fog score'
> > Titles[:coleman_liau_index] = 'Coleman-Liau Index'
> > Titles[:smog_index] = 'SMOG Index'
> > Titles[:automated_readability_index] = 'Automated Readability
> > Index'
> > attr_accessor :text
> > def initialize(text = '')
> > self.text = clean_text(text)
> > end
> > def valid_index?(index)
> > Titles[index] ? true : false
> > end
> > def flesch_kincaid_grade_level
> > round(0.39 * average_words_per_sentence + 11.8 *
> > average_syllables_per_word - 15.59, NumDecimalPlaces)
> > end
> > def flesch_kincaid_reading_ease
> > round(206.835 - 1.015 * average_words_per_sentence - 84.6 *
> > average_syllables_per_word, NumDecimalPlaces)
> > end
> > def gunning_fog_score
> > round((average_words_per_sentence +
> > percentage_words_with_three_syllables(false)) * 0.4, NumDecimalPlaces)
> > end
> > def coleman_liau_index
> > round(5.89 * letter_count / word_count - 0.3 * sentence_count /
> > word_count - 15.8, NumDecimalPlaces)
> > end
> > def smog_index
> > round(1.043 * Math.sqrt((words_with_three_syllables * (30 /
> > sentence_count)) + 3.1291), NumDecimalPlaces)
> > end
> > def automated_readability_index
> > round(4.71 * letter_count / word_count + 0.5 * word_count /
> > sentence_count - 21.43, NumDecimalPlaces)
> > end
> > Colon = ": "
> > Separator = ", "
> > def get_indices_as_string(indices = [], diagnostics = true)
> > indices = (indices.empty? ? Titles.keys : indices)
> > str = indices.inject([]){|arr, index| arr << "#{Titles[index]}#
> > {Colon} #{self.send(index)}"; arr}.join(Separator)
> > return diagnostics ? "words#{Colon} #{word_count}#{Separator}
> > sentences#{Colon} #{sentence_count}#{Separator} characters#{Colon} #
> > {letter_count}#{Separator}" + str : str
> > end
> > # private
> > def clean_text(text)
> > text.gsub!(/[,:;()-]/, ' ') # Replace commans, hyphens etc
> > (count them as spaces)
> > text.gsub!(/[\.!?]/, '.') # Unify terminators
> > text = text.strip + '.' # Add final terminator, just in case
> > it's missing.
> > text.gsub!(/[ ]*(\n|\r\n|\r)[ ]*/, ' ') # Replace new lines with
> > spaces
> > text.gsub!(/([\.])[\.\s?]+/, ".") # Check for duplicated
> > terminators
> > text.gsub!(/[ ]*([\.])/, "#{$1} ") # Pad sentence terminators
> > text.gsub!(/[ ]+/, ' ') # Remove multiple spaces
> > #$strText = preg_replace_callback('/\. [^ ]+/', create_function
> > ('$matches', 'return strtolower($matches[0]);'), $strText); // Lower
> > case all words following terminators (for gunning fog score)
> > return text.strip
> > end
> > def round(num, decimals)
> > (num * 10 * decimals).round / (10 * decimals).to_f
> > end
> > def letter_count
> > self.text.gsub(/[^A-Za-z]+/, '').length.to_i
> > end
> > def sentence_count
> > [1, self.text.split(/\.!?/).length].max
> > end
> > def word_count
> > get_words.length
> > end
> > def get_words
> > @words ||= self.text.split(/\s+/)
> > end
> > def average_words_per_sentence
> > word_count / sentence_count.to_f
> > end
> > def average_syllables_per_word
> > total_syllables / get_words.length.to_f
> > end
> > def total_syllables
> > get_words.inject(0){|sum, word| sum + syllable_count(word)}
> > end
> > def words_with_three_syllables(count_proper_nouns = true)
> > get_words.inject(0) do |sum, word|
> > if syllable_count(word) >= 3
> > if count_proper_nouns
> > sum += 1
> > else
> > sum += 1 if word[0..0] == word[0..0].downcase
> > end
> > end
> > sum
> > end
> > end
> > def percentage_words_with_three_syllables(count_proper_nouns =
> > true)
> > words_with_three_syllables(count_proper_nouns) / word_count.to_f
> > * 100
> > end
> > ProblemWords = {
> > 'simile' => 3,
> > 'forever' => 3,
> > 'shoreline' => 2
> > }
> > MultiSyllablesThatAreOne = [
> > /cial/,
> > /tia/,
> > /cius/,
> > /cious/,
> > /giu/,
> > /ion/,
> > /iou/,
> > /sia$/,
> > /[^aeiuoyt]{2,}ed$/,
> > /.ely$/,
> > /[cg]h?e[rsd]?$/,
> > /rved?$/,
> > /[aeiouy][dt]es?$/,
> > /[aeiouy][^aeiouydt]e[rsd]?$/,
> > /^[dr]e[aeiou][^aeiou]+$/, #Sorts out deal, deign etc
> > /[aeiouy]rse$/ #Purse, hears
> > ]
> > UniSyllablesThatAreTwo = [
> > /ia/,
> > /riet/,
> > /dien/,
> > /iu/,
> > /io/,
> > /ii/,
> > /[aeiouym]bl$/,
> > /[aeiou]{3}/,
> > /^mc/,
> > /ism$/,
> > /([^aeiouy])\1l$/,
> > /[^l]lien/,
> > /^coa[dglx]./,
> > /[^gq]ua[^auieo]/,
> > /dnt$/,
> > /uity$/,
> > /ie(r|st)$/
> > ]
> > PrefixesAndSuffixes = [
> > /^un/,
> > /^fore/,
> > /ly$/,
> > /less$/,
> > /ful$/,
> > /ers?$/,
> > /ings?$/
> > ]
> > def syllable_count(word)
> > word = word.downcase.strip
> > #handle problem words first
> > return ProblemWords[word] if ProblemWords[word]
> > #find number and delete prefixes and suffixes
> > num_syllables = PrefixesAndSuffixes.inject(0) do |sum, prefix|
> > word.scan(prefix){sum += 1}
> > word.gsub!(prefix, '')
> > sum
> > end
> > #remove non-word chars
> > word.gsub!(/[^a-z]/is, '')
> > #count word parts:
> > num_syllables += word.split(/[^aeiouy]+/).inject(0){|sum,
> > word_part| sum + (word_part.blank? ? 0 : 1)}
> > #subtract out syllables that are really one:
> > MultiSyllablesThatAreOne.each{|syl| word.scan(syl){num_syllables
> > -= 1}}
> > #add syllables that are really two:
> > UniSyllablesThatAreTwo.each{|syl| word.scan(syl){num_syllables
> > += 1}}
> > return [1, num_syllables].max
> > end
> > end
> > end
> > ############### RSpec tests
> > include ReadabilityIndices
> > describe "readability indices" do
> > before(:each) do
> > @readability_blank = Readability.new
> > end
> > it "should count simple syllable words correctly" do
> > @readability_blank.syllable_count('a').should == 1
> > @readability_blank.syllable_count('was').should == 1
> > @readability_blank.syllable_count('the').should == 1
> > @readability_blank.syllable_count('and').should == 1
> > @readability_blank.syllable_count('foobar').should == 2
> > @readability_blank.syllable_count('hello').should == 2
> > @readability_blank.syllable_count('world').should == 1
> > @readability_blank.syllable_count('wonderful').should == 3
> > @readability_blank.syllable_count('simple').should == 2
> > @readability_blank.syllable_count('easy').should == 2
> > @readability_blank.syllable_count('hard').should == 1
> > @readability_blank.syllable_count('quick').should == 1
> > @readability_blank.syllable_count('brown').should == 1
> > @readability_blank.syllable_count('fox').should == 1
> > @readability_blank.syllable_count('jumped').should == 1
> > @readability_blank.syllable_count('over').should == 2
> > @readability_blank.syllable_count('lazy').should == 2
> > @readability_blank.syllable_count('dog').should == 1
> > @readability_blank.syllable_count('camera').should == 3
> > end
> > it "should count syllables on programmed exceptions" do
> > @readability_blank.syllable_count('simile').should == 3
> > @readability_blank.syllable_count('shoreline').should == 2
> > @readability_blank.syllable_count('forever').should == 3
> > end
> > it "should count complex syllable words correctly" do
> > @readability_blank.syllable_count
> > ('antidisestablishmentarianism').should == 12
> > @readability_blank.syllable_count
> > ('supercalifragilisticexpialidocious').should == 14
> > @readability_blank.syllable_count
> > ('chlorofluorocarbonation').should == 8
> > @readability_blank.syllable_count('forethoughtfulness').should
> > == 4
> > @readability_blank.syllable_count('phosphorescent').should == 4
> > @readability_blank.syllable_count('theoretician').should == 5
> > @readability_blank.syllable_count('promiscuity').should == 5
...
read more »
You must
Sign in before you can post messages.
You do not have the permission required to post.