Online File
Rick Aster: Professional SAS Programming Shortcuts: Contents
/*
 * Text statistics report.
 *
 * Reads free-form text from the fileref TEXT (must be assigned before this
 * code runs) and reports:
 *   - word count and average word length
 *   - sentence count and average words per sentence
 *   - a frequency list of the words that make up at least 1% of all words
 */

/*
 * View WORK.WORD: one observation per non-blank word.
 *   wordtext  the word exactly as read from the input
 *   word      wordtext in uppercase with most punctuation removed
 *   length    number of characters in word, not counting apostrophes
 *   sentence  running sentence number, starting at 1
 *   sn        position of the word within its sentence
 */
data work.word (keep=word wordtext length sentence sn) / view=work.word;
  length wordtext word $ 24 letter endchar $ 1;
  retain sentenceend 0 sentence 1;
  infile text flowover;
  /* The double trailing @ holds the input line so that each iteration of
     the step reads the next blank-delimited word. */
  input wordtext @@;

  /* Word: convert word text to uppercase and remove most punctuation.
     Apostrophes are kept in word. */
  word = upcase(compress(wordtext, '!"#$%&()*+,-./:;<=>?@[\]^`{|}~'));
  length = length(compress(word, "'-"));

  if word ne '' then do;
    /* Check for sentence break. */
    if sentenceend then do;
      /* If the first letter is uppercase, start a new sentence.
         The test must look at the original text: word has already been
         converted to uppercase, so testing word would accept any letter. */
      li = anyalpha(wordtext);
      if li then letter = substr(wordtext, li, 1);
      else letter = ' ';
      if anyupper(letter) then do;
        sentence + 1;
        sn = 0;
      end;
      else sentenceend = 0;
    end;
    sn + 1;
    output;
  end;

  /* Possible end of sentence: word text ends in period, exclamation point,
     or question mark, possibly followed by quotation marks.
     Quotation marks are translated to blanks so that LENGTH returns the
     position of the last character that is not a quotation mark. */
  if sn > 0 then do;
    endchari = length(translate(wordtext, ' ', '"'''));
    endchar = substr(wordtext, endchari, 1);
    sentenceend = endchar in ('.', '!', '?');
  end;
run;

/* One observation per sentence: the sn value of the last word in a
   sentence is the number of words in that sentence (slength). */
data work.sentence (keep=sentence sn rename=(sn=slength));
  set work.word;
  by sentence;
  if last.sentence;
run;

/* Word count (wcount) and mean word length. */
proc summary data=work.word;
  var length;
  output mean= out=work.wsum (rename=(_freq_=wcount));
run;

/* Sentence count (scount) and mean sentence length in words. */
proc summary data=work.sentence;
  var slength;
  output mean= out=work.ssum (rename=(_freq_=scount));
run;

title1 'Thoreau Paragraph';

/* Print the summary statistics from the two single-observation data sets. */
data _null_;
  set work.wsum;
  set work.ssum;
  file print;
  put / 'Word count: ' wcount : comma9.
      / 'Average word length: ' length : 5.2
      / 'Sentence count: ' scount : comma7.
      / 'Words per sentence: ' slength : 6.2;
run;

/* Word frequencies, most frequent first. The output also contains the
   overall total in the observation with _type_ = 0. */
proc summary data=work.word order=freq;
  class word;
  output out=work.wordlist1 (rename=(_freq_=n));
run;

/* Attach the overall total (wordcount) to every word and compute each
   word's share of all words as a percentage. */
data wordlist (keep=word n percent);
  /* SET variables are retained, so wordcount read on the first iteration
     remains available for every later observation. */
  if _n_ = 1 then
    set work.wordlist1 (where=(_type_ = 0) rename=(n=wordcount));
  set work.wordlist1 (where=(_type_ > 0));
  percent = n/wordcount*100;
run;

/* List only the words that account for at least 1% of all words. */
proc print data=wordlist (where=(percent >= 1)) heading=horizontal noobs;
  var word n percent;
  format n comma6. percent f7.3;
run;