vB_RelatedText_Vocabulary
in package
uses
vB_Trait_NoSerialize
Table of Contents
Methods
- __construct() : mixed
- __serialize() : array<string|int, mixed>
- __sleep() : array<string|int, mixed>
- __unserialize() : void
- __wakeup() : void
- createFromFile() : self
- filterRemovedTerms() : void
  Remove terms from the data array that are no longer in the vocabulary.
- filterTermsByFrequency() : void
- getCount() : int
- getDocCounts() : array<string|int, mixed>
- getTotalDocCount() : int
- getVocabulary() : array<string|int, mixed>
- ordinalToWord() : string|null
- processText() : array<string|int, mixed>
- wordToOrdinal() : int|null
- writeVocabulary() : void
Methods
__construct()
public
__construct(vB_Utility_String $stringutil, int $minlength, int $maxlength) : mixed
Parameters
- $stringutil : vB_Utility_String
- $minlength : int
- $maxlength : int
__serialize()
public
__serialize() : array<string|int, mixed>
Return values
array<string|int, mixed>
__sleep()
public
__sleep() : array<string|int, mixed>
Return values
array<string|int, mixed>
__unserialize()
public
__unserialize(array<string|int, mixed> $serialized) : void
Parameters
- $serialized : array<string|int, mixed>
__wakeup()
public
__wakeup() : void
createFromFile()
public
static createFromFile(string $filename, vB_Utility_String $stringutil) : self
Parameters
- $filename : string
- $stringutil : vB_Utility_String
Return values
self
filterRemovedTerms()
Remove terms from the data array that are no longer in the vocabulary.
public
filterRemovedTerms(array<string|int, mixed> &$data) : void
Terms typically drop out of the vocabulary as a result of calling filterTermsByFrequency().
Parameters
- $data : array<string|int, mixed>
filterTermsByFrequency()
public
filterTermsByFrequency(int $minDocCount, float $maxDocRatio) : void
Parameters
- $minDocCount : int
- $maxDocRatio : float
getCount()
public
getCount() : int
Return values
int
getDocCounts()
public
getDocCounts() : array<string|int, mixed>
Return values
array<string|int, mixed>
getTotalDocCount()
public
getTotalDocCount() : int
Return values
int
getVocabulary()
public
getVocabulary() : array<string|int, mixed>
Return values
array<string|int, mixed>
ordinalToWord()
public
ordinalToWord(int $ordinal) : string|null
Parameters
- $ordinal : int
Return values
string|null
processText()
public
processText(string $text) : array<string|int, mixed>
Parameters
- $text : string
Return values
array<string|int, mixed>
wordToOrdinal()
public
wordToOrdinal(string $word) : int|null
Parameters
- $word : string
Return values
int|null
writeVocabulary()
public
writeVocabulary(string $filename) : void
Parameters
- $filename : string