File Search

Search for files and content within the sandbox environment using filename patterns, content matching, metadata filters, and advanced query options for efficient file discovery.

🔍 Advanced Search Capabilities

File search supports content-based searching, metadata filtering, regex patterns, and multi-criteria search to efficiently locate files and data within the sandbox environment.

Overview

The File Search tool locates files within the sandbox environment by filename pattern, file contents, and metadata, and lets you combine these criteria for precise file discovery and content analysis.

Key Features

  • Content Searching - Search within file contents using text and regex patterns
  • Metadata Filtering - Filter by file size, date, type, and properties
  • Pattern Matching - Advanced filename and path pattern matching
  • Multi-Criteria Search - Combine multiple search criteria for precise results
  • Performance Optimization - Indexed searching and result caching for large datasets
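
For example, several of the criteria above can be combined in a single call. A minimal sketch using the fileSearch function documented under Methods below (the path and values are illustrative):

result = fileSearch({
    "searchPath": "/sandbox/projects",   # illustrative path
    "query": "TODO",                     # text to match in file contents
    "filePattern": "*.py",
    "sizeRange": {"max": 1024 * 1024},   # skip files larger than 1 MB
    "maxResults": 25
})

if result["success"]:
    print(f"{result['totalResults']} files matched")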

Methods

fileSearch

Search for files and content in the sandbox environment.

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| searchPath | String | No | Base path to search in (default: '/sandbox') |
| query | String | No | Text to search for in file contents |
| filePattern | String | No | File name pattern or glob expression |
| contentPattern | String | No | Regex pattern for content matching |
| fileTypes | Array | No | Filter by file extensions (e.g., ['.txt', '.csv']) |
| sizeRange | Object | No | File size range filter |
| dateRange | Object | No | File date range filter |
| recursive | Boolean | No | Search subdirectories recursively (default: true) |
| maxResults | Number | No | Maximum number of results to return (default: 100) |
| includeContent | Boolean | No | Include matched content snippets (default: false) |

Example request:
{
  "searchPath": "/sandbox/data",
  "query": "error analysis",
  "filePattern": "*.log",
  "sizeRange": {
    "min": 1024,
    "max": 10485760
  },
  "recursive": true,
  "maxResults": 50,
  "includeContent": true
}

Output:

  • success (Boolean) - Search operation success status
  • totalResults (Number) - Total number of matching results
  • returnedResults (Number) - Number of results returned in response
  • searchTime (Number) - Search operation duration in milliseconds
  • results (Array) - Array of matching files and content
    • filePath (String) - Path to matching file
    • fileName (String) - File name
    • fileSize (Number) - File size in bytes
    • lastModified (String) - File modification timestamp
    • matches (Array) - Content matches (if content search performed)
      • lineNumber (Number) - Line number of match
      • content (String) - Matched content snippet
      • context (String) - Surrounding context
  • searchCriteria (Object) - Applied search criteria summary
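
As a sketch, the returned structure can be consumed like this, assuming includeContent was set so that matches is populated (the path and query are illustrative):

result = fileSearch({
    "searchPath": "/sandbox/logs",   # illustrative path
    "query": "error",
    "includeContent": True
})

if result["success"]:
    print(f"Showing {result['returnedResults']} of {result['totalResults']} "
          f"results in {result['searchTime']}ms")
    for entry in result["results"]:
        print(f"{entry['filePath']} ({entry['fileSize']} bytes, "
              f"modified {entry['lastModified']})")
        # 'matches' is only present when a content search was performed
        for match in entry.get("matches", []):
            print(f"  line {match['lineNumber']}: {match['content']}")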

Content-Based Searching
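
A minimal content-search sketch: a plain-text query is refined with a regex contentPattern, and includeContent requests the matched snippets (the path and patterns are illustrative):

# Search log contents for lines that look like database timeouts.
result = fileSearch({
    "searchPath": "/sandbox/logs",        # illustrative path
    "query": "timeout",                   # plain-text content match
    "contentPattern": r"ERROR.*timeout",  # regex refinement
    "fileTypes": [".log"],
    "includeContent": True                # return matched snippets
})

if result["success"]:
    for entry in result["results"]:
        for match in entry.get("matches", []):
            print(f"{entry['filePath']}:{match['lineNumber']} {match['content']}")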

File-Based Searching
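
A minimal file-based sketch that matches on name, extension, size, and modification date rather than content. The ISO date format follows the dateRange usage shown later on this page; the path is illustrative:

import datetime

# Find CSV/JSON exports over 1 MB that were modified in the last week.
one_week_ago = (datetime.datetime.now() - datetime.timedelta(days=7)).isoformat()

result = fileSearch({
    "searchPath": "/sandbox/data",        # illustrative path
    "filePattern": "*export*",
    "fileTypes": [".csv", ".json"],
    "sizeRange": {"min": 1024 * 1024},    # at least 1 MB
    "dateRange": {"start": one_week_ago},
    "recursive": True
})

if result["success"]:
    for entry in result["results"]:
        print(f"{entry['filePath']} ({entry['fileSize']} bytes)")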

Advanced Search Operations

import datetime

def comprehensive_file_search(search_config):
    """Perform a comprehensive search with multiple criteria."""
    
    search_results = {}
    
    for search_name, criteria in search_config.items():
        print(f"🔍 Executing search: {search_name}")
        
        # Build search parameters
        search_params = {
            "searchPath": criteria.get('path', '/sandbox'),
            "recursive": criteria.get('recursive', True),
            "maxResults": criteria.get('max_results', 100)
        }
        
        # Add optional parameters
        if 'query' in criteria:
            search_params['query'] = criteria['query']
        if 'file_pattern' in criteria:
            search_params['filePattern'] = criteria['file_pattern']
        if 'content_pattern' in criteria:
            search_params['contentPattern'] = criteria['content_pattern']
        if 'file_types' in criteria:
            search_params['fileTypes'] = criteria['file_types']
        if 'size_range' in criteria:
            search_params['sizeRange'] = criteria['size_range']
        if 'date_range' in criteria:
            search_params['dateRange'] = criteria['date_range']
        if 'include_content' in criteria:
            search_params['includeContent'] = criteria['include_content']
        
        # Execute search
        result = fileSearch(search_params)
        
        if result['success']:
            search_results[search_name] = {
                "success": True,
                "matches": result['totalResults'],
                "files": result['results'],
                "search_time": result['searchTime'],
                "criteria": criteria
            }
            print(f"   ✅ Found {result['totalResults']} matches in {result['searchTime']}ms")
        else:
            search_results[search_name] = {
                "success": False,
                "error": result.get('error'),
                "criteria": criteria
            }
            print(f"   ❌ Search failed: {result.get('error')}")
    
    return search_results

def generate_search_report(search_results):
    """Generate comprehensive search report."""
    
    report = {
        "summary": {
            "total_searches": len(search_results),
            "successful_searches": len([r for r in search_results.values() if r['success']]),
            "total_matches": sum(r.get('matches', 0) for r in search_results.values() if r['success']),
            "total_search_time": sum(r.get('search_time', 0) for r in search_results.values() if r['success'])
        },
        "detailed_results": search_results
    }
    
    print("\n📊 Search Report Summary:")
    print(f"   Searches executed: {report['summary']['total_searches']}")
    print(f"   Successful: {report['summary']['successful_searches']}")
    print(f"   Total matches: {report['summary']['total_matches']}")
    print(f"   Total search time: {report['summary']['total_search_time']}ms")
    
    # Top performing searches
    successful_searches = [
        (name, result) for name, result in search_results.items() 
        if result['success']
    ]
    
    if successful_searches:
        print("\n🏆 Top searches by matches:")
        top_searches = sorted(successful_searches, key=lambda x: x[1]['matches'], reverse=True)[:5]
        for i, (name, result) in enumerate(top_searches, 1):
            print(f"   {i}. {name}: {result['matches']} matches")
    
    return report

# Usage with comprehensive search configuration
search_configuration = {
    "error_analysis": {
        "query": "error",
        "file_types": [".log", ".txt"],
        "path": "/sandbox/logs",
        "include_content": True,
        "max_results": 50
    },
    "large_data_files": {
        "file_pattern": "*data*",
        "file_types": [".csv", ".xlsx", ".json"],
        "size_range": {"min": 5 * 1024 * 1024},  # > 5MB
        "max_results": 30
    },
    "recent_python_files": {
        "file_types": [".py", ".ipynb"],
        "date_range": {
            "start": (datetime.datetime.now() - datetime.timedelta(days=30)).isoformat()
        },
        "max_results": 100
    },
    "config_with_passwords": {
        "content_pattern": r"password\s*[:=]\s*['\"][^'\"]+['\"]",
        "file_types": [".json", ".yaml", ".ini", ".conf"],
        "include_content": True,
        "max_results": 20
    },
    "temporary_files": {
        "file_pattern": "*.tmp",
        "path": "/sandbox/temp",
        "recursive": True,
        "max_results": 200
    }
}

comprehensive_results = comprehensive_file_search(search_configuration)
search_report = generate_search_report(comprehensive_results)

Search Performance and Optimization

Search Result Caching

import hashlib
import time

def cached_search_manager():
    """Manage cached search results for performance."""
    
    search_cache = {}
    
    def execute_search_with_cache(search_key, search_params, cache_duration_minutes=30):
        """Execute search with result caching."""
        
        # Generate a cache key from the normalized search parameters
        cache_key = hashlib.md5(str(sorted(search_params.items())).encode()).hexdigest()
        current_time = time.time()
        
        # Check cache
        if cache_key in search_cache:
            cached_result = search_cache[cache_key]
            if current_time - cached_result['timestamp'] < cache_duration_minutes * 60:
                print(f"🚀 Using cached result for: {search_key}")
                return cached_result['result']
        
        # Execute search
        print(f"🔍 Executing fresh search: {search_key}")
        result = fileSearch(search_params)
        
        # Cache result
        if result['success']:
            search_cache[cache_key] = {
                'result': result,
                'timestamp': current_time,
                'search_key': search_key
            }
        
        return result
    
    def clear_expired_cache():
        """Clear expired cache entries."""
        current_time = time.time()
        expired_keys = [
            key for key, data in search_cache.items()
            if current_time - data['timestamp'] > 1800  # 30 minutes
        ]
        
        for key in expired_keys:
            del search_cache[key]
        
        print(f"🧹 Cleared {len(expired_keys)} expired cache entries")
    
    def get_cache_stats():
        """Get cache statistics."""
        return {
            "total_entries": len(search_cache),
            "entries": [
                {
                    "search_key": data['search_key'],
                    "age_minutes": (time.time() - data['timestamp']) / 60,
                    "results": data['result']['totalResults']
                }
                for data in search_cache.values()
            ]
        }
    
    return execute_search_with_cache, clear_expired_cache, get_cache_stats

# Usage
cached_search, clear_cache, cache_stats = cached_search_manager()

# Perform searches with caching
result1 = cached_search("error_logs", {
    "query": "ERROR",
    "fileTypes": [".log"],
    "recursive": True
})

# Same search will use cache
result2 = cached_search("error_logs", {
    "query": "ERROR", 
    "fileTypes": [".log"],
    "recursive": True
})

# Check cache statistics
stats = cache_stats()
print(f"Cache contains {stats['total_entries']} entries")

Error Handling

Common Search Issues

| Error Type | Cause | Resolution |
| --- | --- | --- |
| Path Not Found | Search path doesn't exist | Verify path and check permissions |
| Permission Denied | Insufficient read permissions | Check directory and file permissions |
| Pattern Too Broad | Search pattern matches too many files | Refine search criteria and add filters |
| Timeout Error | Search operation takes too long | Use smaller search scope or pagination |
| Memory Limit | Too many results in memory | Reduce maxResults or use streaming |
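
For Timeout Error and Memory Limit cases, the table suggests shrinking the search scope. One way to do that is to split a broad search into several narrower ones. A sketch, assuming the subdirectories to split across are known in advance (the paths are illustrative):

# Split one broad search into per-directory searches so each
# operation stays small and bounded.
subdirectories = ["/sandbox/logs", "/sandbox/data", "/sandbox/temp"]

all_results = []
for path in subdirectories:
    result = fileSearch({
        "searchPath": path,
        "query": "error",
        "maxResults": 50,   # cap the result set per search
        "recursive": True
    })
    if result["success"]:
        all_results.extend(result["results"])
    else:
        print(f"Search in {path} failed: {result.get('error')}")

print(f"Collected {len(all_results)} matches from {len(subdirectories)} directories")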

Robust Search Implementation

def robust_search_with_fallbacks(primary_search, fallback_searches=None):
    """Perform search with fallback strategies."""
    
    # Try primary search first
    try:
        result = fileSearch(primary_search)
        
        if result['success'] and result['totalResults'] > 0:
            return {
                "success": True,
                "search_type": "primary",
                "result": result
            }
        elif result['success'] and result['totalResults'] == 0:
            print("⚠️ Primary search returned no results, trying fallbacks...")
        else:
            print(f"⚠️ Primary search failed: {result.get('error')}")
    
    except Exception as e:
        print(f"💥 Primary search exception: {str(e)}")
    
    # Try fallback searches
    if fallback_searches:
        for i, fallback in enumerate(fallback_searches):
            print(f"🔄 Trying fallback {i+1}: {fallback.get('description', 'Unknown')}")
            
            try:
                fallback_result = fileSearch(fallback['params'])
                
                if fallback_result['success'] and fallback_result['totalResults'] > 0:
                    return {
                        "success": True,
                        "search_type": f"fallback_{i+1}",
                        "description": fallback.get('description'),
                        "result": fallback_result
                    }
            
            except Exception as e:
                print(f"💥 Fallback {i+1} exception: {str(e)}")
    
    # All searches failed
    return {
        "success": False,
        "error": "All search strategies failed",
        "attempted": ["primary"] + [f"fallback_{i+1}" for i in range(len(fallback_searches or []))]
    }

# Usage with fallback strategies
primary_search_params = {
    "query": "specific error message",
    "fileTypes": [".log"],
    "contentPattern": r"ERROR.*database.*connection",
    "recursive": True
}

fallback_strategies = [
    {
        "description": "Broader error search",
        "params": {
            "query": "error",
            "fileTypes": [".log"],
            "recursive": True
        }
    },
    {
        "description": "Any log files",
        "params": {
            "filePattern": "*.log",
            "recursive": True
        }
    },
    {
        "description": "Any text files with error",
        "params": {
            "query": "error",
            "fileTypes": [".txt", ".log", ".out"],
            "recursive": True
        }
    }
]

robust_result = robust_search_with_fallbacks(primary_search_params, fallback_strategies)

if robust_result['success']:
    print(f"✅ Search successful using {robust_result['search_type']}")
    print(f"   Found {robust_result['result']['totalResults']} results")
else:
    print(f"❌ All search attempts failed")

Next Steps: Combine with Read File to analyze found content, or use File Metadata for detailed file analysis.