Data Loading & Inspection
pd.read_csv() - Load data from CSV files into DataFrame
df.head() - Display first 5 rows of DataFrame
df.tail() - Display last 5 rows of DataFrame
df.info() - Get DataFrame summary including data types
df.describe() - Generate descriptive statistics
df.shape - Get dimensions (rows, columns)
💡 Examples
# Load and inspect data
df = pd.read_csv('data.csv')
df.head(3) # First 3 rows
df.tail(2) # Last 2 rows
df.info() # Data types and memory usage
df.describe() # Statistical summary
print(df.shape) # (rows, columns) - e.g., (1000, 5)
Data Selection & Filtering
df['column'] - Select single column
df[['col1', 'col2']] - Select multiple columns
df.loc[] - Label-based data selection
df.iloc[] - Position-based data selection
df.query() - Filter data using string expressions
df[condition] - Boolean indexing for filtering
💡 Examples
# Select data
df['name'] # Single column
df[['name', 'age']] # Multiple columns
df.loc[0:2, 'name':'age'] # Rows 0-2, columns name to age
df.iloc[0:3, 0:2] # First 3 rows, first 2 columns
df.query('age > 25') # Filter using query
df[df['age'] > 25] # Boolean indexing
🧹 Data Cleaning
df.isnull() - Detect missing values
df.dropna() - Remove rows/columns with missing values
df.fillna() - Fill missing values
df.drop_duplicates() - Remove duplicate rows
df.replace() - Replace values in DataFrame
df.astype() - Convert data types
💡 Examples
# Clean data
df.isnull().sum() # Count missing values
df.dropna() # Remove rows with any NaN
df.fillna(0) # Fill NaN with 0
df.fillna(df.mean(numeric_only=True)) # Fill numeric columns with their column mean
df.drop_duplicates() # Remove duplicate rows
df.replace('old', 'new') # Replace values
df['col'].astype('int') # Convert to integer
Aggregation & Statistics
df.groupby() - Group data for aggregation
df.agg() - Apply multiple aggregation functions
df.sum() - Calculate sum of values
df.mean() - Calculate mean/average
df.count() - Count non-null values
df.value_counts() - Count occurrences of each unique value (Series) or unique row (DataFrame)
💡 Examples
# Aggregate data
df.groupby('category').sum() # Group by category, sum
df.groupby('dept').agg({'salary': ['mean', 'max']}) # Multiple aggs
df['sales'].sum() # Sum of sales column
df.mean(numeric_only=True) # Mean of all numeric columns
df.count() # Count non-null values
df['status'].value_counts() # Count occurrences of each unique value
Data Transformation
df.apply() - Apply function along axis
df['col'].map() - Map Series values using dictionary/function (DataFrame.map, pandas 2.1+, applies a function element-wise)
df.sort_values() - Sort DataFrame by column values
df.reset_index() - Reset DataFrame index
df.set_index() - Set column as index
df.rename() - Rename columns or index
💡 Examples
# Transform data
df['name'].apply(lambda x: x.upper()) # Apply function to each element of a column
df['grade'].map({'A': 90, 'B': 80}) # Map values using dictionary
df.sort_values('age', ascending=False) # Sort by age (descending)
df.reset_index(drop=True) # Reset index, drop old
df.set_index('id') # Set 'id' column as index
df.rename(columns={'old': 'new'}) # Rename columns
Data Merging & Joining
pd.merge() - Merge DataFrames on columns
pd.concat() - Concatenate DataFrames
df.join() - Join DataFrames on index
df.append() - Append rows to DataFrame (deprecated in pandas 1.4, removed in 2.0; use pd.concat() instead)
df.pivot() - Reshape data from long to wide format (no aggregation; use df.pivot_table() to aggregate)
df.melt() - Unpivot DataFrame
💡 Examples
# Merge and join data
pd.merge(df1, df2, on='id') # Merge on 'id' column
pd.concat([df1, df2]) # Concatenate vertically
df1.join(df2, on='key') # Join df1's 'key' column against df2's index
pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) # Append new row (df.append was removed in pandas 2.0)
df.pivot(index='date', columns='type', values='amount') # Pivot
df.melt(id_vars=['id'], value_vars=['col1', 'col2']) # Melt
💾 Data Export
df.to_csv() - Export DataFrame to CSV
df.to_excel() - Export DataFrame to Excel
df.to_json() - Export DataFrame to JSON
df.to_sql() - Export DataFrame to SQL database
df.to_html() - Export DataFrame to HTML
df.to_dict() - Convert DataFrame to dictionary
💡 Examples
# Export data
df.to_csv('output.csv', index=False) # Save to CSV
df.to_excel('output.xlsx', sheet_name='Data') # Save to Excel
df.to_json('output.json', orient='records') # Save to JSON
df.to_sql('table_name', con=engine) # Save to database
df.to_html('output.html') # Save to HTML
df.to_dict('records') # Convert to dict list